xref: /openbmc/qemu/hw/net/rocker/rocker.c (revision 09a274d8)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "hw/pci/msix.h"
22 #include "net/net.h"
23 #include "net/eth.h"
24 #include "qapi/error.h"
25 #include "qapi/qapi-commands-rocker.h"
26 #include "qemu/iov.h"
27 #include "qemu/bitops.h"
28 
29 #include "rocker.h"
30 #include "rocker_hw.h"
31 #include "rocker_fp.h"
32 #include "rocker_desc.h"
33 #include "rocker_tlv.h"
34 #include "rocker_world.h"
35 #include "rocker_of_dpa.h"
36 
/*
 * State of one emulated rocker switch PCI device.
 */
struct rocker {
    /* private */
    PCIDevice parent_obj;
    /* public */

    MemoryRegion mmio;
    MemoryRegion msix_bar;

    /* switch configuration */
    char *name;                  /* switch name */
    char *world_name;            /* world name */
    uint32_t fp_ports;           /* front-panel port count */
    NICPeers *fp_ports_peers;
    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
    uint64_t switch_id;          /* switch id */

    /* front-panel ports; the code in this file walks slots [0, fp_ports) */
    FpPort *fp_port[ROCKER_FP_PORTS_MAX];

    /* register backings */
    uint32_t test_reg;
    uint64_t test_reg64;
    dma_addr_t test_dma_addr;
    uint32_t test_dma_size;
    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */

    /* desc rings: command ring, event ring, then a tx/rx pair per port */
    DescRing **rings;

    /* switch worlds, indexed by enum rocker_world_type */
    World *worlds[ROCKER_WORLD_TYPE_MAX];
    World *world_dflt;

    QLIST_ENTRY(rocker) next;    /* linkage in the file-scope rockers list */
};
72 
/* Type name string handed to OBJECT_CHECK() by the ROCKER() macro below */
#define TYPE_ROCKER "rocker"

/* Convert an object pointer to Rocker * through OBJECT_CHECK() */
#define ROCKER(obj) \
    OBJECT_CHECK(Rocker, (obj), TYPE_ROCKER)

/* List of rocker instances; rocker_find() searches it by switch name */
static QLIST_HEAD(, rocker) rockers;
79 
80 Rocker *rocker_find(const char *name)
81 {
82     Rocker *r;
83 
84     QLIST_FOREACH(r, &rockers, next)
85         if (strcmp(r->name, name) == 0) {
86             return r;
87         }
88 
89     return NULL;
90 }
91 
92 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
93 {
94     if (type < ROCKER_WORLD_TYPE_MAX) {
95         return r->worlds[type];
96     }
97     return NULL;
98 }
99 
100 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
101 {
102     RockerSwitch *rocker;
103     Rocker *r;
104 
105     r = rocker_find(name);
106     if (!r) {
107         error_setg(errp, "rocker %s not found", name);
108         return NULL;
109     }
110 
111     rocker = g_new0(RockerSwitch, 1);
112     rocker->name = g_strdup(r->name);
113     rocker->id = r->switch_id;
114     rocker->ports = r->fp_ports;
115 
116     return rocker;
117 }
118 
119 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
120 {
121     RockerPortList *list = NULL;
122     Rocker *r;
123     int i;
124 
125     r = rocker_find(name);
126     if (!r) {
127         error_setg(errp, "rocker %s not found", name);
128         return NULL;
129     }
130 
131     for (i = r->fp_ports - 1; i >= 0; i--) {
132         RockerPortList *info = g_malloc0(sizeof(*info));
133         info->value = g_malloc0(sizeof(*info->value));
134         struct fp_port *port = r->fp_port[i];
135 
136         fp_port_get_info(port, info);
137         info->next = list;
138         list = info;
139     }
140 
141     return list;
142 }
143 
144 uint32_t rocker_fp_ports(Rocker *r)
145 {
146     return r->fp_ports;
147 }
148 
149 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
150                                             DescRing *ring)
151 {
152     return (desc_ring_index(ring) - 2) / 2 + 1;
153 }
154 
155 static int tx_consume(Rocker *r, DescInfo *info)
156 {
157     PCIDevice *dev = PCI_DEVICE(r);
158     char *buf = desc_get_buf(info, true);
159     RockerTlv *tlv_frag;
160     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
161     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
162     uint32_t pport;
163     uint32_t port;
164     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
165     uint16_t tx_l3_csum_off = 0;
166     uint16_t tx_tso_mss = 0;
167     uint16_t tx_tso_hdr_len = 0;
168     int iovcnt = 0;
169     int err = ROCKER_OK;
170     int rem;
171     int i;
172 
173     if (!buf) {
174         return -ROCKER_ENXIO;
175     }
176 
177     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
178 
179     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
180         return -ROCKER_EINVAL;
181     }
182 
183     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
184     if (!fp_port_from_pport(pport, &port)) {
185         return -ROCKER_EINVAL;
186     }
187 
188     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
189         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
190     }
191 
192     switch (tx_offload) {
193     case ROCKER_TX_OFFLOAD_L3_CSUM:
194         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
195             return -ROCKER_EINVAL;
196         }
197         break;
198     case ROCKER_TX_OFFLOAD_TSO:
199         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
200             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
201             return -ROCKER_EINVAL;
202         }
203         break;
204     }
205 
206     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
207         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
208     }
209 
210     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
211         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
212     }
213 
214     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
215         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
216     }
217 
218     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
219         hwaddr frag_addr;
220         uint16_t frag_len;
221 
222         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
223             err = -ROCKER_EINVAL;
224             goto err_bad_attr;
225         }
226 
227         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
228 
229         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
230             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
231             err = -ROCKER_EINVAL;
232             goto err_bad_attr;
233         }
234 
235         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
236         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
237 
238         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
239             goto err_too_many_frags;
240         }
241         iov[iovcnt].iov_len = frag_len;
242         iov[iovcnt].iov_base = g_malloc(frag_len);
243 
244         pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
245                      iov[iovcnt].iov_len);
246 
247         iovcnt++;
248     }
249 
250     if (iovcnt) {
251         /* XXX perform Tx offloads */
252         /* XXX   silence compiler for now */
253         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
254     }
255 
256     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
257 
258 err_too_many_frags:
259 err_bad_attr:
260     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
261         g_free(iov[i].iov_base);
262     }
263 
264     return err;
265 }
266 
267 static int cmd_get_port_settings(Rocker *r,
268                                  DescInfo *info, char *buf,
269                                  RockerTlv *cmd_info_tlv)
270 {
271     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
272     RockerTlv *nest;
273     FpPort *fp_port;
274     uint32_t pport;
275     uint32_t port;
276     uint32_t speed;
277     uint8_t duplex;
278     uint8_t autoneg;
279     uint8_t learning;
280     char *phys_name;
281     MACAddr macaddr;
282     enum rocker_world_type mode;
283     size_t tlv_size;
284     int pos;
285     int err;
286 
287     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
288                             cmd_info_tlv);
289 
290     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
291         return -ROCKER_EINVAL;
292     }
293 
294     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
295     if (!fp_port_from_pport(pport, &port)) {
296         return -ROCKER_EINVAL;
297     }
298     fp_port = r->fp_port[port];
299 
300     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
301     if (err) {
302         return err;
303     }
304 
305     fp_port_get_macaddr(fp_port, &macaddr);
306     mode = world_type(fp_port_get_world(fp_port));
307     learning = fp_port_get_learning(fp_port);
308     phys_name = fp_port_get_name(fp_port);
309 
310     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
311                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
312                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
313                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
314                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
315                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
316                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
317                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
318                rocker_tlv_total_size(strlen(phys_name));
319 
320     if (tlv_size > desc_buf_size(info)) {
321         return -ROCKER_EMSGSIZE;
322     }
323 
324     pos = 0;
325     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
326     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
327     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
328     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
329     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
330     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
331                    sizeof(macaddr.a), macaddr.a);
332     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
333     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
334                       learning);
335     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
336                    strlen(phys_name), phys_name);
337     rocker_tlv_nest_end(buf, &pos, nest);
338 
339     return desc_set_buf(info, tlv_size);
340 }
341 
342 static int cmd_set_port_settings(Rocker *r,
343                                  RockerTlv *cmd_info_tlv)
344 {
345     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
346     FpPort *fp_port;
347     uint32_t pport;
348     uint32_t port;
349     uint32_t speed;
350     uint8_t duplex;
351     uint8_t autoneg;
352     uint8_t learning;
353     MACAddr macaddr;
354     enum rocker_world_type mode;
355     int err;
356 
357     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
358                             cmd_info_tlv);
359 
360     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
361         return -ROCKER_EINVAL;
362     }
363 
364     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
365     if (!fp_port_from_pport(pport, &port)) {
366         return -ROCKER_EINVAL;
367     }
368     fp_port = r->fp_port[port];
369 
370     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
371         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
372         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
373 
374         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
375         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
376         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
377 
378         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
379         if (err) {
380             return err;
381         }
382     }
383 
384     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
385         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
386             sizeof(macaddr.a)) {
387             return -ROCKER_EINVAL;
388         }
389         memcpy(macaddr.a,
390                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
391                sizeof(macaddr.a));
392         fp_port_set_macaddr(fp_port, &macaddr);
393     }
394 
395     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
396         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
397         if (mode >= ROCKER_WORLD_TYPE_MAX) {
398             return -ROCKER_EINVAL;
399         }
400         /* We don't support world change. */
401         if (!fp_port_check_world(fp_port, r->worlds[mode])) {
402             return -ROCKER_EINVAL;
403         }
404     }
405 
406     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
407         learning =
408             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
409         fp_port_set_learning(fp_port, learning);
410     }
411 
412     return ROCKER_OK;
413 }
414 
415 static int cmd_consume(Rocker *r, DescInfo *info)
416 {
417     char *buf = desc_get_buf(info, false);
418     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
419     RockerTlv *info_tlv;
420     World *world;
421     uint16_t cmd;
422     int err;
423 
424     if (!buf) {
425         return -ROCKER_ENXIO;
426     }
427 
428     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
429 
430     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
431         return -ROCKER_EINVAL;
432     }
433 
434     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
435     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
436 
437     /* This might be reworked to something like this:
438      * Every world will have an array of command handlers from
439      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
440      * up to each world to implement whatever command it want.
441      * It can reference "generic" commands as cmd_set_port_settings or
442      * cmd_get_port_settings
443      */
444 
445     switch (cmd) {
446     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
447     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
448     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
449     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
450     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
451     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
452     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
453     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
454         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
455         err = world_do_cmd(world, info, buf, cmd, info_tlv);
456         break;
457     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
458         err = cmd_get_port_settings(r, info, buf, info_tlv);
459         break;
460     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
461         err = cmd_set_port_settings(r, info_tlv);
462         break;
463     default:
464         err = -ROCKER_EINVAL;
465         break;
466     }
467 
468     return err;
469 }
470 
471 static void rocker_msix_irq(Rocker *r, unsigned vector)
472 {
473     PCIDevice *dev = PCI_DEVICE(r);
474 
475     DPRINTF("MSI-X notify request for vector %d\n", vector);
476     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
477         DPRINTF("incorrect vector %d\n", vector);
478         return;
479     }
480     msix_notify(dev, vector);
481 }
482 
483 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
484 {
485     DescRing *ring = r->rings[ROCKER_RING_EVENT];
486     DescInfo *info = desc_ring_fetch_desc(ring);
487     RockerTlv *nest;
488     char *buf;
489     size_t tlv_size;
490     int pos;
491     int err;
492 
493     if (!info) {
494         return -ROCKER_ENOBUFS;
495     }
496 
497     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
498                rocker_tlv_total_size(0) +                 /* nest */
499                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
500                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
501 
502     if (tlv_size > desc_buf_size(info)) {
503         err = -ROCKER_EMSGSIZE;
504         goto err_too_big;
505     }
506 
507     buf = desc_get_buf(info, false);
508     if (!buf) {
509         err = -ROCKER_ENOMEM;
510         goto err_no_mem;
511     }
512 
513     pos = 0;
514     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
515                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
516     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
517     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
518     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
519                       link_up ? 1 : 0);
520     rocker_tlv_nest_end(buf, &pos, nest);
521 
522     err = desc_set_buf(info, tlv_size);
523 
524 err_too_big:
525 err_no_mem:
526     if (desc_ring_post_desc(ring, err)) {
527         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
528     }
529 
530     return err;
531 }
532 
533 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
534                                uint16_t vlan_id)
535 {
536     DescRing *ring = r->rings[ROCKER_RING_EVENT];
537     DescInfo *info;
538     FpPort *fp_port;
539     uint32_t port;
540     RockerTlv *nest;
541     char *buf;
542     size_t tlv_size;
543     int pos;
544     int err;
545 
546     if (!fp_port_from_pport(pport, &port)) {
547         return -ROCKER_EINVAL;
548     }
549     fp_port = r->fp_port[port];
550     if (!fp_port_get_learning(fp_port)) {
551         return ROCKER_OK;
552     }
553 
554     info = desc_ring_fetch_desc(ring);
555     if (!info) {
556         return -ROCKER_ENOBUFS;
557     }
558 
559     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
560                rocker_tlv_total_size(0) +                 /* nest */
561                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
562                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
563                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
564 
565     if (tlv_size > desc_buf_size(info)) {
566         err = -ROCKER_EMSGSIZE;
567         goto err_too_big;
568     }
569 
570     buf = desc_get_buf(info, false);
571     if (!buf) {
572         err = -ROCKER_ENOMEM;
573         goto err_no_mem;
574     }
575 
576     pos = 0;
577     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
578                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
579     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
580     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
581     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
582     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
583     rocker_tlv_nest_end(buf, &pos, nest);
584 
585     err = desc_set_buf(info, tlv_size);
586 
587 err_too_big:
588 err_no_mem:
589     if (desc_ring_post_desc(ring, err)) {
590         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
591     }
592 
593     return err;
594 }
595 
596 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
597                                                      uint32_t pport)
598 {
599     return r->rings[(pport - 1) * 2 + 3];
600 }
601 
602 int rx_produce(World *world, uint32_t pport,
603                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
604 {
605     Rocker *r = world_rocker(world);
606     PCIDevice *dev = (PCIDevice *)r;
607     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
608     DescInfo *info = desc_ring_fetch_desc(ring);
609     char *data;
610     size_t data_size = iov_size(iov, iovcnt);
611     char *buf;
612     uint16_t rx_flags = 0;
613     uint16_t rx_csum = 0;
614     size_t tlv_size;
615     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
616     hwaddr frag_addr;
617     uint16_t frag_max_len;
618     int pos;
619     int err;
620 
621     if (!info) {
622         return -ROCKER_ENOBUFS;
623     }
624 
625     buf = desc_get_buf(info, false);
626     if (!buf) {
627         err = -ROCKER_ENXIO;
628         goto out;
629     }
630     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
631 
632     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
633         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
634         err = -ROCKER_EINVAL;
635         goto out;
636     }
637 
638     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
639     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
640 
641     if (data_size > frag_max_len) {
642         err = -ROCKER_EMSGSIZE;
643         goto out;
644     }
645 
646     if (copy_to_cpu) {
647         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
648     }
649 
650     /* XXX calc rx flags/csum */
651 
652     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
653                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
654                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
655                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
656                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
657 
658     if (tlv_size > desc_buf_size(info)) {
659         err = -ROCKER_EMSGSIZE;
660         goto out;
661     }
662 
663     /* TODO:
664      * iov dma write can be optimized in similar way e1000 does it in
665      * e1000_receive_iov. But maybe if would make sense to introduce
666      * generic helper iov_dma_write.
667      */
668 
669     data = g_malloc(data_size);
670 
671     iov_to_buf(iov, iovcnt, 0, data, data_size);
672     pci_dma_write(dev, frag_addr, data, data_size);
673     g_free(data);
674 
675     pos = 0;
676     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
677     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
678     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
679     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
680     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
681 
682     err = desc_set_buf(info, tlv_size);
683 
684 out:
685     if (desc_ring_post_desc(ring, err)) {
686         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
687     }
688 
689     return err;
690 }
691 
692 int rocker_port_eg(Rocker *r, uint32_t pport,
693                    const struct iovec *iov, int iovcnt)
694 {
695     FpPort *fp_port;
696     uint32_t port;
697 
698     if (!fp_port_from_pport(pport, &port)) {
699         return -ROCKER_EINVAL;
700     }
701 
702     fp_port = r->fp_port[port];
703 
704     return fp_port_eg(fp_port, iov, iovcnt);
705 }
706 
707 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
708 {
709     PCIDevice *dev = PCI_DEVICE(r);
710     char *buf;
711     int i;
712 
713     buf = g_malloc(r->test_dma_size);
714 
715     switch (val) {
716     case ROCKER_TEST_DMA_CTRL_CLEAR:
717         memset(buf, 0, r->test_dma_size);
718         break;
719     case ROCKER_TEST_DMA_CTRL_FILL:
720         memset(buf, 0x96, r->test_dma_size);
721         break;
722     case ROCKER_TEST_DMA_CTRL_INVERT:
723         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
724         for (i = 0; i < r->test_dma_size; i++) {
725             buf[i] = ~buf[i];
726         }
727         break;
728     default:
729         DPRINTF("not test dma control val=0x%08x\n", val);
730         goto err_out;
731     }
732     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
733 
734     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
735 
736 err_out:
737     g_free(buf);
738 }
739 
740 static void rocker_reset(DeviceState *dev);
741 
742 static void rocker_control(Rocker *r, uint32_t val)
743 {
744     if (val & ROCKER_CONTROL_RESET) {
745         rocker_reset(DEVICE(r));
746     }
747 }
748 
749 static int rocker_pci_ring_count(Rocker *r)
750 {
751     /* There are:
752      * - command ring
753      * - event ring
754      * - tx and rx ring per each port
755      */
756     return 2 + (2 * r->fp_ports);
757 }
758 
759 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
760 {
761     hwaddr start = ROCKER_DMA_DESC_BASE;
762     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
763 
764     return addr >= start && addr < end;
765 }
766 
767 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
768 {
769     int i;
770     bool old_enabled;
771     bool new_enabled;
772     FpPort *fp_port;
773 
774     for (i = 0; i < r->fp_ports; i++) {
775         fp_port = r->fp_port[i];
776         old_enabled = fp_port_enabled(fp_port);
777         new_enabled = (new >> (i + 1)) & 0x1;
778         if (new_enabled == old_enabled) {
779             continue;
780         }
781         if (new_enabled) {
782             fp_port_enable(r->fp_port[i]);
783         } else {
784             fp_port_disable(r->fp_port[i]);
785         }
786     }
787 }
788 
789 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
790 {
791     Rocker *r = opaque;
792 
793     if (rocker_addr_is_desc_reg(r, addr)) {
794         unsigned index = ROCKER_RING_INDEX(addr);
795         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
796 
797         switch (offset) {
798         case ROCKER_DMA_DESC_ADDR_OFFSET:
799             r->lower32 = (uint64_t)val;
800             break;
801         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
802             desc_ring_set_base_addr(r->rings[index],
803                                     ((uint64_t)val) << 32 | r->lower32);
804             r->lower32 = 0;
805             break;
806         case ROCKER_DMA_DESC_SIZE_OFFSET:
807             desc_ring_set_size(r->rings[index], val);
808             break;
809         case ROCKER_DMA_DESC_HEAD_OFFSET:
810             if (desc_ring_set_head(r->rings[index], val)) {
811                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
812             }
813             break;
814         case ROCKER_DMA_DESC_CTRL_OFFSET:
815             desc_ring_set_ctrl(r->rings[index], val);
816             break;
817         case ROCKER_DMA_DESC_CREDITS_OFFSET:
818             if (desc_ring_ret_credits(r->rings[index], val)) {
819                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
820             }
821             break;
822         default:
823             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
824                     " val=0x%08x (ring %d, addr=0x%02x)\n",
825                     addr, val, index, offset);
826             break;
827         }
828         return;
829     }
830 
831     switch (addr) {
832     case ROCKER_TEST_REG:
833         r->test_reg = val;
834         break;
835     case ROCKER_TEST_REG64:
836     case ROCKER_TEST_DMA_ADDR:
837     case ROCKER_PORT_PHYS_ENABLE:
838         r->lower32 = (uint64_t)val;
839         break;
840     case ROCKER_TEST_REG64 + 4:
841         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
842         r->lower32 = 0;
843         break;
844     case ROCKER_TEST_IRQ:
845         rocker_msix_irq(r, val);
846         break;
847     case ROCKER_TEST_DMA_SIZE:
848         r->test_dma_size = val & 0xFFFF;
849         break;
850     case ROCKER_TEST_DMA_ADDR + 4:
851         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
852         r->lower32 = 0;
853         break;
854     case ROCKER_TEST_DMA_CTRL:
855         rocker_test_dma_ctrl(r, val);
856         break;
857     case ROCKER_CONTROL:
858         rocker_control(r, val);
859         break;
860     case ROCKER_PORT_PHYS_ENABLE + 4:
861         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
862         r->lower32 = 0;
863         break;
864     default:
865         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
866                 " val=0x%08x\n", addr, val);
867         break;
868     }
869 }
870 
871 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
872 {
873     Rocker *r = opaque;
874 
875     if (rocker_addr_is_desc_reg(r, addr)) {
876         unsigned index = ROCKER_RING_INDEX(addr);
877         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
878 
879         switch (offset) {
880         case ROCKER_DMA_DESC_ADDR_OFFSET:
881             desc_ring_set_base_addr(r->rings[index], val);
882             break;
883         default:
884             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
885                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
886                     addr, val, index, offset);
887             break;
888         }
889         return;
890     }
891 
892     switch (addr) {
893     case ROCKER_TEST_REG64:
894         r->test_reg64 = val;
895         break;
896     case ROCKER_TEST_DMA_ADDR:
897         r->test_dma_addr = val;
898         break;
899     case ROCKER_PORT_PHYS_ENABLE:
900         rocker_port_phys_enable_write(r, val);
901         break;
902     default:
903         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
904                 " val=0x" TARGET_FMT_plx "\n", addr, val);
905         break;
906     }
907 }
908 
909 #ifdef DEBUG_ROCKER
910 #define regname(reg) case (reg): return #reg
911 static const char *rocker_reg_name(void *opaque, hwaddr addr)
912 {
913     Rocker *r = opaque;
914 
915     if (rocker_addr_is_desc_reg(r, addr)) {
916         unsigned index = ROCKER_RING_INDEX(addr);
917         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
918         static char buf[100];
919         char ring_name[10];
920 
921         switch (index) {
922         case 0:
923             sprintf(ring_name, "cmd");
924             break;
925         case 1:
926             sprintf(ring_name, "event");
927             break;
928         default:
929             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
930                     (index - 2) / 2);
931         }
932 
933         switch (offset) {
934         case ROCKER_DMA_DESC_ADDR_OFFSET:
935             sprintf(buf, "Ring[%s] ADDR", ring_name);
936             return buf;
937         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
938             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
939             return buf;
940         case ROCKER_DMA_DESC_SIZE_OFFSET:
941             sprintf(buf, "Ring[%s] SIZE", ring_name);
942             return buf;
943         case ROCKER_DMA_DESC_HEAD_OFFSET:
944             sprintf(buf, "Ring[%s] HEAD", ring_name);
945             return buf;
946         case ROCKER_DMA_DESC_TAIL_OFFSET:
947             sprintf(buf, "Ring[%s] TAIL", ring_name);
948             return buf;
949         case ROCKER_DMA_DESC_CTRL_OFFSET:
950             sprintf(buf, "Ring[%s] CTRL", ring_name);
951             return buf;
952         case ROCKER_DMA_DESC_CREDITS_OFFSET:
953             sprintf(buf, "Ring[%s] CREDITS", ring_name);
954             return buf;
955         default:
956             sprintf(buf, "Ring[%s] ???", ring_name);
957             return buf;
958         }
959     } else {
960         switch (addr) {
961             regname(ROCKER_BOGUS_REG0);
962             regname(ROCKER_BOGUS_REG1);
963             regname(ROCKER_BOGUS_REG2);
964             regname(ROCKER_BOGUS_REG3);
965             regname(ROCKER_TEST_REG);
966             regname(ROCKER_TEST_REG64);
967             regname(ROCKER_TEST_REG64+4);
968             regname(ROCKER_TEST_IRQ);
969             regname(ROCKER_TEST_DMA_ADDR);
970             regname(ROCKER_TEST_DMA_ADDR+4);
971             regname(ROCKER_TEST_DMA_SIZE);
972             regname(ROCKER_TEST_DMA_CTRL);
973             regname(ROCKER_CONTROL);
974             regname(ROCKER_PORT_PHYS_COUNT);
975             regname(ROCKER_PORT_PHYS_LINK_STATUS);
976             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
977             regname(ROCKER_PORT_PHYS_ENABLE);
978             regname(ROCKER_PORT_PHYS_ENABLE+4);
979             regname(ROCKER_SWITCH_ID);
980             regname(ROCKER_SWITCH_ID+4);
981         }
982     }
983     return "???";
984 }
985 #else
986 static const char *rocker_reg_name(void *opaque, hwaddr addr)
987 {
988     return NULL;
989 }
990 #endif
991 
992 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
993                               unsigned size)
994 {
995     DPRINTF("Write %s addr " TARGET_FMT_plx
996             ", size %u, val " TARGET_FMT_plx "\n",
997             rocker_reg_name(opaque, addr), addr, size, val);
998 
999     switch (size) {
1000     case 4:
1001         rocker_io_writel(opaque, addr, val);
1002         break;
1003     case 8:
1004         rocker_io_writeq(opaque, addr, val);
1005         break;
1006     }
1007 }
1008 
1009 static uint64_t rocker_port_phys_link_status(Rocker *r)
1010 {
1011     int i;
1012     uint64_t status = 0;
1013 
1014     for (i = 0; i < r->fp_ports; i++) {
1015         FpPort *port = r->fp_port[i];
1016 
1017         if (fp_port_get_link_up(port)) {
1018             status |= 1 << (i + 1);
1019         }
1020     }
1021     return status;
1022 }
1023 
1024 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1025 {
1026     int i;
1027     uint64_t ret = 0;
1028 
1029     for (i = 0; i < r->fp_ports; i++) {
1030         FpPort *port = r->fp_port[i];
1031 
1032         if (fp_port_enabled(port)) {
1033             ret |= 1 << (i + 1);
1034         }
1035     }
1036     return ret;
1037 }
1038 
1039 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1040 {
1041     Rocker *r = opaque;
1042     uint32_t ret;
1043 
1044     if (rocker_addr_is_desc_reg(r, addr)) {
1045         unsigned index = ROCKER_RING_INDEX(addr);
1046         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1047 
1048         switch (offset) {
1049         case ROCKER_DMA_DESC_ADDR_OFFSET:
1050             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1051             break;
1052         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1053             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1054             break;
1055         case ROCKER_DMA_DESC_SIZE_OFFSET:
1056             ret = desc_ring_get_size(r->rings[index]);
1057             break;
1058         case ROCKER_DMA_DESC_HEAD_OFFSET:
1059             ret = desc_ring_get_head(r->rings[index]);
1060             break;
1061         case ROCKER_DMA_DESC_TAIL_OFFSET:
1062             ret = desc_ring_get_tail(r->rings[index]);
1063             break;
1064         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1065             ret = desc_ring_get_credits(r->rings[index]);
1066             break;
1067         default:
1068             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1069                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1070             ret = 0;
1071             break;
1072         }
1073         return ret;
1074     }
1075 
1076     switch (addr) {
1077     case ROCKER_BOGUS_REG0:
1078     case ROCKER_BOGUS_REG1:
1079     case ROCKER_BOGUS_REG2:
1080     case ROCKER_BOGUS_REG3:
1081         ret = 0xDEADBABE;
1082         break;
1083     case ROCKER_TEST_REG:
1084         ret = r->test_reg * 2;
1085         break;
1086     case ROCKER_TEST_REG64:
1087         ret = (uint32_t)(r->test_reg64 * 2);
1088         break;
1089     case ROCKER_TEST_REG64 + 4:
1090         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1091         break;
1092     case ROCKER_TEST_DMA_SIZE:
1093         ret = r->test_dma_size;
1094         break;
1095     case ROCKER_TEST_DMA_ADDR:
1096         ret = (uint32_t)r->test_dma_addr;
1097         break;
1098     case ROCKER_TEST_DMA_ADDR + 4:
1099         ret = (uint32_t)(r->test_dma_addr >> 32);
1100         break;
1101     case ROCKER_PORT_PHYS_COUNT:
1102         ret = r->fp_ports;
1103         break;
1104     case ROCKER_PORT_PHYS_LINK_STATUS:
1105         ret = (uint32_t)rocker_port_phys_link_status(r);
1106         break;
1107     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1108         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1109         break;
1110     case ROCKER_PORT_PHYS_ENABLE:
1111         ret = (uint32_t)rocker_port_phys_enable_read(r);
1112         break;
1113     case ROCKER_PORT_PHYS_ENABLE + 4:
1114         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1115         break;
1116     case ROCKER_SWITCH_ID:
1117         ret = (uint32_t)r->switch_id;
1118         break;
1119     case ROCKER_SWITCH_ID + 4:
1120         ret = (uint32_t)(r->switch_id >> 32);
1121         break;
1122     default:
1123         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1124         ret = 0;
1125         break;
1126     }
1127     return ret;
1128 }
1129 
1130 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1131 {
1132     Rocker *r = opaque;
1133     uint64_t ret;
1134 
1135     if (rocker_addr_is_desc_reg(r, addr)) {
1136         unsigned index = ROCKER_RING_INDEX(addr);
1137         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1138 
1139         switch (addr & ROCKER_DMA_DESC_MASK) {
1140         case ROCKER_DMA_DESC_ADDR_OFFSET:
1141             ret = desc_ring_get_base_addr(r->rings[index]);
1142             break;
1143         default:
1144             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1145                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1146             ret = 0;
1147             break;
1148         }
1149         return ret;
1150     }
1151 
1152     switch (addr) {
1153     case ROCKER_BOGUS_REG0:
1154     case ROCKER_BOGUS_REG2:
1155         ret = 0xDEADBABEDEADBABEULL;
1156         break;
1157     case ROCKER_TEST_REG64:
1158         ret = r->test_reg64 * 2;
1159         break;
1160     case ROCKER_TEST_DMA_ADDR:
1161         ret = r->test_dma_addr;
1162         break;
1163     case ROCKER_PORT_PHYS_LINK_STATUS:
1164         ret = rocker_port_phys_link_status(r);
1165         break;
1166     case ROCKER_PORT_PHYS_ENABLE:
1167         ret = rocker_port_phys_enable_read(r);
1168         break;
1169     case ROCKER_SWITCH_ID:
1170         ret = r->switch_id;
1171         break;
1172     default:
1173         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1174         ret = 0;
1175         break;
1176     }
1177     return ret;
1178 }
1179 
1180 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1181 {
1182     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1183             rocker_reg_name(opaque, addr), addr, size);
1184 
1185     switch (size) {
1186     case 4:
1187         return rocker_io_readl(opaque, addr);
1188     case 8:
1189         return rocker_io_readq(opaque, addr);
1190     }
1191 
1192     return -1;
1193 }
1194 
1195 static const MemoryRegionOps rocker_mmio_ops = {
1196     .read = rocker_mmio_read,
1197     .write = rocker_mmio_write,
1198     .endianness = DEVICE_LITTLE_ENDIAN,
1199     .valid = {
1200         .min_access_size = 4,
1201         .max_access_size = 8,
1202     },
1203     .impl = {
1204         .min_access_size = 4,
1205         .max_access_size = 8,
1206     },
1207 };
1208 
1209 static void rocker_msix_vectors_unuse(Rocker *r,
1210                                       unsigned int num_vectors)
1211 {
1212     PCIDevice *dev = PCI_DEVICE(r);
1213     int i;
1214 
1215     for (i = 0; i < num_vectors; i++) {
1216         msix_vector_unuse(dev, i);
1217     }
1218 }
1219 
1220 static int rocker_msix_vectors_use(Rocker *r,
1221                                    unsigned int num_vectors)
1222 {
1223     PCIDevice *dev = PCI_DEVICE(r);
1224     int err;
1225     int i;
1226 
1227     for (i = 0; i < num_vectors; i++) {
1228         err = msix_vector_use(dev, i);
1229         if (err) {
1230             goto rollback;
1231         }
1232     }
1233     return 0;
1234 
1235 rollback:
1236     rocker_msix_vectors_unuse(r, i);
1237     return err;
1238 }
1239 
1240 static int rocker_msix_init(Rocker *r, Error **errp)
1241 {
1242     PCIDevice *dev = PCI_DEVICE(r);
1243     int err;
1244 
1245     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1246                     &r->msix_bar,
1247                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1248                     &r->msix_bar,
1249                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1250                     0, errp);
1251     if (err) {
1252         return err;
1253     }
1254 
1255     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1256     if (err) {
1257         goto err_msix_vectors_use;
1258     }
1259 
1260     return 0;
1261 
1262 err_msix_vectors_use:
1263     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1264     return err;
1265 }
1266 
1267 static void rocker_msix_uninit(Rocker *r)
1268 {
1269     PCIDevice *dev = PCI_DEVICE(r);
1270 
1271     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1272     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1273 }
1274 
1275 static World *rocker_world_type_by_name(Rocker *r, const char *name)
1276 {
1277     int i;
1278 
1279     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1280         if (strcmp(name, world_name(r->worlds[i])) == 0) {
1281             return r->worlds[i];
1282         }
1283     }
1284     return NULL;
1285 }
1286 
1287 static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1288 {
1289     Rocker *r = ROCKER(dev);
1290     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1291     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1292     static int sw_index;
1293     int i, err = 0;
1294 
1295     /* allocate worlds */
1296 
1297     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1298 
1299     if (!r->world_name) {
1300         r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1301     }
1302 
1303     r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1304     if (!r->world_dflt) {
1305         error_setg(errp,
1306                 "invalid argument requested world %s does not exist",
1307                 r->world_name);
1308         goto err_world_type_by_name;
1309     }
1310 
1311     /* set up memory-mapped region at BAR0 */
1312 
1313     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1314                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1315     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1316                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1317 
1318     /* set up memory-mapped region for MSI-X */
1319 
1320     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1321                        ROCKER_PCI_MSIX_BAR_SIZE);
1322     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1323                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1324 
1325     /* MSI-X init */
1326 
1327     err = rocker_msix_init(r, errp);
1328     if (err) {
1329         goto err_msix_init;
1330     }
1331 
1332     /* validate switch properties */
1333 
1334     if (!r->name) {
1335         r->name = g_strdup(TYPE_ROCKER);
1336     }
1337 
1338     if (rocker_find(r->name)) {
1339         error_setg(errp, "%s already exists", r->name);
1340         goto err_duplicate;
1341     }
1342 
1343     /* Rocker name is passed in port name requests to OS with the intention
1344      * that the name is used in interface names. Limit the length of the
1345      * rocker name to avoid naming problems in the OS. Also, adding the
1346      * port number as p# and unganged breakout b#, where # is at most 2
1347      * digits, so leave room for it too (-1 for string terminator, -3 for
1348      * p# and -3 for b#)
1349      */
1350 #define ROCKER_IFNAMSIZ 16
1351 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1352     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1353         error_setg(errp,
1354                 "name too long; please shorten to at most %d chars",
1355                 MAX_ROCKER_NAME_LEN);
1356         goto err_name_too_long;
1357     }
1358 
1359     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1360         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1361         r->fp_start_macaddr.a[4] += (sw_index++);
1362     }
1363 
1364     if (!r->switch_id) {
1365         memcpy(&r->switch_id, &r->fp_start_macaddr,
1366                sizeof(r->fp_start_macaddr));
1367     }
1368 
1369     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1370         r->fp_ports = ROCKER_FP_PORTS_MAX;
1371     }
1372 
1373     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1374 
1375     /* Rings are ordered like this:
1376      * - command ring
1377      * - event ring
1378      * - port0 tx ring
1379      * - port0 rx ring
1380      * - port1 tx ring
1381      * - port1 rx ring
1382      * .....
1383      */
1384 
1385     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1386         DescRing *ring = desc_ring_alloc(r, i);
1387 
1388         if (i == ROCKER_RING_CMD) {
1389             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1390         } else if (i == ROCKER_RING_EVENT) {
1391             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1392         } else if (i % 2 == 0) {
1393             desc_ring_set_consume(ring, tx_consume,
1394                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1395         } else if (i % 2 == 1) {
1396             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1397         }
1398 
1399         r->rings[i] = ring;
1400     }
1401 
1402     for (i = 0; i < r->fp_ports; i++) {
1403         FpPort *port =
1404             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1405                           i, &r->fp_ports_peers[i]);
1406 
1407         r->fp_port[i] = port;
1408         fp_port_set_world(port, r->world_dflt);
1409     }
1410 
1411     QLIST_INSERT_HEAD(&rockers, r, next);
1412 
1413     return;
1414 
1415 err_name_too_long:
1416 err_duplicate:
1417     rocker_msix_uninit(r);
1418 err_msix_init:
1419     object_unparent(OBJECT(&r->msix_bar));
1420     object_unparent(OBJECT(&r->mmio));
1421 err_world_type_by_name:
1422     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1423         if (r->worlds[i]) {
1424             world_free(r->worlds[i]);
1425         }
1426     }
1427 }
1428 
1429 static void pci_rocker_uninit(PCIDevice *dev)
1430 {
1431     Rocker *r = ROCKER(dev);
1432     int i;
1433 
1434     QLIST_REMOVE(r, next);
1435 
1436     for (i = 0; i < r->fp_ports; i++) {
1437         FpPort *port = r->fp_port[i];
1438 
1439         fp_port_free(port);
1440         r->fp_port[i] = NULL;
1441     }
1442 
1443     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1444         if (r->rings[i]) {
1445             desc_ring_free(r->rings[i]);
1446         }
1447     }
1448     g_free(r->rings);
1449 
1450     rocker_msix_uninit(r);
1451     object_unparent(OBJECT(&r->msix_bar));
1452     object_unparent(OBJECT(&r->mmio));
1453 
1454     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1455         if (r->worlds[i]) {
1456             world_free(r->worlds[i]);
1457         }
1458     }
1459     g_free(r->fp_ports_peers);
1460 }
1461 
1462 static void rocker_reset(DeviceState *dev)
1463 {
1464     Rocker *r = ROCKER(dev);
1465     int i;
1466 
1467     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1468         if (r->worlds[i]) {
1469             world_reset(r->worlds[i]);
1470         }
1471     }
1472     for (i = 0; i < r->fp_ports; i++) {
1473         fp_port_reset(r->fp_port[i]);
1474         fp_port_set_world(r->fp_port[i], r->world_dflt);
1475     }
1476 
1477     r->test_reg = 0;
1478     r->test_reg64 = 0;
1479     r->test_dma_addr = 0;
1480     r->test_dma_size = 0;
1481 
1482     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1483         desc_ring_reset(r->rings[i]);
1484     }
1485 
1486     DPRINTF("Reset done\n");
1487 }
1488 
1489 static Property rocker_properties[] = {
1490     DEFINE_PROP_STRING("name", Rocker, name),
1491     DEFINE_PROP_STRING("world", Rocker, world_name),
1492     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1493                         fp_start_macaddr),
1494     DEFINE_PROP_UINT64("switch_id", Rocker,
1495                        switch_id, 0),
1496     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1497                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1498     DEFINE_PROP_END_OF_LIST(),
1499 };
1500 
1501 static const VMStateDescription rocker_vmsd = {
1502     .name = TYPE_ROCKER,
1503     .unmigratable = 1,
1504 };
1505 
1506 static void rocker_class_init(ObjectClass *klass, void *data)
1507 {
1508     DeviceClass *dc = DEVICE_CLASS(klass);
1509     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1510 
1511     k->realize = pci_rocker_realize;
1512     k->exit = pci_rocker_uninit;
1513     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1514     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1515     k->revision = ROCKER_PCI_REVISION;
1516     k->class_id = PCI_CLASS_NETWORK_OTHER;
1517     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1518     dc->desc = "Rocker Switch";
1519     dc->reset = rocker_reset;
1520     dc->props = rocker_properties;
1521     dc->vmsd = &rocker_vmsd;
1522 }
1523 
1524 static const TypeInfo rocker_info = {
1525     .name          = TYPE_ROCKER,
1526     .parent        = TYPE_PCI_DEVICE,
1527     .instance_size = sizeof(Rocker),
1528     .class_init    = rocker_class_init,
1529     .interfaces = (InterfaceInfo[]) {
1530         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1531         { },
1532     },
1533 };
1534 
1535 static void rocker_register_types(void)
1536 {
1537     type_register_static(&rocker_info);
1538 }
1539 
1540 type_init(rocker_register_types)
1541