xref: /openbmc/qemu/hw/net/rocker/rocker.c (revision 8d3031fa)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/pci/pci_device.h"
20 #include "hw/qdev-properties.h"
21 #include "hw/qdev-properties-system.h"
22 #include "migration/vmstate.h"
23 #include "hw/pci/msix.h"
24 #include "net/net.h"
25 #include "net/eth.h"
26 #include "qapi/error.h"
27 #include "qapi/qapi-commands-rocker.h"
28 #include "qemu/iov.h"
29 #include "qemu/module.h"
30 #include "qemu/bitops.h"
31 #include "qemu/log.h"
32 
33 #include "rocker.h"
34 #include "rocker_hw.h"
35 #include "rocker_fp.h"
36 #include "rocker_desc.h"
37 #include "rocker_tlv.h"
38 #include "rocker_world.h"
39 #include "rocker_of_dpa.h"
40 
/*
 * Per-instance state of one emulated rocker switch.
 *
 * The structure embeds the PCI device object first, followed by the switch
 * configuration, the front-panel ports, scratch/test register backings, the
 * descriptor rings and the switch "worlds".
 */
struct rocker {
    /* private */
    PCIDevice parent_obj;
    /* public */

    MemoryRegion mmio;
    MemoryRegion msix_bar;

    /* switch configuration */
    char *name;                  /* switch name */
    char *world_name;            /* world name */
    uint32_t fp_ports;           /* front-panel port count */
    NICPeers *fp_ports_peers;
    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
    uint64_t switch_id;          /* switch id */

    /* front-panel ports */
    /* Code in this file only iterates entries [0, fp_ports). */
    FpPort *fp_port[ROCKER_FP_PORTS_MAX];

    /* register backings */
    uint32_t test_reg;
    uint64_t test_reg64;
    dma_addr_t test_dma_addr;
    uint32_t test_dma_size;
    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */

    /* desc rings */
    /*
     * Indexed by ring number: the event ring lives at ROCKER_RING_EVENT and
     * the per-port rx ring at (pport - 1) * 2 + 3, see
     * rocker_get_rx_ring_by_pport().
     */
    DescRing **rings;

    /* switch worlds */
    World *worlds[ROCKER_WORLD_TYPE_MAX];
    World *world_dflt;

    /* Linkage for the file-scope 'rockers' list. */
    QLIST_ENTRY(rocker) next;
};

/* List of rocker instances, linked through rocker.next; see rocker_find(). */
static QLIST_HEAD(, rocker) rockers;
78 
79 Rocker *rocker_find(const char *name)
80 {
81     Rocker *r;
82 
83     QLIST_FOREACH(r, &rockers, next)
84         if (strcmp(r->name, name) == 0) {
85             return r;
86         }
87 
88     return NULL;
89 }
90 
91 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
92 {
93     if (type < ROCKER_WORLD_TYPE_MAX) {
94         return r->worlds[type];
95     }
96     return NULL;
97 }
98 
99 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
100 {
101     RockerSwitch *rocker;
102     Rocker *r;
103 
104     r = rocker_find(name);
105     if (!r) {
106         error_setg(errp, "rocker %s not found", name);
107         return NULL;
108     }
109 
110     rocker = g_new0(RockerSwitch, 1);
111     rocker->name = g_strdup(r->name);
112     rocker->id = r->switch_id;
113     rocker->ports = r->fp_ports;
114 
115     return rocker;
116 }
117 
118 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
119 {
120     RockerPortList *list = NULL;
121     Rocker *r;
122     int i;
123 
124     r = rocker_find(name);
125     if (!r) {
126         error_setg(errp, "rocker %s not found", name);
127         return NULL;
128     }
129 
130     for (i = r->fp_ports - 1; i >= 0; i--) {
131         QAPI_LIST_PREPEND(list, fp_port_get_info(r->fp_port[i]));
132     }
133 
134     return list;
135 }
136 
/*
 * Map a tx descriptor ring to the physical port number (pport) it serves.
 *
 * The first two rings are the command and event rings (see
 * rocker_pci_ring_count()); the rings after them come in per-port pairs,
 * and pport numbering starts at 1, hence the "- 2", "/ 2" and "+ 1".
 * @r is not used by the computation.
 */
static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
                                            DescRing *ring)
{
    return (desc_ring_index(ring) - 2) / 2 + 1;
}
142 
143 static int tx_consume(Rocker *r, DescInfo *info)
144 {
145     PCIDevice *dev = PCI_DEVICE(r);
146     char *buf = desc_get_buf(info, true);
147     RockerTlv *tlv_frag;
148     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
149     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
150     uint32_t pport;
151     uint32_t port;
152     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
153     uint16_t tx_l3_csum_off = 0;
154     uint16_t tx_tso_mss = 0;
155     uint16_t tx_tso_hdr_len = 0;
156     int iovcnt = 0;
157     int err = ROCKER_OK;
158     int rem;
159     int i;
160 
161     if (!buf) {
162         return -ROCKER_ENXIO;
163     }
164 
165     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
166 
167     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
168         return -ROCKER_EINVAL;
169     }
170 
171     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
172     if (!fp_port_from_pport(pport, &port)) {
173         return -ROCKER_EINVAL;
174     }
175 
176     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
177         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
178     }
179 
180     switch (tx_offload) {
181     case ROCKER_TX_OFFLOAD_L3_CSUM:
182         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
183             return -ROCKER_EINVAL;
184         }
185         break;
186     case ROCKER_TX_OFFLOAD_TSO:
187         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
188             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
189             return -ROCKER_EINVAL;
190         }
191         break;
192     }
193 
194     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
195         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
196         qemu_log_mask(LOG_UNIMP, "rocker %s: L3 not implemented"
197                                  " (cksum off: %u)\n",
198                       __func__, tx_l3_csum_off);
199     }
200 
201     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
202         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
203         qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented (MSS: %u)\n",
204                       __func__, tx_tso_mss);
205     }
206 
207     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
208         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
209         qemu_log_mask(LOG_UNIMP, "rocker %s: TSO not implemented"
210                                  " (hdr length: %u)\n",
211                       __func__, tx_tso_hdr_len);
212     }
213 
214     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
215         hwaddr frag_addr;
216         uint16_t frag_len;
217 
218         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
219             err = -ROCKER_EINVAL;
220             goto err_bad_attr;
221         }
222 
223         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
224 
225         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
226             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
227             err = -ROCKER_EINVAL;
228             goto err_bad_attr;
229         }
230 
231         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
232         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
233 
234         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
235             goto err_too_many_frags;
236         }
237         iov[iovcnt].iov_len = frag_len;
238         iov[iovcnt].iov_base = g_malloc(frag_len);
239 
240         pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
241                      iov[iovcnt].iov_len);
242 
243         iovcnt++;
244     }
245 
246     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
247 
248 err_too_many_frags:
249 err_bad_attr:
250     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
251         g_free(iov[i].iov_base);
252     }
253 
254     return err;
255 }
256 
257 static int cmd_get_port_settings(Rocker *r,
258                                  DescInfo *info, char *buf,
259                                  RockerTlv *cmd_info_tlv)
260 {
261     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
262     RockerTlv *nest;
263     FpPort *fp_port;
264     uint32_t pport;
265     uint32_t port;
266     uint32_t speed;
267     uint8_t duplex;
268     uint8_t autoneg;
269     uint8_t learning;
270     char *phys_name;
271     MACAddr macaddr;
272     enum rocker_world_type mode;
273     size_t tlv_size;
274     int pos;
275     int err;
276 
277     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
278                             cmd_info_tlv);
279 
280     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
281         return -ROCKER_EINVAL;
282     }
283 
284     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
285     if (!fp_port_from_pport(pport, &port)) {
286         return -ROCKER_EINVAL;
287     }
288     fp_port = r->fp_port[port];
289 
290     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
291     if (err) {
292         return err;
293     }
294 
295     fp_port_get_macaddr(fp_port, &macaddr);
296     mode = world_type(fp_port_get_world(fp_port));
297     learning = fp_port_get_learning(fp_port);
298     phys_name = fp_port_get_name(fp_port);
299 
300     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
301                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
302                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
303                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
304                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
305                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
306                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
307                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
308                rocker_tlv_total_size(strlen(phys_name));
309 
310     if (tlv_size > desc_buf_size(info)) {
311         return -ROCKER_EMSGSIZE;
312     }
313 
314     pos = 0;
315     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
316     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
317     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
318     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
319     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
320     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
321                    sizeof(macaddr.a), macaddr.a);
322     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
323     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
324                       learning);
325     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
326                    strlen(phys_name), phys_name);
327     rocker_tlv_nest_end(buf, &pos, nest);
328 
329     return desc_set_buf(info, tlv_size);
330 }
331 
332 static int cmd_set_port_settings(Rocker *r,
333                                  RockerTlv *cmd_info_tlv)
334 {
335     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
336     FpPort *fp_port;
337     uint32_t pport;
338     uint32_t port;
339     uint32_t speed;
340     uint8_t duplex;
341     uint8_t autoneg;
342     uint8_t learning;
343     MACAddr macaddr;
344     enum rocker_world_type mode;
345     int err;
346 
347     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
348                             cmd_info_tlv);
349 
350     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
351         return -ROCKER_EINVAL;
352     }
353 
354     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
355     if (!fp_port_from_pport(pport, &port)) {
356         return -ROCKER_EINVAL;
357     }
358     fp_port = r->fp_port[port];
359 
360     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
361         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
362         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
363 
364         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
365         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
366         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
367 
368         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
369         if (err) {
370             return err;
371         }
372     }
373 
374     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
375         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
376             sizeof(macaddr.a)) {
377             return -ROCKER_EINVAL;
378         }
379         memcpy(macaddr.a,
380                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
381                sizeof(macaddr.a));
382         fp_port_set_macaddr(fp_port, &macaddr);
383     }
384 
385     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
386         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
387         if (mode >= ROCKER_WORLD_TYPE_MAX) {
388             return -ROCKER_EINVAL;
389         }
390         /* We don't support world change. */
391         if (!fp_port_check_world(fp_port, r->worlds[mode])) {
392             return -ROCKER_EINVAL;
393         }
394     }
395 
396     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
397         learning =
398             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
399         fp_port_set_learning(fp_port, learning);
400     }
401 
402     return ROCKER_OK;
403 }
404 
405 static int cmd_consume(Rocker *r, DescInfo *info)
406 {
407     char *buf = desc_get_buf(info, false);
408     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
409     RockerTlv *info_tlv;
410     World *world;
411     uint16_t cmd;
412     int err;
413 
414     if (!buf) {
415         return -ROCKER_ENXIO;
416     }
417 
418     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
419 
420     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
421         return -ROCKER_EINVAL;
422     }
423 
424     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
425     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
426 
427     /* This might be reworked to something like this:
428      * Every world will have an array of command handlers from
429      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
430      * up to each world to implement whatever command it want.
431      * It can reference "generic" commands as cmd_set_port_settings or
432      * cmd_get_port_settings
433      */
434 
435     switch (cmd) {
436     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
437     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
438     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
439     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
440     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
441     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
442     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
443     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
444         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
445         err = world_do_cmd(world, info, buf, cmd, info_tlv);
446         break;
447     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
448         err = cmd_get_port_settings(r, info, buf, info_tlv);
449         break;
450     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
451         err = cmd_set_port_settings(r, info_tlv);
452         break;
453     default:
454         err = -ROCKER_EINVAL;
455         break;
456     }
457 
458     return err;
459 }
460 
461 static void rocker_msix_irq(Rocker *r, unsigned vector)
462 {
463     PCIDevice *dev = PCI_DEVICE(r);
464 
465     DPRINTF("MSI-X notify request for vector %d\n", vector);
466     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
467         DPRINTF("incorrect vector %d\n", vector);
468         return;
469     }
470     msix_notify(dev, vector);
471 }
472 
473 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
474 {
475     DescRing *ring = r->rings[ROCKER_RING_EVENT];
476     DescInfo *info = desc_ring_fetch_desc(ring);
477     RockerTlv *nest;
478     char *buf;
479     size_t tlv_size;
480     int pos;
481     int err;
482 
483     if (!info) {
484         return -ROCKER_ENOBUFS;
485     }
486 
487     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
488                rocker_tlv_total_size(0) +                 /* nest */
489                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
490                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
491 
492     if (tlv_size > desc_buf_size(info)) {
493         err = -ROCKER_EMSGSIZE;
494         goto err_too_big;
495     }
496 
497     buf = desc_get_buf(info, false);
498     if (!buf) {
499         err = -ROCKER_ENOMEM;
500         goto err_no_mem;
501     }
502 
503     pos = 0;
504     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
505                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
506     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
507     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
508     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
509                       link_up ? 1 : 0);
510     rocker_tlv_nest_end(buf, &pos, nest);
511 
512     err = desc_set_buf(info, tlv_size);
513 
514 err_too_big:
515 err_no_mem:
516     if (desc_ring_post_desc(ring, err)) {
517         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
518     }
519 
520     return err;
521 }
522 
523 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
524                                uint16_t vlan_id)
525 {
526     DescRing *ring = r->rings[ROCKER_RING_EVENT];
527     DescInfo *info;
528     FpPort *fp_port;
529     uint32_t port;
530     RockerTlv *nest;
531     char *buf;
532     size_t tlv_size;
533     int pos;
534     int err;
535 
536     if (!fp_port_from_pport(pport, &port)) {
537         return -ROCKER_EINVAL;
538     }
539     fp_port = r->fp_port[port];
540     if (!fp_port_get_learning(fp_port)) {
541         return ROCKER_OK;
542     }
543 
544     info = desc_ring_fetch_desc(ring);
545     if (!info) {
546         return -ROCKER_ENOBUFS;
547     }
548 
549     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
550                rocker_tlv_total_size(0) +                 /* nest */
551                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
552                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
553                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
554 
555     if (tlv_size > desc_buf_size(info)) {
556         err = -ROCKER_EMSGSIZE;
557         goto err_too_big;
558     }
559 
560     buf = desc_get_buf(info, false);
561     if (!buf) {
562         err = -ROCKER_ENOMEM;
563         goto err_no_mem;
564     }
565 
566     pos = 0;
567     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
568                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
569     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
570     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
571     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
572     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
573     rocker_tlv_nest_end(buf, &pos, nest);
574 
575     err = desc_set_buf(info, tlv_size);
576 
577 err_too_big:
578 err_no_mem:
579     if (desc_ring_post_desc(ring, err)) {
580         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
581     }
582 
583     return err;
584 }
585 
586 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
587                                                      uint32_t pport)
588 {
589     return r->rings[(pport - 1) * 2 + 3];
590 }
591 
592 int rx_produce(World *world, uint32_t pport,
593                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
594 {
595     Rocker *r = world_rocker(world);
596     PCIDevice *dev = (PCIDevice *)r;
597     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
598     DescInfo *info = desc_ring_fetch_desc(ring);
599     char *data;
600     size_t data_size = iov_size(iov, iovcnt);
601     char *buf;
602     uint16_t rx_flags = 0;
603     uint16_t rx_csum = 0;
604     size_t tlv_size;
605     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
606     hwaddr frag_addr;
607     uint16_t frag_max_len;
608     int pos;
609     int err;
610 
611     if (!info) {
612         return -ROCKER_ENOBUFS;
613     }
614 
615     buf = desc_get_buf(info, false);
616     if (!buf) {
617         err = -ROCKER_ENXIO;
618         goto out;
619     }
620     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
621 
622     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
623         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
624         err = -ROCKER_EINVAL;
625         goto out;
626     }
627 
628     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
629     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
630 
631     if (data_size > frag_max_len) {
632         err = -ROCKER_EMSGSIZE;
633         goto out;
634     }
635 
636     if (copy_to_cpu) {
637         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
638     }
639 
640     /* XXX calc rx flags/csum */
641 
642     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
643                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
644                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
645                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
646                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
647 
648     if (tlv_size > desc_buf_size(info)) {
649         err = -ROCKER_EMSGSIZE;
650         goto out;
651     }
652 
653     /* TODO:
654      * iov dma write can be optimized in similar way e1000 does it in
655      * e1000_receive_iov. But maybe if would make sense to introduce
656      * generic helper iov_dma_write.
657      */
658 
659     data = g_malloc(data_size);
660 
661     iov_to_buf(iov, iovcnt, 0, data, data_size);
662     pci_dma_write(dev, frag_addr, data, data_size);
663     g_free(data);
664 
665     pos = 0;
666     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
667     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
668     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
669     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
670     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
671 
672     err = desc_set_buf(info, tlv_size);
673 
674 out:
675     if (desc_ring_post_desc(ring, err)) {
676         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
677     }
678 
679     return err;
680 }
681 
682 int rocker_port_eg(Rocker *r, uint32_t pport,
683                    const struct iovec *iov, int iovcnt)
684 {
685     FpPort *fp_port;
686     uint32_t port;
687 
688     if (!fp_port_from_pport(pport, &port)) {
689         return -ROCKER_EINVAL;
690     }
691 
692     fp_port = r->fp_port[port];
693 
694     return fp_port_eg(fp_port, iov, iovcnt);
695 }
696 
697 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
698 {
699     PCIDevice *dev = PCI_DEVICE(r);
700     char *buf;
701     int i;
702 
703     buf = g_malloc(r->test_dma_size);
704 
705     switch (val) {
706     case ROCKER_TEST_DMA_CTRL_CLEAR:
707         memset(buf, 0, r->test_dma_size);
708         break;
709     case ROCKER_TEST_DMA_CTRL_FILL:
710         memset(buf, 0x96, r->test_dma_size);
711         break;
712     case ROCKER_TEST_DMA_CTRL_INVERT:
713         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
714         for (i = 0; i < r->test_dma_size; i++) {
715             buf[i] = ~buf[i];
716         }
717         break;
718     default:
719         DPRINTF("not test dma control val=0x%08x\n", val);
720         goto err_out;
721     }
722     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
723 
724     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
725 
726 err_out:
727     g_free(buf);
728 }
729 
730 static void rocker_reset(DeviceState *dev);
731 
732 static void rocker_control(Rocker *r, uint32_t val)
733 {
734     if (val & ROCKER_CONTROL_RESET) {
735         rocker_reset(DEVICE(r));
736     }
737 }
738 
739 static int rocker_pci_ring_count(Rocker *r)
740 {
741     /* There are:
742      * - command ring
743      * - event ring
744      * - tx and rx ring per each port
745      */
746     return 2 + (2 * r->fp_ports);
747 }
748 
749 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
750 {
751     hwaddr start = ROCKER_DMA_DESC_BASE;
752     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
753 
754     return addr >= start && addr < end;
755 }
756 
757 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
758 {
759     int i;
760     bool old_enabled;
761     bool new_enabled;
762     FpPort *fp_port;
763 
764     for (i = 0; i < r->fp_ports; i++) {
765         fp_port = r->fp_port[i];
766         old_enabled = fp_port_enabled(fp_port);
767         new_enabled = (new >> (i + 1)) & 0x1;
768         if (new_enabled == old_enabled) {
769             continue;
770         }
771         if (new_enabled) {
772             fp_port_enable(r->fp_port[i]);
773         } else {
774             fp_port_disable(r->fp_port[i]);
775         }
776     }
777 }
778 
779 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
780 {
781     Rocker *r = opaque;
782 
783     if (rocker_addr_is_desc_reg(r, addr)) {
784         unsigned index = ROCKER_RING_INDEX(addr);
785         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
786 
787         switch (offset) {
788         case ROCKER_DMA_DESC_ADDR_OFFSET:
789             r->lower32 = (uint64_t)val;
790             break;
791         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
792             desc_ring_set_base_addr(r->rings[index],
793                                     ((uint64_t)val) << 32 | r->lower32);
794             r->lower32 = 0;
795             break;
796         case ROCKER_DMA_DESC_SIZE_OFFSET:
797             desc_ring_set_size(r->rings[index], val);
798             break;
799         case ROCKER_DMA_DESC_HEAD_OFFSET:
800             if (desc_ring_set_head(r->rings[index], val)) {
801                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
802             }
803             break;
804         case ROCKER_DMA_DESC_CTRL_OFFSET:
805             desc_ring_set_ctrl(r->rings[index], val);
806             break;
807         case ROCKER_DMA_DESC_CREDITS_OFFSET:
808             if (desc_ring_ret_credits(r->rings[index], val)) {
809                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
810             }
811             break;
812         default:
813             DPRINTF("not implemented dma reg write(l) addr=0x" HWADDR_FMT_plx
814                     " val=0x%08x (ring %d, addr=0x%02x)\n",
815                     addr, val, index, offset);
816             break;
817         }
818         return;
819     }
820 
821     switch (addr) {
822     case ROCKER_TEST_REG:
823         r->test_reg = val;
824         break;
825     case ROCKER_TEST_REG64:
826     case ROCKER_TEST_DMA_ADDR:
827     case ROCKER_PORT_PHYS_ENABLE:
828         r->lower32 = (uint64_t)val;
829         break;
830     case ROCKER_TEST_REG64 + 4:
831         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
832         r->lower32 = 0;
833         break;
834     case ROCKER_TEST_IRQ:
835         rocker_msix_irq(r, val);
836         break;
837     case ROCKER_TEST_DMA_SIZE:
838         r->test_dma_size = val & 0xFFFF;
839         break;
840     case ROCKER_TEST_DMA_ADDR + 4:
841         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
842         r->lower32 = 0;
843         break;
844     case ROCKER_TEST_DMA_CTRL:
845         rocker_test_dma_ctrl(r, val);
846         break;
847     case ROCKER_CONTROL:
848         rocker_control(r, val);
849         break;
850     case ROCKER_PORT_PHYS_ENABLE + 4:
851         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
852         r->lower32 = 0;
853         break;
854     default:
855         DPRINTF("not implemented write(l) addr=0x" HWADDR_FMT_plx
856                 " val=0x%08x\n", addr, val);
857         break;
858     }
859 }
860 
861 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
862 {
863     Rocker *r = opaque;
864 
865     if (rocker_addr_is_desc_reg(r, addr)) {
866         unsigned index = ROCKER_RING_INDEX(addr);
867         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
868 
869         switch (offset) {
870         case ROCKER_DMA_DESC_ADDR_OFFSET:
871             desc_ring_set_base_addr(r->rings[index], val);
872             break;
873         default:
874             DPRINTF("not implemented dma reg write(q) addr=0x" HWADDR_FMT_plx
875                     " val=0x" HWADDR_FMT_plx " (ring %d, offset=0x%02x)\n",
876                     addr, val, index, offset);
877             break;
878         }
879         return;
880     }
881 
882     switch (addr) {
883     case ROCKER_TEST_REG64:
884         r->test_reg64 = val;
885         break;
886     case ROCKER_TEST_DMA_ADDR:
887         r->test_dma_addr = val;
888         break;
889     case ROCKER_PORT_PHYS_ENABLE:
890         rocker_port_phys_enable_write(r, val);
891         break;
892     default:
893         DPRINTF("not implemented write(q) addr=0x" HWADDR_FMT_plx
894                 " val=0x" HWADDR_FMT_plx "\n", addr, val);
895         break;
896     }
897 }
898 
899 #ifdef DEBUG_ROCKER
900 #define regname(reg) case (reg): return #reg
901 static const char *rocker_reg_name(void *opaque, hwaddr addr)
902 {
903     Rocker *r = opaque;
904 
905     if (rocker_addr_is_desc_reg(r, addr)) {
906         unsigned index = ROCKER_RING_INDEX(addr);
907         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
908         static char buf[100];
909         char ring_name[10];
910 
911         switch (index) {
912         case 0:
913             sprintf(ring_name, "cmd");
914             break;
915         case 1:
916             sprintf(ring_name, "event");
917             break;
918         default:
919             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
920                     (index - 2) / 2);
921         }
922 
923         switch (offset) {
924         case ROCKER_DMA_DESC_ADDR_OFFSET:
925             sprintf(buf, "Ring[%s] ADDR", ring_name);
926             return buf;
927         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
928             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
929             return buf;
930         case ROCKER_DMA_DESC_SIZE_OFFSET:
931             sprintf(buf, "Ring[%s] SIZE", ring_name);
932             return buf;
933         case ROCKER_DMA_DESC_HEAD_OFFSET:
934             sprintf(buf, "Ring[%s] HEAD", ring_name);
935             return buf;
936         case ROCKER_DMA_DESC_TAIL_OFFSET:
937             sprintf(buf, "Ring[%s] TAIL", ring_name);
938             return buf;
939         case ROCKER_DMA_DESC_CTRL_OFFSET:
940             sprintf(buf, "Ring[%s] CTRL", ring_name);
941             return buf;
942         case ROCKER_DMA_DESC_CREDITS_OFFSET:
943             sprintf(buf, "Ring[%s] CREDITS", ring_name);
944             return buf;
945         default:
946             sprintf(buf, "Ring[%s] ???", ring_name);
947             return buf;
948         }
949     } else {
950         switch (addr) {
951             regname(ROCKER_BOGUS_REG0);
952             regname(ROCKER_BOGUS_REG1);
953             regname(ROCKER_BOGUS_REG2);
954             regname(ROCKER_BOGUS_REG3);
955             regname(ROCKER_TEST_REG);
956             regname(ROCKER_TEST_REG64);
957             regname(ROCKER_TEST_REG64+4);
958             regname(ROCKER_TEST_IRQ);
959             regname(ROCKER_TEST_DMA_ADDR);
960             regname(ROCKER_TEST_DMA_ADDR+4);
961             regname(ROCKER_TEST_DMA_SIZE);
962             regname(ROCKER_TEST_DMA_CTRL);
963             regname(ROCKER_CONTROL);
964             regname(ROCKER_PORT_PHYS_COUNT);
965             regname(ROCKER_PORT_PHYS_LINK_STATUS);
966             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
967             regname(ROCKER_PORT_PHYS_ENABLE);
968             regname(ROCKER_PORT_PHYS_ENABLE+4);
969             regname(ROCKER_SWITCH_ID);
970             regname(ROCKER_SWITCH_ID+4);
971         }
972     }
973     return "???";
974 }
975 #else
/*
 * Variant of rocker_reg_name() compiled in by the #else branch of the
 * enclosing preprocessor conditional.  It performs no register lookup:
 * both arguments are ignored and NULL is always returned.
 */
static const char *rocker_reg_name(void *opaque, hwaddr addr)
{
    return NULL;
}
980 #endif
981 
982 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
983                               unsigned size)
984 {
985     DPRINTF("Write %s addr " HWADDR_FMT_plx
986             ", size %u, val " HWADDR_FMT_plx "\n",
987             rocker_reg_name(opaque, addr), addr, size, val);
988 
989     switch (size) {
990     case 4:
991         rocker_io_writel(opaque, addr, val);
992         break;
993     case 8:
994         rocker_io_writeq(opaque, addr, val);
995         break;
996     }
997 }
998 
999 static uint64_t rocker_port_phys_link_status(Rocker *r)
1000 {
1001     int i;
1002     uint64_t status = 0;
1003 
1004     for (i = 0; i < r->fp_ports; i++) {
1005         FpPort *port = r->fp_port[i];
1006 
1007         if (fp_port_get_link_up(port)) {
1008             status |= 1ULL << (i + 1);
1009         }
1010     }
1011     return status;
1012 }
1013 
1014 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1015 {
1016     int i;
1017     uint64_t ret = 0;
1018 
1019     for (i = 0; i < r->fp_ports; i++) {
1020         FpPort *port = r->fp_port[i];
1021 
1022         if (fp_port_enabled(port)) {
1023             ret |= 1ULL << (i + 1);
1024         }
1025     }
1026     return ret;
1027 }
1028 
1029 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1030 {
1031     Rocker *r = opaque;
1032     uint32_t ret;
1033 
1034     if (rocker_addr_is_desc_reg(r, addr)) {
1035         unsigned index = ROCKER_RING_INDEX(addr);
1036         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1037 
1038         switch (offset) {
1039         case ROCKER_DMA_DESC_ADDR_OFFSET:
1040             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1041             break;
1042         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1043             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1044             break;
1045         case ROCKER_DMA_DESC_SIZE_OFFSET:
1046             ret = desc_ring_get_size(r->rings[index]);
1047             break;
1048         case ROCKER_DMA_DESC_HEAD_OFFSET:
1049             ret = desc_ring_get_head(r->rings[index]);
1050             break;
1051         case ROCKER_DMA_DESC_TAIL_OFFSET:
1052             ret = desc_ring_get_tail(r->rings[index]);
1053             break;
1054         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1055             ret = desc_ring_get_credits(r->rings[index]);
1056             break;
1057         default:
1058             DPRINTF("not implemented dma reg read(l) addr=0x" HWADDR_FMT_plx
1059                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1060             ret = 0;
1061             break;
1062         }
1063         return ret;
1064     }
1065 
1066     switch (addr) {
1067     case ROCKER_BOGUS_REG0:
1068     case ROCKER_BOGUS_REG1:
1069     case ROCKER_BOGUS_REG2:
1070     case ROCKER_BOGUS_REG3:
1071         ret = 0xDEADBABE;
1072         break;
1073     case ROCKER_TEST_REG:
1074         ret = r->test_reg * 2;
1075         break;
1076     case ROCKER_TEST_REG64:
1077         ret = (uint32_t)(r->test_reg64 * 2);
1078         break;
1079     case ROCKER_TEST_REG64 + 4:
1080         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1081         break;
1082     case ROCKER_TEST_DMA_SIZE:
1083         ret = r->test_dma_size;
1084         break;
1085     case ROCKER_TEST_DMA_ADDR:
1086         ret = (uint32_t)r->test_dma_addr;
1087         break;
1088     case ROCKER_TEST_DMA_ADDR + 4:
1089         ret = (uint32_t)(r->test_dma_addr >> 32);
1090         break;
1091     case ROCKER_PORT_PHYS_COUNT:
1092         ret = r->fp_ports;
1093         break;
1094     case ROCKER_PORT_PHYS_LINK_STATUS:
1095         ret = (uint32_t)rocker_port_phys_link_status(r);
1096         break;
1097     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1098         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1099         break;
1100     case ROCKER_PORT_PHYS_ENABLE:
1101         ret = (uint32_t)rocker_port_phys_enable_read(r);
1102         break;
1103     case ROCKER_PORT_PHYS_ENABLE + 4:
1104         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1105         break;
1106     case ROCKER_SWITCH_ID:
1107         ret = (uint32_t)r->switch_id;
1108         break;
1109     case ROCKER_SWITCH_ID + 4:
1110         ret = (uint32_t)(r->switch_id >> 32);
1111         break;
1112     default:
1113         DPRINTF("not implemented read(l) addr=0x" HWADDR_FMT_plx "\n", addr);
1114         ret = 0;
1115         break;
1116     }
1117     return ret;
1118 }
1119 
1120 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1121 {
1122     Rocker *r = opaque;
1123     uint64_t ret;
1124 
1125     if (rocker_addr_is_desc_reg(r, addr)) {
1126         unsigned index = ROCKER_RING_INDEX(addr);
1127         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1128 
1129         switch (addr & ROCKER_DMA_DESC_MASK) {
1130         case ROCKER_DMA_DESC_ADDR_OFFSET:
1131             ret = desc_ring_get_base_addr(r->rings[index]);
1132             break;
1133         default:
1134             DPRINTF("not implemented dma reg read(q) addr=0x" HWADDR_FMT_plx
1135                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1136             ret = 0;
1137             break;
1138         }
1139         return ret;
1140     }
1141 
1142     switch (addr) {
1143     case ROCKER_BOGUS_REG0:
1144     case ROCKER_BOGUS_REG2:
1145         ret = 0xDEADBABEDEADBABEULL;
1146         break;
1147     case ROCKER_TEST_REG64:
1148         ret = r->test_reg64 * 2;
1149         break;
1150     case ROCKER_TEST_DMA_ADDR:
1151         ret = r->test_dma_addr;
1152         break;
1153     case ROCKER_PORT_PHYS_LINK_STATUS:
1154         ret = rocker_port_phys_link_status(r);
1155         break;
1156     case ROCKER_PORT_PHYS_ENABLE:
1157         ret = rocker_port_phys_enable_read(r);
1158         break;
1159     case ROCKER_SWITCH_ID:
1160         ret = r->switch_id;
1161         break;
1162     default:
1163         DPRINTF("not implemented read(q) addr=0x" HWADDR_FMT_plx "\n", addr);
1164         ret = 0;
1165         break;
1166     }
1167     return ret;
1168 }
1169 
1170 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1171 {
1172     DPRINTF("Read %s addr " HWADDR_FMT_plx ", size %u\n",
1173             rocker_reg_name(opaque, addr), addr, size);
1174 
1175     switch (size) {
1176     case 4:
1177         return rocker_io_readl(opaque, addr);
1178     case 8:
1179         return rocker_io_readq(opaque, addr);
1180     }
1181 
1182     return -1;
1183 }
1184 
/*
 * Access callbacks for the rocker register region, which
 * pci_rocker_realize() creates as "rocker-mmio" and registers as BAR0.
 * Both the accepted (.valid) and the implemented (.impl) access sizes are
 * bounded to 4..8 bytes, matching the 4- and 8-byte cases handled by
 * rocker_mmio_read() and rocker_mmio_write().
 */
static const MemoryRegionOps rocker_mmio_ops = {
    .read = rocker_mmio_read,
    .write = rocker_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    /* access widths accepted on this region */
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
    /* access widths the callbacks above are written to handle */
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
};
1198 
1199 static void rocker_msix_vectors_unuse(Rocker *r,
1200                                       unsigned int num_vectors)
1201 {
1202     PCIDevice *dev = PCI_DEVICE(r);
1203     int i;
1204 
1205     for (i = 0; i < num_vectors; i++) {
1206         msix_vector_unuse(dev, i);
1207     }
1208 }
1209 
1210 static void rocker_msix_vectors_use(Rocker *r, unsigned int num_vectors)
1211 {
1212     PCIDevice *dev = PCI_DEVICE(r);
1213     int i;
1214 
1215     for (i = 0; i < num_vectors; i++) {
1216         msix_vector_use(dev, i);
1217     }
1218 }
1219 
1220 static int rocker_msix_init(Rocker *r, Error **errp)
1221 {
1222     PCIDevice *dev = PCI_DEVICE(r);
1223     int err;
1224 
1225     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1226                     &r->msix_bar,
1227                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1228                     &r->msix_bar,
1229                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1230                     0, errp);
1231     if (err) {
1232         return err;
1233     }
1234 
1235     rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1236 
1237     return 0;
1238 }
1239 
1240 static void rocker_msix_uninit(Rocker *r)
1241 {
1242     PCIDevice *dev = PCI_DEVICE(r);
1243 
1244     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1245     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1246 }
1247 
1248 static World *rocker_world_type_by_name(Rocker *r, const char *name)
1249 {
1250     int i;
1251 
1252     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1253         if (strcmp(name, world_name(r->worlds[i])) == 0) {
1254             return r->worlds[i];
1255         }
1256     }
1257     return NULL;
1258 }
1259 
1260 static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1261 {
1262     Rocker *r = ROCKER(dev);
1263     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1264     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1265     static int sw_index;
1266     int i, err = 0;
1267 
1268     /* allocate worlds */
1269 
1270     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1271 
1272     if (!r->world_name) {
1273         r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1274     }
1275 
1276     r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1277     if (!r->world_dflt) {
1278         error_setg(errp,
1279                 "invalid argument requested world %s does not exist",
1280                 r->world_name);
1281         goto err_world_type_by_name;
1282     }
1283 
1284     /* set up memory-mapped region at BAR0 */
1285 
1286     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1287                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1288     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1289                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1290 
1291     /* set up memory-mapped region for MSI-X */
1292 
1293     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1294                        ROCKER_PCI_MSIX_BAR_SIZE);
1295     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1296                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1297 
1298     /* MSI-X init */
1299 
1300     err = rocker_msix_init(r, errp);
1301     if (err) {
1302         goto err_msix_init;
1303     }
1304 
1305     /* validate switch properties */
1306 
1307     if (!r->name) {
1308         r->name = g_strdup(TYPE_ROCKER);
1309     }
1310 
1311     if (rocker_find(r->name)) {
1312         error_setg(errp, "%s already exists", r->name);
1313         goto err_duplicate;
1314     }
1315 
1316     /* Rocker name is passed in port name requests to OS with the intention
1317      * that the name is used in interface names. Limit the length of the
1318      * rocker name to avoid naming problems in the OS. Also, adding the
1319      * port number as p# and unganged breakout b#, where # is at most 2
1320      * digits, so leave room for it too (-1 for string terminator, -3 for
1321      * p# and -3 for b#)
1322      */
1323 #define ROCKER_IFNAMSIZ 16
1324 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1325     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1326         error_setg(errp,
1327                 "name too long; please shorten to at most %d chars",
1328                 MAX_ROCKER_NAME_LEN);
1329         goto err_name_too_long;
1330     }
1331 
1332     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1333         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1334         r->fp_start_macaddr.a[4] += (sw_index++);
1335     }
1336 
1337     if (!r->switch_id) {
1338         memcpy(&r->switch_id, &r->fp_start_macaddr,
1339                sizeof(r->fp_start_macaddr));
1340     }
1341 
1342     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1343         r->fp_ports = ROCKER_FP_PORTS_MAX;
1344     }
1345 
1346     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1347 
1348     /* Rings are ordered like this:
1349      * - command ring
1350      * - event ring
1351      * - port0 tx ring
1352      * - port0 rx ring
1353      * - port1 tx ring
1354      * - port1 rx ring
1355      * .....
1356      */
1357 
1358     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1359         DescRing *ring = desc_ring_alloc(r, i);
1360 
1361         if (i == ROCKER_RING_CMD) {
1362             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1363         } else if (i == ROCKER_RING_EVENT) {
1364             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1365         } else if (i % 2 == 0) {
1366             desc_ring_set_consume(ring, tx_consume,
1367                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1368         } else if (i % 2 == 1) {
1369             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1370         }
1371 
1372         r->rings[i] = ring;
1373     }
1374 
1375     for (i = 0; i < r->fp_ports; i++) {
1376         FpPort *port =
1377             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1378                           i, &r->fp_ports_peers[i]);
1379 
1380         r->fp_port[i] = port;
1381         fp_port_set_world(port, r->world_dflt);
1382     }
1383 
1384     QLIST_INSERT_HEAD(&rockers, r, next);
1385 
1386     return;
1387 
1388 err_name_too_long:
1389 err_duplicate:
1390     rocker_msix_uninit(r);
1391 err_msix_init:
1392     object_unparent(OBJECT(&r->msix_bar));
1393     object_unparent(OBJECT(&r->mmio));
1394 err_world_type_by_name:
1395     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1396         if (r->worlds[i]) {
1397             world_free(r->worlds[i]);
1398         }
1399     }
1400 }
1401 
1402 static void pci_rocker_uninit(PCIDevice *dev)
1403 {
1404     Rocker *r = ROCKER(dev);
1405     int i;
1406 
1407     QLIST_REMOVE(r, next);
1408 
1409     for (i = 0; i < r->fp_ports; i++) {
1410         FpPort *port = r->fp_port[i];
1411 
1412         fp_port_free(port);
1413         r->fp_port[i] = NULL;
1414     }
1415 
1416     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1417         if (r->rings[i]) {
1418             desc_ring_free(r->rings[i]);
1419         }
1420     }
1421     g_free(r->rings);
1422 
1423     rocker_msix_uninit(r);
1424     object_unparent(OBJECT(&r->msix_bar));
1425     object_unparent(OBJECT(&r->mmio));
1426 
1427     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1428         if (r->worlds[i]) {
1429             world_free(r->worlds[i]);
1430         }
1431     }
1432     g_free(r->fp_ports_peers);
1433 }
1434 
1435 static void rocker_reset(DeviceState *dev)
1436 {
1437     Rocker *r = ROCKER(dev);
1438     int i;
1439 
1440     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1441         if (r->worlds[i]) {
1442             world_reset(r->worlds[i]);
1443         }
1444     }
1445     for (i = 0; i < r->fp_ports; i++) {
1446         fp_port_reset(r->fp_port[i]);
1447         fp_port_set_world(r->fp_port[i], r->world_dflt);
1448     }
1449 
1450     r->test_reg = 0;
1451     r->test_reg64 = 0;
1452     r->test_dma_addr = 0;
1453     r->test_dma_size = 0;
1454 
1455     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1456         desc_ring_reset(r->rings[i]);
1457     }
1458 
1459     DPRINTF("Reset done\n");
1460 }
1461 
/*
 * User-configurable device properties.  pci_rocker_realize() fills in
 * defaults for the ones left unset: "name" falls back to TYPE_ROCKER,
 * "world" to the OF-DPA world's name, an all-zero "fp_start_macaddr" to a
 * built-in default address, and a zero "switch_id" to a value copied from
 * the start MAC address.
 */
static Property rocker_properties[] = {
    /* switch name */
    DEFINE_PROP_STRING("name", Rocker, name),
    /* name of the world the ports start out in */
    DEFINE_PROP_STRING("world", Rocker, world_name),
    /* MAC address of front-panel port 0 */
    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
                        fp_start_macaddr),
    DEFINE_PROP_UINT64("switch_id", Rocker,
                       switch_id, 0),
    /* front-panel port peers; element count lands in fp_ports */
    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
                      fp_ports_peers, qdev_prop_netdev, NICPeers),
    DEFINE_PROP_END_OF_LIST(),
};
1473 
/*
 * Migration description for the device.  It declares no state fields and
 * is flagged unmigratable.
 */
static const VMStateDescription rocker_vmsd = {
    .name = TYPE_ROCKER,
    .unmigratable = 1,
};
1478 
1479 static void rocker_class_init(ObjectClass *klass, void *data)
1480 {
1481     DeviceClass *dc = DEVICE_CLASS(klass);
1482     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1483 
1484     k->realize = pci_rocker_realize;
1485     k->exit = pci_rocker_uninit;
1486     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1487     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1488     k->revision = ROCKER_PCI_REVISION;
1489     k->class_id = PCI_CLASS_NETWORK_OTHER;
1490     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1491     dc->desc = "Rocker Switch";
1492     device_class_set_legacy_reset(dc, rocker_reset);
1493     device_class_set_props(dc, rocker_properties);
1494     dc->vmsd = &rocker_vmsd;
1495 }
1496 
/*
 * Type registration record: TYPE_ROCKER derives from TYPE_PCI_DEVICE,
 * uses Rocker as its instance structure and declares the conventional
 * PCI device interface.
 */
static const TypeInfo rocker_info = {
    .name          = TYPE_ROCKER,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(Rocker),
    .class_init    = rocker_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },    /* empty entry terminates the list */
    },
};
1507 
/* Register the rocker device type described by rocker_info. */
static void rocker_register_types(void)
{
    type_register_static(&rocker_info);
}

/* Hook the registration function into QEMU's type initialisation. */
type_init(rocker_register_types)
1514