xref: /openbmc/qemu/hw/net/rocker/rocker.c (revision 500eb6db)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "hw/pci/msix.h"
22 #include "net/net.h"
23 #include "net/eth.h"
24 #include "qapi/error.h"
25 #include "qapi/qapi-commands-rocker.h"
26 #include "qemu/iov.h"
27 #include "qemu/module.h"
28 #include "qemu/bitops.h"
29 
30 #include "rocker.h"
31 #include "rocker_hw.h"
32 #include "rocker_fp.h"
33 #include "rocker_desc.h"
34 #include "rocker_tlv.h"
35 #include "rocker_world.h"
36 #include "rocker_of_dpa.h"
37 
38 struct rocker {
39     /* private */
40     PCIDevice parent_obj;
41     /* public */
42 
43     MemoryRegion mmio;
44     MemoryRegion msix_bar;
45 
46     /* switch configuration */
47     char *name;                  /* switch name */
48     char *world_name;            /* world name */
49     uint32_t fp_ports;           /* front-panel port count */
50     NICPeers *fp_ports_peers;
51     MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
52     uint64_t switch_id;          /* switch id */
53 
54     /* front-panel ports */
55     FpPort *fp_port[ROCKER_FP_PORTS_MAX];
56 
57     /* register backings */
58     uint32_t test_reg;
59     uint64_t test_reg64;
60     dma_addr_t test_dma_addr;
61     uint32_t test_dma_size;
62     uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
63 
64     /* desc rings */
65     DescRing **rings;
66 
67     /* switch worlds */
68     World *worlds[ROCKER_WORLD_TYPE_MAX];
69     World *world_dflt;
70 
71     QLIST_ENTRY(rocker) next;
72 };
73 
74 #define TYPE_ROCKER "rocker"
75 
76 #define ROCKER(obj) \
77     OBJECT_CHECK(Rocker, (obj), TYPE_ROCKER)
78 
79 static QLIST_HEAD(, rocker) rockers;
80 
81 Rocker *rocker_find(const char *name)
82 {
83     Rocker *r;
84 
85     QLIST_FOREACH(r, &rockers, next)
86         if (strcmp(r->name, name) == 0) {
87             return r;
88         }
89 
90     return NULL;
91 }
92 
93 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
94 {
95     if (type < ROCKER_WORLD_TYPE_MAX) {
96         return r->worlds[type];
97     }
98     return NULL;
99 }
100 
101 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
102 {
103     RockerSwitch *rocker;
104     Rocker *r;
105 
106     r = rocker_find(name);
107     if (!r) {
108         error_setg(errp, "rocker %s not found", name);
109         return NULL;
110     }
111 
112     rocker = g_new0(RockerSwitch, 1);
113     rocker->name = g_strdup(r->name);
114     rocker->id = r->switch_id;
115     rocker->ports = r->fp_ports;
116 
117     return rocker;
118 }
119 
120 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
121 {
122     RockerPortList *list = NULL;
123     Rocker *r;
124     int i;
125 
126     r = rocker_find(name);
127     if (!r) {
128         error_setg(errp, "rocker %s not found", name);
129         return NULL;
130     }
131 
132     for (i = r->fp_ports - 1; i >= 0; i--) {
133         RockerPortList *info = g_malloc0(sizeof(*info));
134         info->value = g_malloc0(sizeof(*info->value));
135         struct fp_port *port = r->fp_port[i];
136 
137         fp_port_get_info(port, info);
138         info->next = list;
139         list = info;
140     }
141 
142     return list;
143 }
144 
145 uint32_t rocker_fp_ports(Rocker *r)
146 {
147     return r->fp_ports;
148 }
149 
150 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
151                                             DescRing *ring)
152 {
153     return (desc_ring_index(ring) - 2) / 2 + 1;
154 }
155 
156 static int tx_consume(Rocker *r, DescInfo *info)
157 {
158     PCIDevice *dev = PCI_DEVICE(r);
159     char *buf = desc_get_buf(info, true);
160     RockerTlv *tlv_frag;
161     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
162     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
163     uint32_t pport;
164     uint32_t port;
165     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
166     uint16_t tx_l3_csum_off = 0;
167     uint16_t tx_tso_mss = 0;
168     uint16_t tx_tso_hdr_len = 0;
169     int iovcnt = 0;
170     int err = ROCKER_OK;
171     int rem;
172     int i;
173 
174     if (!buf) {
175         return -ROCKER_ENXIO;
176     }
177 
178     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
179 
180     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
181         return -ROCKER_EINVAL;
182     }
183 
184     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
185     if (!fp_port_from_pport(pport, &port)) {
186         return -ROCKER_EINVAL;
187     }
188 
189     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
190         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
191     }
192 
193     switch (tx_offload) {
194     case ROCKER_TX_OFFLOAD_L3_CSUM:
195         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
196             return -ROCKER_EINVAL;
197         }
198         break;
199     case ROCKER_TX_OFFLOAD_TSO:
200         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
201             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
202             return -ROCKER_EINVAL;
203         }
204         break;
205     }
206 
207     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
208         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
209     }
210 
211     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
212         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
213     }
214 
215     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
216         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
217     }
218 
219     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
220         hwaddr frag_addr;
221         uint16_t frag_len;
222 
223         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
224             err = -ROCKER_EINVAL;
225             goto err_bad_attr;
226         }
227 
228         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
229 
230         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
231             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
232             err = -ROCKER_EINVAL;
233             goto err_bad_attr;
234         }
235 
236         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
237         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
238 
239         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
240             goto err_too_many_frags;
241         }
242         iov[iovcnt].iov_len = frag_len;
243         iov[iovcnt].iov_base = g_malloc(frag_len);
244 
245         pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
246                      iov[iovcnt].iov_len);
247 
248         iovcnt++;
249     }
250 
251     if (iovcnt) {
252         /* XXX perform Tx offloads */
253         /* XXX   silence compiler for now */
254         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
255     }
256 
257     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
258 
259 err_too_many_frags:
260 err_bad_attr:
261     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
262         g_free(iov[i].iov_base);
263     }
264 
265     return err;
266 }
267 
268 static int cmd_get_port_settings(Rocker *r,
269                                  DescInfo *info, char *buf,
270                                  RockerTlv *cmd_info_tlv)
271 {
272     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
273     RockerTlv *nest;
274     FpPort *fp_port;
275     uint32_t pport;
276     uint32_t port;
277     uint32_t speed;
278     uint8_t duplex;
279     uint8_t autoneg;
280     uint8_t learning;
281     char *phys_name;
282     MACAddr macaddr;
283     enum rocker_world_type mode;
284     size_t tlv_size;
285     int pos;
286     int err;
287 
288     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
289                             cmd_info_tlv);
290 
291     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
292         return -ROCKER_EINVAL;
293     }
294 
295     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
296     if (!fp_port_from_pport(pport, &port)) {
297         return -ROCKER_EINVAL;
298     }
299     fp_port = r->fp_port[port];
300 
301     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
302     if (err) {
303         return err;
304     }
305 
306     fp_port_get_macaddr(fp_port, &macaddr);
307     mode = world_type(fp_port_get_world(fp_port));
308     learning = fp_port_get_learning(fp_port);
309     phys_name = fp_port_get_name(fp_port);
310 
311     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
312                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
313                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
314                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
315                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
316                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
317                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
318                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
319                rocker_tlv_total_size(strlen(phys_name));
320 
321     if (tlv_size > desc_buf_size(info)) {
322         return -ROCKER_EMSGSIZE;
323     }
324 
325     pos = 0;
326     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
327     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
328     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
329     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
330     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
331     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
332                    sizeof(macaddr.a), macaddr.a);
333     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
334     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
335                       learning);
336     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
337                    strlen(phys_name), phys_name);
338     rocker_tlv_nest_end(buf, &pos, nest);
339 
340     return desc_set_buf(info, tlv_size);
341 }
342 
343 static int cmd_set_port_settings(Rocker *r,
344                                  RockerTlv *cmd_info_tlv)
345 {
346     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
347     FpPort *fp_port;
348     uint32_t pport;
349     uint32_t port;
350     uint32_t speed;
351     uint8_t duplex;
352     uint8_t autoneg;
353     uint8_t learning;
354     MACAddr macaddr;
355     enum rocker_world_type mode;
356     int err;
357 
358     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
359                             cmd_info_tlv);
360 
361     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
362         return -ROCKER_EINVAL;
363     }
364 
365     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
366     if (!fp_port_from_pport(pport, &port)) {
367         return -ROCKER_EINVAL;
368     }
369     fp_port = r->fp_port[port];
370 
371     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
372         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
373         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
374 
375         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
376         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
377         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
378 
379         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
380         if (err) {
381             return err;
382         }
383     }
384 
385     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
386         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
387             sizeof(macaddr.a)) {
388             return -ROCKER_EINVAL;
389         }
390         memcpy(macaddr.a,
391                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
392                sizeof(macaddr.a));
393         fp_port_set_macaddr(fp_port, &macaddr);
394     }
395 
396     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
397         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
398         if (mode >= ROCKER_WORLD_TYPE_MAX) {
399             return -ROCKER_EINVAL;
400         }
401         /* We don't support world change. */
402         if (!fp_port_check_world(fp_port, r->worlds[mode])) {
403             return -ROCKER_EINVAL;
404         }
405     }
406 
407     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
408         learning =
409             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
410         fp_port_set_learning(fp_port, learning);
411     }
412 
413     return ROCKER_OK;
414 }
415 
416 static int cmd_consume(Rocker *r, DescInfo *info)
417 {
418     char *buf = desc_get_buf(info, false);
419     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
420     RockerTlv *info_tlv;
421     World *world;
422     uint16_t cmd;
423     int err;
424 
425     if (!buf) {
426         return -ROCKER_ENXIO;
427     }
428 
429     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
430 
431     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
432         return -ROCKER_EINVAL;
433     }
434 
435     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
436     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
437 
438     /* This might be reworked to something like this:
439      * Every world will have an array of command handlers from
440      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
441      * up to each world to implement whatever command it want.
442      * It can reference "generic" commands as cmd_set_port_settings or
443      * cmd_get_port_settings
444      */
445 
446     switch (cmd) {
447     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
448     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
449     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
450     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
451     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
452     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
453     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
454     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
455         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
456         err = world_do_cmd(world, info, buf, cmd, info_tlv);
457         break;
458     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
459         err = cmd_get_port_settings(r, info, buf, info_tlv);
460         break;
461     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
462         err = cmd_set_port_settings(r, info_tlv);
463         break;
464     default:
465         err = -ROCKER_EINVAL;
466         break;
467     }
468 
469     return err;
470 }
471 
472 static void rocker_msix_irq(Rocker *r, unsigned vector)
473 {
474     PCIDevice *dev = PCI_DEVICE(r);
475 
476     DPRINTF("MSI-X notify request for vector %d\n", vector);
477     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
478         DPRINTF("incorrect vector %d\n", vector);
479         return;
480     }
481     msix_notify(dev, vector);
482 }
483 
484 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
485 {
486     DescRing *ring = r->rings[ROCKER_RING_EVENT];
487     DescInfo *info = desc_ring_fetch_desc(ring);
488     RockerTlv *nest;
489     char *buf;
490     size_t tlv_size;
491     int pos;
492     int err;
493 
494     if (!info) {
495         return -ROCKER_ENOBUFS;
496     }
497 
498     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
499                rocker_tlv_total_size(0) +                 /* nest */
500                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
501                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
502 
503     if (tlv_size > desc_buf_size(info)) {
504         err = -ROCKER_EMSGSIZE;
505         goto err_too_big;
506     }
507 
508     buf = desc_get_buf(info, false);
509     if (!buf) {
510         err = -ROCKER_ENOMEM;
511         goto err_no_mem;
512     }
513 
514     pos = 0;
515     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
516                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
517     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
518     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
519     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
520                       link_up ? 1 : 0);
521     rocker_tlv_nest_end(buf, &pos, nest);
522 
523     err = desc_set_buf(info, tlv_size);
524 
525 err_too_big:
526 err_no_mem:
527     if (desc_ring_post_desc(ring, err)) {
528         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
529     }
530 
531     return err;
532 }
533 
534 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
535                                uint16_t vlan_id)
536 {
537     DescRing *ring = r->rings[ROCKER_RING_EVENT];
538     DescInfo *info;
539     FpPort *fp_port;
540     uint32_t port;
541     RockerTlv *nest;
542     char *buf;
543     size_t tlv_size;
544     int pos;
545     int err;
546 
547     if (!fp_port_from_pport(pport, &port)) {
548         return -ROCKER_EINVAL;
549     }
550     fp_port = r->fp_port[port];
551     if (!fp_port_get_learning(fp_port)) {
552         return ROCKER_OK;
553     }
554 
555     info = desc_ring_fetch_desc(ring);
556     if (!info) {
557         return -ROCKER_ENOBUFS;
558     }
559 
560     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
561                rocker_tlv_total_size(0) +                 /* nest */
562                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
563                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
564                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
565 
566     if (tlv_size > desc_buf_size(info)) {
567         err = -ROCKER_EMSGSIZE;
568         goto err_too_big;
569     }
570 
571     buf = desc_get_buf(info, false);
572     if (!buf) {
573         err = -ROCKER_ENOMEM;
574         goto err_no_mem;
575     }
576 
577     pos = 0;
578     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
579                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
580     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
581     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
582     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
583     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
584     rocker_tlv_nest_end(buf, &pos, nest);
585 
586     err = desc_set_buf(info, tlv_size);
587 
588 err_too_big:
589 err_no_mem:
590     if (desc_ring_post_desc(ring, err)) {
591         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
592     }
593 
594     return err;
595 }
596 
597 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
598                                                      uint32_t pport)
599 {
600     return r->rings[(pport - 1) * 2 + 3];
601 }
602 
603 int rx_produce(World *world, uint32_t pport,
604                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
605 {
606     Rocker *r = world_rocker(world);
607     PCIDevice *dev = (PCIDevice *)r;
608     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
609     DescInfo *info = desc_ring_fetch_desc(ring);
610     char *data;
611     size_t data_size = iov_size(iov, iovcnt);
612     char *buf;
613     uint16_t rx_flags = 0;
614     uint16_t rx_csum = 0;
615     size_t tlv_size;
616     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
617     hwaddr frag_addr;
618     uint16_t frag_max_len;
619     int pos;
620     int err;
621 
622     if (!info) {
623         return -ROCKER_ENOBUFS;
624     }
625 
626     buf = desc_get_buf(info, false);
627     if (!buf) {
628         err = -ROCKER_ENXIO;
629         goto out;
630     }
631     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
632 
633     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
634         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
635         err = -ROCKER_EINVAL;
636         goto out;
637     }
638 
639     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
640     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
641 
642     if (data_size > frag_max_len) {
643         err = -ROCKER_EMSGSIZE;
644         goto out;
645     }
646 
647     if (copy_to_cpu) {
648         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
649     }
650 
651     /* XXX calc rx flags/csum */
652 
653     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
654                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
655                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
656                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
657                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
658 
659     if (tlv_size > desc_buf_size(info)) {
660         err = -ROCKER_EMSGSIZE;
661         goto out;
662     }
663 
664     /* TODO:
665      * iov dma write can be optimized in similar way e1000 does it in
666      * e1000_receive_iov. But maybe if would make sense to introduce
667      * generic helper iov_dma_write.
668      */
669 
670     data = g_malloc(data_size);
671 
672     iov_to_buf(iov, iovcnt, 0, data, data_size);
673     pci_dma_write(dev, frag_addr, data, data_size);
674     g_free(data);
675 
676     pos = 0;
677     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
678     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
679     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
680     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
681     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
682 
683     err = desc_set_buf(info, tlv_size);
684 
685 out:
686     if (desc_ring_post_desc(ring, err)) {
687         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
688     }
689 
690     return err;
691 }
692 
693 int rocker_port_eg(Rocker *r, uint32_t pport,
694                    const struct iovec *iov, int iovcnt)
695 {
696     FpPort *fp_port;
697     uint32_t port;
698 
699     if (!fp_port_from_pport(pport, &port)) {
700         return -ROCKER_EINVAL;
701     }
702 
703     fp_port = r->fp_port[port];
704 
705     return fp_port_eg(fp_port, iov, iovcnt);
706 }
707 
708 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
709 {
710     PCIDevice *dev = PCI_DEVICE(r);
711     char *buf;
712     int i;
713 
714     buf = g_malloc(r->test_dma_size);
715 
716     switch (val) {
717     case ROCKER_TEST_DMA_CTRL_CLEAR:
718         memset(buf, 0, r->test_dma_size);
719         break;
720     case ROCKER_TEST_DMA_CTRL_FILL:
721         memset(buf, 0x96, r->test_dma_size);
722         break;
723     case ROCKER_TEST_DMA_CTRL_INVERT:
724         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
725         for (i = 0; i < r->test_dma_size; i++) {
726             buf[i] = ~buf[i];
727         }
728         break;
729     default:
730         DPRINTF("not test dma control val=0x%08x\n", val);
731         goto err_out;
732     }
733     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
734 
735     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
736 
737 err_out:
738     g_free(buf);
739 }
740 
741 static void rocker_reset(DeviceState *dev);
742 
743 static void rocker_control(Rocker *r, uint32_t val)
744 {
745     if (val & ROCKER_CONTROL_RESET) {
746         rocker_reset(DEVICE(r));
747     }
748 }
749 
750 static int rocker_pci_ring_count(Rocker *r)
751 {
752     /* There are:
753      * - command ring
754      * - event ring
755      * - tx and rx ring per each port
756      */
757     return 2 + (2 * r->fp_ports);
758 }
759 
760 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
761 {
762     hwaddr start = ROCKER_DMA_DESC_BASE;
763     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
764 
765     return addr >= start && addr < end;
766 }
767 
768 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
769 {
770     int i;
771     bool old_enabled;
772     bool new_enabled;
773     FpPort *fp_port;
774 
775     for (i = 0; i < r->fp_ports; i++) {
776         fp_port = r->fp_port[i];
777         old_enabled = fp_port_enabled(fp_port);
778         new_enabled = (new >> (i + 1)) & 0x1;
779         if (new_enabled == old_enabled) {
780             continue;
781         }
782         if (new_enabled) {
783             fp_port_enable(r->fp_port[i]);
784         } else {
785             fp_port_disable(r->fp_port[i]);
786         }
787     }
788 }
789 
790 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
791 {
792     Rocker *r = opaque;
793 
794     if (rocker_addr_is_desc_reg(r, addr)) {
795         unsigned index = ROCKER_RING_INDEX(addr);
796         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
797 
798         switch (offset) {
799         case ROCKER_DMA_DESC_ADDR_OFFSET:
800             r->lower32 = (uint64_t)val;
801             break;
802         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
803             desc_ring_set_base_addr(r->rings[index],
804                                     ((uint64_t)val) << 32 | r->lower32);
805             r->lower32 = 0;
806             break;
807         case ROCKER_DMA_DESC_SIZE_OFFSET:
808             desc_ring_set_size(r->rings[index], val);
809             break;
810         case ROCKER_DMA_DESC_HEAD_OFFSET:
811             if (desc_ring_set_head(r->rings[index], val)) {
812                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
813             }
814             break;
815         case ROCKER_DMA_DESC_CTRL_OFFSET:
816             desc_ring_set_ctrl(r->rings[index], val);
817             break;
818         case ROCKER_DMA_DESC_CREDITS_OFFSET:
819             if (desc_ring_ret_credits(r->rings[index], val)) {
820                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
821             }
822             break;
823         default:
824             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
825                     " val=0x%08x (ring %d, addr=0x%02x)\n",
826                     addr, val, index, offset);
827             break;
828         }
829         return;
830     }
831 
832     switch (addr) {
833     case ROCKER_TEST_REG:
834         r->test_reg = val;
835         break;
836     case ROCKER_TEST_REG64:
837     case ROCKER_TEST_DMA_ADDR:
838     case ROCKER_PORT_PHYS_ENABLE:
839         r->lower32 = (uint64_t)val;
840         break;
841     case ROCKER_TEST_REG64 + 4:
842         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
843         r->lower32 = 0;
844         break;
845     case ROCKER_TEST_IRQ:
846         rocker_msix_irq(r, val);
847         break;
848     case ROCKER_TEST_DMA_SIZE:
849         r->test_dma_size = val & 0xFFFF;
850         break;
851     case ROCKER_TEST_DMA_ADDR + 4:
852         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
853         r->lower32 = 0;
854         break;
855     case ROCKER_TEST_DMA_CTRL:
856         rocker_test_dma_ctrl(r, val);
857         break;
858     case ROCKER_CONTROL:
859         rocker_control(r, val);
860         break;
861     case ROCKER_PORT_PHYS_ENABLE + 4:
862         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
863         r->lower32 = 0;
864         break;
865     default:
866         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
867                 " val=0x%08x\n", addr, val);
868         break;
869     }
870 }
871 
872 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
873 {
874     Rocker *r = opaque;
875 
876     if (rocker_addr_is_desc_reg(r, addr)) {
877         unsigned index = ROCKER_RING_INDEX(addr);
878         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
879 
880         switch (offset) {
881         case ROCKER_DMA_DESC_ADDR_OFFSET:
882             desc_ring_set_base_addr(r->rings[index], val);
883             break;
884         default:
885             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
886                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
887                     addr, val, index, offset);
888             break;
889         }
890         return;
891     }
892 
893     switch (addr) {
894     case ROCKER_TEST_REG64:
895         r->test_reg64 = val;
896         break;
897     case ROCKER_TEST_DMA_ADDR:
898         r->test_dma_addr = val;
899         break;
900     case ROCKER_PORT_PHYS_ENABLE:
901         rocker_port_phys_enable_write(r, val);
902         break;
903     default:
904         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
905                 " val=0x" TARGET_FMT_plx "\n", addr, val);
906         break;
907     }
908 }
909 
910 #ifdef DEBUG_ROCKER
911 #define regname(reg) case (reg): return #reg
912 static const char *rocker_reg_name(void *opaque, hwaddr addr)
913 {
914     Rocker *r = opaque;
915 
916     if (rocker_addr_is_desc_reg(r, addr)) {
917         unsigned index = ROCKER_RING_INDEX(addr);
918         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
919         static char buf[100];
920         char ring_name[10];
921 
922         switch (index) {
923         case 0:
924             sprintf(ring_name, "cmd");
925             break;
926         case 1:
927             sprintf(ring_name, "event");
928             break;
929         default:
930             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
931                     (index - 2) / 2);
932         }
933 
934         switch (offset) {
935         case ROCKER_DMA_DESC_ADDR_OFFSET:
936             sprintf(buf, "Ring[%s] ADDR", ring_name);
937             return buf;
938         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
939             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
940             return buf;
941         case ROCKER_DMA_DESC_SIZE_OFFSET:
942             sprintf(buf, "Ring[%s] SIZE", ring_name);
943             return buf;
944         case ROCKER_DMA_DESC_HEAD_OFFSET:
945             sprintf(buf, "Ring[%s] HEAD", ring_name);
946             return buf;
947         case ROCKER_DMA_DESC_TAIL_OFFSET:
948             sprintf(buf, "Ring[%s] TAIL", ring_name);
949             return buf;
950         case ROCKER_DMA_DESC_CTRL_OFFSET:
951             sprintf(buf, "Ring[%s] CTRL", ring_name);
952             return buf;
953         case ROCKER_DMA_DESC_CREDITS_OFFSET:
954             sprintf(buf, "Ring[%s] CREDITS", ring_name);
955             return buf;
956         default:
957             sprintf(buf, "Ring[%s] ???", ring_name);
958             return buf;
959         }
960     } else {
961         switch (addr) {
962             regname(ROCKER_BOGUS_REG0);
963             regname(ROCKER_BOGUS_REG1);
964             regname(ROCKER_BOGUS_REG2);
965             regname(ROCKER_BOGUS_REG3);
966             regname(ROCKER_TEST_REG);
967             regname(ROCKER_TEST_REG64);
968             regname(ROCKER_TEST_REG64+4);
969             regname(ROCKER_TEST_IRQ);
970             regname(ROCKER_TEST_DMA_ADDR);
971             regname(ROCKER_TEST_DMA_ADDR+4);
972             regname(ROCKER_TEST_DMA_SIZE);
973             regname(ROCKER_TEST_DMA_CTRL);
974             regname(ROCKER_CONTROL);
975             regname(ROCKER_PORT_PHYS_COUNT);
976             regname(ROCKER_PORT_PHYS_LINK_STATUS);
977             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
978             regname(ROCKER_PORT_PHYS_ENABLE);
979             regname(ROCKER_PORT_PHYS_ENABLE+4);
980             regname(ROCKER_SWITCH_ID);
981             regname(ROCKER_SWITCH_ID+4);
982         }
983     }
984     return "???";
985 }
986 #else
987 static const char *rocker_reg_name(void *opaque, hwaddr addr)
988 {
989     return NULL;
990 }
991 #endif
992 
993 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
994                               unsigned size)
995 {
996     DPRINTF("Write %s addr " TARGET_FMT_plx
997             ", size %u, val " TARGET_FMT_plx "\n",
998             rocker_reg_name(opaque, addr), addr, size, val);
999 
1000     switch (size) {
1001     case 4:
1002         rocker_io_writel(opaque, addr, val);
1003         break;
1004     case 8:
1005         rocker_io_writeq(opaque, addr, val);
1006         break;
1007     }
1008 }
1009 
1010 static uint64_t rocker_port_phys_link_status(Rocker *r)
1011 {
1012     int i;
1013     uint64_t status = 0;
1014 
1015     for (i = 0; i < r->fp_ports; i++) {
1016         FpPort *port = r->fp_port[i];
1017 
1018         if (fp_port_get_link_up(port)) {
1019             status |= 1 << (i + 1);
1020         }
1021     }
1022     return status;
1023 }
1024 
1025 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1026 {
1027     int i;
1028     uint64_t ret = 0;
1029 
1030     for (i = 0; i < r->fp_ports; i++) {
1031         FpPort *port = r->fp_port[i];
1032 
1033         if (fp_port_enabled(port)) {
1034             ret |= 1 << (i + 1);
1035         }
1036     }
1037     return ret;
1038 }
1039 
1040 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1041 {
1042     Rocker *r = opaque;
1043     uint32_t ret;
1044 
1045     if (rocker_addr_is_desc_reg(r, addr)) {
1046         unsigned index = ROCKER_RING_INDEX(addr);
1047         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1048 
1049         switch (offset) {
1050         case ROCKER_DMA_DESC_ADDR_OFFSET:
1051             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1052             break;
1053         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1054             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1055             break;
1056         case ROCKER_DMA_DESC_SIZE_OFFSET:
1057             ret = desc_ring_get_size(r->rings[index]);
1058             break;
1059         case ROCKER_DMA_DESC_HEAD_OFFSET:
1060             ret = desc_ring_get_head(r->rings[index]);
1061             break;
1062         case ROCKER_DMA_DESC_TAIL_OFFSET:
1063             ret = desc_ring_get_tail(r->rings[index]);
1064             break;
1065         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1066             ret = desc_ring_get_credits(r->rings[index]);
1067             break;
1068         default:
1069             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1070                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1071             ret = 0;
1072             break;
1073         }
1074         return ret;
1075     }
1076 
1077     switch (addr) {
1078     case ROCKER_BOGUS_REG0:
1079     case ROCKER_BOGUS_REG1:
1080     case ROCKER_BOGUS_REG2:
1081     case ROCKER_BOGUS_REG3:
1082         ret = 0xDEADBABE;
1083         break;
1084     case ROCKER_TEST_REG:
1085         ret = r->test_reg * 2;
1086         break;
1087     case ROCKER_TEST_REG64:
1088         ret = (uint32_t)(r->test_reg64 * 2);
1089         break;
1090     case ROCKER_TEST_REG64 + 4:
1091         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1092         break;
1093     case ROCKER_TEST_DMA_SIZE:
1094         ret = r->test_dma_size;
1095         break;
1096     case ROCKER_TEST_DMA_ADDR:
1097         ret = (uint32_t)r->test_dma_addr;
1098         break;
1099     case ROCKER_TEST_DMA_ADDR + 4:
1100         ret = (uint32_t)(r->test_dma_addr >> 32);
1101         break;
1102     case ROCKER_PORT_PHYS_COUNT:
1103         ret = r->fp_ports;
1104         break;
1105     case ROCKER_PORT_PHYS_LINK_STATUS:
1106         ret = (uint32_t)rocker_port_phys_link_status(r);
1107         break;
1108     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1109         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1110         break;
1111     case ROCKER_PORT_PHYS_ENABLE:
1112         ret = (uint32_t)rocker_port_phys_enable_read(r);
1113         break;
1114     case ROCKER_PORT_PHYS_ENABLE + 4:
1115         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1116         break;
1117     case ROCKER_SWITCH_ID:
1118         ret = (uint32_t)r->switch_id;
1119         break;
1120     case ROCKER_SWITCH_ID + 4:
1121         ret = (uint32_t)(r->switch_id >> 32);
1122         break;
1123     default:
1124         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1125         ret = 0;
1126         break;
1127     }
1128     return ret;
1129 }
1130 
1131 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1132 {
1133     Rocker *r = opaque;
1134     uint64_t ret;
1135 
1136     if (rocker_addr_is_desc_reg(r, addr)) {
1137         unsigned index = ROCKER_RING_INDEX(addr);
1138         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1139 
1140         switch (addr & ROCKER_DMA_DESC_MASK) {
1141         case ROCKER_DMA_DESC_ADDR_OFFSET:
1142             ret = desc_ring_get_base_addr(r->rings[index]);
1143             break;
1144         default:
1145             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1146                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1147             ret = 0;
1148             break;
1149         }
1150         return ret;
1151     }
1152 
1153     switch (addr) {
1154     case ROCKER_BOGUS_REG0:
1155     case ROCKER_BOGUS_REG2:
1156         ret = 0xDEADBABEDEADBABEULL;
1157         break;
1158     case ROCKER_TEST_REG64:
1159         ret = r->test_reg64 * 2;
1160         break;
1161     case ROCKER_TEST_DMA_ADDR:
1162         ret = r->test_dma_addr;
1163         break;
1164     case ROCKER_PORT_PHYS_LINK_STATUS:
1165         ret = rocker_port_phys_link_status(r);
1166         break;
1167     case ROCKER_PORT_PHYS_ENABLE:
1168         ret = rocker_port_phys_enable_read(r);
1169         break;
1170     case ROCKER_SWITCH_ID:
1171         ret = r->switch_id;
1172         break;
1173     default:
1174         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1175         ret = 0;
1176         break;
1177     }
1178     return ret;
1179 }
1180 
1181 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1182 {
1183     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1184             rocker_reg_name(opaque, addr), addr, size);
1185 
1186     switch (size) {
1187     case 4:
1188         return rocker_io_readl(opaque, addr);
1189     case 8:
1190         return rocker_io_readq(opaque, addr);
1191     }
1192 
1193     return -1;
1194 }
1195 
1196 static const MemoryRegionOps rocker_mmio_ops = {
1197     .read = rocker_mmio_read,
1198     .write = rocker_mmio_write,
1199     .endianness = DEVICE_LITTLE_ENDIAN,
1200     .valid = {
1201         .min_access_size = 4,
1202         .max_access_size = 8,
1203     },
1204     .impl = {
1205         .min_access_size = 4,
1206         .max_access_size = 8,
1207     },
1208 };
1209 
1210 static void rocker_msix_vectors_unuse(Rocker *r,
1211                                       unsigned int num_vectors)
1212 {
1213     PCIDevice *dev = PCI_DEVICE(r);
1214     int i;
1215 
1216     for (i = 0; i < num_vectors; i++) {
1217         msix_vector_unuse(dev, i);
1218     }
1219 }
1220 
1221 static int rocker_msix_vectors_use(Rocker *r,
1222                                    unsigned int num_vectors)
1223 {
1224     PCIDevice *dev = PCI_DEVICE(r);
1225     int err;
1226     int i;
1227 
1228     for (i = 0; i < num_vectors; i++) {
1229         err = msix_vector_use(dev, i);
1230         if (err) {
1231             goto rollback;
1232         }
1233     }
1234     return 0;
1235 
1236 rollback:
1237     rocker_msix_vectors_unuse(r, i);
1238     return err;
1239 }
1240 
1241 static int rocker_msix_init(Rocker *r, Error **errp)
1242 {
1243     PCIDevice *dev = PCI_DEVICE(r);
1244     int err;
1245 
1246     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1247                     &r->msix_bar,
1248                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1249                     &r->msix_bar,
1250                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1251                     0, errp);
1252     if (err) {
1253         return err;
1254     }
1255 
1256     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1257     if (err) {
1258         goto err_msix_vectors_use;
1259     }
1260 
1261     return 0;
1262 
1263 err_msix_vectors_use:
1264     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1265     return err;
1266 }
1267 
1268 static void rocker_msix_uninit(Rocker *r)
1269 {
1270     PCIDevice *dev = PCI_DEVICE(r);
1271 
1272     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1273     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1274 }
1275 
1276 static World *rocker_world_type_by_name(Rocker *r, const char *name)
1277 {
1278     int i;
1279 
1280     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1281         if (strcmp(name, world_name(r->worlds[i])) == 0) {
1282             return r->worlds[i];
1283         }
1284     }
1285     return NULL;
1286 }
1287 
1288 static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1289 {
1290     Rocker *r = ROCKER(dev);
1291     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1292     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1293     static int sw_index;
1294     int i, err = 0;
1295 
1296     /* allocate worlds */
1297 
1298     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1299 
1300     if (!r->world_name) {
1301         r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1302     }
1303 
1304     r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1305     if (!r->world_dflt) {
1306         error_setg(errp,
1307                 "invalid argument requested world %s does not exist",
1308                 r->world_name);
1309         goto err_world_type_by_name;
1310     }
1311 
1312     /* set up memory-mapped region at BAR0 */
1313 
1314     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1315                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1316     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1317                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1318 
1319     /* set up memory-mapped region for MSI-X */
1320 
1321     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1322                        ROCKER_PCI_MSIX_BAR_SIZE);
1323     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1324                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1325 
1326     /* MSI-X init */
1327 
1328     err = rocker_msix_init(r, errp);
1329     if (err) {
1330         goto err_msix_init;
1331     }
1332 
1333     /* validate switch properties */
1334 
1335     if (!r->name) {
1336         r->name = g_strdup(TYPE_ROCKER);
1337     }
1338 
1339     if (rocker_find(r->name)) {
1340         error_setg(errp, "%s already exists", r->name);
1341         goto err_duplicate;
1342     }
1343 
1344     /* Rocker name is passed in port name requests to OS with the intention
1345      * that the name is used in interface names. Limit the length of the
1346      * rocker name to avoid naming problems in the OS. Also, adding the
1347      * port number as p# and unganged breakout b#, where # is at most 2
1348      * digits, so leave room for it too (-1 for string terminator, -3 for
1349      * p# and -3 for b#)
1350      */
1351 #define ROCKER_IFNAMSIZ 16
1352 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1353     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1354         error_setg(errp,
1355                 "name too long; please shorten to at most %d chars",
1356                 MAX_ROCKER_NAME_LEN);
1357         goto err_name_too_long;
1358     }
1359 
1360     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1361         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1362         r->fp_start_macaddr.a[4] += (sw_index++);
1363     }
1364 
1365     if (!r->switch_id) {
1366         memcpy(&r->switch_id, &r->fp_start_macaddr,
1367                sizeof(r->fp_start_macaddr));
1368     }
1369 
1370     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1371         r->fp_ports = ROCKER_FP_PORTS_MAX;
1372     }
1373 
1374     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1375 
1376     /* Rings are ordered like this:
1377      * - command ring
1378      * - event ring
1379      * - port0 tx ring
1380      * - port0 rx ring
1381      * - port1 tx ring
1382      * - port1 rx ring
1383      * .....
1384      */
1385 
1386     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1387         DescRing *ring = desc_ring_alloc(r, i);
1388 
1389         if (i == ROCKER_RING_CMD) {
1390             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1391         } else if (i == ROCKER_RING_EVENT) {
1392             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1393         } else if (i % 2 == 0) {
1394             desc_ring_set_consume(ring, tx_consume,
1395                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1396         } else if (i % 2 == 1) {
1397             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1398         }
1399 
1400         r->rings[i] = ring;
1401     }
1402 
1403     for (i = 0; i < r->fp_ports; i++) {
1404         FpPort *port =
1405             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1406                           i, &r->fp_ports_peers[i]);
1407 
1408         r->fp_port[i] = port;
1409         fp_port_set_world(port, r->world_dflt);
1410     }
1411 
1412     QLIST_INSERT_HEAD(&rockers, r, next);
1413 
1414     return;
1415 
1416 err_name_too_long:
1417 err_duplicate:
1418     rocker_msix_uninit(r);
1419 err_msix_init:
1420     object_unparent(OBJECT(&r->msix_bar));
1421     object_unparent(OBJECT(&r->mmio));
1422 err_world_type_by_name:
1423     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1424         if (r->worlds[i]) {
1425             world_free(r->worlds[i]);
1426         }
1427     }
1428 }
1429 
1430 static void pci_rocker_uninit(PCIDevice *dev)
1431 {
1432     Rocker *r = ROCKER(dev);
1433     int i;
1434 
1435     QLIST_REMOVE(r, next);
1436 
1437     for (i = 0; i < r->fp_ports; i++) {
1438         FpPort *port = r->fp_port[i];
1439 
1440         fp_port_free(port);
1441         r->fp_port[i] = NULL;
1442     }
1443 
1444     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1445         if (r->rings[i]) {
1446             desc_ring_free(r->rings[i]);
1447         }
1448     }
1449     g_free(r->rings);
1450 
1451     rocker_msix_uninit(r);
1452     object_unparent(OBJECT(&r->msix_bar));
1453     object_unparent(OBJECT(&r->mmio));
1454 
1455     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1456         if (r->worlds[i]) {
1457             world_free(r->worlds[i]);
1458         }
1459     }
1460     g_free(r->fp_ports_peers);
1461 }
1462 
1463 static void rocker_reset(DeviceState *dev)
1464 {
1465     Rocker *r = ROCKER(dev);
1466     int i;
1467 
1468     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1469         if (r->worlds[i]) {
1470             world_reset(r->worlds[i]);
1471         }
1472     }
1473     for (i = 0; i < r->fp_ports; i++) {
1474         fp_port_reset(r->fp_port[i]);
1475         fp_port_set_world(r->fp_port[i], r->world_dflt);
1476     }
1477 
1478     r->test_reg = 0;
1479     r->test_reg64 = 0;
1480     r->test_dma_addr = 0;
1481     r->test_dma_size = 0;
1482 
1483     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1484         desc_ring_reset(r->rings[i]);
1485     }
1486 
1487     DPRINTF("Reset done\n");
1488 }
1489 
1490 static Property rocker_properties[] = {
1491     DEFINE_PROP_STRING("name", Rocker, name),
1492     DEFINE_PROP_STRING("world", Rocker, world_name),
1493     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1494                         fp_start_macaddr),
1495     DEFINE_PROP_UINT64("switch_id", Rocker,
1496                        switch_id, 0),
1497     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1498                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1499     DEFINE_PROP_END_OF_LIST(),
1500 };
1501 
1502 static const VMStateDescription rocker_vmsd = {
1503     .name = TYPE_ROCKER,
1504     .unmigratable = 1,
1505 };
1506 
1507 static void rocker_class_init(ObjectClass *klass, void *data)
1508 {
1509     DeviceClass *dc = DEVICE_CLASS(klass);
1510     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1511 
1512     k->realize = pci_rocker_realize;
1513     k->exit = pci_rocker_uninit;
1514     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1515     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1516     k->revision = ROCKER_PCI_REVISION;
1517     k->class_id = PCI_CLASS_NETWORK_OTHER;
1518     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1519     dc->desc = "Rocker Switch";
1520     dc->reset = rocker_reset;
1521     dc->props = rocker_properties;
1522     dc->vmsd = &rocker_vmsd;
1523 }
1524 
1525 static const TypeInfo rocker_info = {
1526     .name          = TYPE_ROCKER,
1527     .parent        = TYPE_PCI_DEVICE,
1528     .instance_size = sizeof(Rocker),
1529     .class_init    = rocker_class_init,
1530     .interfaces = (InterfaceInfo[]) {
1531         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1532         { },
1533     },
1534 };
1535 
1536 static void rocker_register_types(void)
1537 {
1538     type_register_static(&rocker_info);
1539 }
1540 
1541 type_init(rocker_register_types)
1542