xref: /openbmc/qemu/hw/net/rocker/rocker.c (revision 60e58bd9f08a3b91a35850f7501a0a1bcf912b6f)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "hw/pci/msix.h"
22 #include "net/net.h"
23 #include "net/eth.h"
24 #include "qemu/iov.h"
25 #include "qemu/bitops.h"
26 #include "qmp-commands.h"
27 
28 #include "rocker.h"
29 #include "rocker_hw.h"
30 #include "rocker_fp.h"
31 #include "rocker_desc.h"
32 #include "rocker_tlv.h"
33 #include "rocker_world.h"
34 #include "rocker_of_dpa.h"
35 
36 struct rocker {
37     /* private */
38     PCIDevice parent_obj;
39     /* public */
40 
41     MemoryRegion mmio;
42     MemoryRegion msix_bar;
43 
44     /* switch configuration */
45     char *name;                  /* switch name */
46     char *world_name;            /* world name */
47     uint32_t fp_ports;           /* front-panel port count */
48     NICPeers *fp_ports_peers;
49     MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
50     uint64_t switch_id;          /* switch id */
51 
52     /* front-panel ports */
53     FpPort *fp_port[ROCKER_FP_PORTS_MAX];
54 
55     /* register backings */
56     uint32_t test_reg;
57     uint64_t test_reg64;
58     dma_addr_t test_dma_addr;
59     uint32_t test_dma_size;
60     uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
61 
62     /* desc rings */
63     DescRing **rings;
64 
65     /* switch worlds */
66     World *worlds[ROCKER_WORLD_TYPE_MAX];
67     World *world_dflt;
68 
69     QLIST_ENTRY(rocker) next;
70 };
71 
72 #define TYPE_ROCKER "rocker"
73 
74 #define ROCKER(obj) \
75     OBJECT_CHECK(Rocker, (obj), TYPE_ROCKER)
76 
77 static QLIST_HEAD(, rocker) rockers;
78 
79 Rocker *rocker_find(const char *name)
80 {
81     Rocker *r;
82 
83     QLIST_FOREACH(r, &rockers, next)
84         if (strcmp(r->name, name) == 0) {
85             return r;
86         }
87 
88     return NULL;
89 }
90 
91 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
92 {
93     if (type < ROCKER_WORLD_TYPE_MAX) {
94         return r->worlds[type];
95     }
96     return NULL;
97 }
98 
99 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
100 {
101     RockerSwitch *rocker;
102     Rocker *r;
103 
104     r = rocker_find(name);
105     if (!r) {
106         error_setg(errp, "rocker %s not found", name);
107         return NULL;
108     }
109 
110     rocker = g_new0(RockerSwitch, 1);
111     rocker->name = g_strdup(r->name);
112     rocker->id = r->switch_id;
113     rocker->ports = r->fp_ports;
114 
115     return rocker;
116 }
117 
118 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
119 {
120     RockerPortList *list = NULL;
121     Rocker *r;
122     int i;
123 
124     r = rocker_find(name);
125     if (!r) {
126         error_setg(errp, "rocker %s not found", name);
127         return NULL;
128     }
129 
130     for (i = r->fp_ports - 1; i >= 0; i--) {
131         RockerPortList *info = g_malloc0(sizeof(*info));
132         info->value = g_malloc0(sizeof(*info->value));
133         struct fp_port *port = r->fp_port[i];
134 
135         fp_port_get_info(port, info);
136         info->next = list;
137         list = info;
138     }
139 
140     return list;
141 }
142 
143 uint32_t rocker_fp_ports(Rocker *r)
144 {
145     return r->fp_ports;
146 }
147 
148 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
149                                             DescRing *ring)
150 {
151     return (desc_ring_index(ring) - 2) / 2 + 1;
152 }
153 
154 static int tx_consume(Rocker *r, DescInfo *info)
155 {
156     PCIDevice *dev = PCI_DEVICE(r);
157     char *buf = desc_get_buf(info, true);
158     RockerTlv *tlv_frag;
159     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
160     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
161     uint32_t pport;
162     uint32_t port;
163     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
164     uint16_t tx_l3_csum_off = 0;
165     uint16_t tx_tso_mss = 0;
166     uint16_t tx_tso_hdr_len = 0;
167     int iovcnt = 0;
168     int err = ROCKER_OK;
169     int rem;
170     int i;
171 
172     if (!buf) {
173         return -ROCKER_ENXIO;
174     }
175 
176     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
177 
178     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
179         return -ROCKER_EINVAL;
180     }
181 
182     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
183     if (!fp_port_from_pport(pport, &port)) {
184         return -ROCKER_EINVAL;
185     }
186 
187     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
188         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
189     }
190 
191     switch (tx_offload) {
192     case ROCKER_TX_OFFLOAD_L3_CSUM:
193         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
194             return -ROCKER_EINVAL;
195         }
196         break;
197     case ROCKER_TX_OFFLOAD_TSO:
198         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
199             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
200             return -ROCKER_EINVAL;
201         }
202         break;
203     }
204 
205     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
206         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
207     }
208 
209     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
210         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
211     }
212 
213     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
214         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
215     }
216 
217     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
218         hwaddr frag_addr;
219         uint16_t frag_len;
220 
221         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
222             err = -ROCKER_EINVAL;
223             goto err_bad_attr;
224         }
225 
226         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
227 
228         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
229             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
230             err = -ROCKER_EINVAL;
231             goto err_bad_attr;
232         }
233 
234         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
235         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
236 
237         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
238             goto err_too_many_frags;
239         }
240         iov[iovcnt].iov_len = frag_len;
241         iov[iovcnt].iov_base = g_malloc(frag_len);
242 
243         pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
244                      iov[iovcnt].iov_len);
245 
246         iovcnt++;
247     }
248 
249     if (iovcnt) {
250         /* XXX perform Tx offloads */
251         /* XXX   silence compiler for now */
252         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
253     }
254 
255     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
256 
257 err_too_many_frags:
258 err_bad_attr:
259     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
260         g_free(iov[i].iov_base);
261     }
262 
263     return err;
264 }
265 
266 static int cmd_get_port_settings(Rocker *r,
267                                  DescInfo *info, char *buf,
268                                  RockerTlv *cmd_info_tlv)
269 {
270     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
271     RockerTlv *nest;
272     FpPort *fp_port;
273     uint32_t pport;
274     uint32_t port;
275     uint32_t speed;
276     uint8_t duplex;
277     uint8_t autoneg;
278     uint8_t learning;
279     char *phys_name;
280     MACAddr macaddr;
281     enum rocker_world_type mode;
282     size_t tlv_size;
283     int pos;
284     int err;
285 
286     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
287                             cmd_info_tlv);
288 
289     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
290         return -ROCKER_EINVAL;
291     }
292 
293     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
294     if (!fp_port_from_pport(pport, &port)) {
295         return -ROCKER_EINVAL;
296     }
297     fp_port = r->fp_port[port];
298 
299     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
300     if (err) {
301         return err;
302     }
303 
304     fp_port_get_macaddr(fp_port, &macaddr);
305     mode = world_type(fp_port_get_world(fp_port));
306     learning = fp_port_get_learning(fp_port);
307     phys_name = fp_port_get_name(fp_port);
308 
309     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
310                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
311                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
312                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
313                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
314                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
315                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
316                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
317                rocker_tlv_total_size(strlen(phys_name));
318 
319     if (tlv_size > desc_buf_size(info)) {
320         return -ROCKER_EMSGSIZE;
321     }
322 
323     pos = 0;
324     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
325     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
326     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
327     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
328     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
329     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
330                    sizeof(macaddr.a), macaddr.a);
331     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
332     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
333                       learning);
334     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
335                    strlen(phys_name), phys_name);
336     rocker_tlv_nest_end(buf, &pos, nest);
337 
338     return desc_set_buf(info, tlv_size);
339 }
340 
341 static int cmd_set_port_settings(Rocker *r,
342                                  RockerTlv *cmd_info_tlv)
343 {
344     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
345     FpPort *fp_port;
346     uint32_t pport;
347     uint32_t port;
348     uint32_t speed;
349     uint8_t duplex;
350     uint8_t autoneg;
351     uint8_t learning;
352     MACAddr macaddr;
353     enum rocker_world_type mode;
354     int err;
355 
356     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
357                             cmd_info_tlv);
358 
359     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
360         return -ROCKER_EINVAL;
361     }
362 
363     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
364     if (!fp_port_from_pport(pport, &port)) {
365         return -ROCKER_EINVAL;
366     }
367     fp_port = r->fp_port[port];
368 
369     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
370         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
371         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
372 
373         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
374         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
375         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
376 
377         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
378         if (err) {
379             return err;
380         }
381     }
382 
383     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
384         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
385             sizeof(macaddr.a)) {
386             return -ROCKER_EINVAL;
387         }
388         memcpy(macaddr.a,
389                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
390                sizeof(macaddr.a));
391         fp_port_set_macaddr(fp_port, &macaddr);
392     }
393 
394     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
395         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
396         if (mode >= ROCKER_WORLD_TYPE_MAX) {
397             return -ROCKER_EINVAL;
398         }
399         /* We don't support world change. */
400         if (!fp_port_check_world(fp_port, r->worlds[mode])) {
401             return -ROCKER_EINVAL;
402         }
403     }
404 
405     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
406         learning =
407             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
408         fp_port_set_learning(fp_port, learning);
409     }
410 
411     return ROCKER_OK;
412 }
413 
414 static int cmd_consume(Rocker *r, DescInfo *info)
415 {
416     char *buf = desc_get_buf(info, false);
417     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
418     RockerTlv *info_tlv;
419     World *world;
420     uint16_t cmd;
421     int err;
422 
423     if (!buf) {
424         return -ROCKER_ENXIO;
425     }
426 
427     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
428 
429     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
430         return -ROCKER_EINVAL;
431     }
432 
433     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
434     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
435 
436     /* This might be reworked to something like this:
437      * Every world will have an array of command handlers from
438      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
439      * up to each world to implement whatever command it want.
440      * It can reference "generic" commands as cmd_set_port_settings or
441      * cmd_get_port_settings
442      */
443 
444     switch (cmd) {
445     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
446     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
447     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
448     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
449     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
450     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
451     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
452     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
453         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
454         err = world_do_cmd(world, info, buf, cmd, info_tlv);
455         break;
456     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
457         err = cmd_get_port_settings(r, info, buf, info_tlv);
458         break;
459     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
460         err = cmd_set_port_settings(r, info_tlv);
461         break;
462     default:
463         err = -ROCKER_EINVAL;
464         break;
465     }
466 
467     return err;
468 }
469 
470 static void rocker_msix_irq(Rocker *r, unsigned vector)
471 {
472     PCIDevice *dev = PCI_DEVICE(r);
473 
474     DPRINTF("MSI-X notify request for vector %d\n", vector);
475     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
476         DPRINTF("incorrect vector %d\n", vector);
477         return;
478     }
479     msix_notify(dev, vector);
480 }
481 
482 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
483 {
484     DescRing *ring = r->rings[ROCKER_RING_EVENT];
485     DescInfo *info = desc_ring_fetch_desc(ring);
486     RockerTlv *nest;
487     char *buf;
488     size_t tlv_size;
489     int pos;
490     int err;
491 
492     if (!info) {
493         return -ROCKER_ENOBUFS;
494     }
495 
496     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
497                rocker_tlv_total_size(0) +                 /* nest */
498                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
499                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
500 
501     if (tlv_size > desc_buf_size(info)) {
502         err = -ROCKER_EMSGSIZE;
503         goto err_too_big;
504     }
505 
506     buf = desc_get_buf(info, false);
507     if (!buf) {
508         err = -ROCKER_ENOMEM;
509         goto err_no_mem;
510     }
511 
512     pos = 0;
513     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
514                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
515     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
516     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
517     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
518                       link_up ? 1 : 0);
519     rocker_tlv_nest_end(buf, &pos, nest);
520 
521     err = desc_set_buf(info, tlv_size);
522 
523 err_too_big:
524 err_no_mem:
525     if (desc_ring_post_desc(ring, err)) {
526         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
527     }
528 
529     return err;
530 }
531 
532 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
533                                uint16_t vlan_id)
534 {
535     DescRing *ring = r->rings[ROCKER_RING_EVENT];
536     DescInfo *info;
537     FpPort *fp_port;
538     uint32_t port;
539     RockerTlv *nest;
540     char *buf;
541     size_t tlv_size;
542     int pos;
543     int err;
544 
545     if (!fp_port_from_pport(pport, &port)) {
546         return -ROCKER_EINVAL;
547     }
548     fp_port = r->fp_port[port];
549     if (!fp_port_get_learning(fp_port)) {
550         return ROCKER_OK;
551     }
552 
553     info = desc_ring_fetch_desc(ring);
554     if (!info) {
555         return -ROCKER_ENOBUFS;
556     }
557 
558     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
559                rocker_tlv_total_size(0) +                 /* nest */
560                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
561                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
562                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
563 
564     if (tlv_size > desc_buf_size(info)) {
565         err = -ROCKER_EMSGSIZE;
566         goto err_too_big;
567     }
568 
569     buf = desc_get_buf(info, false);
570     if (!buf) {
571         err = -ROCKER_ENOMEM;
572         goto err_no_mem;
573     }
574 
575     pos = 0;
576     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
577                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
578     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
579     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
580     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
581     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
582     rocker_tlv_nest_end(buf, &pos, nest);
583 
584     err = desc_set_buf(info, tlv_size);
585 
586 err_too_big:
587 err_no_mem:
588     if (desc_ring_post_desc(ring, err)) {
589         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
590     }
591 
592     return err;
593 }
594 
595 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
596                                                      uint32_t pport)
597 {
598     return r->rings[(pport - 1) * 2 + 3];
599 }
600 
601 int rx_produce(World *world, uint32_t pport,
602                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
603 {
604     Rocker *r = world_rocker(world);
605     PCIDevice *dev = (PCIDevice *)r;
606     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
607     DescInfo *info = desc_ring_fetch_desc(ring);
608     char *data;
609     size_t data_size = iov_size(iov, iovcnt);
610     char *buf;
611     uint16_t rx_flags = 0;
612     uint16_t rx_csum = 0;
613     size_t tlv_size;
614     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
615     hwaddr frag_addr;
616     uint16_t frag_max_len;
617     int pos;
618     int err;
619 
620     if (!info) {
621         return -ROCKER_ENOBUFS;
622     }
623 
624     buf = desc_get_buf(info, false);
625     if (!buf) {
626         err = -ROCKER_ENXIO;
627         goto out;
628     }
629     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
630 
631     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
632         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
633         err = -ROCKER_EINVAL;
634         goto out;
635     }
636 
637     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
638     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
639 
640     if (data_size > frag_max_len) {
641         err = -ROCKER_EMSGSIZE;
642         goto out;
643     }
644 
645     if (copy_to_cpu) {
646         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
647     }
648 
649     /* XXX calc rx flags/csum */
650 
651     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
652                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
653                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
654                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
655                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
656 
657     if (tlv_size > desc_buf_size(info)) {
658         err = -ROCKER_EMSGSIZE;
659         goto out;
660     }
661 
662     /* TODO:
663      * iov dma write can be optimized in similar way e1000 does it in
664      * e1000_receive_iov. But maybe if would make sense to introduce
665      * generic helper iov_dma_write.
666      */
667 
668     data = g_malloc(data_size);
669 
670     iov_to_buf(iov, iovcnt, 0, data, data_size);
671     pci_dma_write(dev, frag_addr, data, data_size);
672     g_free(data);
673 
674     pos = 0;
675     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
676     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
677     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
678     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
679     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
680 
681     err = desc_set_buf(info, tlv_size);
682 
683 out:
684     if (desc_ring_post_desc(ring, err)) {
685         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
686     }
687 
688     return err;
689 }
690 
691 int rocker_port_eg(Rocker *r, uint32_t pport,
692                    const struct iovec *iov, int iovcnt)
693 {
694     FpPort *fp_port;
695     uint32_t port;
696 
697     if (!fp_port_from_pport(pport, &port)) {
698         return -ROCKER_EINVAL;
699     }
700 
701     fp_port = r->fp_port[port];
702 
703     return fp_port_eg(fp_port, iov, iovcnt);
704 }
705 
706 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
707 {
708     PCIDevice *dev = PCI_DEVICE(r);
709     char *buf;
710     int i;
711 
712     buf = g_malloc(r->test_dma_size);
713 
714     switch (val) {
715     case ROCKER_TEST_DMA_CTRL_CLEAR:
716         memset(buf, 0, r->test_dma_size);
717         break;
718     case ROCKER_TEST_DMA_CTRL_FILL:
719         memset(buf, 0x96, r->test_dma_size);
720         break;
721     case ROCKER_TEST_DMA_CTRL_INVERT:
722         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
723         for (i = 0; i < r->test_dma_size; i++) {
724             buf[i] = ~buf[i];
725         }
726         break;
727     default:
728         DPRINTF("not test dma control val=0x%08x\n", val);
729         goto err_out;
730     }
731     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
732 
733     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
734 
735 err_out:
736     g_free(buf);
737 }
738 
739 static void rocker_reset(DeviceState *dev);
740 
741 static void rocker_control(Rocker *r, uint32_t val)
742 {
743     if (val & ROCKER_CONTROL_RESET) {
744         rocker_reset(DEVICE(r));
745     }
746 }
747 
748 static int rocker_pci_ring_count(Rocker *r)
749 {
750     /* There are:
751      * - command ring
752      * - event ring
753      * - tx and rx ring per each port
754      */
755     return 2 + (2 * r->fp_ports);
756 }
757 
758 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
759 {
760     hwaddr start = ROCKER_DMA_DESC_BASE;
761     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
762 
763     return addr >= start && addr < end;
764 }
765 
766 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
767 {
768     int i;
769     bool old_enabled;
770     bool new_enabled;
771     FpPort *fp_port;
772 
773     for (i = 0; i < r->fp_ports; i++) {
774         fp_port = r->fp_port[i];
775         old_enabled = fp_port_enabled(fp_port);
776         new_enabled = (new >> (i + 1)) & 0x1;
777         if (new_enabled == old_enabled) {
778             continue;
779         }
780         if (new_enabled) {
781             fp_port_enable(r->fp_port[i]);
782         } else {
783             fp_port_disable(r->fp_port[i]);
784         }
785     }
786 }
787 
788 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
789 {
790     Rocker *r = opaque;
791 
792     if (rocker_addr_is_desc_reg(r, addr)) {
793         unsigned index = ROCKER_RING_INDEX(addr);
794         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
795 
796         switch (offset) {
797         case ROCKER_DMA_DESC_ADDR_OFFSET:
798             r->lower32 = (uint64_t)val;
799             break;
800         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
801             desc_ring_set_base_addr(r->rings[index],
802                                     ((uint64_t)val) << 32 | r->lower32);
803             r->lower32 = 0;
804             break;
805         case ROCKER_DMA_DESC_SIZE_OFFSET:
806             desc_ring_set_size(r->rings[index], val);
807             break;
808         case ROCKER_DMA_DESC_HEAD_OFFSET:
809             if (desc_ring_set_head(r->rings[index], val)) {
810                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
811             }
812             break;
813         case ROCKER_DMA_DESC_CTRL_OFFSET:
814             desc_ring_set_ctrl(r->rings[index], val);
815             break;
816         case ROCKER_DMA_DESC_CREDITS_OFFSET:
817             if (desc_ring_ret_credits(r->rings[index], val)) {
818                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
819             }
820             break;
821         default:
822             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
823                     " val=0x%08x (ring %d, addr=0x%02x)\n",
824                     addr, val, index, offset);
825             break;
826         }
827         return;
828     }
829 
830     switch (addr) {
831     case ROCKER_TEST_REG:
832         r->test_reg = val;
833         break;
834     case ROCKER_TEST_REG64:
835     case ROCKER_TEST_DMA_ADDR:
836     case ROCKER_PORT_PHYS_ENABLE:
837         r->lower32 = (uint64_t)val;
838         break;
839     case ROCKER_TEST_REG64 + 4:
840         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
841         r->lower32 = 0;
842         break;
843     case ROCKER_TEST_IRQ:
844         rocker_msix_irq(r, val);
845         break;
846     case ROCKER_TEST_DMA_SIZE:
847         r->test_dma_size = val & 0xFFFF;
848         break;
849     case ROCKER_TEST_DMA_ADDR + 4:
850         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
851         r->lower32 = 0;
852         break;
853     case ROCKER_TEST_DMA_CTRL:
854         rocker_test_dma_ctrl(r, val);
855         break;
856     case ROCKER_CONTROL:
857         rocker_control(r, val);
858         break;
859     case ROCKER_PORT_PHYS_ENABLE + 4:
860         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
861         r->lower32 = 0;
862         break;
863     default:
864         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
865                 " val=0x%08x\n", addr, val);
866         break;
867     }
868 }
869 
870 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
871 {
872     Rocker *r = opaque;
873 
874     if (rocker_addr_is_desc_reg(r, addr)) {
875         unsigned index = ROCKER_RING_INDEX(addr);
876         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
877 
878         switch (offset) {
879         case ROCKER_DMA_DESC_ADDR_OFFSET:
880             desc_ring_set_base_addr(r->rings[index], val);
881             break;
882         default:
883             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
884                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
885                     addr, val, index, offset);
886             break;
887         }
888         return;
889     }
890 
891     switch (addr) {
892     case ROCKER_TEST_REG64:
893         r->test_reg64 = val;
894         break;
895     case ROCKER_TEST_DMA_ADDR:
896         r->test_dma_addr = val;
897         break;
898     case ROCKER_PORT_PHYS_ENABLE:
899         rocker_port_phys_enable_write(r, val);
900         break;
901     default:
902         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
903                 " val=0x" TARGET_FMT_plx "\n", addr, val);
904         break;
905     }
906 }
907 
908 #ifdef DEBUG_ROCKER
909 #define regname(reg) case (reg): return #reg
910 static const char *rocker_reg_name(void *opaque, hwaddr addr)
911 {
912     Rocker *r = opaque;
913 
914     if (rocker_addr_is_desc_reg(r, addr)) {
915         unsigned index = ROCKER_RING_INDEX(addr);
916         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
917         static char buf[100];
918         char ring_name[10];
919 
920         switch (index) {
921         case 0:
922             sprintf(ring_name, "cmd");
923             break;
924         case 1:
925             sprintf(ring_name, "event");
926             break;
927         default:
928             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
929                     (index - 2) / 2);
930         }
931 
932         switch (offset) {
933         case ROCKER_DMA_DESC_ADDR_OFFSET:
934             sprintf(buf, "Ring[%s] ADDR", ring_name);
935             return buf;
936         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
937             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
938             return buf;
939         case ROCKER_DMA_DESC_SIZE_OFFSET:
940             sprintf(buf, "Ring[%s] SIZE", ring_name);
941             return buf;
942         case ROCKER_DMA_DESC_HEAD_OFFSET:
943             sprintf(buf, "Ring[%s] HEAD", ring_name);
944             return buf;
945         case ROCKER_DMA_DESC_TAIL_OFFSET:
946             sprintf(buf, "Ring[%s] TAIL", ring_name);
947             return buf;
948         case ROCKER_DMA_DESC_CTRL_OFFSET:
949             sprintf(buf, "Ring[%s] CTRL", ring_name);
950             return buf;
951         case ROCKER_DMA_DESC_CREDITS_OFFSET:
952             sprintf(buf, "Ring[%s] CREDITS", ring_name);
953             return buf;
954         default:
955             sprintf(buf, "Ring[%s] ???", ring_name);
956             return buf;
957         }
958     } else {
959         switch (addr) {
960             regname(ROCKER_BOGUS_REG0);
961             regname(ROCKER_BOGUS_REG1);
962             regname(ROCKER_BOGUS_REG2);
963             regname(ROCKER_BOGUS_REG3);
964             regname(ROCKER_TEST_REG);
965             regname(ROCKER_TEST_REG64);
966             regname(ROCKER_TEST_REG64+4);
967             regname(ROCKER_TEST_IRQ);
968             regname(ROCKER_TEST_DMA_ADDR);
969             regname(ROCKER_TEST_DMA_ADDR+4);
970             regname(ROCKER_TEST_DMA_SIZE);
971             regname(ROCKER_TEST_DMA_CTRL);
972             regname(ROCKER_CONTROL);
973             regname(ROCKER_PORT_PHYS_COUNT);
974             regname(ROCKER_PORT_PHYS_LINK_STATUS);
975             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
976             regname(ROCKER_PORT_PHYS_ENABLE);
977             regname(ROCKER_PORT_PHYS_ENABLE+4);
978             regname(ROCKER_SWITCH_ID);
979             regname(ROCKER_SWITCH_ID+4);
980         }
981     }
982     return "???";
983 }
984 #else
/*
 * Fallback variant of rocker_reg_name() selected by the #else branch of
 * the surrounding preprocessor conditional.  It performs no lookup and
 * always returns NULL; within this part of the file the result is only
 * ever passed as an argument to DPRINTF().
 */
985 static const char *rocker_reg_name(void *opaque, hwaddr addr)
986 {
987     return NULL;
988 }
989 #endif
990 
991 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
992                               unsigned size)
993 {
994     DPRINTF("Write %s addr " TARGET_FMT_plx
995             ", size %u, val " TARGET_FMT_plx "\n",
996             rocker_reg_name(opaque, addr), addr, size, val);
997 
998     switch (size) {
999     case 4:
1000         rocker_io_writel(opaque, addr, val);
1001         break;
1002     case 8:
1003         rocker_io_writeq(opaque, addr, val);
1004         break;
1005     }
1006 }
1007 
1008 static uint64_t rocker_port_phys_link_status(Rocker *r)
1009 {
1010     int i;
1011     uint64_t status = 0;
1012 
1013     for (i = 0; i < r->fp_ports; i++) {
1014         FpPort *port = r->fp_port[i];
1015 
1016         if (fp_port_get_link_up(port)) {
1017             status |= 1 << (i + 1);
1018         }
1019     }
1020     return status;
1021 }
1022 
1023 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1024 {
1025     int i;
1026     uint64_t ret = 0;
1027 
1028     for (i = 0; i < r->fp_ports; i++) {
1029         FpPort *port = r->fp_port[i];
1030 
1031         if (fp_port_enabled(port)) {
1032             ret |= 1 << (i + 1);
1033         }
1034     }
1035     return ret;
1036 }
1037 
1038 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1039 {
1040     Rocker *r = opaque;
1041     uint32_t ret;
1042 
1043     if (rocker_addr_is_desc_reg(r, addr)) {
1044         unsigned index = ROCKER_RING_INDEX(addr);
1045         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1046 
1047         switch (offset) {
1048         case ROCKER_DMA_DESC_ADDR_OFFSET:
1049             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1050             break;
1051         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1052             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1053             break;
1054         case ROCKER_DMA_DESC_SIZE_OFFSET:
1055             ret = desc_ring_get_size(r->rings[index]);
1056             break;
1057         case ROCKER_DMA_DESC_HEAD_OFFSET:
1058             ret = desc_ring_get_head(r->rings[index]);
1059             break;
1060         case ROCKER_DMA_DESC_TAIL_OFFSET:
1061             ret = desc_ring_get_tail(r->rings[index]);
1062             break;
1063         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1064             ret = desc_ring_get_credits(r->rings[index]);
1065             break;
1066         default:
1067             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1068                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1069             ret = 0;
1070             break;
1071         }
1072         return ret;
1073     }
1074 
1075     switch (addr) {
1076     case ROCKER_BOGUS_REG0:
1077     case ROCKER_BOGUS_REG1:
1078     case ROCKER_BOGUS_REG2:
1079     case ROCKER_BOGUS_REG3:
1080         ret = 0xDEADBABE;
1081         break;
1082     case ROCKER_TEST_REG:
1083         ret = r->test_reg * 2;
1084         break;
1085     case ROCKER_TEST_REG64:
1086         ret = (uint32_t)(r->test_reg64 * 2);
1087         break;
1088     case ROCKER_TEST_REG64 + 4:
1089         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1090         break;
1091     case ROCKER_TEST_DMA_SIZE:
1092         ret = r->test_dma_size;
1093         break;
1094     case ROCKER_TEST_DMA_ADDR:
1095         ret = (uint32_t)r->test_dma_addr;
1096         break;
1097     case ROCKER_TEST_DMA_ADDR + 4:
1098         ret = (uint32_t)(r->test_dma_addr >> 32);
1099         break;
1100     case ROCKER_PORT_PHYS_COUNT:
1101         ret = r->fp_ports;
1102         break;
1103     case ROCKER_PORT_PHYS_LINK_STATUS:
1104         ret = (uint32_t)rocker_port_phys_link_status(r);
1105         break;
1106     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1107         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1108         break;
1109     case ROCKER_PORT_PHYS_ENABLE:
1110         ret = (uint32_t)rocker_port_phys_enable_read(r);
1111         break;
1112     case ROCKER_PORT_PHYS_ENABLE + 4:
1113         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1114         break;
1115     case ROCKER_SWITCH_ID:
1116         ret = (uint32_t)r->switch_id;
1117         break;
1118     case ROCKER_SWITCH_ID + 4:
1119         ret = (uint32_t)(r->switch_id >> 32);
1120         break;
1121     default:
1122         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1123         ret = 0;
1124         break;
1125     }
1126     return ret;
1127 }
1128 
1129 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1130 {
1131     Rocker *r = opaque;
1132     uint64_t ret;
1133 
1134     if (rocker_addr_is_desc_reg(r, addr)) {
1135         unsigned index = ROCKER_RING_INDEX(addr);
1136         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1137 
1138         switch (addr & ROCKER_DMA_DESC_MASK) {
1139         case ROCKER_DMA_DESC_ADDR_OFFSET:
1140             ret = desc_ring_get_base_addr(r->rings[index]);
1141             break;
1142         default:
1143             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1144                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1145             ret = 0;
1146             break;
1147         }
1148         return ret;
1149     }
1150 
1151     switch (addr) {
1152     case ROCKER_BOGUS_REG0:
1153     case ROCKER_BOGUS_REG2:
1154         ret = 0xDEADBABEDEADBABEULL;
1155         break;
1156     case ROCKER_TEST_REG64:
1157         ret = r->test_reg64 * 2;
1158         break;
1159     case ROCKER_TEST_DMA_ADDR:
1160         ret = r->test_dma_addr;
1161         break;
1162     case ROCKER_PORT_PHYS_LINK_STATUS:
1163         ret = rocker_port_phys_link_status(r);
1164         break;
1165     case ROCKER_PORT_PHYS_ENABLE:
1166         ret = rocker_port_phys_enable_read(r);
1167         break;
1168     case ROCKER_SWITCH_ID:
1169         ret = r->switch_id;
1170         break;
1171     default:
1172         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1173         ret = 0;
1174         break;
1175     }
1176     return ret;
1177 }
1178 
1179 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1180 {
1181     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1182             rocker_reg_name(opaque, addr), addr, size);
1183 
1184     switch (size) {
1185     case 4:
1186         return rocker_io_readl(opaque, addr);
1187     case 8:
1188         return rocker_io_readq(opaque, addr);
1189     }
1190 
1191     return -1;
1192 }
1193 
/*
 * Access callbacks for the "rocker-mmio" region set up in
 * pci_rocker_realize().  The region is declared little-endian, and both
 * the accepted (.valid) and the implemented (.impl) access sizes range
 * from 4 to 8 bytes, which are the two sizes rocker_mmio_read() and
 * rocker_mmio_write() dispatch on.
 */
1194 static const MemoryRegionOps rocker_mmio_ops = {
1195     .read = rocker_mmio_read,
1196     .write = rocker_mmio_write,
1197     .endianness = DEVICE_LITTLE_ENDIAN,
1198     .valid = {
1199         .min_access_size = 4,
1200         .max_access_size = 8,
1201     },
1202     .impl = {
1203         .min_access_size = 4,
1204         .max_access_size = 8,
1205     },
1206 };
1207 
1208 static void rocker_msix_vectors_unuse(Rocker *r,
1209                                       unsigned int num_vectors)
1210 {
1211     PCIDevice *dev = PCI_DEVICE(r);
1212     int i;
1213 
1214     for (i = 0; i < num_vectors; i++) {
1215         msix_vector_unuse(dev, i);
1216     }
1217 }
1218 
1219 static int rocker_msix_vectors_use(Rocker *r,
1220                                    unsigned int num_vectors)
1221 {
1222     PCIDevice *dev = PCI_DEVICE(r);
1223     int err;
1224     int i;
1225 
1226     for (i = 0; i < num_vectors; i++) {
1227         err = msix_vector_use(dev, i);
1228         if (err) {
1229             goto rollback;
1230         }
1231     }
1232     return 0;
1233 
1234 rollback:
1235     rocker_msix_vectors_unuse(r, i);
1236     return err;
1237 }
1238 
1239 static int rocker_msix_init(Rocker *r, Error **errp)
1240 {
1241     PCIDevice *dev = PCI_DEVICE(r);
1242     int err;
1243 
1244     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1245                     &r->msix_bar,
1246                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1247                     &r->msix_bar,
1248                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1249                     0, errp);
1250     if (err) {
1251         return err;
1252     }
1253 
1254     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1255     if (err) {
1256         goto err_msix_vectors_use;
1257     }
1258 
1259     return 0;
1260 
1261 err_msix_vectors_use:
1262     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1263     return err;
1264 }
1265 
1266 static void rocker_msix_uninit(Rocker *r)
1267 {
1268     PCIDevice *dev = PCI_DEVICE(r);
1269 
1270     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1271     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1272 }
1273 
1274 static World *rocker_world_type_by_name(Rocker *r, const char *name)
1275 {
1276     int i;
1277 
1278     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1279         if (strcmp(name, world_name(r->worlds[i])) == 0) {
1280             return r->worlds[i];
1281 	}
1282     }
1283     return NULL;
1284 }
1285 
1286 static void pci_rocker_realize(PCIDevice *dev, Error **errp)
1287 {
1288     Rocker *r = ROCKER(dev);
1289     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1290     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1291     static int sw_index;
1292     int i, err = 0;
1293 
1294     /* allocate worlds */
1295 
1296     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1297 
1298     if (!r->world_name) {
1299         r->world_name = g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
1300     }
1301 
1302     r->world_dflt = rocker_world_type_by_name(r, r->world_name);
1303     if (!r->world_dflt) {
1304         error_setg(errp,
1305                 "invalid argument requested world %s does not exist",
1306                 r->world_name);
1307         goto err_world_type_by_name;
1308     }
1309 
1310     /* set up memory-mapped region at BAR0 */
1311 
1312     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1313                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1314     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1315                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1316 
1317     /* set up memory-mapped region for MSI-X */
1318 
1319     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1320                        ROCKER_PCI_MSIX_BAR_SIZE);
1321     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1322                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1323 
1324     /* MSI-X init */
1325 
1326     err = rocker_msix_init(r, errp);
1327     if (err) {
1328         goto err_msix_init;
1329     }
1330 
1331     /* validate switch properties */
1332 
1333     if (!r->name) {
1334         r->name = g_strdup(TYPE_ROCKER);
1335     }
1336 
1337     if (rocker_find(r->name)) {
1338         error_setg(errp, "%s already exists", r->name);
1339         goto err_duplicate;
1340     }
1341 
1342     /* Rocker name is passed in port name requests to OS with the intention
1343      * that the name is used in interface names. Limit the length of the
1344      * rocker name to avoid naming problems in the OS. Also, adding the
1345      * port number as p# and unganged breakout b#, where # is at most 2
1346      * digits, so leave room for it too (-1 for string terminator, -3 for
1347      * p# and -3 for b#)
1348      */
1349 #define ROCKER_IFNAMSIZ 16
1350 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1351     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1352         error_setg(errp,
1353                 "name too long; please shorten to at most %d chars",
1354                 MAX_ROCKER_NAME_LEN);
1355         goto err_name_too_long;
1356     }
1357 
1358     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1359         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1360         r->fp_start_macaddr.a[4] += (sw_index++);
1361     }
1362 
1363     if (!r->switch_id) {
1364         memcpy(&r->switch_id, &r->fp_start_macaddr,
1365                sizeof(r->fp_start_macaddr));
1366     }
1367 
1368     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1369         r->fp_ports = ROCKER_FP_PORTS_MAX;
1370     }
1371 
1372     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1373 
1374     /* Rings are ordered like this:
1375      * - command ring
1376      * - event ring
1377      * - port0 tx ring
1378      * - port0 rx ring
1379      * - port1 tx ring
1380      * - port1 rx ring
1381      * .....
1382      */
1383 
1384     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1385         DescRing *ring = desc_ring_alloc(r, i);
1386 
1387         if (i == ROCKER_RING_CMD) {
1388             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1389         } else if (i == ROCKER_RING_EVENT) {
1390             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1391         } else if (i % 2 == 0) {
1392             desc_ring_set_consume(ring, tx_consume,
1393                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1394         } else if (i % 2 == 1) {
1395             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1396         }
1397 
1398         r->rings[i] = ring;
1399     }
1400 
1401     for (i = 0; i < r->fp_ports; i++) {
1402         FpPort *port =
1403             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1404                           i, &r->fp_ports_peers[i]);
1405 
1406         r->fp_port[i] = port;
1407         fp_port_set_world(port, r->world_dflt);
1408     }
1409 
1410     QLIST_INSERT_HEAD(&rockers, r, next);
1411 
1412     return;
1413 
1414 err_name_too_long:
1415 err_duplicate:
1416     rocker_msix_uninit(r);
1417 err_msix_init:
1418     object_unparent(OBJECT(&r->msix_bar));
1419     object_unparent(OBJECT(&r->mmio));
1420 err_world_type_by_name:
1421     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1422         if (r->worlds[i]) {
1423             world_free(r->worlds[i]);
1424         }
1425     }
1426 }
1427 
1428 static void pci_rocker_uninit(PCIDevice *dev)
1429 {
1430     Rocker *r = ROCKER(dev);
1431     int i;
1432 
1433     QLIST_REMOVE(r, next);
1434 
1435     for (i = 0; i < r->fp_ports; i++) {
1436         FpPort *port = r->fp_port[i];
1437 
1438         fp_port_free(port);
1439         r->fp_port[i] = NULL;
1440     }
1441 
1442     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1443         if (r->rings[i]) {
1444             desc_ring_free(r->rings[i]);
1445         }
1446     }
1447     g_free(r->rings);
1448 
1449     rocker_msix_uninit(r);
1450     object_unparent(OBJECT(&r->msix_bar));
1451     object_unparent(OBJECT(&r->mmio));
1452 
1453     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1454         if (r->worlds[i]) {
1455             world_free(r->worlds[i]);
1456         }
1457     }
1458     g_free(r->fp_ports_peers);
1459 }
1460 
1461 static void rocker_reset(DeviceState *dev)
1462 {
1463     Rocker *r = ROCKER(dev);
1464     int i;
1465 
1466     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1467         if (r->worlds[i]) {
1468             world_reset(r->worlds[i]);
1469         }
1470     }
1471     for (i = 0; i < r->fp_ports; i++) {
1472         fp_port_reset(r->fp_port[i]);
1473         fp_port_set_world(r->fp_port[i], r->world_dflt);
1474     }
1475 
1476     r->test_reg = 0;
1477     r->test_reg64 = 0;
1478     r->test_dma_addr = 0;
1479     r->test_dma_size = 0;
1480 
1481     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1482         desc_ring_reset(r->rings[i]);
1483     }
1484 
1485     DPRINTF("Reset done\n");
1486 }
1487 
/*
 * Device properties and the Rocker fields that back them.
 *
 * pci_rocker_realize() supplies defaults for "name", "world",
 * "fp_start_macaddr" and "switch_id" when they are left unset/zero.
 * "ports" is an array property backed by fp_ports and fp_ports_peers;
 * realize treats fp_ports as the element count and fp_ports_peers[] as
 * the per-port peer entries.
 */
1488 static Property rocker_properties[] = {
1489     DEFINE_PROP_STRING("name", Rocker, name),
1490     DEFINE_PROP_STRING("world", Rocker, world_name),
1491     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1492                         fp_start_macaddr),
1493     DEFINE_PROP_UINT64("switch_id", Rocker,
1494                        switch_id, 0),
1495     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1496                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1497     DEFINE_PROP_END_OF_LIST(),
1498 };
1499 
/*
 * VM state description for the device.  It declares no fields and sets
 * the unmigratable flag, i.e. the device state is not saved or restored.
 */
1500 static const VMStateDescription rocker_vmsd = {
1501     .name = TYPE_ROCKER,
1502     .unmigratable = 1,
1503 };
1504 
1505 static void rocker_class_init(ObjectClass *klass, void *data)
1506 {
1507     DeviceClass *dc = DEVICE_CLASS(klass);
1508     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1509 
1510     k->realize = pci_rocker_realize;
1511     k->exit = pci_rocker_uninit;
1512     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1513     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1514     k->revision = ROCKER_PCI_REVISION;
1515     k->class_id = PCI_CLASS_NETWORK_OTHER;
1516     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1517     dc->desc = "Rocker Switch";
1518     dc->reset = rocker_reset;
1519     dc->props = rocker_properties;
1520     dc->vmsd = &rocker_vmsd;
1521 }
1522 
/*
 * Type description for the rocker device: a child of TYPE_PCI_DEVICE
 * whose instances are sizeof(Rocker) bytes, initialised per class by
 * rocker_class_init() and declaring the conventional-PCI-device
 * interface.
 */
1523 static const TypeInfo rocker_info = {
1524     .name          = TYPE_ROCKER,
1525     .parent        = TYPE_PCI_DEVICE,
1526     .instance_size = sizeof(Rocker),
1527     .class_init    = rocker_class_init,
1528     .interfaces = (InterfaceInfo[]) {
1529         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1530         { },
1531     },
1532 };
1533 
/* Register the rocker device type described by rocker_info. */
1534 static void rocker_register_types(void)
1535 {
1536     type_register_static(&rocker_info);
1537 }
1538 
/* Hook the registration function into type initialisation. */
1539 type_init(rocker_register_types)
1540