xref: /openbmc/qemu/hw/net/rocker/rocker.c (revision 773495364ffbfc6a4d1e13e24e932f96409ba1d3)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "hw/hw.h"
19 #include "hw/pci/pci.h"
20 #include "hw/pci/msix.h"
21 #include "net/net.h"
22 #include "net/eth.h"
23 #include "qemu/iov.h"
24 #include "qemu/bitops.h"
25 #include "qmp-commands.h"
26 
27 #include "rocker.h"
28 #include "rocker_hw.h"
29 #include "rocker_fp.h"
30 #include "rocker_desc.h"
31 #include "rocker_tlv.h"
32 #include "rocker_world.h"
33 #include "rocker_of_dpa.h"
34 
/*
 * Per-device state for one emulated rocker switch.  Instances are
 * linked on the global "rockers" list so they can be found by name
 * (see rocker_find()).
 */
struct rocker {
    /* private */
    PCIDevice parent_obj;
    /* public */

    MemoryRegion mmio;           /* register space */
    MemoryRegion msix_bar;       /* MSI-X table/PBA region */

    /* switch configuration */
    char *name;                  /* switch name */
    uint32_t fp_ports;           /* front-panel port count */
    NICPeers *fp_ports_peers;    /* backend peers for the front-panel ports */
    MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
    uint64_t switch_id;          /* switch id */

    /* front-panel ports */
    FpPort *fp_port[ROCKER_FP_PORTS_MAX];

    /* register backings */
    uint32_t test_reg;
    uint64_t test_reg64;
    dma_addr_t test_dma_addr;    /* guest buffer used by the DMA self-test */
    uint32_t test_dma_size;
    uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */

    /* desc rings: [0] = cmd, [1] = event, then a tx/rx pair per port */
    DescRing **rings;

    /* switch worlds */
    World *worlds[ROCKER_WORLD_TYPE_MAX];
    World *world_dflt;           /* default world for ports */

    QLIST_ENTRY(rocker) next;    /* entry on the global rockers list */
};
69 
/* QOM type name of this device */
#define ROCKER "rocker"

/* Checked downcast from a QOM object to a Rocker */
#define to_rocker(obj) \
    OBJECT_CHECK(Rocker, (obj), ROCKER)

/* All instantiated rocker switches, searchable by name */
static QLIST_HEAD(, rocker) rockers;
76 
77 Rocker *rocker_find(const char *name)
78 {
79     Rocker *r;
80 
81     QLIST_FOREACH(r, &rockers, next)
82         if (strcmp(r->name, name) == 0) {
83             return r;
84         }
85 
86     return NULL;
87 }
88 
89 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
90 {
91     if (type < ROCKER_WORLD_TYPE_MAX) {
92         return r->worlds[type];
93     }
94     return NULL;
95 }
96 
97 uint32_t rocker_fp_ports(Rocker *r)
98 {
99     return r->fp_ports;
100 }
101 
102 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
103                                             DescRing *ring)
104 {
105     return (desc_ring_index(ring) - 2) / 2 + 1;
106 }
107 
108 static int tx_consume(Rocker *r, DescInfo *info)
109 {
110     PCIDevice *dev = PCI_DEVICE(r);
111     char *buf = desc_get_buf(info, true);
112     RockerTlv *tlv_frag;
113     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
114     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
115     uint32_t pport;
116     uint32_t port;
117     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
118     uint16_t tx_l3_csum_off = 0;
119     uint16_t tx_tso_mss = 0;
120     uint16_t tx_tso_hdr_len = 0;
121     int iovcnt = 0;
122     int err = ROCKER_OK;
123     int rem;
124     int i;
125 
126     if (!buf) {
127         return -ROCKER_ENXIO;
128     }
129 
130     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
131 
132     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
133         return -ROCKER_EINVAL;
134     }
135 
136     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
137     if (!fp_port_from_pport(pport, &port)) {
138         return -ROCKER_EINVAL;
139     }
140 
141     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
142         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
143     }
144 
145     switch (tx_offload) {
146     case ROCKER_TX_OFFLOAD_L3_CSUM:
147         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
148             return -ROCKER_EINVAL;
149         }
150     case ROCKER_TX_OFFLOAD_TSO:
151         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
152             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
153             return -ROCKER_EINVAL;
154         }
155     }
156 
157     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
158         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
159     }
160 
161     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
162         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
163     }
164 
165     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
166         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
167     }
168 
169     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
170         hwaddr frag_addr;
171         uint16_t frag_len;
172 
173         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
174             err = -ROCKER_EINVAL;
175             goto err_bad_attr;
176         }
177 
178         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
179 
180         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
181             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
182             err = -ROCKER_EINVAL;
183             goto err_bad_attr;
184         }
185 
186         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
187         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
188 
189         iov[iovcnt].iov_len = frag_len;
190         iov[iovcnt].iov_base = g_malloc(frag_len);
191         if (!iov[iovcnt].iov_base) {
192             err = -ROCKER_ENOMEM;
193             goto err_no_mem;
194         }
195 
196         if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
197                      iov[iovcnt].iov_len)) {
198             err = -ROCKER_ENXIO;
199             goto err_bad_io;
200         }
201 
202         if (++iovcnt > ROCKER_TX_FRAGS_MAX) {
203             goto err_too_many_frags;
204         }
205     }
206 
207     if (iovcnt) {
208         /* XXX perform Tx offloads */
209         /* XXX   silence compiler for now */
210         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
211     }
212 
213     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
214 
215 err_too_many_frags:
216 err_bad_io:
217 err_no_mem:
218 err_bad_attr:
219     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
220         if (iov[i].iov_base) {
221             g_free(iov[i].iov_base);
222         }
223     }
224 
225     return err;
226 }
227 
/* Handle ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS: serialize the current
 * settings of the requested port back into the descriptor buffer as a
 * nested ROCKER_TLV_CMD_INFO TLV.
 *
 * Returns ROCKER_OK or a negative ROCKER_* error code.
 */
static int cmd_get_port_settings(Rocker *r,
                                 DescInfo *info, char *buf,
                                 RockerTlv *cmd_info_tlv)
{
    RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
    RockerTlv *nest;
    FpPort *fp_port;
    uint32_t pport;
    uint32_t port;
    uint32_t speed;
    uint8_t duplex;
    uint8_t autoneg;
    uint8_t learning;
    char *phys_name;
    MACAddr macaddr;
    enum rocker_world_type mode;
    size_t tlv_size;
    int pos;
    int err;

    rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
                            cmd_info_tlv);

    /* the request must name the port being queried */
    if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
        return -ROCKER_EINVAL;
    }

    pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
    if (!fp_port_from_pport(pport, &port)) {
        return -ROCKER_EINVAL;
    }
    fp_port = r->fp_port[port];

    err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
    if (err) {
        return err;
    }

    fp_port_get_macaddr(fp_port, &macaddr);
    mode = world_type(fp_port_get_world(fp_port));
    learning = fp_port_get_learning(fp_port);
    phys_name = fp_port_get_name(fp_port);

    /* compute the full response size up front so we can reject the
     * request before writing anything into the descriptor buffer */
    tlv_size = rocker_tlv_total_size(0) +                 /* nest */
               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
               rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
               rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
               rocker_tlv_total_size(strlen(phys_name));

    if (tlv_size > desc_buf_size(info)) {
        return -ROCKER_EMSGSIZE;
    }

    /* serialize the response as one nested CMD_INFO TLV */
    pos = 0;
    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
                   sizeof(macaddr.a), macaddr.a);
    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
                      learning);
    rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
                   strlen(phys_name), phys_name);
    rocker_tlv_nest_end(buf, &pos, nest);

    return desc_set_buf(info, tlv_size);
}
302 
303 static int cmd_set_port_settings(Rocker *r,
304                                  RockerTlv *cmd_info_tlv)
305 {
306     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
307     FpPort *fp_port;
308     uint32_t pport;
309     uint32_t port;
310     uint32_t speed;
311     uint8_t duplex;
312     uint8_t autoneg;
313     uint8_t learning;
314     MACAddr macaddr;
315     enum rocker_world_type mode;
316     int err;
317 
318     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
319                             cmd_info_tlv);
320 
321     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
322         return -ROCKER_EINVAL;
323     }
324 
325     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
326     if (!fp_port_from_pport(pport, &port)) {
327         return -ROCKER_EINVAL;
328     }
329     fp_port = r->fp_port[port];
330 
331     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
332         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
333         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
334 
335         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
336         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
337         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
338 
339         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
340         if (err) {
341             return err;
342         }
343     }
344 
345     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
346         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
347             sizeof(macaddr.a)) {
348             return -ROCKER_EINVAL;
349         }
350         memcpy(macaddr.a,
351                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
352                sizeof(macaddr.a));
353         fp_port_set_macaddr(fp_port, &macaddr);
354     }
355 
356     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
357         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
358         fp_port_set_world(fp_port, r->worlds[mode]);
359     }
360 
361     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
362         learning =
363             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
364         fp_port_set_learning(fp_port, learning);
365     }
366 
367     return ROCKER_OK;
368 }
369 
370 static int cmd_consume(Rocker *r, DescInfo *info)
371 {
372     char *buf = desc_get_buf(info, false);
373     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
374     RockerTlv *info_tlv;
375     World *world;
376     uint16_t cmd;
377     int err;
378 
379     if (!buf) {
380         return -ROCKER_ENXIO;
381     }
382 
383     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
384 
385     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
386         return -ROCKER_EINVAL;
387     }
388 
389     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
390     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
391 
392     /* This might be reworked to something like this:
393      * Every world will have an array of command handlers from
394      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is
395      * up to each world to implement whatever command it want.
396      * It can reference "generic" commands as cmd_set_port_settings or
397      * cmd_get_port_settings
398      */
399 
400     switch (cmd) {
401     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
402     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
403     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
404     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
405     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
406     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
407     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
408     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
409         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
410         err = world_do_cmd(world, info, buf, cmd, info_tlv);
411         break;
412     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
413         err = cmd_get_port_settings(r, info, buf, info_tlv);
414         break;
415     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
416         err = cmd_set_port_settings(r, info_tlv);
417         break;
418     default:
419         err = -ROCKER_EINVAL;
420         break;
421     }
422 
423     return err;
424 }
425 
426 static void rocker_msix_irq(Rocker *r, unsigned vector)
427 {
428     PCIDevice *dev = PCI_DEVICE(r);
429 
430     DPRINTF("MSI-X notify request for vector %d\n", vector);
431     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
432         DPRINTF("incorrect vector %d\n", vector);
433         return;
434     }
435     msix_notify(dev, vector);
436 }
437 
/* Post a LINK_CHANGED event for the given physical port onto the
 * event ring, raising the event MSI-X vector if the descriptor is
 * completed.
 *
 * Returns ROCKER_OK or a negative ROCKER_* error code.
 */
int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
{
    DescRing *ring = r->rings[ROCKER_RING_EVENT];
    DescInfo *info = desc_ring_fetch_desc(ring);
    RockerTlv *nest;
    char *buf;
    size_t tlv_size;
    int pos;
    int err;

    /* no free event descriptor available */
    if (!info) {
        return -ROCKER_ENOBUFS;
    }

    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
               rocker_tlv_total_size(0) +                 /* nest */
               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
               rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */

    if (tlv_size > desc_buf_size(info)) {
        err = -ROCKER_EMSGSIZE;
        goto err_too_big;
    }

    buf = desc_get_buf(info, false);
    if (!buf) {
        err = -ROCKER_ENOMEM;
        goto err_no_mem;
    }

    /* event type followed by a nested info TLV */
    pos = 0;
    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
                        ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
    rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
                      link_up ? 1 : 0);
    rocker_tlv_nest_end(buf, &pos, nest);

    err = desc_set_buf(info, tlv_size);

err_too_big:
err_no_mem:
    /* post the descriptor (also on error) and interrupt the guest
     * when the ring signals completion */
    if (desc_ring_post_desc(ring, err)) {
        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
    }

    return err;
}
487 
/* Post a MAC_VLAN_SEEN (address-learning) event for the given port
 * onto the event ring.  Skipped silently when learning is disabled on
 * the port.
 *
 * Returns ROCKER_OK or a negative ROCKER_* error code.
 */
int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
                               uint16_t vlan_id)
{
    DescRing *ring = r->rings[ROCKER_RING_EVENT];
    DescInfo *info;
    FpPort *fp_port;
    uint32_t port;
    RockerTlv *nest;
    char *buf;
    size_t tlv_size;
    int pos;
    int err;

    if (!fp_port_from_pport(pport, &port)) {
        return -ROCKER_EINVAL;
    }
    fp_port = r->fp_port[port];
    /* only report addresses when the port is in learning mode */
    if (!fp_port_get_learning(fp_port)) {
        return ROCKER_OK;
    }

    info = desc_ring_fetch_desc(ring);
    if (!info) {
        return -ROCKER_ENOBUFS;
    }

    tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
               rocker_tlv_total_size(0) +                 /* nest */
               rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
               rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
               rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */

    if (tlv_size > desc_buf_size(info)) {
        err = -ROCKER_EMSGSIZE;
        goto err_too_big;
    }

    buf = desc_get_buf(info, false);
    if (!buf) {
        err = -ROCKER_ENOMEM;
        goto err_no_mem;
    }

    /* event type followed by a nested info TLV */
    pos = 0;
    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
                        ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
    nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
    rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
    rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
    rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
    rocker_tlv_nest_end(buf, &pos, nest);

    err = desc_set_buf(info, tlv_size);

err_too_big:
err_no_mem:
    /* post the descriptor (also on error) and interrupt the guest
     * when the ring signals completion */
    if (desc_ring_post_desc(ring, err)) {
        rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
    }

    return err;
}
550 
551 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
552                                                      uint32_t pport)
553 {
554     return r->rings[(pport - 1) * 2 + 3];
555 }
556 
557 int rx_produce(World *world, uint32_t pport,
558                const struct iovec *iov, int iovcnt)
559 {
560     Rocker *r = world_rocker(world);
561     PCIDevice *dev = (PCIDevice *)r;
562     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
563     DescInfo *info = desc_ring_fetch_desc(ring);
564     char *data;
565     size_t data_size = iov_size(iov, iovcnt);
566     char *buf;
567     uint16_t rx_flags = 0;
568     uint16_t rx_csum = 0;
569     size_t tlv_size;
570     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
571     hwaddr frag_addr;
572     uint16_t frag_max_len;
573     int pos;
574     int err;
575 
576     if (!info) {
577         return -ROCKER_ENOBUFS;
578     }
579 
580     buf = desc_get_buf(info, false);
581     if (!buf) {
582         err = -ROCKER_ENXIO;
583         goto out;
584     }
585     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
586 
587     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
588         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
589         err = -ROCKER_EINVAL;
590         goto out;
591     }
592 
593     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
594     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
595 
596     if (data_size > frag_max_len) {
597         err = -ROCKER_EMSGSIZE;
598         goto out;
599     }
600 
601     /* XXX calc rx flags/csum */
602 
603     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
604                rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */
605                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
606                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
607                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
608 
609     if (tlv_size > desc_buf_size(info)) {
610         err = -ROCKER_EMSGSIZE;
611         goto out;
612     }
613 
614     /* TODO:
615      * iov dma write can be optimized in similar way e1000 does it in
616      * e1000_receive_iov. But maybe if would make sense to introduce
617      * generic helper iov_dma_write.
618      */
619 
620     data = g_malloc(data_size);
621     if (!data) {
622         err = -ROCKER_ENOMEM;
623         goto out;
624     }
625     iov_to_buf(iov, iovcnt, 0, data, data_size);
626     pci_dma_write(dev, frag_addr, data, data_size);
627     g_free(data);
628 
629     pos = 0;
630     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
631     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
632     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
633     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
634     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
635 
636     err = desc_set_buf(info, tlv_size);
637 
638 out:
639     if (desc_ring_post_desc(ring, err)) {
640         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
641     }
642 
643     return err;
644 }
645 
646 int rocker_port_eg(Rocker *r, uint32_t pport,
647                    const struct iovec *iov, int iovcnt)
648 {
649     FpPort *fp_port;
650     uint32_t port;
651 
652     if (!fp_port_from_pport(pport, &port)) {
653         return -ROCKER_EINVAL;
654     }
655 
656     fp_port = r->fp_port[port];
657 
658     return fp_port_eg(fp_port, iov, iovcnt);
659 }
660 
661 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
662 {
663     PCIDevice *dev = PCI_DEVICE(r);
664     char *buf;
665     int i;
666 
667     buf = g_malloc(r->test_dma_size);
668 
669     if (!buf) {
670         DPRINTF("test dma buffer alloc failed");
671         return;
672     }
673 
674     switch (val) {
675     case ROCKER_TEST_DMA_CTRL_CLEAR:
676         memset(buf, 0, r->test_dma_size);
677         break;
678     case ROCKER_TEST_DMA_CTRL_FILL:
679         memset(buf, 0x96, r->test_dma_size);
680         break;
681     case ROCKER_TEST_DMA_CTRL_INVERT:
682         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
683         for (i = 0; i < r->test_dma_size; i++) {
684             buf[i] = ~buf[i];
685         }
686         break;
687     default:
688         DPRINTF("not test dma control val=0x%08x\n", val);
689         goto err_out;
690     }
691     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
692 
693     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
694 
695 err_out:
696     g_free(buf);
697 }
698 
699 static void rocker_reset(DeviceState *dev);
700 
701 static void rocker_control(Rocker *r, uint32_t val)
702 {
703     if (val & ROCKER_CONTROL_RESET) {
704         rocker_reset(DEVICE(r));
705     }
706 }
707 
708 static int rocker_pci_ring_count(Rocker *r)
709 {
710     /* There are:
711      * - command ring
712      * - event ring
713      * - tx and rx ring per each port
714      */
715     return 2 + (2 * r->fp_ports);
716 }
717 
718 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
719 {
720     hwaddr start = ROCKER_DMA_DESC_BASE;
721     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
722 
723     return addr >= start && addr < end;
724 }
725 
726 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
727 {
728     int i;
729     bool old_enabled;
730     bool new_enabled;
731     FpPort *fp_port;
732 
733     for (i = 0; i < r->fp_ports; i++) {
734         fp_port = r->fp_port[i];
735         old_enabled = fp_port_enabled(fp_port);
736         new_enabled = (new >> (i + 1)) & 0x1;
737         if (new_enabled == old_enabled) {
738             continue;
739         }
740         if (new_enabled) {
741             fp_port_enable(r->fp_port[i]);
742         } else {
743             fp_port_disable(r->fp_port[i]);
744         }
745     }
746 }
747 
/* Handle a 32-bit MMIO register write.
 *
 * Descriptor-ring registers are decoded into a ring index plus a
 * per-ring register offset.  64-bit registers written as two 32-bit
 * halves stage the low half in r->lower32 until the high half
 * arrives (low half first, then high half commits the value).
 */
static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
{
    Rocker *r = opaque;

    if (rocker_addr_is_desc_reg(r, addr)) {
        unsigned index = ROCKER_RING_INDEX(addr);
        unsigned offset = addr & ROCKER_DMA_DESC_MASK;

        switch (offset) {
        case ROCKER_DMA_DESC_ADDR_OFFSET:
            /* stage low half of the 64-bit ring base address */
            r->lower32 = (uint64_t)val;
            break;
        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
            /* high half arrived: commit the full base address */
            desc_ring_set_base_addr(r->rings[index],
                                    ((uint64_t)val) << 32 | r->lower32);
            r->lower32 = 0;
            break;
        case ROCKER_DMA_DESC_SIZE_OFFSET:
            desc_ring_set_size(r->rings[index], val);
            break;
        case ROCKER_DMA_DESC_HEAD_OFFSET:
            /* a head move may complete descriptors -> raise the
             * ring's MSI-X vector */
            if (desc_ring_set_head(r->rings[index], val)) {
                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
            }
            break;
        case ROCKER_DMA_DESC_CTRL_OFFSET:
            desc_ring_set_ctrl(r->rings[index], val);
            break;
        case ROCKER_DMA_DESC_CREDITS_OFFSET:
            /* returning credits may also require an interrupt */
            if (desc_ring_ret_credits(r->rings[index], val)) {
                rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
            }
            break;
        default:
            DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
                    " val=0x%08x (ring %d, addr=0x%02x)\n",
                    addr, val, index, offset);
            break;
        }
        return;
    }

    switch (addr) {
    case ROCKER_TEST_REG:
        r->test_reg = val;
        break;
    case ROCKER_TEST_REG64:
    case ROCKER_TEST_DMA_ADDR:
    case ROCKER_PORT_PHYS_ENABLE:
        /* stage low half of a 64-bit register */
        r->lower32 = (uint64_t)val;
        break;
    case ROCKER_TEST_REG64 + 4:
        r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
        r->lower32 = 0;
        break;
    case ROCKER_TEST_IRQ:
        /* test register: fire the requested MSI-X vector directly */
        rocker_msix_irq(r, val);
        break;
    case ROCKER_TEST_DMA_SIZE:
        r->test_dma_size = val;
        break;
    case ROCKER_TEST_DMA_ADDR + 4:
        r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
        r->lower32 = 0;
        break;
    case ROCKER_TEST_DMA_CTRL:
        rocker_test_dma_ctrl(r, val);
        break;
    case ROCKER_CONTROL:
        rocker_control(r, val);
        break;
    case ROCKER_PORT_PHYS_ENABLE + 4:
        rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
        r->lower32 = 0;
        break;
    default:
        DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
                " val=0x%08x\n", addr, val);
        break;
    }
}
829 
/* Handle a 64-bit MMIO register write (single access, no low/high
 * staging needed).
 */
static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
{
    Rocker *r = opaque;

    if (rocker_addr_is_desc_reg(r, addr)) {
        unsigned index = ROCKER_RING_INDEX(addr);
        unsigned offset = addr & ROCKER_DMA_DESC_MASK;

        switch (offset) {
        case ROCKER_DMA_DESC_ADDR_OFFSET:
            /* only the ring base address is a 64-bit ring register */
            desc_ring_set_base_addr(r->rings[index], val);
            break;
        default:
            DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
                    " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
                    addr, val, index, offset);
            break;
        }
        return;
    }

    switch (addr) {
    case ROCKER_TEST_REG64:
        r->test_reg64 = val;
        break;
    case ROCKER_TEST_DMA_ADDR:
        r->test_dma_addr = val;
        break;
    case ROCKER_PORT_PHYS_ENABLE:
        rocker_port_phys_enable_write(r, val);
        break;
    default:
        DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
                " val=0x" TARGET_FMT_plx "\n", addr, val);
        break;
    }
}
867 
#ifdef DEBUG_ROCKER
/* Expand a register constant into a case label returning its name */
#define regname(reg) case (reg): return #reg
/* Human-readable register name for write tracing (debug builds only).
 * For ring registers the result lives in static storage, so the
 * function is not reentrant.  Returns "???" for unknown addresses.
 */
static const char *rocker_reg_name(void *opaque, hwaddr addr)
{
    Rocker *r = opaque;

    if (rocker_addr_is_desc_reg(r, addr)) {
        unsigned index = ROCKER_RING_INDEX(addr);
        unsigned offset = addr & ROCKER_DMA_DESC_MASK;
        static char buf[100];
        char ring_name[10];

        switch (index) {
        case 0:
            sprintf(ring_name, "cmd");
            break;
        case 1:
            sprintf(ring_name, "event");
            break;
        default:
            /* per-port rings: tx at even, rx at odd indexes from 2 */
            sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
                    (index - 2) / 2);
        }

        switch (offset) {
        case ROCKER_DMA_DESC_ADDR_OFFSET:
            sprintf(buf, "Ring[%s] ADDR", ring_name);
            return buf;
        case ROCKER_DMA_DESC_ADDR_OFFSET+4:
            sprintf(buf, "Ring[%s] ADDR+4", ring_name);
            return buf;
        case ROCKER_DMA_DESC_SIZE_OFFSET:
            sprintf(buf, "Ring[%s] SIZE", ring_name);
            return buf;
        case ROCKER_DMA_DESC_HEAD_OFFSET:
            sprintf(buf, "Ring[%s] HEAD", ring_name);
            return buf;
        case ROCKER_DMA_DESC_TAIL_OFFSET:
            sprintf(buf, "Ring[%s] TAIL", ring_name);
            return buf;
        case ROCKER_DMA_DESC_CTRL_OFFSET:
            sprintf(buf, "Ring[%s] CTRL", ring_name);
            return buf;
        case ROCKER_DMA_DESC_CREDITS_OFFSET:
            sprintf(buf, "Ring[%s] CREDITS", ring_name);
            return buf;
        default:
            sprintf(buf, "Ring[%s] ???", ring_name);
            return buf;
        }
    } else {
        switch (addr) {
            regname(ROCKER_BOGUS_REG0);
            regname(ROCKER_BOGUS_REG1);
            regname(ROCKER_BOGUS_REG2);
            regname(ROCKER_BOGUS_REG3);
            regname(ROCKER_TEST_REG);
            regname(ROCKER_TEST_REG64);
            regname(ROCKER_TEST_REG64+4);
            regname(ROCKER_TEST_IRQ);
            regname(ROCKER_TEST_DMA_ADDR);
            regname(ROCKER_TEST_DMA_ADDR+4);
            regname(ROCKER_TEST_DMA_SIZE);
            regname(ROCKER_TEST_DMA_CTRL);
            regname(ROCKER_CONTROL);
            regname(ROCKER_PORT_PHYS_COUNT);
            regname(ROCKER_PORT_PHYS_LINK_STATUS);
            regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
            regname(ROCKER_PORT_PHYS_ENABLE);
            regname(ROCKER_PORT_PHYS_ENABLE+4);
            regname(ROCKER_SWITCH_ID);
            regname(ROCKER_SWITCH_ID+4);
        }
    }
    return "???";
}
#else
/* Non-debug builds: DPRINTF discards its arguments, so no name is
 * ever needed.
 */
static const char *rocker_reg_name(void *opaque, hwaddr addr)
{
    return NULL;
}
#endif
950 
951 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
952                               unsigned size)
953 {
954     DPRINTF("Write %s addr " TARGET_FMT_plx
955             ", size %u, val " TARGET_FMT_plx "\n",
956             rocker_reg_name(opaque, addr), addr, size, val);
957 
958     switch (size) {
959     case 4:
960         rocker_io_writel(opaque, addr, val);
961         break;
962     case 8:
963         rocker_io_writeq(opaque, addr, val);
964         break;
965     }
966 }
967 
968 static uint64_t rocker_port_phys_link_status(Rocker *r)
969 {
970     int i;
971     uint64_t status = 0;
972 
973     for (i = 0; i < r->fp_ports; i++) {
974         FpPort *port = r->fp_port[i];
975 
976         if (fp_port_get_link_up(port)) {
977             status |= 1 << (i + 1);
978         }
979     }
980     return status;
981 }
982 
983 static uint64_t rocker_port_phys_enable_read(Rocker *r)
984 {
985     int i;
986     uint64_t ret = 0;
987 
988     for (i = 0; i < r->fp_ports; i++) {
989         FpPort *port = r->fp_port[i];
990 
991         if (fp_port_enabled(port)) {
992             ret |= 1 << (i + 1);
993         }
994     }
995     return ret;
996 }
997 
/* 4-byte MMIO read handler for BAR0.
 *
 * Descriptor-ring registers (one register file per ring, selected by
 * the ring index encoded in the address) are decoded first; every
 * other address is a global switch register.  64-bit registers are
 * readable as two 32-bit halves: low word at the base offset, high
 * word at base + 4.  Unknown registers read as 0.
 */
static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
{
    Rocker *r = opaque;
    uint32_t ret;

    if (rocker_addr_is_desc_reg(r, addr)) {
        unsigned index = ROCKER_RING_INDEX(addr);
        unsigned offset = addr & ROCKER_DMA_DESC_MASK;

        switch (offset) {
        case ROCKER_DMA_DESC_ADDR_OFFSET:
            /* low half of the ring's DMA base address */
            ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
            break;
        case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
            /* high half of the ring's DMA base address */
            ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
            break;
        case ROCKER_DMA_DESC_SIZE_OFFSET:
            ret = desc_ring_get_size(r->rings[index]);
            break;
        case ROCKER_DMA_DESC_HEAD_OFFSET:
            ret = desc_ring_get_head(r->rings[index]);
            break;
        case ROCKER_DMA_DESC_TAIL_OFFSET:
            ret = desc_ring_get_tail(r->rings[index]);
            break;
        case ROCKER_DMA_DESC_CREDITS_OFFSET:
            ret = desc_ring_get_credits(r->rings[index]);
            break;
        default:
            DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
                    " (ring %d, addr=0x%02x)\n", addr, index, offset);
            ret = 0;
            break;
        }
        return ret;
    }

    switch (addr) {
    case ROCKER_BOGUS_REG0:
    case ROCKER_BOGUS_REG1:
    case ROCKER_BOGUS_REG2:
    case ROCKER_BOGUS_REG3:
        ret = 0xDEADBABE;
        break;
    case ROCKER_TEST_REG:
        /* test registers read back the written value doubled --
         * presumably a driver register-access self-test; same *2 in
         * the 64-bit cases below */
        ret = r->test_reg * 2;
        break;
    case ROCKER_TEST_REG64:
        ret = (uint32_t)(r->test_reg64 * 2);
        break;
    case ROCKER_TEST_REG64 + 4:
        ret = (uint32_t)((r->test_reg64 * 2) >> 32);
        break;
    case ROCKER_TEST_DMA_SIZE:
        ret = r->test_dma_size;
        break;
    case ROCKER_TEST_DMA_ADDR:
        ret = (uint32_t)r->test_dma_addr;
        break;
    case ROCKER_TEST_DMA_ADDR + 4:
        ret = (uint32_t)(r->test_dma_addr >> 32);
        break;
    case ROCKER_PORT_PHYS_COUNT:
        ret = r->fp_ports;
        break;
    case ROCKER_PORT_PHYS_LINK_STATUS:
        ret = (uint32_t)rocker_port_phys_link_status(r);
        break;
    case ROCKER_PORT_PHYS_LINK_STATUS + 4:
        ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
        break;
    case ROCKER_PORT_PHYS_ENABLE:
        ret = (uint32_t)rocker_port_phys_enable_read(r);
        break;
    case ROCKER_PORT_PHYS_ENABLE + 4:
        ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
        break;
    case ROCKER_SWITCH_ID:
        ret = (uint32_t)r->switch_id;
        break;
    case ROCKER_SWITCH_ID + 4:
        ret = (uint32_t)(r->switch_id >> 32);
        break;
    default:
        DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
        ret = 0;
        break;
    }
    return ret;
}
1088 
1089 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1090 {
1091     Rocker *r = opaque;
1092     uint64_t ret;
1093 
1094     if (rocker_addr_is_desc_reg(r, addr)) {
1095         unsigned index = ROCKER_RING_INDEX(addr);
1096         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1097 
1098         switch (addr & ROCKER_DMA_DESC_MASK) {
1099         case ROCKER_DMA_DESC_ADDR_OFFSET:
1100             ret = desc_ring_get_base_addr(r->rings[index]);
1101             break;
1102         default:
1103             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1104                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1105             ret = 0;
1106             break;
1107         }
1108         return ret;
1109     }
1110 
1111     switch (addr) {
1112     case ROCKER_BOGUS_REG0:
1113     case ROCKER_BOGUS_REG2:
1114         ret = 0xDEADBABEDEADBABEULL;
1115         break;
1116     case ROCKER_TEST_REG64:
1117         ret = r->test_reg64 * 2;
1118         break;
1119     case ROCKER_TEST_DMA_ADDR:
1120         ret = r->test_dma_addr;
1121         break;
1122     case ROCKER_PORT_PHYS_LINK_STATUS:
1123         ret = rocker_port_phys_link_status(r);
1124         break;
1125     case ROCKER_PORT_PHYS_ENABLE:
1126         ret = rocker_port_phys_enable_read(r);
1127         break;
1128     case ROCKER_SWITCH_ID:
1129         ret = r->switch_id;
1130         break;
1131     default:
1132         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1133         ret = 0;
1134         break;
1135     }
1136     return ret;
1137 }
1138 
1139 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1140 {
1141     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1142             rocker_reg_name(opaque, addr), addr, size);
1143 
1144     switch (size) {
1145     case 4:
1146         return rocker_io_readl(opaque, addr);
1147     case 8:
1148         return rocker_io_readq(opaque, addr);
1149     }
1150 
1151     return -1;
1152 }
1153 
/* BAR0 register-space ops: little-endian, and both the guest-visible
 * access sizes (.valid) and the handler-implemented sizes (.impl) are
 * restricted to 4 or 8 bytes.
 */
static const MemoryRegionOps rocker_mmio_ops = {
    .read = rocker_mmio_read,
    .write = rocker_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
};
1167 
1168 static void rocker_msix_vectors_unuse(Rocker *r,
1169                                       unsigned int num_vectors)
1170 {
1171     PCIDevice *dev = PCI_DEVICE(r);
1172     int i;
1173 
1174     for (i = 0; i < num_vectors; i++) {
1175         msix_vector_unuse(dev, i);
1176     }
1177 }
1178 
1179 static int rocker_msix_vectors_use(Rocker *r,
1180                                    unsigned int num_vectors)
1181 {
1182     PCIDevice *dev = PCI_DEVICE(r);
1183     int err;
1184     int i;
1185 
1186     for (i = 0; i < num_vectors; i++) {
1187         err = msix_vector_use(dev, i);
1188         if (err) {
1189             goto rollback;
1190         }
1191     }
1192     return 0;
1193 
1194 rollback:
1195     rocker_msix_vectors_unuse(r, i);
1196     return err;
1197 }
1198 
/* Set up MSI-X for the device: the vector count scales with the number
 * of front-panel ports (see ROCKER_MSIX_VEC_COUNT), and the vector
 * table and PBA share the dedicated MSI-X BAR at their respective
 * offsets.  All vectors are claimed up front.
 * Returns 0 on success or the negative error from msix_init()/
 * rocker_msix_vectors_use(), with msix state torn down on failure.
 */
static int rocker_msix_init(Rocker *r)
{
    PCIDevice *dev = PCI_DEVICE(r);
    int err;

    err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
                    &r->msix_bar,
                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
                    &r->msix_bar,
                    ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
                    0);
    if (err) {
        return err;
    }

    err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
    if (err) {
        goto err_msix_vectors_use;
    }

    return 0;

err_msix_vectors_use:
    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
    return err;
}
1225 
/* Tear down MSI-X state; inverse of rocker_msix_init().  msix_uninit()
 * is called before releasing the vectors, mirroring the existing
 * error-path order in rocker_msix_init().
 */
static void rocker_msix_uninit(Rocker *r)
{
    PCIDevice *dev = PCI_DEVICE(r);

    msix_uninit(dev, &r->msix_bar, &r->msix_bar);
    rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
}
1233 
1234 static int pci_rocker_init(PCIDevice *dev)
1235 {
1236     Rocker *r = to_rocker(dev);
1237     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1238     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1239     static int sw_index;
1240     int i, err = 0;
1241 
1242     /* allocate worlds */
1243 
1244     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1245     r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
1246 
1247     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1248         if (!r->worlds[i]) {
1249             goto err_world_alloc;
1250         }
1251     }
1252 
1253     /* set up memory-mapped region at BAR0 */
1254 
1255     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1256                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1257     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1258                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1259 
1260     /* set up memory-mapped region for MSI-X */
1261 
1262     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1263                        ROCKER_PCI_MSIX_BAR_SIZE);
1264     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1265                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1266 
1267     /* MSI-X init */
1268 
1269     err = rocker_msix_init(r);
1270     if (err) {
1271         goto err_msix_init;
1272     }
1273 
1274     /* validate switch properties */
1275 
1276     if (!r->name) {
1277         r->name = g_strdup(ROCKER);
1278     }
1279 
1280     if (rocker_find(r->name)) {
1281         err = -EEXIST;
1282         goto err_duplicate;
1283     }
1284 
1285     /* Rocker name is passed in port name requests to OS with the intention
1286      * that the name is used in interface names. Limit the length of the
1287      * rocker name to avoid naming problems in the OS. Also, adding the
1288      * port number as p# and unganged breakout b#, where # is at most 2
1289      * digits, so leave room for it too (-1 for string terminator, -3 for
1290      * p# and -3 for b#)
1291      */
1292 #define ROCKER_IFNAMSIZ 16
1293 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1294     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1295         fprintf(stderr,
1296                 "rocker: name too long; please shorten to at most %d chars\n",
1297                 MAX_ROCKER_NAME_LEN);
1298         return -EINVAL;
1299     }
1300 
1301     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1302         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1303         r->fp_start_macaddr.a[4] += (sw_index++);
1304     }
1305 
1306     if (!r->switch_id) {
1307         memcpy(&r->switch_id, &r->fp_start_macaddr,
1308                sizeof(r->fp_start_macaddr));
1309     }
1310 
1311     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1312         r->fp_ports = ROCKER_FP_PORTS_MAX;
1313     }
1314 
1315     r->rings = g_malloc(sizeof(DescRing *) * rocker_pci_ring_count(r));
1316     if (!r->rings) {
1317         goto err_rings_alloc;
1318     }
1319 
1320     /* Rings are ordered like this:
1321      * - command ring
1322      * - event ring
1323      * - port0 tx ring
1324      * - port0 rx ring
1325      * - port1 tx ring
1326      * - port1 rx ring
1327      * .....
1328      */
1329 
1330     err = -ENOMEM;
1331     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1332         DescRing *ring = desc_ring_alloc(r, i);
1333 
1334         if (!ring) {
1335             goto err_ring_alloc;
1336         }
1337 
1338         if (i == ROCKER_RING_CMD) {
1339             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1340         } else if (i == ROCKER_RING_EVENT) {
1341             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1342         } else if (i % 2 == 0) {
1343             desc_ring_set_consume(ring, tx_consume,
1344                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1345         } else if (i % 2 == 1) {
1346             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1347         }
1348 
1349         r->rings[i] = ring;
1350     }
1351 
1352     for (i = 0; i < r->fp_ports; i++) {
1353         FpPort *port =
1354             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1355                           i, &r->fp_ports_peers[i]);
1356 
1357         if (!port) {
1358             goto err_port_alloc;
1359         }
1360 
1361         r->fp_port[i] = port;
1362         fp_port_set_world(port, r->world_dflt);
1363     }
1364 
1365     QLIST_INSERT_HEAD(&rockers, r, next);
1366 
1367     return 0;
1368 
1369 err_port_alloc:
1370     for (--i; i >= 0; i--) {
1371         FpPort *port = r->fp_port[i];
1372         fp_port_free(port);
1373     }
1374     i = rocker_pci_ring_count(r);
1375 err_ring_alloc:
1376     for (--i; i >= 0; i--) {
1377         desc_ring_free(r->rings[i]);
1378     }
1379     g_free(r->rings);
1380 err_rings_alloc:
1381 err_duplicate:
1382     rocker_msix_uninit(r);
1383 err_msix_init:
1384     object_unparent(OBJECT(&r->msix_bar));
1385     object_unparent(OBJECT(&r->mmio));
1386 err_world_alloc:
1387     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1388         if (r->worlds[i]) {
1389             world_free(r->worlds[i]);
1390         }
1391     }
1392     return err;
1393 }
1394 
/* PCI exit handler: undo pci_rocker_init() in reverse order --
 * unregister from the global rocker list, free ports and rings, tear
 * down MSI-X, unparent the memory regions, free the worlds, and
 * finally the port peers array.
 */
static void pci_rocker_uninit(PCIDevice *dev)
{
    Rocker *r = to_rocker(dev);
    int i;

    QLIST_REMOVE(r, next);

    for (i = 0; i < r->fp_ports; i++) {
        FpPort *port = r->fp_port[i];

        fp_port_free(port);
        r->fp_port[i] = NULL;
    }

    for (i = 0; i < rocker_pci_ring_count(r); i++) {
        if (r->rings[i]) {
            desc_ring_free(r->rings[i]);
        }
    }
    g_free(r->rings);

    rocker_msix_uninit(r);
    object_unparent(OBJECT(&r->msix_bar));
    object_unparent(OBJECT(&r->mmio));

    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
        if (r->worlds[i]) {
            world_free(r->worlds[i]);
        }
    }
    g_free(r->fp_ports_peers);
}
1427 
1428 static void rocker_reset(DeviceState *dev)
1429 {
1430     Rocker *r = to_rocker(dev);
1431     int i;
1432 
1433     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1434         if (r->worlds[i]) {
1435             world_reset(r->worlds[i]);
1436         }
1437     }
1438     for (i = 0; i < r->fp_ports; i++) {
1439         fp_port_reset(r->fp_port[i]);
1440         fp_port_set_world(r->fp_port[i], r->world_dflt);
1441     }
1442 
1443     r->test_reg = 0;
1444     r->test_reg64 = 0;
1445     r->test_dma_addr = 0;
1446     r->test_dma_size = 0;
1447 
1448     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1449         desc_ring_reset(r->rings[i]);
1450     }
1451 
1452     DPRINTF("Reset done\n");
1453 }
1454 
/* User-configurable device properties (see the validation and
 * defaulting logic in pci_rocker_init()).
 */
static Property rocker_properties[] = {
    /* switch name; also used as the interface-name prefix in port
     * name requests to the OS */
    DEFINE_PROP_STRING("name", Rocker, name),
    /* MAC address of front-panel port 0; subsequent ports presumably
     * derive theirs from it -- allocation happens in fp_port_alloc() */
    DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
                        fp_start_macaddr),
    /* 64-bit switch id; defaults to the start MAC when left 0 */
    DEFINE_PROP_UINT64("switch_id", Rocker,
                       switch_id, 0),
    /* netdev backends, one per front-panel port */
    DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
                      fp_ports_peers, qdev_prop_netdev, NICPeers),
    DEFINE_PROP_END_OF_LIST(),
};
1465 
/* Migration is not supported: mark the device state unmigratable. */
static const VMStateDescription rocker_vmsd = {
    .name = ROCKER,
    .unmigratable = 1,
};
1470 
/* QOM class init: hook up the PCI lifecycle callbacks and advertise
 * the device's PCI identity (Red Hat vendor id, rocker device id,
 * "other network controller" class).
 */
static void rocker_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->init = pci_rocker_init;
    k->exit = pci_rocker_uninit;
    k->vendor_id = PCI_VENDOR_ID_REDHAT;
    k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
    k->revision = ROCKER_PCI_REVISION;
    k->class_id = PCI_CLASS_NETWORK_OTHER;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    dc->desc = "Rocker Switch";
    dc->reset = rocker_reset;
    dc->props = rocker_properties;
    dc->vmsd = &rocker_vmsd;
}
1488 
/* QOM type registration record for the rocker PCI device. */
static const TypeInfo rocker_info = {
    .name          = ROCKER,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(Rocker),
    .class_init    = rocker_class_init,
};
1495 
/* Register the rocker type with QOM (invoked via type_init below). */
static void rocker_register_types(void)
{
    type_register_static(&rocker_info);
}
1500 
1501 type_init(rocker_register_types)
1502