xref: /openbmc/qemu/hw/net/rocker/rocker.c (revision f1f7e4bf)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "hw/hw.h"
19 #include "hw/pci/pci.h"
20 #include "hw/pci/msix.h"
21 #include "net/net.h"
22 #include "net/eth.h"
23 #include "qemu/iov.h"
24 #include "qemu/bitops.h"
25 #include "qmp-commands.h"
26 
27 #include "rocker.h"
28 #include "rocker_hw.h"
29 #include "rocker_fp.h"
30 #include "rocker_desc.h"
31 #include "rocker_tlv.h"
32 #include "rocker_world.h"
33 #include "rocker_of_dpa.h"
34 
35 struct rocker {
36     /* private */
37     PCIDevice parent_obj;
38     /* public */
39 
40     MemoryRegion mmio;
41     MemoryRegion msix_bar;
42 
43     /* switch configuration */
44     char *name;                  /* switch name */
45     uint32_t fp_ports;           /* front-panel port count */
46     NICPeers *fp_ports_peers;
47     MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
48     uint64_t switch_id;          /* switch id */
49 
50     /* front-panel ports */
51     FpPort *fp_port[ROCKER_FP_PORTS_MAX];
52 
53     /* register backings */
54     uint32_t test_reg;
55     uint64_t test_reg64;
56     dma_addr_t test_dma_addr;
57     uint32_t test_dma_size;
58     uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
59 
60     /* desc rings */
61     DescRing **rings;
62 
63     /* switch worlds */
64     World *worlds[ROCKER_WORLD_TYPE_MAX];
65     World *world_dflt;
66 
67     QLIST_ENTRY(rocker) next;
68 };
69 
70 #define ROCKER "rocker"
71 
72 #define to_rocker(obj) \
73     OBJECT_CHECK(Rocker, (obj), ROCKER)
74 
75 static QLIST_HEAD(, rocker) rockers;
76 
77 Rocker *rocker_find(const char *name)
78 {
79     Rocker *r;
80 
    QLIST_FOREACH(r, &rockers, next) {
        if (strcmp(r->name, name) == 0) {
            return r;
        }
    }
85 
86     return NULL;
87 }
88 
89 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
90 {
91     if (type < ROCKER_WORLD_TYPE_MAX) {
92         return r->worlds[type];
93     }
94     return NULL;
95 }
96 
97 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
98 {
99     RockerSwitch *rocker;
100     Rocker *r;
101 
102     r = rocker_find(name);
103     if (!r) {
104         error_setg(errp, "rocker %s not found", name);
105         return NULL;
106     }
107 
108     rocker = g_new0(RockerSwitch, 1);
109     rocker->name = g_strdup(r->name);
110     rocker->id = r->switch_id;
111     rocker->ports = r->fp_ports;
112 
113     return rocker;
114 }
115 
116 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
117 {
118     RockerPortList *list = NULL;
119     Rocker *r;
120     int i;
121 
122     r = rocker_find(name);
123     if (!r) {
124         error_setg(errp, "rocker %s not found", name);
125         return NULL;
126     }
127 
    for (i = r->fp_ports - 1; i >= 0; i--) {
        RockerPortList *info = g_malloc0(sizeof(*info));
        FpPort *port = r->fp_port[i];

        info->value = g_malloc0(sizeof(*info->value));
        fp_port_get_info(port, info);
        info->next = list;
        list = info;
    }
137 
138     return list;
139 }
140 
141 uint32_t rocker_fp_ports(Rocker *r)
142 {
143     return r->fp_ports;
144 }
145 
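/* Descriptor rings are laid out as: ring 0 = cmd, ring 1 = event, then a
 * tx/rx ring pair per front-panel port (see the ring ordering comment in
 * pci_rocker_init).  The tx ring for pport N therefore sits at ring index
 * 2N, e.g. ring 2 -> pport 1, ring 4 -> pport 2, which is what the
 * formula below inverts.
 */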
146 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
147                                             DescRing *ring)
148 {
149     return (desc_ring_index(ring) - 2) / 2 + 1;
150 }
151 
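/* Consume one tx descriptor: parse its TLVs, pull each fragment out of
 * guest memory with pci_dma_read(), and hand the assembled iovec to the
 * front-panel port for egress via fp_port_eg().  The Tx offload hints
 * (L3 csum, TSO) are parsed but not acted on yet (see the XXX below).
 */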
152 static int tx_consume(Rocker *r, DescInfo *info)
153 {
154     PCIDevice *dev = PCI_DEVICE(r);
155     char *buf = desc_get_buf(info, true);
156     RockerTlv *tlv_frag;
157     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
158     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
159     uint32_t pport;
160     uint32_t port;
161     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
162     uint16_t tx_l3_csum_off = 0;
163     uint16_t tx_tso_mss = 0;
164     uint16_t tx_tso_hdr_len = 0;
165     int iovcnt = 0;
166     int err = ROCKER_OK;
167     int rem;
168     int i;
169 
170     if (!buf) {
171         return -ROCKER_ENXIO;
172     }
173 
174     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
175 
176     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
177         return -ROCKER_EINVAL;
178     }
179 
180     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
181     if (!fp_port_from_pport(pport, &port)) {
182         return -ROCKER_EINVAL;
183     }
184 
185     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
186         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
187     }
188 
189     switch (tx_offload) {
190     case ROCKER_TX_OFFLOAD_L3_CSUM:
191         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
192             return -ROCKER_EINVAL;
193         }
194         break;
195     case ROCKER_TX_OFFLOAD_TSO:
196         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
197             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
198             return -ROCKER_EINVAL;
199         }
200         break;
201     }
202 
203     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
204         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
205     }
206 
207     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
208         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
209     }
210 
211     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
212         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
213     }
214 
215     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
216         hwaddr frag_addr;
217         uint16_t frag_len;
218 
219         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
220             err = -ROCKER_EINVAL;
221             goto err_bad_attr;
222         }
223 
224         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
225 
226         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
227             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
228             err = -ROCKER_EINVAL;
229             goto err_bad_attr;
230         }
231 
232         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
233         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
234 
        /* Check the fragment count before touching iov[] so a guest
         * supplying too many fragments cannot write past the end of
         * the array.
         */
        if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
            goto err_too_many_frags;
        }

        iov[iovcnt].iov_len = frag_len;
        iov[iovcnt].iov_base = g_malloc(frag_len);
        if (!iov[iovcnt].iov_base) {
            err = -ROCKER_ENOMEM;
            goto err_no_mem;
        }

        if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
                         iov[iovcnt].iov_len)) {
            err = -ROCKER_ENXIO;
            goto err_bad_io;
        }

        iovcnt++;
251     }
252 
253     if (iovcnt) {
254         /* XXX perform Tx offloads */
255         /* XXX   silence compiler for now */
256         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
257     }
258 
259     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
260 
261 err_too_many_frags:
262 err_bad_io:
263 err_no_mem:
264 err_bad_attr:
265     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
266         g_free(iov[i].iov_base);
267     }
268 
269     return err;
270 }
271 
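/* GET_PORT_SETTINGS command: look up the port from the pport TLV and
 * write its current settings (speed, duplex, autoneg, MAC address, world
 * mode, learning flag, phys name) back into the descriptor buffer as a
 * nested ROCKER_TLV_CMD_INFO set.
 */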
272 static int cmd_get_port_settings(Rocker *r,
273                                  DescInfo *info, char *buf,
274                                  RockerTlv *cmd_info_tlv)
275 {
276     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
277     RockerTlv *nest;
278     FpPort *fp_port;
279     uint32_t pport;
280     uint32_t port;
281     uint32_t speed;
282     uint8_t duplex;
283     uint8_t autoneg;
284     uint8_t learning;
285     char *phys_name;
286     MACAddr macaddr;
287     enum rocker_world_type mode;
288     size_t tlv_size;
289     int pos;
290     int err;
291 
292     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
293                             cmd_info_tlv);
294 
295     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
296         return -ROCKER_EINVAL;
297     }
298 
299     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
300     if (!fp_port_from_pport(pport, &port)) {
301         return -ROCKER_EINVAL;
302     }
303     fp_port = r->fp_port[port];
304 
305     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
306     if (err) {
307         return err;
308     }
309 
310     fp_port_get_macaddr(fp_port, &macaddr);
311     mode = world_type(fp_port_get_world(fp_port));
312     learning = fp_port_get_learning(fp_port);
313     phys_name = fp_port_get_name(fp_port);
314 
315     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
316                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
317                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
318                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
319                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
320                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
321                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
322                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
323                rocker_tlv_total_size(strlen(phys_name));
324 
325     if (tlv_size > desc_buf_size(info)) {
326         return -ROCKER_EMSGSIZE;
327     }
328 
329     pos = 0;
330     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
331     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
332     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
333     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
334     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
335     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
336                    sizeof(macaddr.a), macaddr.a);
337     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
338     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
339                       learning);
340     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
341                    strlen(phys_name), phys_name);
342     rocker_tlv_nest_end(buf, &pos, nest);
343 
344     return desc_set_buf(info, tlv_size);
345 }
346 
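/* SET_PORT_SETTINGS command: apply whichever settings TLVs the guest
 * supplied.  Speed/duplex/autoneg are only applied when all three are
 * present; MAC address, world mode and learning are applied individually.
 */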
347 static int cmd_set_port_settings(Rocker *r,
348                                  RockerTlv *cmd_info_tlv)
349 {
350     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
351     FpPort *fp_port;
352     uint32_t pport;
353     uint32_t port;
354     uint32_t speed;
355     uint8_t duplex;
356     uint8_t autoneg;
357     uint8_t learning;
358     MACAddr macaddr;
359     enum rocker_world_type mode;
360     int err;
361 
362     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
363                             cmd_info_tlv);
364 
365     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
366         return -ROCKER_EINVAL;
367     }
368 
369     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
370     if (!fp_port_from_pport(pport, &port)) {
371         return -ROCKER_EINVAL;
372     }
373     fp_port = r->fp_port[port];
374 
375     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
376         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
377         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
378 
379         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
380         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
381         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
382 
383         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
384         if (err) {
385             return err;
386         }
387     }
388 
389     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
390         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
391             sizeof(macaddr.a)) {
392             return -ROCKER_EINVAL;
393         }
394         memcpy(macaddr.a,
395                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
396                sizeof(macaddr.a));
397         fp_port_set_macaddr(fp_port, &macaddr);
398     }
399 
400     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
401         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
402         fp_port_set_world(fp_port, r->worlds[mode]);
403     }
404 
405     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
406         learning =
407             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
408         fp_port_set_learning(fp_port, learning);
409     }
410 
411     return ROCKER_OK;
412 }
413 
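/* Consume one command-ring descriptor.  OF-DPA flow/group commands are
 * dispatched to the OF-DPA world via world_do_cmd(); the port settings
 * commands are handled here directly.
 */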
414 static int cmd_consume(Rocker *r, DescInfo *info)
415 {
416     char *buf = desc_get_buf(info, false);
417     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
418     RockerTlv *info_tlv;
419     World *world;
420     uint16_t cmd;
421     int err;
422 
423     if (!buf) {
424         return -ROCKER_ENXIO;
425     }
426 
427     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
428 
429     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
430         return -ROCKER_EINVAL;
431     }
432 
433     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
434     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
435 
    /* This might be reworked to something like this:
     * Every world would have an array of command handlers covering
     * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. It would be
     * up to each world to implement whatever commands it wants, and it
     * could reference "generic" commands such as cmd_set_port_settings
     * or cmd_get_port_settings.
     */
443 
444     switch (cmd) {
445     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
446     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
447     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
448     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
449     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
450     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
451     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
452     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
453         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
454         err = world_do_cmd(world, info, buf, cmd, info_tlv);
455         break;
456     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
457         err = cmd_get_port_settings(r, info, buf, info_tlv);
458         break;
459     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
460         err = cmd_set_port_settings(r, info_tlv);
461         break;
462     default:
463         err = -ROCKER_EINVAL;
464         break;
465     }
466 
467     return err;
468 }
469 
470 static void rocker_msix_irq(Rocker *r, unsigned vector)
471 {
472     PCIDevice *dev = PCI_DEVICE(r);
473 
474     DPRINTF("MSI-X notify request for vector %d\n", vector);
475     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
476         DPRINTF("incorrect vector %d\n", vector);
477         return;
478     }
479     msix_notify(dev, vector);
480 }
481 
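/* Post a LINK_CHANGED event for pport on the event ring, then give the
 * descriptor back to the guest, raising the event MSI-X vector if
 * desc_ring_post_desc() indicates it should.
 */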
482 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
483 {
484     DescRing *ring = r->rings[ROCKER_RING_EVENT];
485     DescInfo *info = desc_ring_fetch_desc(ring);
486     RockerTlv *nest;
487     char *buf;
488     size_t tlv_size;
489     int pos;
490     int err;
491 
492     if (!info) {
493         return -ROCKER_ENOBUFS;
494     }
495 
496     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
497                rocker_tlv_total_size(0) +                 /* nest */
498                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
499                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
500 
501     if (tlv_size > desc_buf_size(info)) {
502         err = -ROCKER_EMSGSIZE;
503         goto err_too_big;
504     }
505 
506     buf = desc_get_buf(info, false);
507     if (!buf) {
508         err = -ROCKER_ENOMEM;
509         goto err_no_mem;
510     }
511 
512     pos = 0;
513     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
514                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
515     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
516     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
517     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
518                       link_up ? 1 : 0);
519     rocker_tlv_nest_end(buf, &pos, nest);
520 
521     err = desc_set_buf(info, tlv_size);
522 
523 err_too_big:
524 err_no_mem:
525     if (desc_ring_post_desc(ring, err)) {
526         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
527     }
528 
529     return err;
530 }
531 
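/* Post a MAC_VLAN_SEEN event (source MAC learned on pport/vlan_id) on the
 * event ring.  Nothing is posted unless learning is enabled on the port.
 */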
532 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
533                                uint16_t vlan_id)
534 {
535     DescRing *ring = r->rings[ROCKER_RING_EVENT];
536     DescInfo *info;
537     FpPort *fp_port;
538     uint32_t port;
539     RockerTlv *nest;
540     char *buf;
541     size_t tlv_size;
542     int pos;
543     int err;
544 
545     if (!fp_port_from_pport(pport, &port)) {
546         return -ROCKER_EINVAL;
547     }
548     fp_port = r->fp_port[port];
549     if (!fp_port_get_learning(fp_port)) {
550         return ROCKER_OK;
551     }
552 
553     info = desc_ring_fetch_desc(ring);
554     if (!info) {
555         return -ROCKER_ENOBUFS;
556     }
557 
558     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
559                rocker_tlv_total_size(0) +                 /* nest */
560                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
561                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
562                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
563 
564     if (tlv_size > desc_buf_size(info)) {
565         err = -ROCKER_EMSGSIZE;
566         goto err_too_big;
567     }
568 
569     buf = desc_get_buf(info, false);
570     if (!buf) {
571         err = -ROCKER_ENOMEM;
572         goto err_no_mem;
573     }
574 
575     pos = 0;
576     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
577                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
578     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
579     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
580     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
581     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
582     rocker_tlv_nest_end(buf, &pos, nest);
583 
584     err = desc_set_buf(info, tlv_size);
585 
586 err_too_big:
587 err_no_mem:
588     if (desc_ring_post_desc(ring, err)) {
589         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
590     }
591 
592     return err;
593 }
594 
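/* The rx ring for pport N is the second ring of that port's tx/rx pair:
 * index 2 * (N - 1) + 3, e.g. pport 1 -> ring 3, pport 2 -> ring 5.
 */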
static DescRing *rocker_get_rx_ring_by_pport(Rocker *r, uint32_t pport)
597 {
598     return r->rings[(pport - 1) * 2 + 3];
599 }
600 
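/* Deliver a received frame to the guest: fetch an rx descriptor, read the
 * frag addr/max len TLVs the guest pre-filled, DMA the frame into that
 * buffer, then rewrite the descriptor TLVs (flags, csum, frag len) and
 * post the descriptor back, kicking the port's rx MSI-X vector.
 */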
601 int rx_produce(World *world, uint32_t pport,
602                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
603 {
604     Rocker *r = world_rocker(world);
605     PCIDevice *dev = (PCIDevice *)r;
606     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
607     DescInfo *info = desc_ring_fetch_desc(ring);
608     char *data;
609     size_t data_size = iov_size(iov, iovcnt);
610     char *buf;
611     uint16_t rx_flags = 0;
612     uint16_t rx_csum = 0;
613     size_t tlv_size;
614     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
615     hwaddr frag_addr;
616     uint16_t frag_max_len;
617     int pos;
618     int err;
619 
620     if (!info) {
621         return -ROCKER_ENOBUFS;
622     }
623 
624     buf = desc_get_buf(info, false);
625     if (!buf) {
626         err = -ROCKER_ENXIO;
627         goto out;
628     }
629     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
630 
631     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
632         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
633         err = -ROCKER_EINVAL;
634         goto out;
635     }
636 
637     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
638     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
639 
640     if (data_size > frag_max_len) {
641         err = -ROCKER_EMSGSIZE;
642         goto out;
643     }
644 
645     if (copy_to_cpu) {
646         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
647     }
648 
649     /* XXX calc rx flags/csum */
650 
651     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
               rocker_tlv_total_size(sizeof(uint16_t)) + /* csum */
653                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
654                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
655                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
656 
657     if (tlv_size > desc_buf_size(info)) {
658         err = -ROCKER_EMSGSIZE;
659         goto out;
660     }
661 
    /* TODO:
     * The iov DMA write could be optimized in a similar way to how e1000
     * does it in e1000_receive_iov. But maybe it would make sense to
     * introduce a generic iov_dma_write helper instead.
     */
667 
668     data = g_malloc(data_size);
669     if (!data) {
670         err = -ROCKER_ENOMEM;
671         goto out;
672     }
673     iov_to_buf(iov, iovcnt, 0, data, data_size);
674     pci_dma_write(dev, frag_addr, data, data_size);
675     g_free(data);
676 
677     pos = 0;
678     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
679     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
680     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
681     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
682     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
683 
684     err = desc_set_buf(info, tlv_size);
685 
686 out:
687     if (desc_ring_post_desc(ring, err)) {
688         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
689     }
690 
691     return err;
692 }
693 
694 int rocker_port_eg(Rocker *r, uint32_t pport,
695                    const struct iovec *iov, int iovcnt)
696 {
697     FpPort *fp_port;
698     uint32_t port;
699 
700     if (!fp_port_from_pport(pport, &port)) {
701         return -ROCKER_EINVAL;
702     }
703 
704     fp_port = r->fp_port[port];
705 
706     return fp_port_eg(fp_port, iov, iovcnt);
707 }
708 
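/* Test DMA engine behind the ROCKER_TEST_DMA_* registers: operates on the
 * guest buffer described by ROCKER_TEST_DMA_ADDR/SIZE.  CLEAR writes
 * zeros, FILL writes 0x96 bytes, INVERT reads the buffer back and writes
 * the bitwise complement; completion is signalled on the test MSI-X
 * vector.
 */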
709 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
710 {
711     PCIDevice *dev = PCI_DEVICE(r);
712     char *buf;
713     int i;
714 
715     buf = g_malloc(r->test_dma_size);
716 
717     if (!buf) {
718         DPRINTF("test dma buffer alloc failed");
719         return;
720     }
721 
722     switch (val) {
723     case ROCKER_TEST_DMA_CTRL_CLEAR:
724         memset(buf, 0, r->test_dma_size);
725         break;
726     case ROCKER_TEST_DMA_CTRL_FILL:
727         memset(buf, 0x96, r->test_dma_size);
728         break;
729     case ROCKER_TEST_DMA_CTRL_INVERT:
730         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
731         for (i = 0; i < r->test_dma_size; i++) {
732             buf[i] = ~buf[i];
733         }
734         break;
735     default:
        DPRINTF("unknown test dma control val=0x%08x\n", val);
737         goto err_out;
738     }
739     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
740 
741     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
742 
743 err_out:
744     g_free(buf);
745 }
746 
747 static void rocker_reset(DeviceState *dev);
748 
749 static void rocker_control(Rocker *r, uint32_t val)
750 {
751     if (val & ROCKER_CONTROL_RESET) {
752         rocker_reset(DEVICE(r));
753     }
754 }
755 
756 static int rocker_pci_ring_count(Rocker *r)
757 {
    /* There are:
     * - one command ring
     * - one event ring
     * - one tx ring and one rx ring per front-panel port
     */
763     return 2 + (2 * r->fp_ports);
764 }
765 
766 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
767 {
768     hwaddr start = ROCKER_DMA_DESC_BASE;
769     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
770 
771     return addr >= start && addr < end;
772 }
773 
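/* The port enable register (like the link status register) uses bit
 * (i + 1) for front-panel port i; bit 0 is unused.  Enable or disable
 * each port whose bit changed.
 */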
774 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
775 {
776     int i;
777     bool old_enabled;
778     bool new_enabled;
779     FpPort *fp_port;
780 
781     for (i = 0; i < r->fp_ports; i++) {
782         fp_port = r->fp_port[i];
783         old_enabled = fp_port_enabled(fp_port);
784         new_enabled = (new >> (i + 1)) & 0x1;
785         if (new_enabled == old_enabled) {
786             continue;
787         }
788         if (new_enabled) {
789             fp_port_enable(r->fp_port[i]);
790         } else {
791             fp_port_disable(r->fp_port[i]);
792         }
793     }
794 }
795 
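/* 32-bit MMIO write path.  64-bit registers can be written as two 32-bit
 * halves, lower half first: the low word is staged in r->lower32 and the
 * write to offset +4 commits the combined value.  For example (guest
 * view, illustrative only):
 *
 *   writel(ROCKER_TEST_REG64,     lo32);      staged in r->lower32
 *   writel(ROCKER_TEST_REG64 + 4, hi32);      commits (hi32 << 32) | lo32
 */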
796 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
797 {
798     Rocker *r = opaque;
799 
800     if (rocker_addr_is_desc_reg(r, addr)) {
801         unsigned index = ROCKER_RING_INDEX(addr);
802         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
803 
804         switch (offset) {
805         case ROCKER_DMA_DESC_ADDR_OFFSET:
806             r->lower32 = (uint64_t)val;
807             break;
808         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
809             desc_ring_set_base_addr(r->rings[index],
810                                     ((uint64_t)val) << 32 | r->lower32);
811             r->lower32 = 0;
812             break;
813         case ROCKER_DMA_DESC_SIZE_OFFSET:
814             desc_ring_set_size(r->rings[index], val);
815             break;
816         case ROCKER_DMA_DESC_HEAD_OFFSET:
817             if (desc_ring_set_head(r->rings[index], val)) {
818                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
819             }
820             break;
821         case ROCKER_DMA_DESC_CTRL_OFFSET:
822             desc_ring_set_ctrl(r->rings[index], val);
823             break;
824         case ROCKER_DMA_DESC_CREDITS_OFFSET:
825             if (desc_ring_ret_credits(r->rings[index], val)) {
826                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
827             }
828             break;
829         default:
            DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
                    " val=0x%08x (ring %d, offset=0x%02x)\n",
                    addr, val, index, offset);
833             break;
834         }
835         return;
836     }
837 
838     switch (addr) {
839     case ROCKER_TEST_REG:
840         r->test_reg = val;
841         break;
842     case ROCKER_TEST_REG64:
843     case ROCKER_TEST_DMA_ADDR:
844     case ROCKER_PORT_PHYS_ENABLE:
845         r->lower32 = (uint64_t)val;
846         break;
847     case ROCKER_TEST_REG64 + 4:
848         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
849         r->lower32 = 0;
850         break;
851     case ROCKER_TEST_IRQ:
852         rocker_msix_irq(r, val);
853         break;
854     case ROCKER_TEST_DMA_SIZE:
855         r->test_dma_size = val;
856         break;
857     case ROCKER_TEST_DMA_ADDR + 4:
858         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
859         r->lower32 = 0;
860         break;
861     case ROCKER_TEST_DMA_CTRL:
862         rocker_test_dma_ctrl(r, val);
863         break;
864     case ROCKER_CONTROL:
865         rocker_control(r, val);
866         break;
867     case ROCKER_PORT_PHYS_ENABLE + 4:
868         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
869         r->lower32 = 0;
870         break;
871     default:
872         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
873                 " val=0x%08x\n", addr, val);
874         break;
875     }
876 }
877 
878 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
879 {
880     Rocker *r = opaque;
881 
882     if (rocker_addr_is_desc_reg(r, addr)) {
883         unsigned index = ROCKER_RING_INDEX(addr);
884         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
885 
886         switch (offset) {
887         case ROCKER_DMA_DESC_ADDR_OFFSET:
888             desc_ring_set_base_addr(r->rings[index], val);
889             break;
890         default:
891             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
892                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
893                     addr, val, index, offset);
894             break;
895         }
896         return;
897     }
898 
899     switch (addr) {
900     case ROCKER_TEST_REG64:
901         r->test_reg64 = val;
902         break;
903     case ROCKER_TEST_DMA_ADDR:
904         r->test_dma_addr = val;
905         break;
906     case ROCKER_PORT_PHYS_ENABLE:
907         rocker_port_phys_enable_write(r, val);
908         break;
909     default:
910         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
911                 " val=0x" TARGET_FMT_plx "\n", addr, val);
912         break;
913     }
914 }
915 
916 #ifdef DEBUG_ROCKER
917 #define regname(reg) case (reg): return #reg
918 static const char *rocker_reg_name(void *opaque, hwaddr addr)
919 {
920     Rocker *r = opaque;
921 
922     if (rocker_addr_is_desc_reg(r, addr)) {
923         unsigned index = ROCKER_RING_INDEX(addr);
924         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
925         static char buf[100];
926         char ring_name[10];
927 
928         switch (index) {
929         case 0:
930             sprintf(ring_name, "cmd");
931             break;
932         case 1:
933             sprintf(ring_name, "event");
934             break;
935         default:
936             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
937                     (index - 2) / 2);
938         }
939 
940         switch (offset) {
941         case ROCKER_DMA_DESC_ADDR_OFFSET:
942             sprintf(buf, "Ring[%s] ADDR", ring_name);
943             return buf;
944         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
945             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
946             return buf;
947         case ROCKER_DMA_DESC_SIZE_OFFSET:
948             sprintf(buf, "Ring[%s] SIZE", ring_name);
949             return buf;
950         case ROCKER_DMA_DESC_HEAD_OFFSET:
951             sprintf(buf, "Ring[%s] HEAD", ring_name);
952             return buf;
953         case ROCKER_DMA_DESC_TAIL_OFFSET:
954             sprintf(buf, "Ring[%s] TAIL", ring_name);
955             return buf;
956         case ROCKER_DMA_DESC_CTRL_OFFSET:
957             sprintf(buf, "Ring[%s] CTRL", ring_name);
958             return buf;
959         case ROCKER_DMA_DESC_CREDITS_OFFSET:
960             sprintf(buf, "Ring[%s] CREDITS", ring_name);
961             return buf;
962         default:
963             sprintf(buf, "Ring[%s] ???", ring_name);
964             return buf;
965         }
966     } else {
967         switch (addr) {
968             regname(ROCKER_BOGUS_REG0);
969             regname(ROCKER_BOGUS_REG1);
970             regname(ROCKER_BOGUS_REG2);
971             regname(ROCKER_BOGUS_REG3);
972             regname(ROCKER_TEST_REG);
973             regname(ROCKER_TEST_REG64);
974             regname(ROCKER_TEST_REG64+4);
975             regname(ROCKER_TEST_IRQ);
976             regname(ROCKER_TEST_DMA_ADDR);
977             regname(ROCKER_TEST_DMA_ADDR+4);
978             regname(ROCKER_TEST_DMA_SIZE);
979             regname(ROCKER_TEST_DMA_CTRL);
980             regname(ROCKER_CONTROL);
981             regname(ROCKER_PORT_PHYS_COUNT);
982             regname(ROCKER_PORT_PHYS_LINK_STATUS);
983             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
984             regname(ROCKER_PORT_PHYS_ENABLE);
985             regname(ROCKER_PORT_PHYS_ENABLE+4);
986             regname(ROCKER_SWITCH_ID);
987             regname(ROCKER_SWITCH_ID+4);
988         }
989     }
990     return "???";
991 }
992 #else
993 static const char *rocker_reg_name(void *opaque, hwaddr addr)
994 {
995     return NULL;
996 }
997 #endif
998 
999 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1000                               unsigned size)
1001 {
1002     DPRINTF("Write %s addr " TARGET_FMT_plx
1003             ", size %u, val " TARGET_FMT_plx "\n",
1004             rocker_reg_name(opaque, addr), addr, size, val);
1005 
1006     switch (size) {
1007     case 4:
1008         rocker_io_writel(opaque, addr, val);
1009         break;
1010     case 8:
1011         rocker_io_writeq(opaque, addr, val);
1012         break;
1013     }
1014 }
1015 
1016 static uint64_t rocker_port_phys_link_status(Rocker *r)
1017 {
1018     int i;
1019     uint64_t status = 0;
1020 
1021     for (i = 0; i < r->fp_ports; i++) {
1022         FpPort *port = r->fp_port[i];
1023 
1024         if (fp_port_get_link_up(port)) {
            status |= 1ULL << (i + 1);
1026         }
1027     }
1028     return status;
1029 }
1030 
1031 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1032 {
1033     int i;
1034     uint64_t ret = 0;
1035 
1036     for (i = 0; i < r->fp_ports; i++) {
1037         FpPort *port = r->fp_port[i];
1038 
1039         if (fp_port_enabled(port)) {
            ret |= 1ULL << (i + 1);
1041         }
1042     }
1043     return ret;
1044 }
1045 
1046 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1047 {
1048     Rocker *r = opaque;
1049     uint32_t ret;
1050 
1051     if (rocker_addr_is_desc_reg(r, addr)) {
1052         unsigned index = ROCKER_RING_INDEX(addr);
1053         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1054 
1055         switch (offset) {
1056         case ROCKER_DMA_DESC_ADDR_OFFSET:
1057             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1058             break;
1059         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1060             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1061             break;
1062         case ROCKER_DMA_DESC_SIZE_OFFSET:
1063             ret = desc_ring_get_size(r->rings[index]);
1064             break;
1065         case ROCKER_DMA_DESC_HEAD_OFFSET:
1066             ret = desc_ring_get_head(r->rings[index]);
1067             break;
1068         case ROCKER_DMA_DESC_TAIL_OFFSET:
1069             ret = desc_ring_get_tail(r->rings[index]);
1070             break;
1071         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1072             ret = desc_ring_get_credits(r->rings[index]);
1073             break;
1074         default:
            DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
                    " (ring %d, offset=0x%02x)\n", addr, index, offset);
1077             ret = 0;
1078             break;
1079         }
1080         return ret;
1081     }
1082 
1083     switch (addr) {
1084     case ROCKER_BOGUS_REG0:
1085     case ROCKER_BOGUS_REG1:
1086     case ROCKER_BOGUS_REG2:
1087     case ROCKER_BOGUS_REG3:
1088         ret = 0xDEADBABE;
1089         break;
1090     case ROCKER_TEST_REG:
1091         ret = r->test_reg * 2;
1092         break;
1093     case ROCKER_TEST_REG64:
1094         ret = (uint32_t)(r->test_reg64 * 2);
1095         break;
1096     case ROCKER_TEST_REG64 + 4:
1097         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1098         break;
1099     case ROCKER_TEST_DMA_SIZE:
1100         ret = r->test_dma_size;
1101         break;
1102     case ROCKER_TEST_DMA_ADDR:
1103         ret = (uint32_t)r->test_dma_addr;
1104         break;
1105     case ROCKER_TEST_DMA_ADDR + 4:
1106         ret = (uint32_t)(r->test_dma_addr >> 32);
1107         break;
1108     case ROCKER_PORT_PHYS_COUNT:
1109         ret = r->fp_ports;
1110         break;
1111     case ROCKER_PORT_PHYS_LINK_STATUS:
1112         ret = (uint32_t)rocker_port_phys_link_status(r);
1113         break;
1114     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1115         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1116         break;
1117     case ROCKER_PORT_PHYS_ENABLE:
1118         ret = (uint32_t)rocker_port_phys_enable_read(r);
1119         break;
1120     case ROCKER_PORT_PHYS_ENABLE + 4:
1121         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1122         break;
1123     case ROCKER_SWITCH_ID:
1124         ret = (uint32_t)r->switch_id;
1125         break;
1126     case ROCKER_SWITCH_ID + 4:
1127         ret = (uint32_t)(r->switch_id >> 32);
1128         break;
1129     default:
1130         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1131         ret = 0;
1132         break;
1133     }
1134     return ret;
1135 }
1136 
1137 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1138 {
1139     Rocker *r = opaque;
1140     uint64_t ret;
1141 
1142     if (rocker_addr_is_desc_reg(r, addr)) {
1143         unsigned index = ROCKER_RING_INDEX(addr);
1144         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1145 
        switch (offset) {
1147         case ROCKER_DMA_DESC_ADDR_OFFSET:
1148             ret = desc_ring_get_base_addr(r->rings[index]);
1149             break;
1150         default:
            DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
                    " (ring %d, offset=0x%02x)\n", addr, index, offset);
1153             ret = 0;
1154             break;
1155         }
1156         return ret;
1157     }
1158 
1159     switch (addr) {
1160     case ROCKER_BOGUS_REG0:
1161     case ROCKER_BOGUS_REG2:
1162         ret = 0xDEADBABEDEADBABEULL;
1163         break;
1164     case ROCKER_TEST_REG64:
1165         ret = r->test_reg64 * 2;
1166         break;
1167     case ROCKER_TEST_DMA_ADDR:
1168         ret = r->test_dma_addr;
1169         break;
1170     case ROCKER_PORT_PHYS_LINK_STATUS:
1171         ret = rocker_port_phys_link_status(r);
1172         break;
1173     case ROCKER_PORT_PHYS_ENABLE:
1174         ret = rocker_port_phys_enable_read(r);
1175         break;
1176     case ROCKER_SWITCH_ID:
1177         ret = r->switch_id;
1178         break;
1179     default:
1180         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1181         ret = 0;
1182         break;
1183     }
1184     return ret;
1185 }
1186 
1187 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1188 {
1189     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1190             rocker_reg_name(opaque, addr), addr, size);
1191 
1192     switch (size) {
1193     case 4:
1194         return rocker_io_readl(opaque, addr);
1195     case 8:
1196         return rocker_io_readq(opaque, addr);
1197     }
1198 
1199     return -1;
1200 }
1201 
1202 static const MemoryRegionOps rocker_mmio_ops = {
1203     .read = rocker_mmio_read,
1204     .write = rocker_mmio_write,
1205     .endianness = DEVICE_LITTLE_ENDIAN,
1206     .valid = {
1207         .min_access_size = 4,
1208         .max_access_size = 8,
1209     },
1210     .impl = {
1211         .min_access_size = 4,
1212         .max_access_size = 8,
1213     },
1214 };
1215 
1216 static void rocker_msix_vectors_unuse(Rocker *r,
1217                                       unsigned int num_vectors)
1218 {
1219     PCIDevice *dev = PCI_DEVICE(r);
1220     int i;
1221 
1222     for (i = 0; i < num_vectors; i++) {
1223         msix_vector_unuse(dev, i);
1224     }
1225 }
1226 
1227 static int rocker_msix_vectors_use(Rocker *r,
1228                                    unsigned int num_vectors)
1229 {
1230     PCIDevice *dev = PCI_DEVICE(r);
1231     int err;
1232     int i;
1233 
1234     for (i = 0; i < num_vectors; i++) {
1235         err = msix_vector_use(dev, i);
1236         if (err) {
1237             goto rollback;
1238         }
1239     }
1240     return 0;
1241 
1242 rollback:
1243     rocker_msix_vectors_unuse(r, i);
1244     return err;
1245 }
1246 
1247 static int rocker_msix_init(Rocker *r)
1248 {
1249     PCIDevice *dev = PCI_DEVICE(r);
1250     int err;
1251 
1252     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1253                     &r->msix_bar,
1254                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1255                     &r->msix_bar,
1256                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1257                     0);
1258     if (err) {
1259         return err;
1260     }
1261 
1262     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1263     if (err) {
1264         goto err_msix_vectors_use;
1265     }
1266 
1267     return 0;
1268 
1269 err_msix_vectors_use:
1270     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1271     return err;
1272 }
1273 
1274 static void rocker_msix_uninit(Rocker *r)
1275 {
1276     PCIDevice *dev = PCI_DEVICE(r);
1277 
1278     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1279     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1280 }
1281 
1282 static int pci_rocker_init(PCIDevice *dev)
1283 {
1284     Rocker *r = to_rocker(dev);
1285     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1286     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1287     static int sw_index;
1288     int i, err = 0;
1289 
1290     /* allocate worlds */
1291 
1292     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1293     r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
1294 
    for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
        if (!r->worlds[i]) {
            err = -ENOMEM;
            goto err_world_alloc;
        }
    }
1300 
1301     /* set up memory-mapped region at BAR0 */
1302 
1303     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1304                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1305     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1306                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1307 
1308     /* set up memory-mapped region for MSI-X */
1309 
1310     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1311                        ROCKER_PCI_MSIX_BAR_SIZE);
1312     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1313                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1314 
1315     /* MSI-X init */
1316 
1317     err = rocker_msix_init(r);
1318     if (err) {
1319         goto err_msix_init;
1320     }
1321 
1322     /* validate switch properties */
1323 
1324     if (!r->name) {
1325         r->name = g_strdup(ROCKER);
1326     }
1327 
1328     if (rocker_find(r->name)) {
1329         err = -EEXIST;
1330         goto err_duplicate;
1331     }
1332 
    /* The rocker name is passed in port name requests to the OS with the
     * intention that it is used in interface names. Limit the length of
     * the rocker name to avoid naming problems in the OS. The port number
     * is also appended as p# and the unganged breakout as b#, where # is
     * at most 2 digits, so leave room for those too (-1 for the string
     * terminator, -3 for p# and -3 for b#).
     */
1340 #define ROCKER_IFNAMSIZ 16
1341 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
    if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
        fprintf(stderr,
                "rocker: name too long; please shorten to at most %d chars\n",
                MAX_ROCKER_NAME_LEN);
        /* Don't return directly here; unwind the MSI-X and memory region
         * setup done above.
         */
        err = -EINVAL;
        goto err_name_too_long;
    }
1348 
1349     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1350         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1351         r->fp_start_macaddr.a[4] += (sw_index++);
1352     }
1353 
1354     if (!r->switch_id) {
1355         memcpy(&r->switch_id, &r->fp_start_macaddr,
1356                sizeof(r->fp_start_macaddr));
1357     }
1358 
1359     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1360         r->fp_ports = ROCKER_FP_PORTS_MAX;
1361     }
1362 
1363     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
    if (!r->rings) {
        err = -ENOMEM;
        goto err_rings_alloc;
    }
1367 
1368     /* Rings are ordered like this:
1369      * - command ring
1370      * - event ring
1371      * - port0 tx ring
1372      * - port0 rx ring
1373      * - port1 tx ring
1374      * - port1 rx ring
1375      * .....
1376      */
1377 
1378     err = -ENOMEM;
1379     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1380         DescRing *ring = desc_ring_alloc(r, i);
1381 
1382         if (!ring) {
1383             goto err_ring_alloc;
1384         }
1385 
1386         if (i == ROCKER_RING_CMD) {
1387             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1388         } else if (i == ROCKER_RING_EVENT) {
1389             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1390         } else if (i % 2 == 0) {
1391             desc_ring_set_consume(ring, tx_consume,
1392                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1393         } else if (i % 2 == 1) {
1394             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1395         }
1396 
1397         r->rings[i] = ring;
1398     }
1399 
1400     for (i = 0; i < r->fp_ports; i++) {
1401         FpPort *port =
1402             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1403                           i, &r->fp_ports_peers[i]);
1404 
1405         if (!port) {
1406             goto err_port_alloc;
1407         }
1408 
1409         r->fp_port[i] = port;
1410         fp_port_set_world(port, r->world_dflt);
1411     }
1412 
1413     QLIST_INSERT_HEAD(&rockers, r, next);
1414 
1415     return 0;
1416 
1417 err_port_alloc:
1418     for (--i; i >= 0; i--) {
1419         FpPort *port = r->fp_port[i];
1420         fp_port_free(port);
1421     }
1422     i = rocker_pci_ring_count(r);
1423 err_ring_alloc:
1424     for (--i; i >= 0; i--) {
1425         desc_ring_free(r->rings[i]);
1426     }
1427     g_free(r->rings);
1428 err_rings_alloc:
err_name_too_long:
err_duplicate:
1430     rocker_msix_uninit(r);
1431 err_msix_init:
1432     object_unparent(OBJECT(&r->msix_bar));
1433     object_unparent(OBJECT(&r->mmio));
1434 err_world_alloc:
1435     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1436         if (r->worlds[i]) {
1437             world_free(r->worlds[i]);
1438         }
1439     }
1440     return err;
1441 }
1442 
1443 static void pci_rocker_uninit(PCIDevice *dev)
1444 {
1445     Rocker *r = to_rocker(dev);
1446     int i;
1447 
1448     QLIST_REMOVE(r, next);
1449 
1450     for (i = 0; i < r->fp_ports; i++) {
1451         FpPort *port = r->fp_port[i];
1452 
1453         fp_port_free(port);
1454         r->fp_port[i] = NULL;
1455     }
1456 
1457     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1458         if (r->rings[i]) {
1459             desc_ring_free(r->rings[i]);
1460         }
1461     }
1462     g_free(r->rings);
1463 
1464     rocker_msix_uninit(r);
1465     object_unparent(OBJECT(&r->msix_bar));
1466     object_unparent(OBJECT(&r->mmio));
1467 
1468     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1469         if (r->worlds[i]) {
1470             world_free(r->worlds[i]);
1471         }
1472     }
1473     g_free(r->fp_ports_peers);
1474 }
1475 
1476 static void rocker_reset(DeviceState *dev)
1477 {
1478     Rocker *r = to_rocker(dev);
1479     int i;
1480 
1481     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1482         if (r->worlds[i]) {
1483             world_reset(r->worlds[i]);
1484         }
1485     }
1486     for (i = 0; i < r->fp_ports; i++) {
1487         fp_port_reset(r->fp_port[i]);
1488         fp_port_set_world(r->fp_port[i], r->world_dflt);
1489     }
1490 
1491     r->test_reg = 0;
1492     r->test_reg64 = 0;
1493     r->test_dma_addr = 0;
1494     r->test_dma_size = 0;
1495 
1496     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1497         desc_ring_reset(r->rings[i]);
1498     }
1499 
1500     DPRINTF("Reset done\n");
1501 }
1502 
1503 static Property rocker_properties[] = {
1504     DEFINE_PROP_STRING("name", Rocker, name),
1505     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1506                         fp_start_macaddr),
1507     DEFINE_PROP_UINT64("switch_id", Rocker,
1508                        switch_id, 0),
1509     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1510                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1511     DEFINE_PROP_END_OF_LIST(),
1512 };
1513 
1514 static const VMStateDescription rocker_vmsd = {
1515     .name = ROCKER,
1516     .unmigratable = 1,
1517 };
1518 
1519 static void rocker_class_init(ObjectClass *klass, void *data)
1520 {
1521     DeviceClass *dc = DEVICE_CLASS(klass);
1522     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1523 
1524     k->init = pci_rocker_init;
1525     k->exit = pci_rocker_uninit;
1526     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1527     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1528     k->revision = ROCKER_PCI_REVISION;
1529     k->class_id = PCI_CLASS_NETWORK_OTHER;
1530     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1531     dc->desc = "Rocker Switch";
1532     dc->reset = rocker_reset;
1533     dc->props = rocker_properties;
1534     dc->vmsd = &rocker_vmsd;
1535 }
1536 
1537 static const TypeInfo rocker_info = {
1538     .name          = ROCKER,
1539     .parent        = TYPE_PCI_DEVICE,
1540     .instance_size = sizeof(Rocker),
1541     .class_init    = rocker_class_init,
1542 };
1543 
1544 static void rocker_register_types(void)
1545 {
1546     type_register_static(&rocker_info);
1547 }
1548 
1549 type_init(rocker_register_types)
1550