xref: /openbmc/qemu/hw/net/rocker/rocker.c (revision 585ec727)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "hw/hw.h"
20 #include "hw/pci/pci.h"
21 #include "hw/pci/msix.h"
22 #include "net/net.h"
23 #include "net/eth.h"
24 #include "qemu/iov.h"
25 #include "qemu/bitops.h"
26 #include "qmp-commands.h"
27 
28 #include "rocker.h"
29 #include "rocker_hw.h"
30 #include "rocker_fp.h"
31 #include "rocker_desc.h"
32 #include "rocker_tlv.h"
33 #include "rocker_world.h"
34 #include "rocker_of_dpa.h"
35 
36 struct rocker {
37     /* private */
38     PCIDevice parent_obj;
39     /* public */
40 
41     MemoryRegion mmio;
42     MemoryRegion msix_bar;
43 
44     /* switch configuration */
45     char *name;                  /* switch name */
46     uint32_t fp_ports;           /* front-panel port count */
47     NICPeers *fp_ports_peers;
48     MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
49     uint64_t switch_id;          /* switch id */
50 
51     /* front-panel ports */
52     FpPort *fp_port[ROCKER_FP_PORTS_MAX];
53 
54     /* register backings */
55     uint32_t test_reg;
56     uint64_t test_reg64;
57     dma_addr_t test_dma_addr;
58     uint32_t test_dma_size;
59     uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
60 
61     /* desc rings */
62     DescRing **rings;
63 
64     /* switch worlds */
65     World *worlds[ROCKER_WORLD_TYPE_MAX];
66     World *world_dflt;
67 
68     QLIST_ENTRY(rocker) next;
69 };
70 
71 #define ROCKER "rocker"
72 
73 #define to_rocker(obj) \
74     OBJECT_CHECK(Rocker, (obj), ROCKER)
75 
76 static QLIST_HEAD(, rocker) rockers;
77 
78 Rocker *rocker_find(const char *name)
79 {
80     Rocker *r;
81 
82     QLIST_FOREACH(r, &rockers, next)
83         if (strcmp(r->name, name) == 0) {
84             return r;
85         }
86 
87     return NULL;
88 }
89 
90 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
91 {
92     if (type < ROCKER_WORLD_TYPE_MAX) {
93         return r->worlds[type];
94     }
95     return NULL;
96 }
97 
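/*
 * QMP handlers: these back the query-rocker and query-rocker-ports monitor
 * commands and only report state already held by the device (switch id,
 * port count, per-port settings).
 */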
98 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
99 {
100     RockerSwitch *rocker;
101     Rocker *r;
102 
103     r = rocker_find(name);
104     if (!r) {
105         error_setg(errp, "rocker %s not found", name);
106         return NULL;
107     }
108 
109     rocker = g_new0(RockerSwitch, 1);
110     rocker->name = g_strdup(r->name);
111     rocker->id = r->switch_id;
112     rocker->ports = r->fp_ports;
113 
114     return rocker;
115 }
116 
117 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
118 {
119     RockerPortList *list = NULL;
120     Rocker *r;
121     int i;
122 
123     r = rocker_find(name);
124     if (!r) {
125         error_setg(errp, "rocker %s not found", name);
126         return NULL;
127     }
128 
129     for (i = r->fp_ports - 1; i >= 0; i--) {
130         RockerPortList *info = g_malloc0(sizeof(*info));
131         info->value = g_malloc0(sizeof(*info->value));
132         struct fp_port *port = r->fp_port[i];
133 
134         fp_port_get_info(port, info);
135         info->next = list;
136         list = info;
137     }
138 
139     return list;
140 }
141 
142 uint32_t rocker_fp_ports(Rocker *r)
143 {
144     return r->fp_ports;
145 }
146 
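/*
 * Descriptor rings are laid out as ring 0 = cmd, ring 1 = event, then one
 * tx/rx pair per front-panel port (see the ring setup in pci_rocker_init).
 * That makes the tx ring for pport N index 2*N and the rx ring index
 * 2*N + 1; e.g. pport 1 uses tx ring 2 and rx ring 3.  The helper below
 * maps a tx ring back to its pport; rocker_get_rx_ring_by_pport() does the
 * equivalent lookup for rx rings.
 */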
147 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
148                                             DescRing *ring)
149 {
150     return (desc_ring_index(ring) - 2) / 2 + 1;
151 }
152 
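/*
 * Consume one descriptor from a port's tx ring: parse the TX TLVs, DMA-read
 * each fragment from guest memory into a local iovec, and hand the frame to
 * fp_port_eg() for egress.  The offload TLVs (L3 csum, TSO) are validated
 * and parsed but not yet acted upon (see the XXX below).
 */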
153 static int tx_consume(Rocker *r, DescInfo *info)
154 {
155     PCIDevice *dev = PCI_DEVICE(r);
156     char *buf = desc_get_buf(info, true);
157     RockerTlv *tlv_frag;
158     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
159     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
160     uint32_t pport;
161     uint32_t port;
162     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
163     uint16_t tx_l3_csum_off = 0;
164     uint16_t tx_tso_mss = 0;
165     uint16_t tx_tso_hdr_len = 0;
166     int iovcnt = 0;
167     int err = ROCKER_OK;
168     int rem;
169     int i;
170 
171     if (!buf) {
172         return -ROCKER_ENXIO;
173     }
174 
175     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
176 
177     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
178         return -ROCKER_EINVAL;
179     }
180 
181     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
182     if (!fp_port_from_pport(pport, &port)) {
183         return -ROCKER_EINVAL;
184     }
185 
186     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
187         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
188     }
189 
190     switch (tx_offload) {
191     case ROCKER_TX_OFFLOAD_L3_CSUM:
192         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
193             return -ROCKER_EINVAL;
194         }
195         break;
196     case ROCKER_TX_OFFLOAD_TSO:
197         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
198             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
199             return -ROCKER_EINVAL;
200         }
201         break;
202     }
203 
204     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
205         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
206     }
207 
208     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
209         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
210     }
211 
212     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
213         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
214     }
215 
216     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
217         hwaddr frag_addr;
218         uint16_t frag_len;
219 
220         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
221             err = -ROCKER_EINVAL;
222             goto err_bad_attr;
223         }
224 
225         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
226 
227         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
228             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
229             err = -ROCKER_EINVAL;
230             goto err_bad_attr;
231         }
232 
233         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
234         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
235 
236         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
237             goto err_too_many_frags;
238         }
239         iov[iovcnt].iov_len = frag_len;
240         iov[iovcnt].iov_base = g_malloc(frag_len);
241         if (!iov[iovcnt].iov_base) {
242             err = -ROCKER_ENOMEM;
243             goto err_no_mem;
244         }
245 
246         if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
247                      iov[iovcnt].iov_len)) {
248             err = -ROCKER_ENXIO;
249             goto err_bad_io;
250         }
251         iovcnt++;
252     }
253 
254     if (iovcnt) {
255         /* XXX perform Tx offloads */
256         /* XXX   silence compiler for now */
257         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
258     }
259 
260     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
261 
262 err_too_many_frags:
263 err_bad_io:
264 err_no_mem:
265 err_bad_attr:
266     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
267         g_free(iov[i].iov_base);
268     }
269 
270     return err;
271 }
272 
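/*
 * Build the response for a get-port-settings command: the descriptor buffer
 * is rewritten with a nested ROCKER_TLV_CMD_INFO TLV carrying the port's
 * current speed/duplex/autoneg, MAC address, world mode, learning flag and
 * phys name.  Returns -ROCKER_EMSGSIZE if the response would not fit in the
 * descriptor buffer.
 */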
273 static int cmd_get_port_settings(Rocker *r,
274                                  DescInfo *info, char *buf,
275                                  RockerTlv *cmd_info_tlv)
276 {
277     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
278     RockerTlv *nest;
279     FpPort *fp_port;
280     uint32_t pport;
281     uint32_t port;
282     uint32_t speed;
283     uint8_t duplex;
284     uint8_t autoneg;
285     uint8_t learning;
286     char *phys_name;
287     MACAddr macaddr;
288     enum rocker_world_type mode;
289     size_t tlv_size;
290     int pos;
291     int err;
292 
293     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
294                             cmd_info_tlv);
295 
296     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
297         return -ROCKER_EINVAL;
298     }
299 
300     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
301     if (!fp_port_from_pport(pport, &port)) {
302         return -ROCKER_EINVAL;
303     }
304     fp_port = r->fp_port[port];
305 
306     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
307     if (err) {
308         return err;
309     }
310 
311     fp_port_get_macaddr(fp_port, &macaddr);
312     mode = world_type(fp_port_get_world(fp_port));
313     learning = fp_port_get_learning(fp_port);
314     phys_name = fp_port_get_name(fp_port);
315 
316     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
317                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
318                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
319                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
320                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
321                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
322                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
323                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
324                rocker_tlv_total_size(strlen(phys_name));
325 
326     if (tlv_size > desc_buf_size(info)) {
327         return -ROCKER_EMSGSIZE;
328     }
329 
330     pos = 0;
331     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
332     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
333     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
334     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
335     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
336     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
337                    sizeof(macaddr.a), macaddr.a);
338     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
339     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
340                       learning);
341     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
342                    strlen(phys_name), phys_name);
343     rocker_tlv_nest_end(buf, &pos, nest);
344 
345     return desc_set_buf(info, tlv_size);
346 }
347 
348 static int cmd_set_port_settings(Rocker *r,
349                                  RockerTlv *cmd_info_tlv)
350 {
351     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
352     FpPort *fp_port;
353     uint32_t pport;
354     uint32_t port;
355     uint32_t speed;
356     uint8_t duplex;
357     uint8_t autoneg;
358     uint8_t learning;
359     MACAddr macaddr;
360     enum rocker_world_type mode;
361     int err;
362 
363     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
364                             cmd_info_tlv);
365 
366     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
367         return -ROCKER_EINVAL;
368     }
369 
370     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
371     if (!fp_port_from_pport(pport, &port)) {
372         return -ROCKER_EINVAL;
373     }
374     fp_port = r->fp_port[port];
375 
376     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
377         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
378         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
379 
380         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
381         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
382         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
383 
384         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
385         if (err) {
386             return err;
387         }
388     }
389 
390     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
391         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
392             sizeof(macaddr.a)) {
393             return -ROCKER_EINVAL;
394         }
395         memcpy(macaddr.a,
396                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
397                sizeof(macaddr.a));
398         fp_port_set_macaddr(fp_port, &macaddr);
399     }
400 
401     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
402         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
403         fp_port_set_world(fp_port, r->worlds[mode]);
404     }
405 
406     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
407         learning =
408             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
409         fp_port_set_learning(fp_port, learning);
410     }
411 
412     return ROCKER_OK;
413 }
414 
415 static int cmd_consume(Rocker *r, DescInfo *info)
416 {
417     char *buf = desc_get_buf(info, false);
418     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
419     RockerTlv *info_tlv;
420     World *world;
421     uint16_t cmd;
422     int err;
423 
424     if (!buf) {
425         return -ROCKER_ENXIO;
426     }
427 
428     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
429 
430     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
431         return -ROCKER_EINVAL;
432     }
433 
434     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
435     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
436 
437     /* This might be reworked to something like this:
438      * Every world will have an array of command handlers from
439      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. It is
440      * up to each world to implement whatever commands it wants.
441      * A world can reference "generic" commands such as
442      * cmd_set_port_settings or cmd_get_port_settings.
443      */
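    /* A rough sketch of that idea (hypothetical, not implemented here):
     *
     *   typedef int (*WorldCmdHandler)(World *world, DescInfo *info,
     *                                  char *buf, RockerTlv *cmd_info_tlv);
     *   WorldCmdHandler cmd_handlers[ROCKER_TLV_CMD_TYPE_MAX + 1];
     *
     * cmd_consume() would then dispatch any command to the port's world via
     * its handler table instead of switching on the OF-DPA cases below.
     */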
444 
445     switch (cmd) {
446     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
447     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
448     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
449     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
450     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
451     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
452     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
453     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
454         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
455         err = world_do_cmd(world, info, buf, cmd, info_tlv);
456         break;
457     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
458         err = cmd_get_port_settings(r, info, buf, info_tlv);
459         break;
460     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
461         err = cmd_set_port_settings(r, info_tlv);
462         break;
463     default:
464         err = -ROCKER_EINVAL;
465         break;
466     }
467 
468     return err;
469 }
470 
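/*
 * Raise one of the device's MSI-X vectors.  Vector numbers come from the
 * ROCKER_MSIX_VEC_* macros; anything at or beyond
 * ROCKER_MSIX_VEC_COUNT(fp_ports) is rejected as out of range.
 */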
471 static void rocker_msix_irq(Rocker *r, unsigned vector)
472 {
473     PCIDevice *dev = PCI_DEVICE(r);
474 
475     DPRINTF("MSI-X notify request for vector %d\n", vector);
476     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
477         DPRINTF("incorrect vector %d\n", vector);
478         return;
479     }
480     msix_notify(dev, vector);
481 }
482 
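/*
 * Event-ring producers.  An event is written into the next free event
 * descriptor as an event-type TLV plus a nested info TLV, the descriptor is
 * posted back to the driver, and ROCKER_MSIX_VEC_EVENT is raised when
 * desc_ring_post_desc() indicates an interrupt is due.
 */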
483 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
484 {
485     DescRing *ring = r->rings[ROCKER_RING_EVENT];
486     DescInfo *info = desc_ring_fetch_desc(ring);
487     RockerTlv *nest;
488     char *buf;
489     size_t tlv_size;
490     int pos;
491     int err;
492 
493     if (!info) {
494         return -ROCKER_ENOBUFS;
495     }
496 
497     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
498                rocker_tlv_total_size(0) +                 /* nest */
499                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
500                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
501 
502     if (tlv_size > desc_buf_size(info)) {
503         err = -ROCKER_EMSGSIZE;
504         goto err_too_big;
505     }
506 
507     buf = desc_get_buf(info, false);
508     if (!buf) {
509         err = -ROCKER_ENOMEM;
510         goto err_no_mem;
511     }
512 
513     pos = 0;
514     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
515                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
516     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
517     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
518     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
519                       link_up ? 1 : 0);
520     rocker_tlv_nest_end(buf, &pos, nest);
521 
522     err = desc_set_buf(info, tlv_size);
523 
524 err_too_big:
525 err_no_mem:
526     if (desc_ring_post_desc(ring, err)) {
527         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
528     }
529 
530     return err;
531 }
532 
533 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
534                                uint16_t vlan_id)
535 {
536     DescRing *ring = r->rings[ROCKER_RING_EVENT];
537     DescInfo *info;
538     FpPort *fp_port;
539     uint32_t port;
540     RockerTlv *nest;
541     char *buf;
542     size_t tlv_size;
543     int pos;
544     int err;
545 
546     if (!fp_port_from_pport(pport, &port)) {
547         return -ROCKER_EINVAL;
548     }
549     fp_port = r->fp_port[port];
550     if (!fp_port_get_learning(fp_port)) {
551         return ROCKER_OK;
552     }
553 
554     info = desc_ring_fetch_desc(ring);
555     if (!info) {
556         return -ROCKER_ENOBUFS;
557     }
558 
559     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
560                rocker_tlv_total_size(0) +                 /* nest */
561                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
562                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
563                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
564 
565     if (tlv_size > desc_buf_size(info)) {
566         err = -ROCKER_EMSGSIZE;
567         goto err_too_big;
568     }
569 
570     buf = desc_get_buf(info, false);
571     if (!buf) {
572         err = -ROCKER_ENOMEM;
573         goto err_no_mem;
574     }
575 
576     pos = 0;
577     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
578                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
579     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
580     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
581     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
582     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
583     rocker_tlv_nest_end(buf, &pos, nest);
584 
585     err = desc_set_buf(info, tlv_size);
586 
587 err_too_big:
588 err_no_mem:
589     if (desc_ring_post_desc(ring, err)) {
590         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
591     }
592 
593     return err;
594 }
595 
596 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
597                                              uint32_t pport)
598 {
599     return r->rings[(pport - 1) * 2 + 3];
600 }
601 
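/*
 * Receive path: a world calls this to deliver a frame to the guest on
 * pport's rx ring.  The driver pre-populates each rx descriptor with a
 * fragment address and max length; the frame is DMA-written there, the
 * descriptor TLVs are rewritten (flags, csum, frag addr/max len/len), and
 * the per-port rx vector is raised once the descriptor is posted.
 */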
602 int rx_produce(World *world, uint32_t pport,
603                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
604 {
605     Rocker *r = world_rocker(world);
606     PCIDevice *dev = (PCIDevice *)r;
607     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
608     DescInfo *info = desc_ring_fetch_desc(ring);
609     char *data;
610     size_t data_size = iov_size(iov, iovcnt);
611     char *buf;
612     uint16_t rx_flags = 0;
613     uint16_t rx_csum = 0;
614     size_t tlv_size;
615     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
616     hwaddr frag_addr;
617     uint16_t frag_max_len;
618     int pos;
619     int err;
620 
621     if (!info) {
622         return -ROCKER_ENOBUFS;
623     }
624 
625     buf = desc_get_buf(info, false);
626     if (!buf) {
627         err = -ROCKER_ENXIO;
628         goto out;
629     }
630     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
631 
632     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
633         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
634         err = -ROCKER_EINVAL;
635         goto out;
636     }
637 
638     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
639     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
640 
641     if (data_size > frag_max_len) {
642         err = -ROCKER_EMSGSIZE;
643         goto out;
644     }
645 
646     if (copy_to_cpu) {
647         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
648     }
649 
650     /* XXX calc rx flags/csum */
651 
652     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
653                rocker_tlv_total_size(sizeof(uint16_t)) + /* csum */
654                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
655                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
656                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
657 
658     if (tlv_size > desc_buf_size(info)) {
659         err = -ROCKER_EMSGSIZE;
660         goto out;
661     }
662 
663     /* TODO:
664      * The iov dma write can be optimized in a similar way to how e1000
665      * does it in e1000_receive_iov. But maybe it would make sense to
666      * introduce a generic helper iov_dma_write.
667      */
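    /* A hypothetical iov_dma_write(dev, frag_addr, iov, iovcnt) could do,
     * roughly:
     *
     *   for (i = 0, off = 0; i < iovcnt; off += iov[i++].iov_len) {
     *       pci_dma_write(dev, frag_addr + off, iov[i].iov_base,
     *                     iov[i].iov_len);
     *   }
     *
     * which would avoid the bounce buffer used below.
     */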
668 
669     data = g_malloc(data_size);
670     if (!data) {
671         err = -ROCKER_ENOMEM;
672         goto out;
673     }
674     iov_to_buf(iov, iovcnt, 0, data, data_size);
675     pci_dma_write(dev, frag_addr, data, data_size);
676     g_free(data);
677 
678     pos = 0;
679     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
680     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
681     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
682     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
683     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
684 
685     err = desc_set_buf(info, tlv_size);
686 
687 out:
688     if (desc_ring_post_desc(ring, err)) {
689         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
690     }
691 
692     return err;
693 }
694 
695 int rocker_port_eg(Rocker *r, uint32_t pport,
696                    const struct iovec *iov, int iovcnt)
697 {
698     FpPort *fp_port;
699     uint32_t port;
700 
701     if (!fp_port_from_pport(pport, &port)) {
702         return -ROCKER_EINVAL;
703     }
704 
705     fp_port = r->fp_port[port];
706 
707     return fp_port_eg(fp_port, iov, iovcnt);
708 }
709 
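/*
 * Test DMA engine: the driver programs ROCKER_TEST_DMA_ADDR and
 * ROCKER_TEST_DMA_SIZE, then writes an op to ROCKER_TEST_DMA_CTRL.  The
 * device clears, fills or inverts a buffer of that size, DMA-writes the
 * result back to the given guest address and raises ROCKER_MSIX_VEC_TEST.
 */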
710 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
711 {
712     PCIDevice *dev = PCI_DEVICE(r);
713     char *buf;
714     int i;
715 
716     buf = g_malloc(r->test_dma_size);
717 
718     if (!buf) {
719         DPRINTF("test dma buffer alloc failed");
720         return;
721     }
722 
723     switch (val) {
724     case ROCKER_TEST_DMA_CTRL_CLEAR:
725         memset(buf, 0, r->test_dma_size);
726         break;
727     case ROCKER_TEST_DMA_CTRL_FILL:
728         memset(buf, 0x96, r->test_dma_size);
729         break;
730     case ROCKER_TEST_DMA_CTRL_INVERT:
731         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
732         for (i = 0; i < r->test_dma_size; i++) {
733             buf[i] = ~buf[i];
734         }
735         break;
736     default:
737         DPRINTF("not test dma control val=0x%08x\n", val);
738         goto err_out;
739     }
740     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
741 
742     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
743 
744 err_out:
745     g_free(buf);
746 }
747 
748 static void rocker_reset(DeviceState *dev);
749 
750 static void rocker_control(Rocker *r, uint32_t val)
751 {
752     if (val & ROCKER_CONTROL_RESET) {
753         rocker_reset(DEVICE(r));
754     }
755 }
756 
757 static int rocker_pci_ring_count(Rocker *r)
758 {
759     /* There are:
760      * - command ring
761      * - event ring
762      * - one tx ring and one rx ring per front-panel port
763      */
764     return 2 + (2 * r->fp_ports);
765 }
766 
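/*
 * Descriptor-ring registers occupy a window starting at ROCKER_DMA_DESC_BASE
 * with ROCKER_DMA_DESC_SIZE bytes of register space per ring;
 * ROCKER_RING_INDEX() extracts the ring number from an address and
 * ROCKER_DMA_DESC_MASK the register offset within that ring's block.
 */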
767 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
768 {
769     hwaddr start = ROCKER_DMA_DESC_BASE;
770     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
771 
772     return addr >= start && addr < end;
773 }
774 
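/*
 * The PORT_PHYS_ENABLE and PORT_PHYS_LINK_STATUS registers use bit (i + 1)
 * for front-panel port i; bit 0 is unused (see the i + 1 shifts here and in
 * rocker_port_phys_link_status()/rocker_port_phys_enable_read()).
 */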
775 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
776 {
777     int i;
778     bool old_enabled;
779     bool new_enabled;
780     FpPort *fp_port;
781 
782     for (i = 0; i < r->fp_ports; i++) {
783         fp_port = r->fp_port[i];
784         old_enabled = fp_port_enabled(fp_port);
785         new_enabled = (new >> (i + 1)) & 0x1;
786         if (new_enabled == old_enabled) {
787             continue;
788         }
789         if (new_enabled) {
790             fp_port_enable(r->fp_port[i]);
791         } else {
792             fp_port_disable(r->fp_port[i]);
793         }
794     }
795 }
796 
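/*
 * 32-bit register write handler.  64-bit registers may be written as two
 * 32-bit halves: the lower half is latched in r->lower32 and combined with
 * the upper half when offset + 4 is written (see ROCKER_TEST_REG64,
 * ROCKER_TEST_DMA_ADDR, ROCKER_PORT_PHYS_ENABLE and the ring ADDR register).
 */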
797 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
798 {
799     Rocker *r = opaque;
800 
801     if (rocker_addr_is_desc_reg(r, addr)) {
802         unsigned index = ROCKER_RING_INDEX(addr);
803         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
804 
805         switch (offset) {
806         case ROCKER_DMA_DESC_ADDR_OFFSET:
807             r->lower32 = (uint64_t)val;
808             break;
809         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
810             desc_ring_set_base_addr(r->rings[index],
811                                     ((uint64_t)val) << 32 | r->lower32);
812             r->lower32 = 0;
813             break;
814         case ROCKER_DMA_DESC_SIZE_OFFSET:
815             desc_ring_set_size(r->rings[index], val);
816             break;
817         case ROCKER_DMA_DESC_HEAD_OFFSET:
818             if (desc_ring_set_head(r->rings[index], val)) {
819                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
820             }
821             break;
822         case ROCKER_DMA_DESC_CTRL_OFFSET:
823             desc_ring_set_ctrl(r->rings[index], val);
824             break;
825         case ROCKER_DMA_DESC_CREDITS_OFFSET:
826             if (desc_ring_ret_credits(r->rings[index], val)) {
827                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
828             }
829             break;
830         default:
831             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
832                     " val=0x%08x (ring %d, addr=0x%02x)\n",
833                     addr, val, index, offset);
834             break;
835         }
836         return;
837     }
838 
839     switch (addr) {
840     case ROCKER_TEST_REG:
841         r->test_reg = val;
842         break;
843     case ROCKER_TEST_REG64:
844     case ROCKER_TEST_DMA_ADDR:
845     case ROCKER_PORT_PHYS_ENABLE:
846         r->lower32 = (uint64_t)val;
847         break;
848     case ROCKER_TEST_REG64 + 4:
849         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
850         r->lower32 = 0;
851         break;
852     case ROCKER_TEST_IRQ:
853         rocker_msix_irq(r, val);
854         break;
855     case ROCKER_TEST_DMA_SIZE:
856         r->test_dma_size = val;
857         break;
858     case ROCKER_TEST_DMA_ADDR + 4:
859         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
860         r->lower32 = 0;
861         break;
862     case ROCKER_TEST_DMA_CTRL:
863         rocker_test_dma_ctrl(r, val);
864         break;
865     case ROCKER_CONTROL:
866         rocker_control(r, val);
867         break;
868     case ROCKER_PORT_PHYS_ENABLE + 4:
869         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
870         r->lower32 = 0;
871         break;
872     default:
873         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
874                 " val=0x%08x\n", addr, val);
875         break;
876     }
877 }
878 
879 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
880 {
881     Rocker *r = opaque;
882 
883     if (rocker_addr_is_desc_reg(r, addr)) {
884         unsigned index = ROCKER_RING_INDEX(addr);
885         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
886 
887         switch (offset) {
888         case ROCKER_DMA_DESC_ADDR_OFFSET:
889             desc_ring_set_base_addr(r->rings[index], val);
890             break;
891         default:
892             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
893                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
894                     addr, val, index, offset);
895             break;
896         }
897         return;
898     }
899 
900     switch (addr) {
901     case ROCKER_TEST_REG64:
902         r->test_reg64 = val;
903         break;
904     case ROCKER_TEST_DMA_ADDR:
905         r->test_dma_addr = val;
906         break;
907     case ROCKER_PORT_PHYS_ENABLE:
908         rocker_port_phys_enable_write(r, val);
909         break;
910     default:
911         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
912                 " val=0x" TARGET_FMT_plx "\n", addr, val);
913         break;
914     }
915 }
916 
917 #ifdef DEBUG_ROCKER
918 #define regname(reg) case (reg): return #reg
919 static const char *rocker_reg_name(void *opaque, hwaddr addr)
920 {
921     Rocker *r = opaque;
922 
923     if (rocker_addr_is_desc_reg(r, addr)) {
924         unsigned index = ROCKER_RING_INDEX(addr);
925         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
926         static char buf[100];
927         char ring_name[10];
928 
929         switch (index) {
930         case 0:
931             sprintf(ring_name, "cmd");
932             break;
933         case 1:
934             sprintf(ring_name, "event");
935             break;
936         default:
937             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
938                     (index - 2) / 2);
939         }
940 
941         switch (offset) {
942         case ROCKER_DMA_DESC_ADDR_OFFSET:
943             sprintf(buf, "Ring[%s] ADDR", ring_name);
944             return buf;
945         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
946             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
947             return buf;
948         case ROCKER_DMA_DESC_SIZE_OFFSET:
949             sprintf(buf, "Ring[%s] SIZE", ring_name);
950             return buf;
951         case ROCKER_DMA_DESC_HEAD_OFFSET:
952             sprintf(buf, "Ring[%s] HEAD", ring_name);
953             return buf;
954         case ROCKER_DMA_DESC_TAIL_OFFSET:
955             sprintf(buf, "Ring[%s] TAIL", ring_name);
956             return buf;
957         case ROCKER_DMA_DESC_CTRL_OFFSET:
958             sprintf(buf, "Ring[%s] CTRL", ring_name);
959             return buf;
960         case ROCKER_DMA_DESC_CREDITS_OFFSET:
961             sprintf(buf, "Ring[%s] CREDITS", ring_name);
962             return buf;
963         default:
964             sprintf(buf, "Ring[%s] ???", ring_name);
965             return buf;
966         }
967     } else {
968         switch (addr) {
969             regname(ROCKER_BOGUS_REG0);
970             regname(ROCKER_BOGUS_REG1);
971             regname(ROCKER_BOGUS_REG2);
972             regname(ROCKER_BOGUS_REG3);
973             regname(ROCKER_TEST_REG);
974             regname(ROCKER_TEST_REG64);
975             regname(ROCKER_TEST_REG64+4);
976             regname(ROCKER_TEST_IRQ);
977             regname(ROCKER_TEST_DMA_ADDR);
978             regname(ROCKER_TEST_DMA_ADDR+4);
979             regname(ROCKER_TEST_DMA_SIZE);
980             regname(ROCKER_TEST_DMA_CTRL);
981             regname(ROCKER_CONTROL);
982             regname(ROCKER_PORT_PHYS_COUNT);
983             regname(ROCKER_PORT_PHYS_LINK_STATUS);
984             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
985             regname(ROCKER_PORT_PHYS_ENABLE);
986             regname(ROCKER_PORT_PHYS_ENABLE+4);
987             regname(ROCKER_SWITCH_ID);
988             regname(ROCKER_SWITCH_ID+4);
989         }
990     }
991     return "???";
992 }
993 #else
994 static const char *rocker_reg_name(void *opaque, hwaddr addr)
995 {
996     return NULL;
997 }
998 #endif
999 
1000 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1001                               unsigned size)
1002 {
1003     DPRINTF("Write %s addr " TARGET_FMT_plx
1004             ", size %u, val " TARGET_FMT_plx "\n",
1005             rocker_reg_name(opaque, addr), addr, size, val);
1006 
1007     switch (size) {
1008     case 4:
1009         rocker_io_writel(opaque, addr, val);
1010         break;
1011     case 8:
1012         rocker_io_writeq(opaque, addr, val);
1013         break;
1014     }
1015 }
1016 
1017 static uint64_t rocker_port_phys_link_status(Rocker *r)
1018 {
1019     int i;
1020     uint64_t status = 0;
1021 
1022     for (i = 0; i < r->fp_ports; i++) {
1023         FpPort *port = r->fp_port[i];
1024 
1025         if (fp_port_get_link_up(port)) {
1026             status |= 1 << (i + 1);
1027         }
1028     }
1029     return status;
1030 }
1031 
1032 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1033 {
1034     int i;
1035     uint64_t ret = 0;
1036 
1037     for (i = 0; i < r->fp_ports; i++) {
1038         FpPort *port = r->fp_port[i];
1039 
1040         if (fp_port_enabled(port)) {
1041             ret |= 1 << (i + 1);
1042         }
1043     }
1044     return ret;
1045 }
1046 
1047 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1048 {
1049     Rocker *r = opaque;
1050     uint32_t ret;
1051 
1052     if (rocker_addr_is_desc_reg(r, addr)) {
1053         unsigned index = ROCKER_RING_INDEX(addr);
1054         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1055 
1056         switch (offset) {
1057         case ROCKER_DMA_DESC_ADDR_OFFSET:
1058             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1059             break;
1060         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1061             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1062             break;
1063         case ROCKER_DMA_DESC_SIZE_OFFSET:
1064             ret = desc_ring_get_size(r->rings[index]);
1065             break;
1066         case ROCKER_DMA_DESC_HEAD_OFFSET:
1067             ret = desc_ring_get_head(r->rings[index]);
1068             break;
1069         case ROCKER_DMA_DESC_TAIL_OFFSET:
1070             ret = desc_ring_get_tail(r->rings[index]);
1071             break;
1072         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1073             ret = desc_ring_get_credits(r->rings[index]);
1074             break;
1075         default:
1076             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1077                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1078             ret = 0;
1079             break;
1080         }
1081         return ret;
1082     }
1083 
1084     switch (addr) {
1085     case ROCKER_BOGUS_REG0:
1086     case ROCKER_BOGUS_REG1:
1087     case ROCKER_BOGUS_REG2:
1088     case ROCKER_BOGUS_REG3:
1089         ret = 0xDEADBABE;
1090         break;
1091     case ROCKER_TEST_REG:
1092         ret = r->test_reg * 2;
1093         break;
1094     case ROCKER_TEST_REG64:
1095         ret = (uint32_t)(r->test_reg64 * 2);
1096         break;
1097     case ROCKER_TEST_REG64 + 4:
1098         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1099         break;
1100     case ROCKER_TEST_DMA_SIZE:
1101         ret = r->test_dma_size;
1102         break;
1103     case ROCKER_TEST_DMA_ADDR:
1104         ret = (uint32_t)r->test_dma_addr;
1105         break;
1106     case ROCKER_TEST_DMA_ADDR + 4:
1107         ret = (uint32_t)(r->test_dma_addr >> 32);
1108         break;
1109     case ROCKER_PORT_PHYS_COUNT:
1110         ret = r->fp_ports;
1111         break;
1112     case ROCKER_PORT_PHYS_LINK_STATUS:
1113         ret = (uint32_t)rocker_port_phys_link_status(r);
1114         break;
1115     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1116         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1117         break;
1118     case ROCKER_PORT_PHYS_ENABLE:
1119         ret = (uint32_t)rocker_port_phys_enable_read(r);
1120         break;
1121     case ROCKER_PORT_PHYS_ENABLE + 4:
1122         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1123         break;
1124     case ROCKER_SWITCH_ID:
1125         ret = (uint32_t)r->switch_id;
1126         break;
1127     case ROCKER_SWITCH_ID + 4:
1128         ret = (uint32_t)(r->switch_id >> 32);
1129         break;
1130     default:
1131         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1132         ret = 0;
1133         break;
1134     }
1135     return ret;
1136 }
1137 
1138 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1139 {
1140     Rocker *r = opaque;
1141     uint64_t ret;
1142 
1143     if (rocker_addr_is_desc_reg(r, addr)) {
1144         unsigned index = ROCKER_RING_INDEX(addr);
1145         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1146 
1147         switch (offset) {
1148         case ROCKER_DMA_DESC_ADDR_OFFSET:
1149             ret = desc_ring_get_base_addr(r->rings[index]);
1150             break;
1151         default:
1152             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1153                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1154             ret = 0;
1155             break;
1156         }
1157         return ret;
1158     }
1159 
1160     switch (addr) {
1161     case ROCKER_BOGUS_REG0:
1162     case ROCKER_BOGUS_REG2:
1163         ret = 0xDEADBABEDEADBABEULL;
1164         break;
1165     case ROCKER_TEST_REG64:
1166         ret = r->test_reg64 * 2;
1167         break;
1168     case ROCKER_TEST_DMA_ADDR:
1169         ret = r->test_dma_addr;
1170         break;
1171     case ROCKER_PORT_PHYS_LINK_STATUS:
1172         ret = rocker_port_phys_link_status(r);
1173         break;
1174     case ROCKER_PORT_PHYS_ENABLE:
1175         ret = rocker_port_phys_enable_read(r);
1176         break;
1177     case ROCKER_SWITCH_ID:
1178         ret = r->switch_id;
1179         break;
1180     default:
1181         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1182         ret = 0;
1183         break;
1184     }
1185     return ret;
1186 }
1187 
1188 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1189 {
1190     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1191             rocker_reg_name(opaque, addr), addr, size);
1192 
1193     switch (size) {
1194     case 4:
1195         return rocker_io_readl(opaque, addr);
1196     case 8:
1197         return rocker_io_readq(opaque, addr);
1198     }
1199 
1200     return -1;
1201 }
1202 
1203 static const MemoryRegionOps rocker_mmio_ops = {
1204     .read = rocker_mmio_read,
1205     .write = rocker_mmio_write,
1206     .endianness = DEVICE_LITTLE_ENDIAN,
1207     .valid = {
1208         .min_access_size = 4,
1209         .max_access_size = 8,
1210     },
1211     .impl = {
1212         .min_access_size = 4,
1213         .max_access_size = 8,
1214     },
1215 };
1216 
1217 static void rocker_msix_vectors_unuse(Rocker *r,
1218                                       unsigned int num_vectors)
1219 {
1220     PCIDevice *dev = PCI_DEVICE(r);
1221     int i;
1222 
1223     for (i = 0; i < num_vectors; i++) {
1224         msix_vector_unuse(dev, i);
1225     }
1226 }
1227 
1228 static int rocker_msix_vectors_use(Rocker *r,
1229                                    unsigned int num_vectors)
1230 {
1231     PCIDevice *dev = PCI_DEVICE(r);
1232     int err;
1233     int i;
1234 
1235     for (i = 0; i < num_vectors; i++) {
1236         err = msix_vector_use(dev, i);
1237         if (err) {
1238             goto rollback;
1239         }
1240     }
1241     return 0;
1242 
1243 rollback:
1244     rocker_msix_vectors_unuse(r, i);
1245     return err;
1246 }
1247 
1248 static int rocker_msix_init(Rocker *r)
1249 {
1250     PCIDevice *dev = PCI_DEVICE(r);
1251     int err;
1252 
1253     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1254                     &r->msix_bar,
1255                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1256                     &r->msix_bar,
1257                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1258                     0);
1259     if (err) {
1260         return err;
1261     }
1262 
1263     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1264     if (err) {
1265         goto err_msix_vectors_use;
1266     }
1267 
1268     return 0;
1269 
1270 err_msix_vectors_use:
1271     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1272     return err;
1273 }
1274 
1275 static void rocker_msix_uninit(Rocker *r)
1276 {
1277     PCIDevice *dev = PCI_DEVICE(r);
1278 
1279     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1280     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1281 }
1282 
1283 static int pci_rocker_init(PCIDevice *dev)
1284 {
1285     Rocker *r = to_rocker(dev);
1286     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1287     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1288     static int sw_index;
1289     int i, err = 0;
1290 
1291     /* allocate worlds */
1292 
1293     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1294     r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
1295 
1296     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1297         if (!r->worlds[i]) {
1298             goto err_world_alloc;
1299         }
1300     }
1301 
1302     /* set up memory-mapped region at BAR0 */
1303 
1304     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1305                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1306     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1307                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1308 
1309     /* set up memory-mapped region for MSI-X */
1310 
1311     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1312                        ROCKER_PCI_MSIX_BAR_SIZE);
1313     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1314                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1315 
1316     /* MSI-X init */
1317 
1318     err = rocker_msix_init(r);
1319     if (err) {
1320         goto err_msix_init;
1321     }
1322 
1323     /* validate switch properties */
1324 
1325     if (!r->name) {
1326         r->name = g_strdup(ROCKER);
1327     }
1328 
1329     if (rocker_find(r->name)) {
1330         err = -EEXIST;
1331         goto err_duplicate;
1332     }
1333 
1334     /* The rocker name is passed in port name requests to the OS with the
1335      * intention that the name is used in interface names. Limit the length
1336      * of the rocker name to avoid naming problems in the OS. Also, the
1337      * port number is appended as p# and an unganged breakout as b#, where
1338      * # is at most 2 digits, so leave room for those too (-1 for the
1339      * string terminator, -3 for p# and -3 for b#).
1340      */
1341 #define ROCKER_IFNAMSIZ 16
1342 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1343     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1344         fprintf(stderr,
1345                 "rocker: name too long; please shorten to at most %d chars\n",
1346                 MAX_ROCKER_NAME_LEN);
1347         return -EINVAL;
1348     }
1349 
1350     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1351         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1352         r->fp_start_macaddr.a[4] += (sw_index++);
1353     }
1354 
1355     if (!r->switch_id) {
1356         memcpy(&r->switch_id, &r->fp_start_macaddr,
1357                sizeof(r->fp_start_macaddr));
1358     }
1359 
1360     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1361         r->fp_ports = ROCKER_FP_PORTS_MAX;
1362     }
1363 
1364     r->rings = g_new(DescRing *, rocker_pci_ring_count(r));
1365     if (!r->rings) {
1366         goto err_rings_alloc;
1367     }
1368 
1369     /* Rings are ordered like this:
1370      * - command ring
1371      * - event ring
1372      * - port0 tx ring
1373      * - port0 rx ring
1374      * - port1 tx ring
1375      * - port1 rx ring
1376      * .....
1377      */
1378 
1379     err = -ENOMEM;
1380     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1381         DescRing *ring = desc_ring_alloc(r, i);
1382 
1383         if (!ring) {
1384             goto err_ring_alloc;
1385         }
1386 
1387         if (i == ROCKER_RING_CMD) {
1388             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1389         } else if (i == ROCKER_RING_EVENT) {
1390             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1391         } else if (i % 2 == 0) {
1392             desc_ring_set_consume(ring, tx_consume,
1393                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1394         } else if (i % 2 == 1) {
1395             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1396         }
1397 
1398         r->rings[i] = ring;
1399     }
1400 
1401     for (i = 0; i < r->fp_ports; i++) {
1402         FpPort *port =
1403             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1404                           i, &r->fp_ports_peers[i]);
1405 
1406         if (!port) {
1407             goto err_port_alloc;
1408         }
1409 
1410         r->fp_port[i] = port;
1411         fp_port_set_world(port, r->world_dflt);
1412     }
1413 
1414     QLIST_INSERT_HEAD(&rockers, r, next);
1415 
1416     return 0;
1417 
1418 err_port_alloc:
1419     for (--i; i >= 0; i--) {
1420         FpPort *port = r->fp_port[i];
1421         fp_port_free(port);
1422     }
1423     i = rocker_pci_ring_count(r);
1424 err_ring_alloc:
1425     for (--i; i >= 0; i--) {
1426         desc_ring_free(r->rings[i]);
1427     }
1428     g_free(r->rings);
1429 err_rings_alloc:
1430 err_duplicate:
1431     rocker_msix_uninit(r);
1432 err_msix_init:
1433     object_unparent(OBJECT(&r->msix_bar));
1434     object_unparent(OBJECT(&r->mmio));
1435 err_world_alloc:
1436     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1437         if (r->worlds[i]) {
1438             world_free(r->worlds[i]);
1439         }
1440     }
1441     return err;
1442 }
1443 
1444 static void pci_rocker_uninit(PCIDevice *dev)
1445 {
1446     Rocker *r = to_rocker(dev);
1447     int i;
1448 
1449     QLIST_REMOVE(r, next);
1450 
1451     for (i = 0; i < r->fp_ports; i++) {
1452         FpPort *port = r->fp_port[i];
1453 
1454         fp_port_free(port);
1455         r->fp_port[i] = NULL;
1456     }
1457 
1458     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1459         if (r->rings[i]) {
1460             desc_ring_free(r->rings[i]);
1461         }
1462     }
1463     g_free(r->rings);
1464 
1465     rocker_msix_uninit(r);
1466     object_unparent(OBJECT(&r->msix_bar));
1467     object_unparent(OBJECT(&r->mmio));
1468 
1469     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1470         if (r->worlds[i]) {
1471             world_free(r->worlds[i]);
1472         }
1473     }
1474     g_free(r->fp_ports_peers);
1475 }
1476 
1477 static void rocker_reset(DeviceState *dev)
1478 {
1479     Rocker *r = to_rocker(dev);
1480     int i;
1481 
1482     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1483         if (r->worlds[i]) {
1484             world_reset(r->worlds[i]);
1485         }
1486     }
1487     for (i = 0; i < r->fp_ports; i++) {
1488         fp_port_reset(r->fp_port[i]);
1489         fp_port_set_world(r->fp_port[i], r->world_dflt);
1490     }
1491 
1492     r->test_reg = 0;
1493     r->test_reg64 = 0;
1494     r->test_dma_addr = 0;
1495     r->test_dma_size = 0;
1496 
1497     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1498         desc_ring_reset(r->rings[i]);
1499     }
1500 
1501     DPRINTF("Reset done\n");
1502 }
1503 
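/*
 * Device properties.  A rocker switch is typically created with something
 * like (see docs/specs/rocker.txt):
 *
 *   -device rocker,name=sw1,len-ports=2,ports[0]=dev0,ports[1]=dev1
 *
 * "ports" is an array property, so it is set via len-ports plus indexed
 * ports[N] entries naming the netdev peers for the front-panel ports.
 */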
1504 static Property rocker_properties[] = {
1505     DEFINE_PROP_STRING("name", Rocker, name),
1506     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1507                         fp_start_macaddr),
1508     DEFINE_PROP_UINT64("switch_id", Rocker,
1509                        switch_id, 0),
1510     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1511                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1512     DEFINE_PROP_END_OF_LIST(),
1513 };
1514 
1515 static const VMStateDescription rocker_vmsd = {
1516     .name = ROCKER,
1517     .unmigratable = 1,
1518 };
1519 
1520 static void rocker_class_init(ObjectClass *klass, void *data)
1521 {
1522     DeviceClass *dc = DEVICE_CLASS(klass);
1523     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1524 
1525     k->init = pci_rocker_init;
1526     k->exit = pci_rocker_uninit;
1527     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1528     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1529     k->revision = ROCKER_PCI_REVISION;
1530     k->class_id = PCI_CLASS_NETWORK_OTHER;
1531     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1532     dc->desc = "Rocker Switch";
1533     dc->reset = rocker_reset;
1534     dc->props = rocker_properties;
1535     dc->vmsd = &rocker_vmsd;
1536 }
1537 
1538 static const TypeInfo rocker_info = {
1539     .name          = ROCKER,
1540     .parent        = TYPE_PCI_DEVICE,
1541     .instance_size = sizeof(Rocker),
1542     .class_init    = rocker_class_init,
1543 };
1544 
1545 static void rocker_register_types(void)
1546 {
1547     type_register_static(&rocker_info);
1548 }
1549 
1550 type_init(rocker_register_types)
1551