xref: /openbmc/qemu/hw/net/rocker/rocker.c (revision c11b0583)
1 /*
2  * QEMU rocker switch emulation - PCI device
3  *
4  * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
5  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  */
17 
18 #include "hw/hw.h"
19 #include "hw/pci/pci.h"
20 #include "hw/pci/msix.h"
21 #include "net/net.h"
22 #include "net/eth.h"
23 #include "qemu/iov.h"
24 #include "qemu/bitops.h"
25 #include "qmp-commands.h"
26 
27 #include "rocker.h"
28 #include "rocker_hw.h"
29 #include "rocker_fp.h"
30 #include "rocker_desc.h"
31 #include "rocker_tlv.h"
32 #include "rocker_world.h"
33 #include "rocker_of_dpa.h"
34 
35 struct rocker {
36     /* private */
37     PCIDevice parent_obj;
38     /* public */
39 
40     MemoryRegion mmio;
41     MemoryRegion msix_bar;
42 
43     /* switch configuration */
44     char *name;                  /* switch name */
45     uint32_t fp_ports;           /* front-panel port count */
46     NICPeers *fp_ports_peers;
47     MACAddr fp_start_macaddr;    /* front-panel port 0 mac addr */
48     uint64_t switch_id;          /* switch id */
49 
50     /* front-panel ports */
51     FpPort *fp_port[ROCKER_FP_PORTS_MAX];
52 
53     /* register backings */
54     uint32_t test_reg;
55     uint64_t test_reg64;
56     dma_addr_t test_dma_addr;
57     uint32_t test_dma_size;
58     uint64_t lower32;            /* lower 32-bit val in 2-part 64-bit access */
59 
60     /* desc rings */
61     DescRing **rings;
62 
63     /* switch worlds */
64     World *worlds[ROCKER_WORLD_TYPE_MAX];
65     World *world_dflt;
66 
67     QLIST_ENTRY(rocker) next;
68 };
69 
70 #define ROCKER "rocker"
71 
72 #define to_rocker(obj) \
73     OBJECT_CHECK(Rocker, (obj), ROCKER)
74 
75 static QLIST_HEAD(, rocker) rockers;
76 
77 Rocker *rocker_find(const char *name)
78 {
79     Rocker *r;
80 
81     QLIST_FOREACH(r, &rockers, next)
82         if (strcmp(r->name, name) == 0) {
83             return r;
84         }
85 
86     return NULL;
87 }
88 
89 World *rocker_get_world(Rocker *r, enum rocker_world_type type)
90 {
91     if (type < ROCKER_WORLD_TYPE_MAX) {
92         return r->worlds[type];
93     }
94     return NULL;
95 }
96 
97 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
98 {
99     RockerSwitch *rocker;
100     Rocker *r;
101 
102     r = rocker_find(name);
103     if (!r) {
104         error_set(errp, ERROR_CLASS_GENERIC_ERROR,
105                   "rocker %s not found", name);
106         return NULL;
107     }
108 
109     rocker = g_new0(RockerSwitch, 1);
110     rocker->name = g_strdup(r->name);
111     rocker->id = r->switch_id;
112     rocker->ports = r->fp_ports;
113 
114     return rocker;
115 }
116 
117 RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp)
118 {
119     RockerPortList *list = NULL;
120     Rocker *r;
121     int i;
122 
123     r = rocker_find(name);
124     if (!r) {
125         error_set(errp, ERROR_CLASS_GENERIC_ERROR,
126                   "rocker %s not found", name);
127         return NULL;
128     }
129 
130     for (i = r->fp_ports - 1; i >= 0; i--) {
131         RockerPortList *info = g_malloc0(sizeof(*info));
132         info->value = g_malloc0(sizeof(*info->value));
133         struct fp_port *port = r->fp_port[i];
134 
135         fp_port_get_info(port, info);
136         info->next = list;
137         list = info;
138     }
139 
140     return list;
141 }
142 
143 uint32_t rocker_fp_ports(Rocker *r)
144 {
145     return r->fp_ports;
146 }
147 
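/* Descriptor rings are laid out as: ring 0 = cmd, ring 1 = event, then one
 * tx/rx pair per front-panel port (tx at ring index 2n + 2, rx at 2n + 3 for
 * port n).  pport numbering is 1-based, so a tx ring index maps back to its
 * pport as computed below.
 */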
148 static uint32_t rocker_get_pport_by_tx_ring(Rocker *r,
149                                             DescRing *ring)
150 {
151     return (desc_ring_index(ring) - 2) / 2 + 1;
152 }
153 
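/* Consume one tx descriptor posted by the driver: parse its TLVs, DMA each
 * fragment from guest memory into a local iovec, and hand the assembled
 * frame to the front-panel port for egress.
 */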
154 static int tx_consume(Rocker *r, DescInfo *info)
155 {
156     PCIDevice *dev = PCI_DEVICE(r);
157     char *buf = desc_get_buf(info, true);
158     RockerTlv *tlv_frag;
159     RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1];
160     struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, };
161     uint32_t pport;
162     uint32_t port;
163     uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE;
164     uint16_t tx_l3_csum_off = 0;
165     uint16_t tx_tso_mss = 0;
166     uint16_t tx_tso_hdr_len = 0;
167     int iovcnt = 0;
168     int err = ROCKER_OK;
169     int rem;
170     int i;
171 
172     if (!buf) {
173         return -ROCKER_ENXIO;
174     }
175 
176     rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info));
177 
178     if (!tlvs[ROCKER_TLV_TX_FRAGS]) {
179         return -ROCKER_EINVAL;
180     }
181 
182     pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info));
183     if (!fp_port_from_pport(pport, &port)) {
184         return -ROCKER_EINVAL;
185     }
186 
187     if (tlvs[ROCKER_TLV_TX_OFFLOAD]) {
188         tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]);
189     }
190 
191     switch (tx_offload) {
192     case ROCKER_TX_OFFLOAD_L3_CSUM:
193         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
194             return -ROCKER_EINVAL;
195         }
196         break;
197     case ROCKER_TX_OFFLOAD_TSO:
198         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
199             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
200             return -ROCKER_EINVAL;
201         }
202         break;
203     }
204 
205     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
206         tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]);
207     }
208 
209     if (tlvs[ROCKER_TLV_TX_TSO_MSS]) {
210         tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]);
211     }
212 
213     if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
214         tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]);
215     }
216 
217     rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) {
218         hwaddr frag_addr;
219         uint16_t frag_len;
220 
221         if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) {
222             err = -ROCKER_EINVAL;
223             goto err_bad_attr;
224         }
225 
226         rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag);
227 
228         if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
229             !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) {
230             err = -ROCKER_EINVAL;
231             goto err_bad_attr;
232         }
233 
234         frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
235         frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
236 
237         if (iovcnt >= ROCKER_TX_FRAGS_MAX) {
238             /* bounds check must come before iov[iovcnt] is written */
239             err = -ROCKER_EINVAL;
240             goto err_too_many_frags;
241         }
242 
243         iov[iovcnt].iov_len = frag_len;
244         iov[iovcnt].iov_base = g_malloc(frag_len);
245 
246         if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base,
247                          iov[iovcnt].iov_len)) {
248             err = -ROCKER_ENXIO;
249             goto err_bad_io;
250         }
251 
252         iovcnt++;
253     }
254 
255     if (iovcnt) {
256         /* XXX perform Tx offloads */
257         /* XXX   silence compiler for now */
258         tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0;
259     }
260 
261     err = fp_port_eg(r->fp_port[port], iov, iovcnt);
262 
263 err_too_many_frags:
264 err_bad_io:
266 err_bad_attr:
267     for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) {
268         g_free(iov[i].iov_base);
269     }
270 
271     return err;
272 }
273 
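/* Fill the command descriptor's buffer with a nested CMD_INFO TLV reporting
 * the port's current speed/duplex/autoneg, MAC address, world mode, learning
 * flag and physical name.
 */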
274 static int cmd_get_port_settings(Rocker *r,
275                                  DescInfo *info, char *buf,
276                                  RockerTlv *cmd_info_tlv)
277 {
278     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
279     RockerTlv *nest;
280     FpPort *fp_port;
281     uint32_t pport;
282     uint32_t port;
283     uint32_t speed;
284     uint8_t duplex;
285     uint8_t autoneg;
286     uint8_t learning;
287     char *phys_name;
288     MACAddr macaddr;
289     enum rocker_world_type mode;
290     size_t tlv_size;
291     int pos;
292     int err;
293 
294     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
295                             cmd_info_tlv);
296 
297     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
298         return -ROCKER_EINVAL;
299     }
300 
301     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
302     if (!fp_port_from_pport(pport, &port)) {
303         return -ROCKER_EINVAL;
304     }
305     fp_port = r->fp_port[port];
306 
307     err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg);
308     if (err) {
309         return err;
310     }
311 
312     fp_port_get_macaddr(fp_port, &macaddr);
313     mode = world_type(fp_port_get_world(fp_port));
314     learning = fp_port_get_learning(fp_port);
315     phys_name = fp_port_get_name(fp_port);
316 
317     tlv_size = rocker_tlv_total_size(0) +                 /* nest */
318                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
319                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   speed */
320                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   duplex */
321                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   autoneg */
322                rocker_tlv_total_size(sizeof(macaddr.a)) + /*   macaddr */
323                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   mode */
324                rocker_tlv_total_size(sizeof(uint8_t)) +   /*   learning */
325                rocker_tlv_total_size(strlen(phys_name));
326 
327     if (tlv_size > desc_buf_size(info)) {
328         return -ROCKER_EMSGSIZE;
329     }
330 
331     pos = 0;
332     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO);
333     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport);
334     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed);
335     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex);
336     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg);
337     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
338                    sizeof(macaddr.a), macaddr.a);
339     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode);
340     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,
341                       learning);
342     rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,
343                    strlen(phys_name), phys_name);
344     rocker_tlv_nest_end(buf, &pos, nest);
345 
346     return desc_set_buf(info, tlv_size);
347 }
348 
349 static int cmd_set_port_settings(Rocker *r,
350                                  RockerTlv *cmd_info_tlv)
351 {
352     RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
353     FpPort *fp_port;
354     uint32_t pport;
355     uint32_t port;
356     uint32_t speed;
357     uint8_t duplex;
358     uint8_t autoneg;
359     uint8_t learning;
360     MACAddr macaddr;
361     enum rocker_world_type mode;
362     int err;
363 
364     rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
365                             cmd_info_tlv);
366 
367     if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) {
368         return -ROCKER_EINVAL;
369     }
370 
371     pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]);
372     if (!fp_port_from_pport(pport, &port)) {
373         return -ROCKER_EINVAL;
374     }
375     fp_port = r->fp_port[port];
376 
377     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] &&
378         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] &&
379         tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) {
380 
381         speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
382         duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
383         autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
384 
385         err = fp_port_set_settings(fp_port, speed, duplex, autoneg);
386         if (err) {
387             return err;
388         }
389     }
390 
391     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) {
392         if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) !=
393             sizeof(macaddr.a)) {
394             return -ROCKER_EINVAL;
395         }
396         memcpy(macaddr.a,
397                rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]),
398                sizeof(macaddr.a));
399         fp_port_set_macaddr(fp_port, &macaddr);
400     }
401 
402     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
403         mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
404         fp_port_set_world(fp_port, r->worlds[mode]);
405     }
406 
407     if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
408         learning =
409             rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]);
410         fp_port_set_learning(fp_port, learning);
411     }
412 
413     return ROCKER_OK;
414 }
415 
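/* Consume one command descriptor: OF-DPA flow/group commands are dispatched
 * to the OF-DPA world; the generic get/set port settings commands are
 * handled here.
 */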
416 static int cmd_consume(Rocker *r, DescInfo *info)
417 {
418     char *buf = desc_get_buf(info, false);
419     RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1];
420     RockerTlv *info_tlv;
421     World *world;
422     uint16_t cmd;
423     int err;
424 
425     if (!buf) {
426         return -ROCKER_ENXIO;
427     }
428 
429     rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info));
430 
431     if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) {
432         return -ROCKER_EINVAL;
433     }
434 
435     cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]);
436     info_tlv = tlvs[ROCKER_TLV_CMD_INFO];
437 
438     /* This might be reworked to something like this:
439      * Every world will have an array of command handlers from
440      * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. It is
441      * up to each world to implement whatever commands it wants.
442      * It can reference "generic" commands such as cmd_set_port_settings
443      * or cmd_get_port_settings.
444      */
445 
446     switch (cmd) {
447     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD:
448     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD:
449     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL:
450     case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS:
451     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD:
452     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD:
453     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL:
454     case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS:
455         world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
456         err = world_do_cmd(world, info, buf, cmd, info_tlv);
457         break;
458     case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS:
459         err = cmd_get_port_settings(r, info, buf, info_tlv);
460         break;
461     case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS:
462         err = cmd_set_port_settings(r, info_tlv);
463         break;
464     default:
465         err = -ROCKER_EINVAL;
466         break;
467     }
468 
469     return err;
470 }
471 
472 static void rocker_msix_irq(Rocker *r, unsigned vector)
473 {
474     PCIDevice *dev = PCI_DEVICE(r);
475 
476     DPRINTF("MSI-X notify request for vector %d\n", vector);
477     if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) {
478         DPRINTF("incorrect vector %d\n", vector);
479         return;
480     }
481     msix_notify(dev, vector);
482 }
483 
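/* Post a link-changed event for the given pport on the event ring, raising
 * the event MSI-X vector as needed.
 */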
484 int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up)
485 {
486     DescRing *ring = r->rings[ROCKER_RING_EVENT];
487     DescInfo *info = desc_ring_fetch_desc(ring);
488     RockerTlv *nest;
489     char *buf;
490     size_t tlv_size;
491     int pos;
492     int err;
493 
494     if (!info) {
495         return -ROCKER_ENOBUFS;
496     }
497 
498     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
499                rocker_tlv_total_size(0) +                 /* nest */
500                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
501                rocker_tlv_total_size(sizeof(uint8_t));    /*   link up */
502 
503     if (tlv_size > desc_buf_size(info)) {
504         err = -ROCKER_EMSGSIZE;
505         goto err_too_big;
506     }
507 
508     buf = desc_get_buf(info, false);
509     if (!buf) {
510         err = -ROCKER_ENOMEM;
511         goto err_no_mem;
512     }
513 
514     pos = 0;
515     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
516                         ROCKER_TLV_EVENT_TYPE_LINK_CHANGED);
517     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
518     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport);
519     rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP,
520                       link_up ? 1 : 0);
521     rocker_tlv_nest_end(buf, &pos, nest);
522 
523     err = desc_set_buf(info, tlv_size);
524 
525 err_too_big:
526 err_no_mem:
527     if (desc_ring_post_desc(ring, err)) {
528         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
529     }
530 
531     return err;
532 }
533 
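/* Post a "MAC/VLAN seen" (learning) event for the given pport on the event
 * ring.  Ports with learning disabled do not generate these events.
 */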
534 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
535                                uint16_t vlan_id)
536 {
537     DescRing *ring = r->rings[ROCKER_RING_EVENT];
538     DescInfo *info;
539     FpPort *fp_port;
540     uint32_t port;
541     RockerTlv *nest;
542     char *buf;
543     size_t tlv_size;
544     int pos;
545     int err;
546 
547     if (!fp_port_from_pport(pport, &port)) {
548         return -ROCKER_EINVAL;
549     }
550     fp_port = r->fp_port[port];
551     if (!fp_port_get_learning(fp_port)) {
552         return ROCKER_OK;
553     }
554 
555     info = desc_ring_fetch_desc(ring);
556     if (!info) {
557         return -ROCKER_ENOBUFS;
558     }
559 
560     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) +  /* event type */
561                rocker_tlv_total_size(0) +                 /* nest */
562                rocker_tlv_total_size(sizeof(uint32_t)) +  /*   pport */
563                rocker_tlv_total_size(ETH_ALEN) +          /*   mac addr */
564                rocker_tlv_total_size(sizeof(uint16_t));   /*   vlan_id */
565 
566     if (tlv_size > desc_buf_size(info)) {
567         err = -ROCKER_EMSGSIZE;
568         goto err_too_big;
569     }
570 
571     buf = desc_get_buf(info, false);
572     if (!buf) {
573         err = -ROCKER_ENOMEM;
574         goto err_no_mem;
575     }
576 
577     pos = 0;
578     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE,
579                         ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN);
580     nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO);
581     rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport);
582     rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr);
583     rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id);
584     rocker_tlv_nest_end(buf, &pos, nest);
585 
586     err = desc_set_buf(info, tlv_size);
587 
588 err_too_big:
589 err_no_mem:
590     if (desc_ring_post_desc(ring, err)) {
591         rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT);
592     }
593 
594     return err;
595 }
596 
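/* Rings are laid out as cmd, event, then a tx/rx pair per port, so the rx
 * ring for pport p lives at ring index (p - 1) * 2 + 3.
 */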
597 static DescRing *rocker_get_rx_ring_by_pport(Rocker *r,
598                                              uint32_t pport)
599 {
600     return r->rings[(pport - 1) * 2 + 3];
601 }
602 
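/* Deliver a received frame to the guest: fetch an rx descriptor from the
 * pport's rx ring, DMA the frame into the guest buffer described by the
 * descriptor's FRAG_ADDR/FRAG_MAX_LEN TLVs, then rewrite the descriptor
 * TLVs with the actual frame length, flags and checksum.
 */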
603 int rx_produce(World *world, uint32_t pport,
604                const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
605 {
606     Rocker *r = world_rocker(world);
607     PCIDevice *dev = (PCIDevice *)r;
608     DescRing *ring = rocker_get_rx_ring_by_pport(r, pport);
609     DescInfo *info = desc_ring_fetch_desc(ring);
610     char *data;
611     size_t data_size = iov_size(iov, iovcnt);
612     char *buf;
613     uint16_t rx_flags = 0;
614     uint16_t rx_csum = 0;
615     size_t tlv_size;
616     RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1];
617     hwaddr frag_addr;
618     uint16_t frag_max_len;
619     int pos;
620     int err;
621 
622     if (!info) {
623         return -ROCKER_ENOBUFS;
624     }
625 
626     buf = desc_get_buf(info, false);
627     if (!buf) {
628         err = -ROCKER_ENXIO;
629         goto out;
630     }
631     rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info));
632 
633     if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] ||
634         !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) {
635         err = -ROCKER_EINVAL;
636         goto out;
637     }
638 
639     frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]);
640     frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
641 
642     if (data_size > frag_max_len) {
643         err = -ROCKER_EMSGSIZE;
644         goto out;
645     }
646 
647     if (copy_to_cpu) {
648         rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
649     }
650 
651     /* XXX calc rx flags/csum */
652 
653     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
654                rocker_tlv_total_size(sizeof(uint16_t)) + /* csum */
655                rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */
656                rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */
657                rocker_tlv_total_size(sizeof(uint16_t));  /* frag len */
658 
659     if (tlv_size > desc_buf_size(info)) {
660         err = -ROCKER_EMSGSIZE;
661         goto out;
662     }
663 
664     /* TODO:
665      * The iov dma write can be optimized in a similar way to how e1000
666      * does it in e1000_receive_iov. But maybe it would make sense to
667      * introduce a generic helper iov_dma_write.
668      */
669 
670     data = g_malloc(data_size);
671     if (!data) {
672         err = -ROCKER_ENOMEM;
673         goto out;
674     }
675     iov_to_buf(iov, iovcnt, 0, data, data_size);
676     pci_dma_write(dev, frag_addr, data, data_size);
677     g_free(data);
678 
679     pos = 0;
680     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags);
681     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum);
682     rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr);
683     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len);
684     rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size);
685 
686     err = desc_set_buf(info, tlv_size);
687 
688 out:
689     if (desc_ring_post_desc(ring, err)) {
690         rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1));
691     }
692 
693     return err;
694 }
695 
696 int rocker_port_eg(Rocker *r, uint32_t pport,
697                    const struct iovec *iov, int iovcnt)
698 {
699     FpPort *fp_port;
700     uint32_t port;
701 
702     if (!fp_port_from_pport(pport, &port)) {
703         return -ROCKER_EINVAL;
704     }
705 
706     fp_port = r->fp_port[port];
707 
708     return fp_port_eg(fp_port, iov, iovcnt);
709 }
710 
711 static void rocker_test_dma_ctrl(Rocker *r, uint32_t val)
712 {
713     PCIDevice *dev = PCI_DEVICE(r);
714     char *buf;
715     int i;
716 
717     buf = g_malloc(r->test_dma_size);
718 
719     if (!buf) {
720         DPRINTF("test dma buffer alloc failed");
721         return;
722     }
723 
724     switch (val) {
725     case ROCKER_TEST_DMA_CTRL_CLEAR:
726         memset(buf, 0, r->test_dma_size);
727         break;
728     case ROCKER_TEST_DMA_CTRL_FILL:
729         memset(buf, 0x96, r->test_dma_size);
730         break;
731     case ROCKER_TEST_DMA_CTRL_INVERT:
732         pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size);
733         for (i = 0; i < r->test_dma_size; i++) {
734             buf[i] = ~buf[i];
735         }
736         break;
737     default:
738         DPRINTF("unknown test dma control val=0x%08x\n", val);
739         goto err_out;
740     }
741     pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size);
742 
743     rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST);
744 
745 err_out:
746     g_free(buf);
747 }
748 
749 static void rocker_reset(DeviceState *dev);
750 
751 static void rocker_control(Rocker *r, uint32_t val)
752 {
753     if (val & ROCKER_CONTROL_RESET) {
754         rocker_reset(DEVICE(r));
755     }
756 }
757 
758 static int rocker_pci_ring_count(Rocker *r)
759 {
760     /* There are:
761      * - command ring
762      * - event ring
763      * - one tx and one rx ring per front-panel port
764      */
765     return 2 + (2 * r->fp_ports);
766 }
767 
768 static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr)
769 {
770     hwaddr start = ROCKER_DMA_DESC_BASE;
771     hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r));
772 
773     return addr >= start && addr < end;
774 }
775 
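/* Bit 0 of the port enable register is unused; front-panel port i is
 * controlled by bit i + 1, matching the link-status register layout.
 */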
776 static void rocker_port_phys_enable_write(Rocker *r, uint64_t new)
777 {
778     int i;
779     bool old_enabled;
780     bool new_enabled;
781     FpPort *fp_port;
782 
783     for (i = 0; i < r->fp_ports; i++) {
784         fp_port = r->fp_port[i];
785         old_enabled = fp_port_enabled(fp_port);
786         new_enabled = (new >> (i + 1)) & 0x1;
787         if (new_enabled == old_enabled) {
788             continue;
789         }
790         if (new_enabled) {
791             fp_port_enable(r->fp_port[i]);
792         } else {
793             fp_port_disable(r->fp_port[i]);
794         }
795     }
796 }
797 
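/* 32-bit writes to the 64-bit registers arrive as two accesses: the low
 * word is latched in r->lower32 and the register is committed when the high
 * word (offset + 4) is written.
 */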
798 static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val)
799 {
800     Rocker *r = opaque;
801 
802     if (rocker_addr_is_desc_reg(r, addr)) {
803         unsigned index = ROCKER_RING_INDEX(addr);
804         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
805 
806         switch (offset) {
807         case ROCKER_DMA_DESC_ADDR_OFFSET:
808             r->lower32 = (uint64_t)val;
809             break;
810         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
811             desc_ring_set_base_addr(r->rings[index],
812                                     ((uint64_t)val) << 32 | r->lower32);
813             r->lower32 = 0;
814             break;
815         case ROCKER_DMA_DESC_SIZE_OFFSET:
816             desc_ring_set_size(r->rings[index], val);
817             break;
818         case ROCKER_DMA_DESC_HEAD_OFFSET:
819             if (desc_ring_set_head(r->rings[index], val)) {
820                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
821             }
822             break;
823         case ROCKER_DMA_DESC_CTRL_OFFSET:
824             desc_ring_set_ctrl(r->rings[index], val);
825             break;
826         case ROCKER_DMA_DESC_CREDITS_OFFSET:
827             if (desc_ring_ret_credits(r->rings[index], val)) {
828                 rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index]));
829             }
830             break;
831         default:
832             DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx
833                     " val=0x%08x (ring %d, addr=0x%02x)\n",
834                     addr, val, index, offset);
835             break;
836         }
837         return;
838     }
839 
840     switch (addr) {
841     case ROCKER_TEST_REG:
842         r->test_reg = val;
843         break;
844     case ROCKER_TEST_REG64:
845     case ROCKER_TEST_DMA_ADDR:
846     case ROCKER_PORT_PHYS_ENABLE:
847         r->lower32 = (uint64_t)val;
848         break;
849     case ROCKER_TEST_REG64 + 4:
850         r->test_reg64 = ((uint64_t)val) << 32 | r->lower32;
851         r->lower32 = 0;
852         break;
853     case ROCKER_TEST_IRQ:
854         rocker_msix_irq(r, val);
855         break;
856     case ROCKER_TEST_DMA_SIZE:
857         r->test_dma_size = val;
858         break;
859     case ROCKER_TEST_DMA_ADDR + 4:
860         r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32;
861         r->lower32 = 0;
862         break;
863     case ROCKER_TEST_DMA_CTRL:
864         rocker_test_dma_ctrl(r, val);
865         break;
866     case ROCKER_CONTROL:
867         rocker_control(r, val);
868         break;
869     case ROCKER_PORT_PHYS_ENABLE + 4:
870         rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32);
871         r->lower32 = 0;
872         break;
873     default:
874         DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx
875                 " val=0x%08x\n", addr, val);
876         break;
877     }
878 }
879 
880 static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val)
881 {
882     Rocker *r = opaque;
883 
884     if (rocker_addr_is_desc_reg(r, addr)) {
885         unsigned index = ROCKER_RING_INDEX(addr);
886         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
887 
888         switch (offset) {
889         case ROCKER_DMA_DESC_ADDR_OFFSET:
890             desc_ring_set_base_addr(r->rings[index], val);
891             break;
892         default:
893             DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx
894                     " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n",
895                     addr, val, index, offset);
896             break;
897         }
898         return;
899     }
900 
901     switch (addr) {
902     case ROCKER_TEST_REG64:
903         r->test_reg64 = val;
904         break;
905     case ROCKER_TEST_DMA_ADDR:
906         r->test_dma_addr = val;
907         break;
908     case ROCKER_PORT_PHYS_ENABLE:
909         rocker_port_phys_enable_write(r, val);
910         break;
911     default:
912         DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx
913                 " val=0x" TARGET_FMT_plx "\n", addr, val);
914         break;
915     }
916 }
917 
918 #ifdef DEBUG_ROCKER
919 #define regname(reg) case (reg): return #reg
920 static const char *rocker_reg_name(void *opaque, hwaddr addr)
921 {
922     Rocker *r = opaque;
923 
924     if (rocker_addr_is_desc_reg(r, addr)) {
925         unsigned index = ROCKER_RING_INDEX(addr);
926         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
927         static char buf[100];
928         char ring_name[10];
929 
930         switch (index) {
931         case 0:
932             sprintf(ring_name, "cmd");
933             break;
934         case 1:
935             sprintf(ring_name, "event");
936             break;
937         default:
938             sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx",
939                     (index - 2) / 2);
940         }
941 
942         switch (offset) {
943         case ROCKER_DMA_DESC_ADDR_OFFSET:
944             sprintf(buf, "Ring[%s] ADDR", ring_name);
945             return buf;
946         case ROCKER_DMA_DESC_ADDR_OFFSET+4:
947             sprintf(buf, "Ring[%s] ADDR+4", ring_name);
948             return buf;
949         case ROCKER_DMA_DESC_SIZE_OFFSET:
950             sprintf(buf, "Ring[%s] SIZE", ring_name);
951             return buf;
952         case ROCKER_DMA_DESC_HEAD_OFFSET:
953             sprintf(buf, "Ring[%s] HEAD", ring_name);
954             return buf;
955         case ROCKER_DMA_DESC_TAIL_OFFSET:
956             sprintf(buf, "Ring[%s] TAIL", ring_name);
957             return buf;
958         case ROCKER_DMA_DESC_CTRL_OFFSET:
959             sprintf(buf, "Ring[%s] CTRL", ring_name);
960             return buf;
961         case ROCKER_DMA_DESC_CREDITS_OFFSET:
962             sprintf(buf, "Ring[%s] CREDITS", ring_name);
963             return buf;
964         default:
965             sprintf(buf, "Ring[%s] ???", ring_name);
966             return buf;
967         }
968     } else {
969         switch (addr) {
970             regname(ROCKER_BOGUS_REG0);
971             regname(ROCKER_BOGUS_REG1);
972             regname(ROCKER_BOGUS_REG2);
973             regname(ROCKER_BOGUS_REG3);
974             regname(ROCKER_TEST_REG);
975             regname(ROCKER_TEST_REG64);
976             regname(ROCKER_TEST_REG64+4);
977             regname(ROCKER_TEST_IRQ);
978             regname(ROCKER_TEST_DMA_ADDR);
979             regname(ROCKER_TEST_DMA_ADDR+4);
980             regname(ROCKER_TEST_DMA_SIZE);
981             regname(ROCKER_TEST_DMA_CTRL);
982             regname(ROCKER_CONTROL);
983             regname(ROCKER_PORT_PHYS_COUNT);
984             regname(ROCKER_PORT_PHYS_LINK_STATUS);
985             regname(ROCKER_PORT_PHYS_LINK_STATUS+4);
986             regname(ROCKER_PORT_PHYS_ENABLE);
987             regname(ROCKER_PORT_PHYS_ENABLE+4);
988             regname(ROCKER_SWITCH_ID);
989             regname(ROCKER_SWITCH_ID+4);
990         }
991     }
992     return "???";
993 }
994 #else
995 static const char *rocker_reg_name(void *opaque, hwaddr addr)
996 {
997     return NULL;
998 }
999 #endif
1000 
1001 static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1002                               unsigned size)
1003 {
1004     DPRINTF("Write %s addr " TARGET_FMT_plx
1005             ", size %u, val " TARGET_FMT_plx "\n",
1006             rocker_reg_name(opaque, addr), addr, size, val);
1007 
1008     switch (size) {
1009     case 4:
1010         rocker_io_writel(opaque, addr, val);
1011         break;
1012     case 8:
1013         rocker_io_writeq(opaque, addr, val);
1014         break;
1015     }
1016 }
1017 
1018 static uint64_t rocker_port_phys_link_status(Rocker *r)
1019 {
1020     int i;
1021     uint64_t status = 0;
1022 
1023     for (i = 0; i < r->fp_ports; i++) {
1024         FpPort *port = r->fp_port[i];
1025 
1026         if (fp_port_get_link_up(port)) {
1027             status |= 1ULL << (i + 1);
1028         }
1029     }
1030     return status;
1031 }
1032 
1033 static uint64_t rocker_port_phys_enable_read(Rocker *r)
1034 {
1035     int i;
1036     uint64_t ret = 0;
1037 
1038     for (i = 0; i < r->fp_ports; i++) {
1039         FpPort *port = r->fp_port[i];
1040 
1041         if (fp_port_enabled(port)) {
1042             ret |= 1ULL << (i + 1);
1043         }
1044     }
1045     return ret;
1046 }
1047 
1048 static uint32_t rocker_io_readl(void *opaque, hwaddr addr)
1049 {
1050     Rocker *r = opaque;
1051     uint32_t ret;
1052 
1053     if (rocker_addr_is_desc_reg(r, addr)) {
1054         unsigned index = ROCKER_RING_INDEX(addr);
1055         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1056 
1057         switch (offset) {
1058         case ROCKER_DMA_DESC_ADDR_OFFSET:
1059             ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]);
1060             break;
1061         case ROCKER_DMA_DESC_ADDR_OFFSET + 4:
1062             ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32);
1063             break;
1064         case ROCKER_DMA_DESC_SIZE_OFFSET:
1065             ret = desc_ring_get_size(r->rings[index]);
1066             break;
1067         case ROCKER_DMA_DESC_HEAD_OFFSET:
1068             ret = desc_ring_get_head(r->rings[index]);
1069             break;
1070         case ROCKER_DMA_DESC_TAIL_OFFSET:
1071             ret = desc_ring_get_tail(r->rings[index]);
1072             break;
1073         case ROCKER_DMA_DESC_CREDITS_OFFSET:
1074             ret = desc_ring_get_credits(r->rings[index]);
1075             break;
1076         default:
1077             DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx
1078                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1079             ret = 0;
1080             break;
1081         }
1082         return ret;
1083     }
1084 
1085     switch (addr) {
1086     case ROCKER_BOGUS_REG0:
1087     case ROCKER_BOGUS_REG1:
1088     case ROCKER_BOGUS_REG2:
1089     case ROCKER_BOGUS_REG3:
1090         ret = 0xDEADBABE;
1091         break;
1092     case ROCKER_TEST_REG:
1093         ret = r->test_reg * 2;
1094         break;
1095     case ROCKER_TEST_REG64:
1096         ret = (uint32_t)(r->test_reg64 * 2);
1097         break;
1098     case ROCKER_TEST_REG64 + 4:
1099         ret = (uint32_t)((r->test_reg64 * 2) >> 32);
1100         break;
1101     case ROCKER_TEST_DMA_SIZE:
1102         ret = r->test_dma_size;
1103         break;
1104     case ROCKER_TEST_DMA_ADDR:
1105         ret = (uint32_t)r->test_dma_addr;
1106         break;
1107     case ROCKER_TEST_DMA_ADDR + 4:
1108         ret = (uint32_t)(r->test_dma_addr >> 32);
1109         break;
1110     case ROCKER_PORT_PHYS_COUNT:
1111         ret = r->fp_ports;
1112         break;
1113     case ROCKER_PORT_PHYS_LINK_STATUS:
1114         ret = (uint32_t)rocker_port_phys_link_status(r);
1115         break;
1116     case ROCKER_PORT_PHYS_LINK_STATUS + 4:
1117         ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32);
1118         break;
1119     case ROCKER_PORT_PHYS_ENABLE:
1120         ret = (uint32_t)rocker_port_phys_enable_read(r);
1121         break;
1122     case ROCKER_PORT_PHYS_ENABLE + 4:
1123         ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32);
1124         break;
1125     case ROCKER_SWITCH_ID:
1126         ret = (uint32_t)r->switch_id;
1127         break;
1128     case ROCKER_SWITCH_ID + 4:
1129         ret = (uint32_t)(r->switch_id >> 32);
1130         break;
1131     default:
1132         DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr);
1133         ret = 0;
1134         break;
1135     }
1136     return ret;
1137 }
1138 
1139 static uint64_t rocker_io_readq(void *opaque, hwaddr addr)
1140 {
1141     Rocker *r = opaque;
1142     uint64_t ret;
1143 
1144     if (rocker_addr_is_desc_reg(r, addr)) {
1145         unsigned index = ROCKER_RING_INDEX(addr);
1146         unsigned offset = addr & ROCKER_DMA_DESC_MASK;
1147 
1148         switch (offset) {
1149         case ROCKER_DMA_DESC_ADDR_OFFSET:
1150             ret = desc_ring_get_base_addr(r->rings[index]);
1151             break;
1152         default:
1153             DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx
1154                     " (ring %d, addr=0x%02x)\n", addr, index, offset);
1155             ret = 0;
1156             break;
1157         }
1158         return ret;
1159     }
1160 
1161     switch (addr) {
1162     case ROCKER_BOGUS_REG0:
1163     case ROCKER_BOGUS_REG2:
1164         ret = 0xDEADBABEDEADBABEULL;
1165         break;
1166     case ROCKER_TEST_REG64:
1167         ret = r->test_reg64 * 2;
1168         break;
1169     case ROCKER_TEST_DMA_ADDR:
1170         ret = r->test_dma_addr;
1171         break;
1172     case ROCKER_PORT_PHYS_LINK_STATUS:
1173         ret = rocker_port_phys_link_status(r);
1174         break;
1175     case ROCKER_PORT_PHYS_ENABLE:
1176         ret = rocker_port_phys_enable_read(r);
1177         break;
1178     case ROCKER_SWITCH_ID:
1179         ret = r->switch_id;
1180         break;
1181     default:
1182         DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr);
1183         ret = 0;
1184         break;
1185     }
1186     return ret;
1187 }
1188 
1189 static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size)
1190 {
1191     DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n",
1192             rocker_reg_name(opaque, addr), addr, size);
1193 
1194     switch (size) {
1195     case 4:
1196         return rocker_io_readl(opaque, addr);
1197     case 8:
1198         return rocker_io_readq(opaque, addr);
1199     }
1200 
1201     return -1;
1202 }
1203 
1204 static const MemoryRegionOps rocker_mmio_ops = {
1205     .read = rocker_mmio_read,
1206     .write = rocker_mmio_write,
1207     .endianness = DEVICE_LITTLE_ENDIAN,
1208     .valid = {
1209         .min_access_size = 4,
1210         .max_access_size = 8,
1211     },
1212     .impl = {
1213         .min_access_size = 4,
1214         .max_access_size = 8,
1215     },
1216 };
1217 
1218 static void rocker_msix_vectors_unuse(Rocker *r,
1219                                       unsigned int num_vectors)
1220 {
1221     PCIDevice *dev = PCI_DEVICE(r);
1222     int i;
1223 
1224     for (i = 0; i < num_vectors; i++) {
1225         msix_vector_unuse(dev, i);
1226     }
1227 }
1228 
1229 static int rocker_msix_vectors_use(Rocker *r,
1230                                    unsigned int num_vectors)
1231 {
1232     PCIDevice *dev = PCI_DEVICE(r);
1233     int err;
1234     int i;
1235 
1236     for (i = 0; i < num_vectors; i++) {
1237         err = msix_vector_use(dev, i);
1238         if (err) {
1239             goto rollback;
1240         }
1241     }
1242     return 0;
1243 
1244 rollback:
1245     rocker_msix_vectors_unuse(r, i);
1246     return err;
1247 }
1248 
1249 static int rocker_msix_init(Rocker *r)
1250 {
1251     PCIDevice *dev = PCI_DEVICE(r);
1252     int err;
1253 
1254     err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports),
1255                     &r->msix_bar,
1256                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET,
1257                     &r->msix_bar,
1258                     ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET,
1259                     0);
1260     if (err) {
1261         return err;
1262     }
1263 
1264     err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1265     if (err) {
1266         goto err_msix_vectors_use;
1267     }
1268 
1269     return 0;
1270 
1271 err_msix_vectors_use:
1272     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1273     return err;
1274 }
1275 
1276 static void rocker_msix_uninit(Rocker *r)
1277 {
1278     PCIDevice *dev = PCI_DEVICE(r);
1279 
1280     msix_uninit(dev, &r->msix_bar, &r->msix_bar);
1281     rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
1282 }
1283 
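/* PCI device init: allocate the switch worlds, map BAR0 (register space) and
 * the MSI-X BAR, validate the switch properties, allocate the descriptor
 * rings and front-panel ports, and add the switch to the global list.
 */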
1284 static int pci_rocker_init(PCIDevice *dev)
1285 {
1286     Rocker *r = to_rocker(dev);
1287     const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
1288     const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } };
1289     static int sw_index;
1290     int i, err = 0;
1291 
1292     /* allocate worlds */
1293 
1294     r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
1295     r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
1296 
1297     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1298         if (!r->worlds[i]) {
1299             err = -ENOMEM;
1300             goto err_world_alloc;
1301         }
1302     }
1302 
1303     /* set up memory-mapped region at BAR0 */
1304 
1305     memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r,
1306                           "rocker-mmio", ROCKER_PCI_BAR0_SIZE);
1307     pci_register_bar(dev, ROCKER_PCI_BAR0_IDX,
1308                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio);
1309 
1310     /* set up memory-mapped region for MSI-X */
1311 
1312     memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar",
1313                        ROCKER_PCI_MSIX_BAR_SIZE);
1314     pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX,
1315                      PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar);
1316 
1317     /* MSI-X init */
1318 
1319     err = rocker_msix_init(r);
1320     if (err) {
1321         goto err_msix_init;
1322     }
1323 
1324     /* validate switch properties */
1325 
1326     if (!r->name) {
1327         r->name = g_strdup(ROCKER);
1328     }
1329 
1330     if (rocker_find(r->name)) {
1331         err = -EEXIST;
1332         goto err_duplicate;
1333     }
1334 
1335     /* The rocker name is passed to the OS in port name requests, with the
1336      * intention that it be used in interface names. Limit the length of the
1337      * rocker name to avoid naming problems in the OS. Also, the port number
1338      * is appended as p# and an unganged breakout as b#, where # is at most
1339      * 2 digits, so leave room for those too (-1 for the string terminator,
1340      * -3 for p# and -3 for b#).
1341      */
1342 #define ROCKER_IFNAMSIZ 16
1343 #define MAX_ROCKER_NAME_LEN  (ROCKER_IFNAMSIZ - 1 - 3 - 3)
1344     if (strlen(r->name) > MAX_ROCKER_NAME_LEN) {
1345         fprintf(stderr, "rocker: name too long; please shorten to "
1346                 "at most %d chars\n", MAX_ROCKER_NAME_LEN);
1347         err = -EINVAL;
1348         goto err_duplicate; /* same cleanup path as a duplicate name */
1349     }
1350 
1351     if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) {
1352         memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt));
1353         r->fp_start_macaddr.a[4] += (sw_index++);
1354     }
1355 
1356     if (!r->switch_id) {
1357         memcpy(&r->switch_id, &r->fp_start_macaddr,
1358                sizeof(r->fp_start_macaddr));
1359     }
1360 
1361     if (r->fp_ports > ROCKER_FP_PORTS_MAX) {
1362         r->fp_ports = ROCKER_FP_PORTS_MAX;
1363     }
1364 
1365     r->rings = g_malloc(sizeof(DescRing *) * rocker_pci_ring_count(r));
1366     if (!r->rings) {
1367         goto err_rings_alloc;
1368     }
1369 
1370     /* Rings are ordered like this:
1371      * - command ring
1372      * - event ring
1373      * - port0 tx ring
1374      * - port0 rx ring
1375      * - port1 tx ring
1376      * - port1 rx ring
1377      * .....
1378      */
1379 
1380     err = -ENOMEM;
1381     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1382         DescRing *ring = desc_ring_alloc(r, i);
1383 
1384         if (!ring) {
1385             goto err_ring_alloc;
1386         }
1387 
1388         if (i == ROCKER_RING_CMD) {
1389             desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD);
1390         } else if (i == ROCKER_RING_EVENT) {
1391             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT);
1392         } else if (i % 2 == 0) {
1393             desc_ring_set_consume(ring, tx_consume,
1394                                   ROCKER_MSIX_VEC_TX((i - 2) / 2));
1395         } else if (i % 2 == 1) {
1396             desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2));
1397         }
1398 
1399         r->rings[i] = ring;
1400     }
1401 
1402     for (i = 0; i < r->fp_ports; i++) {
1403         FpPort *port =
1404             fp_port_alloc(r, r->name, &r->fp_start_macaddr,
1405                           i, &r->fp_ports_peers[i]);
1406 
1407         if (!port) {
1408             goto err_port_alloc;
1409         }
1410 
1411         r->fp_port[i] = port;
1412         fp_port_set_world(port, r->world_dflt);
1413     }
1414 
1415     QLIST_INSERT_HEAD(&rockers, r, next);
1416 
1417     return 0;
1418 
1419 err_port_alloc:
1420     for (--i; i >= 0; i--) {
1421         FpPort *port = r->fp_port[i];
1422         fp_port_free(port);
1423     }
1424     i = rocker_pci_ring_count(r);
1425 err_ring_alloc:
1426     for (--i; i >= 0; i--) {
1427         desc_ring_free(r->rings[i]);
1428     }
1429     g_free(r->rings);
1430 err_rings_alloc:
1431 err_duplicate:
1432     rocker_msix_uninit(r);
1433 err_msix_init:
1434     object_unparent(OBJECT(&r->msix_bar));
1435     object_unparent(OBJECT(&r->mmio));
1436 err_world_alloc:
1437     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1438         if (r->worlds[i]) {
1439             world_free(r->worlds[i]);
1440         }
1441     }
1442     return err;
1443 }
1444 
1445 static void pci_rocker_uninit(PCIDevice *dev)
1446 {
1447     Rocker *r = to_rocker(dev);
1448     int i;
1449 
1450     QLIST_REMOVE(r, next);
1451 
1452     for (i = 0; i < r->fp_ports; i++) {
1453         FpPort *port = r->fp_port[i];
1454 
1455         fp_port_free(port);
1456         r->fp_port[i] = NULL;
1457     }
1458 
1459     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1460         if (r->rings[i]) {
1461             desc_ring_free(r->rings[i]);
1462         }
1463     }
1464     g_free(r->rings);
1465 
1466     rocker_msix_uninit(r);
1467     object_unparent(OBJECT(&r->msix_bar));
1468     object_unparent(OBJECT(&r->mmio));
1469 
1470     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1471         if (r->worlds[i]) {
1472             world_free(r->worlds[i]);
1473         }
1474     }
1475     g_free(r->fp_ports_peers);
1476 }
1477 
1478 static void rocker_reset(DeviceState *dev)
1479 {
1480     Rocker *r = to_rocker(dev);
1481     int i;
1482 
1483     for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
1484         if (r->worlds[i]) {
1485             world_reset(r->worlds[i]);
1486         }
1487     }
1488     for (i = 0; i < r->fp_ports; i++) {
1489         fp_port_reset(r->fp_port[i]);
1490         fp_port_set_world(r->fp_port[i], r->world_dflt);
1491     }
1492 
1493     r->test_reg = 0;
1494     r->test_reg64 = 0;
1495     r->test_dma_addr = 0;
1496     r->test_dma_size = 0;
1497 
1498     for (i = 0; i < rocker_pci_ring_count(r); i++) {
1499         desc_ring_reset(r->rings[i]);
1500     }
1501 
1502     DPRINTF("Reset done\n");
1503 }
1504 
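/* DEFINE_PROP_ARRAY exposes the port array as a "len-ports" count plus
 * indexed "ports[N]" properties, so a typical command line might look like:
 *
 *   -device rocker,name=sw1,len-ports=2,ports[0]=net0,ports[1]=net1
 *
 * where net0/net1 are -netdev ids.
 */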
1505 static Property rocker_properties[] = {
1506     DEFINE_PROP_STRING("name", Rocker, name),
1507     DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
1508                         fp_start_macaddr),
1509     DEFINE_PROP_UINT64("switch_id", Rocker,
1510                        switch_id, 0),
1511     DEFINE_PROP_ARRAY("ports", Rocker, fp_ports,
1512                       fp_ports_peers, qdev_prop_netdev, NICPeers),
1513     DEFINE_PROP_END_OF_LIST(),
1514 };
1515 
1516 static const VMStateDescription rocker_vmsd = {
1517     .name = ROCKER,
1518     .unmigratable = 1,
1519 };
1520 
1521 static void rocker_class_init(ObjectClass *klass, void *data)
1522 {
1523     DeviceClass *dc = DEVICE_CLASS(klass);
1524     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1525 
1526     k->init = pci_rocker_init;
1527     k->exit = pci_rocker_uninit;
1528     k->vendor_id = PCI_VENDOR_ID_REDHAT;
1529     k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER;
1530     k->revision = ROCKER_PCI_REVISION;
1531     k->class_id = PCI_CLASS_NETWORK_OTHER;
1532     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1533     dc->desc = "Rocker Switch";
1534     dc->reset = rocker_reset;
1535     dc->props = rocker_properties;
1536     dc->vmsd = &rocker_vmsd;
1537 }
1538 
1539 static const TypeInfo rocker_info = {
1540     .name          = ROCKER,
1541     .parent        = TYPE_PCI_DEVICE,
1542     .instance_size = sizeof(Rocker),
1543     .class_init    = rocker_class_init,
1544 };
1545 
1546 static void rocker_register_types(void)
1547 {
1548     type_register_static(&rocker_info);
1549 }
1550 
1551 type_init(rocker_register_types)
1552