1*cb039ef3SIlya Maximets /* 2*cb039ef3SIlya Maximets * AF_XDP network backend. 3*cb039ef3SIlya Maximets * 4*cb039ef3SIlya Maximets * Copyright (c) 2023 Red Hat, Inc. 5*cb039ef3SIlya Maximets * 6*cb039ef3SIlya Maximets * Authors: 7*cb039ef3SIlya Maximets * Ilya Maximets <i.maximets@ovn.org> 8*cb039ef3SIlya Maximets * 9*cb039ef3SIlya Maximets * This work is licensed under the terms of the GNU GPL, version 2 or later. 10*cb039ef3SIlya Maximets * See the COPYING file in the top-level directory. 11*cb039ef3SIlya Maximets */ 12*cb039ef3SIlya Maximets 13*cb039ef3SIlya Maximets 14*cb039ef3SIlya Maximets #include "qemu/osdep.h" 15*cb039ef3SIlya Maximets #include <bpf/bpf.h> 16*cb039ef3SIlya Maximets #include <inttypes.h> 17*cb039ef3SIlya Maximets #include <linux/if_link.h> 18*cb039ef3SIlya Maximets #include <linux/if_xdp.h> 19*cb039ef3SIlya Maximets #include <net/if.h> 20*cb039ef3SIlya Maximets #include <xdp/xsk.h> 21*cb039ef3SIlya Maximets 22*cb039ef3SIlya Maximets #include "clients.h" 23*cb039ef3SIlya Maximets #include "monitor/monitor.h" 24*cb039ef3SIlya Maximets #include "net/net.h" 25*cb039ef3SIlya Maximets #include "qapi/error.h" 26*cb039ef3SIlya Maximets #include "qemu/cutils.h" 27*cb039ef3SIlya Maximets #include "qemu/error-report.h" 28*cb039ef3SIlya Maximets #include "qemu/iov.h" 29*cb039ef3SIlya Maximets #include "qemu/main-loop.h" 30*cb039ef3SIlya Maximets #include "qemu/memalign.h" 31*cb039ef3SIlya Maximets 32*cb039ef3SIlya Maximets 33*cb039ef3SIlya Maximets typedef struct AFXDPState { 34*cb039ef3SIlya Maximets NetClientState nc; 35*cb039ef3SIlya Maximets 36*cb039ef3SIlya Maximets struct xsk_socket *xsk; 37*cb039ef3SIlya Maximets struct xsk_ring_cons rx; 38*cb039ef3SIlya Maximets struct xsk_ring_prod tx; 39*cb039ef3SIlya Maximets struct xsk_ring_cons cq; 40*cb039ef3SIlya Maximets struct xsk_ring_prod fq; 41*cb039ef3SIlya Maximets 42*cb039ef3SIlya Maximets char ifname[IFNAMSIZ]; 43*cb039ef3SIlya Maximets int ifindex; 44*cb039ef3SIlya Maximets bool read_poll; 45*cb039ef3SIlya Maximets bool write_poll; 46*cb039ef3SIlya Maximets uint32_t outstanding_tx; 47*cb039ef3SIlya Maximets 48*cb039ef3SIlya Maximets uint64_t *pool; 49*cb039ef3SIlya Maximets uint32_t n_pool; 50*cb039ef3SIlya Maximets char *buffer; 51*cb039ef3SIlya Maximets struct xsk_umem *umem; 52*cb039ef3SIlya Maximets 53*cb039ef3SIlya Maximets uint32_t n_queues; 54*cb039ef3SIlya Maximets uint32_t xdp_flags; 55*cb039ef3SIlya Maximets bool inhibit; 56*cb039ef3SIlya Maximets } AFXDPState; 57*cb039ef3SIlya Maximets 58*cb039ef3SIlya Maximets #define AF_XDP_BATCH_SIZE 64 59*cb039ef3SIlya Maximets 60*cb039ef3SIlya Maximets static void af_xdp_send(void *opaque); 61*cb039ef3SIlya Maximets static void af_xdp_writable(void *opaque); 62*cb039ef3SIlya Maximets 63*cb039ef3SIlya Maximets /* Set the event-loop handlers for the af-xdp backend. */ 64*cb039ef3SIlya Maximets static void af_xdp_update_fd_handler(AFXDPState *s) 65*cb039ef3SIlya Maximets { 66*cb039ef3SIlya Maximets qemu_set_fd_handler(xsk_socket__fd(s->xsk), 67*cb039ef3SIlya Maximets s->read_poll ? af_xdp_send : NULL, 68*cb039ef3SIlya Maximets s->write_poll ? af_xdp_writable : NULL, 69*cb039ef3SIlya Maximets s); 70*cb039ef3SIlya Maximets } 71*cb039ef3SIlya Maximets 72*cb039ef3SIlya Maximets /* Update the read handler. */ 73*cb039ef3SIlya Maximets static void af_xdp_read_poll(AFXDPState *s, bool enable) 74*cb039ef3SIlya Maximets { 75*cb039ef3SIlya Maximets if (s->read_poll != enable) { 76*cb039ef3SIlya Maximets s->read_poll = enable; 77*cb039ef3SIlya Maximets af_xdp_update_fd_handler(s); 78*cb039ef3SIlya Maximets } 79*cb039ef3SIlya Maximets } 80*cb039ef3SIlya Maximets 81*cb039ef3SIlya Maximets /* Update the write handler. */ 82*cb039ef3SIlya Maximets static void af_xdp_write_poll(AFXDPState *s, bool enable) 83*cb039ef3SIlya Maximets { 84*cb039ef3SIlya Maximets if (s->write_poll != enable) { 85*cb039ef3SIlya Maximets s->write_poll = enable; 86*cb039ef3SIlya Maximets af_xdp_update_fd_handler(s); 87*cb039ef3SIlya Maximets } 88*cb039ef3SIlya Maximets } 89*cb039ef3SIlya Maximets 90*cb039ef3SIlya Maximets static void af_xdp_poll(NetClientState *nc, bool enable) 91*cb039ef3SIlya Maximets { 92*cb039ef3SIlya Maximets AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc); 93*cb039ef3SIlya Maximets 94*cb039ef3SIlya Maximets if (s->read_poll != enable || s->write_poll != enable) { 95*cb039ef3SIlya Maximets s->write_poll = enable; 96*cb039ef3SIlya Maximets s->read_poll = enable; 97*cb039ef3SIlya Maximets af_xdp_update_fd_handler(s); 98*cb039ef3SIlya Maximets } 99*cb039ef3SIlya Maximets } 100*cb039ef3SIlya Maximets 101*cb039ef3SIlya Maximets static void af_xdp_complete_tx(AFXDPState *s) 102*cb039ef3SIlya Maximets { 103*cb039ef3SIlya Maximets uint32_t idx = 0; 104*cb039ef3SIlya Maximets uint32_t done, i; 105*cb039ef3SIlya Maximets uint64_t *addr; 106*cb039ef3SIlya Maximets 107*cb039ef3SIlya Maximets done = xsk_ring_cons__peek(&s->cq, XSK_RING_CONS__DEFAULT_NUM_DESCS, &idx); 108*cb039ef3SIlya Maximets 109*cb039ef3SIlya Maximets for (i = 0; i < done; i++) { 110*cb039ef3SIlya Maximets addr = (void *) xsk_ring_cons__comp_addr(&s->cq, idx++); 111*cb039ef3SIlya Maximets s->pool[s->n_pool++] = *addr; 112*cb039ef3SIlya Maximets s->outstanding_tx--; 113*cb039ef3SIlya Maximets } 114*cb039ef3SIlya Maximets 115*cb039ef3SIlya Maximets if (done) { 116*cb039ef3SIlya Maximets xsk_ring_cons__release(&s->cq, done); 117*cb039ef3SIlya Maximets } 118*cb039ef3SIlya Maximets } 119*cb039ef3SIlya Maximets 120*cb039ef3SIlya Maximets /* 121*cb039ef3SIlya Maximets * The fd_write() callback, invoked if the fd is marked as writable 122*cb039ef3SIlya Maximets * after a poll. 123*cb039ef3SIlya Maximets */ 124*cb039ef3SIlya Maximets static void af_xdp_writable(void *opaque) 125*cb039ef3SIlya Maximets { 126*cb039ef3SIlya Maximets AFXDPState *s = opaque; 127*cb039ef3SIlya Maximets 128*cb039ef3SIlya Maximets /* Try to recover buffers that are already sent. */ 129*cb039ef3SIlya Maximets af_xdp_complete_tx(s); 130*cb039ef3SIlya Maximets 131*cb039ef3SIlya Maximets /* 132*cb039ef3SIlya Maximets * Unregister the handler, unless we still have packets to transmit 133*cb039ef3SIlya Maximets * and kernel needs a wake up. 134*cb039ef3SIlya Maximets */ 135*cb039ef3SIlya Maximets if (!s->outstanding_tx || !xsk_ring_prod__needs_wakeup(&s->tx)) { 136*cb039ef3SIlya Maximets af_xdp_write_poll(s, false); 137*cb039ef3SIlya Maximets } 138*cb039ef3SIlya Maximets 139*cb039ef3SIlya Maximets /* Flush any buffered packets. */ 140*cb039ef3SIlya Maximets qemu_flush_queued_packets(&s->nc); 141*cb039ef3SIlya Maximets } 142*cb039ef3SIlya Maximets 143*cb039ef3SIlya Maximets static ssize_t af_xdp_receive(NetClientState *nc, 144*cb039ef3SIlya Maximets const uint8_t *buf, size_t size) 145*cb039ef3SIlya Maximets { 146*cb039ef3SIlya Maximets AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc); 147*cb039ef3SIlya Maximets struct xdp_desc *desc; 148*cb039ef3SIlya Maximets uint32_t idx; 149*cb039ef3SIlya Maximets void *data; 150*cb039ef3SIlya Maximets 151*cb039ef3SIlya Maximets /* Try to recover buffers that are already sent. */ 152*cb039ef3SIlya Maximets af_xdp_complete_tx(s); 153*cb039ef3SIlya Maximets 154*cb039ef3SIlya Maximets if (size > XSK_UMEM__DEFAULT_FRAME_SIZE) { 155*cb039ef3SIlya Maximets /* We can't transmit packet this size... */ 156*cb039ef3SIlya Maximets return size; 157*cb039ef3SIlya Maximets } 158*cb039ef3SIlya Maximets 159*cb039ef3SIlya Maximets if (!s->n_pool || !xsk_ring_prod__reserve(&s->tx, 1, &idx)) { 160*cb039ef3SIlya Maximets /* 161*cb039ef3SIlya Maximets * Out of buffers or space in tx ring. Poll until we can write. 162*cb039ef3SIlya Maximets * This will also kick the Tx, if it was waiting on CQ. 163*cb039ef3SIlya Maximets */ 164*cb039ef3SIlya Maximets af_xdp_write_poll(s, true); 165*cb039ef3SIlya Maximets return 0; 166*cb039ef3SIlya Maximets } 167*cb039ef3SIlya Maximets 168*cb039ef3SIlya Maximets desc = xsk_ring_prod__tx_desc(&s->tx, idx); 169*cb039ef3SIlya Maximets desc->addr = s->pool[--s->n_pool]; 170*cb039ef3SIlya Maximets desc->len = size; 171*cb039ef3SIlya Maximets 172*cb039ef3SIlya Maximets data = xsk_umem__get_data(s->buffer, desc->addr); 173*cb039ef3SIlya Maximets memcpy(data, buf, size); 174*cb039ef3SIlya Maximets 175*cb039ef3SIlya Maximets xsk_ring_prod__submit(&s->tx, 1); 176*cb039ef3SIlya Maximets s->outstanding_tx++; 177*cb039ef3SIlya Maximets 178*cb039ef3SIlya Maximets if (xsk_ring_prod__needs_wakeup(&s->tx)) { 179*cb039ef3SIlya Maximets af_xdp_write_poll(s, true); 180*cb039ef3SIlya Maximets } 181*cb039ef3SIlya Maximets 182*cb039ef3SIlya Maximets return size; 183*cb039ef3SIlya Maximets } 184*cb039ef3SIlya Maximets 185*cb039ef3SIlya Maximets /* 186*cb039ef3SIlya Maximets * Complete a previous send (backend --> guest) and enable the 187*cb039ef3SIlya Maximets * fd_read callback. 188*cb039ef3SIlya Maximets */ 189*cb039ef3SIlya Maximets static void af_xdp_send_completed(NetClientState *nc, ssize_t len) 190*cb039ef3SIlya Maximets { 191*cb039ef3SIlya Maximets AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc); 192*cb039ef3SIlya Maximets 193*cb039ef3SIlya Maximets af_xdp_read_poll(s, true); 194*cb039ef3SIlya Maximets } 195*cb039ef3SIlya Maximets 196*cb039ef3SIlya Maximets static void af_xdp_fq_refill(AFXDPState *s, uint32_t n) 197*cb039ef3SIlya Maximets { 198*cb039ef3SIlya Maximets uint32_t i, idx = 0; 199*cb039ef3SIlya Maximets 200*cb039ef3SIlya Maximets /* Leave one packet for Tx, just in case. */ 201*cb039ef3SIlya Maximets if (s->n_pool < n + 1) { 202*cb039ef3SIlya Maximets n = s->n_pool; 203*cb039ef3SIlya Maximets } 204*cb039ef3SIlya Maximets 205*cb039ef3SIlya Maximets if (!n || !xsk_ring_prod__reserve(&s->fq, n, &idx)) { 206*cb039ef3SIlya Maximets return; 207*cb039ef3SIlya Maximets } 208*cb039ef3SIlya Maximets 209*cb039ef3SIlya Maximets for (i = 0; i < n; i++) { 210*cb039ef3SIlya Maximets *xsk_ring_prod__fill_addr(&s->fq, idx++) = s->pool[--s->n_pool]; 211*cb039ef3SIlya Maximets } 212*cb039ef3SIlya Maximets xsk_ring_prod__submit(&s->fq, n); 213*cb039ef3SIlya Maximets 214*cb039ef3SIlya Maximets if (xsk_ring_prod__needs_wakeup(&s->fq)) { 215*cb039ef3SIlya Maximets /* Receive was blocked by not having enough buffers. Wake it up. */ 216*cb039ef3SIlya Maximets af_xdp_read_poll(s, true); 217*cb039ef3SIlya Maximets } 218*cb039ef3SIlya Maximets } 219*cb039ef3SIlya Maximets 220*cb039ef3SIlya Maximets static void af_xdp_send(void *opaque) 221*cb039ef3SIlya Maximets { 222*cb039ef3SIlya Maximets uint32_t i, n_rx, idx = 0; 223*cb039ef3SIlya Maximets AFXDPState *s = opaque; 224*cb039ef3SIlya Maximets 225*cb039ef3SIlya Maximets n_rx = xsk_ring_cons__peek(&s->rx, AF_XDP_BATCH_SIZE, &idx); 226*cb039ef3SIlya Maximets if (!n_rx) { 227*cb039ef3SIlya Maximets return; 228*cb039ef3SIlya Maximets } 229*cb039ef3SIlya Maximets 230*cb039ef3SIlya Maximets for (i = 0; i < n_rx; i++) { 231*cb039ef3SIlya Maximets const struct xdp_desc *desc; 232*cb039ef3SIlya Maximets struct iovec iov; 233*cb039ef3SIlya Maximets 234*cb039ef3SIlya Maximets desc = xsk_ring_cons__rx_desc(&s->rx, idx++); 235*cb039ef3SIlya Maximets 236*cb039ef3SIlya Maximets iov.iov_base = xsk_umem__get_data(s->buffer, desc->addr); 237*cb039ef3SIlya Maximets iov.iov_len = desc->len; 238*cb039ef3SIlya Maximets 239*cb039ef3SIlya Maximets s->pool[s->n_pool++] = desc->addr; 240*cb039ef3SIlya Maximets 241*cb039ef3SIlya Maximets if (!qemu_sendv_packet_async(&s->nc, &iov, 1, 242*cb039ef3SIlya Maximets af_xdp_send_completed)) { 243*cb039ef3SIlya Maximets /* 244*cb039ef3SIlya Maximets * The peer does not receive anymore. Packet is queued, stop 245*cb039ef3SIlya Maximets * reading from the backend until af_xdp_send_completed(). 246*cb039ef3SIlya Maximets */ 247*cb039ef3SIlya Maximets af_xdp_read_poll(s, false); 248*cb039ef3SIlya Maximets 249*cb039ef3SIlya Maximets /* Return unused descriptors to not break the ring cache. */ 250*cb039ef3SIlya Maximets xsk_ring_cons__cancel(&s->rx, n_rx - i - 1); 251*cb039ef3SIlya Maximets n_rx = i + 1; 252*cb039ef3SIlya Maximets break; 253*cb039ef3SIlya Maximets } 254*cb039ef3SIlya Maximets } 255*cb039ef3SIlya Maximets 256*cb039ef3SIlya Maximets /* Release actually sent descriptors and try to re-fill. */ 257*cb039ef3SIlya Maximets xsk_ring_cons__release(&s->rx, n_rx); 258*cb039ef3SIlya Maximets af_xdp_fq_refill(s, AF_XDP_BATCH_SIZE); 259*cb039ef3SIlya Maximets } 260*cb039ef3SIlya Maximets 261*cb039ef3SIlya Maximets /* Flush and close. */ 262*cb039ef3SIlya Maximets static void af_xdp_cleanup(NetClientState *nc) 263*cb039ef3SIlya Maximets { 264*cb039ef3SIlya Maximets AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc); 265*cb039ef3SIlya Maximets 266*cb039ef3SIlya Maximets qemu_purge_queued_packets(nc); 267*cb039ef3SIlya Maximets 268*cb039ef3SIlya Maximets af_xdp_poll(nc, false); 269*cb039ef3SIlya Maximets 270*cb039ef3SIlya Maximets xsk_socket__delete(s->xsk); 271*cb039ef3SIlya Maximets s->xsk = NULL; 272*cb039ef3SIlya Maximets g_free(s->pool); 273*cb039ef3SIlya Maximets s->pool = NULL; 274*cb039ef3SIlya Maximets xsk_umem__delete(s->umem); 275*cb039ef3SIlya Maximets s->umem = NULL; 276*cb039ef3SIlya Maximets qemu_vfree(s->buffer); 277*cb039ef3SIlya Maximets s->buffer = NULL; 278*cb039ef3SIlya Maximets 279*cb039ef3SIlya Maximets /* Remove the program if it's the last open queue. */ 280*cb039ef3SIlya Maximets if (!s->inhibit && nc->queue_index == s->n_queues - 1 && s->xdp_flags 281*cb039ef3SIlya Maximets && bpf_xdp_detach(s->ifindex, s->xdp_flags, NULL) != 0) { 282*cb039ef3SIlya Maximets fprintf(stderr, 283*cb039ef3SIlya Maximets "af-xdp: unable to remove XDP program from '%s', ifindex: %d\n", 284*cb039ef3SIlya Maximets s->ifname, s->ifindex); 285*cb039ef3SIlya Maximets } 286*cb039ef3SIlya Maximets } 287*cb039ef3SIlya Maximets 288*cb039ef3SIlya Maximets static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp) 289*cb039ef3SIlya Maximets { 290*cb039ef3SIlya Maximets struct xsk_umem_config config = { 291*cb039ef3SIlya Maximets .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 292*cb039ef3SIlya Maximets .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 293*cb039ef3SIlya Maximets .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, 294*cb039ef3SIlya Maximets .frame_headroom = 0, 295*cb039ef3SIlya Maximets }; 296*cb039ef3SIlya Maximets uint64_t n_descs; 297*cb039ef3SIlya Maximets uint64_t size; 298*cb039ef3SIlya Maximets int64_t i; 299*cb039ef3SIlya Maximets int ret; 300*cb039ef3SIlya Maximets 301*cb039ef3SIlya Maximets /* Number of descriptors if all 4 queues (rx, tx, cq, fq) are full. */ 302*cb039ef3SIlya Maximets n_descs = (XSK_RING_PROD__DEFAULT_NUM_DESCS 303*cb039ef3SIlya Maximets + XSK_RING_CONS__DEFAULT_NUM_DESCS) * 2; 304*cb039ef3SIlya Maximets size = n_descs * XSK_UMEM__DEFAULT_FRAME_SIZE; 305*cb039ef3SIlya Maximets 306*cb039ef3SIlya Maximets s->buffer = qemu_memalign(qemu_real_host_page_size(), size); 307*cb039ef3SIlya Maximets memset(s->buffer, 0, size); 308*cb039ef3SIlya Maximets 309*cb039ef3SIlya Maximets if (sock_fd < 0) { 310*cb039ef3SIlya Maximets ret = xsk_umem__create(&s->umem, s->buffer, size, 311*cb039ef3SIlya Maximets &s->fq, &s->cq, &config); 312*cb039ef3SIlya Maximets } else { 313*cb039ef3SIlya Maximets ret = xsk_umem__create_with_fd(&s->umem, sock_fd, s->buffer, size, 314*cb039ef3SIlya Maximets &s->fq, &s->cq, &config); 315*cb039ef3SIlya Maximets } 316*cb039ef3SIlya Maximets 317*cb039ef3SIlya Maximets if (ret) { 318*cb039ef3SIlya Maximets qemu_vfree(s->buffer); 319*cb039ef3SIlya Maximets error_setg_errno(errp, errno, 320*cb039ef3SIlya Maximets "failed to create umem for %s queue_index: %d", 321*cb039ef3SIlya Maximets s->ifname, s->nc.queue_index); 322*cb039ef3SIlya Maximets return -1; 323*cb039ef3SIlya Maximets } 324*cb039ef3SIlya Maximets 325*cb039ef3SIlya Maximets s->pool = g_new(uint64_t, n_descs); 326*cb039ef3SIlya Maximets /* Fill the pool in the opposite order, because it's a LIFO queue. */ 327*cb039ef3SIlya Maximets for (i = n_descs; i >= 0; i--) { 328*cb039ef3SIlya Maximets s->pool[i] = i * XSK_UMEM__DEFAULT_FRAME_SIZE; 329*cb039ef3SIlya Maximets } 330*cb039ef3SIlya Maximets s->n_pool = n_descs; 331*cb039ef3SIlya Maximets 332*cb039ef3SIlya Maximets af_xdp_fq_refill(s, XSK_RING_PROD__DEFAULT_NUM_DESCS); 333*cb039ef3SIlya Maximets 334*cb039ef3SIlya Maximets return 0; 335*cb039ef3SIlya Maximets } 336*cb039ef3SIlya Maximets 337*cb039ef3SIlya Maximets static int af_xdp_socket_create(AFXDPState *s, 338*cb039ef3SIlya Maximets const NetdevAFXDPOptions *opts, Error **errp) 339*cb039ef3SIlya Maximets { 340*cb039ef3SIlya Maximets struct xsk_socket_config cfg = { 341*cb039ef3SIlya Maximets .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 342*cb039ef3SIlya Maximets .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 343*cb039ef3SIlya Maximets .libxdp_flags = 0, 344*cb039ef3SIlya Maximets .bind_flags = XDP_USE_NEED_WAKEUP, 345*cb039ef3SIlya Maximets .xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST, 346*cb039ef3SIlya Maximets }; 347*cb039ef3SIlya Maximets int queue_id, error = 0; 348*cb039ef3SIlya Maximets 349*cb039ef3SIlya Maximets s->inhibit = opts->has_inhibit && opts->inhibit; 350*cb039ef3SIlya Maximets if (s->inhibit) { 351*cb039ef3SIlya Maximets cfg.libxdp_flags |= XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD; 352*cb039ef3SIlya Maximets } 353*cb039ef3SIlya Maximets 354*cb039ef3SIlya Maximets if (opts->has_force_copy && opts->force_copy) { 355*cb039ef3SIlya Maximets cfg.bind_flags |= XDP_COPY; 356*cb039ef3SIlya Maximets } 357*cb039ef3SIlya Maximets 358*cb039ef3SIlya Maximets queue_id = s->nc.queue_index; 359*cb039ef3SIlya Maximets if (opts->has_start_queue && opts->start_queue > 0) { 360*cb039ef3SIlya Maximets queue_id += opts->start_queue; 361*cb039ef3SIlya Maximets } 362*cb039ef3SIlya Maximets 363*cb039ef3SIlya Maximets if (opts->has_mode) { 364*cb039ef3SIlya Maximets /* Specific mode requested. */ 365*cb039ef3SIlya Maximets cfg.xdp_flags |= (opts->mode == AFXDP_MODE_NATIVE) 366*cb039ef3SIlya Maximets ? XDP_FLAGS_DRV_MODE : XDP_FLAGS_SKB_MODE; 367*cb039ef3SIlya Maximets if (xsk_socket__create(&s->xsk, s->ifname, queue_id, 368*cb039ef3SIlya Maximets s->umem, &s->rx, &s->tx, &cfg)) { 369*cb039ef3SIlya Maximets error = errno; 370*cb039ef3SIlya Maximets } 371*cb039ef3SIlya Maximets } else { 372*cb039ef3SIlya Maximets /* No mode requested, try native first. */ 373*cb039ef3SIlya Maximets cfg.xdp_flags |= XDP_FLAGS_DRV_MODE; 374*cb039ef3SIlya Maximets 375*cb039ef3SIlya Maximets if (xsk_socket__create(&s->xsk, s->ifname, queue_id, 376*cb039ef3SIlya Maximets s->umem, &s->rx, &s->tx, &cfg)) { 377*cb039ef3SIlya Maximets /* Can't use native mode, try skb. */ 378*cb039ef3SIlya Maximets cfg.xdp_flags &= ~XDP_FLAGS_DRV_MODE; 379*cb039ef3SIlya Maximets cfg.xdp_flags |= XDP_FLAGS_SKB_MODE; 380*cb039ef3SIlya Maximets 381*cb039ef3SIlya Maximets if (xsk_socket__create(&s->xsk, s->ifname, queue_id, 382*cb039ef3SIlya Maximets s->umem, &s->rx, &s->tx, &cfg)) { 383*cb039ef3SIlya Maximets error = errno; 384*cb039ef3SIlya Maximets } 385*cb039ef3SIlya Maximets } 386*cb039ef3SIlya Maximets } 387*cb039ef3SIlya Maximets 388*cb039ef3SIlya Maximets if (error) { 389*cb039ef3SIlya Maximets error_setg_errno(errp, error, 390*cb039ef3SIlya Maximets "failed to create AF_XDP socket for %s queue_id: %d", 391*cb039ef3SIlya Maximets s->ifname, queue_id); 392*cb039ef3SIlya Maximets return -1; 393*cb039ef3SIlya Maximets } 394*cb039ef3SIlya Maximets 395*cb039ef3SIlya Maximets s->xdp_flags = cfg.xdp_flags; 396*cb039ef3SIlya Maximets 397*cb039ef3SIlya Maximets return 0; 398*cb039ef3SIlya Maximets } 399*cb039ef3SIlya Maximets 400*cb039ef3SIlya Maximets /* NetClientInfo methods. */ 401*cb039ef3SIlya Maximets static NetClientInfo net_af_xdp_info = { 402*cb039ef3SIlya Maximets .type = NET_CLIENT_DRIVER_AF_XDP, 403*cb039ef3SIlya Maximets .size = sizeof(AFXDPState), 404*cb039ef3SIlya Maximets .receive = af_xdp_receive, 405*cb039ef3SIlya Maximets .poll = af_xdp_poll, 406*cb039ef3SIlya Maximets .cleanup = af_xdp_cleanup, 407*cb039ef3SIlya Maximets }; 408*cb039ef3SIlya Maximets 409*cb039ef3SIlya Maximets static int *parse_socket_fds(const char *sock_fds_str, 410*cb039ef3SIlya Maximets int64_t n_expected, Error **errp) 411*cb039ef3SIlya Maximets { 412*cb039ef3SIlya Maximets gchar **substrings = g_strsplit(sock_fds_str, ":", -1); 413*cb039ef3SIlya Maximets int64_t i, n_sock_fds = g_strv_length(substrings); 414*cb039ef3SIlya Maximets int *sock_fds = NULL; 415*cb039ef3SIlya Maximets 416*cb039ef3SIlya Maximets if (n_sock_fds != n_expected) { 417*cb039ef3SIlya Maximets error_setg(errp, "expected %"PRIi64" socket fds, got %"PRIi64, 418*cb039ef3SIlya Maximets n_expected, n_sock_fds); 419*cb039ef3SIlya Maximets goto exit; 420*cb039ef3SIlya Maximets } 421*cb039ef3SIlya Maximets 422*cb039ef3SIlya Maximets sock_fds = g_new(int, n_sock_fds); 423*cb039ef3SIlya Maximets 424*cb039ef3SIlya Maximets for (i = 0; i < n_sock_fds; i++) { 425*cb039ef3SIlya Maximets sock_fds[i] = monitor_fd_param(monitor_cur(), substrings[i], errp); 426*cb039ef3SIlya Maximets if (sock_fds[i] < 0) { 427*cb039ef3SIlya Maximets g_free(sock_fds); 428*cb039ef3SIlya Maximets sock_fds = NULL; 429*cb039ef3SIlya Maximets goto exit; 430*cb039ef3SIlya Maximets } 431*cb039ef3SIlya Maximets } 432*cb039ef3SIlya Maximets 433*cb039ef3SIlya Maximets exit: 434*cb039ef3SIlya Maximets g_strfreev(substrings); 435*cb039ef3SIlya Maximets return sock_fds; 436*cb039ef3SIlya Maximets } 437*cb039ef3SIlya Maximets 438*cb039ef3SIlya Maximets /* 439*cb039ef3SIlya Maximets * The exported init function. 440*cb039ef3SIlya Maximets * 441*cb039ef3SIlya Maximets * ... -netdev af-xdp,ifname="..." 442*cb039ef3SIlya Maximets */ 443*cb039ef3SIlya Maximets int net_init_af_xdp(const Netdev *netdev, 444*cb039ef3SIlya Maximets const char *name, NetClientState *peer, Error **errp) 445*cb039ef3SIlya Maximets { 446*cb039ef3SIlya Maximets const NetdevAFXDPOptions *opts = &netdev->u.af_xdp; 447*cb039ef3SIlya Maximets NetClientState *nc, *nc0 = NULL; 448*cb039ef3SIlya Maximets unsigned int ifindex; 449*cb039ef3SIlya Maximets uint32_t prog_id = 0; 450*cb039ef3SIlya Maximets int *sock_fds = NULL; 451*cb039ef3SIlya Maximets int64_t i, queues; 452*cb039ef3SIlya Maximets Error *err = NULL; 453*cb039ef3SIlya Maximets AFXDPState *s; 454*cb039ef3SIlya Maximets 455*cb039ef3SIlya Maximets ifindex = if_nametoindex(opts->ifname); 456*cb039ef3SIlya Maximets if (!ifindex) { 457*cb039ef3SIlya Maximets error_setg_errno(errp, errno, "failed to get ifindex for '%s'", 458*cb039ef3SIlya Maximets opts->ifname); 459*cb039ef3SIlya Maximets return -1; 460*cb039ef3SIlya Maximets } 461*cb039ef3SIlya Maximets 462*cb039ef3SIlya Maximets queues = opts->has_queues ? opts->queues : 1; 463*cb039ef3SIlya Maximets if (queues < 1) { 464*cb039ef3SIlya Maximets error_setg(errp, "invalid number of queues (%" PRIi64 ") for '%s'", 465*cb039ef3SIlya Maximets queues, opts->ifname); 466*cb039ef3SIlya Maximets return -1; 467*cb039ef3SIlya Maximets } 468*cb039ef3SIlya Maximets 469*cb039ef3SIlya Maximets if ((opts->has_inhibit && opts->inhibit) != !!opts->sock_fds) { 470*cb039ef3SIlya Maximets error_setg(errp, "'inhibit=on' requires 'sock-fds' and vice versa"); 471*cb039ef3SIlya Maximets return -1; 472*cb039ef3SIlya Maximets } 473*cb039ef3SIlya Maximets 474*cb039ef3SIlya Maximets if (opts->sock_fds) { 475*cb039ef3SIlya Maximets sock_fds = parse_socket_fds(opts->sock_fds, queues, errp); 476*cb039ef3SIlya Maximets if (!sock_fds) { 477*cb039ef3SIlya Maximets return -1; 478*cb039ef3SIlya Maximets } 479*cb039ef3SIlya Maximets } 480*cb039ef3SIlya Maximets 481*cb039ef3SIlya Maximets for (i = 0; i < queues; i++) { 482*cb039ef3SIlya Maximets nc = qemu_new_net_client(&net_af_xdp_info, peer, "af-xdp", name); 483*cb039ef3SIlya Maximets qemu_set_info_str(nc, "af-xdp%"PRIi64" to %s", i, opts->ifname); 484*cb039ef3SIlya Maximets nc->queue_index = i; 485*cb039ef3SIlya Maximets 486*cb039ef3SIlya Maximets if (!nc0) { 487*cb039ef3SIlya Maximets nc0 = nc; 488*cb039ef3SIlya Maximets } 489*cb039ef3SIlya Maximets 490*cb039ef3SIlya Maximets s = DO_UPCAST(AFXDPState, nc, nc); 491*cb039ef3SIlya Maximets 492*cb039ef3SIlya Maximets pstrcpy(s->ifname, sizeof(s->ifname), opts->ifname); 493*cb039ef3SIlya Maximets s->ifindex = ifindex; 494*cb039ef3SIlya Maximets s->n_queues = queues; 495*cb039ef3SIlya Maximets 496*cb039ef3SIlya Maximets if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, errp) 497*cb039ef3SIlya Maximets || af_xdp_socket_create(s, opts, errp)) { 498*cb039ef3SIlya Maximets /* Make sure the XDP program will be removed. */ 499*cb039ef3SIlya Maximets s->n_queues = i; 500*cb039ef3SIlya Maximets error_propagate(errp, err); 501*cb039ef3SIlya Maximets goto err; 502*cb039ef3SIlya Maximets } 503*cb039ef3SIlya Maximets } 504*cb039ef3SIlya Maximets 505*cb039ef3SIlya Maximets if (nc0) { 506*cb039ef3SIlya Maximets s = DO_UPCAST(AFXDPState, nc, nc0); 507*cb039ef3SIlya Maximets if (bpf_xdp_query_id(s->ifindex, s->xdp_flags, &prog_id) || !prog_id) { 508*cb039ef3SIlya Maximets error_setg_errno(errp, errno, 509*cb039ef3SIlya Maximets "no XDP program loaded on '%s', ifindex: %d", 510*cb039ef3SIlya Maximets s->ifname, s->ifindex); 511*cb039ef3SIlya Maximets goto err; 512*cb039ef3SIlya Maximets } 513*cb039ef3SIlya Maximets } 514*cb039ef3SIlya Maximets 515*cb039ef3SIlya Maximets af_xdp_read_poll(s, true); /* Initially only poll for reads. */ 516*cb039ef3SIlya Maximets 517*cb039ef3SIlya Maximets return 0; 518*cb039ef3SIlya Maximets 519*cb039ef3SIlya Maximets err: 520*cb039ef3SIlya Maximets g_free(sock_fds); 521*cb039ef3SIlya Maximets if (nc0) { 522*cb039ef3SIlya Maximets qemu_del_net_client(nc0); 523*cb039ef3SIlya Maximets } 524*cb039ef3SIlya Maximets 525*cb039ef3SIlya Maximets return -1; 526*cb039ef3SIlya Maximets } 527