// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * AF_XDP user-space access library.
 *
 * Copyright(c) 2018 - 2019 Intel Corporation.
 *
 * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
 */

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <asm/barrier.h>
#include <linux/compiler.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/if_link.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "xsk.h"
#include "bpf_util.h"

#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)

#define XSKMAP_SIZE 1

struct xsk_umem {
	/* Fill and completion rings handed in by xsk_umem__create(); they are
	 * consumed by the first socket bound to this umem.
	 */
	struct xsk_ring_prod *fill_save;
	struct xsk_ring_cons *comp_save;
	char *umem_area;
	struct xsk_umem_config config;
	int fd;
	int refcount;		/* number of sockets sharing this umem */
	struct list_head ctx_list;
	bool rx_ring_setup_done;
	bool tx_ring_setup_done;
};

/* Per-(ifindex, queue_id) state shared by all sockets bound to that queue. */
struct xsk_ctx {
	struct xsk_ring_prod *fill;
	struct xsk_ring_cons *comp;
	__u32 queue_id;
	struct xsk_umem *umem;
	int refcount;
	int ifindex;
	struct list_head list;
};

struct xsk_socket {
	struct xsk_ring_cons *rx;
	struct xsk_ring_prod *tx;
	struct xsk_ctx *ctx;
	struct xsk_socket_config config;
	int fd;
};

int xsk_umem__fd(const struct xsk_umem *umem)
{
	return umem ? umem->fd : -EINVAL;
}

int xsk_socket__fd(const struct xsk_socket *xsk)
{
	return xsk ? xsk->fd : -EINVAL;
}
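
/*
 * Usage sketch (editorial addition, not part of the original library): a
 * caller that bound with XDP_USE_NEED_WAKEUP can use the fd returned by
 * xsk_socket__fd() to wake the kernel Rx path. recvfrom() with MSG_DONTWAIT
 * is one common way to do that; poll()/select() on the same fd also work.
 * xsk_ring_prod__needs_wakeup() is assumed to be the inline helper declared
 * in xsk.h.
 */
static inline void example_kick_rx(struct xsk_socket *xsk,
				   struct xsk_ring_prod *fill)
{
	/* Only poke the kernel when it asked to be woken up. */
	if (xsk_ring_prod__needs_wakeup(fill))
		recvfrom(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
}
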
static bool xsk_page_aligned(void *buffer)
{
	unsigned long addr = (unsigned long)buffer;

	return !(addr & (getpagesize() - 1));
}

static void xsk_set_umem_config(struct xsk_umem_config *cfg,
				const struct xsk_umem_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
		return;
	}

	cfg->fill_size = usr_cfg->fill_size;
	cfg->comp_size = usr_cfg->comp_size;
	cfg->frame_size = usr_cfg->frame_size;
	cfg->frame_headroom = usr_cfg->frame_headroom;
	cfg->flags = usr_cfg->flags;
}

static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
				     const struct xsk_socket_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->bind_flags = 0;
		return 0;
	}

	cfg->rx_size = usr_cfg->rx_size;
	cfg->tx_size = usr_cfg->tx_size;
	cfg->bind_flags = usr_cfg->bind_flags;

	return 0;
}

static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
{
	socklen_t optlen;
	int err;

	optlen = sizeof(*off);
	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
	if (err)
		return err;

	if (optlen == sizeof(*off))
		return 0;

	return -EINVAL;
}

static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
				 struct xsk_ring_prod *fill,
				 struct xsk_ring_cons *comp)
{
	struct xdp_mmap_offsets off;
	void *map;
	int err;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
			 &umem->config.fill_size,
			 sizeof(umem->config.fill_size));
	if (err)
		return -errno;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
			 &umem->config.comp_size,
			 sizeof(umem->config.comp_size));
	if (err)
		return -errno;

	err = xsk_get_mmap_offsets(fd, &off);
	if (err)
		return -errno;

	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_FILL_RING);
	if (map == MAP_FAILED)
		return -errno;

	fill->mask = umem->config.fill_size - 1;
	fill->size = umem->config.fill_size;
	fill->producer = map + off.fr.producer;
	fill->consumer = map + off.fr.consumer;
	fill->flags = map + off.fr.flags;
	fill->ring = map + off.fr.desc;
	fill->cached_cons = umem->config.fill_size;

	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_COMPLETION_RING);
	if (map == MAP_FAILED) {
		err = -errno;
		goto out_mmap;
	}

	comp->mask = umem->config.comp_size - 1;
	comp->size = umem->config.comp_size;
	comp->producer = map + off.cr.producer;
	comp->consumer = map + off.cr.consumer;
	comp->flags = map + off.cr.flags;
	comp->ring = map + off.cr.desc;

	return 0;

out_mmap:
	/* Unmap the fill ring through its stored pointer; "map" refers to the
	 * failed completion-ring mmap at this point.
	 */
	munmap(fill->ring - off.fr.desc,
	       off.fr.desc + umem->config.fill_size * sizeof(__u64));
	return err;
}

int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
		     __u64 size, struct xsk_ring_prod *fill,
		     struct xsk_ring_cons *comp,
		     const struct xsk_umem_config *usr_config)
{
	struct xdp_umem_reg mr;
	struct xsk_umem *umem;
	int err;

	if (!umem_area || !umem_ptr || !fill || !comp)
		return -EFAULT;
	if (!size && !xsk_page_aligned(umem_area))
		return -EINVAL;

	umem = calloc(1, sizeof(*umem));
	if (!umem)
		return -ENOMEM;

	umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
	if (umem->fd < 0) {
		err = -errno;
		goto out_umem_alloc;
	}

	umem->umem_area = umem_area;
	INIT_LIST_HEAD(&umem->ctx_list);
	xsk_set_umem_config(&umem->config, usr_config);

	memset(&mr, 0, sizeof(mr));
	mr.addr = (uintptr_t)umem_area;
	mr.len = size;
	mr.chunk_size = umem->config.frame_size;
	mr.headroom = umem->config.frame_headroom;
	mr.flags = umem->config.flags;

	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	if (err) {
		err = -errno;
		goto out_socket;
	}

	err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
	if (err)
		goto out_socket;

	umem->fill_save = fill;
	umem->comp_save = comp;
	*umem_ptr = umem;
	return 0;

out_socket:
	close(umem->fd);
out_umem_alloc:
	free(umem);
	return err;
}
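
/*
 * Usage sketch (editorial addition): allocating a page-aligned buffer and
 * registering it as a UMEM with default ring sizes. The frame count (4096)
 * is illustrative; a real caller sizes it to its traffic. Passing NULL as the
 * config selects the XSK_*__DEFAULT_* values filled in by
 * xsk_set_umem_config() above. An anonymous mmap() is used because it always
 * returns page-aligned memory, which XDP_UMEM_REG requires.
 */
static int example_umem_setup(struct xsk_umem **umem,
			      struct xsk_ring_prod *fill,
			      struct xsk_ring_cons *comp)
{
	size_t size = 4096 * XSK_UMEM__DEFAULT_FRAME_SIZE;
	void *bufs;

	bufs = mmap(NULL, size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (bufs == MAP_FAILED)
		return -errno;

	return xsk_umem__create(umem, bufs, size, fill, comp, NULL);
}
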
bool xsk_is_in_mode(u32 ifindex, int mode)
{
	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
	int ret;

	ret = bpf_xdp_query(ifindex, mode, &opts);
	if (ret) {
		printf("XDP mode query returned error %s\n", strerror(errno));
		return false;
	}

	if (mode == XDP_FLAGS_DRV_MODE)
		return opts.attach_mode == XDP_ATTACHED_DRV;
	else if (mode == XDP_FLAGS_SKB_MODE)
		return opts.attach_mode == XDP_ATTACHED_SKB;

	return false;
}

int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
{
	int prog_fd;

	prog_fd = bpf_program__fd(prog);
	return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
}

void xsk_detach_xdp_program(int ifindex, u32 xdp_flags)
{
	bpf_xdp_detach(ifindex, xdp_flags, NULL);
}

/* Both helpers below operate on slot 0 of the map; see XSKMAP_SIZE above. */
void xsk_clear_xskmap(struct bpf_map *map)
{
	u32 index = 0;
	int map_fd;

	map_fd = bpf_map__fd(map);
	bpf_map_delete_elem(map_fd, &index);
}

int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk)
{
	int map_fd, sock_fd;
	u32 index = 0;

	map_fd = bpf_map__fd(map);
	sock_fd = xsk_socket__fd(xsk);

	return bpf_map_update_elem(map_fd, &index, &sock_fd, 0);
}
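
/*
 * Usage sketch (editorial addition, not part of this file's API): loading a
 * BPF object and steering traffic to the socket through the XSKMAP. The
 * object path and program/map names ("xsk_prog.o", "xsk_redirect", "xsk_map")
 * are illustrative placeholders and must match the BPF object the caller
 * actually ships. Error unwinding (closing the object, detaching on failure)
 * is omitted for brevity.
 */
static int example_attach_and_redirect(struct xsk_socket *xsk, int ifindex)
{
	struct bpf_program *prog;
	struct bpf_object *obj;
	struct bpf_map *map;
	int err;

	obj = bpf_object__open_file("xsk_prog.o", NULL);
	if (!obj)
		return -errno;

	err = bpf_object__load(obj);
	if (err)
		return err;

	prog = bpf_object__find_program_by_name(obj, "xsk_redirect");
	map = bpf_object__find_map_by_name(obj, "xsk_map");
	if (!prog || !map)
		return -ENOENT;

	err = xsk_attach_xdp_program(prog, ifindex, XDP_FLAGS_DRV_MODE);
	if (err)
		return err;

	/* xsk_update_xskmap() installs the socket at index 0, which the
	 * redirect program is assumed to look up.
	 */
	return xsk_update_xskmap(map, xsk);
}
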
static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
				   __u32 queue_id)
{
	struct xsk_ctx *ctx;

	if (list_empty(&umem->ctx_list))
		return NULL;

	list_for_each_entry(ctx, &umem->ctx_list, list) {
		if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
			ctx->refcount++;
			return ctx;
		}
	}

	return NULL;
}

static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
{
	struct xsk_umem *umem = ctx->umem;
	struct xdp_mmap_offsets off;
	int err;

	if (--ctx->refcount)
		return;

	if (!unmap)
		goto out_free;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (err)
		goto out_free;

	munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
	       sizeof(__u64));
	munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
	       sizeof(__u64));

out_free:
	list_del(&ctx->list);
	free(ctx);
}

static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
				      struct xsk_umem *umem, int ifindex,
				      __u32 queue_id,
				      struct xsk_ring_prod *fill,
				      struct xsk_ring_cons *comp)
{
	struct xsk_ctx *ctx;
	int err;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx)
		return NULL;

	if (!umem->fill_save) {
		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
		if (err) {
			free(ctx);
			return NULL;
		}
	} else if (umem->fill_save != fill || umem->comp_save != comp) {
		/* Copy over rings to new structs. */
		memcpy(fill, umem->fill_save, sizeof(*fill));
		memcpy(comp, umem->comp_save, sizeof(*comp));
	}

	ctx->ifindex = ifindex;
	ctx->refcount = 1;
	ctx->umem = umem;
	ctx->queue_id = queue_id;

	ctx->fill = fill;
	ctx->comp = comp;
	list_add(&ctx->list, &umem->ctx_list);
	return ctx;
}

int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
			      int ifindex,
			      __u32 queue_id, struct xsk_umem *umem,
			      struct xsk_ring_cons *rx,
			      struct xsk_ring_prod *tx,
			      struct xsk_ring_prod *fill,
			      struct xsk_ring_cons *comp,
			      const struct xsk_socket_config *usr_config)
{
	bool unmap, rx_setup_done = false, tx_setup_done = false;
	void *rx_map = NULL, *tx_map = NULL;
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	struct xsk_socket *xsk;
	struct xsk_ctx *ctx;
	int err;

	if (!umem || !xsk_ptr || !(rx || tx))
		return -EFAULT;

	unmap = umem->fill_save != fill;

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		return -ENOMEM;

	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
	if (err)
		goto out_xsk_alloc;

	if (umem->refcount++ > 0) {
		xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
		if (xsk->fd < 0) {
			err = -errno;
			goto out_xsk_alloc;
		}
	} else {
		xsk->fd = umem->fd;
		rx_setup_done = umem->rx_ring_setup_done;
		tx_setup_done = umem->tx_ring_setup_done;
	}

	ctx = xsk_get_ctx(umem, ifindex, queue_id);
	if (!ctx) {
		if (!fill || !comp) {
			err = -EFAULT;
			goto out_socket;
		}

		ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp);
		if (!ctx) {
			err = -ENOMEM;
			goto out_socket;
		}
	}
	xsk->ctx = ctx;

	if (rx && !rx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
				 &xsk->config.rx_size,
				 sizeof(xsk->config.rx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->rx_ring_setup_done = true;
	}
	if (tx && !tx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
				 &xsk->config.tx_size,
				 sizeof(xsk->config.tx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->tx_ring_setup_done = true;
	}

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (err) {
		err = -errno;
		goto out_put_ctx;
	}

	if (rx) {
		rx_map = mmap(NULL, off.rx.desc +
			      xsk->config.rx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_RX_RING);
		if (rx_map == MAP_FAILED) {
			err = -errno;
			goto out_put_ctx;
		}

		rx->mask = xsk->config.rx_size - 1;
		rx->size = xsk->config.rx_size;
		rx->producer = rx_map + off.rx.producer;
		rx->consumer = rx_map + off.rx.consumer;
		rx->flags = rx_map + off.rx.flags;
		rx->ring = rx_map + off.rx.desc;
		rx->cached_prod = *rx->producer;
		rx->cached_cons = *rx->consumer;
	}
	xsk->rx = rx;

	if (tx) {
		tx_map = mmap(NULL, off.tx.desc +
			      xsk->config.tx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_TX_RING);
		if (tx_map == MAP_FAILED) {
			err = -errno;
			goto out_mmap_rx;
		}

		tx->mask = xsk->config.tx_size - 1;
		tx->size = xsk->config.tx_size;
		tx->producer = tx_map + off.tx.producer;
		tx->consumer = tx_map + off.tx.consumer;
		tx->flags = tx_map + off.tx.flags;
		tx->ring = tx_map + off.tx.desc;
		tx->cached_prod = *tx->producer;
		/* cached_cons is r->size bigger than the real consumer pointer
		 * See xsk_prod_nb_free
		 */
		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
	}
	xsk->tx = tx;

	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = ctx->ifindex;
	sxdp.sxdp_queue_id = ctx->queue_id;
	if (umem->refcount > 1) {
		sxdp.sxdp_flags |= XDP_SHARED_UMEM;
		sxdp.sxdp_shared_umem_fd = umem->fd;
	} else {
		sxdp.sxdp_flags = xsk->config.bind_flags;
	}

	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
	if (err) {
		err = -errno;
		goto out_mmap_tx;
	}

	*xsk_ptr = xsk;
	umem->fill_save = NULL;
	umem->comp_save = NULL;
	return 0;

out_mmap_tx:
	if (tx)
		munmap(tx_map, off.tx.desc +
		       xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
	if (rx)
		munmap(rx_map, off.rx.desc +
		       xsk->config.rx_size * sizeof(struct xdp_desc));
out_put_ctx:
	xsk_put_ctx(ctx, unmap);
out_socket:
	if (--umem->refcount)
		close(xsk->fd);
out_xsk_alloc:
	free(xsk);
	return err;
}

int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex,
		       __u32 queue_id, struct xsk_umem *umem,
		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
		       const struct xsk_socket_config *usr_config)
{
	if (!umem)
		return -EFAULT;

	return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem,
					 rx, tx, umem->fill_save,
					 umem->comp_save, usr_config);
}
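
/*
 * Usage sketch (editorial addition): creating a socket on queue 0 of an
 * interface and pre-populating the fill ring so the kernel has buffers to
 * receive into. "fill" must be the same ring that was registered with
 * xsk_umem__create(). The ring helpers (xsk_ring_prod__reserve,
 * xsk_ring_prod__fill_addr, xsk_ring_prod__submit) are assumed to be the
 * inline accessors from xsk.h, and the one-frame-per-descriptor layout with
 * XSK_UMEM__DEFAULT_FRAME_SIZE spacing is an assumption of this sketch.
 */
static int example_socket_setup(struct xsk_socket **xsk, struct xsk_umem *umem,
				struct xsk_ring_cons *rx,
				struct xsk_ring_prod *tx,
				struct xsk_ring_prod *fill, int ifindex)
{
	__u32 idx, i;
	int err;

	err = xsk_socket__create(xsk, ifindex, 0, umem, rx, tx, NULL);
	if (err)
		return err;

	/* Hand every fill-ring slot one frame address. */
	if (xsk_ring_prod__reserve(fill, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx) !=
	    XSK_RING_PROD__DEFAULT_NUM_DESCS)
		return -ENOSPC;

	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
		*xsk_ring_prod__fill_addr(fill, idx++) =
			(__u64)i * XSK_UMEM__DEFAULT_FRAME_SIZE;
	xsk_ring_prod__submit(fill, XSK_RING_PROD__DEFAULT_NUM_DESCS);

	return 0;
}
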
int xsk_umem__delete(struct xsk_umem *umem)
{
	struct xdp_mmap_offsets off;
	int err;

	if (!umem)
		return 0;

	if (umem->refcount)
		return -EBUSY;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (!err && umem->fill_save && umem->comp_save) {
		munmap(umem->fill_save->ring - off.fr.desc,
		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
		munmap(umem->comp_save->ring - off.cr.desc,
		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
	}

	close(umem->fd);
	free(umem);

	return 0;
}

void xsk_socket__delete(struct xsk_socket *xsk)
{
	size_t desc_sz = sizeof(struct xdp_desc);
	struct xdp_mmap_offsets off;
	struct xsk_umem *umem;
	struct xsk_ctx *ctx;
	int err;

	if (!xsk)
		return;

	ctx = xsk->ctx;
	umem = ctx->umem;

	xsk_put_ctx(ctx, true);

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (!err) {
		if (xsk->rx) {
			munmap(xsk->rx->ring - off.rx.desc,
			       off.rx.desc + xsk->config.rx_size * desc_sz);
		}
		if (xsk->tx) {
			munmap(xsk->tx->ring - off.tx.desc,
			       off.tx.desc + xsk->config.tx_size * desc_sz);
		}
	}

	umem->refcount--;
	/* Do not close an fd that also has an associated umem connected
	 * to it.
	 */
	if (xsk->fd != umem->fd)
		close(xsk->fd);
	free(xsk);
}
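
/*
 * Usage sketch (editorial addition): teardown in the reverse order of setup.
 * All sockets sharing a UMEM must be deleted before the UMEM itself, since
 * xsk_umem__delete() refuses to run while umem->refcount is non-zero. The
 * flags passed to xsk_detach_xdp_program() must match whatever was used when
 * attaching; XDP_FLAGS_DRV_MODE here is illustrative.
 */
static void example_teardown(struct xsk_socket *xsk, struct xsk_umem *umem,
			     int ifindex)
{
	xsk_socket__delete(xsk);
	xsk_umem__delete(umem);
	xsk_detach_xdp_program(ifindex, XDP_FLAGS_DRV_MODE);
}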