1 // SPDX-License-Identifier: GPL-2.0 2 3 /* Reference program for verifying XDP metadata on real HW. Functional test 4 * only, doesn't test the performance. 5 * 6 * RX: 7 * - UDP 9091 packets are diverted into AF_XDP 8 * - Metadata verified: 9 * - rx_timestamp 10 * - rx_hash 11 * 12 * TX: 13 * - TBD 14 */ 15 16 #include <test_progs.h> 17 #include <network_helpers.h> 18 #include "xdp_hw_metadata.skel.h" 19 #include "xsk.h" 20 21 #include <error.h> 22 #include <linux/errqueue.h> 23 #include <linux/if_link.h> 24 #include <linux/net_tstamp.h> 25 #include <linux/udp.h> 26 #include <linux/sockios.h> 27 #include <sys/mman.h> 28 #include <net/if.h> 29 #include <poll.h> 30 #include <time.h> 31 32 #include "xdp_metadata.h" 33 34 #define UMEM_NUM 16 35 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE 36 #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) 37 #define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE) 38 39 struct xsk { 40 void *umem_area; 41 struct xsk_umem *umem; 42 struct xsk_ring_prod fill; 43 struct xsk_ring_cons comp; 44 struct xsk_ring_prod tx; 45 struct xsk_ring_cons rx; 46 struct xsk_socket *socket; 47 }; 48 49 struct xdp_hw_metadata *bpf_obj; 50 struct xsk *rx_xsk; 51 const char *ifname; 52 int ifindex; 53 int rxq; 54 55 void test__fail(void) { /* for network_helpers.c */ } 56 57 static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) 58 { 59 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; 60 const struct xsk_socket_config socket_config = { 61 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 62 .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 63 .bind_flags = XDP_COPY, 64 }; 65 const struct xsk_umem_config umem_config = { 66 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 67 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 68 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, 69 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, 70 }; 71 __u32 idx = 0; 72 u64 addr; 73 int ret; 74 int i; 75 76 xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); 77 if (xsk->umem_area == MAP_FAILED) 78 return -ENOMEM; 79 80 ret = xsk_umem__create(&xsk->umem, 81 xsk->umem_area, UMEM_SIZE, 82 &xsk->fill, 83 &xsk->comp, 84 &umem_config); 85 if (ret) 86 return ret; 87 88 ret = xsk_socket__create(&xsk->socket, ifindex, queue_id, 89 xsk->umem, 90 &xsk->rx, 91 &xsk->tx, 92 &socket_config); 93 if (ret) 94 return ret; 95 96 /* First half of umem is for TX. This way address matches 1-to-1 97 * to the completion queue index. 98 */ 99 100 for (i = 0; i < UMEM_NUM / 2; i++) { 101 addr = i * UMEM_FRAME_SIZE; 102 printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr); 103 } 104 105 /* Second half of umem is for RX. */ 106 107 ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx); 108 for (i = 0; i < UMEM_NUM / 2; i++) { 109 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; 110 printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); 111 *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; 112 } 113 xsk_ring_prod__submit(&xsk->fill, ret); 114 115 return 0; 116 } 117 118 static void close_xsk(struct xsk *xsk) 119 { 120 if (xsk->umem) 121 xsk_umem__delete(xsk->umem); 122 if (xsk->socket) 123 xsk_socket__delete(xsk->socket); 124 munmap(xsk->umem_area, UMEM_SIZE); 125 } 126 127 static void refill_rx(struct xsk *xsk, __u64 addr) 128 { 129 __u32 idx; 130 131 if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) { 132 printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); 133 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; 134 xsk_ring_prod__submit(&xsk->fill, 1); 135 } 136 } 137 138 #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ 139 static __u64 gettime(clockid_t clock_id) 140 { 141 struct timespec t; 142 int res; 143 144 /* See man clock_gettime(2) for type of clock_id's */ 145 res = clock_gettime(clock_id, &t); 146 147 if (res < 0) 148 error(res, errno, "Error with clock_gettime()"); 149 150 return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; 151 } 152 153 static void verify_xdp_metadata(void *data, clockid_t clock_id) 154 { 155 struct xdp_meta *meta; 156 157 meta = data - sizeof(*meta); 158 159 if (meta->rx_hash_err < 0) 160 printf("No rx_hash err=%d\n", meta->rx_hash_err); 161 else 162 printf("rx_hash: 0x%X with RSS type:0x%X\n", 163 meta->rx_hash, meta->rx_hash_type); 164 165 printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, 166 (double)meta->rx_timestamp / NANOSEC_PER_SEC); 167 if (meta->rx_timestamp) { 168 __u64 usr_clock = gettime(clock_id); 169 __u64 xdp_clock = meta->xdp_timestamp; 170 __s64 delta_X = xdp_clock - meta->rx_timestamp; 171 __s64 delta_X2U = usr_clock - xdp_clock; 172 173 printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", 174 xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC, 175 (double)delta_X / NANOSEC_PER_SEC, 176 (double)delta_X / 1000); 177 178 printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", 179 usr_clock, (double)usr_clock / NANOSEC_PER_SEC, 180 (double)delta_X2U / NANOSEC_PER_SEC, 181 (double)delta_X2U / 1000); 182 } 183 184 } 185 186 static void verify_skb_metadata(int fd) 187 { 188 char cmsg_buf[1024]; 189 char packet_buf[128]; 190 191 struct scm_timestamping *ts; 192 struct iovec packet_iov; 193 struct cmsghdr *cmsg; 194 struct msghdr hdr; 195 196 memset(&hdr, 0, sizeof(hdr)); 197 hdr.msg_iov = &packet_iov; 198 hdr.msg_iovlen = 1; 199 packet_iov.iov_base = packet_buf; 200 packet_iov.iov_len = sizeof(packet_buf); 201 202 hdr.msg_control = cmsg_buf; 203 hdr.msg_controllen = sizeof(cmsg_buf); 204 205 if (recvmsg(fd, &hdr, 0) < 0) 206 error(1, errno, "recvmsg"); 207 208 for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL; 209 cmsg = CMSG_NXTHDR(&hdr, cmsg)) { 210 211 if (cmsg->cmsg_level != SOL_SOCKET) 212 continue; 213 214 switch (cmsg->cmsg_type) { 215 case SCM_TIMESTAMPING: 216 ts = (struct scm_timestamping *)CMSG_DATA(cmsg); 217 if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) { 218 printf("found skb hwtstamp = %lu.%lu\n", 219 ts->ts[2].tv_sec, ts->ts[2].tv_nsec); 220 return; 221 } 222 break; 223 default: 224 break; 225 } 226 } 227 228 printf("skb hwtstamp is not found!\n"); 229 } 230 231 static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) 232 { 233 const struct xdp_desc *rx_desc; 234 struct pollfd fds[rxq + 1]; 235 __u64 comp_addr; 236 __u64 addr; 237 __u32 idx; 238 int ret; 239 int i; 240 241 for (i = 0; i < rxq; i++) { 242 fds[i].fd = xsk_socket__fd(rx_xsk[i].socket); 243 fds[i].events = POLLIN; 244 fds[i].revents = 0; 245 } 246 247 fds[rxq].fd = server_fd; 248 fds[rxq].events = POLLIN; 249 fds[rxq].revents = 0; 250 251 while (true) { 252 errno = 0; 253 ret = poll(fds, rxq + 1, 1000); 254 printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n", 255 ret, errno, bpf_obj->bss->pkts_skip, 256 bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir); 257 if (ret < 0) 258 break; 259 if (ret == 0) 260 continue; 261 262 if (fds[rxq].revents) 263 verify_skb_metadata(server_fd); 264 265 for (i = 0; i < rxq; i++) { 266 if (fds[i].revents == 0) 267 continue; 268 269 struct xsk *xsk = &rx_xsk[i]; 270 271 ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx); 272 printf("xsk_ring_cons__peek: %d\n", ret); 273 if (ret != 1) 274 continue; 275 276 rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx); 277 comp_addr = xsk_umem__extract_addr(rx_desc->addr); 278 addr = xsk_umem__add_offset_to_addr(rx_desc->addr); 279 printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n", 280 xsk, idx, rx_desc->addr, addr, comp_addr); 281 verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr), 282 clock_id); 283 xsk_ring_cons__release(&xsk->rx, 1); 284 refill_rx(xsk, comp_addr); 285 } 286 } 287 288 return 0; 289 } 290 291 struct ethtool_channels { 292 __u32 cmd; 293 __u32 max_rx; 294 __u32 max_tx; 295 __u32 max_other; 296 __u32 max_combined; 297 __u32 rx_count; 298 __u32 tx_count; 299 __u32 other_count; 300 __u32 combined_count; 301 }; 302 303 #define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ 304 305 static int rxq_num(const char *ifname) 306 { 307 struct ethtool_channels ch = { 308 .cmd = ETHTOOL_GCHANNELS, 309 }; 310 311 struct ifreq ifr = { 312 .ifr_data = (void *)&ch, 313 }; 314 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); 315 int fd, ret; 316 317 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 318 if (fd < 0) 319 error(1, errno, "socket"); 320 321 ret = ioctl(fd, SIOCETHTOOL, &ifr); 322 if (ret < 0) 323 error(1, errno, "ioctl(SIOCETHTOOL)"); 324 325 close(fd); 326 327 return ch.rx_count + ch.combined_count; 328 } 329 330 static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg) 331 { 332 struct ifreq ifr = { 333 .ifr_data = (void *)cfg, 334 }; 335 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); 336 int fd, ret; 337 338 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 339 if (fd < 0) 340 error(1, errno, "socket"); 341 342 ret = ioctl(fd, op, &ifr); 343 if (ret < 0) 344 error(1, errno, "ioctl(%d)", op); 345 346 close(fd); 347 } 348 349 static struct hwtstamp_config saved_hwtstamp_cfg; 350 static const char *saved_hwtstamp_ifname; 351 352 static void hwtstamp_restore(void) 353 { 354 hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg); 355 } 356 357 static void hwtstamp_enable(const char *ifname) 358 { 359 struct hwtstamp_config cfg = { 360 .rx_filter = HWTSTAMP_FILTER_ALL, 361 }; 362 363 hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg); 364 saved_hwtstamp_ifname = strdup(ifname); 365 atexit(hwtstamp_restore); 366 367 hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg); 368 } 369 370 static void cleanup(void) 371 { 372 LIBBPF_OPTS(bpf_xdp_attach_opts, opts); 373 int ret; 374 int i; 375 376 if (bpf_obj) { 377 opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx); 378 if (opts.old_prog_fd >= 0) { 379 printf("detaching bpf program....\n"); 380 ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts); 381 if (ret) 382 printf("failed to detach XDP program: %d\n", ret); 383 } 384 } 385 386 for (i = 0; i < rxq; i++) 387 close_xsk(&rx_xsk[i]); 388 389 if (bpf_obj) 390 xdp_hw_metadata__destroy(bpf_obj); 391 } 392 393 static void handle_signal(int sig) 394 { 395 /* interrupting poll() is all we need */ 396 } 397 398 static void timestamping_enable(int fd, int val) 399 { 400 int ret; 401 402 ret = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)); 403 if (ret < 0) 404 error(1, errno, "setsockopt(SO_TIMESTAMPING)"); 405 } 406 407 int main(int argc, char *argv[]) 408 { 409 clockid_t clock_id = CLOCK_TAI; 410 int server_fd = -1; 411 int ret; 412 int i; 413 414 struct bpf_program *prog; 415 416 if (argc != 2) { 417 fprintf(stderr, "pass device name\n"); 418 return -1; 419 } 420 421 ifname = argv[1]; 422 ifindex = if_nametoindex(ifname); 423 rxq = rxq_num(ifname); 424 425 printf("rxq: %d\n", rxq); 426 427 hwtstamp_enable(ifname); 428 429 rx_xsk = malloc(sizeof(struct xsk) * rxq); 430 if (!rx_xsk) 431 error(1, ENOMEM, "malloc"); 432 433 for (i = 0; i < rxq; i++) { 434 printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i); 435 ret = open_xsk(ifindex, &rx_xsk[i], i); 436 if (ret) 437 error(1, -ret, "open_xsk"); 438 439 printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket)); 440 } 441 442 printf("open bpf program...\n"); 443 bpf_obj = xdp_hw_metadata__open(); 444 if (libbpf_get_error(bpf_obj)) 445 error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open"); 446 447 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx"); 448 bpf_program__set_ifindex(prog, ifindex); 449 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); 450 451 printf("load bpf program...\n"); 452 ret = xdp_hw_metadata__load(bpf_obj); 453 if (ret) 454 error(1, -ret, "xdp_hw_metadata__load"); 455 456 printf("prepare skb endpoint...\n"); 457 server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000); 458 if (server_fd < 0) 459 error(1, errno, "start_server"); 460 timestamping_enable(server_fd, 461 SOF_TIMESTAMPING_SOFTWARE | 462 SOF_TIMESTAMPING_RAW_HARDWARE); 463 464 printf("prepare xsk map...\n"); 465 for (i = 0; i < rxq; i++) { 466 int sock_fd = xsk_socket__fd(rx_xsk[i].socket); 467 __u32 queue_id = i; 468 469 printf("map[%d] = %d\n", queue_id, sock_fd); 470 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0); 471 if (ret) 472 error(1, -ret, "bpf_map_update_elem"); 473 } 474 475 printf("attach bpf program...\n"); 476 ret = bpf_xdp_attach(ifindex, 477 bpf_program__fd(bpf_obj->progs.rx), 478 XDP_FLAGS, NULL); 479 if (ret) 480 error(1, -ret, "bpf_xdp_attach"); 481 482 signal(SIGINT, handle_signal); 483 ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id); 484 close(server_fd); 485 cleanup(); 486 if (ret) 487 error(1, -ret, "verify_metadata"); 488 } 489