1 // SPDX-License-Identifier: GPL-2.0 2 3 /* Reference program for verifying XDP metadata on real HW. Functional test 4 * only, doesn't test the performance. 5 * 6 * RX: 7 * - UDP 9091 packets are diverted into AF_XDP 8 * - Metadata verified: 9 * - rx_timestamp 10 * - rx_hash 11 * 12 * TX: 13 * - TBD 14 */ 15 16 #include <test_progs.h> 17 #include <network_helpers.h> 18 #include "xdp_hw_metadata.skel.h" 19 #include "xsk.h" 20 21 #include <error.h> 22 #include <linux/errqueue.h> 23 #include <linux/if_link.h> 24 #include <linux/net_tstamp.h> 25 #include <linux/udp.h> 26 #include <linux/sockios.h> 27 #include <sys/mman.h> 28 #include <net/if.h> 29 #include <poll.h> 30 31 #include "xdp_metadata.h" 32 33 #define UMEM_NUM 16 34 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE 35 #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) 36 #define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE) 37 38 struct xsk { 39 void *umem_area; 40 struct xsk_umem *umem; 41 struct xsk_ring_prod fill; 42 struct xsk_ring_cons comp; 43 struct xsk_ring_prod tx; 44 struct xsk_ring_cons rx; 45 struct xsk_socket *socket; 46 }; 47 48 struct xdp_hw_metadata *bpf_obj; 49 struct xsk *rx_xsk; 50 const char *ifname; 51 int ifindex; 52 int rxq; 53 54 void test__fail(void) { /* for network_helpers.c */ } 55 56 static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) 57 { 58 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; 59 const struct xsk_socket_config socket_config = { 60 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 61 .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 62 .bind_flags = XDP_COPY, 63 }; 64 const struct xsk_umem_config umem_config = { 65 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 66 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 67 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, 68 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, 69 }; 70 __u32 idx; 71 u64 addr; 72 int ret; 73 int i; 74 75 xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); 76 if (xsk->umem_area == MAP_FAILED) 77 return -ENOMEM; 78 79 ret = xsk_umem__create(&xsk->umem, 80 xsk->umem_area, UMEM_SIZE, 81 &xsk->fill, 82 &xsk->comp, 83 &umem_config); 84 if (ret) 85 return ret; 86 87 ret = xsk_socket__create(&xsk->socket, ifindex, queue_id, 88 xsk->umem, 89 &xsk->rx, 90 &xsk->tx, 91 &socket_config); 92 if (ret) 93 return ret; 94 95 /* First half of umem is for TX. This way address matches 1-to-1 96 * to the completion queue index. 97 */ 98 99 for (i = 0; i < UMEM_NUM / 2; i++) { 100 addr = i * UMEM_FRAME_SIZE; 101 printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr); 102 } 103 104 /* Second half of umem is for RX. */ 105 106 ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx); 107 for (i = 0; i < UMEM_NUM / 2; i++) { 108 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; 109 printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); 110 *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; 111 } 112 xsk_ring_prod__submit(&xsk->fill, ret); 113 114 return 0; 115 } 116 117 static void close_xsk(struct xsk *xsk) 118 { 119 if (xsk->umem) 120 xsk_umem__delete(xsk->umem); 121 if (xsk->socket) 122 xsk_socket__delete(xsk->socket); 123 munmap(xsk->umem_area, UMEM_SIZE); 124 } 125 126 static void refill_rx(struct xsk *xsk, __u64 addr) 127 { 128 __u32 idx; 129 130 if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) { 131 printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); 132 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; 133 xsk_ring_prod__submit(&xsk->fill, 1); 134 } 135 } 136 137 static void verify_xdp_metadata(void *data) 138 { 139 struct xdp_meta *meta; 140 141 meta = data - sizeof(*meta); 142 143 printf("rx_timestamp: %llu\n", meta->rx_timestamp); 144 if (meta->rx_hash_err < 0) 145 printf("No rx_hash err=%d\n", meta->rx_hash_err); 146 else 147 printf("rx_hash: 0x%X with RSS type:0x%X\n", 148 meta->rx_hash, meta->rx_hash_type); 149 } 150 151 static void verify_skb_metadata(int fd) 152 { 153 char cmsg_buf[1024]; 154 char packet_buf[128]; 155 156 struct scm_timestamping *ts; 157 struct iovec packet_iov; 158 struct cmsghdr *cmsg; 159 struct msghdr hdr; 160 161 memset(&hdr, 0, sizeof(hdr)); 162 hdr.msg_iov = &packet_iov; 163 hdr.msg_iovlen = 1; 164 packet_iov.iov_base = packet_buf; 165 packet_iov.iov_len = sizeof(packet_buf); 166 167 hdr.msg_control = cmsg_buf; 168 hdr.msg_controllen = sizeof(cmsg_buf); 169 170 if (recvmsg(fd, &hdr, 0) < 0) 171 error(1, errno, "recvmsg"); 172 173 for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL; 174 cmsg = CMSG_NXTHDR(&hdr, cmsg)) { 175 176 if (cmsg->cmsg_level != SOL_SOCKET) 177 continue; 178 179 switch (cmsg->cmsg_type) { 180 case SCM_TIMESTAMPING: 181 ts = (struct scm_timestamping *)CMSG_DATA(cmsg); 182 if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) { 183 printf("found skb hwtstamp = %lu.%lu\n", 184 ts->ts[2].tv_sec, ts->ts[2].tv_nsec); 185 return; 186 } 187 break; 188 default: 189 break; 190 } 191 } 192 193 printf("skb hwtstamp is not found!\n"); 194 } 195 196 static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd) 197 { 198 const struct xdp_desc *rx_desc; 199 struct pollfd fds[rxq + 1]; 200 __u64 comp_addr; 201 __u64 addr; 202 __u32 idx; 203 int ret; 204 int i; 205 206 for (i = 0; i < rxq; i++) { 207 fds[i].fd = xsk_socket__fd(rx_xsk[i].socket); 208 fds[i].events = POLLIN; 209 fds[i].revents = 0; 210 } 211 212 fds[rxq].fd = server_fd; 213 fds[rxq].events = POLLIN; 214 fds[rxq].revents = 0; 215 216 while (true) { 217 errno = 0; 218 ret = poll(fds, rxq + 1, 1000); 219 printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n", 220 ret, errno, bpf_obj->bss->pkts_skip, 221 bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir); 222 if (ret < 0) 223 break; 224 if (ret == 0) 225 continue; 226 227 if (fds[rxq].revents) 228 verify_skb_metadata(server_fd); 229 230 for (i = 0; i < rxq; i++) { 231 if (fds[i].revents == 0) 232 continue; 233 234 struct xsk *xsk = &rx_xsk[i]; 235 236 ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx); 237 printf("xsk_ring_cons__peek: %d\n", ret); 238 if (ret != 1) 239 continue; 240 241 rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx); 242 comp_addr = xsk_umem__extract_addr(rx_desc->addr); 243 addr = xsk_umem__add_offset_to_addr(rx_desc->addr); 244 printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n", 245 xsk, idx, rx_desc->addr, addr, comp_addr); 246 verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr)); 247 xsk_ring_cons__release(&xsk->rx, 1); 248 refill_rx(xsk, comp_addr); 249 } 250 } 251 252 return 0; 253 } 254 255 struct ethtool_channels { 256 __u32 cmd; 257 __u32 max_rx; 258 __u32 max_tx; 259 __u32 max_other; 260 __u32 max_combined; 261 __u32 rx_count; 262 __u32 tx_count; 263 __u32 other_count; 264 __u32 combined_count; 265 }; 266 267 #define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ 268 269 static int rxq_num(const char *ifname) 270 { 271 struct ethtool_channels ch = { 272 .cmd = ETHTOOL_GCHANNELS, 273 }; 274 275 struct ifreq ifr = { 276 .ifr_data = (void *)&ch, 277 }; 278 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); 279 int fd, ret; 280 281 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 282 if (fd < 0) 283 error(1, errno, "socket"); 284 285 ret = ioctl(fd, SIOCETHTOOL, &ifr); 286 if (ret < 0) 287 error(1, errno, "ioctl(SIOCETHTOOL)"); 288 289 close(fd); 290 291 return ch.rx_count + ch.combined_count; 292 } 293 294 static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg) 295 { 296 struct ifreq ifr = { 297 .ifr_data = (void *)cfg, 298 }; 299 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); 300 int fd, ret; 301 302 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 303 if (fd < 0) 304 error(1, errno, "socket"); 305 306 ret = ioctl(fd, op, &ifr); 307 if (ret < 0) 308 error(1, errno, "ioctl(%d)", op); 309 310 close(fd); 311 } 312 313 static struct hwtstamp_config saved_hwtstamp_cfg; 314 static const char *saved_hwtstamp_ifname; 315 316 static void hwtstamp_restore(void) 317 { 318 hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg); 319 } 320 321 static void hwtstamp_enable(const char *ifname) 322 { 323 struct hwtstamp_config cfg = { 324 .rx_filter = HWTSTAMP_FILTER_ALL, 325 }; 326 327 hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg); 328 saved_hwtstamp_ifname = strdup(ifname); 329 atexit(hwtstamp_restore); 330 331 hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg); 332 } 333 334 static void cleanup(void) 335 { 336 LIBBPF_OPTS(bpf_xdp_attach_opts, opts); 337 int ret; 338 int i; 339 340 if (bpf_obj) { 341 opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx); 342 if (opts.old_prog_fd >= 0) { 343 printf("detaching bpf program....\n"); 344 ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts); 345 if (ret) 346 printf("failed to detach XDP program: %d\n", ret); 347 } 348 } 349 350 for (i = 0; i < rxq; i++) 351 close_xsk(&rx_xsk[i]); 352 353 if (bpf_obj) 354 xdp_hw_metadata__destroy(bpf_obj); 355 } 356 357 static void handle_signal(int sig) 358 { 359 /* interrupting poll() is all we need */ 360 } 361 362 static void timestamping_enable(int fd, int val) 363 { 364 int ret; 365 366 ret = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)); 367 if (ret < 0) 368 error(1, errno, "setsockopt(SO_TIMESTAMPING)"); 369 } 370 371 int main(int argc, char *argv[]) 372 { 373 int server_fd = -1; 374 int ret; 375 int i; 376 377 struct bpf_program *prog; 378 379 if (argc != 2) { 380 fprintf(stderr, "pass device name\n"); 381 return -1; 382 } 383 384 ifname = argv[1]; 385 ifindex = if_nametoindex(ifname); 386 rxq = rxq_num(ifname); 387 388 printf("rxq: %d\n", rxq); 389 390 hwtstamp_enable(ifname); 391 392 rx_xsk = malloc(sizeof(struct xsk) * rxq); 393 if (!rx_xsk) 394 error(1, ENOMEM, "malloc"); 395 396 for (i = 0; i < rxq; i++) { 397 printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i); 398 ret = open_xsk(ifindex, &rx_xsk[i], i); 399 if (ret) 400 error(1, -ret, "open_xsk"); 401 402 printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket)); 403 } 404 405 printf("open bpf program...\n"); 406 bpf_obj = xdp_hw_metadata__open(); 407 if (libbpf_get_error(bpf_obj)) 408 error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open"); 409 410 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx"); 411 bpf_program__set_ifindex(prog, ifindex); 412 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); 413 414 printf("load bpf program...\n"); 415 ret = xdp_hw_metadata__load(bpf_obj); 416 if (ret) 417 error(1, -ret, "xdp_hw_metadata__load"); 418 419 printf("prepare skb endpoint...\n"); 420 server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000); 421 if (server_fd < 0) 422 error(1, errno, "start_server"); 423 timestamping_enable(server_fd, 424 SOF_TIMESTAMPING_SOFTWARE | 425 SOF_TIMESTAMPING_RAW_HARDWARE); 426 427 printf("prepare xsk map...\n"); 428 for (i = 0; i < rxq; i++) { 429 int sock_fd = xsk_socket__fd(rx_xsk[i].socket); 430 __u32 queue_id = i; 431 432 printf("map[%d] = %d\n", queue_id, sock_fd); 433 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0); 434 if (ret) 435 error(1, -ret, "bpf_map_update_elem"); 436 } 437 438 printf("attach bpf program...\n"); 439 ret = bpf_xdp_attach(ifindex, 440 bpf_program__fd(bpf_obj->progs.rx), 441 XDP_FLAGS, NULL); 442 if (ret) 443 error(1, -ret, "bpf_xdp_attach"); 444 445 signal(SIGINT, handle_signal); 446 ret = verify_metadata(rx_xsk, rxq, server_fd); 447 close(server_fd); 448 cleanup(); 449 if (ret) 450 error(1, -ret, "verify_metadata"); 451 } 452