1 // SPDX-License-Identifier: GPL-2.0 2 #include <test_progs.h> 3 #include <network_helpers.h> 4 #include "xdp_metadata.skel.h" 5 #include "xdp_metadata2.skel.h" 6 #include "xdp_metadata.h" 7 #include "xsk.h" 8 9 #include <bpf/btf.h> 10 #include <linux/errqueue.h> 11 #include <linux/if_link.h> 12 #include <linux/net_tstamp.h> 13 #include <linux/udp.h> 14 #include <sys/mman.h> 15 #include <net/if.h> 16 #include <poll.h> 17 18 #define TX_NAME "veTX" 19 #define RX_NAME "veRX" 20 21 #define UDP_PAYLOAD_BYTES 4 22 23 #define AF_XDP_SOURCE_PORT 1234 24 #define AF_XDP_CONSUMER_PORT 8080 25 26 #define UMEM_NUM 16 27 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE 28 #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) 29 #define XDP_FLAGS XDP_FLAGS_DRV_MODE 30 #define QUEUE_ID 0 31 32 #define TX_ADDR "10.0.0.1" 33 #define RX_ADDR "10.0.0.2" 34 #define PREFIX_LEN "8" 35 #define FAMILY AF_INET 36 37 struct xsk { 38 void *umem_area; 39 struct xsk_umem *umem; 40 struct xsk_ring_prod fill; 41 struct xsk_ring_cons comp; 42 struct xsk_ring_prod tx; 43 struct xsk_ring_cons rx; 44 struct xsk_socket *socket; 45 }; 46 47 static int open_xsk(int ifindex, struct xsk *xsk) 48 { 49 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; 50 const struct xsk_socket_config socket_config = { 51 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 52 .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 53 .bind_flags = XDP_COPY, 54 }; 55 const struct xsk_umem_config umem_config = { 56 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 57 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 58 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, 59 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, 60 }; 61 __u32 idx; 62 u64 addr; 63 int ret; 64 int i; 65 66 xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); 67 if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap")) 68 return -1; 69 70 ret = xsk_umem__create(&xsk->umem, 71 xsk->umem_area, UMEM_SIZE, 72 &xsk->fill, 73 &xsk->comp, 74 &umem_config); 75 if (!ASSERT_OK(ret, "xsk_umem__create")) 76 return ret; 77 78 ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID, 79 xsk->umem, 80 &xsk->rx, 81 &xsk->tx, 82 &socket_config); 83 if (!ASSERT_OK(ret, "xsk_socket__create")) 84 return ret; 85 86 /* First half of umem is for TX. This way address matches 1-to-1 87 * to the completion queue index. 88 */ 89 90 for (i = 0; i < UMEM_NUM / 2; i++) { 91 addr = i * UMEM_FRAME_SIZE; 92 printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr); 93 } 94 95 /* Second half of umem is for RX. */ 96 97 ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx); 98 if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve")) 99 return ret; 100 if (!ASSERT_EQ(idx, 0, "fill idx != 0")) 101 return -1; 102 103 for (i = 0; i < UMEM_NUM / 2; i++) { 104 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; 105 printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); 106 *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; 107 } 108 xsk_ring_prod__submit(&xsk->fill, ret); 109 110 return 0; 111 } 112 113 static void close_xsk(struct xsk *xsk) 114 { 115 if (xsk->umem) 116 xsk_umem__delete(xsk->umem); 117 if (xsk->socket) 118 xsk_socket__delete(xsk->socket); 119 munmap(xsk->umem_area, UMEM_SIZE); 120 } 121 122 static void ip_csum(struct iphdr *iph) 123 { 124 __u32 sum = 0; 125 __u16 *p; 126 int i; 127 128 iph->check = 0; 129 p = (void *)iph; 130 for (i = 0; i < sizeof(*iph) / sizeof(*p); i++) 131 sum += p[i]; 132 133 while (sum >> 16) 134 sum = (sum & 0xffff) + (sum >> 16); 135 136 iph->check = ~sum; 137 } 138 139 static int generate_packet(struct xsk *xsk, __u16 dst_port) 140 { 141 struct xdp_desc *tx_desc; 142 struct udphdr *udph; 143 struct ethhdr *eth; 144 struct iphdr *iph; 145 void *data; 146 __u32 idx; 147 int ret; 148 149 ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx); 150 if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve")) 151 return -1; 152 153 tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); 154 tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE; 155 printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr); 156 data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); 157 158 eth = data; 159 iph = (void *)(eth + 1); 160 udph = (void *)(iph + 1); 161 162 memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN); 163 memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN); 164 eth->h_proto = htons(ETH_P_IP); 165 166 iph->version = 0x4; 167 iph->ihl = 0x5; 168 iph->tos = 0x9; 169 iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES); 170 iph->id = 0; 171 iph->frag_off = 0; 172 iph->ttl = 0; 173 iph->protocol = IPPROTO_UDP; 174 ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)"); 175 ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); 176 ip_csum(iph); 177 178 udph->source = htons(AF_XDP_SOURCE_PORT); 179 udph->dest = htons(dst_port); 180 udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); 181 udph->check = 0; 182 183 memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES); 184 185 tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES; 186 xsk_ring_prod__submit(&xsk->tx, 1); 187 188 ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); 189 if (!ASSERT_GE(ret, 0, "sendto")) 190 return ret; 191 192 return 0; 193 } 194 195 static void complete_tx(struct xsk *xsk) 196 { 197 __u32 idx; 198 __u64 addr; 199 200 if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) { 201 addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); 202 203 printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); 204 xsk_ring_cons__release(&xsk->comp, 1); 205 } 206 } 207 208 static void refill_rx(struct xsk *xsk, __u64 addr) 209 { 210 __u32 idx; 211 212 if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) { 213 printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); 214 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; 215 xsk_ring_prod__submit(&xsk->fill, 1); 216 } 217 } 218 219 static int verify_xsk_metadata(struct xsk *xsk) 220 { 221 const struct xdp_desc *rx_desc; 222 struct pollfd fds = {}; 223 struct xdp_meta *meta; 224 struct ethhdr *eth; 225 struct iphdr *iph; 226 __u64 comp_addr; 227 void *data; 228 __u64 addr; 229 __u32 idx; 230 int ret; 231 232 ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); 233 if (!ASSERT_EQ(ret, 0, "recvfrom")) 234 return -1; 235 236 fds.fd = xsk_socket__fd(xsk->socket); 237 fds.events = POLLIN; 238 239 ret = poll(&fds, 1, 1000); 240 if (!ASSERT_GT(ret, 0, "poll")) 241 return -1; 242 243 ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx); 244 if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek")) 245 return -2; 246 247 rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx); 248 comp_addr = xsk_umem__extract_addr(rx_desc->addr); 249 addr = xsk_umem__add_offset_to_addr(rx_desc->addr); 250 printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n", 251 xsk, idx, rx_desc->addr, addr, comp_addr); 252 data = xsk_umem__get_data(xsk->umem_area, addr); 253 254 /* Make sure we got the packet offset correctly. */ 255 256 eth = data; 257 ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto"); 258 iph = (void *)(eth + 1); 259 ASSERT_EQ((int)iph->version, 4, "iph->version"); 260 261 /* custom metadata */ 262 263 meta = data - sizeof(struct xdp_meta); 264 265 if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp")) 266 return -1; 267 268 if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash")) 269 return -1; 270 271 ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); 272 273 xsk_ring_cons__release(&xsk->rx, 1); 274 refill_rx(xsk, comp_addr); 275 276 return 0; 277 } 278 279 void test_xdp_metadata(void) 280 { 281 struct xdp_metadata2 *bpf_obj2 = NULL; 282 struct xdp_metadata *bpf_obj = NULL; 283 struct bpf_program *new_prog, *prog; 284 struct nstoken *tok = NULL; 285 __u32 queue_id = QUEUE_ID; 286 struct bpf_map *prog_arr; 287 struct xsk tx_xsk = {}; 288 struct xsk rx_xsk = {}; 289 __u32 val, key = 0; 290 int retries = 10; 291 int rx_ifindex; 292 int tx_ifindex; 293 int sock_fd; 294 int ret; 295 296 /* Setup new networking namespace, with a veth pair. */ 297 298 SYS(out, "ip netns add xdp_metadata"); 299 tok = open_netns("xdp_metadata"); 300 SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME 301 " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); 302 SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01"); 303 SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02"); 304 SYS(out, "ip link set dev " TX_NAME " up"); 305 SYS(out, "ip link set dev " RX_NAME " up"); 306 SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); 307 SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); 308 309 rx_ifindex = if_nametoindex(RX_NAME); 310 tx_ifindex = if_nametoindex(TX_NAME); 311 312 /* Setup separate AF_XDP for TX and RX interfaces. */ 313 314 ret = open_xsk(tx_ifindex, &tx_xsk); 315 if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) 316 goto out; 317 318 ret = open_xsk(rx_ifindex, &rx_xsk); 319 if (!ASSERT_OK(ret, "open_xsk(RX_NAME)")) 320 goto out; 321 322 bpf_obj = xdp_metadata__open(); 323 if (!ASSERT_OK_PTR(bpf_obj, "open skeleton")) 324 goto out; 325 326 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx"); 327 bpf_program__set_ifindex(prog, rx_ifindex); 328 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); 329 330 if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton")) 331 goto out; 332 333 /* Make sure we can't add dev-bound programs to prog maps. */ 334 prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr"); 335 if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map")) 336 goto out; 337 338 val = bpf_program__fd(prog); 339 if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key), 340 &val, sizeof(val), BPF_ANY), 341 "update prog_arr")) 342 goto out; 343 344 /* Attach BPF program to RX interface. */ 345 346 ret = bpf_xdp_attach(rx_ifindex, 347 bpf_program__fd(bpf_obj->progs.rx), 348 XDP_FLAGS, NULL); 349 if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) 350 goto out; 351 352 sock_fd = xsk_socket__fd(rx_xsk.socket); 353 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0); 354 if (!ASSERT_GE(ret, 0, "bpf_map_update_elem")) 355 goto out; 356 357 /* Send packet destined to RX AF_XDP socket. */ 358 if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, 359 "generate AF_XDP_CONSUMER_PORT")) 360 goto out; 361 362 /* Verify AF_XDP RX packet has proper metadata. */ 363 if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0, 364 "verify_xsk_metadata")) 365 goto out; 366 367 complete_tx(&tx_xsk); 368 369 /* Make sure freplace correctly picks up original bound device 370 * and doesn't crash. 371 */ 372 373 bpf_obj2 = xdp_metadata2__open(); 374 if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton")) 375 goto out; 376 377 new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx"); 378 bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx"); 379 380 if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton")) 381 goto out; 382 383 if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace")) 384 goto out; 385 386 /* Send packet to trigger . */ 387 if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, 388 "generate freplace packet")) 389 goto out; 390 391 while (!retries--) { 392 if (bpf_obj2->bss->called) 393 break; 394 usleep(10); 395 } 396 ASSERT_GT(bpf_obj2->bss->called, 0, "not called"); 397 398 out: 399 close_xsk(&rx_xsk); 400 close_xsk(&tx_xsk); 401 xdp_metadata2__destroy(bpf_obj2); 402 xdp_metadata__destroy(bpf_obj); 403 if (tok) 404 close_netns(tok); 405 SYS_NOFAIL("ip netns del xdp_metadata"); 406 } 407