1 // SPDX-License-Identifier: GPL-2.0
2 #include <test_progs.h>
3 #include <network_helpers.h>
4 #include "xdp_metadata.skel.h"
5 #include "xdp_metadata2.skel.h"
6 #include "xdp_metadata.h"
7 #include "xsk.h"
8
9 #include <bpf/btf.h>
10 #include <linux/errqueue.h>
11 #include <linux/if_link.h>
12 #include <linux/net_tstamp.h>
13 #include <linux/udp.h>
14 #include <sys/mman.h>
15 #include <net/if.h>
16 #include <poll.h>
17
/* Interface names of the two ends of the veth pair created by the test. */
#define TX_NAME			"veTX"
#define RX_NAME			"veRX"

/* Number of payload bytes placed after the UDP header of generated packets. */
#define UDP_PAYLOAD_BYTES	4

/* UDP source port of generated packets and the destination port the test sends to. */
#define AF_XDP_SOURCE_PORT	1234
#define AF_XDP_CONSUMER_PORT	8080

/* UMEM geometry: UMEM_NUM frames of UMEM_FRAME_SIZE bytes each. */
#define UMEM_NUM		16
#define UMEM_FRAME_SIZE		XSK_UMEM__DEFAULT_FRAME_SIZE
#define UMEM_SIZE		(UMEM_FRAME_SIZE * UMEM_NUM)

/* Flags passed to bpf_xdp_attach() and the queue the AF_XDP sockets bind to. */
#define XDP_FLAGS		XDP_FLAGS_DRV_MODE
#define QUEUE_ID		0

/* Addresses assigned to the veth pair; also used in generated IP headers. */
#define TX_ADDR			"10.0.0.1"
#define RX_ADDR			"10.0.0.2"
#define PREFIX_LEN		"8"
#define FAMILY			AF_INET
36
37 struct xsk {
38 void *umem_area;
39 struct xsk_umem *umem;
40 struct xsk_ring_prod fill;
41 struct xsk_ring_cons comp;
42 struct xsk_ring_prod tx;
43 struct xsk_ring_cons rx;
44 struct xsk_socket *socket;
45 };
46
open_xsk(int ifindex,struct xsk * xsk)47 static int open_xsk(int ifindex, struct xsk *xsk)
48 {
49 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
50 const struct xsk_socket_config socket_config = {
51 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
52 .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
53 .bind_flags = XDP_COPY,
54 };
55 const struct xsk_umem_config umem_config = {
56 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
57 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
58 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
59 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
60 };
61 __u32 idx;
62 u64 addr;
63 int ret;
64 int i;
65
66 xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
67 if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap"))
68 return -1;
69
70 ret = xsk_umem__create(&xsk->umem,
71 xsk->umem_area, UMEM_SIZE,
72 &xsk->fill,
73 &xsk->comp,
74 &umem_config);
75 if (!ASSERT_OK(ret, "xsk_umem__create"))
76 return ret;
77
78 ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID,
79 xsk->umem,
80 &xsk->rx,
81 &xsk->tx,
82 &socket_config);
83 if (!ASSERT_OK(ret, "xsk_socket__create"))
84 return ret;
85
86 /* First half of umem is for TX. This way address matches 1-to-1
87 * to the completion queue index.
88 */
89
90 for (i = 0; i < UMEM_NUM / 2; i++) {
91 addr = i * UMEM_FRAME_SIZE;
92 printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
93 }
94
95 /* Second half of umem is for RX. */
96
97 ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
98 if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve"))
99 return ret;
100 if (!ASSERT_EQ(idx, 0, "fill idx != 0"))
101 return -1;
102
103 for (i = 0; i < UMEM_NUM / 2; i++) {
104 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
105 printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
106 *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
107 }
108 xsk_ring_prod__submit(&xsk->fill, ret);
109
110 return 0;
111 }
112
close_xsk(struct xsk * xsk)113 static void close_xsk(struct xsk *xsk)
114 {
115 if (xsk->umem)
116 xsk_umem__delete(xsk->umem);
117 if (xsk->socket)
118 xsk_socket__delete(xsk->socket);
119 munmap(xsk->umem_area, UMEM_SIZE);
120 }
121
/*
 * Compute and store the checksum of the IPv4 header @iph.
 *
 * The sum covers exactly sizeof(struct iphdr) bytes, taken as 16-bit
 * words, with the checksum field itself counted as zero.
 */
static void ip_csum(struct iphdr *iph)
{
	const __u16 *word = (void *)iph;
	const __u16 *end = word + sizeof(*iph) / sizeof(*word);
	__u32 acc = 0;

	iph->check = 0;
	while (word < end)
		acc += *word++;

	/* Fold the carries back into the low 16 bits until none remain. */
	while (acc >> 16)
		acc = (acc >> 16) + (acc & 0xffff);

	iph->check = ~acc;
}
138
generate_packet(struct xsk * xsk,__u16 dst_port)139 static int generate_packet(struct xsk *xsk, __u16 dst_port)
140 {
141 struct xdp_desc *tx_desc;
142 struct udphdr *udph;
143 struct ethhdr *eth;
144 struct iphdr *iph;
145 void *data;
146 __u32 idx;
147 int ret;
148
149 ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
150 if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve"))
151 return -1;
152
153 tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
154 tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
155 printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
156 data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
157
158 eth = data;
159 iph = (void *)(eth + 1);
160 udph = (void *)(iph + 1);
161
162 memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN);
163 memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN);
164 eth->h_proto = htons(ETH_P_IP);
165
166 iph->version = 0x4;
167 iph->ihl = 0x5;
168 iph->tos = 0x9;
169 iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES);
170 iph->id = 0;
171 iph->frag_off = 0;
172 iph->ttl = 0;
173 iph->protocol = IPPROTO_UDP;
174 ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
175 ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
176 ip_csum(iph);
177
178 udph->source = htons(AF_XDP_SOURCE_PORT);
179 udph->dest = htons(dst_port);
180 udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES);
181 udph->check = 0;
182
183 memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES);
184
185 tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES;
186 xsk_ring_prod__submit(&xsk->tx, 1);
187
188 ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
189 if (!ASSERT_GE(ret, 0, "sendto"))
190 return ret;
191
192 return 0;
193 }
194
complete_tx(struct xsk * xsk)195 static void complete_tx(struct xsk *xsk)
196 {
197 __u32 idx;
198 __u64 addr;
199
200 if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
201 addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
202
203 printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
204 xsk_ring_cons__release(&xsk->comp, 1);
205 }
206 }
207
refill_rx(struct xsk * xsk,__u64 addr)208 static void refill_rx(struct xsk *xsk, __u64 addr)
209 {
210 __u32 idx;
211
212 if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
213 printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
214 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
215 xsk_ring_prod__submit(&xsk->fill, 1);
216 }
217 }
218
verify_xsk_metadata(struct xsk * xsk)219 static int verify_xsk_metadata(struct xsk *xsk)
220 {
221 const struct xdp_desc *rx_desc;
222 struct pollfd fds = {};
223 struct xdp_meta *meta;
224 struct ethhdr *eth;
225 struct iphdr *iph;
226 __u64 comp_addr;
227 void *data;
228 __u64 addr;
229 __u32 idx;
230 int ret;
231
232 ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
233 if (!ASSERT_EQ(ret, 0, "recvfrom"))
234 return -1;
235
236 fds.fd = xsk_socket__fd(xsk->socket);
237 fds.events = POLLIN;
238
239 ret = poll(&fds, 1, 1000);
240 if (!ASSERT_GT(ret, 0, "poll"))
241 return -1;
242
243 ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
244 if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek"))
245 return -2;
246
247 rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
248 comp_addr = xsk_umem__extract_addr(rx_desc->addr);
249 addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
250 printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
251 xsk, idx, rx_desc->addr, addr, comp_addr);
252 data = xsk_umem__get_data(xsk->umem_area, addr);
253
254 /* Make sure we got the packet offset correctly. */
255
256 eth = data;
257 ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto");
258 iph = (void *)(eth + 1);
259 ASSERT_EQ((int)iph->version, 4, "iph->version");
260
261 /* custom metadata */
262
263 meta = data - sizeof(struct xdp_meta);
264
265 if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp"))
266 return -1;
267
268 if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
269 return -1;
270
271 ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");
272
273 xsk_ring_cons__release(&xsk->rx, 1);
274 refill_rx(xsk, comp_addr);
275
276 return 0;
277 }
278
test_xdp_metadata(void)279 void test_xdp_metadata(void)
280 {
281 struct xdp_metadata2 *bpf_obj2 = NULL;
282 struct xdp_metadata *bpf_obj = NULL;
283 struct bpf_program *new_prog, *prog;
284 struct nstoken *tok = NULL;
285 __u32 queue_id = QUEUE_ID;
286 struct bpf_map *prog_arr;
287 struct xsk tx_xsk = {};
288 struct xsk rx_xsk = {};
289 __u32 val, key = 0;
290 int retries = 10;
291 int rx_ifindex;
292 int tx_ifindex;
293 int sock_fd;
294 int ret;
295
296 /* Setup new networking namespace, with a veth pair. */
297
298 SYS(out, "ip netns add xdp_metadata");
299 tok = open_netns("xdp_metadata");
300 SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
301 " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
302 SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01");
303 SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02");
304 SYS(out, "ip link set dev " TX_NAME " up");
305 SYS(out, "ip link set dev " RX_NAME " up");
306 SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
307 SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
308
309 rx_ifindex = if_nametoindex(RX_NAME);
310 tx_ifindex = if_nametoindex(TX_NAME);
311
312 /* Setup separate AF_XDP for TX and RX interfaces. */
313
314 ret = open_xsk(tx_ifindex, &tx_xsk);
315 if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
316 goto out;
317
318 ret = open_xsk(rx_ifindex, &rx_xsk);
319 if (!ASSERT_OK(ret, "open_xsk(RX_NAME)"))
320 goto out;
321
322 bpf_obj = xdp_metadata__open();
323 if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
324 goto out;
325
326 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
327 bpf_program__set_ifindex(prog, rx_ifindex);
328 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
329
330 if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
331 goto out;
332
333 /* Make sure we can't add dev-bound programs to prog maps. */
334 prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr");
335 if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map"))
336 goto out;
337
338 val = bpf_program__fd(prog);
339 if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key),
340 &val, sizeof(val), BPF_ANY),
341 "update prog_arr"))
342 goto out;
343
344 /* Attach BPF program to RX interface. */
345
346 ret = bpf_xdp_attach(rx_ifindex,
347 bpf_program__fd(bpf_obj->progs.rx),
348 XDP_FLAGS, NULL);
349 if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
350 goto out;
351
352 sock_fd = xsk_socket__fd(rx_xsk.socket);
353 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
354 if (!ASSERT_GE(ret, 0, "bpf_map_update_elem"))
355 goto out;
356
357 /* Send packet destined to RX AF_XDP socket. */
358 if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
359 "generate AF_XDP_CONSUMER_PORT"))
360 goto out;
361
362 /* Verify AF_XDP RX packet has proper metadata. */
363 if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0,
364 "verify_xsk_metadata"))
365 goto out;
366
367 complete_tx(&tx_xsk);
368
369 /* Make sure freplace correctly picks up original bound device
370 * and doesn't crash.
371 */
372
373 bpf_obj2 = xdp_metadata2__open();
374 if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton"))
375 goto out;
376
377 new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx");
378 bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx");
379
380 if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton"))
381 goto out;
382
383 if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace"))
384 goto out;
385
386 /* Send packet to trigger . */
387 if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
388 "generate freplace packet"))
389 goto out;
390
391 while (!retries--) {
392 if (bpf_obj2->bss->called)
393 break;
394 usleep(10);
395 }
396 ASSERT_GT(bpf_obj2->bss->called, 0, "not called");
397
398 out:
399 close_xsk(&rx_xsk);
400 close_xsk(&tx_xsk);
401 xdp_metadata2__destroy(bpf_obj2);
402 xdp_metadata__destroy(bpf_obj);
403 if (tok)
404 close_netns(tok);
405 SYS_NOFAIL("ip netns del xdp_metadata");
406 }
407