// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
#include "xdp_metadata.skel.h"
#include "xdp_metadata2.skel.h"
#include "xdp_metadata.h"
#include "xsk.h"

#include <bpf/btf.h>
#include <linux/errqueue.h>
#include <linux/if_link.h>
#include <linux/net_tstamp.h>
#include <linux/udp.h>
#include <sys/mman.h>
#include <net/if.h>
#include <poll.h>

#define TX_NAME "veTX"
#define RX_NAME "veRX"

#define UDP_PAYLOAD_BYTES 4

#define AF_XDP_SOURCE_PORT 1234
#define AF_XDP_CONSUMER_PORT 8080

#define UMEM_NUM 16
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
#define XDP_FLAGS XDP_FLAGS_DRV_MODE
#define QUEUE_ID 0

#define TX_ADDR "10.0.0.1"
#define RX_ADDR "10.0.0.2"
#define PREFIX_LEN "8"
#define FAMILY AF_INET
struct xsk {
	void *umem_area;
	struct xsk_umem *umem;
	struct xsk_ring_prod fill;
	struct xsk_ring_cons comp;
	struct xsk_ring_prod tx;
	struct xsk_ring_cons rx;
	struct xsk_socket *socket;
};

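/* Map a UMEM area and create an AF_XDP socket on the given interface in
 * copy mode. The first UMEM_NUM / 2 frames are reserved for TX so that a
 * frame address maps 1:1 to its completion queue entry; the second half
 * is posted to the fill ring for RX.
 */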
static int open_xsk(int ifindex, struct xsk *xsk)
{
	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
	const struct xsk_socket_config socket_config = {
		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.bind_flags = XDP_COPY,
	};
	const struct xsk_umem_config umem_config = {
		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
	};
	__u32 idx;
	__u64 addr;
	int ret;
	int i;

	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
	if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap"))
		return -1;

	ret = xsk_umem__create(&xsk->umem,
			       xsk->umem_area, UMEM_SIZE,
			       &xsk->fill,
			       &xsk->comp,
			       &umem_config);
	if (!ASSERT_OK(ret, "xsk_umem__create"))
		return ret;

	ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID,
				 xsk->umem,
				 &xsk->rx,
				 &xsk->tx,
				 &socket_config);
	if (!ASSERT_OK(ret, "xsk_socket__create"))
		return ret;

	/* First half of umem is for TX. This way address matches 1-to-1
	 * to the completion queue index.
	 */

	for (i = 0; i < UMEM_NUM / 2; i++) {
		addr = i * UMEM_FRAME_SIZE;
		printf("%p: tx_desc[%d] -> %llx\n", xsk, i, addr);
	}

	/* Second half of umem is for RX. */

	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
	if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve"))
		return ret;
	if (!ASSERT_EQ(idx, 0, "fill idx != 0"))
		return -1;

	for (i = 0; i < UMEM_NUM / 2; i++) {
		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
		printf("%p: rx_desc[%d] -> %llx\n", xsk, i, addr);
		*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
	}
	xsk_ring_prod__submit(&xsk->fill, ret);

	return 0;
}

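/* Tear down the AF_XDP socket, then its UMEM, and unmap the backing area. */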
static void close_xsk(struct xsk *xsk)
{
	if (xsk->socket)
		xsk_socket__delete(xsk->socket);
	if (xsk->umem)
		xsk_umem__delete(xsk->umem);
	munmap(xsk->umem_area, UMEM_SIZE);
}

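/* Fold a 16-bit one's-complement sum over the IPv4 header and store it in
 * iph->check (the field is zeroed first so it does not contribute).
 */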
static void ip_csum(struct iphdr *iph)
{
	__u32 sum = 0;
	__u16 *p;
	int i;

	iph->check = 0;
	p = (void *)iph;
	for (i = 0; i < sizeof(*iph) / sizeof(*p); i++)
		sum += p[i];

	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	iph->check = ~sum;
}

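/* Build a minimal Ethernet/IPv4/UDP frame addressed to dst_port in the next
 * free TX frame, submit it on the TX ring and kick the kernel with an empty
 * sendto().
 */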
static int generate_packet(struct xsk *xsk, __u16 dst_port)
{
	struct xdp_desc *tx_desc;
	struct udphdr *udph;
	struct ethhdr *eth;
	struct iphdr *iph;
	void *data;
	__u32 idx;
	int ret;

	ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
	if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve"))
		return -1;

	tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
	tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
	printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
	data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);

	eth = data;
	iph = (void *)(eth + 1);
	udph = (void *)(iph + 1);

	memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN);
	memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN);
	eth->h_proto = htons(ETH_P_IP);

	iph->version = 0x4;
	iph->ihl = 0x5;
	iph->tos = 0x9;
	iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES);
	iph->id = 0;
	iph->frag_off = 0;
	iph->ttl = 0;
	iph->protocol = IPPROTO_UDP;
	ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
	ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
	ip_csum(iph);

	udph->source = htons(AF_XDP_SOURCE_PORT);
	udph->dest = htons(dst_port);
	udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES);
	udph->check = 0;

	memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES);

	tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES;
	xsk_ring_prod__submit(&xsk->tx, 1);

	ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
	if (!ASSERT_GE(ret, 0, "sendto"))
		return ret;

	return 0;
}

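/* Reap one TX completion so the frame can be reused. */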
static void complete_tx(struct xsk *xsk)
{
	__u32 idx;
	__u64 addr;

	if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
		addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);

		printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
		xsk_ring_cons__release(&xsk->comp, 1);
	}
}

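/* Return a consumed RX frame to the fill ring so it can be received into again. */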
static void refill_rx(struct xsk *xsk, __u64 addr)
{
	__u32 idx;

	if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
		printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
		*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
		xsk_ring_prod__submit(&xsk->fill, 1);
	}
}

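/* Receive one frame on the AF_XDP socket, sanity-check the packet headers
 * and verify the custom struct xdp_meta the XDP program prepended to the
 * frame (rx_timestamp and rx_hash must be non-zero).
 */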
static int verify_xsk_metadata(struct xsk *xsk)
{
	const struct xdp_desc *rx_desc;
	struct pollfd fds = {};
	struct xdp_meta *meta;
	struct ethhdr *eth;
	struct iphdr *iph;
	__u64 comp_addr;
	void *data;
	__u64 addr;
	__u32 idx;
	int ret;

	ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
	if (!ASSERT_EQ(ret, 0, "recvfrom"))
		return -1;

	fds.fd = xsk_socket__fd(xsk->socket);
	fds.events = POLLIN;

	ret = poll(&fds, 1, 1000);
	if (!ASSERT_GT(ret, 0, "poll"))
		return -1;

	ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
	if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek"))
		return -2;

	rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
	comp_addr = xsk_umem__extract_addr(rx_desc->addr);
	addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
	printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
	       xsk, idx, rx_desc->addr, addr, comp_addr);
	data = xsk_umem__get_data(xsk->umem_area, addr);

	/* Make sure we got the packet offset correctly. */

	eth = data;
	ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto");
	iph = (void *)(eth + 1);
	ASSERT_EQ((int)iph->version, 4, "iph->version");

	/* custom metadata */

	meta = data - sizeof(struct xdp_meta);

	if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp"))
		return -1;

	if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
		return -1;

	ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");

	xsk_ring_cons__release(&xsk->rx, 1);
	refill_rx(xsk, comp_addr);

	return 0;
}

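/* End-to-end test: create a veth pair in a fresh netns, open an AF_XDP
 * socket on each end, attach the xdp_metadata program on the RX side, send
 * one UDP packet from TX and check the RX metadata, then freplace the RX
 * program and make sure the replacement runs.
 */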
void test_xdp_metadata(void)
{
	struct xdp_metadata2 *bpf_obj2 = NULL;
	struct xdp_metadata *bpf_obj = NULL;
	struct bpf_program *new_prog, *prog;
	struct nstoken *tok = NULL;
	__u32 queue_id = QUEUE_ID;
	struct bpf_map *prog_arr;
	struct xsk tx_xsk = {};
	struct xsk rx_xsk = {};
	__u32 val, key = 0;
	int retries = 10;
	int rx_ifindex;
	int tx_ifindex;
	int sock_fd;
	int ret;

	/* Set up a new network namespace with a veth pair. */

	SYS(out, "ip netns add xdp_metadata");
	tok = open_netns("xdp_metadata");
	SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
	    " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
	SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01");
	SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02");
	SYS(out, "ip link set dev " TX_NAME " up");
	SYS(out, "ip link set dev " RX_NAME " up");
	SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
	SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);

	rx_ifindex = if_nametoindex(RX_NAME);
	tx_ifindex = if_nametoindex(TX_NAME);

	/* Set up separate AF_XDP sockets for the TX and RX interfaces. */

	ret = open_xsk(tx_ifindex, &tx_xsk);
	if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
		goto out;

	ret = open_xsk(rx_ifindex, &rx_xsk);
	if (!ASSERT_OK(ret, "open_xsk(RX_NAME)"))
		goto out;

	bpf_obj = xdp_metadata__open();
	if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
		goto out;

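	/* Make the program device-bound to the RX interface so that it may
	 * use the netdev's XDP metadata kfuncs.
	 */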
	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
	bpf_program__set_ifindex(prog, rx_ifindex);
	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);

	if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
		goto out;

	/* Make sure we can't add dev-bound programs to prog maps. */
	prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr");
	if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map"))
		goto out;

	val = bpf_program__fd(prog);
	if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key),
					     &val, sizeof(val), BPF_ANY),
			"update prog_arr"))
		goto out;

	/* Attach BPF program to RX interface. */

	ret = bpf_xdp_attach(rx_ifindex,
			     bpf_program__fd(bpf_obj->progs.rx),
			     XDP_FLAGS, NULL);
	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
		goto out;

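	/* Register the RX AF_XDP socket in the XSKMAP for queue 0 so the XDP
	 * program can redirect packets to it.
	 */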
	sock_fd = xsk_socket__fd(rx_xsk.socket);
	ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
	if (!ASSERT_GE(ret, 0, "bpf_map_update_elem"))
		goto out;

	/* Send a packet destined to the RX AF_XDP socket. */
	if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
		       "generate AF_XDP_CONSUMER_PORT"))
		goto out;

	/* Verify the AF_XDP RX packet carries the expected metadata. */
	if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0,
		       "verify_xsk_metadata"))
		goto out;

	complete_tx(&tx_xsk);

	/* Make sure freplace correctly picks up the original bound device
	 * and doesn't crash.
	 */

	bpf_obj2 = xdp_metadata2__open();
	if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton"))
		goto out;

	new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx");
	bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx");

	if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton"))
		goto out;

	if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace"))
		goto out;

	/* Send a packet to trigger the freplace program. */
	if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
		       "generate freplace packet"))
		goto out;

	while (retries--) {
		if (bpf_obj2->bss->called)
			break;
		usleep(10);
	}
	ASSERT_GT(bpf_obj2->bss->called, 0, "not called");

out:
	close_xsk(&rx_xsk);
	close_xsk(&tx_xsk);
	xdp_metadata2__destroy(bpf_obj2);
	xdp_metadata__destroy(bpf_obj);
	if (tok)
		close_netns(tok);
	SYS_NOFAIL("ip netns del xdp_metadata");
}