xref: /openbmc/linux/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
1  // SPDX-License-Identifier: GPL-2.0
2  #include <test_progs.h>
3  #include <network_helpers.h>
4  #include "xdp_metadata.skel.h"
5  #include "xdp_metadata2.skel.h"
6  #include "xdp_metadata.h"
7  #include "xsk.h"
8  
9  #include <bpf/btf.h>
10  #include <linux/errqueue.h>
11  #include <linux/if_link.h>
12  #include <linux/net_tstamp.h>
13  #include <linux/udp.h>
14  #include <sys/mman.h>
15  #include <net/if.h>
16  #include <poll.h>
17  
/* Interface names of the two ends of the veth pair. */
#define TX_NAME			"veTX"
#define RX_NAME			"veRX"

/* Number of payload bytes carried by each generated UDP packet. */
#define UDP_PAYLOAD_BYTES	4

/* UDP source and destination ports written into generated packets. */
#define AF_XDP_SOURCE_PORT	1234
#define AF_XDP_CONSUMER_PORT	8080

/* UMEM geometry: UMEM_NUM frames of UMEM_FRAME_SIZE bytes each. */
#define UMEM_NUM		16
#define UMEM_FRAME_SIZE		XSK_UMEM__DEFAULT_FRAME_SIZE
#define UMEM_SIZE		(UMEM_FRAME_SIZE * UMEM_NUM)
/* XDP attach flags and the queue the AF_XDP sockets are bound to. */
#define XDP_FLAGS		XDP_FLAGS_DRV_MODE
#define QUEUE_ID		0

/* IPv4 addressing assigned to the veth pair. */
#define TX_ADDR			"10.0.0.1"
#define RX_ADDR			"10.0.0.2"
#define PREFIX_LEN		"8"
#define FAMILY			AF_INET
36  
37  struct xsk {
38  	void *umem_area;
39  	struct xsk_umem *umem;
40  	struct xsk_ring_prod fill;
41  	struct xsk_ring_cons comp;
42  	struct xsk_ring_prod tx;
43  	struct xsk_ring_cons rx;
44  	struct xsk_socket *socket;
45  };
46  
open_xsk(int ifindex,struct xsk * xsk)47  static int open_xsk(int ifindex, struct xsk *xsk)
48  {
49  	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
50  	const struct xsk_socket_config socket_config = {
51  		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
52  		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
53  		.bind_flags = XDP_COPY,
54  	};
55  	const struct xsk_umem_config umem_config = {
56  		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
57  		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
58  		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
59  		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
60  	};
61  	__u32 idx;
62  	u64 addr;
63  	int ret;
64  	int i;
65  
66  	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
67  	if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap"))
68  		return -1;
69  
70  	ret = xsk_umem__create(&xsk->umem,
71  			       xsk->umem_area, UMEM_SIZE,
72  			       &xsk->fill,
73  			       &xsk->comp,
74  			       &umem_config);
75  	if (!ASSERT_OK(ret, "xsk_umem__create"))
76  		return ret;
77  
78  	ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID,
79  				 xsk->umem,
80  				 &xsk->rx,
81  				 &xsk->tx,
82  				 &socket_config);
83  	if (!ASSERT_OK(ret, "xsk_socket__create"))
84  		return ret;
85  
86  	/* First half of umem is for TX. This way address matches 1-to-1
87  	 * to the completion queue index.
88  	 */
89  
90  	for (i = 0; i < UMEM_NUM / 2; i++) {
91  		addr = i * UMEM_FRAME_SIZE;
92  		printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
93  	}
94  
95  	/* Second half of umem is for RX. */
96  
97  	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
98  	if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve"))
99  		return ret;
100  	if (!ASSERT_EQ(idx, 0, "fill idx != 0"))
101  		return -1;
102  
103  	for (i = 0; i < UMEM_NUM / 2; i++) {
104  		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
105  		printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
106  		*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
107  	}
108  	xsk_ring_prod__submit(&xsk->fill, ret);
109  
110  	return 0;
111  }
112  
close_xsk(struct xsk * xsk)113  static void close_xsk(struct xsk *xsk)
114  {
115  	if (xsk->umem)
116  		xsk_umem__delete(xsk->umem);
117  	if (xsk->socket)
118  		xsk_socket__delete(xsk->socket);
119  	munmap(xsk->umem_area, UMEM_SIZE);
120  }
121  
/* Compute the checksum over the fixed-size IPv4 header @iph and store it
 * in iph->check.
 */
static void ip_csum(struct iphdr *iph)
{
	const unsigned int nwords = sizeof(*iph) / sizeof(__u16);
	__u16 *word = (void *)iph;
	unsigned int n = 0;
	__u32 acc = 0;

	/* The checksum field itself has to be summed as zero. */
	iph->check = 0;

	while (n < nwords)
		acc += word[n++];

	/* Fold any carries above bit 15 back into the low 16 bits. */
	while (acc > 0xffff)
		acc = (acc >> 16) + (acc & 0xffff);

	iph->check = ~acc;
}
138  
/* Build one Ethernet/IPv4/UDP frame in a TX frame of @xsk's UMEM, queue it
 * on the TX ring and kick transmission with a zero-length sendto().
 *
 * The UDP destination port is @dst_port; the payload is UDP_PAYLOAD_BYTES
 * bytes of 0xAA.
 *
 * Returns 0 on success, -1 if no TX descriptor could be reserved, or the
 * negative sendto() result.
 */
static int generate_packet(struct xsk *xsk, __u16 dst_port)
{
	struct xdp_desc *desc;
	struct ethhdr *l2;
	struct iphdr *l3;
	struct udphdr *l4;
	__u32 slot;
	int err;

	err = xsk_ring_prod__reserve(&xsk->tx, 1, &slot);
	if (!ASSERT_EQ(err, 1, "xsk_ring_prod__reserve"))
		return -1;

	/* TX frames live in the first half of the UMEM. */
	desc = xsk_ring_prod__tx_desc(&xsk->tx, slot);
	desc->addr = slot % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
	printf("%p: tx_desc[%u]->addr=%llx\n", xsk, slot, desc->addr);

	/* The three headers are laid out back to back in the frame. */
	l2 = xsk_umem__get_data(xsk->umem_area, desc->addr);
	l3 = (struct iphdr *)(l2 + 1);
	l4 = (struct udphdr *)(l3 + 1);

	/* Ethernet header */
	memcpy(l2->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN);
	memcpy(l2->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN);
	l2->h_proto = htons(ETH_P_IP);

	/* IPv4 header */
	l3->version = 0x4;
	l3->ihl = 0x5;
	l3->tos = 0x9;
	l3->tot_len = htons(sizeof(*l3) + sizeof(*l4) + UDP_PAYLOAD_BYTES);
	l3->id = 0;
	l3->frag_off = 0;
	l3->ttl = 0;
	l3->protocol = IPPROTO_UDP;
	ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &l3->saddr), 1, "inet_pton(TX_ADDR)");
	ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &l3->daddr), 1, "inet_pton(RX_ADDR)");
	ip_csum(l3);

	/* UDP header, checksum left at zero */
	l4->source = htons(AF_XDP_SOURCE_PORT);
	l4->dest = htons(dst_port);
	l4->len = htons(sizeof(*l4) + UDP_PAYLOAD_BYTES);
	l4->check = 0;

	/* Payload */
	memset(l4 + 1, 0xAA, UDP_PAYLOAD_BYTES);

	desc->len = sizeof(*l2) + sizeof(*l3) + sizeof(*l4) + UDP_PAYLOAD_BYTES;
	xsk_ring_prod__submit(&xsk->tx, 1);

	err = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
	if (ASSERT_GE(err, 0, "sendto"))
		return 0;

	return err;
}
194  
complete_tx(struct xsk * xsk)195  static void complete_tx(struct xsk *xsk)
196  {
197  	__u32 idx;
198  	__u64 addr;
199  
200  	if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
201  		addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
202  
203  		printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
204  		xsk_ring_cons__release(&xsk->comp, 1);
205  	}
206  }
207  
/* Post the UMEM frame at @addr to @xsk's fill ring.
 *
 * Records a test failure and posts nothing if no fill-ring slot could be
 * reserved.
 */
static void refill_rx(struct xsk *xsk, __u64 addr)
{
	__u32 slot;

	if (!ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &slot), 1, "xsk_ring_prod__reserve"))
		return;

	printf("%p: complete idx=%u addr=%llx\n", xsk, slot, addr);
	*xsk_ring_prod__fill_addr(&xsk->fill, slot) = addr;
	xsk_ring_prod__submit(&xsk->fill, 1);
}
218  
verify_xsk_metadata(struct xsk * xsk)219  static int verify_xsk_metadata(struct xsk *xsk)
220  {
221  	const struct xdp_desc *rx_desc;
222  	struct pollfd fds = {};
223  	struct xdp_meta *meta;
224  	struct ethhdr *eth;
225  	struct iphdr *iph;
226  	__u64 comp_addr;
227  	void *data;
228  	__u64 addr;
229  	__u32 idx;
230  	int ret;
231  
232  	ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
233  	if (!ASSERT_EQ(ret, 0, "recvfrom"))
234  		return -1;
235  
236  	fds.fd = xsk_socket__fd(xsk->socket);
237  	fds.events = POLLIN;
238  
239  	ret = poll(&fds, 1, 1000);
240  	if (!ASSERT_GT(ret, 0, "poll"))
241  		return -1;
242  
243  	ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
244  	if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek"))
245  		return -2;
246  
247  	rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
248  	comp_addr = xsk_umem__extract_addr(rx_desc->addr);
249  	addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
250  	printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
251  	       xsk, idx, rx_desc->addr, addr, comp_addr);
252  	data = xsk_umem__get_data(xsk->umem_area, addr);
253  
254  	/* Make sure we got the packet offset correctly. */
255  
256  	eth = data;
257  	ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto");
258  	iph = (void *)(eth + 1);
259  	ASSERT_EQ((int)iph->version, 4, "iph->version");
260  
261  	/* custom metadata */
262  
263  	meta = data - sizeof(struct xdp_meta);
264  
265  	if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp"))
266  		return -1;
267  
268  	if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
269  		return -1;
270  
271  	ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");
272  
273  	xsk_ring_cons__release(&xsk->rx, 1);
274  	refill_rx(xsk, comp_addr);
275  
276  	return 0;
277  }
278  
test_xdp_metadata(void)279  void test_xdp_metadata(void)
280  {
281  	struct xdp_metadata2 *bpf_obj2 = NULL;
282  	struct xdp_metadata *bpf_obj = NULL;
283  	struct bpf_program *new_prog, *prog;
284  	struct nstoken *tok = NULL;
285  	__u32 queue_id = QUEUE_ID;
286  	struct bpf_map *prog_arr;
287  	struct xsk tx_xsk = {};
288  	struct xsk rx_xsk = {};
289  	__u32 val, key = 0;
290  	int retries = 10;
291  	int rx_ifindex;
292  	int tx_ifindex;
293  	int sock_fd;
294  	int ret;
295  
296  	/* Setup new networking namespace, with a veth pair. */
297  
298  	SYS(out, "ip netns add xdp_metadata");
299  	tok = open_netns("xdp_metadata");
300  	SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
301  	    " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
302  	SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01");
303  	SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02");
304  	SYS(out, "ip link set dev " TX_NAME " up");
305  	SYS(out, "ip link set dev " RX_NAME " up");
306  	SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
307  	SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
308  
309  	rx_ifindex = if_nametoindex(RX_NAME);
310  	tx_ifindex = if_nametoindex(TX_NAME);
311  
312  	/* Setup separate AF_XDP for TX and RX interfaces. */
313  
314  	ret = open_xsk(tx_ifindex, &tx_xsk);
315  	if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
316  		goto out;
317  
318  	ret = open_xsk(rx_ifindex, &rx_xsk);
319  	if (!ASSERT_OK(ret, "open_xsk(RX_NAME)"))
320  		goto out;
321  
322  	bpf_obj = xdp_metadata__open();
323  	if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
324  		goto out;
325  
326  	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
327  	bpf_program__set_ifindex(prog, rx_ifindex);
328  	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
329  
330  	if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
331  		goto out;
332  
333  	/* Make sure we can't add dev-bound programs to prog maps. */
334  	prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr");
335  	if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map"))
336  		goto out;
337  
338  	val = bpf_program__fd(prog);
339  	if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key),
340  					     &val, sizeof(val), BPF_ANY),
341  			"update prog_arr"))
342  		goto out;
343  
344  	/* Attach BPF program to RX interface. */
345  
346  	ret = bpf_xdp_attach(rx_ifindex,
347  			     bpf_program__fd(bpf_obj->progs.rx),
348  			     XDP_FLAGS, NULL);
349  	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
350  		goto out;
351  
352  	sock_fd = xsk_socket__fd(rx_xsk.socket);
353  	ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
354  	if (!ASSERT_GE(ret, 0, "bpf_map_update_elem"))
355  		goto out;
356  
357  	/* Send packet destined to RX AF_XDP socket. */
358  	if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
359  		       "generate AF_XDP_CONSUMER_PORT"))
360  		goto out;
361  
362  	/* Verify AF_XDP RX packet has proper metadata. */
363  	if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0,
364  		       "verify_xsk_metadata"))
365  		goto out;
366  
367  	complete_tx(&tx_xsk);
368  
369  	/* Make sure freplace correctly picks up original bound device
370  	 * and doesn't crash.
371  	 */
372  
373  	bpf_obj2 = xdp_metadata2__open();
374  	if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton"))
375  		goto out;
376  
377  	new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx");
378  	bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx");
379  
380  	if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton"))
381  		goto out;
382  
383  	if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace"))
384  		goto out;
385  
386  	/* Send packet to trigger . */
387  	if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
388  		       "generate freplace packet"))
389  		goto out;
390  
391  	while (!retries--) {
392  		if (bpf_obj2->bss->called)
393  			break;
394  		usleep(10);
395  	}
396  	ASSERT_GT(bpf_obj2->bss->called, 0, "not called");
397  
398  out:
399  	close_xsk(&rx_xsk);
400  	close_xsk(&tx_xsk);
401  	xdp_metadata2__destroy(bpf_obj2);
402  	xdp_metadata__destroy(bpf_obj);
403  	if (tok)
404  		close_netns(tok);
405  	SYS_NOFAIL("ip netns del xdp_metadata");
406  }
407