1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* Reference program for verifying XDP metadata on real HW. Functional test
4  * only, doesn't test the performance.
5  *
6  * RX:
7  * - UDP 9091 packets are diverted into AF_XDP
8  * - Metadata verified:
9  *   - rx_timestamp
10  *   - rx_hash
11  *
12  * TX:
13  * - TBD
14  */
15 
16 #include <test_progs.h>
17 #include <network_helpers.h>
18 #include "xdp_hw_metadata.skel.h"
19 #include "xsk.h"
20 
21 #include <error.h>
22 #include <linux/errqueue.h>
23 #include <linux/if_link.h>
24 #include <linux/net_tstamp.h>
25 #include <linux/udp.h>
26 #include <linux/sockios.h>
27 #include <sys/mman.h>
28 #include <net/if.h>
29 #include <poll.h>
30 
31 #include "xdp_metadata.h"
32 
/* Number of frames in each socket's UMEM; open_xsk() leaves the first half
 * for TX and posts the second half to the fill ring for RX.
 */
#define UMEM_NUM 16
/* Size of a single UMEM frame, in bytes. */
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
/* Total size of the mmap()ed UMEM area, in bytes. */
#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
/* Flags passed to both bpf_xdp_attach() and bpf_xdp_detach(). */
#define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE)
37 
38 struct xsk {
39 	void *umem_area;
40 	struct xsk_umem *umem;
41 	struct xsk_ring_prod fill;
42 	struct xsk_ring_cons comp;
43 	struct xsk_ring_prod tx;
44 	struct xsk_ring_cons rx;
45 	struct xsk_socket *socket;
46 };
47 
/* Program-wide state, filled in by main() and consumed by cleanup(). */
struct xdp_hw_metadata *bpf_obj;	/* BPF skeleton; NULL until opened */
struct xsk *rx_xsk;			/* array of rxq AF_XDP sockets */
const char *ifname;			/* device name from the command line */
int ifindex;				/* interface index of ifname */
int rxq;				/* number of entries in rx_xsk */
53 
/* Intentionally empty stub: network_helpers.c references this symbol. */
void test__fail(void)
{
	/* for network_helpers.c */
}
55 
56 static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
57 {
58 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
59 	const struct xsk_socket_config socket_config = {
60 		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
61 		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
62 		.bind_flags = XDP_COPY,
63 	};
64 	const struct xsk_umem_config umem_config = {
65 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
66 		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
67 		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
68 		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
69 	};
70 	__u32 idx;
71 	u64 addr;
72 	int ret;
73 	int i;
74 
75 	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
76 	if (xsk->umem_area == MAP_FAILED)
77 		return -ENOMEM;
78 
79 	ret = xsk_umem__create(&xsk->umem,
80 			       xsk->umem_area, UMEM_SIZE,
81 			       &xsk->fill,
82 			       &xsk->comp,
83 			       &umem_config);
84 	if (ret)
85 		return ret;
86 
87 	ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
88 				 xsk->umem,
89 				 &xsk->rx,
90 				 &xsk->tx,
91 				 &socket_config);
92 	if (ret)
93 		return ret;
94 
95 	/* First half of umem is for TX. This way address matches 1-to-1
96 	 * to the completion queue index.
97 	 */
98 
99 	for (i = 0; i < UMEM_NUM / 2; i++) {
100 		addr = i * UMEM_FRAME_SIZE;
101 		printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
102 	}
103 
104 	/* Second half of umem is for RX. */
105 
106 	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
107 	for (i = 0; i < UMEM_NUM / 2; i++) {
108 		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
109 		printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
110 		*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
111 	}
112 	xsk_ring_prod__submit(&xsk->fill, ret);
113 
114 	return 0;
115 }
116 
117 static void close_xsk(struct xsk *xsk)
118 {
119 	if (xsk->umem)
120 		xsk_umem__delete(xsk->umem);
121 	if (xsk->socket)
122 		xsk_socket__delete(xsk->socket);
123 	munmap(xsk->umem_area, UMEM_SIZE);
124 }
125 
126 static void refill_rx(struct xsk *xsk, __u64 addr)
127 {
128 	__u32 idx;
129 
130 	if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) {
131 		printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
132 		*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
133 		xsk_ring_prod__submit(&xsk->fill, 1);
134 	}
135 }
136 
137 static void verify_xdp_metadata(void *data)
138 {
139 	struct xdp_meta *meta;
140 
141 	meta = data - sizeof(*meta);
142 
143 	printf("rx_timestamp: %llu\n", meta->rx_timestamp);
144 	if (meta->rx_hash_err < 0)
145 		printf("No rx_hash err=%d\n", meta->rx_hash_err);
146 	else
147 		printf("rx_hash: 0x%X with RSS type:0x%X\n",
148 		       meta->rx_hash, meta->rx_hash_type);
149 }
150 
151 static void verify_skb_metadata(int fd)
152 {
153 	char cmsg_buf[1024];
154 	char packet_buf[128];
155 
156 	struct scm_timestamping *ts;
157 	struct iovec packet_iov;
158 	struct cmsghdr *cmsg;
159 	struct msghdr hdr;
160 
161 	memset(&hdr, 0, sizeof(hdr));
162 	hdr.msg_iov = &packet_iov;
163 	hdr.msg_iovlen = 1;
164 	packet_iov.iov_base = packet_buf;
165 	packet_iov.iov_len = sizeof(packet_buf);
166 
167 	hdr.msg_control = cmsg_buf;
168 	hdr.msg_controllen = sizeof(cmsg_buf);
169 
170 	if (recvmsg(fd, &hdr, 0) < 0)
171 		error(1, errno, "recvmsg");
172 
173 	for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
174 	     cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
175 
176 		if (cmsg->cmsg_level != SOL_SOCKET)
177 			continue;
178 
179 		switch (cmsg->cmsg_type) {
180 		case SCM_TIMESTAMPING:
181 			ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
182 			if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
183 				printf("found skb hwtstamp = %lu.%lu\n",
184 				       ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
185 				return;
186 			}
187 			break;
188 		default:
189 			break;
190 		}
191 	}
192 
193 	printf("skb hwtstamp is not found!\n");
194 }
195 
196 static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
197 {
198 	const struct xdp_desc *rx_desc;
199 	struct pollfd fds[rxq + 1];
200 	__u64 comp_addr;
201 	__u64 addr;
202 	__u32 idx;
203 	int ret;
204 	int i;
205 
206 	for (i = 0; i < rxq; i++) {
207 		fds[i].fd = xsk_socket__fd(rx_xsk[i].socket);
208 		fds[i].events = POLLIN;
209 		fds[i].revents = 0;
210 	}
211 
212 	fds[rxq].fd = server_fd;
213 	fds[rxq].events = POLLIN;
214 	fds[rxq].revents = 0;
215 
216 	while (true) {
217 		errno = 0;
218 		ret = poll(fds, rxq + 1, 1000);
219 		printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
220 		       ret, errno, bpf_obj->bss->pkts_skip,
221 		       bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
222 		if (ret < 0)
223 			break;
224 		if (ret == 0)
225 			continue;
226 
227 		if (fds[rxq].revents)
228 			verify_skb_metadata(server_fd);
229 
230 		for (i = 0; i < rxq; i++) {
231 			if (fds[i].revents == 0)
232 				continue;
233 
234 			struct xsk *xsk = &rx_xsk[i];
235 
236 			ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
237 			printf("xsk_ring_cons__peek: %d\n", ret);
238 			if (ret != 1)
239 				continue;
240 
241 			rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
242 			comp_addr = xsk_umem__extract_addr(rx_desc->addr);
243 			addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
244 			printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
245 			       xsk, idx, rx_desc->addr, addr, comp_addr);
246 			verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr));
247 			xsk_ring_cons__release(&xsk->rx, 1);
248 			refill_rx(xsk, comp_addr);
249 		}
250 	}
251 
252 	return 0;
253 }
254 
255 struct ethtool_channels {
256 	__u32	cmd;
257 	__u32	max_rx;
258 	__u32	max_tx;
259 	__u32	max_other;
260 	__u32	max_combined;
261 	__u32	rx_count;
262 	__u32	tx_count;
263 	__u32	other_count;
264 	__u32	combined_count;
265 };
266 
267 #define ETHTOOL_GCHANNELS	0x0000003c /* Get no of channels */
268 
269 static int rxq_num(const char *ifname)
270 {
271 	struct ethtool_channels ch = {
272 		.cmd = ETHTOOL_GCHANNELS,
273 	};
274 
275 	struct ifreq ifr = {
276 		.ifr_data = (void *)&ch,
277 	};
278 	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
279 	int fd, ret;
280 
281 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
282 	if (fd < 0)
283 		error(1, errno, "socket");
284 
285 	ret = ioctl(fd, SIOCETHTOOL, &ifr);
286 	if (ret < 0)
287 		error(1, errno, "ioctl(SIOCETHTOOL)");
288 
289 	close(fd);
290 
291 	return ch.rx_count + ch.combined_count;
292 }
293 
294 static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
295 {
296 	struct ifreq ifr = {
297 		.ifr_data = (void *)cfg,
298 	};
299 	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
300 	int fd, ret;
301 
302 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
303 	if (fd < 0)
304 		error(1, errno, "socket");
305 
306 	ret = ioctl(fd, op, &ifr);
307 	if (ret < 0)
308 		error(1, errno, "ioctl(%d)", op);
309 
310 	close(fd);
311 }
312 
313 static struct hwtstamp_config saved_hwtstamp_cfg;
314 static const char *saved_hwtstamp_ifname;
315 
316 static void hwtstamp_restore(void)
317 {
318 	hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg);
319 }
320 
321 static void hwtstamp_enable(const char *ifname)
322 {
323 	struct hwtstamp_config cfg = {
324 		.rx_filter = HWTSTAMP_FILTER_ALL,
325 	};
326 
327 	hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
328 	saved_hwtstamp_ifname = strdup(ifname);
329 	atexit(hwtstamp_restore);
330 
331 	hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
332 }
333 
334 static void cleanup(void)
335 {
336 	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
337 	int ret;
338 	int i;
339 
340 	if (bpf_obj) {
341 		opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
342 		if (opts.old_prog_fd >= 0) {
343 			printf("detaching bpf program....\n");
344 			ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
345 			if (ret)
346 				printf("failed to detach XDP program: %d\n", ret);
347 		}
348 	}
349 
350 	for (i = 0; i < rxq; i++)
351 		close_xsk(&rx_xsk[i]);
352 
353 	if (bpf_obj)
354 		xdp_hw_metadata__destroy(bpf_obj);
355 }
356 
/* Signal handler that deliberately does nothing: delivering the signal is
 * enough to interrupt the poll() in verify_metadata().
 */
static void handle_signal(int sig)
{
	(void)sig; /* interrupting poll() is all we need */
}
361 
/* Set SO_TIMESTAMPING on socket @fd to the flag mask @val.
 *
 * Exits the program if the option cannot be set.
 */
static void timestamping_enable(int fd, int val)
{
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) < 0)
		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
}
370 
371 int main(int argc, char *argv[])
372 {
373 	int server_fd = -1;
374 	int ret;
375 	int i;
376 
377 	struct bpf_program *prog;
378 
379 	if (argc != 2) {
380 		fprintf(stderr, "pass device name\n");
381 		return -1;
382 	}
383 
384 	ifname = argv[1];
385 	ifindex = if_nametoindex(ifname);
386 	rxq = rxq_num(ifname);
387 
388 	printf("rxq: %d\n", rxq);
389 
390 	hwtstamp_enable(ifname);
391 
392 	rx_xsk = malloc(sizeof(struct xsk) * rxq);
393 	if (!rx_xsk)
394 		error(1, ENOMEM, "malloc");
395 
396 	for (i = 0; i < rxq; i++) {
397 		printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
398 		ret = open_xsk(ifindex, &rx_xsk[i], i);
399 		if (ret)
400 			error(1, -ret, "open_xsk");
401 
402 		printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
403 	}
404 
405 	printf("open bpf program...\n");
406 	bpf_obj = xdp_hw_metadata__open();
407 	if (libbpf_get_error(bpf_obj))
408 		error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
409 
410 	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
411 	bpf_program__set_ifindex(prog, ifindex);
412 	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
413 
414 	printf("load bpf program...\n");
415 	ret = xdp_hw_metadata__load(bpf_obj);
416 	if (ret)
417 		error(1, -ret, "xdp_hw_metadata__load");
418 
419 	printf("prepare skb endpoint...\n");
420 	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
421 	if (server_fd < 0)
422 		error(1, errno, "start_server");
423 	timestamping_enable(server_fd,
424 			    SOF_TIMESTAMPING_SOFTWARE |
425 			    SOF_TIMESTAMPING_RAW_HARDWARE);
426 
427 	printf("prepare xsk map...\n");
428 	for (i = 0; i < rxq; i++) {
429 		int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
430 		__u32 queue_id = i;
431 
432 		printf("map[%d] = %d\n", queue_id, sock_fd);
433 		ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
434 		if (ret)
435 			error(1, -ret, "bpf_map_update_elem");
436 	}
437 
438 	printf("attach bpf program...\n");
439 	ret = bpf_xdp_attach(ifindex,
440 			     bpf_program__fd(bpf_obj->progs.rx),
441 			     XDP_FLAGS, NULL);
442 	if (ret)
443 		error(1, -ret, "bpf_xdp_attach");
444 
445 	signal(SIGINT, handle_signal);
446 	ret = verify_metadata(rx_xsk, rxq, server_fd);
447 	close(server_fd);
448 	cleanup();
449 	if (ret)
450 		error(1, -ret, "verify_metadata");
451 }
452