1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* Reference program for verifying XDP metadata on real HW. Functional test
4  * only, doesn't test the performance.
5  *
6  * RX:
7  * - UDP 9091 packets are diverted into AF_XDP
8  * - Metadata verified:
9  *   - rx_timestamp
10  *   - rx_hash
11  *
12  * TX:
13  * - TBD
14  */
15 
16 #include <test_progs.h>
17 #include <network_helpers.h>
18 #include "xdp_hw_metadata.skel.h"
19 #include "xsk.h"
20 
21 #include <error.h>
22 #include <linux/errqueue.h>
23 #include <linux/if_link.h>
24 #include <linux/net_tstamp.h>
25 #include <linux/udp.h>
26 #include <linux/sockios.h>
27 #include <sys/mman.h>
28 #include <net/if.h>
29 #include <poll.h>
30 #include <time.h>
31 
32 #include "xdp_metadata.h"
33 
/* UMEM geometry: UMEM_NUM frames, each of the default AF_XDP frame size. */
#define UMEM_NUM 16
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
#define UMEM_SIZE (UMEM_NUM * UMEM_FRAME_SIZE)
/* Flags handed to both bpf_xdp_attach() and bpf_xdp_detach() in this file. */
#define XDP_FLAGS (XDP_FLAGS_REPLACE | XDP_FLAGS_DRV_MODE)
38 
39 struct xsk {
40 	void *umem_area;
41 	struct xsk_umem *umem;
42 	struct xsk_ring_prod fill;
43 	struct xsk_ring_cons comp;
44 	struct xsk_ring_prod tx;
45 	struct xsk_ring_cons rx;
46 	struct xsk_socket *socket;
47 };
48 
/* Program-wide state, filled in by main() and read back by cleanup(). */
struct xdp_hw_metadata *bpf_obj;	/* opened/loaded BPF skeleton */
struct xsk *rx_xsk;			/* array of rxq AF_XDP sockets */
const char *ifname;			/* device name from the command line */
int ifindex;				/* ifindex resolved from ifname */
int rxq;				/* number of RX queues being served */
54 
/* Intentionally empty stub; it exists for network_helpers.c. */
void test__fail(void)
{
}
56 
57 static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
58 {
59 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
60 	const struct xsk_socket_config socket_config = {
61 		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
62 		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
63 		.bind_flags = XDP_COPY,
64 	};
65 	const struct xsk_umem_config umem_config = {
66 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
67 		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
68 		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
69 		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
70 	};
71 	__u32 idx = 0;
72 	u64 addr;
73 	int ret;
74 	int i;
75 
76 	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
77 	if (xsk->umem_area == MAP_FAILED)
78 		return -ENOMEM;
79 
80 	ret = xsk_umem__create(&xsk->umem,
81 			       xsk->umem_area, UMEM_SIZE,
82 			       &xsk->fill,
83 			       &xsk->comp,
84 			       &umem_config);
85 	if (ret)
86 		return ret;
87 
88 	ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
89 				 xsk->umem,
90 				 &xsk->rx,
91 				 &xsk->tx,
92 				 &socket_config);
93 	if (ret)
94 		return ret;
95 
96 	/* First half of umem is for TX. This way address matches 1-to-1
97 	 * to the completion queue index.
98 	 */
99 
100 	for (i = 0; i < UMEM_NUM / 2; i++) {
101 		addr = i * UMEM_FRAME_SIZE;
102 		printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
103 	}
104 
105 	/* Second half of umem is for RX. */
106 
107 	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
108 	for (i = 0; i < UMEM_NUM / 2; i++) {
109 		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
110 		printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
111 		*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
112 	}
113 	xsk_ring_prod__submit(&xsk->fill, ret);
114 
115 	return 0;
116 }
117 
118 static void close_xsk(struct xsk *xsk)
119 {
120 	if (xsk->umem)
121 		xsk_umem__delete(xsk->umem);
122 	if (xsk->socket)
123 		xsk_socket__delete(xsk->socket);
124 	munmap(xsk->umem_area, UMEM_SIZE);
125 }
126 
127 static void refill_rx(struct xsk *xsk, __u64 addr)
128 {
129 	__u32 idx;
130 
131 	if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) {
132 		printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
133 		*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
134 		xsk_ring_prod__submit(&xsk->fill, 1);
135 	}
136 }
137 
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */

/* Read @clock_id (see man clock_gettime(2) for the available clocks) and
 * return its current value in nanoseconds. A failing clock_gettime() is
 * reported through error(), using its return value as the exit status.
 */
static __u64 gettime(clockid_t clock_id)
{
	struct timespec now;
	__u64 sec_as_ns;
	int rc;

	rc = clock_gettime(clock_id, &now);
	if (rc < 0)
		error(rc, errno, "Error with clock_gettime()");

	sec_as_ns = (__u64) now.tv_sec * NANOSEC_PER_SEC;

	return sec_as_ns + now.tv_nsec;
}
152 
153 static void verify_xdp_metadata(void *data, clockid_t clock_id)
154 {
155 	struct xdp_meta *meta;
156 
157 	meta = data - sizeof(*meta);
158 
159 	if (meta->rx_hash_err < 0)
160 		printf("No rx_hash err=%d\n", meta->rx_hash_err);
161 	else
162 		printf("rx_hash: 0x%X with RSS type:0x%X\n",
163 		       meta->rx_hash, meta->rx_hash_type);
164 
165 	printf("rx_timestamp:  %llu (sec:%0.4f)\n", meta->rx_timestamp,
166 	       (double)meta->rx_timestamp / NANOSEC_PER_SEC);
167 	if (meta->rx_timestamp) {
168 		__u64 usr_clock = gettime(clock_id);
169 		__u64 xdp_clock = meta->xdp_timestamp;
170 		__s64 delta_X = xdp_clock - meta->rx_timestamp;
171 		__s64 delta_X2U = usr_clock - xdp_clock;
172 
173 		printf("XDP RX-time:   %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
174 		       xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC,
175 		       (double)delta_X / NANOSEC_PER_SEC,
176 		       (double)delta_X / 1000);
177 
178 		printf("AF_XDP time:   %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
179 		       usr_clock, (double)usr_clock / NANOSEC_PER_SEC,
180 		       (double)delta_X2U / NANOSEC_PER_SEC,
181 		       (double)delta_X2U / 1000);
182 	}
183 
184 }
185 
186 static void verify_skb_metadata(int fd)
187 {
188 	char cmsg_buf[1024];
189 	char packet_buf[128];
190 
191 	struct scm_timestamping *ts;
192 	struct iovec packet_iov;
193 	struct cmsghdr *cmsg;
194 	struct msghdr hdr;
195 
196 	memset(&hdr, 0, sizeof(hdr));
197 	hdr.msg_iov = &packet_iov;
198 	hdr.msg_iovlen = 1;
199 	packet_iov.iov_base = packet_buf;
200 	packet_iov.iov_len = sizeof(packet_buf);
201 
202 	hdr.msg_control = cmsg_buf;
203 	hdr.msg_controllen = sizeof(cmsg_buf);
204 
205 	if (recvmsg(fd, &hdr, 0) < 0)
206 		error(1, errno, "recvmsg");
207 
208 	for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
209 	     cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
210 
211 		if (cmsg->cmsg_level != SOL_SOCKET)
212 			continue;
213 
214 		switch (cmsg->cmsg_type) {
215 		case SCM_TIMESTAMPING:
216 			ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
217 			if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
218 				printf("found skb hwtstamp = %lu.%lu\n",
219 				       ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
220 				return;
221 			}
222 			break;
223 		default:
224 			break;
225 		}
226 	}
227 
228 	printf("skb hwtstamp is not found!\n");
229 }
230 
231 static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
232 {
233 	const struct xdp_desc *rx_desc;
234 	struct pollfd fds[rxq + 1];
235 	__u64 comp_addr;
236 	__u64 addr;
237 	__u32 idx;
238 	int ret;
239 	int i;
240 
241 	for (i = 0; i < rxq; i++) {
242 		fds[i].fd = xsk_socket__fd(rx_xsk[i].socket);
243 		fds[i].events = POLLIN;
244 		fds[i].revents = 0;
245 	}
246 
247 	fds[rxq].fd = server_fd;
248 	fds[rxq].events = POLLIN;
249 	fds[rxq].revents = 0;
250 
251 	while (true) {
252 		errno = 0;
253 		ret = poll(fds, rxq + 1, 1000);
254 		printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
255 		       ret, errno, bpf_obj->bss->pkts_skip,
256 		       bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
257 		if (ret < 0)
258 			break;
259 		if (ret == 0)
260 			continue;
261 
262 		if (fds[rxq].revents)
263 			verify_skb_metadata(server_fd);
264 
265 		for (i = 0; i < rxq; i++) {
266 			if (fds[i].revents == 0)
267 				continue;
268 
269 			struct xsk *xsk = &rx_xsk[i];
270 
271 			ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
272 			printf("xsk_ring_cons__peek: %d\n", ret);
273 			if (ret != 1)
274 				continue;
275 
276 			rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
277 			comp_addr = xsk_umem__extract_addr(rx_desc->addr);
278 			addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
279 			printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
280 			       xsk, idx, rx_desc->addr, addr, comp_addr);
281 			verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
282 					    clock_id);
283 			xsk_ring_cons__release(&xsk->rx, 1);
284 			refill_rx(xsk, comp_addr);
285 		}
286 	}
287 
288 	return 0;
289 }
290 
291 static int rxq_num(const char *ifname)
292 {
293 	struct ethtool_channels ch = {
294 		.cmd = ETHTOOL_GCHANNELS,
295 	};
296 
297 	struct ifreq ifr = {
298 		.ifr_data = (void *)&ch,
299 	};
300 	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
301 	int fd, ret;
302 
303 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
304 	if (fd < 0)
305 		error(1, errno, "socket");
306 
307 	ret = ioctl(fd, SIOCETHTOOL, &ifr);
308 	if (ret < 0)
309 		error(1, errno, "ioctl(SIOCETHTOOL)");
310 
311 	close(fd);
312 
313 	return ch.rx_count + ch.combined_count;
314 }
315 
316 static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
317 {
318 	struct ifreq ifr = {
319 		.ifr_data = (void *)cfg,
320 	};
321 	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
322 	int fd, ret;
323 
324 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
325 	if (fd < 0)
326 		error(1, errno, "socket");
327 
328 	ret = ioctl(fd, op, &ifr);
329 	if (ret < 0)
330 		error(1, errno, "ioctl(%d)", op);
331 
332 	close(fd);
333 }
334 
/* Timestamping configuration and device name captured by hwtstamp_enable(). */
static struct hwtstamp_config saved_hwtstamp_cfg;
static const char *saved_hwtstamp_ifname;

/* atexit() handler: write the saved configuration back to the saved device. */
static void hwtstamp_restore(void)
{
	struct hwtstamp_config *saved = &saved_hwtstamp_cfg;

	hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, saved);
}
342 
343 static void hwtstamp_enable(const char *ifname)
344 {
345 	struct hwtstamp_config cfg = {
346 		.rx_filter = HWTSTAMP_FILTER_ALL,
347 	};
348 
349 	hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
350 	saved_hwtstamp_ifname = strdup(ifname);
351 	atexit(hwtstamp_restore);
352 
353 	hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
354 }
355 
356 static void cleanup(void)
357 {
358 	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
359 	int ret;
360 	int i;
361 
362 	if (bpf_obj) {
363 		opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
364 		if (opts.old_prog_fd >= 0) {
365 			printf("detaching bpf program....\n");
366 			ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
367 			if (ret)
368 				printf("failed to detach XDP program: %d\n", ret);
369 		}
370 	}
371 
372 	for (i = 0; i < rxq; i++)
373 		close_xsk(&rx_xsk[i]);
374 
375 	if (bpf_obj)
376 		xdp_hw_metadata__destroy(bpf_obj);
377 }
378 
/* Signal handler that deliberately does nothing: interrupting poll() is all
 * we need.
 */
static void handle_signal(int sig)
{
	(void)sig;
}
383 
/* Set SO_TIMESTAMPING on @fd to the flag mask @val; exits via error() if
 * setsockopt() fails.
 */
static void timestamping_enable(int fd, int val)
{
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) < 0)
		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
}
392 
393 int main(int argc, char *argv[])
394 {
395 	clockid_t clock_id = CLOCK_TAI;
396 	int server_fd = -1;
397 	int ret;
398 	int i;
399 
400 	struct bpf_program *prog;
401 
402 	if (argc != 2) {
403 		fprintf(stderr, "pass device name\n");
404 		return -1;
405 	}
406 
407 	ifname = argv[1];
408 	ifindex = if_nametoindex(ifname);
409 	rxq = rxq_num(ifname);
410 
411 	printf("rxq: %d\n", rxq);
412 
413 	hwtstamp_enable(ifname);
414 
415 	rx_xsk = malloc(sizeof(struct xsk) * rxq);
416 	if (!rx_xsk)
417 		error(1, ENOMEM, "malloc");
418 
419 	for (i = 0; i < rxq; i++) {
420 		printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
421 		ret = open_xsk(ifindex, &rx_xsk[i], i);
422 		if (ret)
423 			error(1, -ret, "open_xsk");
424 
425 		printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
426 	}
427 
428 	printf("open bpf program...\n");
429 	bpf_obj = xdp_hw_metadata__open();
430 	if (libbpf_get_error(bpf_obj))
431 		error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
432 
433 	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
434 	bpf_program__set_ifindex(prog, ifindex);
435 	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
436 
437 	printf("load bpf program...\n");
438 	ret = xdp_hw_metadata__load(bpf_obj);
439 	if (ret)
440 		error(1, -ret, "xdp_hw_metadata__load");
441 
442 	printf("prepare skb endpoint...\n");
443 	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
444 	if (server_fd < 0)
445 		error(1, errno, "start_server");
446 	timestamping_enable(server_fd,
447 			    SOF_TIMESTAMPING_SOFTWARE |
448 			    SOF_TIMESTAMPING_RAW_HARDWARE);
449 
450 	printf("prepare xsk map...\n");
451 	for (i = 0; i < rxq; i++) {
452 		int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
453 		__u32 queue_id = i;
454 
455 		printf("map[%d] = %d\n", queue_id, sock_fd);
456 		ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
457 		if (ret)
458 			error(1, -ret, "bpf_map_update_elem");
459 	}
460 
461 	printf("attach bpf program...\n");
462 	ret = bpf_xdp_attach(ifindex,
463 			     bpf_program__fd(bpf_obj->progs.rx),
464 			     XDP_FLAGS, NULL);
465 	if (ret)
466 		error(1, -ret, "bpf_xdp_attach");
467 
468 	signal(SIGINT, handle_signal);
469 	ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
470 	close(server_fd);
471 	cleanup();
472 	if (ret)
473 		error(1, -ret, "verify_metadata");
474 }
475