1 // SPDX-License-Identifier: GPL-2.0
2
3 /* Reference program for verifying XDP metadata on real HW. Functional test
4 * only, doesn't test the performance.
5 *
6 * RX:
7 * - UDP 9091 packets are diverted into AF_XDP
8 * - Metadata verified:
9 * - rx_timestamp
10 * - rx_hash
11 *
12 * TX:
13 * - TBD
14 */
15
16 #include <test_progs.h>
17 #include <network_helpers.h>
18 #include "xdp_hw_metadata.skel.h"
19 #include "xsk.h"
20
21 #include <error.h>
22 #include <linux/errqueue.h>
23 #include <linux/if_link.h>
24 #include <linux/net_tstamp.h>
25 #include <linux/udp.h>
26 #include <linux/sockios.h>
27 #include <sys/mman.h>
28 #include <net/if.h>
29 #include <poll.h>
30 #include <time.h>
31
32 #include "xdp_metadata.h"
33
/* Number of frames in each socket's umem. */
#define UMEM_NUM 16
/* Size in bytes of a single umem frame. */
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
/* Total size in bytes of the umem area mapped per socket. */
#define UMEM_SIZE (UMEM_NUM * UMEM_FRAME_SIZE)
/* Flags used both when attaching and when detaching the XDP program. */
#define XDP_FLAGS (XDP_FLAGS_REPLACE | XDP_FLAGS_DRV_MODE)
38
39 struct xsk {
40 void *umem_area;
41 struct xsk_umem *umem;
42 struct xsk_ring_prod fill;
43 struct xsk_ring_cons comp;
44 struct xsk_ring_prod tx;
45 struct xsk_ring_cons rx;
46 struct xsk_socket *socket;
47 };
48
/* Program-wide state, set up by main() and consumed by cleanup(). */
struct xdp_hw_metadata *bpf_obj;	/* BPF skeleton; NULL until opened */
struct xsk *rx_xsk;			/* array of rxq AF_XDP sockets */
const char *ifname;			/* device name taken from argv[1] */
int ifindex;				/* interface index of ifname */
int rxq;				/* number of entries in rx_xsk */
54
/* Stub required by network_helpers.c; intentionally does nothing. */
void test__fail(void)
{
}
56
/* Create one AF_XDP socket in copy mode on queue @queue_id of @ifindex.
 *
 * Maps UMEM_SIZE bytes of anonymous memory, registers it as the umem,
 * creates the socket on top of it and posts the second half of the umem
 * frames to the fill ring.
 *
 * Returns 0 on success or a negative error code. On failure everything
 * acquired so far is released and the umem/socket pointers in @xsk are
 * left NULL.
 */
static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
{
	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
	const struct xsk_socket_config socket_config = {
		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.bind_flags = XDP_COPY,
	};
	const struct xsk_umem_config umem_config = {
		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
	};
	__u32 idx = 0;
	u64 addr;
	int ret;
	int i;

	/* Start from a known state so the error paths never see garbage. */
	xsk->umem = NULL;
	xsk->socket = NULL;

	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
	if (xsk->umem_area == MAP_FAILED) {
		xsk->umem_area = NULL;
		return -ENOMEM;
	}

	ret = xsk_umem__create(&xsk->umem,
			       xsk->umem_area, UMEM_SIZE,
			       &xsk->fill,
			       &xsk->comp,
			       &umem_config);
	if (ret)
		goto err_unmap;

	ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
				 xsk->umem,
				 &xsk->rx,
				 &xsk->tx,
				 &socket_config);
	if (ret)
		goto err_umem;

	/* First half of umem is for TX. This way address matches 1-to-1
	 * to the completion queue index. The addresses are only logged
	 * here; nothing is queued on the TX ring.
	 */
	for (i = 0; i < UMEM_NUM / 2; i++) {
		addr = i * UMEM_FRAME_SIZE;
		printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
	}

	/* Second half of umem is for RX. */
	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
	if (ret != UMEM_NUM / 2) {
		/* Do not write into fill-ring slots that were not reserved. */
		ret = -ENOSPC;
		goto err_socket;
	}
	for (i = 0; i < UMEM_NUM / 2; i++) {
		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
		printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
		/* Slots are relative to the index handed back by reserve. */
		*xsk_ring_prod__fill_addr(&xsk->fill, idx + i) = addr;
	}
	xsk_ring_prod__submit(&xsk->fill, UMEM_NUM / 2);

	return 0;

err_socket:
	xsk_socket__delete(xsk->socket);
	xsk->socket = NULL;
err_umem:
	xsk_umem__delete(xsk->umem);
	xsk->umem = NULL;
err_unmap:
	munmap(xsk->umem_area, UMEM_SIZE);
	xsk->umem_area = NULL;
	return ret;
}
117
close_xsk(struct xsk * xsk)118 static void close_xsk(struct xsk *xsk)
119 {
120 if (xsk->umem)
121 xsk_umem__delete(xsk->umem);
122 if (xsk->socket)
123 xsk_socket__delete(xsk->socket);
124 munmap(xsk->umem_area, UMEM_SIZE);
125 }
126
/* Hand the frame at @addr back to the fill ring of @xsk.
 * Does nothing when no fill-ring slot can be reserved.
 */
static void refill_rx(struct xsk *xsk, __u64 addr)
{
	__u32 slot;

	if (xsk_ring_prod__reserve(&xsk->fill, 1, &slot) != 1)
		return;

	printf("%p: complete idx=%u addr=%llx\n", xsk, slot, addr);
	*xsk_ring_prod__fill_addr(&xsk->fill, slot) = addr;
	xsk_ring_prod__submit(&xsk->fill, 1);
}
137
138 #define NANOSEC_PER_SEC 1000000000 /* 10^9 */
gettime(clockid_t clock_id)139 static __u64 gettime(clockid_t clock_id)
140 {
141 struct timespec t;
142 int res;
143
144 /* See man clock_gettime(2) for type of clock_id's */
145 res = clock_gettime(clock_id, &t);
146
147 if (res < 0)
148 error(res, errno, "Error with clock_gettime()");
149
150 return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
151 }
152
/* Print the XDP metadata stored directly in front of the packet at @data.
 *
 * Always reports the RX hash (or its error code) and the RX timestamp.
 * When the RX timestamp is non-zero it also reports how far the XDP
 * timestamp and the current value of @clock_id are from it.
 */
static void verify_xdp_metadata(void *data, clockid_t clock_id)
{
	struct xdp_meta *meta = data - sizeof(*meta);
	__u64 usr_clock, xdp_clock;
	__s64 hw_to_xdp, xdp_to_usr;

	if (meta->rx_hash_err >= 0)
		printf("rx_hash: 0x%X with RSS type:0x%X\n",
		       meta->rx_hash, meta->rx_hash_type);
	else
		printf("No rx_hash err=%d\n", meta->rx_hash_err);

	printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp,
	       (double)meta->rx_timestamp / NANOSEC_PER_SEC);

	/* Without an RX timestamp there is nothing to compare against. */
	if (!meta->rx_timestamp)
		return;

	usr_clock = gettime(clock_id);
	xdp_clock = meta->xdp_timestamp;
	hw_to_xdp = xdp_clock - meta->rx_timestamp;
	xdp_to_usr = usr_clock - xdp_clock;

	printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
	       xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC,
	       (double)hw_to_xdp / NANOSEC_PER_SEC,
	       (double)hw_to_xdp / 1000);

	printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
	       usr_clock, (double)usr_clock / NANOSEC_PER_SEC,
	       (double)xdp_to_usr / NANOSEC_PER_SEC,
	       (double)xdp_to_usr / 1000);
}
185
verify_skb_metadata(int fd)186 static void verify_skb_metadata(int fd)
187 {
188 char cmsg_buf[1024];
189 char packet_buf[128];
190
191 struct scm_timestamping *ts;
192 struct iovec packet_iov;
193 struct cmsghdr *cmsg;
194 struct msghdr hdr;
195
196 memset(&hdr, 0, sizeof(hdr));
197 hdr.msg_iov = &packet_iov;
198 hdr.msg_iovlen = 1;
199 packet_iov.iov_base = packet_buf;
200 packet_iov.iov_len = sizeof(packet_buf);
201
202 hdr.msg_control = cmsg_buf;
203 hdr.msg_controllen = sizeof(cmsg_buf);
204
205 if (recvmsg(fd, &hdr, 0) < 0)
206 error(1, errno, "recvmsg");
207
208 for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
209 cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
210
211 if (cmsg->cmsg_level != SOL_SOCKET)
212 continue;
213
214 switch (cmsg->cmsg_type) {
215 case SCM_TIMESTAMPING:
216 ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
217 if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
218 printf("found skb hwtstamp = %lu.%lu\n",
219 ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
220 return;
221 }
222 break;
223 default:
224 break;
225 }
226 }
227
228 printf("skb hwtstamp is not found!\n");
229 }
230
/* Poll the @rxq AF_XDP sockets in @rx_xsk together with @server_fd and
 * print the metadata of whatever arrives.
 *
 * Packets on @server_fd go through verify_skb_metadata(); packets on an
 * AF_XDP socket go through verify_xdp_metadata() and their frame is then
 * returned to the fill ring. The loop only ends when poll() fails (for
 * instance when interrupted by a signal). Always returns 0.
 */
static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
{
	const struct xdp_desc *rx_desc;
	struct pollfd fds[rxq + 1];
	__u64 comp_addr;
	__u64 addr;
	__u32 idx;
	int ret;
	int q;

	/* One slot per AF_XDP socket, the last slot is the regular socket. */
	for (q = 0; q < rxq; q++)
		fds[q] = (struct pollfd){
			.fd = xsk_socket__fd(rx_xsk[q].socket),
			.events = POLLIN,
		};
	fds[rxq] = (struct pollfd){
		.fd = server_fd,
		.events = POLLIN,
	};

	for (;;) {
		errno = 0;
		ret = poll(fds, rxq + 1, 1000);
		printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
		       ret, errno, bpf_obj->bss->pkts_skip,
		       bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
		if (ret < 0)
			break;
		if (ret == 0)
			continue;	/* timeout, nothing arrived */

		if (fds[rxq].revents)
			verify_skb_metadata(server_fd);

		for (q = 0; q < rxq; q++) {
			struct xsk *xsk = &rx_xsk[q];

			if (!fds[q].revents)
				continue;

			ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
			printf("xsk_ring_cons__peek: %d\n", ret);
			if (ret != 1)
				continue;

			rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
			comp_addr = xsk_umem__extract_addr(rx_desc->addr);
			addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
			printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
			       xsk, idx, rx_desc->addr, addr, comp_addr);
			verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
					    clock_id);
			xsk_ring_cons__release(&xsk->rx, 1);
			refill_rx(xsk, comp_addr);
		}
	}

	return 0;
}
290
291 struct ethtool_channels {
292 __u32 cmd;
293 __u32 max_rx;
294 __u32 max_tx;
295 __u32 max_other;
296 __u32 max_combined;
297 __u32 rx_count;
298 __u32 tx_count;
299 __u32 other_count;
300 __u32 combined_count;
301 };
302
303 #define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
304
rxq_num(const char * ifname)305 static int rxq_num(const char *ifname)
306 {
307 struct ethtool_channels ch = {
308 .cmd = ETHTOOL_GCHANNELS,
309 };
310
311 struct ifreq ifr = {
312 .ifr_data = (void *)&ch,
313 };
314 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
315 int fd, ret;
316
317 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
318 if (fd < 0)
319 error(1, errno, "socket");
320
321 ret = ioctl(fd, SIOCETHTOOL, &ifr);
322 if (ret < 0)
323 error(1, errno, "ioctl(SIOCETHTOOL)");
324
325 close(fd);
326
327 return ch.rx_count + ch.combined_count;
328 }
329
hwtstamp_ioctl(int op,const char * ifname,struct hwtstamp_config * cfg)330 static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
331 {
332 struct ifreq ifr = {
333 .ifr_data = (void *)cfg,
334 };
335 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
336 int fd, ret;
337
338 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
339 if (fd < 0)
340 error(1, errno, "socket");
341
342 ret = ioctl(fd, op, &ifr);
343 if (ret < 0)
344 error(1, errno, "ioctl(%d)", op);
345
346 close(fd);
347 }
348
349 static struct hwtstamp_config saved_hwtstamp_cfg;
350 static const char *saved_hwtstamp_ifname;
351
hwtstamp_restore(void)352 static void hwtstamp_restore(void)
353 {
354 hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg);
355 }
356
hwtstamp_enable(const char * ifname)357 static void hwtstamp_enable(const char *ifname)
358 {
359 struct hwtstamp_config cfg = {
360 .rx_filter = HWTSTAMP_FILTER_ALL,
361 };
362
363 hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
364 saved_hwtstamp_ifname = strdup(ifname);
365 atexit(hwtstamp_restore);
366
367 hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
368 }
369
cleanup(void)370 static void cleanup(void)
371 {
372 LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
373 int ret;
374 int i;
375
376 if (bpf_obj) {
377 opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
378 if (opts.old_prog_fd >= 0) {
379 printf("detaching bpf program....\n");
380 ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
381 if (ret)
382 printf("failed to detach XDP program: %d\n", ret);
383 }
384 }
385
386 for (i = 0; i < rxq; i++)
387 close_xsk(&rx_xsk[i]);
388
389 if (bpf_obj)
390 xdp_hw_metadata__destroy(bpf_obj);
391 }
392
/* Signal handler that deliberately does nothing: interrupting poll()
 * is all that is needed.
 */
static void handle_signal(int sig)
{
	(void)sig;
}
397
/* Set SO_TIMESTAMPING on @fd to the flag mask @val.
 * Terminates the program through error() when setsockopt() fails.
 */
static void timestamping_enable(int fd, int val)
{
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) < 0)
		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
}
406
main(int argc,char * argv[])407 int main(int argc, char *argv[])
408 {
409 clockid_t clock_id = CLOCK_TAI;
410 int server_fd = -1;
411 int ret;
412 int i;
413
414 struct bpf_program *prog;
415
416 if (argc != 2) {
417 fprintf(stderr, "pass device name\n");
418 return -1;
419 }
420
421 ifname = argv[1];
422 ifindex = if_nametoindex(ifname);
423 rxq = rxq_num(ifname);
424
425 printf("rxq: %d\n", rxq);
426
427 hwtstamp_enable(ifname);
428
429 rx_xsk = malloc(sizeof(struct xsk) * rxq);
430 if (!rx_xsk)
431 error(1, ENOMEM, "malloc");
432
433 for (i = 0; i < rxq; i++) {
434 printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
435 ret = open_xsk(ifindex, &rx_xsk[i], i);
436 if (ret)
437 error(1, -ret, "open_xsk");
438
439 printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
440 }
441
442 printf("open bpf program...\n");
443 bpf_obj = xdp_hw_metadata__open();
444 if (libbpf_get_error(bpf_obj))
445 error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
446
447 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
448 bpf_program__set_ifindex(prog, ifindex);
449 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
450
451 printf("load bpf program...\n");
452 ret = xdp_hw_metadata__load(bpf_obj);
453 if (ret)
454 error(1, -ret, "xdp_hw_metadata__load");
455
456 printf("prepare skb endpoint...\n");
457 server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
458 if (server_fd < 0)
459 error(1, errno, "start_server");
460 timestamping_enable(server_fd,
461 SOF_TIMESTAMPING_SOFTWARE |
462 SOF_TIMESTAMPING_RAW_HARDWARE);
463
464 printf("prepare xsk map...\n");
465 for (i = 0; i < rxq; i++) {
466 int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
467 __u32 queue_id = i;
468
469 printf("map[%d] = %d\n", queue_id, sock_fd);
470 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
471 if (ret)
472 error(1, -ret, "bpf_map_update_elem");
473 }
474
475 printf("attach bpf program...\n");
476 ret = bpf_xdp_attach(ifindex,
477 bpf_program__fd(bpf_obj->progs.rx),
478 XDP_FLAGS, NULL);
479 if (ret)
480 error(1, -ret, "bpf_xdp_attach");
481
482 signal(SIGINT, handle_signal);
483 ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
484 close(server_fd);
485 cleanup();
486 if (ret)
487 error(1, -ret, "verify_metadata");
488 }
489