1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /*
4  * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5  * between src and dst. The netns fwd has veth links to each src and dst. The
6  * client is in src and server in dst. The test installs a TC BPF program to each
7  * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8  * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9  * switch from ingress side; it also installs a checker prog on the egress side
10  * to drop unexpected traffic.
11  */
12 
13 #include <arpa/inet.h>
14 #include <linux/if.h>
15 #include <linux/if_tun.h>
16 #include <linux/limits.h>
17 #include <linux/sysctl.h>
18 #include <linux/time_types.h>
19 #include <linux/net_tstamp.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "test_tc_neigh_fib.skel.h"
28 #include "test_tc_neigh.skel.h"
29 #include "test_tc_peer.skel.h"
30 #include "test_tc_dtime.skel.h"
31 
/* Fallback definition in case the included headers do not provide
 * the TCP_TX_DELAY socket option number.
 */
#ifndef TCP_TX_DELAY
#define TCP_TX_DELAY 37
#endif

/* Names of the three network namespaces (src <-> fwd <-> dst). */
#define NS_SRC "ns_src"
#define NS_FWD "ns_fwd"
#define NS_DST "ns_dst"

/* IPv4 addresses: veth endpoints in src/dst, and the tun_src/tun_fwd
 * devices used by the peer_l3 subtest.
 */
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
#define IP4_TUN_SRC "172.17.1.100"
#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004

/* IPv6 counterparts of the addresses above. */
#define IP6_SRC "0::1:dead:beef:cafe"
#define IP6_DST "0::2:dead:beef:cafe"
#define IP6_TUN_SRC "1::1:dead:beef:cafe"
#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006

/* IPv4 addresses put on veth_src_fwd (SLL) and veth_dst_fwd (DLL) in the
 * fwd namespace; IP4_NET/16 is routed from the src and dst namespaces.
 */
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"

/* Fixed MAC addresses assigned to veth_dst_fwd and veth_dst. */
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"

/* Number of bytes get_ifaddr() reads from the sysfs "address" file. */
#define IFADDR_STR_LEN 18
/* Arguments appended to the ping command line in test_ping(). */
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"

/* bpffs paths where the src/dst/checker TC programs are pinned. */
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"

/* Timeout handed to connect_to_fd() and settimeo(). */
#define TIMEOUT_MILLIS 10000
#define NSEC_PER_SEC 1000000000ULL
68 
/* Print a printf-style message to stderr, prefixed with the source file,
 * line number and the strerror() text for the current errno.
 */
#define log_err(MSG, ...) \
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
72 
/* NULL-terminated list of namespaces iterated by netns_setup_namespaces()
 * and netns_setup_namespaces_nofail().
 */
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
74 
/* Write @newval at the start of the existing file @path (opened "r+").
 *
 * stdio buffers the data, so a write that is rejected may only be reported
 * when the stream is flushed. The fclose() result is therefore checked in
 * addition to fwrite().
 *
 * Returns 0 on success, -1 if the file cannot be opened or the write fails.
 */
static int write_file(const char *path, const char *newval)
{
	FILE *f;
	int err = 0;

	f = fopen(path, "r+");
	if (!f)
		return -1;
	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		err = -1;
	}
	/* fclose() flushes; report its failure unless one was already logged */
	if (fclose(f) && !err) {
		log_err("writing to %s failed", path);
		err = -1;
	}
	return err;
}
90 
91 static int netns_setup_namespaces(const char *verb)
92 {
93 	const char * const *ns = namespaces;
94 	char cmd[128];
95 
96 	while (*ns) {
97 		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
98 		if (!ASSERT_OK(system(cmd), cmd))
99 			return -1;
100 		ns++;
101 	}
102 	return 0;
103 }
104 
105 static void netns_setup_namespaces_nofail(const char *verb)
106 {
107 	const char * const *ns = namespaces;
108 	char cmd[128];
109 
110 	while (*ns) {
111 		snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
112 		system(cmd);
113 		ns++;
114 	}
115 }
116 
/* Interface indices recorded by netns_setup_links_and_routes() and handed
 * to each subtest.
 */
struct netns_setup_result {
	int ifindex_veth_src_fwd;	/* ifindex of veth_src_fwd */
	int ifindex_veth_dst_fwd;	/* ifindex of veth_dst_fwd */
};
121 
122 static int get_ifaddr(const char *name, char *ifaddr)
123 {
124 	char path[PATH_MAX];
125 	FILE *f;
126 	int ret;
127 
128 	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
129 	f = fopen(path, "r");
130 	if (!ASSERT_OK_PTR(f, path))
131 		return -1;
132 
133 	ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
134 	if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
135 		fclose(f);
136 		return -1;
137 	}
138 	fclose(f);
139 	return 0;
140 }
141 
142 static int get_ifindex(const char *name)
143 {
144 	char path[PATH_MAX];
145 	char buf[32];
146 	FILE *f;
147 	int ret;
148 
149 	snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
150 	f = fopen(path, "r");
151 	if (!ASSERT_OK_PTR(f, path))
152 		return -1;
153 
154 	ret = fread(buf, 1, sizeof(buf), f);
155 	if (!ASSERT_GT(ret, 0, "fread ifindex")) {
156 		fclose(f);
157 		return -1;
158 	}
159 	fclose(f);
160 	return atoi(buf);
161 }
162 
/* Format a shell command (printf-style), run it with system() and assert
 * that it returned 0. On failure control jumps to a label named "fail",
 * which the enclosing function must therefore provide.
 */
#define SYS(fmt, ...)						\
	({							\
		char cmd[1024];					\
		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
		if (!ASSERT_OK(system(cmd), cmd))		\
			goto fail;				\
	})
170 
/* Create the two veth pairs, move their ends into the src/fwd/dst
 * namespaces and configure addresses, routes and static neighbour entries
 * in each namespace.
 *
 * The ifindexes of veth_src_fwd and veth_dst_fwd are stored in @result
 * before the devices are moved out of the current namespace.
 *
 * Returns 0 on success, -1 as soon as any step fails.
 */
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
	struct nstoken *nstoken = NULL;
	/* one extra, zero-initialised byte keeps the string terminated */
	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};

	SYS("ip link add veth_src type veth peer name veth_src_fwd");
	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");

	SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
	SYS("ip link set veth_dst address " MAC_DST);

	/* veth_src_fwd keeps its generated MAC; read it back for the
	 * neighbour entries installed in the src namespace below.
	 */
	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
		goto fail;

	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
	if (result->ifindex_veth_src_fwd < 0)
		goto fail;
	result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
	if (result->ifindex_veth_dst_fwd < 0)
		goto fail;

	SYS("ip link set veth_src netns " NS_SRC);
	SYS("ip link set veth_src_fwd netns " NS_FWD);
	SYS("ip link set veth_dst_fwd netns " NS_FWD);
	SYS("ip link set veth_dst netns " NS_DST);

	/** setup in 'src' namespace */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns src"))
		goto fail;

	SYS("ip addr add " IP4_SRC "/32 dev veth_src");
	SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
	SYS("ip link set dev veth_src up");

	SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
	SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
	SYS("ip route add " IP6_DST "/128 dev veth_src scope global");

	SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
	    veth_src_fwd_addr);
	SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
	    veth_src_fwd_addr);

	close_netns(nstoken);

	/** setup in 'fwd' namespace */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
		goto fail;

	/* The fwd netns automatically gets a v6 LL address / routes, but also
	 * needs v4 one in order to start ARP probing. IP4_NET route is added
	 * to the endpoints so that the ARP processing will reply.
	 */
	SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
	SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
	SYS("ip link set dev veth_src_fwd up");
	SYS("ip link set dev veth_dst_fwd up");

	SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
	SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
	SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
	SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");

	close_netns(nstoken);

	/** setup in 'dst' namespace */
	nstoken = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
		goto fail;

	SYS("ip addr add " IP4_DST "/32 dev veth_dst");
	SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
	SYS("ip link set dev veth_dst up");

	SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");

	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);

	close_netns(nstoken);

	return 0;
fail:
	/* leave whichever namespace was entered when the failure occurred */
	if (nstoken)
		close_netns(nstoken);
	return -1;
}
262 
/* Add a clsact qdisc to veth_src_fwd and veth_dst_fwd and attach the pinned
 * programs: SRC_PROG_PIN_FILE / DST_PROG_PIN_FILE on the respective ingress
 * hooks and CHK_PROG_PIN_FILE on both egress hooks.
 *
 * The commands run in whatever network namespace is current for the caller.
 * Returns 0 on success, -1 if any tc command fails.
 */
static int netns_load_bpf(void)
{
	SYS("tc qdisc add dev veth_src_fwd clsact");
	SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
	    SRC_PROG_PIN_FILE);
	SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
	    CHK_PROG_PIN_FILE);

	SYS("tc qdisc add dev veth_dst_fwd clsact");
	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
	    DST_PROG_PIN_FILE);
	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
	    CHK_PROG_PIN_FILE);

	return 0;
fail:
	return -1;
}
281 
282 static void test_tcp(int family, const char *addr, __u16 port)
283 {
284 	int listen_fd = -1, accept_fd = -1, client_fd = -1;
285 	char buf[] = "testing testing";
286 	int n;
287 	struct nstoken *nstoken;
288 
289 	nstoken = open_netns(NS_DST);
290 	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
291 		return;
292 
293 	listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
294 	if (!ASSERT_GE(listen_fd, 0, "listen"))
295 		goto done;
296 
297 	close_netns(nstoken);
298 	nstoken = open_netns(NS_SRC);
299 	if (!ASSERT_OK_PTR(nstoken, "setns src"))
300 		goto done;
301 
302 	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
303 	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
304 		goto done;
305 
306 	accept_fd = accept(listen_fd, NULL, NULL);
307 	if (!ASSERT_GE(accept_fd, 0, "accept"))
308 		goto done;
309 
310 	if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
311 		goto done;
312 
313 	n = write(client_fd, buf, sizeof(buf));
314 	if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
315 		goto done;
316 
317 	n = read(accept_fd, buf, sizeof(buf));
318 	ASSERT_EQ(n, sizeof(buf), "recv from server");
319 
320 done:
321 	if (nstoken)
322 		close_netns(nstoken);
323 	if (listen_fd >= 0)
324 		close(listen_fd);
325 	if (accept_fd >= 0)
326 		close(accept_fd);
327 	if (client_fd >= 0)
328 		close(client_fd);
329 }
330 
/* Ping @addr from inside the src namespace using the command returned by
 * ping_command(family) with PING_ARGS.
 *
 * Returns 0 if the command exits successfully, -1 otherwise.
 */
static int test_ping(int family, const char *addr)
{
	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
	return 0;
fail:
	return -1;
}
338 
/* Exercise TCP and ping from src to dst over IPv4 and then IPv6. Failures
 * are recorded by the assertions inside the helpers; the return value of
 * test_ping() is not inspected here.
 */
static void test_connectivity(void)
{
	test_tcp(AF_INET, IP4_DST, IP4_PORT);
	test_ping(AF_INET, IP4_DST);
	test_tcp(AF_INET6, IP6_DST, IP6_PORT);
	test_ping(AF_INET6, IP6_DST);
}
346 
/* Enable or disable IPv4 and IPv6 forwarding through the procfs sysctl
 * files, as seen from the caller's current network namespace.
 *
 * The assertion labels name the value actually written, so a failure while
 * enabling forwarding is no longer reported as "=0".
 *
 * Returns 0 on success, or the write_file() error of the first failing knob.
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" :
				     "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" :
				     "set ipv6.forwarding=0"))
		return err;

	return 0;
}
361 
362 static void rcv_tstamp(int fd, const char *expected, size_t s)
363 {
364 	struct __kernel_timespec pkt_ts = {};
365 	char ctl[CMSG_SPACE(sizeof(pkt_ts))];
366 	struct timespec now_ts;
367 	struct msghdr msg = {};
368 	__u64 now_ns, pkt_ns;
369 	struct cmsghdr *cmsg;
370 	struct iovec iov;
371 	char data[32];
372 	int ret;
373 
374 	iov.iov_base = data;
375 	iov.iov_len = sizeof(data);
376 	msg.msg_iov = &iov;
377 	msg.msg_iovlen = 1;
378 	msg.msg_control = &ctl;
379 	msg.msg_controllen = sizeof(ctl);
380 
381 	ret = recvmsg(fd, &msg, 0);
382 	if (!ASSERT_EQ(ret, s, "recvmsg"))
383 		return;
384 	ASSERT_STRNEQ(data, expected, s, "expected rcv data");
385 
386 	cmsg = CMSG_FIRSTHDR(&msg);
387 	if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
388 	    cmsg->cmsg_type == SO_TIMESTAMPNS_NEW)
389 		memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
390 
391 	pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
392 	ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
393 
394 	ret = clock_gettime(CLOCK_REALTIME, &now_ts);
395 	ASSERT_OK(ret, "clock_gettime");
396 	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
397 
398 	if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
399 		ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
400 			  "check rcv tstamp");
401 }
402 
403 static void snd_tstamp(int fd, char *b, size_t s)
404 {
405 	struct sock_txtime opt = { .clockid = CLOCK_TAI };
406 	char ctl[CMSG_SPACE(sizeof(__u64))];
407 	struct timespec now_ts;
408 	struct msghdr msg = {};
409 	struct cmsghdr *cmsg;
410 	struct iovec iov;
411 	__u64 now_ns;
412 	int ret;
413 
414 	ret = clock_gettime(CLOCK_TAI, &now_ts);
415 	ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
416 	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
417 
418 	iov.iov_base = b;
419 	iov.iov_len = s;
420 	msg.msg_iov = &iov;
421 	msg.msg_iovlen = 1;
422 	msg.msg_control = &ctl;
423 	msg.msg_controllen = sizeof(ctl);
424 
425 	cmsg = CMSG_FIRSTHDR(&msg);
426 	cmsg->cmsg_level = SOL_SOCKET;
427 	cmsg->cmsg_type = SCM_TXTIME;
428 	cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
429 	*(__u64 *)CMSG_DATA(cmsg) = now_ns;
430 
431 	ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
432 	ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
433 
434 	ret = sendmsg(fd, &msg, 0);
435 	ASSERT_EQ(ret, s, "sendmsg");
436 }
437 
438 static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
439 {
440 	int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err;
441 	char buf[] = "testing testing";
442 	struct nstoken *nstoken;
443 
444 	nstoken = open_netns(NS_DST);
445 	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
446 		return;
447 	listen_fd = start_server(family, type, addr, port, 0);
448 	close_netns(nstoken);
449 
450 	if (!ASSERT_GE(listen_fd, 0, "listen"))
451 		return;
452 
453 	/* Ensure the kernel puts the (rcv) timestamp for all skb */
454 	err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
455 			 &opt, sizeof(opt));
456 	if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
457 		goto done;
458 
459 	if (type == SOCK_STREAM) {
460 		/* Ensure the kernel set EDT when sending out rst/ack
461 		 * from the kernel's ctl_sk.
462 		 */
463 		err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt,
464 				 sizeof(opt));
465 		if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
466 			goto done;
467 	}
468 
469 	nstoken = open_netns(NS_SRC);
470 	if (!ASSERT_OK_PTR(nstoken, "setns src"))
471 		goto done;
472 	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
473 	close_netns(nstoken);
474 
475 	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
476 		goto done;
477 
478 	if (type == SOCK_STREAM) {
479 		int n;
480 
481 		accept_fd = accept(listen_fd, NULL, NULL);
482 		if (!ASSERT_GE(accept_fd, 0, "accept"))
483 			goto done;
484 
485 		n = write(client_fd, buf, sizeof(buf));
486 		if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
487 			goto done;
488 		rcv_tstamp(accept_fd, buf, sizeof(buf));
489 	} else {
490 		snd_tstamp(client_fd, buf, sizeof(buf));
491 		rcv_tstamp(listen_fd, buf, sizeof(buf));
492 	}
493 
494 done:
495 	close(listen_fd);
496 	if (accept_fd != -1)
497 		close(accept_fd);
498 	if (client_fd != -1)
499 		close(client_fd);
500 }
501 
502 static int netns_load_dtime_bpf(struct test_tc_dtime *skel)
503 {
504 	struct nstoken *nstoken;
505 
506 #define PIN_FNAME(__file) "/sys/fs/bpf/" #__file
507 #define PIN(__prog) ({							\
508 		int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \
509 		if (!ASSERT_OK(err, "pin " #__prog))		\
510 			goto fail;					\
511 		})
512 
513 	/* setup ns_src tc progs */
514 	nstoken = open_netns(NS_SRC);
515 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
516 		return -1;
517 	PIN(egress_host);
518 	PIN(ingress_host);
519 	SYS("tc qdisc add dev veth_src clsact");
520 	SYS("tc filter add dev veth_src ingress bpf da object-pinned "
521 	    PIN_FNAME(ingress_host));
522 	SYS("tc filter add dev veth_src egress bpf da object-pinned "
523 	    PIN_FNAME(egress_host));
524 	close_netns(nstoken);
525 
526 	/* setup ns_dst tc progs */
527 	nstoken = open_netns(NS_DST);
528 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
529 		return -1;
530 	PIN(egress_host);
531 	PIN(ingress_host);
532 	SYS("tc qdisc add dev veth_dst clsact");
533 	SYS("tc filter add dev veth_dst ingress bpf da object-pinned "
534 	    PIN_FNAME(ingress_host));
535 	SYS("tc filter add dev veth_dst egress bpf da object-pinned "
536 	    PIN_FNAME(egress_host));
537 	close_netns(nstoken);
538 
539 	/* setup ns_fwd tc progs */
540 	nstoken = open_netns(NS_FWD);
541 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
542 		return -1;
543 	PIN(ingress_fwdns_prio100);
544 	PIN(egress_fwdns_prio100);
545 	PIN(ingress_fwdns_prio101);
546 	PIN(egress_fwdns_prio101);
547 	SYS("tc qdisc add dev veth_dst_fwd clsact");
548 	SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned "
549 	    PIN_FNAME(ingress_fwdns_prio100));
550 	SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned "
551 	    PIN_FNAME(ingress_fwdns_prio101));
552 	SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned "
553 	    PIN_FNAME(egress_fwdns_prio100));
554 	SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned "
555 	    PIN_FNAME(egress_fwdns_prio101));
556 	SYS("tc qdisc add dev veth_src_fwd clsact");
557 	SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned "
558 	    PIN_FNAME(ingress_fwdns_prio100));
559 	SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned "
560 	    PIN_FNAME(ingress_fwdns_prio101));
561 	SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned "
562 	    PIN_FNAME(egress_fwdns_prio100));
563 	SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned "
564 	    PIN_FNAME(egress_fwdns_prio101));
565 	close_netns(nstoken);
566 
567 #undef PIN
568 
569 	return 0;
570 
571 fail:
572 	close_netns(nstoken);
573 	return -1;
574 }
575 
/* Counter slots used to index the per-test dtimes[] and errs[] rows read
 * from the BPF skeleton. SET_DTIME is the last slot; __MAX_CNT is the
 * number of slots.
 */
enum {
	INGRESS_FWDNS_P100 = 0,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,
};

/* Printable name of each counter slot, indexed by the enum above. */
const char *cnt_names[] = {
	[INGRESS_FWDNS_P100]	= "ingress_fwdns_p100",
	[INGRESS_FWDNS_P101]	= "ingress_fwdns_p101",
	[EGRESS_FWDNS_P100]	= "egress_fwdns_p100",
	[EGRESS_FWDNS_P101]	= "egress_fwdns_p101",
	[INGRESS_ENDHOST]	= "ingress_endhost",
	[EGRESS_ENDHOST]	= "egress_endhost",
	[SET_DTIME]		= "set_dtime",
};
596 
/* Identifiers of the individual dtime test cases; they select the row of
 * dtimes[]/errs[] and are written to skel->bss->test before each run.
 */
enum {
	TCP_IP6_CLEAR_DTIME = 0,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};

/* Printable name of each test case; there is no entry for UKN_TEST. */
const char *test_names[] = {
	[TCP_IP6_CLEAR_DTIME]	= "tcp ip6 clear dtime",
	[TCP_IP4]		= "tcp ip4",
	[TCP_IP6]		= "tcp ip6",
	[UDP_IP4]		= "udp ip4",
	[UDP_IP6]		= "udp ip6",
	[TCP_IP4_RT_FWD]	= "tcp ip4 rt fwd",
	[TCP_IP6_RT_FWD]	= "tcp ip6 rt fwd",
	[UDP_IP4_RT_FWD]	= "udp ip4 rt fwd",
	[UDP_IP6_RT_FWD]	= "udp ip6 rt fwd",
};
622 
623 static const char *dtime_cnt_str(int test, int cnt)
624 {
625 	static char name[64];
626 
627 	snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
628 
629 	return name;
630 }
631 
632 static const char *dtime_err_str(int test, int cnt)
633 {
634 	static char name[64];
635 
636 	snprintf(name, sizeof(name), "%s %s errs", test_names[test],
637 		 cnt_names[cnt]);
638 
639 	return name;
640 }
641 
642 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
643 {
644 	int i, t = TCP_IP6_CLEAR_DTIME;
645 	__u32 *dtimes = skel->bss->dtimes[t];
646 	__u32 *errs = skel->bss->errs[t];
647 
648 	skel->bss->test = t;
649 	test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
650 
651 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
652 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
653 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
654 		  dtime_cnt_str(t, INGRESS_FWDNS_P101));
655 	ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
656 		  dtime_cnt_str(t, EGRESS_FWDNS_P100));
657 	ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
658 		  dtime_cnt_str(t, EGRESS_FWDNS_P101));
659 	ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
660 		  dtime_cnt_str(t, EGRESS_ENDHOST));
661 	ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
662 		  dtime_cnt_str(t, INGRESS_ENDHOST));
663 
664 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
665 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
666 }
667 
668 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
669 {
670 	__u32 *dtimes, *errs;
671 	const char *addr;
672 	int i, t;
673 
674 	if (family == AF_INET) {
675 		t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
676 		addr = IP4_DST;
677 	} else {
678 		t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
679 		addr = IP6_DST;
680 	}
681 
682 	dtimes = skel->bss->dtimes[t];
683 	errs = skel->bss->errs[t];
684 
685 	skel->bss->test = t;
686 	test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
687 
688 	/* fwdns_prio100 prog does not read delivery_time_type, so
689 	 * kernel puts the (rcv) timetamp in __sk_buff->tstamp
690 	 */
691 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
692 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
693 	for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
694 		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
695 
696 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
697 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
698 }
699 
700 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
701 {
702 	__u32 *dtimes, *errs;
703 	const char *addr;
704 	int i, t;
705 
706 	if (family == AF_INET) {
707 		t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
708 		addr = IP4_DST;
709 	} else {
710 		t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
711 		addr = IP6_DST;
712 	}
713 
714 	dtimes = skel->bss->dtimes[t];
715 	errs = skel->bss->errs[t];
716 
717 	skel->bss->test = t;
718 	test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
719 
720 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
721 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
722 	/* non mono delivery time is not forwarded */
723 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
724 		  dtime_cnt_str(t, INGRESS_FWDNS_P101));
725 	for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
726 		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
727 
728 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
729 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
730 }
731 
732 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
733 {
734 	struct test_tc_dtime *skel;
735 	struct nstoken *nstoken;
736 	int err;
737 
738 	skel = test_tc_dtime__open();
739 	if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
740 		return;
741 
742 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
743 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
744 
745 	err = test_tc_dtime__load(skel);
746 	if (!ASSERT_OK(err, "test_tc_dtime__load"))
747 		goto done;
748 
749 	if (netns_load_dtime_bpf(skel))
750 		goto done;
751 
752 	nstoken = open_netns(NS_FWD);
753 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
754 		goto done;
755 	err = set_forwarding(false);
756 	close_netns(nstoken);
757 	if (!ASSERT_OK(err, "disable forwarding"))
758 		goto done;
759 
760 	test_tcp_clear_dtime(skel);
761 
762 	test_tcp_dtime(skel, AF_INET, true);
763 	test_tcp_dtime(skel, AF_INET6, true);
764 	test_udp_dtime(skel, AF_INET, true);
765 	test_udp_dtime(skel, AF_INET6, true);
766 
767 	/* Test the kernel ip[6]_forward path instead
768 	 * of bpf_redirect_neigh().
769 	 */
770 	nstoken = open_netns(NS_FWD);
771 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
772 		goto done;
773 	err = set_forwarding(true);
774 	close_netns(nstoken);
775 	if (!ASSERT_OK(err, "enable forwarding"))
776 		goto done;
777 
778 	test_tcp_dtime(skel, AF_INET, false);
779 	test_tcp_dtime(skel, AF_INET6, false);
780 	test_udp_dtime(skel, AF_INET, false);
781 	test_udp_dtime(skel, AF_INET6, false);
782 
783 done:
784 	test_tc_dtime__destroy(skel);
785 }
786 
787 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
788 {
789 	struct nstoken *nstoken = NULL;
790 	struct test_tc_neigh_fib *skel = NULL;
791 	int err;
792 
793 	nstoken = open_netns(NS_FWD);
794 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
795 		return;
796 
797 	skel = test_tc_neigh_fib__open();
798 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
799 		goto done;
800 
801 	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
802 		goto done;
803 
804 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
805 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
806 		goto done;
807 
808 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
809 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
810 		goto done;
811 
812 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
813 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
814 		goto done;
815 
816 	if (netns_load_bpf())
817 		goto done;
818 
819 	/* bpf_fib_lookup() checks if forwarding is enabled */
820 	if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
821 		goto done;
822 
823 	test_connectivity();
824 
825 done:
826 	if (skel)
827 		test_tc_neigh_fib__destroy(skel);
828 	close_netns(nstoken);
829 }
830 
831 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
832 {
833 	struct nstoken *nstoken = NULL;
834 	struct test_tc_neigh *skel = NULL;
835 	int err;
836 
837 	nstoken = open_netns(NS_FWD);
838 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
839 		return;
840 
841 	skel = test_tc_neigh__open();
842 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
843 		goto done;
844 
845 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
846 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
847 
848 	err = test_tc_neigh__load(skel);
849 	if (!ASSERT_OK(err, "test_tc_neigh__load"))
850 		goto done;
851 
852 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
853 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
854 		goto done;
855 
856 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
857 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
858 		goto done;
859 
860 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
861 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
862 		goto done;
863 
864 	if (netns_load_bpf())
865 		goto done;
866 
867 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
868 		goto done;
869 
870 	test_connectivity();
871 
872 done:
873 	if (skel)
874 		test_tc_neigh__destroy(skel);
875 	close_netns(nstoken);
876 }
877 
878 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
879 {
880 	struct nstoken *nstoken;
881 	struct test_tc_peer *skel;
882 	int err;
883 
884 	nstoken = open_netns(NS_FWD);
885 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
886 		return;
887 
888 	skel = test_tc_peer__open();
889 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
890 		goto done;
891 
892 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
893 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
894 
895 	err = test_tc_peer__load(skel);
896 	if (!ASSERT_OK(err, "test_tc_peer__load"))
897 		goto done;
898 
899 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
900 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
901 		goto done;
902 
903 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
904 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
905 		goto done;
906 
907 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
908 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
909 		goto done;
910 
911 	if (netns_load_bpf())
912 		goto done;
913 
914 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
915 		goto done;
916 
917 	test_connectivity();
918 
919 done:
920 	if (skel)
921 		test_tc_peer__destroy(skel);
922 	close_netns(nstoken);
923 }
924 
925 static int tun_open(char *name)
926 {
927 	struct ifreq ifr;
928 	int fd, err;
929 
930 	fd = open("/dev/net/tun", O_RDWR);
931 	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
932 		return -1;
933 
934 	memset(&ifr, 0, sizeof(ifr));
935 
936 	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
937 	if (*name)
938 		strncpy(ifr.ifr_name, name, IFNAMSIZ);
939 
940 	err = ioctl(fd, TUNSETIFF, &ifr);
941 	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
942 		goto fail;
943 
944 	SYS("ip link set dev %s up", name);
945 
946 	return fd;
947 fail:
948 	close(fd);
949 	return -1;
950 }
951 
/* Copy direction chosen per iteration in tun_relay_loop(). */
enum {
	SRC_TO_TARGET = 0,	/* read from src_fd, write to target_fd */
	TARGET_TO_SRC = 1,	/* read from target_fd, write to src_fd */
};
956 
957 static int tun_relay_loop(int src_fd, int target_fd)
958 {
959 	fd_set rfds, wfds;
960 
961 	FD_ZERO(&rfds);
962 	FD_ZERO(&wfds);
963 
964 	for (;;) {
965 		char buf[1500];
966 		int direction, nread, nwrite;
967 
968 		FD_SET(src_fd, &rfds);
969 		FD_SET(target_fd, &rfds);
970 
971 		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
972 			log_err("select failed");
973 			return 1;
974 		}
975 
976 		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
977 
978 		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
979 		if (nread < 0) {
980 			log_err("read failed");
981 			return 1;
982 		}
983 
984 		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
985 		if (nwrite != nread) {
986 			log_err("write failed");
987 			return 1;
988 		}
989 	}
990 }
991 
/* peer_l3 subtest: connect src and fwd through a pair of TUN devices whose
 * traffic is relayed by a forked child, attach the *_l3 programs of
 * test_tc_peer in the fwd namespace, reroute the src -> dst traffic over the
 * tunnel and run the connectivity checks.
 */
static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
{
	struct test_tc_peer *skel = NULL;
	struct nstoken *nstoken = NULL;
	int err;
	int tunnel_pid = -1;
	/* src_fd is assigned by tun_open() before the first "goto fail" */
	int src_fd, target_fd = -1;
	int ifindex;

	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
	 * expose the L2 headers encapsulating the IP packet to BPF and hence
	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
	 * but that requires much more complicated setup.
	 */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
		return;

	src_fd = tun_open("tun_src");
	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
		goto fail;

	close_netns(nstoken);

	/* Everything below runs in the fwd namespace until the fail label. */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
		goto fail;

	target_fd = tun_open("tun_fwd");
	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
		goto fail;

	tunnel_pid = fork();
	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
		goto fail;

	/* Child: relay packets between the two TUN descriptors.
	 * NOTE(review): exit() rather than _exit() is used in the forked
	 * child - confirm that running exit handlers there is intended.
	 */
	if (tunnel_pid == 0)
		exit(tun_relay_loop(src_fd, target_fd));

	skel = test_tc_peer__open();
	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
		goto fail;

	ifindex = get_ifindex("tun_fwd");
	if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
		goto fail;

	/* The tunnel device takes the place of veth_src_fwd as source. */
	skel->rodata->IFINDEX_SRC = ifindex;
	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;

	err = test_tc_peer__load(skel);
	if (!ASSERT_OK(err, "test_tc_peer__load"))
		goto fail;

	err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
		goto fail;

	err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
		goto fail;

	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
		goto fail;

	/* Attach "tc_src_l3" to tun_fwd ingress to redirect packets towards
	 * dst, "tc_dst_l3" to veth_dst_fwd ingress, and "tc_chk" to
	 * veth_dst_fwd egress to drop non-redirected packets.
	 */
	SYS("tc qdisc add dev tun_fwd clsact");
	SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
	    SRC_PROG_PIN_FILE);

	SYS("tc qdisc add dev veth_dst_fwd clsact");
	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
	    DST_PROG_PIN_FILE);
	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
	    CHK_PROG_PIN_FILE);

	/* Setup route and neigh tables */
	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");

	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");

	/* Replace the direct veth_src routes to dst with routes via the
	 * tunnel, and teach dst how to reach the tunnel source address.
	 */
	SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
	SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
	    " dev tun_src scope global");
	SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
	SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
	SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
	    " dev tun_src scope global");
	SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");

	SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);

	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
		goto fail;

	test_connectivity();

fail:
	/* Also the normal exit path: stop the relay child and release
	 * every resource that was acquired.
	 */
	if (tunnel_pid > 0) {
		kill(tunnel_pid, SIGTERM);
		waitpid(tunnel_pid, NULL, 0);
	}
	if (src_fd >= 0)
		close(src_fd);
	if (target_fd >= 0)
		close(target_fd);
	if (skel)
		test_tc_peer__destroy(skel);
	if (nstoken)
		close_netns(nstoken);
}
1112 
/* Run subtest test_<name>() if it is selected: create the namespaces, set up
 * links and routes, call the test with the resulting setup_result, and
 * delete the namespaces again once they have been created.
 */
#define RUN_TEST(name)                                                                      \
	({                                                                                  \
		struct netns_setup_result setup_result;                                     \
		if (test__start_subtest(#name))                                             \
			if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
				if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),  \
					      "setup links and routes"))                    \
					test_ ## name(&setup_result);                       \
				netns_setup_namespaces("delete");                           \
			}                                                                   \
	})
1124 
/* Thread entry point: delete any leftover namespaces (errors ignored), then
 * run every subtest in sequence. @arg is unused; always returns NULL.
 */
static void *test_tc_redirect_run_tests(void *arg)
{
	netns_setup_namespaces_nofail("delete");

	RUN_TEST(tc_redirect_peer);
	RUN_TEST(tc_redirect_peer_l3);
	RUN_TEST(tc_redirect_neigh);
	RUN_TEST(tc_redirect_neigh_fib);
	RUN_TEST(tc_redirect_dtime);
	return NULL;
}
1136 
1137 void serial_test_tc_redirect(void)
1138 {
1139 	pthread_t test_thread;
1140 	int err;
1141 
1142 	/* Run the tests in their own thread to isolate the namespace changes
1143 	 * so they do not affect the environment of other tests.
1144 	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1145 	 */
1146 	err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1147 	if (ASSERT_OK(err, "pthread_create"))
1148 		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
1149 }
1150