1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /*
4  * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5  * between src and dst. The netns fwd has veth links to each src and dst. The
6  * client is in src and server in dst. The test installs a TC BPF program to each
7  * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8  * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9  * switch from ingress side; it also installs a checker prog on the egress side
10  * to drop unexpected traffic.
11  */
12 
13 #define _GNU_SOURCE
14 
15 #include <arpa/inet.h>
16 #include <linux/limits.h>
17 #include <linux/sysctl.h>
18 #include <linux/if_tun.h>
19 #include <linux/if.h>
20 #include <sched.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <sys/stat.h>
24 #include <sys/mount.h>
25 
26 #include "test_progs.h"
27 #include "network_helpers.h"
28 #include "test_tc_neigh_fib.skel.h"
29 #include "test_tc_neigh.skel.h"
30 #include "test_tc_peer.skel.h"
31 
/* Names of the three network namespaces that every subtest creates. */
32 #define NS_SRC "ns_src"
33 #define NS_FWD "ns_fwd"
34 #define NS_DST "ns_dst"
35 
/* IPv4 addresses: IP4_SRC/IP4_DST are put on veth_src/veth_dst, the TUN
 * addresses on tun_src/tun_fwd in the L3 peer test. IP4_PORT is the TCP port
 * used by the IPv4 connectivity check.
 */
36 #define IP4_SRC "172.16.1.100"
37 #define IP4_DST "172.16.2.100"
38 #define IP4_TUN_SRC "172.17.1.100"
39 #define IP4_TUN_FWD "172.17.1.200"
40 #define IP4_PORT 9004
41 
/* IPv6 counterparts of the addresses and port above. */
42 #define IP6_SRC "0::1:dead:beef:cafe"
43 #define IP6_DST "0::2:dead:beef:cafe"
44 #define IP6_TUN_SRC "1::1:dead:beef:cafe"
45 #define IP6_TUN_FWD "1::2:dead:beef:cafe"
46 #define IP6_PORT 9006
47 
/* Link-local style IPv4 addresses given to veth_src_fwd (SLL) and
 * veth_dst_fwd (DLL) in ns_fwd; IP4_NET/16 is routed on both endpoints.
 */
48 #define IP4_SLL "169.254.0.1"
49 #define IP4_DLL "169.254.0.2"
50 #define IP4_NET "169.254.0.0"
51 
/* Fixed MAC addresses assigned to veth_dst_fwd and veth_dst. */
52 #define MAC_DST_FWD "00:11:22:33:44:55"
53 #define MAC_DST "00:22:33:44:55:66"
54 
/* Number of bytes get_ifaddr() reads from /sys/class/net/<dev>/address. */
55 #define IFADDR_STR_LEN 18
/* Options passed to ping/ping6 by test_ping(). */
56 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
57 
/* bpffs paths the BPF programs are pinned to so that tc can attach them. */
58 #define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
59 #define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
60 #define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
61 
/* Timeout handed to connect_to_fd() and settimeo() in test_tcp(). */
62 #define TIMEOUT_MILLIS 10000
63 
/* Print MSG (printf-style) to stderr, prefixed with the source location and
 * the textual form of the current errno.
 */
64 #define log_err(MSG, ...) \
65 	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
66 		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
67 
/* NULL-terminated list of namespaces walked by netns_setup_namespaces(). */
68 static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
69 
/*
 * Write @newval at the start of the existing file @path (used here for
 * /proc/sys knobs).
 *
 * The file is opened with "r+", so it is not created if missing.
 *
 * Returns 0 on success, -1 if the file cannot be opened or the data cannot be
 * written.
 */
static int write_file(const char *path, const char *newval)
{
	FILE *f;

	f = fopen(path, "r+");
	if (!f)
		return -1;
	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		fclose(f);
		return -1;
	}
	/* stdio buffers the data, so the underlying write may only happen
	 * (and fail) when fclose() flushes the stream.
	 */
	if (fclose(f)) {
		log_err("writing to %s failed", path);
		return -1;
	}
	return 0;
}
85 
/* Handle returned by open_netns(); carries what close_netns() needs in order
 * to switch back.
 */
86 struct nstoken {
87 	int orig_netns_fd;	/* fd of /proc/self/ns/net opened before switching */
88 };
89 
90 static int setns_by_fd(int nsfd)
91 {
92 	int err;
93 
94 	err = setns(nsfd, CLONE_NEWNET);
95 	close(nsfd);
96 
97 	if (!ASSERT_OK(err, "setns"))
98 		return err;
99 
100 	/* Switch /sys to the new namespace so that e.g. /sys/class/net
101 	 * reflects the devices in the new namespace.
102 	 */
103 	err = unshare(CLONE_NEWNS);
104 	if (!ASSERT_OK(err, "unshare"))
105 		return err;
106 
107 	err = umount2("/sys", MNT_DETACH);
108 	if (!ASSERT_OK(err, "umount2 /sys"))
109 		return err;
110 
111 	err = mount("sysfs", "/sys", "sysfs", 0, NULL);
112 	if (!ASSERT_OK(err, "mount /sys"))
113 		return err;
114 
115 	err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
116 	if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
117 		return err;
118 
119 	return 0;
120 }
121 
122 /**
123  * open_netns() - Switch to specified network namespace by name.
124  *
125  * Returns token with which to restore the original namespace
126  * using close_netns().
127  */
128 static struct nstoken *open_netns(const char *name)
129 {
130 	int nsfd;
131 	char nspath[PATH_MAX];
132 	int err;
133 	struct nstoken *token;
134 
135 	token = malloc(sizeof(struct nstoken));
136 	if (!ASSERT_OK_PTR(token, "malloc token"))
137 		return NULL;
138 
139 	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
140 	if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
141 		goto fail;
142 
143 	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
144 	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
145 	if (!ASSERT_GE(nsfd, 0, "open netns fd"))
146 		goto fail;
147 
148 	err = setns_by_fd(nsfd);
149 	if (!ASSERT_OK(err, "setns_by_fd"))
150 		goto fail;
151 
152 	return token;
153 fail:
154 	free(token);
155 	return NULL;
156 }
157 
158 static void close_netns(struct nstoken *token)
159 {
160 	ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
161 	free(token);
162 }
163 
164 static int netns_setup_namespaces(const char *verb)
165 {
166 	const char * const *ns = namespaces;
167 	char cmd[128];
168 
169 	while (*ns) {
170 		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
171 		if (!ASSERT_OK(system(cmd), cmd))
172 			return -1;
173 		ns++;
174 	}
175 	return 0;
176 }
177 
/* Output of netns_setup_links_and_routes(): interface indexes of the two
 * fwd-side veth devices, read before they are moved into ns_fwd.
 */
178 struct netns_setup_result {
179 	int ifindex_veth_src_fwd;	/* ifindex of veth_src_fwd */
180 	int ifindex_veth_dst_fwd;	/* ifindex of veth_dst_fwd */
181 };
182 
183 static int get_ifaddr(const char *name, char *ifaddr)
184 {
185 	char path[PATH_MAX];
186 	FILE *f;
187 	int ret;
188 
189 	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
190 	f = fopen(path, "r");
191 	if (!ASSERT_OK_PTR(f, path))
192 		return -1;
193 
194 	ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
195 	if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
196 		fclose(f);
197 		return -1;
198 	}
199 	fclose(f);
200 	return 0;
201 }
202 
203 static int get_ifindex(const char *name)
204 {
205 	char path[PATH_MAX];
206 	char buf[32];
207 	FILE *f;
208 	int ret;
209 
210 	snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
211 	f = fopen(path, "r");
212 	if (!ASSERT_OK_PTR(f, path))
213 		return -1;
214 
215 	ret = fread(buf, 1, sizeof(buf), f);
216 	if (!ASSERT_GT(ret, 0, "fread ifindex")) {
217 		fclose(f);
218 		return -1;
219 	}
220 	fclose(f);
221 	return atoi(buf);
222 }
223 
/* Format a shell command (printf-style, at most 1024 bytes including the
 * terminator), run it with system() and assert that it returned 0.
 *
 * On failure control jumps to a label named "fail", so every function that
 * uses SYS() must provide one.
 */
224 #define SYS(fmt, ...)						\
225 	({							\
226 		char cmd[1024];					\
227 		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
228 		if (!ASSERT_OK(system(cmd), cmd))		\
229 			goto fail;				\
230 	})
231 
232 static int netns_setup_links_and_routes(struct netns_setup_result *result)
233 {
234 	struct nstoken *nstoken = NULL;
235 	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
236 
237 	SYS("ip link add veth_src type veth peer name veth_src_fwd");
238 	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
239 
240 	SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
241 	SYS("ip link set veth_dst address " MAC_DST);
242 
243 	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
244 		goto fail;
245 
246 	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
247 	if (result->ifindex_veth_src_fwd < 0)
248 		goto fail;
249 	result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
250 	if (result->ifindex_veth_dst_fwd < 0)
251 		goto fail;
252 
253 	SYS("ip link set veth_src netns " NS_SRC);
254 	SYS("ip link set veth_src_fwd netns " NS_FWD);
255 	SYS("ip link set veth_dst_fwd netns " NS_FWD);
256 	SYS("ip link set veth_dst netns " NS_DST);
257 
258 	/** setup in 'src' namespace */
259 	nstoken = open_netns(NS_SRC);
260 	if (!ASSERT_OK_PTR(nstoken, "setns src"))
261 		goto fail;
262 
263 	SYS("ip addr add " IP4_SRC "/32 dev veth_src");
264 	SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
265 	SYS("ip link set dev veth_src up");
266 
267 	SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
268 	SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
269 	SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
270 
271 	SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
272 	    veth_src_fwd_addr);
273 	SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
274 	    veth_src_fwd_addr);
275 
276 	close_netns(nstoken);
277 
278 	/** setup in 'fwd' namespace */
279 	nstoken = open_netns(NS_FWD);
280 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
281 		goto fail;
282 
283 	/* The fwd netns automatically gets a v6 LL address / routes, but also
284 	 * needs v4 one in order to start ARP probing. IP4_NET route is added
285 	 * to the endpoints so that the ARP processing will reply.
286 	 */
287 	SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
288 	SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
289 	SYS("ip link set dev veth_src_fwd up");
290 	SYS("ip link set dev veth_dst_fwd up");
291 
292 	SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
293 	SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
294 	SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
295 	SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
296 
297 	close_netns(nstoken);
298 
299 	/** setup in 'dst' namespace */
300 	nstoken = open_netns(NS_DST);
301 	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
302 		goto fail;
303 
304 	SYS("ip addr add " IP4_DST "/32 dev veth_dst");
305 	SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
306 	SYS("ip link set dev veth_dst up");
307 
308 	SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
309 	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
310 	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
311 
312 	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
313 	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
314 
315 	close_netns(nstoken);
316 
317 	return 0;
318 fail:
319 	if (nstoken)
320 		close_netns(nstoken);
321 	return -1;
322 }
323 
324 static int netns_load_bpf(void)
325 {
326 	SYS("tc qdisc add dev veth_src_fwd clsact");
327 	SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
328 	    SRC_PROG_PIN_FILE);
329 	SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
330 	    CHK_PROG_PIN_FILE);
331 
332 	SYS("tc qdisc add dev veth_dst_fwd clsact");
333 	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
334 	    DST_PROG_PIN_FILE);
335 	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
336 	    CHK_PROG_PIN_FILE);
337 
338 	return 0;
339 fail:
340 	return -1;
341 }
342 
343 static void test_tcp(int family, const char *addr, __u16 port)
344 {
345 	int listen_fd = -1, accept_fd = -1, client_fd = -1;
346 	char buf[] = "testing testing";
347 	int n;
348 	struct nstoken *nstoken;
349 
350 	nstoken = open_netns(NS_DST);
351 	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
352 		return;
353 
354 	listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
355 	if (!ASSERT_GE(listen_fd, 0, "listen"))
356 		goto done;
357 
358 	close_netns(nstoken);
359 	nstoken = open_netns(NS_SRC);
360 	if (!ASSERT_OK_PTR(nstoken, "setns src"))
361 		goto done;
362 
363 	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
364 	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
365 		goto done;
366 
367 	accept_fd = accept(listen_fd, NULL, NULL);
368 	if (!ASSERT_GE(accept_fd, 0, "accept"))
369 		goto done;
370 
371 	if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
372 		goto done;
373 
374 	n = write(client_fd, buf, sizeof(buf));
375 	if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
376 		goto done;
377 
378 	n = read(accept_fd, buf, sizeof(buf));
379 	ASSERT_EQ(n, sizeof(buf), "recv from server");
380 
381 done:
382 	if (nstoken)
383 		close_netns(nstoken);
384 	if (listen_fd >= 0)
385 		close(listen_fd);
386 	if (accept_fd >= 0)
387 		close(accept_fd);
388 	if (client_fd >= 0)
389 		close(client_fd);
390 }
391 
392 static int test_ping(int family, const char *addr)
393 {
394 	const char *ping = family == AF_INET6 ? "ping6" : "ping";
395 
396 	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
397 	return 0;
398 fail:
399 	return -1;
400 }
401 
402 static void test_connectivity(void)
403 {
404 	test_tcp(AF_INET, IP4_DST, IP4_PORT);
405 	test_ping(AF_INET, IP4_DST);
406 	test_tcp(AF_INET6, IP6_DST, IP6_PORT);
407 	test_ping(AF_INET6, IP6_DST);
408 }
409 
/*
 * Turn IPv4 and IPv6 forwarding on or off in the current network namespace
 * by writing "1" or "0" to the corresponding /proc/sys files.
 *
 * Returns 0 on success, or the write_file() error of the first knob that
 * could not be written.
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* Name the assertion after the value actually written */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" : "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" : "set ipv6.forwarding=0"))
		return err;

	return 0;
}
424 
425 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
426 {
427 	struct nstoken *nstoken = NULL;
428 	struct test_tc_neigh_fib *skel = NULL;
429 	int err;
430 
431 	nstoken = open_netns(NS_FWD);
432 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
433 		return;
434 
435 	skel = test_tc_neigh_fib__open();
436 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
437 		goto done;
438 
439 	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
440 		goto done;
441 
442 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
443 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
444 		goto done;
445 
446 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
447 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
448 		goto done;
449 
450 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
451 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
452 		goto done;
453 
454 	if (netns_load_bpf())
455 		goto done;
456 
457 	/* bpf_fib_lookup() checks if forwarding is enabled */
458 	if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
459 		goto done;
460 
461 	test_connectivity();
462 
463 done:
464 	if (skel)
465 		test_tc_neigh_fib__destroy(skel);
466 	close_netns(nstoken);
467 }
468 
469 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
470 {
471 	struct nstoken *nstoken = NULL;
472 	struct test_tc_neigh *skel = NULL;
473 	int err;
474 
475 	nstoken = open_netns(NS_FWD);
476 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
477 		return;
478 
479 	skel = test_tc_neigh__open();
480 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
481 		goto done;
482 
483 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
484 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
485 
486 	err = test_tc_neigh__load(skel);
487 	if (!ASSERT_OK(err, "test_tc_neigh__load"))
488 		goto done;
489 
490 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
491 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
492 		goto done;
493 
494 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
495 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
496 		goto done;
497 
498 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
499 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
500 		goto done;
501 
502 	if (netns_load_bpf())
503 		goto done;
504 
505 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
506 		goto done;
507 
508 	test_connectivity();
509 
510 done:
511 	if (skel)
512 		test_tc_neigh__destroy(skel);
513 	close_netns(nstoken);
514 }
515 
516 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
517 {
518 	struct nstoken *nstoken;
519 	struct test_tc_peer *skel;
520 	int err;
521 
522 	nstoken = open_netns(NS_FWD);
523 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
524 		return;
525 
526 	skel = test_tc_peer__open();
527 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
528 		goto done;
529 
530 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
531 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
532 
533 	err = test_tc_peer__load(skel);
534 	if (!ASSERT_OK(err, "test_tc_peer__load"))
535 		goto done;
536 
537 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
538 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
539 		goto done;
540 
541 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
542 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
543 		goto done;
544 
545 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
546 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
547 		goto done;
548 
549 	if (netns_load_bpf())
550 		goto done;
551 
552 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
553 		goto done;
554 
555 	test_connectivity();
556 
557 done:
558 	if (skel)
559 		test_tc_peer__destroy(skel);
560 	close_netns(nstoken);
561 }
562 
/*
 * Create a TUN device (IFF_TUN | IFF_NO_PI) in the current namespace and
 * bring it up.
 *
 * @name: requested interface name; an empty string requests no particular
 *        name.
 *
 * Returns the open /dev/net/tun file descriptor on success, -1 on failure.
 */
static int tun_open(char *name)
{
	struct ifreq ifr;
	int fd, err;

	fd = open("/dev/net/tun", O_RDWR);
	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
		return -1;

	memset(&ifr, 0, sizeof(ifr));

	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
	/* ifr was zeroed above, so copying at most IFNAMSIZ - 1 bytes keeps
	 * ifr_name NUL-terminated even for an over-long @name.
	 */
	if (*name)
		strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);

	err = ioctl(fd, TUNSETIFF, &ifr);
	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
		goto fail;

	/* TUNSETIFF reports the device's actual name back in ifr_name, which
	 * is also correct when @name was empty or had to be truncated.
	 */
	SYS("ip link set dev %s up", ifr.ifr_name);

	return fd;
fail:
	close(fd);
	return -1;
}
589 
/* Larger of two values. Both arguments are evaluated more than once, so do
 * not pass expressions with side effects.
 */
590 #define MAX(a, b) ((a) > (b) ? (a) : (b))
/* Relay directions used by tun_relay_loop(). */
591 enum {
592 	SRC_TO_TARGET = 0,
593 	TARGET_TO_SRC = 1,
594 };
595 
596 static int tun_relay_loop(int src_fd, int target_fd)
597 {
598 	fd_set rfds, wfds;
599 
600 	FD_ZERO(&rfds);
601 	FD_ZERO(&wfds);
602 
603 	for (;;) {
604 		char buf[1500];
605 		int direction, nread, nwrite;
606 
607 		FD_SET(src_fd, &rfds);
608 		FD_SET(target_fd, &rfds);
609 
610 		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
611 			log_err("select failed");
612 			return 1;
613 		}
614 
615 		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
616 
617 		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
618 		if (nread < 0) {
619 			log_err("read failed");
620 			return 1;
621 		}
622 
623 		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
624 		if (nwrite != nread) {
625 			log_err("write failed");
626 			return 1;
627 		}
628 	}
629 }
630 
631 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
632 {
633 	struct test_tc_peer *skel = NULL;
634 	struct nstoken *nstoken = NULL;
635 	int err;
636 	int tunnel_pid = -1;
637 	int src_fd, target_fd;
638 	int ifindex;
639 
640 	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
641 	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
642 	 * expose the L2 headers encapsulating the IP packet to BPF and hence
643 	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
644 	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
645 	 * but that requires much more complicated setup.
646 	 */
647 	nstoken = open_netns(NS_SRC);
648 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
649 		return;
650 
651 	src_fd = tun_open("tun_src");
652 	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
653 		goto fail;
654 
655 	close_netns(nstoken);
656 
657 	nstoken = open_netns(NS_FWD);
658 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
659 		goto fail;
660 
661 	target_fd = tun_open("tun_fwd");
662 	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
663 		goto fail;
664 
665 	tunnel_pid = fork();
666 	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
667 		goto fail;
668 
669 	if (tunnel_pid == 0)
670 		exit(tun_relay_loop(src_fd, target_fd));
671 
672 	skel = test_tc_peer__open();
673 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
674 		goto fail;
675 
676 	ifindex = get_ifindex("tun_fwd");
677 	if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
678 		goto fail;
679 
680 	skel->rodata->IFINDEX_SRC = ifindex;
681 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
682 
683 	err = test_tc_peer__load(skel);
684 	if (!ASSERT_OK(err, "test_tc_peer__load"))
685 		goto fail;
686 
687 	err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
688 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
689 		goto fail;
690 
691 	err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
692 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
693 		goto fail;
694 
695 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
696 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
697 		goto fail;
698 
699 	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
700 	 * towards dst, and "tc_dst" to redirect packets
701 	 * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
702 	 */
703 	SYS("tc qdisc add dev tun_fwd clsact");
704 	SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
705 	    SRC_PROG_PIN_FILE);
706 
707 	SYS("tc qdisc add dev veth_dst_fwd clsact");
708 	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
709 	    DST_PROG_PIN_FILE);
710 	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
711 	    CHK_PROG_PIN_FILE);
712 
713 	/* Setup route and neigh tables */
714 	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
715 	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
716 
717 	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
718 	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
719 
720 	SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
721 	SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
722 	    " dev tun_src scope global");
723 	SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
724 	SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
725 	SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
726 	    " dev tun_src scope global");
727 	SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
728 
729 	SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
730 	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
731 
732 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
733 		goto fail;
734 
735 	test_connectivity();
736 
737 fail:
738 	if (tunnel_pid > 0) {
739 		kill(tunnel_pid, SIGTERM);
740 		waitpid(tunnel_pid, NULL, 0);
741 	}
742 	if (src_fd >= 0)
743 		close(src_fd);
744 	if (target_fd >= 0)
745 		close(target_fd);
746 	if (skel)
747 		test_tc_peer__destroy(skel);
748 	if (nstoken)
749 		close_netns(nstoken);
750 }
751 
/* Run subtest test_<name>() if test__start_subtest() selects it.
 *
 * The namespaces are created first; only when that succeeds are the links
 * and routes set up, and only when that also succeeds is test_<name>()
 * called with the setup result. The namespaces are deleted again whenever
 * their creation succeeded, regardless of how the later steps went.
 */
752 #define RUN_TEST(name)                                                                      \
753 	({                                                                                  \
754 		struct netns_setup_result setup_result;                                     \
755 		if (test__start_subtest(#name))                                             \
756 			if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
757 				if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),  \
758 					      "setup links and routes"))                    \
759 					test_ ## name(&setup_result);                       \
760 				netns_setup_namespaces("delete");                           \
761 			}                                                                   \
762 	})
763 
/* Thread entry point started by test_tc_redirect(): runs each subtest in
 * turn through RUN_TEST(). @arg is unused and the return value is always
 * NULL.
 */
764 static void *test_tc_redirect_run_tests(void *arg)
765 {
766 	RUN_TEST(tc_redirect_peer);
767 	RUN_TEST(tc_redirect_peer_l3);
768 	RUN_TEST(tc_redirect_neigh);
769 	RUN_TEST(tc_redirect_neigh_fib);
770 	return NULL;
771 }
772 
773 void test_tc_redirect(void)
774 {
775 	pthread_t test_thread;
776 	int err;
777 
778 	/* Run the tests in their own thread to isolate the namespace changes
779 	 * so they do not affect the environment of other tests.
780 	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
781 	 */
782 	err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
783 	if (ASSERT_OK(err, "pthread_create"))
784 		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
785 }
786