1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /*
4  * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5  * between src and dst. The netns fwd has veth links to each src and dst. The
6  * client is in src and server in dst. The test installs a TC BPF program to each
7  * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8  * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9  * switch from ingress side; it also installs a checker prog on the egress side
10  * to drop unexpected traffic.
11  */
12 
13 #define _GNU_SOURCE
14 
15 #include <arpa/inet.h>
16 #include <linux/if.h>
17 #include <linux/if_tun.h>
18 #include <linux/limits.h>
19 #include <linux/sysctl.h>
20 #include <sched.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <sys/mount.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 
27 #include "test_progs.h"
28 #include "network_helpers.h"
29 #include "test_tc_neigh_fib.skel.h"
30 #include "test_tc_neigh.skel.h"
31 #include "test_tc_peer.skel.h"
32 
/* Names of the three network namespaces created for every subtest. */
#define NS_SRC "ns_src"
#define NS_FWD "ns_fwd"
#define NS_DST "ns_dst"

/* IPv4 addressing: IP4_SRC / IP4_DST are assigned to veth_src / veth_dst,
 * IP4_TUN_SRC / IP4_TUN_FWD to tun_src / tun_fwd in the L3 peer subtest.
 * IP4_PORT is the TCP port used for the IPv4 connectivity check.
 */
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
#define IP4_TUN_SRC "172.17.1.100"
#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004

/* IPv6 counterparts of the addresses and port above. */
#define IP6_SRC "0::1:dead:beef:cafe"
#define IP6_DST "0::2:dead:beef:cafe"
#define IP6_TUN_SRC "1::1:dead:beef:cafe"
#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006

/* IPv4 addresses given to veth_src_fwd (IP4_SLL) and veth_dst_fwd (IP4_DLL)
 * inside ns_fwd; IP4_NET is routed as a /16 on the src and dst endpoints.
 */
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"

/* Fixed MAC addresses set on veth_dst_fwd and veth_dst respectively. */
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"

/* Number of bytes get_ifaddr() reads from /sys/class/net/<dev>/address. */
#define IFADDR_STR_LEN 18
/* Options passed to the ping command in test_ping(). */
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"

/* bpffs paths the programs are pinned to so that tc can attach them with
 * "object-pinned".
 */
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"

/* Timeout handed to connect_to_fd() and settimeo() in test_tcp(). */
#define TIMEOUT_MILLIS 10000

/* Print MSG (printf-style) to stderr, prefixed with the source location and
 * the textual form of the current errno.
 */
#define log_err(MSG, ...) \
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)

/* NULL-terminated list of namespaces walked by netns_setup_namespaces() and
 * netns_setup_namespaces_nofail().
 */
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
70 
/*
 * write_file() - Write a string to the start of an existing file.
 * @path:   file to open; it is opened with "r+", so it must already exist
 * @newval: NUL-terminated string to write
 *
 * Returns 0 on success, -1 if opening, writing or closing the file fails.
 */
static int write_file(const char *path, const char *newval)
{
	FILE *f;

	f = fopen(path, "r+");
	if (!f) {
		log_err("opening %s failed", path);
		return -1;
	}
	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		fclose(f);
		return -1;
	}
	/* stdio buffers the data, so a write that the kernel rejects may
	 * only be reported when the stream is flushed by fclose().
	 */
	if (fclose(f)) {
		log_err("closing %s failed", path);
		return -1;
	}
	return 0;
}
86 
/* Token handed out by open_netns(); it carries what close_netns() needs to
 * switch back to the namespace that was active before the call.
 */
struct nstoken {
	/* fd of /proc/self/ns/net, opened before the namespace switch */
	int orig_netns_fd;
};
90 
91 static int setns_by_fd(int nsfd)
92 {
93 	int err;
94 
95 	err = setns(nsfd, CLONE_NEWNET);
96 	close(nsfd);
97 
98 	if (!ASSERT_OK(err, "setns"))
99 		return err;
100 
101 	/* Switch /sys to the new namespace so that e.g. /sys/class/net
102 	 * reflects the devices in the new namespace.
103 	 */
104 	err = unshare(CLONE_NEWNS);
105 	if (!ASSERT_OK(err, "unshare"))
106 		return err;
107 
108 	err = umount2("/sys", MNT_DETACH);
109 	if (!ASSERT_OK(err, "umount2 /sys"))
110 		return err;
111 
112 	err = mount("sysfs", "/sys", "sysfs", 0, NULL);
113 	if (!ASSERT_OK(err, "mount /sys"))
114 		return err;
115 
116 	err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
117 	if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
118 		return err;
119 
120 	return 0;
121 }
122 
123 /**
124  * open_netns() - Switch to specified network namespace by name.
125  *
126  * Returns token with which to restore the original namespace
127  * using close_netns().
128  */
129 static struct nstoken *open_netns(const char *name)
130 {
131 	int nsfd;
132 	char nspath[PATH_MAX];
133 	int err;
134 	struct nstoken *token;
135 
136 	token = malloc(sizeof(struct nstoken));
137 	if (!ASSERT_OK_PTR(token, "malloc token"))
138 		return NULL;
139 
140 	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
141 	if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
142 		goto fail;
143 
144 	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
145 	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
146 	if (!ASSERT_GE(nsfd, 0, "open netns fd"))
147 		goto fail;
148 
149 	err = setns_by_fd(nsfd);
150 	if (!ASSERT_OK(err, "setns_by_fd"))
151 		goto fail;
152 
153 	return token;
154 fail:
155 	free(token);
156 	return NULL;
157 }
158 
159 static void close_netns(struct nstoken *token)
160 {
161 	ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
162 	free(token);
163 }
164 
165 static int netns_setup_namespaces(const char *verb)
166 {
167 	const char * const *ns = namespaces;
168 	char cmd[128];
169 
170 	while (*ns) {
171 		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
172 		if (!ASSERT_OK(system(cmd), cmd))
173 			return -1;
174 		ns++;
175 	}
176 	return 0;
177 }
178 
179 static void netns_setup_namespaces_nofail(const char *verb)
180 {
181 	const char * const *ns = namespaces;
182 	char cmd[128];
183 
184 	while (*ns) {
185 		snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
186 		system(cmd);
187 		ns++;
188 	}
189 }
190 
/* Values recorded by netns_setup_links_and_routes() for the subtests. */
struct netns_setup_result {
	/* ifindex of veth_src_fwd, read before the device is moved to ns_fwd */
	int ifindex_veth_src_fwd;
	/* ifindex of veth_dst_fwd, read before the device is moved to ns_fwd */
	int ifindex_veth_dst_fwd;
};
195 
196 static int get_ifaddr(const char *name, char *ifaddr)
197 {
198 	char path[PATH_MAX];
199 	FILE *f;
200 	int ret;
201 
202 	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
203 	f = fopen(path, "r");
204 	if (!ASSERT_OK_PTR(f, path))
205 		return -1;
206 
207 	ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
208 	if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
209 		fclose(f);
210 		return -1;
211 	}
212 	fclose(f);
213 	return 0;
214 }
215 
216 static int get_ifindex(const char *name)
217 {
218 	char path[PATH_MAX];
219 	char buf[32];
220 	FILE *f;
221 	int ret;
222 
223 	snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
224 	f = fopen(path, "r");
225 	if (!ASSERT_OK_PTR(f, path))
226 		return -1;
227 
228 	ret = fread(buf, 1, sizeof(buf), f);
229 	if (!ASSERT_GT(ret, 0, "fread ifindex")) {
230 		fclose(f);
231 		return -1;
232 	}
233 	fclose(f);
234 	return atoi(buf);
235 }
236 
/*
 * SYS() - Format a shell command (printf-style, at most 1024 bytes including
 * the terminator), run it with system() and assert that it returned 0.
 * On failure control jumps to a label named "fail", which every function
 * using this macro therefore has to define.
 */
#define SYS(fmt, ...)						\
	({							\
		char cmd[1024];					\
		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
		if (!ASSERT_OK(system(cmd), cmd))		\
			goto fail;				\
	})
244 
245 static int netns_setup_links_and_routes(struct netns_setup_result *result)
246 {
247 	struct nstoken *nstoken = NULL;
248 	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
249 
250 	SYS("ip link add veth_src type veth peer name veth_src_fwd");
251 	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
252 
253 	SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
254 	SYS("ip link set veth_dst address " MAC_DST);
255 
256 	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
257 		goto fail;
258 
259 	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
260 	if (result->ifindex_veth_src_fwd < 0)
261 		goto fail;
262 	result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
263 	if (result->ifindex_veth_dst_fwd < 0)
264 		goto fail;
265 
266 	SYS("ip link set veth_src netns " NS_SRC);
267 	SYS("ip link set veth_src_fwd netns " NS_FWD);
268 	SYS("ip link set veth_dst_fwd netns " NS_FWD);
269 	SYS("ip link set veth_dst netns " NS_DST);
270 
271 	/** setup in 'src' namespace */
272 	nstoken = open_netns(NS_SRC);
273 	if (!ASSERT_OK_PTR(nstoken, "setns src"))
274 		goto fail;
275 
276 	SYS("ip addr add " IP4_SRC "/32 dev veth_src");
277 	SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
278 	SYS("ip link set dev veth_src up");
279 
280 	SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
281 	SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
282 	SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
283 
284 	SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
285 	    veth_src_fwd_addr);
286 	SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
287 	    veth_src_fwd_addr);
288 
289 	close_netns(nstoken);
290 
291 	/** setup in 'fwd' namespace */
292 	nstoken = open_netns(NS_FWD);
293 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
294 		goto fail;
295 
296 	/* The fwd netns automatically gets a v6 LL address / routes, but also
297 	 * needs v4 one in order to start ARP probing. IP4_NET route is added
298 	 * to the endpoints so that the ARP processing will reply.
299 	 */
300 	SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
301 	SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
302 	SYS("ip link set dev veth_src_fwd up");
303 	SYS("ip link set dev veth_dst_fwd up");
304 
305 	SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
306 	SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
307 	SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
308 	SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
309 
310 	close_netns(nstoken);
311 
312 	/** setup in 'dst' namespace */
313 	nstoken = open_netns(NS_DST);
314 	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
315 		goto fail;
316 
317 	SYS("ip addr add " IP4_DST "/32 dev veth_dst");
318 	SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
319 	SYS("ip link set dev veth_dst up");
320 
321 	SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
322 	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
323 	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
324 
325 	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
326 	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
327 
328 	close_netns(nstoken);
329 
330 	return 0;
331 fail:
332 	if (nstoken)
333 		close_netns(nstoken);
334 	return -1;
335 }
336 
337 static int netns_load_bpf(void)
338 {
339 	SYS("tc qdisc add dev veth_src_fwd clsact");
340 	SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
341 	    SRC_PROG_PIN_FILE);
342 	SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
343 	    CHK_PROG_PIN_FILE);
344 
345 	SYS("tc qdisc add dev veth_dst_fwd clsact");
346 	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
347 	    DST_PROG_PIN_FILE);
348 	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
349 	    CHK_PROG_PIN_FILE);
350 
351 	return 0;
352 fail:
353 	return -1;
354 }
355 
356 static void test_tcp(int family, const char *addr, __u16 port)
357 {
358 	int listen_fd = -1, accept_fd = -1, client_fd = -1;
359 	char buf[] = "testing testing";
360 	int n;
361 	struct nstoken *nstoken;
362 
363 	nstoken = open_netns(NS_DST);
364 	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
365 		return;
366 
367 	listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
368 	if (!ASSERT_GE(listen_fd, 0, "listen"))
369 		goto done;
370 
371 	close_netns(nstoken);
372 	nstoken = open_netns(NS_SRC);
373 	if (!ASSERT_OK_PTR(nstoken, "setns src"))
374 		goto done;
375 
376 	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
377 	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
378 		goto done;
379 
380 	accept_fd = accept(listen_fd, NULL, NULL);
381 	if (!ASSERT_GE(accept_fd, 0, "accept"))
382 		goto done;
383 
384 	if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
385 		goto done;
386 
387 	n = write(client_fd, buf, sizeof(buf));
388 	if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
389 		goto done;
390 
391 	n = read(accept_fd, buf, sizeof(buf));
392 	ASSERT_EQ(n, sizeof(buf), "recv from server");
393 
394 done:
395 	if (nstoken)
396 		close_netns(nstoken);
397 	if (listen_fd >= 0)
398 		close(listen_fd);
399 	if (accept_fd >= 0)
400 		close(accept_fd);
401 	if (client_fd >= 0)
402 		close(client_fd);
403 }
404 
405 static int test_ping(int family, const char *addr)
406 {
407 	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
408 	return 0;
409 fail:
410 	return -1;
411 }
412 
413 static void test_connectivity(void)
414 {
415 	test_tcp(AF_INET, IP4_DST, IP4_PORT);
416 	test_ping(AF_INET, IP4_DST);
417 	test_tcp(AF_INET6, IP6_DST, IP6_PORT);
418 	test_ping(AF_INET6, IP6_DST);
419 }
420 
/*
 * set_forwarding() - Turn IPv4 and IPv6 forwarding on or off.
 * @enable: true writes "1", false writes "0" to both sysctl files
 *
 * Acts on the /proc/sys/net files visible to the caller.
 * Returns 0 on success, or the write_file() error of the first failure.
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* The assertion label names the value actually being written. */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" : "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" : "set ipv6.forwarding=0"))
		return err;

	return 0;
}
435 
436 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
437 {
438 	struct nstoken *nstoken = NULL;
439 	struct test_tc_neigh_fib *skel = NULL;
440 	int err;
441 
442 	nstoken = open_netns(NS_FWD);
443 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
444 		return;
445 
446 	skel = test_tc_neigh_fib__open();
447 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
448 		goto done;
449 
450 	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
451 		goto done;
452 
453 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
454 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
455 		goto done;
456 
457 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
458 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
459 		goto done;
460 
461 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
462 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
463 		goto done;
464 
465 	if (netns_load_bpf())
466 		goto done;
467 
468 	/* bpf_fib_lookup() checks if forwarding is enabled */
469 	if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
470 		goto done;
471 
472 	test_connectivity();
473 
474 done:
475 	if (skel)
476 		test_tc_neigh_fib__destroy(skel);
477 	close_netns(nstoken);
478 }
479 
480 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
481 {
482 	struct nstoken *nstoken = NULL;
483 	struct test_tc_neigh *skel = NULL;
484 	int err;
485 
486 	nstoken = open_netns(NS_FWD);
487 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
488 		return;
489 
490 	skel = test_tc_neigh__open();
491 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
492 		goto done;
493 
494 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
495 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
496 
497 	err = test_tc_neigh__load(skel);
498 	if (!ASSERT_OK(err, "test_tc_neigh__load"))
499 		goto done;
500 
501 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
502 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
503 		goto done;
504 
505 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
506 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
507 		goto done;
508 
509 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
510 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
511 		goto done;
512 
513 	if (netns_load_bpf())
514 		goto done;
515 
516 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
517 		goto done;
518 
519 	test_connectivity();
520 
521 done:
522 	if (skel)
523 		test_tc_neigh__destroy(skel);
524 	close_netns(nstoken);
525 }
526 
527 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
528 {
529 	struct nstoken *nstoken;
530 	struct test_tc_peer *skel;
531 	int err;
532 
533 	nstoken = open_netns(NS_FWD);
534 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
535 		return;
536 
537 	skel = test_tc_peer__open();
538 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
539 		goto done;
540 
541 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
542 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
543 
544 	err = test_tc_peer__load(skel);
545 	if (!ASSERT_OK(err, "test_tc_peer__load"))
546 		goto done;
547 
548 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
549 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
550 		goto done;
551 
552 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
553 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
554 		goto done;
555 
556 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
557 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
558 		goto done;
559 
560 	if (netns_load_bpf())
561 		goto done;
562 
563 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
564 		goto done;
565 
566 	test_connectivity();
567 
568 done:
569 	if (skel)
570 		test_tc_peer__destroy(skel);
571 	close_netns(nstoken);
572 }
573 
/*
 * tun_open() - Create a TUN device (IFF_TUN | IFF_NO_PI) and bring it up.
 * @name: requested device name; an empty string leaves the name choice to
 *        the kernel. Names longer than IFNAMSIZ - 1 bytes are truncated.
 *
 * Returns the /dev/net/tun file descriptor backing the device, or -1 on
 * failure.
 */
static int tun_open(char *name)
{
	struct ifreq ifr;
	int fd, err;

	fd = open("/dev/net/tun", O_RDWR);
	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
		return -1;

	memset(&ifr, 0, sizeof(ifr));

	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
	/* ifr is zeroed above; copying at most IFNAMSIZ - 1 bytes guarantees
	 * that ifr_name stays NUL-terminated.
	 */
	if (*name)
		strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);

	err = ioctl(fd, TUNSETIFF, &ifr);
	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
		goto fail;

	/* TUNSETIFF reports the device's actual name back in ifr_name, which
	 * is the one to use when @name was empty or had to be truncated.
	 */
	SYS("ip link set dev %s up", ifr.ifr_name);

	return fd;
fail:
	close(fd);
	return -1;
}
600 
/* Larger of two values. Both arguments may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
/* Direction in which tun_relay_loop() copies a packet. */
enum {
	SRC_TO_TARGET = 0,
	TARGET_TO_SRC = 1,
};
606 
607 static int tun_relay_loop(int src_fd, int target_fd)
608 {
609 	fd_set rfds, wfds;
610 
611 	FD_ZERO(&rfds);
612 	FD_ZERO(&wfds);
613 
614 	for (;;) {
615 		char buf[1500];
616 		int direction, nread, nwrite;
617 
618 		FD_SET(src_fd, &rfds);
619 		FD_SET(target_fd, &rfds);
620 
621 		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
622 			log_err("select failed");
623 			return 1;
624 		}
625 
626 		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
627 
628 		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
629 		if (nread < 0) {
630 			log_err("read failed");
631 			return 1;
632 		}
633 
634 		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
635 		if (nwrite != nread) {
636 			log_err("write failed");
637 			return 1;
638 		}
639 	}
640 }
641 
642 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
643 {
644 	struct test_tc_peer *skel = NULL;
645 	struct nstoken *nstoken = NULL;
646 	int err;
647 	int tunnel_pid = -1;
648 	int src_fd, target_fd = -1;
649 	int ifindex;
650 
651 	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
652 	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
653 	 * expose the L2 headers encapsulating the IP packet to BPF and hence
654 	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
655 	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
656 	 * but that requires much more complicated setup.
657 	 */
658 	nstoken = open_netns(NS_SRC);
659 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
660 		return;
661 
662 	src_fd = tun_open("tun_src");
663 	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
664 		goto fail;
665 
666 	close_netns(nstoken);
667 
668 	nstoken = open_netns(NS_FWD);
669 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
670 		goto fail;
671 
672 	target_fd = tun_open("tun_fwd");
673 	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
674 		goto fail;
675 
676 	tunnel_pid = fork();
677 	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
678 		goto fail;
679 
680 	if (tunnel_pid == 0)
681 		exit(tun_relay_loop(src_fd, target_fd));
682 
683 	skel = test_tc_peer__open();
684 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
685 		goto fail;
686 
687 	ifindex = get_ifindex("tun_fwd");
688 	if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
689 		goto fail;
690 
691 	skel->rodata->IFINDEX_SRC = ifindex;
692 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
693 
694 	err = test_tc_peer__load(skel);
695 	if (!ASSERT_OK(err, "test_tc_peer__load"))
696 		goto fail;
697 
698 	err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
699 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
700 		goto fail;
701 
702 	err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
703 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
704 		goto fail;
705 
706 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
707 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
708 		goto fail;
709 
710 	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
711 	 * towards dst, and "tc_dst" to redirect packets
712 	 * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
713 	 */
714 	SYS("tc qdisc add dev tun_fwd clsact");
715 	SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
716 	    SRC_PROG_PIN_FILE);
717 
718 	SYS("tc qdisc add dev veth_dst_fwd clsact");
719 	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
720 	    DST_PROG_PIN_FILE);
721 	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
722 	    CHK_PROG_PIN_FILE);
723 
724 	/* Setup route and neigh tables */
725 	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
726 	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
727 
728 	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
729 	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
730 
731 	SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
732 	SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
733 	    " dev tun_src scope global");
734 	SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
735 	SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
736 	SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
737 	    " dev tun_src scope global");
738 	SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
739 
740 	SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
741 	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
742 
743 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
744 		goto fail;
745 
746 	test_connectivity();
747 
748 fail:
749 	if (tunnel_pid > 0) {
750 		kill(tunnel_pid, SIGTERM);
751 		waitpid(tunnel_pid, NULL, 0);
752 	}
753 	if (src_fd >= 0)
754 		close(src_fd);
755 	if (target_fd >= 0)
756 		close(target_fd);
757 	if (skel)
758 		test_tc_peer__destroy(skel);
759 	if (nstoken)
760 		close_netns(nstoken);
761 }
762 
/*
 * RUN_TEST() - Run one subtest inside a freshly created set of namespaces.
 *
 * If test__start_subtest() selects the subtest called #name, the namespaces
 * are added, links and routes are set up and test_<name>(&setup_result) is
 * called. The namespaces are deleted afterwards whenever adding them
 * succeeded, including when link/route setup failed.
 */
#define RUN_TEST(name)                                                                      \
	({                                                                                  \
		struct netns_setup_result setup_result;                                     \
		if (test__start_subtest(#name))                                             \
			if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
				if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),  \
					      "setup links and routes"))                    \
					test_ ## name(&setup_result);                       \
				netns_setup_namespaces("delete");                           \
			}                                                                   \
	})
774 
/*
 * Thread entry point started by serial_test_tc_redirect(): runs every
 * tc_redirect subtest in sequence. @arg is unused; always returns NULL.
 */
static void *test_tc_redirect_run_tests(void *arg)
{
	/* Best-effort removal of the namespaces in case they already exist;
	 * failures are ignored.
	 */
	netns_setup_namespaces_nofail("delete");

	RUN_TEST(tc_redirect_peer);
	RUN_TEST(tc_redirect_peer_l3);
	RUN_TEST(tc_redirect_neigh);
	RUN_TEST(tc_redirect_neigh_fib);
	return NULL;
}
785 
786 void serial_test_tc_redirect(void)
787 {
788 	pthread_t test_thread;
789 	int err;
790 
791 	/* Run the tests in their own thread to isolate the namespace changes
792 	 * so they do not affect the environment of other tests.
793 	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
794 	 */
795 	err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
796 	if (ASSERT_OK(err, "pthread_create"))
797 		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
798 }
799