1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /*
4  * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5  * between src and dst. The netns fwd has veth links to each src and dst. The
6  * client is in src and server in dst. The test installs a TC BPF program to each
7  * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8  * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9  * switch from ingress side; it also installs a checker prog on the egress side
10  * to drop unexpected traffic.
11  */
12 
13 #include <arpa/inet.h>
14 #include <linux/if_tun.h>
15 #include <linux/limits.h>
16 #include <linux/sysctl.h>
17 #include <linux/time_types.h>
18 #include <linux/net_tstamp.h>
19 #include <net/if.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "test_tc_neigh_fib.skel.h"
28 #include "test_tc_neigh.skel.h"
29 #include "test_tc_peer.skel.h"
30 #include "test_tc_dtime.skel.h"
31 
32 #ifndef TCP_TX_DELAY
33 #define TCP_TX_DELAY 37
34 #endif
35 
36 #define NS_SRC "ns_src"
37 #define NS_FWD "ns_fwd"
38 #define NS_DST "ns_dst"
39 
40 #define IP4_SRC "172.16.1.100"
41 #define IP4_DST "172.16.2.100"
42 #define IP4_TUN_SRC "172.17.1.100"
43 #define IP4_TUN_FWD "172.17.1.200"
44 #define IP4_PORT 9004
45 
46 #define IP6_SRC "0::1:dead:beef:cafe"
47 #define IP6_DST "0::2:dead:beef:cafe"
48 #define IP6_TUN_SRC "1::1:dead:beef:cafe"
49 #define IP6_TUN_FWD "1::2:dead:beef:cafe"
50 #define IP6_PORT 9006
51 
52 #define IP4_SLL "169.254.0.1"
53 #define IP4_DLL "169.254.0.2"
54 #define IP4_NET "169.254.0.0"
55 
56 #define MAC_DST_FWD "00:11:22:33:44:55"
57 #define MAC_DST "00:22:33:44:55:66"
58 
59 #define IFADDR_STR_LEN 18
60 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
61 
62 #define TIMEOUT_MILLIS 10000
63 #define NSEC_PER_SEC 1000000000ULL
64 
/* Print a printf-style message to stderr, prefixed with the source file,
 * line number and the strerror() text for the current errno. A newline is
 * appended to MSG, which must be a string literal.
 */
#define log_err(MSG, ...) \
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
68 
/* NULL-terminated list of the network namespaces this test works with. */
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
70 
/* Write the string @newval at the start of an already existing file.
 *
 * @path:   file to open in "r+" mode
 * @newval: NUL-terminated string to write (the terminator is not written)
 *
 * Returns 0 on success, -1 if the file cannot be opened, written or flushed.
 */
static int write_file(const char *path, const char *newval)
{
	FILE *f;

	f = fopen(path, "r+");
	if (!f) {
		log_err("opening %s failed", path);
		return -1;
	}
	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		fclose(f);
		return -1;
	}
	/* stdio buffers the data, so the underlying write - and any error it
	 * reports - may only happen when fclose() flushes the stream.
	 */
	if (fclose(f)) {
		log_err("flushing %s failed", path);
		return -1;
	}
	return 0;
}
86 
netns_setup_namespaces(const char * verb)87 static int netns_setup_namespaces(const char *verb)
88 {
89 	const char * const *ns = namespaces;
90 	char cmd[128];
91 
92 	while (*ns) {
93 		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
94 		if (!ASSERT_OK(system(cmd), cmd))
95 			return -1;
96 		ns++;
97 	}
98 	return 0;
99 }
100 
netns_setup_namespaces_nofail(const char * verb)101 static void netns_setup_namespaces_nofail(const char *verb)
102 {
103 	const char * const *ns = namespaces;
104 	char cmd[128];
105 
106 	while (*ns) {
107 		snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
108 		system(cmd);
109 		ns++;
110 	}
111 }
112 
/* Kind of device used to connect the namespaces. */
enum dev_mode {
	/* src/src_fwd and dst/dst_fwd are created as veth pairs */
	MODE_VETH,
};
116 
/* Input and output of netns_setup_links_and_routes(): the caller selects
 * dev_mode, the setup code fills in the interface indexes.
 */
struct netns_setup_result {
	enum dev_mode dev_mode;
	int ifindex_src;	/* ifindex of "src" */
	int ifindex_src_fwd;	/* ifindex of "src_fwd" */
	int ifindex_dst;	/* ifindex of "dst" */
	int ifindex_dst_fwd;	/* ifindex of "dst_fwd" */
};
124 
get_ifaddr(const char * name,char * ifaddr)125 static int get_ifaddr(const char *name, char *ifaddr)
126 {
127 	char path[PATH_MAX];
128 	FILE *f;
129 	int ret;
130 
131 	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
132 	f = fopen(path, "r");
133 	if (!ASSERT_OK_PTR(f, path))
134 		return -1;
135 
136 	ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
137 	if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
138 		fclose(f);
139 		return -1;
140 	}
141 	fclose(f);
142 	return 0;
143 }
144 
/* Create the links between the three namespaces and configure addresses,
 * routes and (in veth mode) static neighbour entries in each of them.
 *
 * The devices are created in the current namespace, their MAC addresses and
 * interface indexes are recorded, and only then are they moved into
 * NS_SRC, NS_FWD and NS_DST.
 *
 * @result: dev_mode is read; the four ifindex_* fields are filled in.
 *
 * Returns 0 on success, -1 on the first failing step. Any namespace that was
 * entered is left again before returning.
 */
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
	struct nstoken *nstoken = NULL;
	/* one extra, zero-initialised byte so the addresses can be used as
	 * C strings after get_ifaddr() filled in IFADDR_STR_LEN bytes
	 */
	char src_fwd_addr[IFADDR_STR_LEN+1] = {};
	char src_addr[IFADDR_STR_LEN + 1] = {};

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip link add src type veth peer name src_fwd");
		SYS(fail, "ip link add dst type veth peer name dst_fwd");

		SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
		SYS(fail, "ip link set dst address " MAC_DST);
	}

	/* the src side keeps its generated MAC addresses; read them back for
	 * the neighbour entries added below
	 */
	if (get_ifaddr("src_fwd", src_fwd_addr))
		goto fail;

	if (get_ifaddr("src", src_addr))
		goto fail;

	result->ifindex_src = if_nametoindex("src");
	if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
		goto fail;

	result->ifindex_src_fwd = if_nametoindex("src_fwd");
	if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
		goto fail;

	result->ifindex_dst = if_nametoindex("dst");
	if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
		goto fail;

	result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
	if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
		goto fail;

	/* move each device into the namespace it belongs to */
	SYS(fail, "ip link set src netns " NS_SRC);
	SYS(fail, "ip link set src_fwd netns " NS_FWD);
	SYS(fail, "ip link set dst_fwd netns " NS_FWD);
	SYS(fail, "ip link set dst netns " NS_DST);

	/** setup in 'src' namespace */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns src"))
		goto fail;

	SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
	SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
	SYS(fail, "ip link set dev src up");

	SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
	SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
	SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
		    src_fwd_addr);
		SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
		    src_fwd_addr);
	}

	close_netns(nstoken);

	/** setup in 'fwd' namespace */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
		goto fail;

	/* The fwd netns automatically gets a v6 LL address / routes, but also
	 * needs v4 one in order to start ARP probing. IP4_NET route is added
	 * to the endpoints so that the ARP processing will reply.
	 */
	SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
	SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
	SYS(fail, "ip link set dev src_fwd up");
	SYS(fail, "ip link set dev dst_fwd up");

	SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
	SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
	SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
	SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
		SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
		SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
		SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
	}

	close_netns(nstoken);

	/** setup in 'dst' namespace */
	nstoken = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
		goto fail;

	SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
	SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
	SYS(fail, "ip link set dev dst up");
	SYS(fail, "ip link set dev lo up");

	SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
	SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
	SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
		SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
	}

	close_netns(nstoken);

	return 0;
fail:
	/* nstoken is NULL unless a failure happened inside a namespace */
	if (nstoken)
		close_netns(nstoken);
	return -1;
}
263 
qdisc_clsact_create(struct bpf_tc_hook * qdisc_hook,int ifindex)264 static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
265 {
266 	char err_str[128], ifname[16];
267 	int err;
268 
269 	qdisc_hook->ifindex = ifindex;
270 	qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
271 	err = bpf_tc_hook_create(qdisc_hook);
272 	snprintf(err_str, sizeof(err_str),
273 		 "qdisc add dev %s clsact",
274 		 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
275 	err_str[sizeof(err_str) - 1] = 0;
276 	ASSERT_OK(err, err_str);
277 
278 	return err;
279 }
280 
xgress_filter_add(struct bpf_tc_hook * qdisc_hook,enum bpf_tc_attach_point xgress,const struct bpf_program * prog,int priority)281 static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
282 			     enum bpf_tc_attach_point xgress,
283 			     const struct bpf_program *prog, int priority)
284 {
285 	LIBBPF_OPTS(bpf_tc_opts, tc_attach);
286 	char err_str[128], ifname[16];
287 	int err;
288 
289 	qdisc_hook->attach_point = xgress;
290 	tc_attach.prog_fd = bpf_program__fd(prog);
291 	tc_attach.priority = priority;
292 	err = bpf_tc_attach(qdisc_hook, &tc_attach);
293 	snprintf(err_str, sizeof(err_str),
294 		 "filter add dev %s %s prio %d bpf da %s",
295 		 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
296 		 xgress == BPF_TC_INGRESS ? "ingress" : "egress",
297 		 priority, bpf_program__name(prog));
298 	err_str[sizeof(err_str) - 1] = 0;
299 	ASSERT_OK(err, err_str);
300 
301 	return err;
302 }
303 
/* Call qdisc_clsact_create() and store its result in 'err'; jump to 'fail'
 * if the result is non-zero. The enclosing function must provide both the
 * 'err' variable and the 'fail' label.
 */
#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({		\
	if ((err = qdisc_clsact_create(qdisc_hook, ifindex)))	\
		goto fail;					\
})
308 
/* Call xgress_filter_add() and store its result in 'err'; jump to 'fail'
 * if the result is non-zero. The enclosing function must provide both the
 * 'err' variable and the 'fail' label.
 */
#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({		\
	if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority)))	\
		goto fail;							\
})
313 
netns_load_bpf(const struct bpf_program * src_prog,const struct bpf_program * dst_prog,const struct bpf_program * chk_prog,const struct netns_setup_result * setup_result)314 static int netns_load_bpf(const struct bpf_program *src_prog,
315 			  const struct bpf_program *dst_prog,
316 			  const struct bpf_program *chk_prog,
317 			  const struct netns_setup_result *setup_result)
318 {
319 	LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
320 	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
321 	int err;
322 
323 	/* tc qdisc add dev src_fwd clsact */
324 	QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
325 	/* tc filter add dev src_fwd ingress bpf da src_prog */
326 	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
327 	/* tc filter add dev src_fwd egress bpf da chk_prog */
328 	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
329 
330 	/* tc qdisc add dev dst_fwd clsact */
331 	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
332 	/* tc filter add dev dst_fwd ingress bpf da dst_prog */
333 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
334 	/* tc filter add dev dst_fwd egress bpf da chk_prog */
335 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
336 
337 	return 0;
338 fail:
339 	return -1;
340 }
341 
/* TCP connectivity check: start a server on @addr:@port inside NS_DST,
 * connect to it from NS_SRC, send one message and read it back on the
 * accepted socket. Every step is recorded through the ASSERT_*() helpers.
 */
static void test_tcp(int family, const char *addr, __u16 port)
{
	int srv_fd = -1, peer_fd = -1, cli_fd = -1;
	char payload[] = "testing testing";
	struct nstoken *tok;
	int nbytes;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;

	srv_fd = start_server(family, SOCK_STREAM, addr, port, 0);
	if (!ASSERT_GE(srv_fd, 0, "listen"))
		goto out;

	/* switch over to the client side namespace */
	close_netns(tok);
	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto out;

	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto out;

	peer_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(peer_fd, 0, "accept"))
		goto out;

	if (!ASSERT_OK(settimeo(peer_fd, TIMEOUT_MILLIS), "settimeo"))
		goto out;

	nbytes = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto out;

	nbytes = read(peer_fd, payload, sizeof(payload));
	ASSERT_EQ(nbytes, sizeof(payload), "recv from server");

out:
	/* tok is NULL here if re-entering a namespace failed */
	if (tok)
		close_netns(tok);
	if (srv_fd >= 0)
		close(srv_fd);
	if (peer_fd >= 0)
		close(peer_fd);
	if (cli_fd >= 0)
		close(cli_fd);
}
390 
test_ping(int family,const char * addr)391 static int test_ping(int family, const char *addr)
392 {
393 	SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
394 	return 0;
395 fail:
396 	return -1;
397 }
398 
test_connectivity(void)399 static void test_connectivity(void)
400 {
401 	test_tcp(AF_INET, IP4_DST, IP4_PORT);
402 	test_ping(AF_INET, IP4_DST);
403 	test_tcp(AF_INET6, IP6_DST, IP6_PORT);
404 	test_ping(AF_INET6, IP6_DST);
405 }
406 
/* Enable or disable IPv4 and IPv6 forwarding through /proc/sys.
 *
 * The files are opened relative to the caller's current network namespace,
 * so callers enter the namespace they want to change first.
 *
 * @enable: true writes "1", false writes "0"
 *
 * Returns 0 on success or the non-zero result of the failing write_file().
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* name the assertion after the value actually written so a failure
	 * report is not misleading when forwarding is being enabled
	 */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" :
				     "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" :
				     "set ipv6.forwarding=0"))
		return err;

	return 0;
}
421 
/* Receive one message on @fd and extract its SO_TIMESTAMPNS_NEW timestamp.
 *
 * @fd:       socket to receive from
 * @expected: data the message is expected to start with
 * @s:        expected message length (at most 32 bytes are received)
 * @tstamp:   if non-NULL, receives the timestamp in nanoseconds (0 when no
 *            timestamp control message was found) and no further checks are
 *            done; if NULL, the timestamp is asserted to be non-zero and at
 *            most 5 seconds older than the current CLOCK_REALTIME time
 *
 * Returns 0 once a message of length @s was received, -1 otherwise.
 */
static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
{
	struct __kernel_timespec rx_ts = {};
	char cbuf[CMSG_SPACE(sizeof(rx_ts))];
	char payload[32];
	struct iovec vec = {
		.iov_base = payload,
		.iov_len = sizeof(payload),
	};
	struct msghdr mh = {};
	struct timespec wall_ts;
	__u64 wall_ns, rx_ns;
	struct cmsghdr *hdr;
	int rc;

	mh.msg_iov = &vec;
	mh.msg_iovlen = 1;
	mh.msg_control = &cbuf;
	mh.msg_controllen = sizeof(cbuf);

	rc = recvmsg(fd, &mh, 0);
	if (!ASSERT_EQ(rc, s, "recvmsg"))
		return -1;
	ASSERT_STRNEQ(payload, expected, s, "expected rcv data");

	/* rx_ts stays zeroed if the first cmsg is not the timestamp */
	hdr = CMSG_FIRSTHDR(&mh);
	if (hdr && hdr->cmsg_level == SOL_SOCKET &&
	    hdr->cmsg_type == SO_TIMESTAMPNS_NEW)
		memcpy(&rx_ts, CMSG_DATA(hdr), sizeof(rx_ts));

	rx_ns = rx_ts.tv_sec * NSEC_PER_SEC + rx_ts.tv_nsec;
	if (tstamp) {
		/* the caller validates the value on its own */
		*tstamp = rx_ns;
		return 0;
	}

	ASSERT_NEQ(rx_ns, 0, "pkt rcv tstamp");

	rc = clock_gettime(CLOCK_REALTIME, &wall_ts);
	ASSERT_OK(rc, "clock_gettime");
	wall_ns = wall_ts.tv_sec * NSEC_PER_SEC + wall_ts.tv_nsec;

	/* only compute the age when the timestamp is not in the future */
	if (!ASSERT_GE(wall_ns, rx_ns, "check rcv tstamp"))
		return 0;

	ASSERT_LT(wall_ns - rx_ns, 5 * NSEC_PER_SEC, "check rcv tstamp");
	return 0;
}
469 
/* Receive one message on @fd and let __rcv_tstamp() check both the data and
 * the receive timestamp (NULL tstamp argument); its return value is not
 * propagated.
 */
static void rcv_tstamp(int fd, const char *expected, size_t s)
{
	(void)__rcv_tstamp(fd, expected, s, NULL);
}
474 
wait_netstamp_needed_key(void)475 static int wait_netstamp_needed_key(void)
476 {
477 	int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
478 	char buf[] = "testing testing";
479 	struct nstoken *nstoken;
480 	__u64 tstamp = 0;
481 
482 	nstoken = open_netns(NS_DST);
483 	if (!nstoken)
484 		return -1;
485 
486 	srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
487 	if (!ASSERT_GE(srv_fd, 0, "start_server"))
488 		goto done;
489 
490 	err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
491 			 &opt, sizeof(opt));
492 	if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
493 		goto done;
494 
495 	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
496 	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
497 		goto done;
498 
499 again:
500 	n = write(cli_fd, buf, sizeof(buf));
501 	if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
502 		goto done;
503 	err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
504 	if (!ASSERT_OK(err, "__rcv_tstamp"))
505 		goto done;
506 	if (!tstamp && nretries++ < 5) {
507 		sleep(1);
508 		printf("netstamp_needed_key retry#%d\n", nretries);
509 		goto again;
510 	}
511 
512 done:
513 	if (!tstamp && srv_fd != -1) {
514 		close(srv_fd);
515 		srv_fd = -1;
516 	}
517 	if (cli_fd != -1)
518 		close(cli_fd);
519 	close_netns(nstoken);
520 	return srv_fd;
521 }
522 
snd_tstamp(int fd,char * b,size_t s)523 static void snd_tstamp(int fd, char *b, size_t s)
524 {
525 	struct sock_txtime opt = { .clockid = CLOCK_TAI };
526 	char ctl[CMSG_SPACE(sizeof(__u64))];
527 	struct timespec now_ts;
528 	struct msghdr msg = {};
529 	struct cmsghdr *cmsg;
530 	struct iovec iov;
531 	__u64 now_ns;
532 	int ret;
533 
534 	ret = clock_gettime(CLOCK_TAI, &now_ts);
535 	ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
536 	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
537 
538 	iov.iov_base = b;
539 	iov.iov_len = s;
540 	msg.msg_iov = &iov;
541 	msg.msg_iovlen = 1;
542 	msg.msg_control = &ctl;
543 	msg.msg_controllen = sizeof(ctl);
544 
545 	cmsg = CMSG_FIRSTHDR(&msg);
546 	cmsg->cmsg_level = SOL_SOCKET;
547 	cmsg->cmsg_type = SCM_TXTIME;
548 	cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
549 	*(__u64 *)CMSG_DATA(cmsg) = now_ns;
550 
551 	ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
552 	ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
553 
554 	ret = sendmsg(fd, &msg, 0);
555 	ASSERT_EQ(ret, s, "sendmsg");
556 }
557 
/* Exchange one message between NS_SRC (client) and NS_DST (server) and check
 * the receive timestamp on the server side.
 *
 * @family: address family of @addr
 * @type:   SOCK_STREAM sends with write() and receives on the accepted
 *          socket; any other type sends with snd_tstamp() and receives on
 *          the server socket itself
 * @addr:   address the server binds to
 * @port:   port the server binds to
 */
static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
{
	int on = 1, conn_fd = -1, cli_fd = -1, srv_fd, rc, sent;
	char payload[] = "testing testing";
	struct nstoken *tok;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;
	srv_fd = start_server(family, type, addr, port, 0);
	close_netns(tok);

	if (!ASSERT_GE(srv_fd, 0, "listen"))
		return;

	/* make the kernel store a (rcv) timestamp in every skb */
	rc = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
			&on, sizeof(on));
	if (!ASSERT_OK(rc, "setsockopt(SO_TIMESTAMPNS_NEW)"))
		goto out;

	if (type == SOCK_STREAM) {
		/* make the kernel set the EDT on the rst/ack it sends
		 * from its own ctl_sk
		 */
		rc = setsockopt(srv_fd, SOL_TCP, TCP_TX_DELAY, &on,
				sizeof(on));
		if (!ASSERT_OK(rc, "setsockopt(TCP_TX_DELAY)"))
			goto out;
	}

	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto out;
	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	close_netns(tok);

	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto out;

	if (type != SOCK_STREAM) {
		snd_tstamp(cli_fd, payload, sizeof(payload));
		rcv_tstamp(srv_fd, payload, sizeof(payload));
		goto out;
	}

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto out;

	sent = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(sent, sizeof(payload), "send to server"))
		goto out;
	rcv_tstamp(conn_fd, payload, sizeof(payload));

out:
	close(srv_fd);
	if (conn_fd != -1)
		close(conn_fd);
	if (cli_fd != -1)
		close(cli_fd);
}
621 
netns_load_dtime_bpf(struct test_tc_dtime * skel,const struct netns_setup_result * setup_result)622 static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
623 				const struct netns_setup_result *setup_result)
624 {
625 	LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
626 	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
627 	LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
628 	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
629 	struct nstoken *nstoken;
630 	int err;
631 
632 	/* setup ns_src tc progs */
633 	nstoken = open_netns(NS_SRC);
634 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
635 		return -1;
636 	/* tc qdisc add dev src clsact */
637 	QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
638 	/* tc filter add dev src ingress bpf da ingress_host */
639 	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
640 	/* tc filter add dev src egress bpf da egress_host */
641 	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
642 	close_netns(nstoken);
643 
644 	/* setup ns_dst tc progs */
645 	nstoken = open_netns(NS_DST);
646 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
647 		return -1;
648 	/* tc qdisc add dev dst clsact */
649 	QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
650 	/* tc filter add dev dst ingress bpf da ingress_host */
651 	XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
652 	/* tc filter add dev dst egress bpf da egress_host */
653 	XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
654 	close_netns(nstoken);
655 
656 	/* setup ns_fwd tc progs */
657 	nstoken = open_netns(NS_FWD);
658 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
659 		return -1;
660 	/* tc qdisc add dev dst_fwd clsact */
661 	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
662 	/* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
663 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
664 			  skel->progs.ingress_fwdns_prio100, 100);
665 	/* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
666 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
667 			  skel->progs.ingress_fwdns_prio101, 101);
668 	/* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
669 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
670 			  skel->progs.egress_fwdns_prio100, 100);
671 	/* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
672 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
673 			  skel->progs.egress_fwdns_prio101, 101);
674 
675 	/* tc qdisc add dev src_fwd clsact */
676 	QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
677 	/* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
678 	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
679 			  skel->progs.ingress_fwdns_prio100, 100);
680 	/* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
681 	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
682 			  skel->progs.ingress_fwdns_prio101, 101);
683 	/* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
684 	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
685 			  skel->progs.egress_fwdns_prio100, 100);
686 	/* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
687 	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
688 			  skel->progs.egress_fwdns_prio101, 101);
689 	close_netns(nstoken);
690 	return 0;
691 
692 fail:
693 	close_netns(nstoken);
694 	return err;
695 }
696 
/* Indexes into the per-test dtimes[] and errs[] counter arrays of the
 * test_tc_dtime skeleton; cnt_names[] holds the matching printable names.
 */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	/* the dtimes[] checks below stop before this entry, the errs[]
	 * checks include it
	 */
	SET_DTIME,
	__MAX_CNT,
};
707 
/* Printable names of the counters, in the order of the counter enum above
 * (INGRESS_FWDNS_P100 ... SET_DTIME).
 */
const char *cnt_names[] = {
	"ingress_fwdns_p100",
	"ingress_fwdns_p101",
	"egress_fwdns_p100",
	"egress_fwdns_p101",
	"ingress_endhost",
	"egress_endhost",
	"set_dtime",
};
717 
/* Identifiers of the dtime sub-tests. The value selects the row of the
 * skeleton's dtimes[]/errs[] arrays, is written to skel->bss->test, and is
 * added to 50000 to pick the server port.
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	/* *_RT_FWD: used when the test is run with bpf_fwd == false */
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	/* NOTE(review): has no entry in test_names[] - confirm it is never
	 * passed to dtime_cnt_str()/dtime_err_str()
	 */
	UKN_TEST,
	__NR_TESTS,
};
731 
/* Printable names of the dtime sub-tests, indexed by the test enum above.
 * There are entries for TCP_IP6_CLEAR_DTIME ... UDP_IP6_RT_FWD only.
 */
const char *test_names[] = {
	"tcp ip6 clear dtime",
	"tcp ip4",
	"tcp ip6",
	"udp ip4",
	"udp ip6",
	"tcp ip4 rt fwd",
	"tcp ip6 rt fwd",
	"udp ip4 rt fwd",
	"udp ip6 rt fwd",
};
743 
dtime_cnt_str(int test,int cnt)744 static const char *dtime_cnt_str(int test, int cnt)
745 {
746 	static char name[64];
747 
748 	snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
749 
750 	return name;
751 }
752 
dtime_err_str(int test,int cnt)753 static const char *dtime_err_str(int test, int cnt)
754 {
755 	static char name[64];
756 
757 	snprintf(name, sizeof(name), "%s %s errs", test_names[test],
758 		 cnt_names[cnt]);
759 
760 	return name;
761 }
762 
test_tcp_clear_dtime(struct test_tc_dtime * skel)763 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
764 {
765 	int i, t = TCP_IP6_CLEAR_DTIME;
766 	__u32 *dtimes = skel->bss->dtimes[t];
767 	__u32 *errs = skel->bss->errs[t];
768 
769 	skel->bss->test = t;
770 	test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
771 
772 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
773 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
774 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
775 		  dtime_cnt_str(t, INGRESS_FWDNS_P101));
776 	ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
777 		  dtime_cnt_str(t, EGRESS_FWDNS_P100));
778 	ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
779 		  dtime_cnt_str(t, EGRESS_FWDNS_P101));
780 	ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
781 		  dtime_cnt_str(t, EGRESS_ENDHOST));
782 	ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
783 		  dtime_cnt_str(t, INGRESS_ENDHOST));
784 
785 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
786 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
787 }
788 
test_tcp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)789 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
790 {
791 	__u32 *dtimes, *errs;
792 	const char *addr;
793 	int i, t;
794 
795 	if (family == AF_INET) {
796 		t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
797 		addr = IP4_DST;
798 	} else {
799 		t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
800 		addr = IP6_DST;
801 	}
802 
803 	dtimes = skel->bss->dtimes[t];
804 	errs = skel->bss->errs[t];
805 
806 	skel->bss->test = t;
807 	test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
808 
809 	/* fwdns_prio100 prog does not read delivery_time_type, so
810 	 * kernel puts the (rcv) timetamp in __sk_buff->tstamp
811 	 */
812 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
813 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
814 	for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
815 		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
816 
817 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
818 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
819 }
820 
test_udp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)821 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
822 {
823 	__u32 *dtimes, *errs;
824 	const char *addr;
825 	int i, t;
826 
827 	if (family == AF_INET) {
828 		t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
829 		addr = IP4_DST;
830 	} else {
831 		t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
832 		addr = IP6_DST;
833 	}
834 
835 	dtimes = skel->bss->dtimes[t];
836 	errs = skel->bss->errs[t];
837 
838 	skel->bss->test = t;
839 	test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
840 
841 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
842 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
843 	/* non mono delivery time is not forwarded */
844 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
845 		  dtime_cnt_str(t, INGRESS_FWDNS_P101));
846 	for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
847 		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
848 
849 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
850 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
851 }
852 
test_tc_redirect_dtime(struct netns_setup_result * setup_result)853 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
854 {
855 	struct test_tc_dtime *skel;
856 	struct nstoken *nstoken;
857 	int hold_tstamp_fd, err;
858 
859 	/* Hold a sk with the SOCK_TIMESTAMP set to ensure there
860 	 * is no delay in the kernel net_enable_timestamp().
861 	 * This ensures the following tests must have
862 	 * non zero rcv tstamp in the recvmsg().
863 	 */
864 	hold_tstamp_fd = wait_netstamp_needed_key();
865 	if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
866 		return;
867 
868 	skel = test_tc_dtime__open();
869 	if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
870 		goto done;
871 
872 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
873 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
874 
875 	err = test_tc_dtime__load(skel);
876 	if (!ASSERT_OK(err, "test_tc_dtime__load"))
877 		goto done;
878 
879 	if (netns_load_dtime_bpf(skel, setup_result))
880 		goto done;
881 
882 	nstoken = open_netns(NS_FWD);
883 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
884 		goto done;
885 	err = set_forwarding(false);
886 	close_netns(nstoken);
887 	if (!ASSERT_OK(err, "disable forwarding"))
888 		goto done;
889 
890 	test_tcp_clear_dtime(skel);
891 
892 	test_tcp_dtime(skel, AF_INET, true);
893 	test_tcp_dtime(skel, AF_INET6, true);
894 	test_udp_dtime(skel, AF_INET, true);
895 	test_udp_dtime(skel, AF_INET6, true);
896 
897 	/* Test the kernel ip[6]_forward path instead
898 	 * of bpf_redirect_neigh().
899 	 */
900 	nstoken = open_netns(NS_FWD);
901 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
902 		goto done;
903 	err = set_forwarding(true);
904 	close_netns(nstoken);
905 	if (!ASSERT_OK(err, "enable forwarding"))
906 		goto done;
907 
908 	test_tcp_dtime(skel, AF_INET, false);
909 	test_tcp_dtime(skel, AF_INET6, false);
910 	test_udp_dtime(skel, AF_INET, false);
911 	test_udp_dtime(skel, AF_INET6, false);
912 
913 done:
914 	test_tc_dtime__destroy(skel);
915 	close(hold_tstamp_fd);
916 }
917 
test_tc_redirect_neigh_fib(struct netns_setup_result * setup_result)918 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
919 {
920 	struct nstoken *nstoken = NULL;
921 	struct test_tc_neigh_fib *skel = NULL;
922 
923 	nstoken = open_netns(NS_FWD);
924 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
925 		return;
926 
927 	skel = test_tc_neigh_fib__open();
928 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
929 		goto done;
930 
931 	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
932 		goto done;
933 
934 	if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
935 			   skel->progs.tc_chk, setup_result))
936 		goto done;
937 
938 	/* bpf_fib_lookup() checks if forwarding is enabled */
939 	if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
940 		goto done;
941 
942 	test_connectivity();
943 
944 done:
945 	if (skel)
946 		test_tc_neigh_fib__destroy(skel);
947 	close_netns(nstoken);
948 }
949 
test_tc_redirect_neigh(struct netns_setup_result * setup_result)950 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
951 {
952 	struct nstoken *nstoken = NULL;
953 	struct test_tc_neigh *skel = NULL;
954 	int err;
955 
956 	nstoken = open_netns(NS_FWD);
957 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
958 		return;
959 
960 	skel = test_tc_neigh__open();
961 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
962 		goto done;
963 
964 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
965 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
966 
967 	err = test_tc_neigh__load(skel);
968 	if (!ASSERT_OK(err, "test_tc_neigh__load"))
969 		goto done;
970 
971 	if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
972 			   skel->progs.tc_chk, setup_result))
973 		goto done;
974 
975 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
976 		goto done;
977 
978 	test_connectivity();
979 
980 done:
981 	if (skel)
982 		test_tc_neigh__destroy(skel);
983 	close_netns(nstoken);
984 }
985 
test_tc_redirect_peer(struct netns_setup_result * setup_result)986 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
987 {
988 	struct nstoken *nstoken;
989 	struct test_tc_peer *skel;
990 	int err;
991 
992 	nstoken = open_netns(NS_FWD);
993 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
994 		return;
995 
996 	skel = test_tc_peer__open();
997 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
998 		goto done;
999 
1000 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
1001 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1002 
1003 	err = test_tc_peer__load(skel);
1004 	if (!ASSERT_OK(err, "test_tc_peer__load"))
1005 		goto done;
1006 
1007 	if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
1008 			   skel->progs.tc_chk, setup_result))
1009 		goto done;
1010 
1011 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1012 		goto done;
1013 
1014 	test_connectivity();
1015 
1016 done:
1017 	if (skel)
1018 		test_tc_peer__destroy(skel);
1019 	close_netns(nstoken);
1020 }
1021 
tun_open(char * name)1022 static int tun_open(char *name)
1023 {
1024 	struct ifreq ifr;
1025 	int fd, err;
1026 
1027 	fd = open("/dev/net/tun", O_RDWR);
1028 	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
1029 		return -1;
1030 
1031 	memset(&ifr, 0, sizeof(ifr));
1032 
1033 	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
1034 	if (*name)
1035 		strncpy(ifr.ifr_name, name, IFNAMSIZ);
1036 
1037 	err = ioctl(fd, TUNSETIFF, &ifr);
1038 	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
1039 		goto fail;
1040 
1041 	SYS(fail, "ip link set dev %s up", name);
1042 
1043 	return fd;
1044 fail:
1045 	close(fd);
1046 	return -1;
1047 }
1048 
/* Direction a packet travels through the userspace relay below. */
enum {
	SRC_TO_TARGET = 0,
	TARGET_TO_SRC = 1,
};

/* Copy packets back and forth between two file descriptors forever.
 *
 * Blocks in select() until @src_fd and/or @target_fd is readable, then reads
 * one buffer from every readable descriptor and writes it to the other one.
 * Both directions are serviced on each wakeup so that a continuously readable
 * @src_fd cannot starve traffic coming from @target_fd.
 *
 * Never returns on success. Returns 1 after logging if select(), read() or
 * write() fails, or if write() is short.
 */
static int tun_relay_loop(int src_fd, int target_fd)
{
	for (;;) {
		char buf[1500];
		fd_set rfds;
		int direction;

		/* select() overwrites the set, so rebuild it on every round */
		FD_ZERO(&rfds);
		FD_SET(src_fd, &rfds);
		FD_SET(target_fd, &rfds);

		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
			log_err("select failed");
			return 1;
		}

		for (direction = SRC_TO_TARGET; direction <= TARGET_TO_SRC; direction++) {
			int from_fd = direction == SRC_TO_TARGET ? src_fd : target_fd;
			int to_fd = direction == SRC_TO_TARGET ? target_fd : src_fd;
			ssize_t nread, nwrite;

			if (!FD_ISSET(from_fd, &rfds))
				continue;

			nread = read(from_fd, buf, sizeof(buf));
			if (nread < 0) {
				log_err("read failed");
				return 1;
			}

			nwrite = write(to_fd, buf, nread);
			if (nwrite != nread) {
				log_err("write failed");
				return 1;
			}
		}
	}
}
1088 
test_tc_redirect_peer_l3(struct netns_setup_result * setup_result)1089 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
1090 {
1091 	LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
1092 	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
1093 	struct test_tc_peer *skel = NULL;
1094 	struct nstoken *nstoken = NULL;
1095 	int err;
1096 	int tunnel_pid = -1;
1097 	int src_fd, target_fd = -1;
1098 	int ifindex;
1099 
1100 	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
1101 	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
1102 	 * expose the L2 headers encapsulating the IP packet to BPF and hence
1103 	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
1104 	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
1105 	 * but that requires much more complicated setup.
1106 	 */
1107 	nstoken = open_netns(NS_SRC);
1108 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
1109 		return;
1110 
1111 	src_fd = tun_open("tun_src");
1112 	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
1113 		goto fail;
1114 
1115 	close_netns(nstoken);
1116 
1117 	nstoken = open_netns(NS_FWD);
1118 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
1119 		goto fail;
1120 
1121 	target_fd = tun_open("tun_fwd");
1122 	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
1123 		goto fail;
1124 
1125 	tunnel_pid = fork();
1126 	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
1127 		goto fail;
1128 
1129 	if (tunnel_pid == 0)
1130 		exit(tun_relay_loop(src_fd, target_fd));
1131 
1132 	skel = test_tc_peer__open();
1133 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1134 		goto fail;
1135 
1136 	ifindex = if_nametoindex("tun_fwd");
1137 	if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
1138 		goto fail;
1139 
1140 	skel->rodata->IFINDEX_SRC = ifindex;
1141 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1142 
1143 	err = test_tc_peer__load(skel);
1144 	if (!ASSERT_OK(err, "test_tc_peer__load"))
1145 		goto fail;
1146 
1147 	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
1148 	 * towards dst, and "tc_dst" to redirect packets
1149 	 * and "tc_chk" on dst_fwd to drop non-redirected packets.
1150 	 */
1151 	/* tc qdisc add dev tun_fwd clsact */
1152 	QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
1153 	/* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
1154 	XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
1155 
1156 	/* tc qdisc add dev dst_fwd clsact */
1157 	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
1158 	/* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
1159 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
1160 	/* tc filter add dev dst_fwd egress bpf da tc_chk */
1161 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
1162 
1163 	/* Setup route and neigh tables */
1164 	SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
1165 	SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
1166 
1167 	SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
1168 	SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
1169 
1170 	SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
1171 	SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
1172 	    " dev tun_src scope global");
1173 	SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
1174 	SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
1175 	SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
1176 	    " dev tun_src scope global");
1177 	SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
1178 
1179 	SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1180 	SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1181 
1182 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1183 		goto fail;
1184 
1185 	test_connectivity();
1186 
1187 fail:
1188 	if (tunnel_pid > 0) {
1189 		kill(tunnel_pid, SIGTERM);
1190 		waitpid(tunnel_pid, NULL, 0);
1191 	}
1192 	if (src_fd >= 0)
1193 		close(src_fd);
1194 	if (target_fd >= 0)
1195 		close(target_fd);
1196 	if (skel)
1197 		test_tc_peer__destroy(skel);
1198 	if (nstoken)
1199 		close_netns(nstoken);
1200 }
1201 
/* Run test_<name>() as the subtest "<name>", if that subtest is selected.
 *
 * A setup_result with .dev_mode = mode is prepared, the namespaces are added
 * and links/routes are set up. The test body only runs when both setup steps
 * succeed; the namespaces are deleted again whenever adding them succeeded.
 */
#define RUN_TEST(name, mode)							\
	({									\
		struct netns_setup_result setup_result = {			\
			.dev_mode = (mode),					\
		};								\
										\
		if (test__start_subtest(#name) &&				\
		    ASSERT_OK(netns_setup_namespaces("add"),			\
			      "setup namespaces")) {				\
			if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
				      "setup links and routes"))		\
				test_ ## name(&setup_result);			\
			netns_setup_namespaces("delete");			\
		}								\
	})
1213 
test_tc_redirect_run_tests(void * arg)1214 static void *test_tc_redirect_run_tests(void *arg)
1215 {
1216 	netns_setup_namespaces_nofail("delete");
1217 
1218 	RUN_TEST(tc_redirect_peer, MODE_VETH);
1219 	RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
1220 	RUN_TEST(tc_redirect_neigh, MODE_VETH);
1221 	RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
1222 	RUN_TEST(tc_redirect_dtime, MODE_VETH);
1223 	return NULL;
1224 }
1225 
test_tc_redirect(void)1226 void test_tc_redirect(void)
1227 {
1228 	pthread_t test_thread;
1229 	int err;
1230 
1231 	/* Run the tests in their own thread to isolate the namespace changes
1232 	 * so they do not affect the environment of other tests.
1233 	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1234 	 */
1235 	err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1236 	if (ASSERT_OK(err, "pthread_create"))
1237 		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
1238 }
1239