1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3 /*
4 * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5 * between src and dst. The netns fwd has veth links to each src and dst. The
6 * client is in src and server in dst. The test installs a TC BPF program to each
7 * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8 * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9 * switch from ingress side; it also installs a checker prog on the egress side
10 * to drop unexpected traffic.
11 */
12
13 #include <arpa/inet.h>
14 #include <linux/if_tun.h>
15 #include <linux/limits.h>
16 #include <linux/sysctl.h>
17 #include <linux/time_types.h>
18 #include <linux/net_tstamp.h>
19 #include <net/if.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "test_tc_neigh_fib.skel.h"
28 #include "test_tc_neigh.skel.h"
29 #include "test_tc_peer.skel.h"
30 #include "test_tc_dtime.skel.h"
31
32 #ifndef TCP_TX_DELAY
33 #define TCP_TX_DELAY 37
34 #endif
35
36 #define NS_SRC "ns_src"
37 #define NS_FWD "ns_fwd"
38 #define NS_DST "ns_dst"
39
40 #define IP4_SRC "172.16.1.100"
41 #define IP4_DST "172.16.2.100"
42 #define IP4_TUN_SRC "172.17.1.100"
43 #define IP4_TUN_FWD "172.17.1.200"
44 #define IP4_PORT 9004
45
46 #define IP6_SRC "0::1:dead:beef:cafe"
47 #define IP6_DST "0::2:dead:beef:cafe"
48 #define IP6_TUN_SRC "1::1:dead:beef:cafe"
49 #define IP6_TUN_FWD "1::2:dead:beef:cafe"
50 #define IP6_PORT 9006
51
52 #define IP4_SLL "169.254.0.1"
53 #define IP4_DLL "169.254.0.2"
54 #define IP4_NET "169.254.0.0"
55
56 #define MAC_DST_FWD "00:11:22:33:44:55"
57 #define MAC_DST "00:22:33:44:55:66"
58
59 #define IFADDR_STR_LEN 18
60 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
61
62 #define TIMEOUT_MILLIS 10000
63 #define NSEC_PER_SEC 1000000000ULL
64
65 #define log_err(MSG, ...) \
66 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
67 __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
68
69 static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
70
/*
 * Overwrite the beginning of an existing file with the string @newval.
 *
 * The file is opened with "r+", so it must already exist; it is never
 * created. In this file the only caller is set_forwarding(), which writes
 * /proc/sys knobs.
 *
 * stdio buffers the data, so the underlying write may only happen (and fail)
 * when the stream is flushed by fclose(). Its result is therefore checked
 * too, otherwise a rejected write would be reported as success.
 *
 * Returns 0 on success, -1 if the file cannot be opened, written or flushed.
 */
static int write_file(const char *path, const char *newval)
{
	FILE *f;

	f = fopen(path, "r+");
	if (!f)
		return -1;
	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		fclose(f);
		return -1;
	}
	if (fclose(f)) {
		log_err("flushing %s failed", path);
		return -1;
	}
	return 0;
}
86
netns_setup_namespaces(const char * verb)87 static int netns_setup_namespaces(const char *verb)
88 {
89 const char * const *ns = namespaces;
90 char cmd[128];
91
92 while (*ns) {
93 snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
94 if (!ASSERT_OK(system(cmd), cmd))
95 return -1;
96 ns++;
97 }
98 return 0;
99 }
100
netns_setup_namespaces_nofail(const char * verb)101 static void netns_setup_namespaces_nofail(const char *verb)
102 {
103 const char * const *ns = namespaces;
104 char cmd[128];
105
106 while (*ns) {
107 snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
108 system(cmd);
109 ns++;
110 }
111 }
112
/* Kind of link used between the namespaces; veth is the only mode here. */
enum dev_mode {
	MODE_VETH = 0,
};

/*
 * Output of netns_setup_links_and_routes(): the requested device mode plus
 * the ifindex of each of the four devices, as looked up right after the
 * devices were created.
 */
struct netns_setup_result {
	enum dev_mode dev_mode;

	/* "src" and its peer "src_fwd" */
	int ifindex_src;
	int ifindex_src_fwd;

	/* "dst" and its peer "dst_fwd" */
	int ifindex_dst;
	int ifindex_dst_fwd;
};
124
get_ifaddr(const char * name,char * ifaddr)125 static int get_ifaddr(const char *name, char *ifaddr)
126 {
127 char path[PATH_MAX];
128 FILE *f;
129 int ret;
130
131 snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
132 f = fopen(path, "r");
133 if (!ASSERT_OK_PTR(f, path))
134 return -1;
135
136 ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
137 if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
138 fclose(f);
139 return -1;
140 }
141 fclose(f);
142 return 0;
143 }
144
netns_setup_links_and_routes(struct netns_setup_result * result)145 static int netns_setup_links_and_routes(struct netns_setup_result *result)
146 {
147 struct nstoken *nstoken = NULL;
148 char src_fwd_addr[IFADDR_STR_LEN+1] = {};
149 char src_addr[IFADDR_STR_LEN + 1] = {};
150
151 if (result->dev_mode == MODE_VETH) {
152 SYS(fail, "ip link add src type veth peer name src_fwd");
153 SYS(fail, "ip link add dst type veth peer name dst_fwd");
154
155 SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
156 SYS(fail, "ip link set dst address " MAC_DST);
157 }
158
159 if (get_ifaddr("src_fwd", src_fwd_addr))
160 goto fail;
161
162 if (get_ifaddr("src", src_addr))
163 goto fail;
164
165 result->ifindex_src = if_nametoindex("src");
166 if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
167 goto fail;
168
169 result->ifindex_src_fwd = if_nametoindex("src_fwd");
170 if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
171 goto fail;
172
173 result->ifindex_dst = if_nametoindex("dst");
174 if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
175 goto fail;
176
177 result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
178 if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
179 goto fail;
180
181 SYS(fail, "ip link set src netns " NS_SRC);
182 SYS(fail, "ip link set src_fwd netns " NS_FWD);
183 SYS(fail, "ip link set dst_fwd netns " NS_FWD);
184 SYS(fail, "ip link set dst netns " NS_DST);
185
186 /** setup in 'src' namespace */
187 nstoken = open_netns(NS_SRC);
188 if (!ASSERT_OK_PTR(nstoken, "setns src"))
189 goto fail;
190
191 SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
192 SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
193 SYS(fail, "ip link set dev src up");
194
195 SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
196 SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
197 SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
198
199 if (result->dev_mode == MODE_VETH) {
200 SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
201 src_fwd_addr);
202 SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
203 src_fwd_addr);
204 }
205
206 close_netns(nstoken);
207
208 /** setup in 'fwd' namespace */
209 nstoken = open_netns(NS_FWD);
210 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
211 goto fail;
212
213 /* The fwd netns automatically gets a v6 LL address / routes, but also
214 * needs v4 one in order to start ARP probing. IP4_NET route is added
215 * to the endpoints so that the ARP processing will reply.
216 */
217 SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
218 SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
219 SYS(fail, "ip link set dev src_fwd up");
220 SYS(fail, "ip link set dev dst_fwd up");
221
222 SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
223 SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
224 SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
225 SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
226
227 if (result->dev_mode == MODE_VETH) {
228 SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
229 SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
230 SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
231 SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
232 }
233
234 close_netns(nstoken);
235
236 /** setup in 'dst' namespace */
237 nstoken = open_netns(NS_DST);
238 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
239 goto fail;
240
241 SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
242 SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
243 SYS(fail, "ip link set dev dst up");
244 SYS(fail, "ip link set dev lo up");
245
246 SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
247 SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
248 SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
249
250 if (result->dev_mode == MODE_VETH) {
251 SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
252 SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
253 }
254
255 close_netns(nstoken);
256
257 return 0;
258 fail:
259 if (nstoken)
260 close_netns(nstoken);
261 return -1;
262 }
263
qdisc_clsact_create(struct bpf_tc_hook * qdisc_hook,int ifindex)264 static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
265 {
266 char err_str[128], ifname[16];
267 int err;
268
269 qdisc_hook->ifindex = ifindex;
270 qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
271 err = bpf_tc_hook_create(qdisc_hook);
272 snprintf(err_str, sizeof(err_str),
273 "qdisc add dev %s clsact",
274 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
275 err_str[sizeof(err_str) - 1] = 0;
276 ASSERT_OK(err, err_str);
277
278 return err;
279 }
280
xgress_filter_add(struct bpf_tc_hook * qdisc_hook,enum bpf_tc_attach_point xgress,const struct bpf_program * prog,int priority)281 static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
282 enum bpf_tc_attach_point xgress,
283 const struct bpf_program *prog, int priority)
284 {
285 LIBBPF_OPTS(bpf_tc_opts, tc_attach);
286 char err_str[128], ifname[16];
287 int err;
288
289 qdisc_hook->attach_point = xgress;
290 tc_attach.prog_fd = bpf_program__fd(prog);
291 tc_attach.priority = priority;
292 err = bpf_tc_attach(qdisc_hook, &tc_attach);
293 snprintf(err_str, sizeof(err_str),
294 "filter add dev %s %s prio %d bpf da %s",
295 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
296 xgress == BPF_TC_INGRESS ? "ingress" : "egress",
297 priority, bpf_program__name(prog));
298 err_str[sizeof(err_str) - 1] = 0;
299 ASSERT_OK(err, err_str);
300
301 return err;
302 }
303
/*
 * Convenience wrappers around qdisc_clsact_create() / xgress_filter_add().
 *
 * Both expect the calling function to provide an `int err` variable and a
 * `fail` label: the helper's result is stored in `err` and control jumps to
 * `fail` when it is non-zero.
 */
#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({			\
	err = qdisc_clsact_create(qdisc_hook, ifindex);			\
	if (err)							\
		goto fail;						\
})

#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({	\
	err = xgress_filter_add(qdisc_hook, xgress, prog, priority);	\
	if (err)							\
		goto fail;						\
})
313
netns_load_bpf(const struct bpf_program * src_prog,const struct bpf_program * dst_prog,const struct bpf_program * chk_prog,const struct netns_setup_result * setup_result)314 static int netns_load_bpf(const struct bpf_program *src_prog,
315 const struct bpf_program *dst_prog,
316 const struct bpf_program *chk_prog,
317 const struct netns_setup_result *setup_result)
318 {
319 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
320 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
321 int err;
322
323 /* tc qdisc add dev src_fwd clsact */
324 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
325 /* tc filter add dev src_fwd ingress bpf da src_prog */
326 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
327 /* tc filter add dev src_fwd egress bpf da chk_prog */
328 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
329
330 /* tc qdisc add dev dst_fwd clsact */
331 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
332 /* tc filter add dev dst_fwd ingress bpf da dst_prog */
333 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
334 /* tc filter add dev dst_fwd egress bpf da chk_prog */
335 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
336
337 return 0;
338 fail:
339 return -1;
340 }
341
/*
 * TCP connectivity check: start a listener on @addr:@port inside NS_DST,
 * connect to it from inside NS_SRC, push one small message through the
 * connection and assert that the server side receives all of it.
 */
static void test_tcp(int family, const char *addr, __u16 port)
{
	int listen_fd = -1, accept_fd = -1, client_fd = -1;
	char payload[] = "testing testing";
	struct nstoken *ns;
	int nbytes;

	/* server lives in the dst namespace */
	ns = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(ns, "setns dst"))
		return;

	listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
	if (!ASSERT_GE(listen_fd, 0, "listen"))
		goto out;

	/* client lives in the src namespace */
	close_netns(ns);
	ns = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(ns, "setns src"))
		goto out;

	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
		goto out;

	accept_fd = accept(listen_fd, NULL, NULL);
	if (!ASSERT_GE(accept_fd, 0, "accept"))
		goto out;

	if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
		goto out;

	nbytes = write(client_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto out;

	nbytes = read(accept_fd, payload, sizeof(payload));
	ASSERT_EQ(nbytes, sizeof(payload), "recv from server");

out:
	/* ns is NULL here only when re-entering a namespace failed */
	if (ns)
		close_netns(ns);
	if (listen_fd >= 0)
		close(listen_fd);
	if (accept_fd >= 0)
		close(accept_fd);
	if (client_fd >= 0)
		close(client_fd);
}
390
test_ping(int family,const char * addr)391 static int test_ping(int family, const char *addr)
392 {
393 SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
394 return 0;
395 fail:
396 return -1;
397 }
398
test_connectivity(void)399 static void test_connectivity(void)
400 {
401 test_tcp(AF_INET, IP4_DST, IP4_PORT);
402 test_ping(AF_INET, IP4_DST);
403 test_tcp(AF_INET6, IP6_DST, IP6_PORT);
404 test_ping(AF_INET6, IP6_DST);
405 }
406
/*
 * Enable or disable IPv4 and IPv6 forwarding by writing "1" or "0" to
 * /proc/sys/net/ipv4/ip_forward and /proc/sys/net/ipv6/conf/all/forwarding.
 * The write applies to whichever network namespace the caller is currently
 * in.
 *
 * The assertion labels name the value that was actually written; they used
 * to say "=0" even when forwarding was being enabled.
 *
 * Returns 0 on success, or the write_file() error of the first knob that
 * could not be set.
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" :
				     "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" :
				     "set ipv6.forwarding=0"))
		return err;

	return 0;
}
421
/*
 * Receive one message from @fd and inspect its SO_TIMESTAMPNS timestamp.
 *
 * The message must be exactly @s bytes long, otherwise -1 is returned. Its
 * payload is checked against @expected.
 *
 * If @tstamp is non-NULL, the receive timestamp in nanoseconds (0 when the
 * first control message is not a SO_TIMESTAMPNS one) is stored there and
 * judging it is left to the caller. If @tstamp is NULL, the timestamp is
 * asserted to be non-zero, not in the future, and less than 5 seconds older
 * than the current CLOCK_REALTIME.
 *
 * Returns 0 in every case other than the length mismatch above.
 */
static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
{
	struct timespec pkt_ts = {};
	char ctl[CMSG_SPACE(sizeof(pkt_ts))];
	char data[32];
	struct iovec iov = {
		.iov_base = data,
		.iov_len = sizeof(data),
	};
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = ctl,
		.msg_controllen = sizeof(ctl),
	};
	struct timespec now_ts;
	struct cmsghdr *cmsg;
	__u64 now_ns, pkt_ns;
	int ret;

	ret = recvmsg(fd, &msg, 0);
	if (!ASSERT_EQ(ret, s, "recvmsg"))
		return -1;
	ASSERT_STRNEQ(data, expected, s, "expected rcv data");

	/* pkt_ts stays zeroed unless a timestamp cmsg is present */
	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SO_TIMESTAMPNS)
		memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));

	pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;

	if (tstamp) {
		/* the caller wants to evaluate the timestamp on its own */
		*tstamp = pkt_ns;
		return 0;
	}

	ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");

	ret = clock_gettime(CLOCK_REALTIME, &now_ts);
	ASSERT_OK(ret, "clock_gettime");
	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;

	/* only compute the age when it cannot underflow */
	if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
		ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
			  "check rcv tstamp");
	return 0;
}
469
/*
 * Receive one @s-byte message from @fd and let __rcv_tstamp() itself
 * validate both the payload and the receive timestamp (NULL @tstamp mode).
 * The return value is intentionally dropped; failures are recorded by the
 * assertions inside __rcv_tstamp().
 */
static void rcv_tstamp(int fd, const char *expected, size_t s)
{
	(void)__rcv_tstamp(fd, expected, s, NULL);
}
474
wait_netstamp_needed_key(void)475 static int wait_netstamp_needed_key(void)
476 {
477 int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
478 char buf[] = "testing testing";
479 struct nstoken *nstoken;
480 __u64 tstamp = 0;
481
482 nstoken = open_netns(NS_DST);
483 if (!nstoken)
484 return -1;
485
486 srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
487 if (!ASSERT_GE(srv_fd, 0, "start_server"))
488 goto done;
489
490 err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS,
491 &opt, sizeof(opt));
492 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)"))
493 goto done;
494
495 cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
496 if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
497 goto done;
498
499 again:
500 n = write(cli_fd, buf, sizeof(buf));
501 if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
502 goto done;
503 err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
504 if (!ASSERT_OK(err, "__rcv_tstamp"))
505 goto done;
506 if (!tstamp && nretries++ < 5) {
507 sleep(1);
508 printf("netstamp_needed_key retry#%d\n", nretries);
509 goto again;
510 }
511
512 done:
513 if (!tstamp && srv_fd != -1) {
514 close(srv_fd);
515 srv_fd = -1;
516 }
517 if (cli_fd != -1)
518 close(cli_fd);
519 close_netns(nstoken);
520 return srv_fd;
521 }
522
snd_tstamp(int fd,char * b,size_t s)523 static void snd_tstamp(int fd, char *b, size_t s)
524 {
525 struct sock_txtime opt = { .clockid = CLOCK_TAI };
526 char ctl[CMSG_SPACE(sizeof(__u64))];
527 struct timespec now_ts;
528 struct msghdr msg = {};
529 struct cmsghdr *cmsg;
530 struct iovec iov;
531 __u64 now_ns;
532 int ret;
533
534 ret = clock_gettime(CLOCK_TAI, &now_ts);
535 ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
536 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
537
538 iov.iov_base = b;
539 iov.iov_len = s;
540 msg.msg_iov = &iov;
541 msg.msg_iovlen = 1;
542 msg.msg_control = &ctl;
543 msg.msg_controllen = sizeof(ctl);
544
545 cmsg = CMSG_FIRSTHDR(&msg);
546 cmsg->cmsg_level = SOL_SOCKET;
547 cmsg->cmsg_type = SCM_TXTIME;
548 cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
549 *(__u64 *)CMSG_DATA(cmsg) = now_ns;
550
551 ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
552 ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
553
554 ret = sendmsg(fd, &msg, 0);
555 ASSERT_EQ(ret, s, "sendmsg");
556 }
557
/*
 * Send one message from NS_SRC to a server in NS_DST listening on
 * @addr:@port and validate the receive timestamp on the server side.
 *
 * For SOCK_STREAM the data is written over an accepted TCP connection and
 * the listener additionally gets TCP_TX_DELAY set. For any other @type the
 * message is sent with snd_tstamp(), i.e. with an SCM_TXTIME transmit time
 * attached, and received directly on the server socket.
 */
static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
{
	int on = 1, accept_fd = -1, client_fd = -1, listen_fd, err;
	char payload[] = "testing testing";
	struct nstoken *ns;

	/* the server socket is created inside the dst namespace */
	ns = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(ns, "setns dst"))
		return;
	listen_fd = start_server(family, type, addr, port, 0);
	close_netns(ns);

	if (!ASSERT_GE(listen_fd, 0, "listen"))
		return;

	/* Make the kernel timestamp every received skb */
	err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS,
			 &on, sizeof(on));
	if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)"))
		goto out;

	if (type == SOCK_STREAM) {
		/* Make the kernel set an EDT on the rst/ack it sends from
		 * its ctl_sk.
		 */
		err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &on,
				 sizeof(on));
		if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
			goto out;
	}

	/* the client socket is created inside the src namespace */
	ns = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(ns, "setns src"))
		goto out;
	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
	close_netns(ns);

	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
		goto out;

	if (type != SOCK_STREAM) {
		snd_tstamp(client_fd, payload, sizeof(payload));
		rcv_tstamp(listen_fd, payload, sizeof(payload));
	} else {
		int n;

		accept_fd = accept(listen_fd, NULL, NULL);
		if (!ASSERT_GE(accept_fd, 0, "accept"))
			goto out;

		n = write(client_fd, payload, sizeof(payload));
		if (!ASSERT_EQ(n, sizeof(payload), "send to server"))
			goto out;
		rcv_tstamp(accept_fd, payload, sizeof(payload));
	}

out:
	close(listen_fd);
	if (accept_fd != -1)
		close(accept_fd);
	if (client_fd != -1)
		close(client_fd);
}
621
netns_load_dtime_bpf(struct test_tc_dtime * skel,const struct netns_setup_result * setup_result)622 static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
623 const struct netns_setup_result *setup_result)
624 {
625 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
626 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
627 LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
628 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
629 struct nstoken *nstoken;
630 int err;
631
632 /* setup ns_src tc progs */
633 nstoken = open_netns(NS_SRC);
634 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
635 return -1;
636 /* tc qdisc add dev src clsact */
637 QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
638 /* tc filter add dev src ingress bpf da ingress_host */
639 XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
640 /* tc filter add dev src egress bpf da egress_host */
641 XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
642 close_netns(nstoken);
643
644 /* setup ns_dst tc progs */
645 nstoken = open_netns(NS_DST);
646 if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
647 return -1;
648 /* tc qdisc add dev dst clsact */
649 QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
650 /* tc filter add dev dst ingress bpf da ingress_host */
651 XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
652 /* tc filter add dev dst egress bpf da egress_host */
653 XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
654 close_netns(nstoken);
655
656 /* setup ns_fwd tc progs */
657 nstoken = open_netns(NS_FWD);
658 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
659 return -1;
660 /* tc qdisc add dev dst_fwd clsact */
661 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
662 /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
663 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
664 skel->progs.ingress_fwdns_prio100, 100);
665 /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
666 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
667 skel->progs.ingress_fwdns_prio101, 101);
668 /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
669 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
670 skel->progs.egress_fwdns_prio100, 100);
671 /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
672 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
673 skel->progs.egress_fwdns_prio101, 101);
674
675 /* tc qdisc add dev src_fwd clsact */
676 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
677 /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
678 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
679 skel->progs.ingress_fwdns_prio100, 100);
680 /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
681 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
682 skel->progs.ingress_fwdns_prio101, 101);
683 /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
684 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
685 skel->progs.egress_fwdns_prio100, 100);
686 /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
687 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
688 skel->progs.egress_fwdns_prio101, 101);
689 close_netns(nstoken);
690 return 0;
691
692 fail:
693 close_netns(nstoken);
694 return err;
695 }
696
/*
 * Indexes of the per-test counters kept by the dtime BPF programs (second
 * index of skel->bss->dtimes[][] and skel->bss->errs[][]).
 */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,
};

/* Printable name of each counter, indexed by the enum above. */
const char *cnt_names[] = {
	[INGRESS_FWDNS_P100]	= "ingress_fwdns_p100",
	[INGRESS_FWDNS_P101]	= "ingress_fwdns_p101",
	[EGRESS_FWDNS_P100]	= "egress_fwdns_p100",
	[EGRESS_FWDNS_P101]	= "egress_fwdns_p101",
	[INGRESS_ENDHOST]	= "ingress_endhost",
	[EGRESS_ENDHOST]	= "egress_endhost",
	[SET_DTIME]		= "set_dtime",
};
717
/*
 * Identifiers of the dtime sub-tests (first index of skel->bss->dtimes[][]
 * and skel->bss->errs[][], also written to skel->bss->test).
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};

/*
 * Printable name of each sub-test, indexed by the enum above. There is no
 * entry for UKN_TEST, so the array has one element fewer than __NR_TESTS.
 */
const char *test_names[] = {
	[TCP_IP6_CLEAR_DTIME]	= "tcp ip6 clear dtime",
	[TCP_IP4]		= "tcp ip4",
	[TCP_IP6]		= "tcp ip6",
	[UDP_IP4]		= "udp ip4",
	[UDP_IP6]		= "udp ip6",
	[TCP_IP4_RT_FWD]	= "tcp ip4 rt fwd",
	[TCP_IP6_RT_FWD]	= "tcp ip6 rt fwd",
	[UDP_IP4_RT_FWD]	= "udp ip4 rt fwd",
	[UDP_IP6_RT_FWD]	= "udp ip6 rt fwd",
};
743
dtime_cnt_str(int test,int cnt)744 static const char *dtime_cnt_str(int test, int cnt)
745 {
746 static char name[64];
747
748 snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
749
750 return name;
751 }
752
dtime_err_str(int test,int cnt)753 static const char *dtime_err_str(int test, int cnt)
754 {
755 static char name[64];
756
757 snprintf(name, sizeof(name), "%s %s errs", test_names[test],
758 cnt_names[cnt]);
759
760 return name;
761 }
762
test_tcp_clear_dtime(struct test_tc_dtime * skel)763 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
764 {
765 int i, t = TCP_IP6_CLEAR_DTIME;
766 __u32 *dtimes = skel->bss->dtimes[t];
767 __u32 *errs = skel->bss->errs[t];
768
769 skel->bss->test = t;
770 test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
771
772 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
773 dtime_cnt_str(t, INGRESS_FWDNS_P100));
774 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
775 dtime_cnt_str(t, INGRESS_FWDNS_P101));
776 ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
777 dtime_cnt_str(t, EGRESS_FWDNS_P100));
778 ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
779 dtime_cnt_str(t, EGRESS_FWDNS_P101));
780 ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
781 dtime_cnt_str(t, EGRESS_ENDHOST));
782 ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
783 dtime_cnt_str(t, INGRESS_ENDHOST));
784
785 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
786 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
787 }
788
test_tcp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)789 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
790 {
791 __u32 *dtimes, *errs;
792 const char *addr;
793 int i, t;
794
795 if (family == AF_INET) {
796 t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
797 addr = IP4_DST;
798 } else {
799 t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
800 addr = IP6_DST;
801 }
802
803 dtimes = skel->bss->dtimes[t];
804 errs = skel->bss->errs[t];
805
806 skel->bss->test = t;
807 test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
808
809 /* fwdns_prio100 prog does not read delivery_time_type, so
810 * kernel puts the (rcv) timetamp in __sk_buff->tstamp
811 */
812 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
813 dtime_cnt_str(t, INGRESS_FWDNS_P100));
814 for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
815 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
816
817 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
818 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
819 }
820
test_udp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)821 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
822 {
823 __u32 *dtimes, *errs;
824 const char *addr;
825 int i, t;
826
827 if (family == AF_INET) {
828 t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
829 addr = IP4_DST;
830 } else {
831 t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
832 addr = IP6_DST;
833 }
834
835 dtimes = skel->bss->dtimes[t];
836 errs = skel->bss->errs[t];
837
838 skel->bss->test = t;
839 test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
840
841 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
842 dtime_cnt_str(t, INGRESS_FWDNS_P100));
843 /* non mono delivery time is not forwarded */
844 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
845 dtime_cnt_str(t, INGRESS_FWDNS_P101));
846 for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
847 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
848
849 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
850 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
851 }
852
test_tc_redirect_dtime(struct netns_setup_result * setup_result)853 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
854 {
855 struct test_tc_dtime *skel;
856 struct nstoken *nstoken;
857 int hold_tstamp_fd, err;
858
859 /* Hold a sk with the SOCK_TIMESTAMP set to ensure there
860 * is no delay in the kernel net_enable_timestamp().
861 * This ensures the following tests must have
862 * non zero rcv tstamp in the recvmsg().
863 */
864 hold_tstamp_fd = wait_netstamp_needed_key();
865 if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
866 return;
867
868 skel = test_tc_dtime__open();
869 if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
870 goto done;
871
872 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
873 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
874
875 err = test_tc_dtime__load(skel);
876 if (!ASSERT_OK(err, "test_tc_dtime__load"))
877 goto done;
878
879 if (netns_load_dtime_bpf(skel, setup_result))
880 goto done;
881
882 nstoken = open_netns(NS_FWD);
883 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
884 goto done;
885 err = set_forwarding(false);
886 close_netns(nstoken);
887 if (!ASSERT_OK(err, "disable forwarding"))
888 goto done;
889
890 test_tcp_clear_dtime(skel);
891
892 test_tcp_dtime(skel, AF_INET, true);
893 test_tcp_dtime(skel, AF_INET6, true);
894 test_udp_dtime(skel, AF_INET, true);
895 test_udp_dtime(skel, AF_INET6, true);
896
897 /* Test the kernel ip[6]_forward path instead
898 * of bpf_redirect_neigh().
899 */
900 nstoken = open_netns(NS_FWD);
901 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
902 goto done;
903 err = set_forwarding(true);
904 close_netns(nstoken);
905 if (!ASSERT_OK(err, "enable forwarding"))
906 goto done;
907
908 test_tcp_dtime(skel, AF_INET, false);
909 test_tcp_dtime(skel, AF_INET6, false);
910 test_udp_dtime(skel, AF_INET, false);
911 test_udp_dtime(skel, AF_INET6, false);
912
913 done:
914 test_tc_dtime__destroy(skel);
915 close(hold_tstamp_fd);
916 }
917
test_tc_redirect_neigh_fib(struct netns_setup_result * setup_result)918 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
919 {
920 struct nstoken *nstoken = NULL;
921 struct test_tc_neigh_fib *skel = NULL;
922
923 nstoken = open_netns(NS_FWD);
924 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
925 return;
926
927 skel = test_tc_neigh_fib__open();
928 if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
929 goto done;
930
931 if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
932 goto done;
933
934 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
935 skel->progs.tc_chk, setup_result))
936 goto done;
937
938 /* bpf_fib_lookup() checks if forwarding is enabled */
939 if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
940 goto done;
941
942 test_connectivity();
943
944 done:
945 if (skel)
946 test_tc_neigh_fib__destroy(skel);
947 close_netns(nstoken);
948 }
949
test_tc_redirect_neigh(struct netns_setup_result * setup_result)950 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
951 {
952 struct nstoken *nstoken = NULL;
953 struct test_tc_neigh *skel = NULL;
954 int err;
955
956 nstoken = open_netns(NS_FWD);
957 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
958 return;
959
960 skel = test_tc_neigh__open();
961 if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
962 goto done;
963
964 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
965 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
966
967 err = test_tc_neigh__load(skel);
968 if (!ASSERT_OK(err, "test_tc_neigh__load"))
969 goto done;
970
971 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
972 skel->progs.tc_chk, setup_result))
973 goto done;
974
975 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
976 goto done;
977
978 test_connectivity();
979
980 done:
981 if (skel)
982 test_tc_neigh__destroy(skel);
983 close_netns(nstoken);
984 }
985
test_tc_redirect_peer(struct netns_setup_result * setup_result)986 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
987 {
988 struct nstoken *nstoken;
989 struct test_tc_peer *skel;
990 int err;
991
992 nstoken = open_netns(NS_FWD);
993 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
994 return;
995
996 skel = test_tc_peer__open();
997 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
998 goto done;
999
1000 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
1001 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1002
1003 err = test_tc_peer__load(skel);
1004 if (!ASSERT_OK(err, "test_tc_peer__load"))
1005 goto done;
1006
1007 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
1008 skel->progs.tc_chk, setup_result))
1009 goto done;
1010
1011 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1012 goto done;
1013
1014 test_connectivity();
1015
1016 done:
1017 if (skel)
1018 test_tc_peer__destroy(skel);
1019 close_netns(nstoken);
1020 }
1021
tun_open(char * name)1022 static int tun_open(char *name)
1023 {
1024 struct ifreq ifr;
1025 int fd, err;
1026
1027 fd = open("/dev/net/tun", O_RDWR);
1028 if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
1029 return -1;
1030
1031 memset(&ifr, 0, sizeof(ifr));
1032
1033 ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
1034 if (*name)
1035 strncpy(ifr.ifr_name, name, IFNAMSIZ);
1036
1037 err = ioctl(fd, TUNSETIFF, &ifr);
1038 if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
1039 goto fail;
1040
1041 SYS(fail, "ip link set dev %s up", name);
1042
1043 return fd;
1044 fail:
1045 close(fd);
1046 return -1;
1047 }
1048
/* Direction in which tun_relay_loop() copies the buffer it just read. */
enum {
	SRC_TO_TARGET = 0,
	TARGET_TO_SRC = 1,
};

/*
 * Copy data back and forth between two file descriptors until an error
 * occurs.
 *
 * Every pass waits until at least one descriptor is readable, reads one
 * buffer (up to 1500 bytes) from it and writes it unchanged to the other
 * descriptor. When both are readable, @src_fd is served first and
 * @target_fd is picked up on the following pass.
 *
 * The loop has no success exit. Returns 1, after logging, when select(),
 * read() or write() fails or when the write is short.
 */
static int tun_relay_loop(int src_fd, int target_fd)
{
	const int nfds = 1 + MAX(src_fd, target_fd);

	for (;;) {
		char buf[1500];
		ssize_t nread, nwrite;
		int direction;
		fd_set rfds;

		/* select() overwrites the set with the ready descriptors, so
		 * it is rebuilt from scratch on every pass.
		 */
		FD_ZERO(&rfds);
		FD_SET(src_fd, &rfds);
		FD_SET(target_fd, &rfds);

		if (select(nfds, &rfds, NULL, NULL, NULL) < 0) {
			/* An interrupted wait is not a relay failure */
			if (errno == EINTR)
				continue;
			log_err("select failed");
			return 1;
		}

		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;

		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
		if (nread < 0) {
			log_err("read failed");
			return 1;
		}

		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
		if (nwrite != nread) {
			log_err("write failed");
			return 1;
		}
	}
}
1088
test_tc_redirect_peer_l3(struct netns_setup_result * setup_result)1089 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
1090 {
1091 LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
1092 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
1093 struct test_tc_peer *skel = NULL;
1094 struct nstoken *nstoken = NULL;
1095 int err;
1096 int tunnel_pid = -1;
1097 int src_fd, target_fd = -1;
1098 int ifindex;
1099
1100 /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
1101 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
1102 * expose the L2 headers encapsulating the IP packet to BPF and hence
1103 * don't have skb in suitable state for this test. Alternative to TUN/TAP
1104 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
1105 * but that requires much more complicated setup.
1106 */
1107 nstoken = open_netns(NS_SRC);
1108 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
1109 return;
1110
1111 src_fd = tun_open("tun_src");
1112 if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
1113 goto fail;
1114
1115 close_netns(nstoken);
1116
1117 nstoken = open_netns(NS_FWD);
1118 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
1119 goto fail;
1120
1121 target_fd = tun_open("tun_fwd");
1122 if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
1123 goto fail;
1124
1125 tunnel_pid = fork();
1126 if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
1127 goto fail;
1128
1129 if (tunnel_pid == 0)
1130 exit(tun_relay_loop(src_fd, target_fd));
1131
1132 skel = test_tc_peer__open();
1133 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1134 goto fail;
1135
1136 ifindex = if_nametoindex("tun_fwd");
1137 if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
1138 goto fail;
1139
1140 skel->rodata->IFINDEX_SRC = ifindex;
1141 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1142
1143 err = test_tc_peer__load(skel);
1144 if (!ASSERT_OK(err, "test_tc_peer__load"))
1145 goto fail;
1146
1147 /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
1148 * towards dst, and "tc_dst" to redirect packets
1149 * and "tc_chk" on dst_fwd to drop non-redirected packets.
1150 */
1151 /* tc qdisc add dev tun_fwd clsact */
1152 QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
1153 /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
1154 XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
1155
1156 /* tc qdisc add dev dst_fwd clsact */
1157 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
1158 /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
1159 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
1160 /* tc filter add dev dst_fwd egress bpf da tc_chk */
1161 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
1162
1163 /* Setup route and neigh tables */
1164 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
1165 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
1166
1167 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
1168 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
1169
1170 SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
1171 SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
1172 " dev tun_src scope global");
1173 SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
1174 SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
1175 SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
1176 " dev tun_src scope global");
1177 SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
1178
1179 SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1180 SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1181
1182 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1183 goto fail;
1184
1185 test_connectivity();
1186
1187 fail:
1188 if (tunnel_pid > 0) {
1189 kill(tunnel_pid, SIGTERM);
1190 waitpid(tunnel_pid, NULL, 0);
1191 }
1192 if (src_fd >= 0)
1193 close(src_fd);
1194 if (target_fd >= 0)
1195 close(target_fd);
1196 if (skel)
1197 test_tc_peer__destroy(skel);
1198 if (nstoken)
1199 close_netns(nstoken);
1200 }
1201
/*
 * Run the subtest test_<name>() inside freshly created namespaces.
 *
 * Nothing happens unless test__start_subtest() selects the subtest. When it
 * does, the namespaces are added, links and routes are set up for device
 * mode @mode, and test_<name>() receives the resulting setup. The namespaces
 * are deleted again whenever adding them succeeded, even if the link/route
 * setup failed and the test body was skipped.
 */
#define RUN_TEST(name, mode)							\
	({									\
		struct netns_setup_result setup_result = { .dev_mode = mode, };	\
		if (test__start_subtest(#name) &&				\
		    ASSERT_OK(netns_setup_namespaces("add"),			\
			      "setup namespaces")) {				\
			if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
				      "setup links and routes"))		\
				test_ ## name(&setup_result);			\
			netns_setup_namespaces("delete");			\
		}								\
	})
1213
test_tc_redirect_run_tests(void * arg)1214 static void *test_tc_redirect_run_tests(void *arg)
1215 {
1216 netns_setup_namespaces_nofail("delete");
1217
1218 RUN_TEST(tc_redirect_peer, MODE_VETH);
1219 RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
1220 RUN_TEST(tc_redirect_neigh, MODE_VETH);
1221 RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
1222 RUN_TEST(tc_redirect_dtime, MODE_VETH);
1223 return NULL;
1224 }
1225
test_tc_redirect(void)1226 void test_tc_redirect(void)
1227 {
1228 pthread_t test_thread;
1229 int err;
1230
1231 /* Run the tests in their own thread to isolate the namespace changes
1232 * so they do not affect the environment of other tests.
1233 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1234 */
1235 err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1236 if (ASSERT_OK(err, "pthread_create"))
1237 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
1238 }
1239