1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3 /* 4 * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link 5 * between src and dst. The netns fwd has veth links to each src and dst. The 6 * client is in src and server in dst. The test installs a TC BPF program to each 7 * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the 8 * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace 9 * switch from ingress side; it also installs a checker prog on the egress side 10 * to drop unexpected traffic. 11 */ 12 13 #include <arpa/inet.h> 14 #include <linux/if.h> 15 #include <linux/if_tun.h> 16 #include <linux/limits.h> 17 #include <linux/sysctl.h> 18 #include <linux/time_types.h> 19 #include <linux/net_tstamp.h> 20 #include <stdbool.h> 21 #include <stdio.h> 22 #include <sys/stat.h> 23 #include <unistd.h> 24 25 #include "test_progs.h" 26 #include "network_helpers.h" 27 #include "test_tc_neigh_fib.skel.h" 28 #include "test_tc_neigh.skel.h" 29 #include "test_tc_peer.skel.h" 30 #include "test_tc_dtime.skel.h" 31 32 #ifndef TCP_TX_DELAY 33 #define TCP_TX_DELAY 37 34 #endif 35 36 #define NS_SRC "ns_src" 37 #define NS_FWD "ns_fwd" 38 #define NS_DST "ns_dst" 39 40 #define IP4_SRC "172.16.1.100" 41 #define IP4_DST "172.16.2.100" 42 #define IP4_TUN_SRC "172.17.1.100" 43 #define IP4_TUN_FWD "172.17.1.200" 44 #define IP4_PORT 9004 45 46 #define IP6_SRC "0::1:dead:beef:cafe" 47 #define IP6_DST "0::2:dead:beef:cafe" 48 #define IP6_TUN_SRC "1::1:dead:beef:cafe" 49 #define IP6_TUN_FWD "1::2:dead:beef:cafe" 50 #define IP6_PORT 9006 51 52 #define IP4_SLL "169.254.0.1" 53 #define IP4_DLL "169.254.0.2" 54 #define IP4_NET "169.254.0.0" 55 56 #define MAC_DST_FWD "00:11:22:33:44:55" 57 #define MAC_DST "00:22:33:44:55:66" 58 59 #define IFADDR_STR_LEN 18 60 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q" 61 62 #define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src" 63 #define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst" 64 #define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk" 65 66 #define TIMEOUT_MILLIS 10000 67 #define NSEC_PER_SEC 1000000000ULL 68 69 #define log_err(MSG, ...) \ 70 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 71 __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) 72 73 static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL}; 74 75 static int write_file(const char *path, const char *newval) 76 { 77 FILE *f; 78 79 f = fopen(path, "r+"); 80 if (!f) 81 return -1; 82 if (fwrite(newval, strlen(newval), 1, f) != 1) { 83 log_err("writing to %s failed", path); 84 fclose(f); 85 return -1; 86 } 87 fclose(f); 88 return 0; 89 } 90 91 static int netns_setup_namespaces(const char *verb) 92 { 93 const char * const *ns = namespaces; 94 char cmd[128]; 95 96 while (*ns) { 97 snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns); 98 if (!ASSERT_OK(system(cmd), cmd)) 99 return -1; 100 ns++; 101 } 102 return 0; 103 } 104 105 static void netns_setup_namespaces_nofail(const char *verb) 106 { 107 const char * const *ns = namespaces; 108 char cmd[128]; 109 110 while (*ns) { 111 snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns); 112 system(cmd); 113 ns++; 114 } 115 } 116 117 struct netns_setup_result { 118 int ifindex_veth_src_fwd; 119 int ifindex_veth_dst_fwd; 120 }; 121 122 static int get_ifaddr(const char *name, char *ifaddr) 123 { 124 char path[PATH_MAX]; 125 FILE *f; 126 int ret; 127 128 snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name); 129 f = fopen(path, "r"); 130 if (!ASSERT_OK_PTR(f, path)) 131 return -1; 132 133 ret = fread(ifaddr, 1, IFADDR_STR_LEN, f); 134 if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) { 135 fclose(f); 136 return -1; 137 } 138 fclose(f); 139 return 0; 140 } 141 142 static int get_ifindex(const char *name) 143 { 144 char path[PATH_MAX]; 145 char buf[32]; 146 FILE *f; 147 int ret; 148 149 snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name); 150 f = fopen(path, "r"); 151 if (!ASSERT_OK_PTR(f, path)) 152 return -1; 153 154 ret = fread(buf, 1, sizeof(buf), f); 155 if (!ASSERT_GT(ret, 0, "fread ifindex")) { 156 fclose(f); 157 return -1; 158 } 159 fclose(f); 160 return atoi(buf); 161 } 162 163 #define SYS(fmt, ...) \ 164 ({ \ 165 char cmd[1024]; \ 166 snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ 167 if (!ASSERT_OK(system(cmd), cmd)) \ 168 goto fail; \ 169 }) 170 171 static int netns_setup_links_and_routes(struct netns_setup_result *result) 172 { 173 struct nstoken *nstoken = NULL; 174 char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; 175 176 SYS("ip link add veth_src type veth peer name veth_src_fwd"); 177 SYS("ip link add veth_dst type veth peer name veth_dst_fwd"); 178 179 SYS("ip link set veth_dst_fwd address " MAC_DST_FWD); 180 SYS("ip link set veth_dst address " MAC_DST); 181 182 if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) 183 goto fail; 184 185 result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd"); 186 if (result->ifindex_veth_src_fwd < 0) 187 goto fail; 188 result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd"); 189 if (result->ifindex_veth_dst_fwd < 0) 190 goto fail; 191 192 SYS("ip link set veth_src netns " NS_SRC); 193 SYS("ip link set veth_src_fwd netns " NS_FWD); 194 SYS("ip link set veth_dst_fwd netns " NS_FWD); 195 SYS("ip link set veth_dst netns " NS_DST); 196 197 /** setup in 'src' namespace */ 198 nstoken = open_netns(NS_SRC); 199 if (!ASSERT_OK_PTR(nstoken, "setns src")) 200 goto fail; 201 202 SYS("ip addr add " IP4_SRC "/32 dev veth_src"); 203 SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad"); 204 SYS("ip link set dev veth_src up"); 205 206 SYS("ip route add " IP4_DST "/32 dev veth_src scope global"); 207 SYS("ip route add " IP4_NET "/16 dev veth_src scope global"); 208 SYS("ip route add " IP6_DST "/128 dev veth_src scope global"); 209 210 SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s", 211 veth_src_fwd_addr); 212 SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s", 213 veth_src_fwd_addr); 214 215 close_netns(nstoken); 216 217 /** setup in 'fwd' namespace */ 218 nstoken = open_netns(NS_FWD); 219 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 220 goto fail; 221 222 /* The fwd netns automatically gets a v6 LL address / routes, but also 223 * needs v4 one in order to start ARP probing. IP4_NET route is added 224 * to the endpoints so that the ARP processing will reply. 225 */ 226 SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd"); 227 SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); 228 SYS("ip link set dev veth_src_fwd up"); 229 SYS("ip link set dev veth_dst_fwd up"); 230 231 SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); 232 SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); 233 SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); 234 SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); 235 236 close_netns(nstoken); 237 238 /** setup in 'dst' namespace */ 239 nstoken = open_netns(NS_DST); 240 if (!ASSERT_OK_PTR(nstoken, "setns dst")) 241 goto fail; 242 243 SYS("ip addr add " IP4_DST "/32 dev veth_dst"); 244 SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad"); 245 SYS("ip link set dev veth_dst up"); 246 247 SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global"); 248 SYS("ip route add " IP4_NET "/16 dev veth_dst scope global"); 249 SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global"); 250 251 SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); 252 SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); 253 254 close_netns(nstoken); 255 256 return 0; 257 fail: 258 if (nstoken) 259 close_netns(nstoken); 260 return -1; 261 } 262 263 static int netns_load_bpf(void) 264 { 265 SYS("tc qdisc add dev veth_src_fwd clsact"); 266 SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " 267 SRC_PROG_PIN_FILE); 268 SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned " 269 CHK_PROG_PIN_FILE); 270 271 SYS("tc qdisc add dev veth_dst_fwd clsact"); 272 SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " 273 DST_PROG_PIN_FILE); 274 SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " 275 CHK_PROG_PIN_FILE); 276 277 return 0; 278 fail: 279 return -1; 280 } 281 282 static void test_tcp(int family, const char *addr, __u16 port) 283 { 284 int listen_fd = -1, accept_fd = -1, client_fd = -1; 285 char buf[] = "testing testing"; 286 int n; 287 struct nstoken *nstoken; 288 289 nstoken = open_netns(NS_DST); 290 if (!ASSERT_OK_PTR(nstoken, "setns dst")) 291 return; 292 293 listen_fd = start_server(family, SOCK_STREAM, addr, port, 0); 294 if (!ASSERT_GE(listen_fd, 0, "listen")) 295 goto done; 296 297 close_netns(nstoken); 298 nstoken = open_netns(NS_SRC); 299 if (!ASSERT_OK_PTR(nstoken, "setns src")) 300 goto done; 301 302 client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); 303 if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) 304 goto done; 305 306 accept_fd = accept(listen_fd, NULL, NULL); 307 if (!ASSERT_GE(accept_fd, 0, "accept")) 308 goto done; 309 310 if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo")) 311 goto done; 312 313 n = write(client_fd, buf, sizeof(buf)); 314 if (!ASSERT_EQ(n, sizeof(buf), "send to server")) 315 goto done; 316 317 n = read(accept_fd, buf, sizeof(buf)); 318 ASSERT_EQ(n, sizeof(buf), "recv from server"); 319 320 done: 321 if (nstoken) 322 close_netns(nstoken); 323 if (listen_fd >= 0) 324 close(listen_fd); 325 if (accept_fd >= 0) 326 close(accept_fd); 327 if (client_fd >= 0) 328 close(client_fd); 329 } 330 331 static int test_ping(int family, const char *addr) 332 { 333 SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); 334 return 0; 335 fail: 336 return -1; 337 } 338 339 static void test_connectivity(void) 340 { 341 test_tcp(AF_INET, IP4_DST, IP4_PORT); 342 test_ping(AF_INET, IP4_DST); 343 test_tcp(AF_INET6, IP6_DST, IP6_PORT); 344 test_ping(AF_INET6, IP6_DST); 345 } 346 347 static int set_forwarding(bool enable) 348 { 349 int err; 350 351 err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0"); 352 if (!ASSERT_OK(err, "set ipv4.ip_forward=0")) 353 return err; 354 355 err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0"); 356 if (!ASSERT_OK(err, "set ipv6.forwarding=0")) 357 return err; 358 359 return 0; 360 } 361 362 static void rcv_tstamp(int fd, const char *expected, size_t s) 363 { 364 struct __kernel_timespec pkt_ts = {}; 365 char ctl[CMSG_SPACE(sizeof(pkt_ts))]; 366 struct timespec now_ts; 367 struct msghdr msg = {}; 368 __u64 now_ns, pkt_ns; 369 struct cmsghdr *cmsg; 370 struct iovec iov; 371 char data[32]; 372 int ret; 373 374 iov.iov_base = data; 375 iov.iov_len = sizeof(data); 376 msg.msg_iov = &iov; 377 msg.msg_iovlen = 1; 378 msg.msg_control = &ctl; 379 msg.msg_controllen = sizeof(ctl); 380 381 ret = recvmsg(fd, &msg, 0); 382 if (!ASSERT_EQ(ret, s, "recvmsg")) 383 return; 384 ASSERT_STRNEQ(data, expected, s, "expected rcv data"); 385 386 cmsg = CMSG_FIRSTHDR(&msg); 387 if (cmsg && cmsg->cmsg_level == SOL_SOCKET && 388 cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) 389 memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts)); 390 391 pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec; 392 ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp"); 393 394 ret = clock_gettime(CLOCK_REALTIME, &now_ts); 395 ASSERT_OK(ret, "clock_gettime"); 396 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec; 397 398 if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp")) 399 ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC, 400 "check rcv tstamp"); 401 } 402 403 static void snd_tstamp(int fd, char *b, size_t s) 404 { 405 struct sock_txtime opt = { .clockid = CLOCK_TAI }; 406 char ctl[CMSG_SPACE(sizeof(__u64))]; 407 struct timespec now_ts; 408 struct msghdr msg = {}; 409 struct cmsghdr *cmsg; 410 struct iovec iov; 411 __u64 now_ns; 412 int ret; 413 414 ret = clock_gettime(CLOCK_TAI, &now_ts); 415 ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)"); 416 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec; 417 418 iov.iov_base = b; 419 iov.iov_len = s; 420 msg.msg_iov = &iov; 421 msg.msg_iovlen = 1; 422 msg.msg_control = &ctl; 423 msg.msg_controllen = sizeof(ctl); 424 425 cmsg = CMSG_FIRSTHDR(&msg); 426 cmsg->cmsg_level = SOL_SOCKET; 427 cmsg->cmsg_type = SCM_TXTIME; 428 cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns)); 429 *(__u64 *)CMSG_DATA(cmsg) = now_ns; 430 431 ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt)); 432 ASSERT_OK(ret, "setsockopt(SO_TXTIME)"); 433 434 ret = sendmsg(fd, &msg, 0); 435 ASSERT_EQ(ret, s, "sendmsg"); 436 } 437 438 static void test_inet_dtime(int family, int type, const char *addr, __u16 port) 439 { 440 int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err; 441 char buf[] = "testing testing"; 442 struct nstoken *nstoken; 443 444 nstoken = open_netns(NS_DST); 445 if (!ASSERT_OK_PTR(nstoken, "setns dst")) 446 return; 447 listen_fd = start_server(family, type, addr, port, 0); 448 close_netns(nstoken); 449 450 if (!ASSERT_GE(listen_fd, 0, "listen")) 451 return; 452 453 /* Ensure the kernel puts the (rcv) timestamp for all skb */ 454 err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, 455 &opt, sizeof(opt)); 456 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)")) 457 goto done; 458 459 if (type == SOCK_STREAM) { 460 /* Ensure the kernel set EDT when sending out rst/ack 461 * from the kernel's ctl_sk. 462 */ 463 err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt, 464 sizeof(opt)); 465 if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)")) 466 goto done; 467 } 468 469 nstoken = open_netns(NS_SRC); 470 if (!ASSERT_OK_PTR(nstoken, "setns src")) 471 goto done; 472 client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); 473 close_netns(nstoken); 474 475 if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) 476 goto done; 477 478 if (type == SOCK_STREAM) { 479 int n; 480 481 accept_fd = accept(listen_fd, NULL, NULL); 482 if (!ASSERT_GE(accept_fd, 0, "accept")) 483 goto done; 484 485 n = write(client_fd, buf, sizeof(buf)); 486 if (!ASSERT_EQ(n, sizeof(buf), "send to server")) 487 goto done; 488 rcv_tstamp(accept_fd, buf, sizeof(buf)); 489 } else { 490 snd_tstamp(client_fd, buf, sizeof(buf)); 491 rcv_tstamp(listen_fd, buf, sizeof(buf)); 492 } 493 494 done: 495 close(listen_fd); 496 if (accept_fd != -1) 497 close(accept_fd); 498 if (client_fd != -1) 499 close(client_fd); 500 } 501 502 static int netns_load_dtime_bpf(struct test_tc_dtime *skel) 503 { 504 struct nstoken *nstoken; 505 506 #define PIN_FNAME(__file) "/sys/fs/bpf/" #__file 507 #define PIN(__prog) ({ \ 508 int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \ 509 if (!ASSERT_OK(err, "pin " #__prog)) \ 510 goto fail; \ 511 }) 512 513 /* setup ns_src tc progs */ 514 nstoken = open_netns(NS_SRC); 515 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) 516 return -1; 517 PIN(egress_host); 518 PIN(ingress_host); 519 SYS("tc qdisc add dev veth_src clsact"); 520 SYS("tc filter add dev veth_src ingress bpf da object-pinned " 521 PIN_FNAME(ingress_host)); 522 SYS("tc filter add dev veth_src egress bpf da object-pinned " 523 PIN_FNAME(egress_host)); 524 close_netns(nstoken); 525 526 /* setup ns_dst tc progs */ 527 nstoken = open_netns(NS_DST); 528 if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) 529 return -1; 530 PIN(egress_host); 531 PIN(ingress_host); 532 SYS("tc qdisc add dev veth_dst clsact"); 533 SYS("tc filter add dev veth_dst ingress bpf da object-pinned " 534 PIN_FNAME(ingress_host)); 535 SYS("tc filter add dev veth_dst egress bpf da object-pinned " 536 PIN_FNAME(egress_host)); 537 close_netns(nstoken); 538 539 /* setup ns_fwd tc progs */ 540 nstoken = open_netns(NS_FWD); 541 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) 542 return -1; 543 PIN(ingress_fwdns_prio100); 544 PIN(egress_fwdns_prio100); 545 PIN(ingress_fwdns_prio101); 546 PIN(egress_fwdns_prio101); 547 SYS("tc qdisc add dev veth_dst_fwd clsact"); 548 SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned " 549 PIN_FNAME(ingress_fwdns_prio100)); 550 SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned " 551 PIN_FNAME(ingress_fwdns_prio101)); 552 SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned " 553 PIN_FNAME(egress_fwdns_prio100)); 554 SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned " 555 PIN_FNAME(egress_fwdns_prio101)); 556 SYS("tc qdisc add dev veth_src_fwd clsact"); 557 SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned " 558 PIN_FNAME(ingress_fwdns_prio100)); 559 SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned " 560 PIN_FNAME(ingress_fwdns_prio101)); 561 SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned " 562 PIN_FNAME(egress_fwdns_prio100)); 563 SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned " 564 PIN_FNAME(egress_fwdns_prio101)); 565 close_netns(nstoken); 566 567 #undef PIN 568 569 return 0; 570 571 fail: 572 close_netns(nstoken); 573 return -1; 574 } 575 576 enum { 577 INGRESS_FWDNS_P100, 578 INGRESS_FWDNS_P101, 579 EGRESS_FWDNS_P100, 580 EGRESS_FWDNS_P101, 581 INGRESS_ENDHOST, 582 EGRESS_ENDHOST, 583 SET_DTIME, 584 __MAX_CNT, 585 }; 586 587 const char *cnt_names[] = { 588 "ingress_fwdns_p100", 589 "ingress_fwdns_p101", 590 "egress_fwdns_p100", 591 "egress_fwdns_p101", 592 "ingress_endhost", 593 "egress_endhost", 594 "set_dtime", 595 }; 596 597 enum { 598 TCP_IP6_CLEAR_DTIME, 599 TCP_IP4, 600 TCP_IP6, 601 UDP_IP4, 602 UDP_IP6, 603 TCP_IP4_RT_FWD, 604 TCP_IP6_RT_FWD, 605 UDP_IP4_RT_FWD, 606 UDP_IP6_RT_FWD, 607 UKN_TEST, 608 __NR_TESTS, 609 }; 610 611 const char *test_names[] = { 612 "tcp ip6 clear dtime", 613 "tcp ip4", 614 "tcp ip6", 615 "udp ip4", 616 "udp ip6", 617 "tcp ip4 rt fwd", 618 "tcp ip6 rt fwd", 619 "udp ip4 rt fwd", 620 "udp ip6 rt fwd", 621 }; 622 623 static const char *dtime_cnt_str(int test, int cnt) 624 { 625 static char name[64]; 626 627 snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]); 628 629 return name; 630 } 631 632 static const char *dtime_err_str(int test, int cnt) 633 { 634 static char name[64]; 635 636 snprintf(name, sizeof(name), "%s %s errs", test_names[test], 637 cnt_names[cnt]); 638 639 return name; 640 } 641 642 static void test_tcp_clear_dtime(struct test_tc_dtime *skel) 643 { 644 int i, t = TCP_IP6_CLEAR_DTIME; 645 __u32 *dtimes = skel->bss->dtimes[t]; 646 __u32 *errs = skel->bss->errs[t]; 647 648 skel->bss->test = t; 649 test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t); 650 651 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, 652 dtime_cnt_str(t, INGRESS_FWDNS_P100)); 653 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, 654 dtime_cnt_str(t, INGRESS_FWDNS_P101)); 655 ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0, 656 dtime_cnt_str(t, EGRESS_FWDNS_P100)); 657 ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0, 658 dtime_cnt_str(t, EGRESS_FWDNS_P101)); 659 ASSERT_GT(dtimes[EGRESS_ENDHOST], 0, 660 dtime_cnt_str(t, EGRESS_ENDHOST)); 661 ASSERT_GT(dtimes[INGRESS_ENDHOST], 0, 662 dtime_cnt_str(t, INGRESS_ENDHOST)); 663 664 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) 665 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); 666 } 667 668 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) 669 { 670 __u32 *dtimes, *errs; 671 const char *addr; 672 int i, t; 673 674 if (family == AF_INET) { 675 t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD; 676 addr = IP4_DST; 677 } else { 678 t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD; 679 addr = IP6_DST; 680 } 681 682 dtimes = skel->bss->dtimes[t]; 683 errs = skel->bss->errs[t]; 684 685 skel->bss->test = t; 686 test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t); 687 688 /* fwdns_prio100 prog does not read delivery_time_type, so 689 * kernel puts the (rcv) timetamp in __sk_buff->tstamp 690 */ 691 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, 692 dtime_cnt_str(t, INGRESS_FWDNS_P100)); 693 for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++) 694 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); 695 696 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) 697 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); 698 } 699 700 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) 701 { 702 __u32 *dtimes, *errs; 703 const char *addr; 704 int i, t; 705 706 if (family == AF_INET) { 707 t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD; 708 addr = IP4_DST; 709 } else { 710 t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD; 711 addr = IP6_DST; 712 } 713 714 dtimes = skel->bss->dtimes[t]; 715 errs = skel->bss->errs[t]; 716 717 skel->bss->test = t; 718 test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t); 719 720 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, 721 dtime_cnt_str(t, INGRESS_FWDNS_P100)); 722 /* non mono delivery time is not forwarded */ 723 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, 724 dtime_cnt_str(t, INGRESS_FWDNS_P101)); 725 for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) 726 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); 727 728 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) 729 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); 730 } 731 732 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) 733 { 734 struct test_tc_dtime *skel; 735 struct nstoken *nstoken; 736 int err; 737 738 skel = test_tc_dtime__open(); 739 if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) 740 return; 741 742 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; 743 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; 744 745 err = test_tc_dtime__load(skel); 746 if (!ASSERT_OK(err, "test_tc_dtime__load")) 747 goto done; 748 749 if (netns_load_dtime_bpf(skel)) 750 goto done; 751 752 nstoken = open_netns(NS_FWD); 753 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 754 goto done; 755 err = set_forwarding(false); 756 close_netns(nstoken); 757 if (!ASSERT_OK(err, "disable forwarding")) 758 goto done; 759 760 test_tcp_clear_dtime(skel); 761 762 test_tcp_dtime(skel, AF_INET, true); 763 test_tcp_dtime(skel, AF_INET6, true); 764 test_udp_dtime(skel, AF_INET, true); 765 test_udp_dtime(skel, AF_INET6, true); 766 767 /* Test the kernel ip[6]_forward path instead 768 * of bpf_redirect_neigh(). 769 */ 770 nstoken = open_netns(NS_FWD); 771 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 772 goto done; 773 err = set_forwarding(true); 774 close_netns(nstoken); 775 if (!ASSERT_OK(err, "enable forwarding")) 776 goto done; 777 778 test_tcp_dtime(skel, AF_INET, false); 779 test_tcp_dtime(skel, AF_INET6, false); 780 test_udp_dtime(skel, AF_INET, false); 781 test_udp_dtime(skel, AF_INET6, false); 782 783 done: 784 test_tc_dtime__destroy(skel); 785 } 786 787 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) 788 { 789 struct nstoken *nstoken = NULL; 790 struct test_tc_neigh_fib *skel = NULL; 791 int err; 792 793 nstoken = open_netns(NS_FWD); 794 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 795 return; 796 797 skel = test_tc_neigh_fib__open(); 798 if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open")) 799 goto done; 800 801 if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) 802 goto done; 803 804 err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); 805 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) 806 goto done; 807 808 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); 809 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) 810 goto done; 811 812 err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); 813 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) 814 goto done; 815 816 if (netns_load_bpf()) 817 goto done; 818 819 /* bpf_fib_lookup() checks if forwarding is enabled */ 820 if (!ASSERT_OK(set_forwarding(true), "enable forwarding")) 821 goto done; 822 823 test_connectivity(); 824 825 done: 826 if (skel) 827 test_tc_neigh_fib__destroy(skel); 828 close_netns(nstoken); 829 } 830 831 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) 832 { 833 struct nstoken *nstoken = NULL; 834 struct test_tc_neigh *skel = NULL; 835 int err; 836 837 nstoken = open_netns(NS_FWD); 838 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 839 return; 840 841 skel = test_tc_neigh__open(); 842 if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) 843 goto done; 844 845 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; 846 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; 847 848 err = test_tc_neigh__load(skel); 849 if (!ASSERT_OK(err, "test_tc_neigh__load")) 850 goto done; 851 852 err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); 853 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) 854 goto done; 855 856 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); 857 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) 858 goto done; 859 860 err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); 861 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) 862 goto done; 863 864 if (netns_load_bpf()) 865 goto done; 866 867 if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) 868 goto done; 869 870 test_connectivity(); 871 872 done: 873 if (skel) 874 test_tc_neigh__destroy(skel); 875 close_netns(nstoken); 876 } 877 878 static void test_tc_redirect_peer(struct netns_setup_result *setup_result) 879 { 880 struct nstoken *nstoken; 881 struct test_tc_peer *skel; 882 int err; 883 884 nstoken = open_netns(NS_FWD); 885 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 886 return; 887 888 skel = test_tc_peer__open(); 889 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) 890 goto done; 891 892 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; 893 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; 894 895 err = test_tc_peer__load(skel); 896 if (!ASSERT_OK(err, "test_tc_peer__load")) 897 goto done; 898 899 err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); 900 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) 901 goto done; 902 903 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); 904 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) 905 goto done; 906 907 err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); 908 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) 909 goto done; 910 911 if (netns_load_bpf()) 912 goto done; 913 914 if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) 915 goto done; 916 917 test_connectivity(); 918 919 done: 920 if (skel) 921 test_tc_peer__destroy(skel); 922 close_netns(nstoken); 923 } 924 925 static int tun_open(char *name) 926 { 927 struct ifreq ifr; 928 int fd, err; 929 930 fd = open("/dev/net/tun", O_RDWR); 931 if (!ASSERT_GE(fd, 0, "open /dev/net/tun")) 932 return -1; 933 934 memset(&ifr, 0, sizeof(ifr)); 935 936 ifr.ifr_flags = IFF_TUN | IFF_NO_PI; 937 if (*name) 938 strncpy(ifr.ifr_name, name, IFNAMSIZ); 939 940 err = ioctl(fd, TUNSETIFF, &ifr); 941 if (!ASSERT_OK(err, "ioctl TUNSETIFF")) 942 goto fail; 943 944 SYS("ip link set dev %s up", name); 945 946 return fd; 947 fail: 948 close(fd); 949 return -1; 950 } 951 952 enum { 953 SRC_TO_TARGET = 0, 954 TARGET_TO_SRC = 1, 955 }; 956 957 static int tun_relay_loop(int src_fd, int target_fd) 958 { 959 fd_set rfds, wfds; 960 961 FD_ZERO(&rfds); 962 FD_ZERO(&wfds); 963 964 for (;;) { 965 char buf[1500]; 966 int direction, nread, nwrite; 967 968 FD_SET(src_fd, &rfds); 969 FD_SET(target_fd, &rfds); 970 971 if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) { 972 log_err("select failed"); 973 return 1; 974 } 975 976 direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC; 977 978 nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf)); 979 if (nread < 0) { 980 log_err("read failed"); 981 return 1; 982 } 983 984 nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread); 985 if (nwrite != nread) { 986 log_err("write failed"); 987 return 1; 988 } 989 } 990 } 991 992 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) 993 { 994 struct test_tc_peer *skel = NULL; 995 struct nstoken *nstoken = NULL; 996 int err; 997 int tunnel_pid = -1; 998 int src_fd, target_fd = -1; 999 int ifindex; 1000 1001 /* Start a L3 TUN/TAP tunnel between the src and dst namespaces. 1002 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those 1003 * expose the L2 headers encapsulating the IP packet to BPF and hence 1004 * don't have skb in suitable state for this test. Alternative to TUN/TAP 1005 * would be e.g. Wireguard which would appear as a pure L3 device to BPF, 1006 * but that requires much more complicated setup. 1007 */ 1008 nstoken = open_netns(NS_SRC); 1009 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) 1010 return; 1011 1012 src_fd = tun_open("tun_src"); 1013 if (!ASSERT_GE(src_fd, 0, "tun_open tun_src")) 1014 goto fail; 1015 1016 close_netns(nstoken); 1017 1018 nstoken = open_netns(NS_FWD); 1019 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) 1020 goto fail; 1021 1022 target_fd = tun_open("tun_fwd"); 1023 if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd")) 1024 goto fail; 1025 1026 tunnel_pid = fork(); 1027 if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop")) 1028 goto fail; 1029 1030 if (tunnel_pid == 0) 1031 exit(tun_relay_loop(src_fd, target_fd)); 1032 1033 skel = test_tc_peer__open(); 1034 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) 1035 goto fail; 1036 1037 ifindex = get_ifindex("tun_fwd"); 1038 if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd")) 1039 goto fail; 1040 1041 skel->rodata->IFINDEX_SRC = ifindex; 1042 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; 1043 1044 err = test_tc_peer__load(skel); 1045 if (!ASSERT_OK(err, "test_tc_peer__load")) 1046 goto fail; 1047 1048 err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE); 1049 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) 1050 goto fail; 1051 1052 err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE); 1053 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) 1054 goto fail; 1055 1056 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); 1057 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) 1058 goto fail; 1059 1060 /* Load "tc_src_l3" to the tun_fwd interface to redirect packets 1061 * towards dst, and "tc_dst" to redirect packets 1062 * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. 1063 */ 1064 SYS("tc qdisc add dev tun_fwd clsact"); 1065 SYS("tc filter add dev tun_fwd ingress bpf da object-pinned " 1066 SRC_PROG_PIN_FILE); 1067 1068 SYS("tc qdisc add dev veth_dst_fwd clsact"); 1069 SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " 1070 DST_PROG_PIN_FILE); 1071 SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " 1072 CHK_PROG_PIN_FILE); 1073 1074 /* Setup route and neigh tables */ 1075 SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); 1076 SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); 1077 1078 SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); 1079 SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); 1080 1081 SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); 1082 SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD 1083 " dev tun_src scope global"); 1084 SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); 1085 SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); 1086 SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD 1087 " dev tun_src scope global"); 1088 SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); 1089 1090 SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); 1091 SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); 1092 1093 if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) 1094 goto fail; 1095 1096 test_connectivity(); 1097 1098 fail: 1099 if (tunnel_pid > 0) { 1100 kill(tunnel_pid, SIGTERM); 1101 waitpid(tunnel_pid, NULL, 0); 1102 } 1103 if (src_fd >= 0) 1104 close(src_fd); 1105 if (target_fd >= 0) 1106 close(target_fd); 1107 if (skel) 1108 test_tc_peer__destroy(skel); 1109 if (nstoken) 1110 close_netns(nstoken); 1111 } 1112 1113 #define RUN_TEST(name) \ 1114 ({ \ 1115 struct netns_setup_result setup_result; \ 1116 if (test__start_subtest(#name)) \ 1117 if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ 1118 if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ 1119 "setup links and routes")) \ 1120 test_ ## name(&setup_result); \ 1121 netns_setup_namespaces("delete"); \ 1122 } \ 1123 }) 1124 1125 static void *test_tc_redirect_run_tests(void *arg) 1126 { 1127 netns_setup_namespaces_nofail("delete"); 1128 1129 RUN_TEST(tc_redirect_peer); 1130 RUN_TEST(tc_redirect_peer_l3); 1131 RUN_TEST(tc_redirect_neigh); 1132 RUN_TEST(tc_redirect_neigh_fib); 1133 RUN_TEST(tc_redirect_dtime); 1134 return NULL; 1135 } 1136 1137 void serial_test_tc_redirect(void) 1138 { 1139 pthread_t test_thread; 1140 int err; 1141 1142 /* Run the tests in their own thread to isolate the namespace changes 1143 * so they do not affect the environment of other tests. 1144 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) 1145 */ 1146 err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL); 1147 if (ASSERT_OK(err, "pthread_create")) 1148 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); 1149 } 1150