1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3 /* 4 * Test suite of lwt BPF programs that reroutes packets 5 * The file tests focus not only if these programs work as expected normally, 6 * but also if they can handle abnormal situations gracefully. This test 7 * suite currently only covers lwt_xmit hook. lwt_in tests have not been 8 * implemented. 9 * 10 * WARNING 11 * ------- 12 * This test suite can crash the kernel, thus should be run in a VM. 13 * 14 * Setup: 15 * --------- 16 * all tests are performed in a single netns. A lwt encap route is setup for 17 * each subtest: 18 * 19 * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err 20 * 21 * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains 22 * a single test program entry. This program sets packet mark by last byte of 23 * the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb 24 * mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped 25 * to avoid route loop. We didn't use generated BPF skeleton since the 26 * attachment for lwt programs are not supported by libbpf yet. 27 * 28 * The test program will bring up a tun device, and sets up the following 29 * routes: 30 * 31 * ip rule add pref 100 from all fwmark <tun_index> lookup 100 32 * ip route add table 100 default dev tun0 33 * 34 * For normal testing, a ping command is running in the test netns: 35 * 36 * ping 10.0.0.<tun_index> -c 1 -w 1 -s 100 37 * 38 * For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP 39 * socket will try to overflow the fq queue and trigger qdisc drop error. 40 * 41 * Scenarios: 42 * -------------------------------- 43 * 1. Reroute to a running tun device 44 * 2. Reroute to a device where qdisc drop 45 * 46 * For case 1, ping packets should be received by the tun device. 47 * 48 * For case 2, force UDP packets to overflow fq limit. As long as kernel 49 * is not crashed, it is considered successful. 50 */ 51 #define NETNS "ns_lwt_reroute" 52 #include <netinet/in.h> 53 #include "lwt_helpers.h" 54 #include "network_helpers.h" 55 #include <linux/net_tstamp.h> 56 57 #define BPF_OBJECT "test_lwt_reroute.bpf.o" 58 #define LOCAL_SRC "10.0.0.1" 59 #define TEST_CIDR "10.0.0.0/24" 60 #define XMIT_HOOK "xmit" 61 #define XMIT_SECTION "lwt_xmit" 62 #define NSEC_PER_SEC 1000000000ULL 63 64 /* send a ping to be rerouted to the target device */ 65 static void ping_once(const char *ip) 66 { 67 /* We won't get a reply. Don't fail here */ 68 SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", 69 ip, ICMP_PAYLOAD_SIZE); 70 } 71 72 /* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop 73 * error. This is done via TX tstamp to force buffering delayed packets. 74 */ 75 static int overflow_fq(int snd_target, const char *target_ip) 76 { 77 struct sockaddr_in addr = { 78 .sin_family = AF_INET, 79 .sin_port = htons(1234), 80 }; 81 82 char data_buf[8]; /* only #pkts matter, so use a random small buffer */ 83 char control_buf[CMSG_SPACE(sizeof(uint64_t))]; 84 struct iovec iov = { 85 .iov_base = data_buf, 86 .iov_len = sizeof(data_buf), 87 }; 88 int err = -1; 89 int s = -1; 90 struct sock_txtime txtime_on = { 91 .clockid = CLOCK_MONOTONIC, 92 .flags = 0, 93 }; 94 struct msghdr msg = { 95 .msg_name = &addr, 96 .msg_namelen = sizeof(addr), 97 .msg_control = control_buf, 98 .msg_controllen = sizeof(control_buf), 99 .msg_iovlen = 1, 100 .msg_iov = &iov, 101 }; 102 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); 103 104 memset(data_buf, 0, sizeof(data_buf)); 105 106 s = socket(AF_INET, SOCK_DGRAM, 0); 107 if (!ASSERT_GE(s, 0, "socket")) 108 goto out; 109 110 err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on)); 111 if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)")) 112 goto out; 113 114 err = inet_pton(AF_INET, target_ip, &addr.sin_addr); 115 if (!ASSERT_EQ(err, 1, "inet_pton")) 116 goto out; 117 118 while (snd_target > 0) { 119 struct timespec now; 120 121 memset(control_buf, 0, sizeof(control_buf)); 122 cmsg->cmsg_type = SCM_TXTIME; 123 cmsg->cmsg_level = SOL_SOCKET; 124 cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t)); 125 126 err = clock_gettime(CLOCK_MONOTONIC, &now); 127 if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) { 128 err = -1; 129 goto out; 130 } 131 132 *(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC + 133 now.tv_nsec; 134 135 /* we will intentionally send more than fq limit, so ignore 136 * the error here. 137 */ 138 sendmsg(s, &msg, MSG_NOSIGNAL); 139 snd_target--; 140 } 141 142 /* no kernel crash so far is considered success */ 143 err = 0; 144 145 out: 146 if (s >= 0) 147 close(s); 148 149 return err; 150 } 151 152 static int setup(const char *tun_dev) 153 { 154 int target_index = -1; 155 int tap_fd = -1; 156 157 tap_fd = open_tuntap(tun_dev, false); 158 if (!ASSERT_GE(tap_fd, 0, "open_tun")) 159 return -1; 160 161 target_index = if_nametoindex(tun_dev); 162 if (!ASSERT_GE(target_index, 0, "if_nametoindex")) 163 return -1; 164 165 SYS(fail, "ip link add link_err type dummy"); 166 SYS(fail, "ip link set lo up"); 167 SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); 168 SYS(fail, "ip link set link_err up"); 169 SYS(fail, "ip link set %s up", tun_dev); 170 171 SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit", 172 TEST_CIDR, BPF_OBJECT); 173 174 SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100", 175 target_index); 176 SYS(fail, "ip route add t 100 default dev %s", tun_dev); 177 178 return tap_fd; 179 180 fail: 181 if (tap_fd >= 0) 182 close(tap_fd); 183 return -1; 184 } 185 186 static void test_lwt_reroute_normal_xmit(void) 187 { 188 const char *tun_dev = "tun0"; 189 int tun_fd = -1; 190 int ifindex = -1; 191 char ip[256]; 192 struct timeval timeo = { 193 .tv_sec = 0, 194 .tv_usec = 250000, 195 }; 196 197 tun_fd = setup(tun_dev); 198 if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) 199 return; 200 201 ifindex = if_nametoindex(tun_dev); 202 if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) 203 return; 204 205 snprintf(ip, 256, "10.0.0.%d", ifindex); 206 207 /* ping packets should be received by the tun device */ 208 ping_once(ip); 209 210 if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1, 211 "wait_for_packet")) 212 log_err("%s xmit", __func__); 213 } 214 215 /* 216 * Test the failure case when the skb is dropped at the qdisc. This is a 217 * regression prevention at the xmit hook only. 218 */ 219 static void test_lwt_reroute_qdisc_dropped(void) 220 { 221 const char *tun_dev = "tun0"; 222 int tun_fd = -1; 223 int ifindex = -1; 224 char ip[256]; 225 226 tun_fd = setup(tun_dev); 227 if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) 228 goto fail; 229 230 SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev); 231 232 ifindex = if_nametoindex(tun_dev); 233 if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) 234 return; 235 236 snprintf(ip, 256, "10.0.0.%d", ifindex); 237 ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq"); 238 239 fail: 240 if (tun_fd >= 0) 241 close(tun_fd); 242 } 243 244 static void *test_lwt_reroute_run(void *arg) 245 { 246 netns_delete(); 247 RUN_TEST(lwt_reroute_normal_xmit); 248 RUN_TEST(lwt_reroute_qdisc_dropped); 249 return NULL; 250 } 251 252 void test_lwt_reroute(void) 253 { 254 pthread_t test_thread; 255 int err; 256 257 /* Run the tests in their own thread to isolate the namespace changes 258 * so they do not affect the environment of other tests. 259 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) 260 */ 261 err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL); 262 if (ASSERT_OK(err, "pthread_create")) 263 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); 264 } 265