1*6c77997bSYan Zhai // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2*6c77997bSYan Zhai
3*6c77997bSYan Zhai /*
 * Test suite for lwt BPF programs that reroute packets.
 * The tests check not only that these programs work as expected under
 * normal conditions, but also that they handle abnormal situations
 * gracefully. This test suite currently covers only the lwt_xmit hook;
 * lwt_in tests have not been implemented.
9*6c77997bSYan Zhai *
10*6c77997bSYan Zhai * WARNING
11*6c77997bSYan Zhai * -------
12*6c77997bSYan Zhai * This test suite can crash the kernel, thus should be run in a VM.
13*6c77997bSYan Zhai *
14*6c77997bSYan Zhai * Setup:
15*6c77997bSYan Zhai * ---------
 * All tests are performed in a single netns. An lwt encap route is set up
 * for each subtest:
 *
 *   ip route add 10.0.0.0/24 dev link_err encap bpf xmit obj <obj> sec lwt_xmit
20*6c77997bSYan Zhai *
21*6c77997bSYan Zhai * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains
22*6c77997bSYan Zhai * a single test program entry. This program sets packet mark by last byte of
23*6c77997bSYan Zhai * the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb
24*6c77997bSYan Zhai * mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped
25*6c77997bSYan Zhai * to avoid route loop. We didn't use generated BPF skeleton since the
26*6c77997bSYan Zhai * attachment for lwt programs are not supported by libbpf yet.
27*6c77997bSYan Zhai *
28*6c77997bSYan Zhai * The test program will bring up a tun device, and sets up the following
29*6c77997bSYan Zhai * routes:
30*6c77997bSYan Zhai *
31*6c77997bSYan Zhai * ip rule add pref 100 from all fwmark <tun_index> lookup 100
32*6c77997bSYan Zhai * ip route add table 100 default dev tun0
33*6c77997bSYan Zhai *
34*6c77997bSYan Zhai * For normal testing, a ping command is running in the test netns:
35*6c77997bSYan Zhai *
 *   ping 10.0.0.<tun_index> -c1 -W1 -s <ICMP_PAYLOAD_SIZE>
37*6c77997bSYan Zhai *
38*6c77997bSYan Zhai * For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP
39*6c77997bSYan Zhai * socket will try to overflow the fq queue and trigger qdisc drop error.
40*6c77997bSYan Zhai *
41*6c77997bSYan Zhai * Scenarios:
42*6c77997bSYan Zhai * --------------------------------
43*6c77997bSYan Zhai * 1. Reroute to a running tun device
 * 2. Reroute to a device whose qdisc drops packets
45*6c77997bSYan Zhai *
46*6c77997bSYan Zhai * For case 1, ping packets should be received by the tun device.
47*6c77997bSYan Zhai *
 * For case 2, UDP packets are forced to overflow the fq limit. As long as
 * the kernel does not crash, the test is considered successful.
50*6c77997bSYan Zhai */
51*6c77997bSYan Zhai #include "lwt_helpers.h"
52*6c77997bSYan Zhai #include "network_helpers.h"
53*6c77997bSYan Zhai #include <linux/net_tstamp.h>
54*6c77997bSYan Zhai
/* BPF object file named in the lwt encap route installed by setup() */
#define BPF_OBJECT "test_lwt_reroute.bpf.o"
/* address added to lo (as a /32) by setup() */
#define LOCAL_SRC "10.0.0.1"
/* prefix whose route carries the lwt BPF encap */
#define TEST_CIDR "10.0.0.0/24"
/*
 * NOTE(review): XMIT_HOOK and XMIT_SECTION do not appear to be referenced
 * in this file; setup() spells out "xmit" and "lwt_xmit" literally.
 */
#define XMIT_HOOK "xmit"
#define XMIT_SECTION "lwt_xmit"
/* nanoseconds per second, used by overflow_fq() to build the TX timestamp */
#define NSEC_PER_SEC 1000000000ULL
61*6c77997bSYan Zhai
62*6c77997bSYan Zhai /* send a ping to be rerouted to the target device */
ping_once(const char * ip)63*6c77997bSYan Zhai static void ping_once(const char *ip)
64*6c77997bSYan Zhai {
65*6c77997bSYan Zhai /* We won't get a reply. Don't fail here */
66*6c77997bSYan Zhai SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1",
67*6c77997bSYan Zhai ip, ICMP_PAYLOAD_SIZE);
68*6c77997bSYan Zhai }
69*6c77997bSYan Zhai
70*6c77997bSYan Zhai /* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop
71*6c77997bSYan Zhai * error. This is done via TX tstamp to force buffering delayed packets.
72*6c77997bSYan Zhai */
overflow_fq(int snd_target,const char * target_ip)73*6c77997bSYan Zhai static int overflow_fq(int snd_target, const char *target_ip)
74*6c77997bSYan Zhai {
75*6c77997bSYan Zhai struct sockaddr_in addr = {
76*6c77997bSYan Zhai .sin_family = AF_INET,
77*6c77997bSYan Zhai .sin_port = htons(1234),
78*6c77997bSYan Zhai };
79*6c77997bSYan Zhai
80*6c77997bSYan Zhai char data_buf[8]; /* only #pkts matter, so use a random small buffer */
81*6c77997bSYan Zhai char control_buf[CMSG_SPACE(sizeof(uint64_t))];
82*6c77997bSYan Zhai struct iovec iov = {
83*6c77997bSYan Zhai .iov_base = data_buf,
84*6c77997bSYan Zhai .iov_len = sizeof(data_buf),
85*6c77997bSYan Zhai };
86*6c77997bSYan Zhai int err = -1;
87*6c77997bSYan Zhai int s = -1;
88*6c77997bSYan Zhai struct sock_txtime txtime_on = {
89*6c77997bSYan Zhai .clockid = CLOCK_MONOTONIC,
90*6c77997bSYan Zhai .flags = 0,
91*6c77997bSYan Zhai };
92*6c77997bSYan Zhai struct msghdr msg = {
93*6c77997bSYan Zhai .msg_name = &addr,
94*6c77997bSYan Zhai .msg_namelen = sizeof(addr),
95*6c77997bSYan Zhai .msg_control = control_buf,
96*6c77997bSYan Zhai .msg_controllen = sizeof(control_buf),
97*6c77997bSYan Zhai .msg_iovlen = 1,
98*6c77997bSYan Zhai .msg_iov = &iov,
99*6c77997bSYan Zhai };
100*6c77997bSYan Zhai struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
101*6c77997bSYan Zhai
102*6c77997bSYan Zhai memset(data_buf, 0, sizeof(data_buf));
103*6c77997bSYan Zhai
104*6c77997bSYan Zhai s = socket(AF_INET, SOCK_DGRAM, 0);
105*6c77997bSYan Zhai if (!ASSERT_GE(s, 0, "socket"))
106*6c77997bSYan Zhai goto out;
107*6c77997bSYan Zhai
108*6c77997bSYan Zhai err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on));
109*6c77997bSYan Zhai if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)"))
110*6c77997bSYan Zhai goto out;
111*6c77997bSYan Zhai
112*6c77997bSYan Zhai err = inet_pton(AF_INET, target_ip, &addr.sin_addr);
113*6c77997bSYan Zhai if (!ASSERT_EQ(err, 1, "inet_pton"))
114*6c77997bSYan Zhai goto out;
115*6c77997bSYan Zhai
116*6c77997bSYan Zhai while (snd_target > 0) {
117*6c77997bSYan Zhai struct timespec now;
118*6c77997bSYan Zhai
119*6c77997bSYan Zhai memset(control_buf, 0, sizeof(control_buf));
120*6c77997bSYan Zhai cmsg->cmsg_type = SCM_TXTIME;
121*6c77997bSYan Zhai cmsg->cmsg_level = SOL_SOCKET;
122*6c77997bSYan Zhai cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t));
123*6c77997bSYan Zhai
124*6c77997bSYan Zhai err = clock_gettime(CLOCK_MONOTONIC, &now);
125*6c77997bSYan Zhai if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) {
126*6c77997bSYan Zhai err = -1;
127*6c77997bSYan Zhai goto out;
128*6c77997bSYan Zhai }
129*6c77997bSYan Zhai
130*6c77997bSYan Zhai *(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC +
131*6c77997bSYan Zhai now.tv_nsec;
132*6c77997bSYan Zhai
133*6c77997bSYan Zhai /* we will intentionally send more than fq limit, so ignore
134*6c77997bSYan Zhai * the error here.
135*6c77997bSYan Zhai */
136*6c77997bSYan Zhai sendmsg(s, &msg, MSG_NOSIGNAL);
137*6c77997bSYan Zhai snd_target--;
138*6c77997bSYan Zhai }
139*6c77997bSYan Zhai
140*6c77997bSYan Zhai /* no kernel crash so far is considered success */
141*6c77997bSYan Zhai err = 0;
142*6c77997bSYan Zhai
143*6c77997bSYan Zhai out:
144*6c77997bSYan Zhai if (s >= 0)
145*6c77997bSYan Zhai close(s);
146*6c77997bSYan Zhai
147*6c77997bSYan Zhai return err;
148*6c77997bSYan Zhai }
149*6c77997bSYan Zhai
setup(const char * tun_dev)150*6c77997bSYan Zhai static int setup(const char *tun_dev)
151*6c77997bSYan Zhai {
152*6c77997bSYan Zhai int target_index = -1;
153*6c77997bSYan Zhai int tap_fd = -1;
154*6c77997bSYan Zhai
155*6c77997bSYan Zhai tap_fd = open_tuntap(tun_dev, false);
156*6c77997bSYan Zhai if (!ASSERT_GE(tap_fd, 0, "open_tun"))
157*6c77997bSYan Zhai return -1;
158*6c77997bSYan Zhai
159*6c77997bSYan Zhai target_index = if_nametoindex(tun_dev);
160*6c77997bSYan Zhai if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
161*6c77997bSYan Zhai return -1;
162*6c77997bSYan Zhai
163*6c77997bSYan Zhai SYS(fail, "ip link add link_err type dummy");
164*6c77997bSYan Zhai SYS(fail, "ip link set lo up");
165*6c77997bSYan Zhai SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
166*6c77997bSYan Zhai SYS(fail, "ip link set link_err up");
167*6c77997bSYan Zhai SYS(fail, "ip link set %s up", tun_dev);
168*6c77997bSYan Zhai
169*6c77997bSYan Zhai SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit",
170*6c77997bSYan Zhai TEST_CIDR, BPF_OBJECT);
171*6c77997bSYan Zhai
172*6c77997bSYan Zhai SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100",
173*6c77997bSYan Zhai target_index);
174*6c77997bSYan Zhai SYS(fail, "ip route add t 100 default dev %s", tun_dev);
175*6c77997bSYan Zhai
176*6c77997bSYan Zhai return tap_fd;
177*6c77997bSYan Zhai
178*6c77997bSYan Zhai fail:
179*6c77997bSYan Zhai if (tap_fd >= 0)
180*6c77997bSYan Zhai close(tap_fd);
181*6c77997bSYan Zhai return -1;
182*6c77997bSYan Zhai }
183*6c77997bSYan Zhai
test_lwt_reroute_normal_xmit(void)184*6c77997bSYan Zhai static void test_lwt_reroute_normal_xmit(void)
185*6c77997bSYan Zhai {
186*6c77997bSYan Zhai const char *tun_dev = "tun0";
187*6c77997bSYan Zhai int tun_fd = -1;
188*6c77997bSYan Zhai int ifindex = -1;
189*6c77997bSYan Zhai char ip[256];
190*6c77997bSYan Zhai struct timeval timeo = {
191*6c77997bSYan Zhai .tv_sec = 0,
192*6c77997bSYan Zhai .tv_usec = 250000,
193*6c77997bSYan Zhai };
194*6c77997bSYan Zhai
195*6c77997bSYan Zhai tun_fd = setup(tun_dev);
196*6c77997bSYan Zhai if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
197*6c77997bSYan Zhai return;
198*6c77997bSYan Zhai
199*6c77997bSYan Zhai ifindex = if_nametoindex(tun_dev);
200*6c77997bSYan Zhai if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
201*6c77997bSYan Zhai return;
202*6c77997bSYan Zhai
203*6c77997bSYan Zhai snprintf(ip, 256, "10.0.0.%d", ifindex);
204*6c77997bSYan Zhai
205*6c77997bSYan Zhai /* ping packets should be received by the tun device */
206*6c77997bSYan Zhai ping_once(ip);
207*6c77997bSYan Zhai
208*6c77997bSYan Zhai if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1,
209*6c77997bSYan Zhai "wait_for_packet"))
210*6c77997bSYan Zhai log_err("%s xmit", __func__);
211*6c77997bSYan Zhai }
212*6c77997bSYan Zhai
213*6c77997bSYan Zhai /*
214*6c77997bSYan Zhai * Test the failure case when the skb is dropped at the qdisc. This is a
215*6c77997bSYan Zhai * regression prevention at the xmit hook only.
216*6c77997bSYan Zhai */
test_lwt_reroute_qdisc_dropped(void)217*6c77997bSYan Zhai static void test_lwt_reroute_qdisc_dropped(void)
218*6c77997bSYan Zhai {
219*6c77997bSYan Zhai const char *tun_dev = "tun0";
220*6c77997bSYan Zhai int tun_fd = -1;
221*6c77997bSYan Zhai int ifindex = -1;
222*6c77997bSYan Zhai char ip[256];
223*6c77997bSYan Zhai
224*6c77997bSYan Zhai tun_fd = setup(tun_dev);
225*6c77997bSYan Zhai if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
226*6c77997bSYan Zhai goto fail;
227*6c77997bSYan Zhai
228*6c77997bSYan Zhai SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev);
229*6c77997bSYan Zhai
230*6c77997bSYan Zhai ifindex = if_nametoindex(tun_dev);
231*6c77997bSYan Zhai if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
232*6c77997bSYan Zhai return;
233*6c77997bSYan Zhai
234*6c77997bSYan Zhai snprintf(ip, 256, "10.0.0.%d", ifindex);
235*6c77997bSYan Zhai ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq");
236*6c77997bSYan Zhai
237*6c77997bSYan Zhai fail:
238*6c77997bSYan Zhai if (tun_fd >= 0)
239*6c77997bSYan Zhai close(tun_fd);
240*6c77997bSYan Zhai }
241*6c77997bSYan Zhai
test_lwt_reroute_run(void * arg)242*6c77997bSYan Zhai static void *test_lwt_reroute_run(void *arg)
243*6c77997bSYan Zhai {
244*6c77997bSYan Zhai netns_delete();
245*6c77997bSYan Zhai RUN_TEST(lwt_reroute_normal_xmit);
246*6c77997bSYan Zhai RUN_TEST(lwt_reroute_qdisc_dropped);
247*6c77997bSYan Zhai return NULL;
248*6c77997bSYan Zhai }
249*6c77997bSYan Zhai
test_lwt_reroute(void)250*6c77997bSYan Zhai void test_lwt_reroute(void)
251*6c77997bSYan Zhai {
252*6c77997bSYan Zhai pthread_t test_thread;
253*6c77997bSYan Zhai int err;
254*6c77997bSYan Zhai
255*6c77997bSYan Zhai /* Run the tests in their own thread to isolate the namespace changes
256*6c77997bSYan Zhai * so they do not affect the environment of other tests.
257*6c77997bSYan Zhai * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
258*6c77997bSYan Zhai */
259*6c77997bSYan Zhai err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL);
260*6c77997bSYan Zhai if (ASSERT_OK(err, "pthread_create"))
261*6c77997bSYan Zhai ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
262*6c77997bSYan Zhai }
263