// SPDX-License-Identifier: GPL-2.0
/*
 * Check if we can migrate child sockets.
 *
 *   1. call listen() for 4 server sockets.
 *   2. call connect() for 25 client sockets.
 *   3. call listen() for 1 server socket. (migration target)
 *   4. update a map to migrate all child sockets
 *        to the last server socket (migrate_map[cookie] = 4)
 *   5. call shutdown() for first 4 server sockets
 *        and migrate the requests in the accept queue
 *        to the last server socket.
 *   6. call listen() for the second server socket.
 *   7. call shutdown() for the last server
 *        and migrate the requests in the accept queue
 *        to the second server socket.
 *   8. call listen() for the last server.
 *   9. call shutdown() for the second server
 *        and migrate the requests in the accept queue
 *        to the last server socket.
 *  10. call accept() for the last server socket.
 *
 * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
 */

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include "test_progs.h"
#include "test_migrate_reuseport.skel.h"
#include "network_helpers.h"

/* Fallback for toolchains whose headers do not provide the option */
#ifndef TCP_FASTOPEN_CONNECT
#define TCP_FASTOPEN_CONNECT 30
#endif

/* Interface index the XDP program is attached to (see drop_ack());
 * presumably the loopback device, as the name suggests.
 */
#define IFINDEX_LO 1

/* Number of listeners, clients, and the index of the migration target
 * (the last listener) in the servers[] array.
 */
#define NR_SERVERS 5
#define NR_CLIENTS (NR_SERVERS * 5)
#define MIGRATED_TO (NR_SERVERS - 1)

/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
#define QLEN (NR_CLIENTS * 5)

/* Payload each client sends; MSGLEN covers the 11 characters plus the
 * explicit "\0" so the receiver gets a NUL-terminated string.
 */
#define MSG "Hello World\0"
#define MSGLEN 12

49c9d0bdefSKuniyuki Iwashima static struct migrate_reuseport_test_case {
50c9d0bdefSKuniyuki Iwashima 	const char *name;
51c9d0bdefSKuniyuki Iwashima 	__s64 servers[NR_SERVERS];
52c9d0bdefSKuniyuki Iwashima 	__s64 clients[NR_CLIENTS];
53c9d0bdefSKuniyuki Iwashima 	struct sockaddr_storage addr;
54c9d0bdefSKuniyuki Iwashima 	socklen_t addrlen;
55c9d0bdefSKuniyuki Iwashima 	int family;
56c9d0bdefSKuniyuki Iwashima 	int state;
57c9d0bdefSKuniyuki Iwashima 	bool drop_ack;
58c9d0bdefSKuniyuki Iwashima 	bool expire_synack_timer;
59c9d0bdefSKuniyuki Iwashima 	bool fastopen;
60c9d0bdefSKuniyuki Iwashima 	struct bpf_link *link;
61c9d0bdefSKuniyuki Iwashima } test_cases[] = {
62c9d0bdefSKuniyuki Iwashima 	{
63c9d0bdefSKuniyuki Iwashima 		.name = "IPv4 TCP_ESTABLISHED  inet_csk_listen_stop",
64c9d0bdefSKuniyuki Iwashima 		.family = AF_INET,
65c9d0bdefSKuniyuki Iwashima 		.state = BPF_TCP_ESTABLISHED,
66c9d0bdefSKuniyuki Iwashima 		.drop_ack = false,
67c9d0bdefSKuniyuki Iwashima 		.expire_synack_timer = false,
68c9d0bdefSKuniyuki Iwashima 		.fastopen = false,
69c9d0bdefSKuniyuki Iwashima 	},
70c9d0bdefSKuniyuki Iwashima 	{
71c9d0bdefSKuniyuki Iwashima 		.name = "IPv4 TCP_SYN_RECV     inet_csk_listen_stop",
72c9d0bdefSKuniyuki Iwashima 		.family = AF_INET,
73c9d0bdefSKuniyuki Iwashima 		.state = BPF_TCP_SYN_RECV,
74c9d0bdefSKuniyuki Iwashima 		.drop_ack = true,
75c9d0bdefSKuniyuki Iwashima 		.expire_synack_timer = false,
76c9d0bdefSKuniyuki Iwashima 		.fastopen = true,
77c9d0bdefSKuniyuki Iwashima 	},
78c9d0bdefSKuniyuki Iwashima 	{
79c9d0bdefSKuniyuki Iwashima 		.name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
80c9d0bdefSKuniyuki Iwashima 		.family = AF_INET,
81c9d0bdefSKuniyuki Iwashima 		.state = BPF_TCP_NEW_SYN_RECV,
82c9d0bdefSKuniyuki Iwashima 		.drop_ack = true,
83c9d0bdefSKuniyuki Iwashima 		.expire_synack_timer = true,
84c9d0bdefSKuniyuki Iwashima 		.fastopen = false,
85c9d0bdefSKuniyuki Iwashima 	},
86c9d0bdefSKuniyuki Iwashima 	{
87c9d0bdefSKuniyuki Iwashima 		.name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
88c9d0bdefSKuniyuki Iwashima 		.family = AF_INET,
89c9d0bdefSKuniyuki Iwashima 		.state = BPF_TCP_NEW_SYN_RECV,
90c9d0bdefSKuniyuki Iwashima 		.drop_ack = true,
91c9d0bdefSKuniyuki Iwashima 		.expire_synack_timer = false,
92c9d0bdefSKuniyuki Iwashima 		.fastopen = false,
93c9d0bdefSKuniyuki Iwashima 	},
94c9d0bdefSKuniyuki Iwashima 	{
95c9d0bdefSKuniyuki Iwashima 		.name = "IPv6 TCP_ESTABLISHED  inet_csk_listen_stop",
96c9d0bdefSKuniyuki Iwashima 		.family = AF_INET6,
97c9d0bdefSKuniyuki Iwashima 		.state = BPF_TCP_ESTABLISHED,
98c9d0bdefSKuniyuki Iwashima 		.drop_ack = false,
99c9d0bdefSKuniyuki Iwashima 		.expire_synack_timer = false,
100c9d0bdefSKuniyuki Iwashima 		.fastopen = false,
101c9d0bdefSKuniyuki Iwashima 	},
102c9d0bdefSKuniyuki Iwashima 	{
103c9d0bdefSKuniyuki Iwashima 		.name = "IPv6 TCP_SYN_RECV     inet_csk_listen_stop",
104c9d0bdefSKuniyuki Iwashima 		.family = AF_INET6,
105c9d0bdefSKuniyuki Iwashima 		.state = BPF_TCP_SYN_RECV,
106c9d0bdefSKuniyuki Iwashima 		.drop_ack = true,
107c9d0bdefSKuniyuki Iwashima 		.expire_synack_timer = false,
108c9d0bdefSKuniyuki Iwashima 		.fastopen = true,
109c9d0bdefSKuniyuki Iwashima 	},
110c9d0bdefSKuniyuki Iwashima 	{
111c9d0bdefSKuniyuki Iwashima 		.name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
112c9d0bdefSKuniyuki Iwashima 		.family = AF_INET6,
113c9d0bdefSKuniyuki Iwashima 		.state = BPF_TCP_NEW_SYN_RECV,
114c9d0bdefSKuniyuki Iwashima 		.drop_ack = true,
115c9d0bdefSKuniyuki Iwashima 		.expire_synack_timer = true,
116c9d0bdefSKuniyuki Iwashima 		.fastopen = false,
117c9d0bdefSKuniyuki Iwashima 	},
118c9d0bdefSKuniyuki Iwashima 	{
119c9d0bdefSKuniyuki Iwashima 		.name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
120c9d0bdefSKuniyuki Iwashima 		.family = AF_INET6,
121c9d0bdefSKuniyuki Iwashima 		.state = BPF_TCP_NEW_SYN_RECV,
122c9d0bdefSKuniyuki Iwashima 		.drop_ack = true,
123c9d0bdefSKuniyuki Iwashima 		.expire_synack_timer = false,
124c9d0bdefSKuniyuki Iwashima 		.fastopen = false,
125c9d0bdefSKuniyuki Iwashima 	}
126c9d0bdefSKuniyuki Iwashima };
127c9d0bdefSKuniyuki Iwashima 
/* Mark every slot of @fds (of @len entries) as unused by storing -1,
 * the value close_fds() treats as "nothing to close".
 */
static void init_fds(__s64 fds[], int len)
{
	int i;

	for (i = 0; i < len; i++)
		fds[i] = -1;
}

/* Close every descriptor in @fds (of @len entries) that is not -1 and
 * reset its slot to -1, so calling this twice is harmless.
 */
static void close_fds(__s64 fds[], int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (fds[i] != -1) {
			close(fds[i]);
			fds[i] = -1;
		}
	}
}

/* Save-and-set or restore /proc/sys/net/ipv4/tcp_fastopen.
 *
 * @buf:       buffer holding the saved sysctl text
 * @size:      capacity of @buf (used only when saving)
 * @saved_len: out: number of bytes saved; in: number of bytes to restore
 * @restore:   false - read the current value into @buf, then write "519";
 *             true  - write the first *@saved_len bytes of @buf back
 *
 * Returns 0 on success and -1 when any step fails.
 */
static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
{
	int err = 0, fd, len;

	fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
	if (!ASSERT_NEQ(fd, -1, "open"))
		return -1;

	if (restore) {
		len = write(fd, buf, *saved_len);
		if (!ASSERT_EQ(len, *saved_len, "write - restore"))
			err = -1;
	} else {
		*saved_len = read(fd, buf, size);
		if (!ASSERT_GE(*saved_len, 1, "read")) {
			err = -1;
			goto close;
		}

		/* Rewind so the new value is written from the start */
		err = lseek(fd, 0, SEEK_SET);
		if (!ASSERT_OK(err, "lseek"))
			goto close;

		/* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
		 *  TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
		 */
		len = write(fd, "519", 3);
		if (!ASSERT_EQ(len, 3, "write - setup"))
			err = -1;
	}

close:
	close(fd);

	return err;
}

drop_ack(struct migrate_reuseport_test_case * test_case,struct test_migrate_reuseport * skel)185c9d0bdefSKuniyuki Iwashima static int drop_ack(struct migrate_reuseport_test_case *test_case,
186c9d0bdefSKuniyuki Iwashima 		    struct test_migrate_reuseport *skel)
187c9d0bdefSKuniyuki Iwashima {
188c9d0bdefSKuniyuki Iwashima 	if (test_case->family == AF_INET)
189c9d0bdefSKuniyuki Iwashima 		skel->bss->server_port = ((struct sockaddr_in *)
190c9d0bdefSKuniyuki Iwashima 					  &test_case->addr)->sin_port;
191c9d0bdefSKuniyuki Iwashima 	else
192c9d0bdefSKuniyuki Iwashima 		skel->bss->server_port = ((struct sockaddr_in6 *)
193c9d0bdefSKuniyuki Iwashima 					  &test_case->addr)->sin6_port;
194c9d0bdefSKuniyuki Iwashima 
195c9d0bdefSKuniyuki Iwashima 	test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
196c9d0bdefSKuniyuki Iwashima 						  IFINDEX_LO);
197c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
198c9d0bdefSKuniyuki Iwashima 		return -1;
199c9d0bdefSKuniyuki Iwashima 
200c9d0bdefSKuniyuki Iwashima 	return 0;
201c9d0bdefSKuniyuki Iwashima }
202c9d0bdefSKuniyuki Iwashima 
pass_ack(struct migrate_reuseport_test_case * test_case)203c9d0bdefSKuniyuki Iwashima static int pass_ack(struct migrate_reuseport_test_case *test_case)
204c9d0bdefSKuniyuki Iwashima {
205c9d0bdefSKuniyuki Iwashima 	int err;
206c9d0bdefSKuniyuki Iwashima 
207f91231eeSAndrii Nakryiko 	err = bpf_link__destroy(test_case->link);
208f91231eeSAndrii Nakryiko 	if (!ASSERT_OK(err, "bpf_link__destroy"))
209c9d0bdefSKuniyuki Iwashima 		return -1;
210c9d0bdefSKuniyuki Iwashima 
211c9d0bdefSKuniyuki Iwashima 	test_case->link = NULL;
212c9d0bdefSKuniyuki Iwashima 
213c9d0bdefSKuniyuki Iwashima 	return 0;
214c9d0bdefSKuniyuki Iwashima }
215c9d0bdefSKuniyuki Iwashima 
start_servers(struct migrate_reuseport_test_case * test_case,struct test_migrate_reuseport * skel)216c9d0bdefSKuniyuki Iwashima static int start_servers(struct migrate_reuseport_test_case *test_case,
217c9d0bdefSKuniyuki Iwashima 			 struct test_migrate_reuseport *skel)
218c9d0bdefSKuniyuki Iwashima {
219c9d0bdefSKuniyuki Iwashima 	int i, err, prog_fd, reuseport = 1, qlen = QLEN;
220c9d0bdefSKuniyuki Iwashima 
221c9d0bdefSKuniyuki Iwashima 	prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
222c9d0bdefSKuniyuki Iwashima 
223c9d0bdefSKuniyuki Iwashima 	make_sockaddr(test_case->family,
224c9d0bdefSKuniyuki Iwashima 		      test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
225c9d0bdefSKuniyuki Iwashima 		      &test_case->addr, &test_case->addrlen);
226c9d0bdefSKuniyuki Iwashima 
227c9d0bdefSKuniyuki Iwashima 	for (i = 0; i < NR_SERVERS; i++) {
228c9d0bdefSKuniyuki Iwashima 		test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
229c9d0bdefSKuniyuki Iwashima 					       IPPROTO_TCP);
230c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
231c9d0bdefSKuniyuki Iwashima 			return -1;
232c9d0bdefSKuniyuki Iwashima 
233c9d0bdefSKuniyuki Iwashima 		err = setsockopt(test_case->servers[i], SOL_SOCKET,
234c9d0bdefSKuniyuki Iwashima 				 SO_REUSEPORT, &reuseport, sizeof(reuseport));
235c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
236c9d0bdefSKuniyuki Iwashima 			return -1;
237c9d0bdefSKuniyuki Iwashima 
238c9d0bdefSKuniyuki Iwashima 		err = bind(test_case->servers[i],
239c9d0bdefSKuniyuki Iwashima 			   (struct sockaddr *)&test_case->addr,
240c9d0bdefSKuniyuki Iwashima 			   test_case->addrlen);
241c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "bind"))
242c9d0bdefSKuniyuki Iwashima 			return -1;
243c9d0bdefSKuniyuki Iwashima 
244c9d0bdefSKuniyuki Iwashima 		if (i == 0) {
245c9d0bdefSKuniyuki Iwashima 			err = setsockopt(test_case->servers[i], SOL_SOCKET,
246c9d0bdefSKuniyuki Iwashima 					 SO_ATTACH_REUSEPORT_EBPF,
247c9d0bdefSKuniyuki Iwashima 					 &prog_fd, sizeof(prog_fd));
248c9d0bdefSKuniyuki Iwashima 			if (!ASSERT_OK(err,
249c9d0bdefSKuniyuki Iwashima 				       "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
250c9d0bdefSKuniyuki Iwashima 				return -1;
251c9d0bdefSKuniyuki Iwashima 
252c9d0bdefSKuniyuki Iwashima 			err = getsockname(test_case->servers[i],
253c9d0bdefSKuniyuki Iwashima 					  (struct sockaddr *)&test_case->addr,
254c9d0bdefSKuniyuki Iwashima 					  &test_case->addrlen);
255c9d0bdefSKuniyuki Iwashima 			if (!ASSERT_OK(err, "getsockname"))
256c9d0bdefSKuniyuki Iwashima 				return -1;
257c9d0bdefSKuniyuki Iwashima 		}
258c9d0bdefSKuniyuki Iwashima 
259c9d0bdefSKuniyuki Iwashima 		if (test_case->fastopen) {
260c9d0bdefSKuniyuki Iwashima 			err = setsockopt(test_case->servers[i],
261c9d0bdefSKuniyuki Iwashima 					 SOL_TCP, TCP_FASTOPEN,
262c9d0bdefSKuniyuki Iwashima 					 &qlen, sizeof(qlen));
263c9d0bdefSKuniyuki Iwashima 			if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
264c9d0bdefSKuniyuki Iwashima 				return -1;
265c9d0bdefSKuniyuki Iwashima 		}
266c9d0bdefSKuniyuki Iwashima 
267c9d0bdefSKuniyuki Iwashima 		/* All requests will be tied to the first four listeners */
268c9d0bdefSKuniyuki Iwashima 		if (i != MIGRATED_TO) {
269c9d0bdefSKuniyuki Iwashima 			err = listen(test_case->servers[i], qlen);
270c9d0bdefSKuniyuki Iwashima 			if (!ASSERT_OK(err, "listen"))
271c9d0bdefSKuniyuki Iwashima 				return -1;
272c9d0bdefSKuniyuki Iwashima 		}
273c9d0bdefSKuniyuki Iwashima 	}
274c9d0bdefSKuniyuki Iwashima 
275c9d0bdefSKuniyuki Iwashima 	return 0;
276c9d0bdefSKuniyuki Iwashima }
277c9d0bdefSKuniyuki Iwashima 
start_clients(struct migrate_reuseport_test_case * test_case)278c9d0bdefSKuniyuki Iwashima static int start_clients(struct migrate_reuseport_test_case *test_case)
279c9d0bdefSKuniyuki Iwashima {
280c9d0bdefSKuniyuki Iwashima 	char buf[MSGLEN] = MSG;
281c9d0bdefSKuniyuki Iwashima 	int i, err;
282c9d0bdefSKuniyuki Iwashima 
283c9d0bdefSKuniyuki Iwashima 	for (i = 0; i < NR_CLIENTS; i++) {
284c9d0bdefSKuniyuki Iwashima 		test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
285c9d0bdefSKuniyuki Iwashima 					       IPPROTO_TCP);
286c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
287c9d0bdefSKuniyuki Iwashima 			return -1;
288c9d0bdefSKuniyuki Iwashima 
289c9d0bdefSKuniyuki Iwashima 		/* The attached XDP program drops only the final ACK, so
290c9d0bdefSKuniyuki Iwashima 		 * clients will transition to TCP_ESTABLISHED immediately.
291c9d0bdefSKuniyuki Iwashima 		 */
292c9d0bdefSKuniyuki Iwashima 		err = settimeo(test_case->clients[i], 100);
293c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "settimeo"))
294c9d0bdefSKuniyuki Iwashima 			return -1;
295c9d0bdefSKuniyuki Iwashima 
296c9d0bdefSKuniyuki Iwashima 		if (test_case->fastopen) {
297c9d0bdefSKuniyuki Iwashima 			int fastopen = 1;
298c9d0bdefSKuniyuki Iwashima 
299c9d0bdefSKuniyuki Iwashima 			err = setsockopt(test_case->clients[i], IPPROTO_TCP,
300c9d0bdefSKuniyuki Iwashima 					 TCP_FASTOPEN_CONNECT, &fastopen,
301c9d0bdefSKuniyuki Iwashima 					 sizeof(fastopen));
302c9d0bdefSKuniyuki Iwashima 			if (!ASSERT_OK(err,
303c9d0bdefSKuniyuki Iwashima 				       "setsockopt - TCP_FASTOPEN_CONNECT"))
304c9d0bdefSKuniyuki Iwashima 				return -1;
305c9d0bdefSKuniyuki Iwashima 		}
306c9d0bdefSKuniyuki Iwashima 
307c9d0bdefSKuniyuki Iwashima 		err = connect(test_case->clients[i],
308c9d0bdefSKuniyuki Iwashima 			      (struct sockaddr *)&test_case->addr,
309c9d0bdefSKuniyuki Iwashima 			      test_case->addrlen);
310c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "connect"))
311c9d0bdefSKuniyuki Iwashima 			return -1;
312c9d0bdefSKuniyuki Iwashima 
313c9d0bdefSKuniyuki Iwashima 		err = write(test_case->clients[i], buf, MSGLEN);
314c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_EQ(err, MSGLEN, "write"))
315c9d0bdefSKuniyuki Iwashima 			return -1;
316c9d0bdefSKuniyuki Iwashima 	}
317c9d0bdefSKuniyuki Iwashima 
318c9d0bdefSKuniyuki Iwashima 	return 0;
319c9d0bdefSKuniyuki Iwashima }
320c9d0bdefSKuniyuki Iwashima 
update_maps(struct migrate_reuseport_test_case * test_case,struct test_migrate_reuseport * skel)321c9d0bdefSKuniyuki Iwashima static int update_maps(struct migrate_reuseport_test_case *test_case,
322c9d0bdefSKuniyuki Iwashima 		       struct test_migrate_reuseport *skel)
323c9d0bdefSKuniyuki Iwashima {
324c9d0bdefSKuniyuki Iwashima 	int i, err, migrated_to = MIGRATED_TO;
325c9d0bdefSKuniyuki Iwashima 	int reuseport_map_fd, migrate_map_fd;
326c9d0bdefSKuniyuki Iwashima 	__u64 value;
327c9d0bdefSKuniyuki Iwashima 
328c9d0bdefSKuniyuki Iwashima 	reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
329c9d0bdefSKuniyuki Iwashima 	migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
330c9d0bdefSKuniyuki Iwashima 
331c9d0bdefSKuniyuki Iwashima 	for (i = 0; i < NR_SERVERS; i++) {
332c9d0bdefSKuniyuki Iwashima 		value = (__u64)test_case->servers[i];
333c9d0bdefSKuniyuki Iwashima 		err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
334c9d0bdefSKuniyuki Iwashima 					  BPF_NOEXIST);
335c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
336c9d0bdefSKuniyuki Iwashima 			return -1;
337c9d0bdefSKuniyuki Iwashima 
338c9d0bdefSKuniyuki Iwashima 		err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
339c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
340c9d0bdefSKuniyuki Iwashima 			return -1;
341c9d0bdefSKuniyuki Iwashima 
342c9d0bdefSKuniyuki Iwashima 		err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
343c9d0bdefSKuniyuki Iwashima 					  BPF_NOEXIST);
344c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
345c9d0bdefSKuniyuki Iwashima 			return -1;
346c9d0bdefSKuniyuki Iwashima 	}
347c9d0bdefSKuniyuki Iwashima 
348c9d0bdefSKuniyuki Iwashima 	return 0;
349c9d0bdefSKuniyuki Iwashima }
350c9d0bdefSKuniyuki Iwashima 
migrate_dance(struct migrate_reuseport_test_case * test_case)351c9d0bdefSKuniyuki Iwashima static int migrate_dance(struct migrate_reuseport_test_case *test_case)
352c9d0bdefSKuniyuki Iwashima {
353c9d0bdefSKuniyuki Iwashima 	int i, err;
354c9d0bdefSKuniyuki Iwashima 
355c9d0bdefSKuniyuki Iwashima 	/* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
356c9d0bdefSKuniyuki Iwashima 	 * to the last listener based on eBPF.
357c9d0bdefSKuniyuki Iwashima 	 */
358c9d0bdefSKuniyuki Iwashima 	for (i = 0; i < MIGRATED_TO; i++) {
359c9d0bdefSKuniyuki Iwashima 		err = shutdown(test_case->servers[i], SHUT_RDWR);
360c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "shutdown"))
361c9d0bdefSKuniyuki Iwashima 			return -1;
362c9d0bdefSKuniyuki Iwashima 	}
363c9d0bdefSKuniyuki Iwashima 
364c9d0bdefSKuniyuki Iwashima 	/* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
365c9d0bdefSKuniyuki Iwashima 	if (test_case->state == BPF_TCP_NEW_SYN_RECV)
366c9d0bdefSKuniyuki Iwashima 		return 0;
367c9d0bdefSKuniyuki Iwashima 
368c9d0bdefSKuniyuki Iwashima 	/* Note that we use the second listener instead of the
369c9d0bdefSKuniyuki Iwashima 	 * first one here.
370c9d0bdefSKuniyuki Iwashima 	 *
371c9d0bdefSKuniyuki Iwashima 	 * The fist listener is bind()ed with port 0 and,
372c9d0bdefSKuniyuki Iwashima 	 * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
373c9d0bdefSKuniyuki Iwashima 	 * calling listen() again will bind() the first listener
374c9d0bdefSKuniyuki Iwashima 	 * on a new ephemeral port and detach it from the existing
375c9d0bdefSKuniyuki Iwashima 	 * reuseport group.  (See: __inet_bind(), tcp_set_state())
376c9d0bdefSKuniyuki Iwashima 	 *
377c9d0bdefSKuniyuki Iwashima 	 * OTOH, the second one is bind()ed with a specific port,
378c9d0bdefSKuniyuki Iwashima 	 * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
379c9d0bdefSKuniyuki Iwashima 	 * resurrect the listener on the existing reuseport group.
380c9d0bdefSKuniyuki Iwashima 	 */
381c9d0bdefSKuniyuki Iwashima 	err = listen(test_case->servers[1], QLEN);
382c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK(err, "listen"))
383c9d0bdefSKuniyuki Iwashima 		return -1;
384c9d0bdefSKuniyuki Iwashima 
385c9d0bdefSKuniyuki Iwashima 	/* Migrate from the last listener to the second one.
386c9d0bdefSKuniyuki Iwashima 	 *
387c9d0bdefSKuniyuki Iwashima 	 * All listeners were detached out of the reuseport_map,
388c9d0bdefSKuniyuki Iwashima 	 * so migration will be done by kernel random pick from here.
389c9d0bdefSKuniyuki Iwashima 	 */
390c9d0bdefSKuniyuki Iwashima 	err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
391c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK(err, "shutdown"))
392c9d0bdefSKuniyuki Iwashima 		return -1;
393c9d0bdefSKuniyuki Iwashima 
394c9d0bdefSKuniyuki Iwashima 	/* Back to the existing reuseport group */
395c9d0bdefSKuniyuki Iwashima 	err = listen(test_case->servers[MIGRATED_TO], QLEN);
396c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK(err, "listen"))
397c9d0bdefSKuniyuki Iwashima 		return -1;
398c9d0bdefSKuniyuki Iwashima 
399c9d0bdefSKuniyuki Iwashima 	/* Migrate back to the last one from the second one */
400c9d0bdefSKuniyuki Iwashima 	err = shutdown(test_case->servers[1], SHUT_RDWR);
401c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK(err, "shutdown"))
402c9d0bdefSKuniyuki Iwashima 		return -1;
403c9d0bdefSKuniyuki Iwashima 
404c9d0bdefSKuniyuki Iwashima 	return 0;
405c9d0bdefSKuniyuki Iwashima }
406c9d0bdefSKuniyuki Iwashima 
count_requests(struct migrate_reuseport_test_case * test_case,struct test_migrate_reuseport * skel)407c9d0bdefSKuniyuki Iwashima static void count_requests(struct migrate_reuseport_test_case *test_case,
408c9d0bdefSKuniyuki Iwashima 			   struct test_migrate_reuseport *skel)
409c9d0bdefSKuniyuki Iwashima {
410c9d0bdefSKuniyuki Iwashima 	struct sockaddr_storage addr;
411c9d0bdefSKuniyuki Iwashima 	socklen_t len = sizeof(addr);
412c9d0bdefSKuniyuki Iwashima 	int err, cnt = 0, client;
413c9d0bdefSKuniyuki Iwashima 	char buf[MSGLEN];
414c9d0bdefSKuniyuki Iwashima 
415c9d0bdefSKuniyuki Iwashima 	err = settimeo(test_case->servers[MIGRATED_TO], 4000);
416c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK(err, "settimeo"))
417c9d0bdefSKuniyuki Iwashima 		goto out;
418c9d0bdefSKuniyuki Iwashima 
419c9d0bdefSKuniyuki Iwashima 	for (; cnt < NR_CLIENTS; cnt++) {
420c9d0bdefSKuniyuki Iwashima 		client = accept(test_case->servers[MIGRATED_TO],
421c9d0bdefSKuniyuki Iwashima 				(struct sockaddr *)&addr, &len);
422c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_NEQ(client, -1, "accept"))
423c9d0bdefSKuniyuki Iwashima 			goto out;
424c9d0bdefSKuniyuki Iwashima 
425c9d0bdefSKuniyuki Iwashima 		memset(buf, 0, MSGLEN);
426c9d0bdefSKuniyuki Iwashima 		read(client, &buf, MSGLEN);
427c9d0bdefSKuniyuki Iwashima 		close(client);
428c9d0bdefSKuniyuki Iwashima 
429c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_STREQ(buf, MSG, "read"))
430c9d0bdefSKuniyuki Iwashima 			goto out;
431c9d0bdefSKuniyuki Iwashima 	}
432c9d0bdefSKuniyuki Iwashima 
433c9d0bdefSKuniyuki Iwashima out:
434c9d0bdefSKuniyuki Iwashima 	ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
435c9d0bdefSKuniyuki Iwashima 
436c9d0bdefSKuniyuki Iwashima 	switch (test_case->state) {
437c9d0bdefSKuniyuki Iwashima 	case BPF_TCP_ESTABLISHED:
438c9d0bdefSKuniyuki Iwashima 		cnt = skel->bss->migrated_at_close;
439c9d0bdefSKuniyuki Iwashima 		break;
440c9d0bdefSKuniyuki Iwashima 	case BPF_TCP_SYN_RECV:
441c9d0bdefSKuniyuki Iwashima 		cnt = skel->bss->migrated_at_close_fastopen;
442c9d0bdefSKuniyuki Iwashima 		break;
443c9d0bdefSKuniyuki Iwashima 	case BPF_TCP_NEW_SYN_RECV:
444c9d0bdefSKuniyuki Iwashima 		if (test_case->expire_synack_timer)
445c9d0bdefSKuniyuki Iwashima 			cnt = skel->bss->migrated_at_send_synack;
446c9d0bdefSKuniyuki Iwashima 		else
447c9d0bdefSKuniyuki Iwashima 			cnt = skel->bss->migrated_at_recv_ack;
448c9d0bdefSKuniyuki Iwashima 		break;
449c9d0bdefSKuniyuki Iwashima 	default:
450c9d0bdefSKuniyuki Iwashima 		cnt = 0;
451c9d0bdefSKuniyuki Iwashima 	}
452c9d0bdefSKuniyuki Iwashima 
453c9d0bdefSKuniyuki Iwashima 	ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
454c9d0bdefSKuniyuki Iwashima }
455c9d0bdefSKuniyuki Iwashima 
run_test(struct migrate_reuseport_test_case * test_case,struct test_migrate_reuseport * skel)456c9d0bdefSKuniyuki Iwashima static void run_test(struct migrate_reuseport_test_case *test_case,
457c9d0bdefSKuniyuki Iwashima 		     struct test_migrate_reuseport *skel)
458c9d0bdefSKuniyuki Iwashima {
459c9d0bdefSKuniyuki Iwashima 	int err, saved_len;
460c9d0bdefSKuniyuki Iwashima 	char buf[16];
461c9d0bdefSKuniyuki Iwashima 
462c9d0bdefSKuniyuki Iwashima 	skel->bss->migrated_at_close = 0;
463c9d0bdefSKuniyuki Iwashima 	skel->bss->migrated_at_close_fastopen = 0;
464c9d0bdefSKuniyuki Iwashima 	skel->bss->migrated_at_send_synack = 0;
465c9d0bdefSKuniyuki Iwashima 	skel->bss->migrated_at_recv_ack = 0;
466c9d0bdefSKuniyuki Iwashima 
467c9d0bdefSKuniyuki Iwashima 	init_fds(test_case->servers, NR_SERVERS);
468c9d0bdefSKuniyuki Iwashima 	init_fds(test_case->clients, NR_CLIENTS);
469c9d0bdefSKuniyuki Iwashima 
470c9d0bdefSKuniyuki Iwashima 	if (test_case->fastopen) {
471c9d0bdefSKuniyuki Iwashima 		memset(buf, 0, sizeof(buf));
472c9d0bdefSKuniyuki Iwashima 
473c9d0bdefSKuniyuki Iwashima 		err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
474c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "setup_fastopen - setup"))
475c9d0bdefSKuniyuki Iwashima 			return;
476c9d0bdefSKuniyuki Iwashima 	}
477c9d0bdefSKuniyuki Iwashima 
478c9d0bdefSKuniyuki Iwashima 	err = start_servers(test_case, skel);
479c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK(err, "start_servers"))
480c9d0bdefSKuniyuki Iwashima 		goto close_servers;
481c9d0bdefSKuniyuki Iwashima 
482c9d0bdefSKuniyuki Iwashima 	if (test_case->drop_ack) {
483c9d0bdefSKuniyuki Iwashima 		/* Drop the final ACK of the 3-way handshake and stick the
484c9d0bdefSKuniyuki Iwashima 		 * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
485c9d0bdefSKuniyuki Iwashima 		 */
486c9d0bdefSKuniyuki Iwashima 		err = drop_ack(test_case, skel);
487c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "drop_ack"))
488c9d0bdefSKuniyuki Iwashima 			goto close_servers;
489c9d0bdefSKuniyuki Iwashima 	}
490c9d0bdefSKuniyuki Iwashima 
491*df71a42cSTaichi Nishimura 	/* Tie requests to the first four listeners */
492c9d0bdefSKuniyuki Iwashima 	err = start_clients(test_case);
493c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK(err, "start_clients"))
494c9d0bdefSKuniyuki Iwashima 		goto close_clients;
495c9d0bdefSKuniyuki Iwashima 
496c9d0bdefSKuniyuki Iwashima 	err = listen(test_case->servers[MIGRATED_TO], QLEN);
497c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK(err, "listen"))
498c9d0bdefSKuniyuki Iwashima 		goto close_clients;
499c9d0bdefSKuniyuki Iwashima 
500c9d0bdefSKuniyuki Iwashima 	err = update_maps(test_case, skel);
501c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK(err, "fill_maps"))
502c9d0bdefSKuniyuki Iwashima 		goto close_clients;
503c9d0bdefSKuniyuki Iwashima 
504c9d0bdefSKuniyuki Iwashima 	/* Migrate the requests in the accept queue only.
505c9d0bdefSKuniyuki Iwashima 	 * TCP_NEW_SYN_RECV requests are not migrated at this point.
506c9d0bdefSKuniyuki Iwashima 	 */
507c9d0bdefSKuniyuki Iwashima 	err = migrate_dance(test_case);
508c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK(err, "migrate_dance"))
509c9d0bdefSKuniyuki Iwashima 		goto close_clients;
510c9d0bdefSKuniyuki Iwashima 
511c9d0bdefSKuniyuki Iwashima 	if (test_case->expire_synack_timer) {
512c9d0bdefSKuniyuki Iwashima 		/* Wait for SYN+ACK timers to expire so that
513c9d0bdefSKuniyuki Iwashima 		 * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
514c9d0bdefSKuniyuki Iwashima 		 */
515c9d0bdefSKuniyuki Iwashima 		sleep(1);
516c9d0bdefSKuniyuki Iwashima 	}
517c9d0bdefSKuniyuki Iwashima 
518c9d0bdefSKuniyuki Iwashima 	if (test_case->link) {
519c9d0bdefSKuniyuki Iwashima 		/* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
520c9d0bdefSKuniyuki Iwashima 		err = pass_ack(test_case);
521c9d0bdefSKuniyuki Iwashima 		if (!ASSERT_OK(err, "pass_ack"))
522c9d0bdefSKuniyuki Iwashima 			goto close_clients;
523c9d0bdefSKuniyuki Iwashima 	}
524c9d0bdefSKuniyuki Iwashima 
525c9d0bdefSKuniyuki Iwashima 	count_requests(test_case, skel);
526c9d0bdefSKuniyuki Iwashima 
527c9d0bdefSKuniyuki Iwashima close_clients:
528c9d0bdefSKuniyuki Iwashima 	close_fds(test_case->clients, NR_CLIENTS);
529c9d0bdefSKuniyuki Iwashima 
530c9d0bdefSKuniyuki Iwashima 	if (test_case->link) {
531c9d0bdefSKuniyuki Iwashima 		err = pass_ack(test_case);
532c9d0bdefSKuniyuki Iwashima 		ASSERT_OK(err, "pass_ack - clean up");
533c9d0bdefSKuniyuki Iwashima 	}
534c9d0bdefSKuniyuki Iwashima 
535c9d0bdefSKuniyuki Iwashima close_servers:
536c9d0bdefSKuniyuki Iwashima 	close_fds(test_case->servers, NR_SERVERS);
537c9d0bdefSKuniyuki Iwashima 
538c9d0bdefSKuniyuki Iwashima 	if (test_case->fastopen) {
539c9d0bdefSKuniyuki Iwashima 		err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
540c9d0bdefSKuniyuki Iwashima 		ASSERT_OK(err, "setup_fastopen - restore");
541c9d0bdefSKuniyuki Iwashima 	}
542c9d0bdefSKuniyuki Iwashima }
543c9d0bdefSKuniyuki Iwashima 
serial_test_migrate_reuseport(void)544d3f7b166SYucong Sun void serial_test_migrate_reuseport(void)
545c9d0bdefSKuniyuki Iwashima {
546c9d0bdefSKuniyuki Iwashima 	struct test_migrate_reuseport *skel;
547c9d0bdefSKuniyuki Iwashima 	int i;
548c9d0bdefSKuniyuki Iwashima 
549c9d0bdefSKuniyuki Iwashima 	skel = test_migrate_reuseport__open_and_load();
550c9d0bdefSKuniyuki Iwashima 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
551c9d0bdefSKuniyuki Iwashima 		return;
552c9d0bdefSKuniyuki Iwashima 
553c9d0bdefSKuniyuki Iwashima 	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
554c9d0bdefSKuniyuki Iwashima 		test__start_subtest(test_cases[i].name);
555c9d0bdefSKuniyuki Iwashima 		run_test(&test_cases[i], skel);
556c9d0bdefSKuniyuki Iwashima 	}
557c9d0bdefSKuniyuki Iwashima 
558c9d0bdefSKuniyuki Iwashima 	test_migrate_reuseport__destroy(skel);
559c9d0bdefSKuniyuki Iwashima }
560