1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2020 Cloudflare
3 /*
4  * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
5  * Covers:
6  *  1. BPF map operations - bpf_map_{update,lookup,delete}_elem
7  *  2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
8  *  3. BPF reuseport helper - bpf_sk_select_reuseport
9  */
10 
11 #include <linux/compiler.h>
12 #include <errno.h>
13 #include <error.h>
14 #include <limits.h>
15 #include <netinet/in.h>
16 #include <pthread.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/select.h>
20 #include <unistd.h>
21 
22 #include <bpf/bpf.h>
23 #include <bpf/libbpf.h>
24 
25 #include "bpf_util.h"
26 #include "test_progs.h"
27 #include "test_sockmap_listen.skel.h"
28 
29 #define IO_TIMEOUT_SEC 30
30 #define MAX_STRERR_LEN 256
31 #define MAX_TEST_NAME 80
32 
33 #define _FAIL(errnum, fmt...)                                                  \
34 	({                                                                     \
35 		error_at_line(0, (errnum), __func__, __LINE__, fmt);           \
36 		CHECK_FAIL(true);                                              \
37 	})
38 #define FAIL(fmt...) _FAIL(0, fmt)
39 #define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
40 #define FAIL_LIBBPF(err, msg)                                                  \
41 	({                                                                     \
42 		char __buf[MAX_STRERR_LEN];                                    \
43 		libbpf_strerror((err), __buf, sizeof(__buf));                  \
44 		FAIL("%s: %s", (msg), __buf);                                  \
45 	})
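/* Example of how the failure helpers above are meant to be used (an
 * illustrative sketch, not code taken from this file):
 *
 *	fd = socket(AF_INET, SOCK_STREAM, 0);
 *	if (fd == -1)
 *		FAIL_ERRNO("socket");	// reports errno with function and line
 */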
46 
47 /* Wrappers that fail the test on error and report it. */
48 
49 #define xaccept_nonblock(fd, addr, len)                                        \
50 	({                                                                     \
51 		int __ret =                                                    \
52 			accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC);   \
53 		if (__ret == -1)                                               \
54 			FAIL_ERRNO("accept");                                  \
55 		__ret;                                                         \
56 	})
57 
58 #define xbind(fd, addr, len)                                                   \
59 	({                                                                     \
60 		int __ret = bind((fd), (addr), (len));                         \
61 		if (__ret == -1)                                               \
62 			FAIL_ERRNO("bind");                                    \
63 		__ret;                                                         \
64 	})
65 
66 #define xclose(fd)                                                             \
67 	({                                                                     \
68 		int __ret = close((fd));                                       \
69 		if (__ret == -1)                                               \
70 			FAIL_ERRNO("close");                                   \
71 		__ret;                                                         \
72 	})
73 
74 #define xconnect(fd, addr, len)                                                \
75 	({                                                                     \
76 		int __ret = connect((fd), (addr), (len));                      \
77 		if (__ret == -1)                                               \
78 			FAIL_ERRNO("connect");                                 \
79 		__ret;                                                         \
80 	})
81 
82 #define xgetsockname(fd, addr, len)                                            \
83 	({                                                                     \
84 		int __ret = getsockname((fd), (addr), (len));                  \
85 		if (__ret == -1)                                               \
86 			FAIL_ERRNO("getsockname");                             \
87 		__ret;                                                         \
88 	})
89 
90 #define xgetsockopt(fd, level, name, val, len)                                 \
91 	({                                                                     \
92 		int __ret = getsockopt((fd), (level), (name), (val), (len));   \
93 		if (__ret == -1)                                               \
94 			FAIL_ERRNO("getsockopt(" #name ")");                   \
95 		__ret;                                                         \
96 	})
97 
98 #define xlisten(fd, backlog)                                                   \
99 	({                                                                     \
100 		int __ret = listen((fd), (backlog));                           \
101 		if (__ret == -1)                                               \
102 			FAIL_ERRNO("listen");                                  \
103 		__ret;                                                         \
104 	})
105 
106 #define xsetsockopt(fd, level, name, val, len)                                 \
107 	({                                                                     \
108 		int __ret = setsockopt((fd), (level), (name), (val), (len));   \
109 		if (__ret == -1)                                               \
110 			FAIL_ERRNO("setsockopt(" #name ")");                   \
111 		__ret;                                                         \
112 	})
113 
114 #define xsend(fd, buf, len, flags)                                             \
115 	({                                                                     \
116 		ssize_t __ret = send((fd), (buf), (len), (flags));             \
117 		if (__ret == -1)                                               \
118 			FAIL_ERRNO("send");                                    \
119 		__ret;                                                         \
120 	})
121 
122 #define xrecv_nonblock(fd, buf, len, flags)                                    \
123 	({                                                                     \
124 		ssize_t __ret = recv_timeout((fd), (buf), (len), (flags),      \
125 					     IO_TIMEOUT_SEC);                  \
126 		if (__ret == -1)                                               \
127 			FAIL_ERRNO("recv");                                    \
128 		__ret;                                                         \
129 	})
130 
131 #define xsocket(family, sotype, flags)                                         \
132 	({                                                                     \
133 		int __ret = socket(family, sotype, flags);                     \
134 		if (__ret == -1)                                               \
135 			FAIL_ERRNO("socket");                                  \
136 		__ret;                                                         \
137 	})
138 
139 #define xbpf_map_delete_elem(fd, key)                                          \
140 	({                                                                     \
141 		int __ret = bpf_map_delete_elem((fd), (key));                  \
142 		if (__ret < 0)                                                 \
143 			FAIL_ERRNO("map_delete");                              \
144 		__ret;                                                         \
145 	})
146 
147 #define xbpf_map_lookup_elem(fd, key, val)                                     \
148 	({                                                                     \
149 		int __ret = bpf_map_lookup_elem((fd), (key), (val));           \
150 		if (__ret < 0)                                                 \
151 			FAIL_ERRNO("map_lookup");                              \
152 		__ret;                                                         \
153 	})
154 
155 #define xbpf_map_update_elem(fd, key, val, flags)                              \
156 	({                                                                     \
157 		int __ret = bpf_map_update_elem((fd), (key), (val), (flags));  \
158 		if (__ret < 0)                                                 \
159 			FAIL_ERRNO("map_update");                              \
160 		__ret;                                                         \
161 	})
162 
163 #define xbpf_prog_attach(prog, target, type, flags)                            \
164 	({                                                                     \
165 		int __ret =                                                    \
166 			bpf_prog_attach((prog), (target), (type), (flags));    \
167 		if (__ret < 0)                                                 \
168 			FAIL_ERRNO("prog_attach(" #type ")");                  \
169 		__ret;                                                         \
170 	})
171 
172 #define xbpf_prog_detach2(prog, target, type)                                  \
173 	({                                                                     \
174 		int __ret = bpf_prog_detach2((prog), (target), (type));        \
175 		if (__ret < 0)                                                 \
176 			FAIL_ERRNO("prog_detach2(" #type ")");                 \
177 		__ret;                                                         \
178 	})
179 
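/* pthread_create() and pthread_join() return the error number directly
 * instead of setting errno, so the wrappers below copy the return value
 * into errno before reporting it.
 */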
180 #define xpthread_create(thread, attr, func, arg)                               \
181 	({                                                                     \
182 		int __ret = pthread_create((thread), (attr), (func), (arg));   \
183 		errno = __ret;                                                 \
184 		if (__ret)                                                     \
185 			FAIL_ERRNO("pthread_create");                          \
186 		__ret;                                                         \
187 	})
188 
189 #define xpthread_join(thread, retval)                                          \
190 	({                                                                     \
191 		int __ret = pthread_join((thread), (retval));                  \
192 		errno = __ret;                                                 \
193 		if (__ret)                                                     \
194 			FAIL_ERRNO("pthread_join");                            \
195 		__ret;                                                         \
196 	})
197 
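/* Wait until fd is readable or timeout_sec expires. Returns 0 when the
 * socket is readable, -1 on error or timeout (with errno set to ETIME).
 */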
198 static int poll_read(int fd, unsigned int timeout_sec)
199 {
200 	struct timeval timeout = { .tv_sec = timeout_sec };
201 	fd_set rfds;
202 	int r;
203 
204 	FD_ZERO(&rfds);
205 	FD_SET(fd, &rfds);
206 
207 	r = select(fd + 1, &rfds, NULL, NULL, &timeout);
208 	if (r == 0)
209 		errno = ETIME;
210 
211 	return r == 1 ? 0 : -1;
212 }
213 
214 static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
215 			  unsigned int timeout_sec)
216 {
217 	if (poll_read(fd, timeout_sec))
218 		return -1;
219 
220 	return accept(fd, addr, len);
221 }
222 
223 static int recv_timeout(int fd, void *buf, size_t len, int flags,
224 			unsigned int timeout_sec)
225 {
226 	if (poll_read(fd, timeout_sec))
227 		return -1;
228 
229 	return recv(fd, buf, len, flags);
230 }
231 
232 static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
233 {
234 	struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
235 
236 	addr4->sin_family = AF_INET;
237 	addr4->sin_port = 0;
238 	addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
239 	*len = sizeof(*addr4);
240 }
241 
242 static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
243 {
244 	struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
245 
246 	addr6->sin6_family = AF_INET6;
247 	addr6->sin6_port = 0;
248 	addr6->sin6_addr = in6addr_loopback;
249 	*len = sizeof(*addr6);
250 }
251 
252 static void init_addr_loopback(int family, struct sockaddr_storage *ss,
253 			       socklen_t *len)
254 {
255 	switch (family) {
256 	case AF_INET:
257 		init_addr_loopback4(ss, len);
258 		return;
259 	case AF_INET6:
260 		init_addr_loopback6(ss, len);
261 		return;
262 	default:
263 		FAIL("unsupported address family %d", family);
264 	}
265 }
266 
267 static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
268 {
269 	return (struct sockaddr *)ss;
270 }
271 
272 static int enable_reuseport(int s, int progfd)
273 {
274 	int err, one = 1;
275 
276 	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
277 	if (err)
278 		return -1;
279 	err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
280 			  sizeof(progfd));
281 	if (err)
282 		return -1;
283 
284 	return 0;
285 }
286 
287 static int socket_loopback_reuseport(int family, int sotype, int progfd)
288 {
289 	struct sockaddr_storage addr;
290 	socklen_t len;
291 	int err, s;
292 
293 	init_addr_loopback(family, &addr, &len);
294 
295 	s = xsocket(family, sotype, 0);
296 	if (s == -1)
297 		return -1;
298 
299 	if (progfd >= 0)
300 		enable_reuseport(s, progfd);
301 
302 	err = xbind(s, sockaddr(&addr), len);
303 	if (err)
304 		goto close;
305 
306 	if (sotype & SOCK_DGRAM)
307 		return s;
308 
309 	err = xlisten(s, SOMAXCONN);
310 	if (err)
311 		goto close;
312 
313 	return s;
314 close:
315 	xclose(s);
316 	return -1;
317 }
318 
319 static int socket_loopback(int family, int sotype)
320 {
321 	return socket_loopback_reuseport(family, sotype, -1);
322 }
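/* Most tests below share one setup pattern: create a loopback socket bound
 * to an ephemeral port, recover its address with getsockname(), then point
 * clients at that address. A minimal sketch:
 *
 *	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
 *	len = sizeof(addr);
 *	xgetsockname(s, sockaddr(&addr), &len);
 *	c = xsocket(family, sotype, 0);
 *	xconnect(c, sockaddr(&addr), len);
 */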
323 
324 static void test_insert_invalid(int family, int sotype, int mapfd)
325 {
326 	u32 key = 0;
327 	u64 value;
328 	int err;
329 
330 	value = -1;
331 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
332 	if (!err || errno != EINVAL)
333 		FAIL_ERRNO("map_update: expected EINVAL");
334 
335 	value = INT_MAX;
336 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
337 	if (!err || errno != EBADF)
338 		FAIL_ERRNO("map_update: expected EBADF");
339 }
340 
341 static void test_insert_opened(int family, int sotype, int mapfd)
342 {
343 	u32 key = 0;
344 	u64 value;
345 	int err, s;
346 
347 	s = xsocket(family, sotype, 0);
348 	if (s == -1)
349 		return;
350 
351 	errno = 0;
352 	value = s;
353 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
354 	if (sotype == SOCK_STREAM) {
355 		if (!err || errno != EOPNOTSUPP)
356 			FAIL_ERRNO("map_update: expected EOPNOTSUPP");
357 	} else if (err)
358 		FAIL_ERRNO("map_update: expected success");
359 	xclose(s);
360 }
361 
362 static void test_insert_bound(int family, int sotype, int mapfd)
363 {
364 	struct sockaddr_storage addr;
365 	socklen_t len;
366 	u32 key = 0;
367 	u64 value;
368 	int err, s;
369 
370 	init_addr_loopback(family, &addr, &len);
371 
372 	s = xsocket(family, sotype, 0);
373 	if (s == -1)
374 		return;
375 
376 	err = xbind(s, sockaddr(&addr), len);
377 	if (err)
378 		goto close;
379 
380 	errno = 0;
381 	value = s;
382 	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
383 	if (!err || errno != EOPNOTSUPP)
384 		FAIL_ERRNO("map_update: expected EOPNOTSUPP");
385 close:
386 	xclose(s);
387 }
388 
389 static void test_insert(int family, int sotype, int mapfd)
390 {
391 	u64 value;
392 	u32 key;
393 	int s;
394 
395 	s = socket_loopback(family, sotype);
396 	if (s < 0)
397 		return;
398 
399 	key = 0;
400 	value = s;
401 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
402 	xclose(s);
403 }
404 
405 static void test_delete_after_insert(int family, int sotype, int mapfd)
406 {
407 	u64 value;
408 	u32 key;
409 	int s;
410 
411 	s = socket_loopback(family, sotype);
412 	if (s < 0)
413 		return;
414 
415 	key = 0;
416 	value = s;
417 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
418 	xbpf_map_delete_elem(mapfd, &key);
419 	xclose(s);
420 }
421 
422 static void test_delete_after_close(int family, int sotype, int mapfd)
423 {
424 	int err, s;
425 	u64 value;
426 	u32 key;
427 
428 	s = socket_loopback(family, sotype);
429 	if (s < 0)
430 		return;
431 
432 	key = 0;
433 	value = s;
434 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
435 
436 	xclose(s);
437 
438 	errno = 0;
439 	err = bpf_map_delete_elem(mapfd, &key);
440 	if (!err || (errno != EINVAL && errno != ENOENT))
441 		/* SOCKMAP and SOCKHASH return different error codes */
442 		FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
443 }
444 
445 static void test_lookup_after_insert(int family, int sotype, int mapfd)
446 {
447 	u64 cookie, value;
448 	socklen_t len;
449 	u32 key;
450 	int s;
451 
452 	s = socket_loopback(family, sotype);
453 	if (s < 0)
454 		return;
455 
456 	key = 0;
457 	value = s;
458 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
459 
460 	len = sizeof(cookie);
461 	xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
462 
463 	xbpf_map_lookup_elem(mapfd, &key, &value);
464 
465 	if (value != cookie) {
466 		FAIL("map_lookup: have %#llx, want %#llx",
467 		     (unsigned long long)value, (unsigned long long)cookie);
468 	}
469 
470 	xclose(s);
471 }
472 
473 static void test_lookup_after_delete(int family, int sotype, int mapfd)
474 {
475 	int err, s;
476 	u64 value;
477 	u32 key;
478 
479 	s = socket_loopback(family, sotype);
480 	if (s < 0)
481 		return;
482 
483 	key = 0;
484 	value = s;
485 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
486 	xbpf_map_delete_elem(mapfd, &key);
487 
488 	errno = 0;
489 	err = bpf_map_lookup_elem(mapfd, &key, &value);
490 	if (!err || errno != ENOENT)
491 		FAIL_ERRNO("map_lookup: expected ENOENT");
492 
493 	xclose(s);
494 }
495 
496 static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
497 {
498 	u32 key, value32;
499 	int err, s;
500 
501 	s = socket_loopback(family, sotype);
502 	if (s < 0)
503 		return;
504 
505 	mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
506 			       sizeof(value32), 1, 0);
507 	if (mapfd < 0) {
508 		FAIL_ERRNO("map_create");
509 		goto close;
510 	}
511 
512 	key = 0;
513 	value32 = s;
514 	xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
515 
516 	errno = 0;
517 	err = bpf_map_lookup_elem(mapfd, &key, &value32);
518 	if (!err || errno != ENOSPC)
519 		FAIL_ERRNO("map_lookup: expected ENOSPC");
520 
521 	xclose(mapfd);
522 close:
523 	xclose(s);
524 }
525 
526 static void test_update_existing(int family, int sotype, int mapfd)
527 {
528 	int s1, s2;
529 	u64 value;
530 	u32 key;
531 
532 	s1 = socket_loopback(family, sotype);
533 	if (s1 < 0)
534 		return;
535 
536 	s2 = socket_loopback(family, sotype);
537 	if (s2 < 0)
538 		goto close_s1;
539 
540 	key = 0;
541 	value = s1;
542 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
543 
544 	value = s2;
545 	xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
546 	xclose(s2);
547 close_s1:
548 	xclose(s1);
549 }
550 
551 /* Exercise the code path where we destroy child sockets that never
552  * got accept()'ed, aka orphans, when parent socket gets closed.
553  */
554 static void test_destroy_orphan_child(int family, int sotype, int mapfd)
555 {
556 	struct sockaddr_storage addr;
557 	socklen_t len;
558 	int err, s, c;
559 	u64 value;
560 	u32 key;
561 
562 	s = socket_loopback(family, sotype);
563 	if (s < 0)
564 		return;
565 
566 	len = sizeof(addr);
567 	err = xgetsockname(s, sockaddr(&addr), &len);
568 	if (err)
569 		goto close_srv;
570 
571 	key = 0;
572 	value = s;
573 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
574 
575 	c = xsocket(family, sotype, 0);
576 	if (c == -1)
577 		goto close_srv;
578 
579 	xconnect(c, sockaddr(&addr), len);
580 	xclose(c);
581 close_srv:
582 	xclose(s);
583 }
584 
585 /* Perform a passive open after removing listening socket from SOCKMAP
586  * to ensure that callbacks get restored properly.
587  */
588 static void test_clone_after_delete(int family, int sotype, int mapfd)
589 {
590 	struct sockaddr_storage addr;
591 	socklen_t len;
592 	int err, s, c;
593 	u64 value;
594 	u32 key;
595 
596 	s = socket_loopback(family, sotype);
597 	if (s < 0)
598 		return;
599 
600 	len = sizeof(addr);
601 	err = xgetsockname(s, sockaddr(&addr), &len);
602 	if (err)
603 		goto close_srv;
604 
605 	key = 0;
606 	value = s;
607 	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
608 	xbpf_map_delete_elem(mapfd, &key);
609 
610 	c = xsocket(family, sotype, 0);
611 	if (c < 0)
612 		goto close_srv;
613 
614 	xconnect(c, sockaddr(&addr), len);
615 	xclose(c);
616 close_srv:
617 	xclose(s);
618 }
619 
620 /* Check that child socket that got created while parent was in a
621  * SOCKMAP, but got accept()'ed only after the parent has been removed
622  * from SOCKMAP, gets cloned without parent psock state or callbacks.
623  */
624 static void test_accept_after_delete(int family, int sotype, int mapfd)
625 {
626 	struct sockaddr_storage addr;
627 	const u32 zero = 0;
628 	int err, s, c, p;
629 	socklen_t len;
630 	u64 value;
631 
632 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
633 	if (s == -1)
634 		return;
635 
636 	len = sizeof(addr);
637 	err = xgetsockname(s, sockaddr(&addr), &len);
638 	if (err)
639 		goto close_srv;
640 
641 	value = s;
642 	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
643 	if (err)
644 		goto close_srv;
645 
646 	c = xsocket(family, sotype, 0);
647 	if (c == -1)
648 		goto close_srv;
649 
650 	/* Create child while parent is in sockmap */
651 	err = xconnect(c, sockaddr(&addr), len);
652 	if (err)
653 		goto close_cli;
654 
655 	/* Remove parent from sockmap */
656 	err = xbpf_map_delete_elem(mapfd, &zero);
657 	if (err)
658 		goto close_cli;
659 
660 	p = xaccept_nonblock(s, NULL, NULL);
661 	if (p == -1)
662 		goto close_cli;
663 
664 	/* Check that child sk_user_data is not set */
665 	value = p;
666 	xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
667 
668 	xclose(p);
669 close_cli:
670 	xclose(c);
671 close_srv:
672 	xclose(s);
673 }
674 
675 /* Check that child socket that got created and accepted while parent
676  * was in a SOCKMAP is cloned without parent psock state or callbacks.
677  */
678 static void test_accept_before_delete(int family, int sotype, int mapfd)
679 {
680 	struct sockaddr_storage addr;
681 	const u32 zero = 0, one = 1;
682 	int err, s, c, p;
683 	socklen_t len;
684 	u64 value;
685 
686 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
687 	if (s == -1)
688 		return;
689 
690 	len = sizeof(addr);
691 	err = xgetsockname(s, sockaddr(&addr), &len);
692 	if (err)
693 		goto close_srv;
694 
695 	value = s;
696 	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
697 	if (err)
698 		goto close_srv;
699 
700 	c = xsocket(family, sotype, 0);
701 	if (c == -1)
702 		goto close_srv;
703 
704 	/* Create & accept child while parent is in sockmap */
705 	err = xconnect(c, sockaddr(&addr), len);
706 	if (err)
707 		goto close_cli;
708 
709 	p = xaccept_nonblock(s, NULL, NULL);
710 	if (p == -1)
711 		goto close_cli;
712 
713 	/* Check that child sk_user_data is not set */
714 	value = p;
715 	xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
716 
717 	xclose(p);
718 close_cli:
719 	xclose(c);
720 close_srv:
721 	xclose(s);
722 }
723 
724 struct connect_accept_ctx {
725 	int sockfd;
726 	unsigned int done;
727 	unsigned int nr_iter;
728 };
729 
730 static bool is_thread_done(struct connect_accept_ctx *ctx)
731 {
732 	return READ_ONCE(ctx->done);
733 }
734 
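/* Worker for the race tests below: keeps connecting a fresh client to
 * ctx->sockfd and accepting the peer, up to ctx->nr_iter times, then
 * signals completion via ctx->done.
 */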
735 static void *connect_accept_thread(void *arg)
736 {
737 	struct connect_accept_ctx *ctx = arg;
738 	struct sockaddr_storage addr;
739 	int family, socktype;
740 	socklen_t len;
741 	int err, i, s;
742 
743 	s = ctx->sockfd;
744 
745 	len = sizeof(addr);
746 	err = xgetsockname(s, sockaddr(&addr), &len);
747 	if (err)
748 		goto done;
749 
750 	len = sizeof(family);
751 	err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
752 	if (err)
753 		goto done;
754 
755 	len = sizeof(socktype);
756 	err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
757 	if (err)
758 		goto done;
759 
760 	for (i = 0; i < ctx->nr_iter; i++) {
761 		int c, p;
762 
763 		c = xsocket(family, socktype, 0);
764 		if (c < 0)
765 			break;
766 
767 		err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
768 		if (err) {
769 			xclose(c);
770 			break;
771 		}
772 
773 		p = xaccept_nonblock(s, NULL, NULL);
774 		if (p < 0) {
775 			xclose(c);
776 			break;
777 		}
778 
779 		xclose(p);
780 		xclose(c);
781 	}
782 done:
783 	WRITE_ONCE(ctx->done, 1);
784 	return NULL;
785 }
786 
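/* Race map insert/delete of the listener against a thread that keeps
 * establishing and accepting connections, so that map updates overlap with
 * child sockets being cloned from the listener.
 */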
787 static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
788 {
789 	struct connect_accept_ctx ctx = { 0 };
790 	struct sockaddr_storage addr;
791 	socklen_t len;
792 	u32 zero = 0;
793 	pthread_t t;
794 	int err, s;
795 	u64 value;
796 
797 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
798 	if (s < 0)
799 		return;
800 
801 	len = sizeof(addr);
802 	err = xgetsockname(s, sockaddr(&addr), &len);
803 	if (err)
804 		goto close;
805 
806 	ctx.sockfd = s;
807 	ctx.nr_iter = 1000;
808 
809 	err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
810 	if (err)
811 		goto close;
812 
813 	value = s;
814 	while (!is_thread_done(&ctx)) {
815 		err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
816 		if (err)
817 			break;
818 
819 		err = xbpf_map_delete_elem(mapfd, &zero);
820 		if (err)
821 			break;
822 	}
823 
824 	xpthread_join(t, NULL);
825 close:
826 	xclose(s);
827 }
828 
829 static void *listen_thread(void *arg)
830 {
831 	struct sockaddr unspec = { AF_UNSPEC };
832 	struct connect_accept_ctx *ctx = arg;
833 	int err, i, s;
834 
835 	s = ctx->sockfd;
836 
837 	for (i = 0; i < ctx->nr_iter; i++) {
838 		err = xlisten(s, 1);
839 		if (err)
840 			break;
841 		err = xconnect(s, &unspec, sizeof(unspec));
842 		if (err)
843 			break;
844 	}
845 
846 	WRITE_ONCE(ctx->done, 1);
847 	return NULL;
848 }
849 
850 static void test_race_insert_listen(int family, int socktype, int mapfd)
851 {
852 	struct connect_accept_ctx ctx = { 0 };
853 	const u32 zero = 0;
854 	const int one = 1;
855 	pthread_t t;
856 	int err, s;
857 	u64 value;
858 
859 	s = xsocket(family, socktype, 0);
860 	if (s < 0)
861 		return;
862 
863 	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
864 	if (err)
865 		goto close;
866 
867 	ctx.sockfd = s;
868 	ctx.nr_iter = 10000;
869 
870 	err = xpthread_create(&t, NULL, listen_thread, &ctx);
871 	if (err)
872 		goto close;
873 
874 	value = s;
875 	while (!is_thread_done(&ctx)) {
876 		err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
877 		/* Expecting EOPNOTSUPP before listen() */
878 		if (err && errno != EOPNOTSUPP) {
879 			FAIL_ERRNO("map_update");
880 			break;
881 		}
882 
883 		err = bpf_map_delete_elem(mapfd, &zero);
884 		/* Expecting no entry after unhash on connect(AF_UNSPEC) */
885 		if (err && errno != EINVAL && errno != ENOENT) {
886 			FAIL_ERRNO("map_delete");
887 			break;
888 		}
889 	}
890 
891 	xpthread_join(t, NULL);
892 close:
893 	xclose(s);
894 }
895 
896 static void zero_verdict_count(int mapfd)
897 {
898 	unsigned int zero = 0;
899 	int key;
900 
901 	key = SK_DROP;
902 	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
903 	key = SK_PASS;
904 	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
905 }
906 
907 enum redir_mode {
908 	REDIR_INGRESS,
909 	REDIR_EGRESS,
910 };
911 
912 static const char *redir_mode_str(enum redir_mode mode)
913 {
914 	switch (mode) {
915 	case REDIR_INGRESS:
916 		return "ingress";
917 	case REDIR_EGRESS:
918 		return "egress";
919 	default:
920 		return "unknown";
921 	}
922 }
923 
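/* Place two sockets at keys 0 and 1 of the map under test. These are the
 * fixed slots the BPF redirect programs operate on.
 */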
924 static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
925 {
926 	u64 value;
927 	u32 key;
928 	int err;
929 
930 	key = 0;
931 	value = fd1;
932 	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
933 	if (err)
934 		return err;
935 
936 	key = 1;
937 	value = fd2;
938 	return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
939 }
940 
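/* End-to-end redirect between two established connections accepted from the
 * same listener: p0 and p1 go into the sockmap, a byte is written on the
 * c1/p1 pair (depending on mode), and it is expected to arrive on c0 with
 * the verdict program counting exactly one SK_PASS.
 */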
941 static void redir_to_connected(int family, int sotype, int sock_mapfd,
942 			       int verd_mapfd, enum redir_mode mode)
943 {
944 	const char *log_prefix = redir_mode_str(mode);
945 	struct sockaddr_storage addr;
946 	int s, c0, c1, p0, p1;
947 	unsigned int pass;
948 	socklen_t len;
949 	int err, n;
950 	u32 key;
951 	char b;
952 
953 	zero_verdict_count(verd_mapfd);
954 
955 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
956 	if (s < 0)
957 		return;
958 
959 	len = sizeof(addr);
960 	err = xgetsockname(s, sockaddr(&addr), &len);
961 	if (err)
962 		goto close_srv;
963 
964 	c0 = xsocket(family, sotype, 0);
965 	if (c0 < 0)
966 		goto close_srv;
967 	err = xconnect(c0, sockaddr(&addr), len);
968 	if (err)
969 		goto close_cli0;
970 
971 	p0 = xaccept_nonblock(s, NULL, NULL);
972 	if (p0 < 0)
973 		goto close_cli0;
974 
975 	c1 = xsocket(family, sotype, 0);
976 	if (c1 < 0)
977 		goto close_peer0;
978 	err = xconnect(c1, sockaddr(&addr), len);
979 	if (err)
980 		goto close_cli1;
981 
982 	p1 = xaccept_nonblock(s, NULL, NULL);
983 	if (p1 < 0)
984 		goto close_cli1;
985 
986 	err = add_to_sockmap(sock_mapfd, p0, p1);
987 	if (err)
988 		goto close_peer1;
989 
990 	n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
991 	if (n < 0)
992 		FAIL_ERRNO("%s: write", log_prefix);
993 	if (n == 0)
994 		FAIL("%s: incomplete write", log_prefix);
995 	if (n < 1)
996 		goto close_peer1;
997 
998 	key = SK_PASS;
999 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1000 	if (err)
1001 		goto close_peer1;
1002 	if (pass != 1)
1003 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1004 	n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
1005 	if (n < 0)
1006 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1007 	if (n == 0)
1008 		FAIL("%s: incomplete recv", log_prefix);
1009 
1010 close_peer1:
1011 	xclose(p1);
1012 close_cli1:
1013 	xclose(c1);
1014 close_peer0:
1015 	xclose(p0);
1016 close_cli0:
1017 	xclose(c0);
1018 close_srv:
1019 	xclose(s);
1020 }
1021 
1022 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
1023 					struct bpf_map *inner_map, int family,
1024 					int sotype)
1025 {
1026 	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
1027 	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
1028 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1029 	int sock_map = bpf_map__fd(inner_map);
1030 	int err;
1031 
1032 	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
1033 	if (err)
1034 		return;
1035 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
1036 	if (err)
1037 		goto detach;
1038 
1039 	redir_to_connected(family, sotype, sock_map, verdict_map,
1040 			   REDIR_INGRESS);
1041 
1042 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
1043 detach:
1044 	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
1045 }
1046 
1047 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
1048 					struct bpf_map *inner_map, int family,
1049 					int sotype)
1050 {
1051 	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
1052 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1053 	int sock_map = bpf_map__fd(inner_map);
1054 	int err;
1055 
1056 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
1057 	if (err)
1058 		return;
1059 
1060 	redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
1061 
1062 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
1063 }
1064 
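/* Like redir_to_connected(), but the sockmap holds the listening socket
 * itself. Redirecting to a listener must fail, so the verdict program is
 * expected to count exactly one SK_DROP.
 */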
1065 static void redir_to_listening(int family, int sotype, int sock_mapfd,
1066 			       int verd_mapfd, enum redir_mode mode)
1067 {
1068 	const char *log_prefix = redir_mode_str(mode);
1069 	struct sockaddr_storage addr;
1070 	int s, c, p, err, n;
1071 	unsigned int drop;
1072 	socklen_t len;
1073 	u32 key;
1074 
1075 	zero_verdict_count(verd_mapfd);
1076 
1077 	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
1078 	if (s < 0)
1079 		return;
1080 
1081 	len = sizeof(addr);
1082 	err = xgetsockname(s, sockaddr(&addr), &len);
1083 	if (err)
1084 		goto close_srv;
1085 
1086 	c = xsocket(family, sotype, 0);
1087 	if (c < 0)
1088 		goto close_srv;
1089 	err = xconnect(c, sockaddr(&addr), len);
1090 	if (err)
1091 		goto close_cli;
1092 
1093 	p = xaccept_nonblock(s, NULL, NULL);
1094 	if (p < 0)
1095 		goto close_cli;
1096 
1097 	err = add_to_sockmap(sock_mapfd, s, p);
1098 	if (err)
1099 		goto close_peer;
1100 
1101 	n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
1102 	if (n < 0 && errno != EACCES)
1103 		FAIL_ERRNO("%s: write", log_prefix);
1104 	if (n == 0)
1105 		FAIL("%s: incomplete write", log_prefix);
1106 	if (n < 1)
1107 		goto close_peer;
1108 
1109 	key = SK_DROP;
1110 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
1111 	if (err)
1112 		goto close_peer;
1113 	if (drop != 1)
1114 		FAIL("%s: want drop count 1, have %d", log_prefix, drop);
1115 
1116 close_peer:
1117 	xclose(p);
1118 close_cli:
1119 	xclose(c);
1120 close_srv:
1121 	xclose(s);
1122 }
1123 
1124 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
1125 					struct bpf_map *inner_map, int family,
1126 					int sotype)
1127 {
1128 	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
1129 	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
1130 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1131 	int sock_map = bpf_map__fd(inner_map);
1132 	int err;
1133 
1134 	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
1135 	if (err)
1136 		return;
1137 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
1138 	if (err)
1139 		goto detach;
1140 
1141 	redir_to_listening(family, sotype, sock_map, verdict_map,
1142 			   REDIR_INGRESS);
1143 
1144 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
1145 detach:
1146 	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
1147 }
1148 
1149 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
1150 					struct bpf_map *inner_map, int family,
1151 					int sotype)
1152 {
1153 	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
1154 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1155 	int sock_map = bpf_map__fd(inner_map);
1156 	int err;
1157 
1158 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
1159 	if (err)
1160 		return;
1161 
1162 	redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
1163 
1164 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
1165 }
1166 
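/* With the listening socket stored at key 0, the reuseport program is
 * expected to steer an incoming connection (TCP) or datagram (UDP) to it
 * and record a single SK_PASS.
 */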
1167 static void test_reuseport_select_listening(int family, int sotype,
1168 					    int sock_map, int verd_map,
1169 					    int reuseport_prog)
1170 {
1171 	struct sockaddr_storage addr;
1172 	unsigned int pass;
1173 	int s, c, err;
1174 	socklen_t len;
1175 	u64 value;
1176 	u32 key;
1177 
1178 	zero_verdict_count(verd_map);
1179 
1180 	s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
1181 				      reuseport_prog);
1182 	if (s < 0)
1183 		return;
1184 
1185 	len = sizeof(addr);
1186 	err = xgetsockname(s, sockaddr(&addr), &len);
1187 	if (err)
1188 		goto close_srv;
1189 
1190 	key = 0;
1191 	value = s;
1192 	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1193 	if (err)
1194 		goto close_srv;
1195 
1196 	c = xsocket(family, sotype, 0);
1197 	if (c < 0)
1198 		goto close_srv;
1199 	err = xconnect(c, sockaddr(&addr), len);
1200 	if (err)
1201 		goto close_cli;
1202 
1203 	if (sotype == SOCK_STREAM) {
1204 		int p;
1205 
1206 		p = xaccept_nonblock(s, NULL, NULL);
1207 		if (p < 0)
1208 			goto close_cli;
1209 		xclose(p);
1210 	} else {
1211 		char b = 'a';
1212 		ssize_t n;
1213 
1214 		n = xsend(c, &b, sizeof(b), 0);
1215 		if (n == -1)
1216 			goto close_cli;
1217 
1218 		n = xrecv_nonblock(s, &b, sizeof(b), 0);
1219 		if (n == -1)
1220 			goto close_cli;
1221 	}
1222 
1223 	key = SK_PASS;
1224 	err = xbpf_map_lookup_elem(verd_map, &key, &pass);
1225 	if (err)
1226 		goto close_cli;
1227 	if (pass != 1)
1228 		FAIL("want pass count 1, have %d", pass);
1229 
1230 close_cli:
1231 	xclose(c);
1232 close_srv:
1233 	xclose(s);
1234 }
1235 
1236 static void test_reuseport_select_connected(int family, int sotype,
1237 					    int sock_map, int verd_map,
1238 					    int reuseport_prog)
1239 {
1240 	struct sockaddr_storage addr;
1241 	int s, c0, c1, p0, err;
1242 	unsigned int drop;
1243 	socklen_t len;
1244 	u64 value;
1245 	u32 key;
1246 
1247 	zero_verdict_count(verd_map);
1248 
1249 	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
1250 	if (s < 0)
1251 		return;
1252 
1253 	/* Populate sock_map[0] to avoid ENOENT on first connection */
1254 	key = 0;
1255 	value = s;
1256 	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1257 	if (err)
1258 		goto close_srv;
1259 
1260 	len = sizeof(addr);
1261 	err = xgetsockname(s, sockaddr(&addr), &len);
1262 	if (err)
1263 		goto close_srv;
1264 
1265 	c0 = xsocket(family, sotype, 0);
1266 	if (c0 < 0)
1267 		goto close_srv;
1268 
1269 	err = xconnect(c0, sockaddr(&addr), len);
1270 	if (err)
1271 		goto close_cli0;
1272 
1273 	if (sotype == SOCK_STREAM) {
1274 		p0 = xaccept_nonblock(s, NULL, NULL);
1275 		if (p0 < 0)
1276 			goto close_cli0;
1277 	} else {
1278 		p0 = xsocket(family, sotype, 0);
1279 		if (p0 < 0)
1280 			goto close_cli0;
1281 
1282 		len = sizeof(addr);
1283 		err = xgetsockname(c0, sockaddr(&addr), &len);
1284 		if (err)
1285 			goto close_cli0;
1286 
1287 		err = xconnect(p0, sockaddr(&addr), len);
1288 		if (err)
1289 			goto close_cli0;
1290 	}
1291 
1292 	/* Update sock_map[0] to redirect to a connected socket */
1293 	key = 0;
1294 	value = p0;
1295 	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
1296 	if (err)
1297 		goto close_peer0;
1298 
1299 	c1 = xsocket(family, sotype, 0);
1300 	if (c1 < 0)
1301 		goto close_peer0;
1302 
1303 	len = sizeof(addr);
1304 	err = xgetsockname(s, sockaddr(&addr), &len);
1305 	if (err)
1306 		goto close_cli1;
1307 
1308 	errno = 0;
1309 	err = connect(c1, sockaddr(&addr), len);
1310 	if (sotype == SOCK_DGRAM) {
1311 		char b = 'a';
1312 		ssize_t n;
1313 
1314 		n = xsend(c1, &b, sizeof(b), 0);
1315 		if (n == -1)
1316 			goto close_cli1;
1317 
1318 		n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
1319 		err = n == -1;
1320 	}
1321 	if (!err || errno != ECONNREFUSED)
1322 		FAIL_ERRNO("connect: expected ECONNREFUSED");
1323 
1324 	key = SK_DROP;
1325 	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
1326 	if (err)
1327 		goto close_cli1;
1328 	if (drop != 1)
1329 		FAIL("want drop count 1, have %d", drop);
1330 
1331 close_cli1:
1332 	xclose(c1);
1333 close_peer0:
1334 	xclose(p0);
1335 close_cli0:
1336 	xclose(c0);
1337 close_srv:
1338 	xclose(s);
1339 }
1340 
1341 /* Check that redirecting across reuseport groups is not allowed. */
1342 static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
1343 					int verd_map, int reuseport_prog)
1344 {
1345 	struct sockaddr_storage addr;
1346 	int s1, s2, c, err;
1347 	unsigned int drop;
1348 	socklen_t len;
1349 	u32 key;
1350 
1351 	zero_verdict_count(verd_map);
1352 
1353 	/* Create two listeners, each in its own reuseport group */
1354 	s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
1355 	if (s1 < 0)
1356 		return;
1357 
1358 	s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
1359 	if (s2 < 0)
1360 		goto close_srv1;
1361 
1362 	err = add_to_sockmap(sock_map, s1, s2);
1363 	if (err)
1364 		goto close_srv2;
1365 
1366 	/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
1367 	len = sizeof(addr);
1368 	err = xgetsockname(s2, sockaddr(&addr), &len);
1369 	if (err)
1370 		goto close_srv2;
1371 
1372 	c = xsocket(family, sotype, 0);
1373 	if (c < 0)
1374 		goto close_srv2;
1375 
1376 	err = connect(c, sockaddr(&addr), len);
1377 	if (sotype == SOCK_DGRAM) {
1378 		char b = 'a';
1379 		ssize_t n;
1380 
1381 		n = xsend(c, &b, sizeof(b), 0);
1382 		if (n == -1)
1383 			goto close_cli;
1384 
1385 		n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
1386 		err = n == -1;
1387 	}
1388 	if (!err || errno != ECONNREFUSED) {
1389 		FAIL_ERRNO("connect: expected ECONNREFUSED");
1390 		goto close_cli;
1391 	}
1392 
1393 	/* Expect drop, can't redirect outside of reuseport group */
1394 	key = SK_DROP;
1395 	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
1396 	if (err)
1397 		goto close_cli;
1398 	if (drop != 1)
1399 		FAIL("want drop count 1, have %d", drop);
1400 
1401 close_cli:
1402 	xclose(c);
1403 close_srv2:
1404 	xclose(s2);
1405 close_srv1:
1406 	xclose(s1);
1407 }
1408 
1409 #define TEST(fn, ...)                                                          \
1410 	{                                                                      \
1411 		fn, #fn, __VA_ARGS__                                           \
1412 	}
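/* TEST() records the function pointer and its stringified name; any trailing
 * arguments initialize the remaining fields of the test struct in order,
 * e.g. TEST(test_insert_bound, SOCK_STREAM) becomes
 * { test_insert_bound, "test_insert_bound", SOCK_STREAM }.
 */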
1413 
1414 static void test_ops_cleanup(const struct bpf_map *map)
1415 {
1416 	const struct bpf_map_def *def;
1417 	int err, mapfd;
1418 	u32 key;
1419 
1420 	def = bpf_map__def(map);
1421 	mapfd = bpf_map__fd(map);
1422 
1423 	for (key = 0; key < def->max_entries; key++) {
1424 		err = bpf_map_delete_elem(mapfd, &key);
1425 		if (err && errno != EINVAL && errno != ENOENT)
1426 			FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1427 	}
1428 }
1429 
1430 static const char *family_str(sa_family_t family)
1431 {
1432 	switch (family) {
1433 	case AF_INET:
1434 		return "IPv4";
1435 	case AF_INET6:
1436 		return "IPv6";
1437 	case AF_UNIX:
1438 		return "Unix";
1439 	default:
1440 		return "unknown";
1441 	}
1442 }
1443 
1444 static const char *map_type_str(const struct bpf_map *map)
1445 {
1446 	const struct bpf_map_def *def;
1447 
1448 	def = bpf_map__def(map);
1449 	if (IS_ERR(def))
1450 		return "invalid";
1451 
1452 	switch (def->type) {
1453 	case BPF_MAP_TYPE_SOCKMAP:
1454 		return "sockmap";
1455 	case BPF_MAP_TYPE_SOCKHASH:
1456 		return "sockhash";
1457 	default:
1458 		return "unknown";
1459 	}
1460 }
1461 
1462 static const char *sotype_str(int sotype)
1463 {
1464 	switch (sotype) {
1465 	case SOCK_DGRAM:
1466 		return "UDP";
1467 	case SOCK_STREAM:
1468 		return "TCP";
1469 	default:
1470 		return "unknown";
1471 	}
1472 }
1473 
1474 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1475 		     int family, int sotype)
1476 {
1477 	const struct op_test {
1478 		void (*fn)(int family, int sotype, int mapfd);
1479 		const char *name;
1480 		int sotype;
1481 	} tests[] = {
1482 		/* insert */
1483 		TEST(test_insert_invalid),
1484 		TEST(test_insert_opened),
1485 		TEST(test_insert_bound, SOCK_STREAM),
1486 		TEST(test_insert),
1487 		/* delete */
1488 		TEST(test_delete_after_insert),
1489 		TEST(test_delete_after_close),
1490 		/* lookup */
1491 		TEST(test_lookup_after_insert),
1492 		TEST(test_lookup_after_delete),
1493 		TEST(test_lookup_32_bit_value),
1494 		/* update */
1495 		TEST(test_update_existing),
1496 		/* races with insert/delete */
1497 		TEST(test_destroy_orphan_child, SOCK_STREAM),
1498 		TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1499 		TEST(test_race_insert_listen, SOCK_STREAM),
1500 		/* child clone */
1501 		TEST(test_clone_after_delete, SOCK_STREAM),
1502 		TEST(test_accept_after_delete, SOCK_STREAM),
1503 		TEST(test_accept_before_delete, SOCK_STREAM),
1504 	};
1505 	const char *family_name, *map_name, *sotype_name;
1506 	const struct op_test *t;
1507 	char s[MAX_TEST_NAME];
1508 	int map_fd;
1509 
1510 	family_name = family_str(family);
1511 	map_name = map_type_str(map);
1512 	sotype_name = sotype_str(sotype);
1513 	map_fd = bpf_map__fd(map);
1514 
1515 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1516 		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1517 			 sotype_name, t->name);
1518 
1519 		if (t->sotype != 0 && t->sotype != sotype)
1520 			continue;
1521 
1522 		if (!test__start_subtest(s))
1523 			continue;
1524 
1525 		t->fn(family, sotype, map_fd);
1526 		test_ops_cleanup(map);
1527 	}
1528 }
1529 
1530 static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1531 		       int family, int sotype)
1532 {
1533 	const struct redir_test {
1534 		void (*fn)(struct test_sockmap_listen *skel,
1535 			   struct bpf_map *map, int family, int sotype);
1536 		const char *name;
1537 	} tests[] = {
1538 		TEST(test_skb_redir_to_connected),
1539 		TEST(test_skb_redir_to_listening),
1540 		TEST(test_msg_redir_to_connected),
1541 		TEST(test_msg_redir_to_listening),
1542 	};
1543 	const char *family_name, *map_name;
1544 	const struct redir_test *t;
1545 	char s[MAX_TEST_NAME];
1546 
1547 	family_name = family_str(family);
1548 	map_name = map_type_str(map);
1549 
1550 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1551 		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1552 			 t->name);
1553 
1554 		if (!test__start_subtest(s))
1555 			continue;
1556 
1557 		t->fn(skel, map, family, sotype);
1558 	}
1559 }
1560 
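/* AF_UNIX flavour of redir_to_connected(): two socketpair()s stand in for
 * accepted connections, p0 and p1 are inserted into the map, and a byte
 * written on c1 should show up on p0 (ingress redirect) or c0 (egress).
 */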
1561 static void unix_redir_to_connected(int sotype, int sock_mapfd,
1562 			       int verd_mapfd, enum redir_mode mode)
1563 {
1564 	const char *log_prefix = redir_mode_str(mode);
1565 	int c0, c1, p0, p1;
1566 	unsigned int pass;
1567 	int err, n;
1568 	int sfd[2];
1569 	u32 key;
1570 	char b;
1571 
1572 	zero_verdict_count(verd_mapfd);
1573 
1574 	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1575 		return;
1576 	c0 = sfd[0], p0 = sfd[1];
1577 
1578 	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1579 		goto close0;
1580 	c1 = sfd[0], p1 = sfd[1];
1581 
1582 	err = add_to_sockmap(sock_mapfd, p0, p1);
1583 	if (err)
1584 		goto close;
1585 
1586 	n = write(c1, "a", 1);
1587 	if (n < 0)
1588 		FAIL_ERRNO("%s: write", log_prefix);
1589 	if (n == 0)
1590 		FAIL("%s: incomplete write", log_prefix);
1591 	if (n < 1)
1592 		goto close;
1593 
1594 	key = SK_PASS;
1595 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1596 	if (err)
1597 		goto close;
1598 	if (pass != 1)
1599 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1600 
1601 	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1602 	if (n < 0)
1603 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1604 	if (n == 0)
1605 		FAIL("%s: incomplete recv", log_prefix);
1606 
1607 close:
1608 	xclose(c1);
1609 	xclose(p1);
1610 close0:
1611 	xclose(c0);
1612 	xclose(p0);
1613 }
1614 
1615 static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1616 					struct bpf_map *inner_map, int sotype)
1617 {
1618 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1619 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1620 	int sock_map = bpf_map__fd(inner_map);
1621 	int err;
1622 
1623 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1624 	if (err)
1625 		return;
1626 
1627 	skel->bss->test_ingress = false;
1628 	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
1629 	skel->bss->test_ingress = true;
1630 	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
1631 
1632 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1633 }
1634 
1635 static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1636 			    int sotype)
1637 {
1638 	const char *family_name, *map_name;
1639 	char s[MAX_TEST_NAME];
1640 
1641 	family_name = family_str(AF_UNIX);
1642 	map_name = map_type_str(map);
1643 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1644 	if (!test__start_subtest(s))
1645 		return;
1646 	unix_skb_redir_to_connected(skel, map, sotype);
1647 }
1648 
1649 static void test_reuseport(struct test_sockmap_listen *skel,
1650 			   struct bpf_map *map, int family, int sotype)
1651 {
1652 	const struct reuseport_test {
1653 		void (*fn)(int family, int sotype, int socket_map,
1654 			   int verdict_map, int reuseport_prog);
1655 		const char *name;
1656 		int sotype;
1657 	} tests[] = {
1658 		TEST(test_reuseport_select_listening),
1659 		TEST(test_reuseport_select_connected),
1660 		TEST(test_reuseport_mixed_groups),
1661 	};
1662 	int socket_map, verdict_map, reuseport_prog;
1663 	const char *family_name, *map_name, *sotype_name;
1664 	const struct reuseport_test *t;
1665 	char s[MAX_TEST_NAME];
1666 
1667 	family_name = family_str(family);
1668 	map_name = map_type_str(map);
1669 	sotype_name = sotype_str(sotype);
1670 
1671 	socket_map = bpf_map__fd(map);
1672 	verdict_map = bpf_map__fd(skel->maps.verdict_map);
1673 	reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1674 
1675 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1676 		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1677 			 sotype_name, t->name);
1678 
1679 		if (t->sotype != 0 && t->sotype != sotype)
1680 			continue;
1681 
1682 		if (!test__start_subtest(s))
1683 			continue;
1684 
1685 		t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
1686 	}
1687 }
1688 
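/* Emulate socketpair() for AF_INET/AF_INET6 (used here with SOCK_DGRAM):
 * bind a peer on loopback, connect a client to it, then connect the peer
 * back to the client's ephemeral address so both ends are connected.
 */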
1689 static int inet_socketpair(int family, int type, int *s, int *c)
1690 {
1691 	struct sockaddr_storage addr;
1692 	socklen_t len;
1693 	int p0, c0;
1694 	int err;
1695 
1696 	p0 = socket_loopback(family, type | SOCK_NONBLOCK);
1697 	if (p0 < 0)
1698 		return p0;
1699 
1700 	len = sizeof(addr);
1701 	err = xgetsockname(p0, sockaddr(&addr), &len);
1702 	if (err)
1703 		goto close_peer0;
1704 
1705 	c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
1706 	if (c0 < 0) {
1707 		err = c0;
1708 		goto close_peer0;
1709 	}
1710 	err = xconnect(c0, sockaddr(&addr), len);
1711 	if (err)
1712 		goto close_cli0;
1713 	err = xgetsockname(c0, sockaddr(&addr), &len);
1714 	if (err)
1715 		goto close_cli0;
1716 	err = xconnect(p0, sockaddr(&addr), len);
1717 	if (err)
1718 		goto close_cli0;
1719 
1720 	*s = p0;
1721 	*c = c0;
1722 	return 0;
1723 
1724 close_cli0:
1725 	xclose(c0);
1726 close_peer0:
1727 	xclose(p0);
1728 	return err;
1729 }
1730 
1731 static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
1732 				   enum redir_mode mode)
1733 {
1734 	const char *log_prefix = redir_mode_str(mode);
1735 	int c0, c1, p0, p1;
1736 	unsigned int pass;
1737 	int err, n;
1738 	u32 key;
1739 	char b;
1740 
1741 	zero_verdict_count(verd_mapfd);
1742 
1743 	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1744 	if (err)
1745 		return;
1746 	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1747 	if (err)
1748 		goto close_cli0;
1749 
1750 	err = add_to_sockmap(sock_mapfd, p0, p1);
1751 	if (err)
1752 		goto close_cli1;
1753 
1754 	n = write(c1, "a", 1);
1755 	if (n < 0)
1756 		FAIL_ERRNO("%s: write", log_prefix);
1757 	if (n == 0)
1758 		FAIL("%s: incomplete write", log_prefix);
1759 	if (n < 1)
1760 		goto close_cli1;
1761 
1762 	key = SK_PASS;
1763 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1764 	if (err)
1765 		goto close_cli1;
1766 	if (pass != 1)
1767 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1768 
1769 	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1770 	if (n < 0)
1771 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1772 	if (n == 0)
1773 		FAIL("%s: incomplete recv", log_prefix);
1774 
1775 close_cli1:
1776 	xclose(c1);
1777 	xclose(p1);
1778 close_cli0:
1779 	xclose(c0);
1780 	xclose(p0);
1781 }
1782 
1783 static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1784 				       struct bpf_map *inner_map, int family)
1785 {
1786 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1787 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1788 	int sock_map = bpf_map__fd(inner_map);
1789 	int err;
1790 
1791 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1792 	if (err)
1793 		return;
1794 
1795 	skel->bss->test_ingress = false;
1796 	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
1797 	skel->bss->test_ingress = true;
1798 	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
1799 
1800 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1801 }
1802 
1803 static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1804 			   int family)
1805 {
1806 	const char *family_name, *map_name;
1807 	char s[MAX_TEST_NAME];
1808 
1809 	family_name = family_str(family);
1810 	map_name = map_type_str(map);
1811 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1812 	if (!test__start_subtest(s))
1813 		return;
1814 	udp_skb_redir_to_connected(skel, map, family);
1815 }
1816 
1817 static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
1818 					int verd_mapfd, enum redir_mode mode)
1819 {
1820 	const char *log_prefix = redir_mode_str(mode);
1821 	int c0, c1, p0, p1;
1822 	unsigned int pass;
1823 	int err, n;
1824 	int sfd[2];
1825 	u32 key;
1826 	char b;
1827 
1828 	zero_verdict_count(verd_mapfd);
1829 
1830 	if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1831 		return;
1832 	c0 = sfd[0], p0 = sfd[1];
1833 
1834 	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1835 	if (err)
1836 		goto close;
1837 
1838 	err = add_to_sockmap(sock_mapfd, p0, p1);
1839 	if (err)
1840 		goto close_cli1;
1841 
1842 	n = write(c1, "a", 1);
1843 	if (n < 0)
1844 		FAIL_ERRNO("%s: write", log_prefix);
1845 	if (n == 0)
1846 		FAIL("%s: incomplete write", log_prefix);
1847 	if (n < 1)
1848 		goto close_cli1;
1849 
1850 	key = SK_PASS;
1851 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1852 	if (err)
1853 		goto close_cli1;
1854 	if (pass != 1)
1855 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1856 
1857 	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1858 	if (n < 0)
1859 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1860 	if (n == 0)
1861 		FAIL("%s: incomplete recv", log_prefix);
1862 
1863 close_cli1:
1864 	xclose(c1);
1865 	xclose(p1);
1866 close:
1867 	xclose(c0);
1868 	xclose(p0);
1869 }
1870 
1871 static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1872 					    struct bpf_map *inner_map, int family)
1873 {
1874 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1875 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1876 	int sock_map = bpf_map__fd(inner_map);
1877 	int err;
1878 
1879 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1880 	if (err)
1881 		return;
1882 
1883 	skel->bss->test_ingress = false;
1884 	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1885 				    REDIR_EGRESS);
1886 	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1887 				    REDIR_EGRESS);
1888 	skel->bss->test_ingress = true;
1889 	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1890 				    REDIR_INGRESS);
1891 	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1892 				    REDIR_INGRESS);
1893 
1894 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1895 }
1896 
1897 static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
1898 					int verd_mapfd, enum redir_mode mode)
1899 {
1900 	const char *log_prefix = redir_mode_str(mode);
1901 	int c0, c1, p0, p1;
1902 	unsigned int pass;
1903 	int err, n;
1904 	int sfd[2];
1905 	u32 key;
1906 	char b;
1907 
1908 	zero_verdict_count(verd_mapfd);
1909 
1910 	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1911 	if (err)
1912 		return;
1913 
1914 	if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1915 		goto close_cli0;
1916 	c1 = sfd[0], p1 = sfd[1];
1917 
1918 	err = add_to_sockmap(sock_mapfd, p0, p1);
1919 	if (err)
1920 		goto close;
1921 
1922 	n = write(c1, "a", 1);
1923 	if (n < 0)
1924 		FAIL_ERRNO("%s: write", log_prefix);
1925 	if (n == 0)
1926 		FAIL("%s: incomplete write", log_prefix);
1927 	if (n < 1)
1928 		goto close;
1929 
1930 	key = SK_PASS;
1931 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1932 	if (err)
1933 		goto close;
1934 	if (pass != 1)
1935 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1936 
1937 	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1938 	if (n < 0)
1939 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1940 	if (n == 0)
1941 		FAIL("%s: incomplete recv", log_prefix);
1942 
1943 close:
1944 	xclose(c1);
1945 	xclose(p1);
1946 close_cli0:
1947 	xclose(c0);
1948 	xclose(p0);
1949 
1950 }
1951 
1952 static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
1953 					    struct bpf_map *inner_map, int family)
1954 {
1955 	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1956 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1957 	int sock_map = bpf_map__fd(inner_map);
1958 	int err;
1959 
1960 	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1961 	if (err)
1962 		return;
1963 
1964 	skel->bss->test_ingress = false;
1965 	unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1966 				     REDIR_EGRESS);
1967 	unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1968 				     REDIR_EGRESS);
1969 	skel->bss->test_ingress = true;
1970 	unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1971 				     REDIR_INGRESS);
1972 	unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1973 				     REDIR_INGRESS);
1974 
1975 	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1976 }
1977 
1978 static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1979 				int family)
1980 {
1981 	const char *family_name, *map_name;
1982 	char s[MAX_TEST_NAME];
1983 
1984 	family_name = family_str(family);
1985 	map_name = map_type_str(map);
1986 	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1987 	if (!test__start_subtest(s))
1988 		return;
1989 	inet_unix_skb_redir_to_connected(skel, map, family);
1990 	unix_inet_skb_redir_to_connected(skel, map, family);
1991 }
1992 
1993 static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
1994 		      int family)
1995 {
1996 	test_ops(skel, map, family, SOCK_STREAM);
1997 	test_ops(skel, map, family, SOCK_DGRAM);
1998 	test_redir(skel, map, family, SOCK_STREAM);
1999 	test_reuseport(skel, map, family, SOCK_STREAM);
2000 	test_reuseport(skel, map, family, SOCK_DGRAM);
2001 	test_udp_redir(skel, map, family);
2002 	test_udp_unix_redir(skel, map, family);
2003 }
2004 
2005 void serial_test_sockmap_listen(void)
2006 {
2007 	struct test_sockmap_listen *skel;
2008 
2009 	skel = test_sockmap_listen__open_and_load();
2010 	if (!skel) {
2011 		FAIL("skeleton open/load failed");
2012 		return;
2013 	}
2014 
2015 	skel->bss->test_sockmap = true;
2016 	run_tests(skel, skel->maps.sock_map, AF_INET);
2017 	run_tests(skel, skel->maps.sock_map, AF_INET6);
2018 	test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
2019 	test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
2020 
2021 	skel->bss->test_sockmap = false;
2022 	run_tests(skel, skel->maps.sock_hash, AF_INET);
2023 	run_tests(skel, skel->maps.sock_hash, AF_INET6);
2024 	test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
2025 	test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
2026 
2027 	test_sockmap_listen__destroy(skel);
2028 }
2029