1 /*
2  * Test functionality of BPF filters for SO_REUSEPORT.  The tests below will use
3  * a BPF program (both classic and extended) to read the first word from an
4  * incoming packet (expected to be in network byte-order), calculate a modulus
5  * of that number, and then dispatch the packet to the Nth socket using the
6  * result.  These tests are run for each supported address family and protocol.
7  * Additionally, a few edge cases in the implementation are tested.
8  */
9 
10 #include <errno.h>
11 #include <error.h>
12 #include <linux/bpf.h>
13 #include <linux/filter.h>
14 #include <linux/unistd.h>
15 #include <netinet/in.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/epoll.h>
20 #include <sys/types.h>
21 #include <sys/socket.h>
22 #include <unistd.h>
23 
/*
 * Number of elements in a true array (not a pointer).  Only defined here when
 * no earlier header has already provided ARRAY_SIZE.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
27 
/* Parameters describing one test run; passed by value to the test helpers. */
struct test_params {
	/* Address family used to create and bind the receiving sockets. */
	int recv_family;
	/* Address family used to create the sending socket. */
	int send_family;
	/* Passed as the second (type) argument of socket(), e.g. SOCK_DGRAM. */
	int protocol;
	/* Number of receiving sockets in the group. */
	size_t recv_socks;
	/* Port (host byte order) the receiving sockets bind to. */
	uint16_t recv_port;
	/* First source port used by senders; later sends count up from it. */
	uint16_t send_port_min;
};
36 
/*
 * Address length handed to bind()/connect() for every address this test
 * builds: the size of a full sockaddr_storage, regardless of family.
 */
static size_t sockaddr_size(void)
{
	const size_t storage_len = sizeof(struct sockaddr_storage);

	return storage_len;
}
41 
/*
 * Allocate a zero-filled sockaddr_storage and initialise it as the wildcard
 * ("any") address of the requested family.
 *
 * family: AF_INET or AF_INET6; any other value terminates the program.
 * port:   port number in host byte order (stored in network byte order).
 *
 * Returns a heap buffer the caller must free().  An allocation failure
 * terminates the program via error() instead of dereferencing NULL.
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* calloc() gives the zeroed storage the old malloc()+memset() built. */
	addr = calloc(1, sizeof(*addr));
	if (!addr)
		error(1, errno, "failed to allocate sockaddr");

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
69 
/*
 * Build a loopback address for the given family and port: start from the
 * wildcard address produced by new_any_sockaddr() and overwrite only the
 * address field.  The caller owns the returned buffer and must free() it.
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr * const sa = new_any_sockaddr(family, port);

	if (family == AF_INET) {
		struct sockaddr_in *v4 = (struct sockaddr_in *)sa;

		v4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	} else if (family == AF_INET6) {
		struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)sa;

		v6->sin6_addr = in6addr_loopback;
	} else {
		error(1, 0, "Unsupported family %d", family);
	}

	return sa;
}
90 
91 static void attach_ebpf(int fd, uint16_t mod)
92 {
93 	static char bpf_log_buf[65536];
94 	static const char bpf_license[] = "GPL";
95 
96 	int bpf_fd;
97 	const struct bpf_insn prog[] = {
98 		/* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
99 		{ BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
100 		/* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
101 		{ BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
102 		/* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
103 		{ BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
104 		/* BPF_EXIT_INSN() */
105 		{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
106 	};
107 	union bpf_attr attr;
108 
109 	memset(&attr, 0, sizeof(attr));
110 	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
111 	attr.insn_cnt = ARRAY_SIZE(prog);
112 	attr.insns = (uint64_t)prog;
113 	attr.license = (uint64_t)bpf_license;
114 	attr.log_buf = (uint64_t)bpf_log_buf;
115 	attr.log_size = sizeof(bpf_log_buf);
116 	attr.log_level = 1;
117 	attr.kern_version = 0;
118 
119 	bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
120 	if (bpf_fd < 0)
121 		error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
122 
123 	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
124 			sizeof(bpf_fd)))
125 		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
126 
127 	close(bpf_fd);
128 }
129 
130 static void attach_cbpf(int fd, uint16_t mod)
131 {
132 	struct sock_filter code[] = {
133 		/* A = (uint32_t)skb[0] */
134 		{ BPF_LD  | BPF_W | BPF_ABS, 0, 0, 0 },
135 		/* A = A % mod */
136 		{ BPF_ALU | BPF_MOD, 0, 0, mod },
137 		/* return A */
138 		{ BPF_RET | BPF_A, 0, 0, 0 },
139 	};
140 	struct sock_fprog p = {
141 		.len = ARRAY_SIZE(code),
142 		.filter = code,
143 	};
144 
145 	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
146 		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
147 }
148 
149 static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
150 			     void (*attach_bpf)(int, uint16_t))
151 {
152 	struct sockaddr * const addr =
153 		new_any_sockaddr(p.recv_family, p.recv_port);
154 	int i, opt;
155 
156 	for (i = 0; i < p.recv_socks; ++i) {
157 		fd[i] = socket(p.recv_family, p.protocol, 0);
158 		if (fd[i] < 0)
159 			error(1, errno, "failed to create recv %d", i);
160 
161 		opt = 1;
162 		if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
163 			       sizeof(opt)))
164 			error(1, errno, "failed to set SO_REUSEPORT on %d", i);
165 
166 		if (i == 0)
167 			attach_bpf(fd[i], mod);
168 
169 		if (bind(fd[i], addr, sockaddr_size()))
170 			error(1, errno, "failed to bind recv socket %d", i);
171 
172 		if (p.protocol == SOCK_STREAM)
173 			if (listen(fd[i], p.recv_socks * 10))
174 				error(1, errno, "failed to listen on socket");
175 	}
176 	free(addr);
177 }
178 
179 static void send_from(struct test_params p, uint16_t sport, char *buf,
180 		      size_t len)
181 {
182 	struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
183 	struct sockaddr * const daddr =
184 		new_loopback_sockaddr(p.send_family, p.recv_port);
185 	const int fd = socket(p.send_family, p.protocol, 0);
186 
187 	if (fd < 0)
188 		error(1, errno, "failed to create send socket");
189 
190 	if (bind(fd, saddr, sockaddr_size()))
191 		error(1, errno, "failed to bind send socket");
192 	if (connect(fd, daddr, sockaddr_size()))
193 		error(1, errno, "failed to connect");
194 
195 	if (send(fd, buf, len, 0) < 0)
196 		error(1, errno, "failed to send message");
197 
198 	close(fd);
199 	free(saddr);
200 	free(daddr);
201 }
202 
203 static void test_recv_order(const struct test_params p, int fd[], int mod)
204 {
205 	char recv_buf[8], send_buf[8];
206 	struct msghdr msg;
207 	struct iovec recv_io = { recv_buf, 8 };
208 	struct epoll_event ev;
209 	int epfd, conn, i, sport, expected;
210 	uint32_t data, ndata;
211 
212 	epfd = epoll_create(1);
213 	if (epfd < 0)
214 		error(1, errno, "failed to create epoll");
215 	for (i = 0; i < p.recv_socks; ++i) {
216 		ev.events = EPOLLIN;
217 		ev.data.fd = fd[i];
218 		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
219 			error(1, errno, "failed to register sock %d epoll", i);
220 	}
221 
222 	memset(&msg, 0, sizeof(msg));
223 	msg.msg_iov = &recv_io;
224 	msg.msg_iovlen = 1;
225 
226 	for (data = 0; data < p.recv_socks * 2; ++data) {
227 		sport = p.send_port_min + data;
228 		ndata = htonl(data);
229 		memcpy(send_buf, &ndata, sizeof(ndata));
230 		send_from(p, sport, send_buf, sizeof(ndata));
231 
232 		i = epoll_wait(epfd, &ev, 1, -1);
233 		if (i < 0)
234 			error(1, errno, "epoll wait failed");
235 
236 		if (p.protocol == SOCK_STREAM) {
237 			conn = accept(ev.data.fd, NULL, NULL);
238 			if (conn < 0)
239 				error(1, errno, "error accepting");
240 			i = recvmsg(conn, &msg, 0);
241 			close(conn);
242 		} else {
243 			i = recvmsg(ev.data.fd, &msg, 0);
244 		}
245 		if (i < 0)
246 			error(1, errno, "recvmsg error");
247 		if (i != sizeof(ndata))
248 			error(1, 0, "expected size %zd got %d",
249 			      sizeof(ndata), i);
250 
251 		for (i = 0; i < p.recv_socks; ++i)
252 			if (ev.data.fd == fd[i])
253 				break;
254 		memcpy(&ndata, recv_buf, sizeof(ndata));
255 		fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata));
256 
257 		expected = (sport % mod);
258 		if (i != expected)
259 			error(1, 0, "expected socket %d", expected);
260 	}
261 }
262 
263 static void test_reuseport_ebpf(const struct test_params p)
264 {
265 	int i, fd[p.recv_socks];
266 
267 	fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
268 	build_recv_group(p, fd, p.recv_socks, attach_ebpf);
269 	test_recv_order(p, fd, p.recv_socks);
270 
271 	fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
272 	attach_ebpf(fd[0], p.recv_socks / 2);
273 	test_recv_order(p, fd, p.recv_socks / 2);
274 
275 	for (i = 0; i < p.recv_socks; ++i)
276 		close(fd[i]);
277 }
278 
279 static void test_reuseport_cbpf(const struct test_params p)
280 {
281 	int i, fd[p.recv_socks];
282 
283 	fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
284 	build_recv_group(p, fd, p.recv_socks, attach_cbpf);
285 	test_recv_order(p, fd, p.recv_socks);
286 
287 	fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
288 	attach_cbpf(fd[0], p.recv_socks / 2);
289 	test_recv_order(p, fd, p.recv_socks / 2);
290 
291 	for (i = 0; i < p.recv_socks; ++i)
292 		close(fd[i]);
293 }
294 
295 static void test_extra_filter(const struct test_params p)
296 {
297 	struct sockaddr * const addr =
298 		new_any_sockaddr(p.recv_family, p.recv_port);
299 	int fd1, fd2, opt;
300 
301 	fprintf(stderr, "Testing too many filters...\n");
302 	fd1 = socket(p.recv_family, p.protocol, 0);
303 	if (fd1 < 0)
304 		error(1, errno, "failed to create socket 1");
305 	fd2 = socket(p.recv_family, p.protocol, 0);
306 	if (fd2 < 0)
307 		error(1, errno, "failed to create socket 2");
308 
309 	opt = 1;
310 	if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
311 		error(1, errno, "failed to set SO_REUSEPORT on socket 1");
312 	if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
313 		error(1, errno, "failed to set SO_REUSEPORT on socket 2");
314 
315 	attach_ebpf(fd1, 10);
316 	attach_ebpf(fd2, 10);
317 
318 	if (bind(fd1, addr, sockaddr_size()))
319 		error(1, errno, "failed to bind recv socket 1");
320 
321 	if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
322 		error(1, errno, "bind socket 2 should fail with EADDRINUSE");
323 
324 	free(addr);
325 }
326 
327 static void test_filter_no_reuseport(const struct test_params p)
328 {
329 	struct sockaddr * const addr =
330 		new_any_sockaddr(p.recv_family, p.recv_port);
331 	const char bpf_license[] = "GPL";
332 	struct bpf_insn ecode[] = {
333 		{ BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
334 		{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
335 	};
336 	struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
337 	union bpf_attr eprog;
338 	struct sock_fprog cprog;
339 	int fd, bpf_fd;
340 
341 	fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
342 
343 	memset(&eprog, 0, sizeof(eprog));
344 	eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
345 	eprog.insn_cnt = ARRAY_SIZE(ecode);
346 	eprog.insns = (uint64_t)ecode;
347 	eprog.license = (uint64_t)bpf_license;
348 	eprog.kern_version = 0;
349 
350 	memset(&cprog, 0, sizeof(cprog));
351 	cprog.len = ARRAY_SIZE(ccode);
352 	cprog.filter = ccode;
353 
354 
355 	bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
356 	if (bpf_fd < 0)
357 		error(1, errno, "ebpf error");
358 	fd = socket(p.recv_family, p.protocol, 0);
359 	if (fd < 0)
360 		error(1, errno, "failed to create socket 1");
361 
362 	if (bind(fd, addr, sockaddr_size()))
363 		error(1, errno, "failed to bind recv socket 1");
364 
365 	errno = 0;
366 	if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
367 			sizeof(bpf_fd)) || errno != EINVAL)
368 		error(1, errno, "setsockopt should have returned EINVAL");
369 
370 	errno = 0;
371 	if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
372 		       sizeof(cprog)) || errno != EINVAL)
373 		error(1, errno, "setsockopt should have returned EINVAL");
374 
375 	free(addr);
376 }
377 
378 static void test_filter_without_bind(void)
379 {
380 	int fd1, fd2;
381 
382 	fprintf(stderr, "Testing filter add without bind...\n");
383 	fd1 = socket(AF_INET, SOCK_DGRAM, 0);
384 	if (fd1 < 0)
385 		error(1, errno, "failed to create socket 1");
386 	fd2 = socket(AF_INET, SOCK_DGRAM, 0);
387 	if (fd2 < 0)
388 		error(1, errno, "failed to create socket 2");
389 
390 	attach_ebpf(fd1, 10);
391 	attach_cbpf(fd2, 10);
392 
393 	close(fd1);
394 	close(fd2);
395 }
396 
397 
398 int main(void)
399 {
400 	fprintf(stderr, "---- IPv4 UDP ----\n");
401 	/* NOTE: UDP socket lookups traverse a different code path when there
402 	 * are > 10 sockets in a group.  Run the bpf test through both paths.
403 	 */
404 	test_reuseport_ebpf((struct test_params) {
405 		.recv_family = AF_INET,
406 		.send_family = AF_INET,
407 		.protocol = SOCK_DGRAM,
408 		.recv_socks = 10,
409 		.recv_port = 8000,
410 		.send_port_min = 9000});
411 	test_reuseport_ebpf((struct test_params) {
412 		.recv_family = AF_INET,
413 		.send_family = AF_INET,
414 		.protocol = SOCK_DGRAM,
415 		.recv_socks = 20,
416 		.recv_port = 8000,
417 		.send_port_min = 9000});
418 	test_reuseport_cbpf((struct test_params) {
419 		.recv_family = AF_INET,
420 		.send_family = AF_INET,
421 		.protocol = SOCK_DGRAM,
422 		.recv_socks = 10,
423 		.recv_port = 8001,
424 		.send_port_min = 9020});
425 	test_reuseport_cbpf((struct test_params) {
426 		.recv_family = AF_INET,
427 		.send_family = AF_INET,
428 		.protocol = SOCK_DGRAM,
429 		.recv_socks = 20,
430 		.recv_port = 8001,
431 		.send_port_min = 9020});
432 	test_extra_filter((struct test_params) {
433 		.recv_family = AF_INET,
434 		.protocol = SOCK_DGRAM,
435 		.recv_port = 8002});
436 	test_filter_no_reuseport((struct test_params) {
437 		.recv_family = AF_INET,
438 		.protocol = SOCK_DGRAM,
439 		.recv_port = 8008});
440 
441 	fprintf(stderr, "---- IPv6 UDP ----\n");
442 	test_reuseport_ebpf((struct test_params) {
443 		.recv_family = AF_INET6,
444 		.send_family = AF_INET6,
445 		.protocol = SOCK_DGRAM,
446 		.recv_socks = 10,
447 		.recv_port = 8003,
448 		.send_port_min = 9040});
449 	test_reuseport_ebpf((struct test_params) {
450 		.recv_family = AF_INET6,
451 		.send_family = AF_INET6,
452 		.protocol = SOCK_DGRAM,
453 		.recv_socks = 20,
454 		.recv_port = 8003,
455 		.send_port_min = 9040});
456 	test_reuseport_cbpf((struct test_params) {
457 		.recv_family = AF_INET6,
458 		.send_family = AF_INET6,
459 		.protocol = SOCK_DGRAM,
460 		.recv_socks = 10,
461 		.recv_port = 8004,
462 		.send_port_min = 9060});
463 	test_reuseport_cbpf((struct test_params) {
464 		.recv_family = AF_INET6,
465 		.send_family = AF_INET6,
466 		.protocol = SOCK_DGRAM,
467 		.recv_socks = 20,
468 		.recv_port = 8004,
469 		.send_port_min = 9060});
470 	test_extra_filter((struct test_params) {
471 		.recv_family = AF_INET6,
472 		.protocol = SOCK_DGRAM,
473 		.recv_port = 8005});
474 	test_filter_no_reuseport((struct test_params) {
475 		.recv_family = AF_INET6,
476 		.protocol = SOCK_DGRAM,
477 		.recv_port = 8009});
478 
479 	fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
480 	test_reuseport_ebpf((struct test_params) {
481 		.recv_family = AF_INET6,
482 		.send_family = AF_INET,
483 		.protocol = SOCK_DGRAM,
484 		.recv_socks = 20,
485 		.recv_port = 8006,
486 		.send_port_min = 9080});
487 	test_reuseport_ebpf((struct test_params) {
488 		.recv_family = AF_INET6,
489 		.send_family = AF_INET,
490 		.protocol = SOCK_DGRAM,
491 		.recv_socks = 10,
492 		.recv_port = 8006,
493 		.send_port_min = 9080});
494 	test_reuseport_cbpf((struct test_params) {
495 		.recv_family = AF_INET6,
496 		.send_family = AF_INET,
497 		.protocol = SOCK_DGRAM,
498 		.recv_socks = 10,
499 		.recv_port = 8007,
500 		.send_port_min = 9100});
501 	test_reuseport_cbpf((struct test_params) {
502 		.recv_family = AF_INET6,
503 		.send_family = AF_INET,
504 		.protocol = SOCK_DGRAM,
505 		.recv_socks = 20,
506 		.recv_port = 8007,
507 		.send_port_min = 9100});
508 
509 
510 	test_filter_without_bind();
511 
512 	fprintf(stderr, "SUCCESS\n");
513 	return 0;
514 }
515