1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <sys/socket.h>
6 #include <sys/ioctl.h>
7 #include <sys/select.h>
8 #include <netinet/in.h>
9 #include <arpa/inet.h>
10 #include <unistd.h>
11 #include <string.h>
12 #include <errno.h>
13 #include <stdbool.h>
14 #include <signal.h>
15 #include <fcntl.h>
16 #include <sys/wait.h>
17 #include <time.h>
18 #include <sched.h>
19 
20 #include <sys/time.h>
21 #include <sys/types.h>
22 #include <sys/sendfile.h>
23 
24 #include <linux/netlink.h>
25 #include <linux/socket.h>
26 #include <linux/sock_diag.h>
27 #include <linux/bpf.h>
28 #include <linux/if_link.h>
29 #include <linux/tls.h>
30 #include <assert.h>
31 #include <libgen.h>
32 
33 #include <getopt.h>
34 
35 #include <bpf/bpf.h>
36 #include <bpf/libbpf.h>
37 
38 #include "bpf_util.h"
39 #include "cgroup_helpers.h"
40 
/* Loop condition of forever_ping_pong(): the ping/pong loop keeps iterating
 * while this is non-zero.
 */
int running;
/* Forward declaration; the definition is outside this part of the file.
 * NOTE(review): presumably a signal handler that updates 'running' - confirm.
 */
static void running_handler(int a);
43 
/* Fallback values for toolchains whose headers do not define these
 * constants. Both are passed to setsockopt() in sockmap_init_ktls().
 */
#ifndef TCP_ULP
# define TCP_ULP 31
#endif
#ifndef SOL_TLS
# define SOL_TLS 282
#endif
50 
/* randomly selected ports for testing on lo */
/* Port the s1 listening socket is bound to on 127.0.0.1. */
#define S1_PORT 10000
/* Port the s2 listening socket is bound to on 127.0.0.1. */
#define S2_PORT 10001

/* test_end_subtest() compares env.type against BPF_SOCKMAP_FILENAME: a match
 * is reported as "sockmap", anything else as "sockhash".
 */
#define BPF_SOCKMAP_FILENAME  "test_sockmap_kern.o"
#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
/* NOTE(review): presumably the cgroup path handed to the cgroup helpers; its
 * use is outside this part of the file - confirm.
 */
#define CG_PATH "/sockmap"
58 
/* global sockets */
/* Set up by sockmap_init_sockets(): s1/s2 are the listening sockets, c1/c2
 * the clients connected to them and p1/p2 the accepted peers
 * (c1 <-> p1, c2 <-> p2).
 */
int s1, s2, c1, c2, p1, p2;
/* Counters that are not referenced in the visible part of the file. */
int test_cnt;
int passed;
int failed;
/* BPF map file descriptors; run_options() uses indices 0..8. They are
 * populated outside this part of the file.
 */
int map_fd[9];
struct bpf_map *maps[9];
/* BPF program file descriptors attached by run_options(). */
int prog_fd[11];
67 
/* Test knobs. Several are set from the command line (see long_options) and
 * all txmsg_* / skb_use_parser values are cleared by test_reset().
 *
 * run_options() picks the sk_msg program from the first non-zero value among
 * txmsg_pass, txmsg_redir, txmsg_apply, txmsg_cork and txmsg_drop, in that
 * order; with none set no sk_msg program is attached.
 */
int txmsg_pass;
int txmsg_redir;
int txmsg_drop;
/* Non-zero values are written to map_fd[3] / map_fd[4] by run_options(). */
int txmsg_apply;
int txmsg_cork;
/* Stored by run_options() in map_fd[5] under keys 0 (start), 1 (end),
 * 2 (start_push), 3 (end_push), 4 (start_pop) and 5 (pop).
 */
int txmsg_start;
int txmsg_end;
int txmsg_start_push;
int txmsg_end_push;
int txmsg_start_pop;
int txmsg_pop;
/* When set, run_options() stores BPF_F_INGRESS in map_fd[6] and adds the
 * peer sockets to the maps.
 */
int txmsg_ingress;
int txmsg_redir_skb;
/* When set, msg_verify_data() expects the first iovec to start with "PASS". */
int txmsg_ktls_skb;
int txmsg_ktls_skb_drop;
int txmsg_ktls_skb_redir;
/* When set, sendmsg_test() enables kTLS on the sockets before forking. */
int ktls;
/* When set, msg_loop() issues a MSG_PEEK receive before every real receive. */
int peek_flag;
int skb_use_parser;
/* When set, run_options() does not attach the stream parser program. */
int txmsg_omit_skb_parser;
88 
/* Command line options for getopt_long(), terminated by an all-zero entry.
 * Entries with a NULL flag pointer are reported through their short option
 * character; entries with a flag pointer make getopt_long() store 'val'
 * directly into the named global. usage() prints this table.
 */
static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
	{"cgroup",	required_argument,	NULL, 'c' },
	{"rate",	required_argument,	NULL, 'r' },
	{"verbose",	optional_argument,	NULL, 'v' },
	{"iov_count",	required_argument,	NULL, 'i' },
	{"length",	required_argument,	NULL, 'l' },
	{"test",	required_argument,	NULL, 't' },
	{"data_test",   no_argument,		NULL, 'd' },
	{"txmsg",		no_argument,	&txmsg_pass,  1  },
	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
	{"txmsg_apply",	required_argument,	NULL, 'a'},
	{"txmsg_cork",	required_argument,	NULL, 'k'},
	{"txmsg_start", required_argument,	NULL, 's'},
	{"txmsg_end",	required_argument,	NULL, 'e'},
	{"txmsg_start_push", required_argument,	NULL, 'p'},
	{"txmsg_end_push",   required_argument,	NULL, 'q'},
	{"txmsg_start_pop",  required_argument,	NULL, 'w'},
	{"txmsg_pop",	     required_argument,	NULL, 'x'},
	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
	{"txmsg_redir_skb", no_argument,	&txmsg_redir_skb, 1 },
	{"ktls", no_argument,			&ktls, 1 },
	{"peek", no_argument,			&peek_flag, 1 },
	{"txmsg_omit_skb_parser", no_argument,      &txmsg_omit_skb_parser, 1},
	{"whitelist", required_argument,	NULL, 'n' },
	{"blacklist", required_argument,	NULL, 'b' },
	{0, 0, NULL, 0 }
};
118 
/* Bookkeeping for the test runner; one global instance ('env') is shared by
 * the test_*() helpers.
 */
struct test_env {
	/* Copied from sockmap_options.map by test_start_subtest(). */
	const char *type;
	/* Title of the running test, copied from struct _test. */
	const char *subtest;
	/* Optional prefix printed in the result line; may be NULL. */
	const char *prepend;

	/* Incremented once per test_start_subtest() call. */
	int test_num;
	/* Reset by test_start_subtest(), incremented by test_start(). */
	int subtest_num;

	/* Totals reported by test_print_results(). */
	int succ_cnt;
	int fail_cnt;
	/* fail_cnt snapshot taken when the current subtest started; lets
	 * test_end_subtest() detect failures that happened since then.
	 */
	int fail_last;
};

struct test_env env;
133 
/* Per-run configuration handed to the test functions. */
struct sockmap_options {
	/* Verbosity level; values above 1 enable the extra diagnostics. */
	int verbose;
	/* sendmsg_test() receives on p1 when set, on p2 otherwise. */
	bool base;
	/* Transmit with sendfile() (msg_loop_sendpage) instead of sendmsg(). */
	bool sendpage;
	/* Fill transmitted buffers with a byte pattern and verify it on receive. */
	bool data_test;
	/* Sends are expected to fail; a successful send is treated as an error. */
	bool drop_expected;
	/* Receiver allocates double-sized buffers and fails when more bytes
	 * arrive than were expected.
	 */
	bool check_recved_len;
	/* Number of iovec entries per message and size of each entry. */
	int iov_count;
	int iov_length;
	/* Number of send iterations in sendmsg_test(); seconds to sleep between
	 * rounds in forever_ping_pong().
	 */
	int rate;
	/* Copied to env.type and compared against BPF_SOCKMAP_FILENAME. */
	char *map;
	/* Not referenced in the visible part of the file. */
	char *whitelist;
	char *blacklist;
	/* Copied to env.prepend for the result line. */
	char *prepend;
};
149 
/* One named test case. */
struct _test {
	/* Name reported in the result line (see test_start_subtest()). */
	char *title;
	/* Test body; receives a cgroup file descriptor and the run options. */
	void (*tester)(int cg_fd, struct sockmap_options *opt);
};
154 
155 static void test_start(void)
156 {
157 	env.subtest_num++;
158 }
159 
160 static void test_fail(void)
161 {
162 	env.fail_cnt++;
163 }
164 
165 static void test_pass(void)
166 {
167 	env.succ_cnt++;
168 }
169 
170 static void test_reset(void)
171 {
172 	txmsg_start = txmsg_end = 0;
173 	txmsg_start_pop = txmsg_pop = 0;
174 	txmsg_start_push = txmsg_end_push = 0;
175 	txmsg_pass = txmsg_drop = txmsg_redir = 0;
176 	txmsg_apply = txmsg_cork = 0;
177 	txmsg_ingress = txmsg_redir_skb = 0;
178 	txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
179 	txmsg_omit_skb_parser = 0;
180 	skb_use_parser = 0;
181 }
182 
183 static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
184 {
185 	env.type = o->map;
186 	env.subtest = t->title;
187 	env.prepend = o->prepend;
188 	env.test_num++;
189 	env.subtest_num = 0;
190 	env.fail_last = env.fail_cnt;
191 	test_reset();
192 	return 0;
193 }
194 
195 static void test_end_subtest(void)
196 {
197 	int error = env.fail_cnt - env.fail_last;
198 	int type = strcmp(env.type, BPF_SOCKMAP_FILENAME);
199 
200 	if (!error)
201 		test_pass();
202 
203 	fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n",
204 		env.test_num, env.subtest_num,
205 		!type ? "sockmap" : "sockhash",
206 		env.prepend ? : "",
207 		env.subtest, error ? "FAIL" : "OK");
208 }
209 
210 static void test_print_results(void)
211 {
212 	fprintf(stdout, "Pass: %d Fail: %d\n",
213 		env.succ_cnt, env.fail_cnt);
214 }
215 
216 static void usage(char *argv[])
217 {
218 	int i;
219 
220 	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
221 	printf(" options:\n");
222 	for (i = 0; long_options[i].name != 0; i++) {
223 		printf(" --%-12s", long_options[i].name);
224 		if (long_options[i].flag != NULL)
225 			printf(" flag (internal value:%d)\n",
226 				*long_options[i].flag);
227 		else
228 			printf(" -%c\n", long_options[i].val);
229 	}
230 	printf("\n");
231 }
232 
233 char *sock_to_string(int s)
234 {
235 	if (s == c1)
236 		return "client1";
237 	else if (s == c2)
238 		return "client2";
239 	else if (s == s1)
240 		return "server1";
241 	else if (s == s2)
242 		return "server2";
243 	else if (s == p1)
244 		return "peer1";
245 	else if (s == p2)
246 		return "peer2";
247 	else
248 		return "unknown";
249 }
250 
251 static int sockmap_init_ktls(int verbose, int s)
252 {
253 	struct tls12_crypto_info_aes_gcm_128 tls_tx = {
254 		.info = {
255 			.version     = TLS_1_2_VERSION,
256 			.cipher_type = TLS_CIPHER_AES_GCM_128,
257 		},
258 	};
259 	struct tls12_crypto_info_aes_gcm_128 tls_rx = {
260 		.info = {
261 			.version     = TLS_1_2_VERSION,
262 			.cipher_type = TLS_CIPHER_AES_GCM_128,
263 		},
264 	};
265 	int so_buf = 6553500;
266 	int err;
267 
268 	err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls"));
269 	if (err) {
270 		fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n", sock_to_string(s), err);
271 		return -EINVAL;
272 	}
273 	err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx));
274 	if (err) {
275 		fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n", sock_to_string(s), err);
276 		return -EINVAL;
277 	}
278 	err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx));
279 	if (err) {
280 		fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n", sock_to_string(s), err);
281 		return -EINVAL;
282 	}
283 	err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf));
284 	if (err) {
285 		fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n", sock_to_string(s), err);
286 		return -EINVAL;
287 	}
288 	err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf));
289 	if (err) {
290 		fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n", sock_to_string(s), err);
291 		return -EINVAL;
292 	}
293 
294 	if (verbose)
295 		fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s));
296 	return 0;
297 }
298 static int sockmap_init_sockets(int verbose)
299 {
300 	int i, err, one = 1;
301 	struct sockaddr_in addr;
302 	int *fds[4] = {&s1, &s2, &c1, &c2};
303 
304 	s1 = s2 = p1 = p2 = c1 = c2 = 0;
305 
306 	/* Init sockets */
307 	for (i = 0; i < 4; i++) {
308 		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
309 		if (*fds[i] < 0) {
310 			perror("socket s1 failed()");
311 			return errno;
312 		}
313 	}
314 
315 	/* Allow reuse */
316 	for (i = 0; i < 2; i++) {
317 		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
318 				 (char *)&one, sizeof(one));
319 		if (err) {
320 			perror("setsockopt failed()");
321 			return errno;
322 		}
323 	}
324 
325 	/* Non-blocking sockets */
326 	for (i = 0; i < 2; i++) {
327 		err = ioctl(*fds[i], FIONBIO, (char *)&one);
328 		if (err < 0) {
329 			perror("ioctl s1 failed()");
330 			return errno;
331 		}
332 	}
333 
334 	/* Bind server sockets */
335 	memset(&addr, 0, sizeof(struct sockaddr_in));
336 	addr.sin_family = AF_INET;
337 	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
338 
339 	addr.sin_port = htons(S1_PORT);
340 	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
341 	if (err < 0) {
342 		perror("bind s1 failed()");
343 		return errno;
344 	}
345 
346 	addr.sin_port = htons(S2_PORT);
347 	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
348 	if (err < 0) {
349 		perror("bind s2 failed()");
350 		return errno;
351 	}
352 
353 	/* Listen server sockets */
354 	addr.sin_port = htons(S1_PORT);
355 	err = listen(s1, 32);
356 	if (err < 0) {
357 		perror("listen s1 failed()");
358 		return errno;
359 	}
360 
361 	addr.sin_port = htons(S2_PORT);
362 	err = listen(s2, 32);
363 	if (err < 0) {
364 		perror("listen s1 failed()");
365 		return errno;
366 	}
367 
368 	/* Initiate Connect */
369 	addr.sin_port = htons(S1_PORT);
370 	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
371 	if (err < 0 && errno != EINPROGRESS) {
372 		perror("connect c1 failed()");
373 		return errno;
374 	}
375 
376 	addr.sin_port = htons(S2_PORT);
377 	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
378 	if (err < 0 && errno != EINPROGRESS) {
379 		perror("connect c2 failed()");
380 		return errno;
381 	} else if (err < 0) {
382 		err = 0;
383 	}
384 
385 	/* Accept Connecrtions */
386 	p1 = accept(s1, NULL, NULL);
387 	if (p1 < 0) {
388 		perror("accept s1 failed()");
389 		return errno;
390 	}
391 
392 	p2 = accept(s2, NULL, NULL);
393 	if (p2 < 0) {
394 		perror("accept s1 failed()");
395 		return errno;
396 	}
397 
398 	if (verbose > 1) {
399 		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
400 		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
401 			c1, s1, c2, s2);
402 	}
403 	return 0;
404 }
405 
/* Byte counters and timestamps collected by one send or receive loop. */
struct msg_stats {
	/* Bytes accepted by the send calls of the transmit loop. */
	size_t bytes_sent;
	/* Bytes returned by the receive calls of the receive loop. */
	size_t bytes_recvd;
	/* CLOCK_MONOTONIC timestamps taken around the loop. */
	struct timespec start;
	struct timespec end;
};
412 
413 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
414 			     struct msg_stats *s,
415 			     struct sockmap_options *opt)
416 {
417 	bool drop = opt->drop_expected;
418 	unsigned char k = 0;
419 	FILE *file;
420 	int i, fp;
421 
422 	file = tmpfile();
423 	if (!file) {
424 		perror("create file for sendpage");
425 		return 1;
426 	}
427 	for (i = 0; i < iov_length * cnt; i++, k++)
428 		fwrite(&k, sizeof(char), 1, file);
429 	fflush(file);
430 	fseek(file, 0, SEEK_SET);
431 
432 	fp = fileno(file);
433 
434 	clock_gettime(CLOCK_MONOTONIC, &s->start);
435 	for (i = 0; i < cnt; i++) {
436 		int sent;
437 
438 		errno = 0;
439 		sent = sendfile(fd, fp, NULL, iov_length);
440 
441 		if (!drop && sent < 0) {
442 			perror("sendpage loop error");
443 			fclose(file);
444 			return sent;
445 		} else if (drop && sent >= 0) {
446 			printf("sendpage loop error expected: %i errno %i\n",
447 			       sent, errno);
448 			fclose(file);
449 			return -EIO;
450 		}
451 
452 		if (sent > 0)
453 			s->bytes_sent += sent;
454 	}
455 	clock_gettime(CLOCK_MONOTONIC, &s->end);
456 	fclose(file);
457 	return 0;
458 }
459 
/* Release every buffer referenced by @msg's iovec array and the array
 * itself, then leave @msg with an empty (NULL, 0) iovec so a repeated call
 * is harmless.
 */
static void msg_free_iov(struct msghdr *msg)
{
	struct iovec *vec = msg->msg_iov;
	size_t count = msg->msg_iovlen;
	size_t idx;

	for (idx = 0; idx < count; idx++)
		free(vec[idx].iov_base);
	free(vec);

	msg->msg_iovlen = 0;
	msg->msg_iov = NULL;
}
470 
/* Allocate @iov_count zeroed buffers of @iov_length bytes and attach them to
 * @msg as its iovec array.
 *
 * When both @data and @xmit are set the buffers are filled with one
 * incrementing byte pattern (0, 1, ..., 255, 0, ...) that runs on across
 * buffer boundaries.
 *
 * Returns 0 on success. If the iovec array cannot be allocated the errno
 * left by calloc() is returned; if a buffer cannot be allocated everything
 * allocated so far is released and -ENOMEM is returned. @msg is only
 * modified on success.
 */
static int msg_alloc_iov(struct msghdr *msg,
			 int iov_count, int iov_length,
			 bool data, bool xmit)
{
	unsigned char k = 0;
	struct iovec *iov;
	int i;

	iov = calloc(iov_count, sizeof(struct iovec));
	if (!iov)
		return errno;

	for (i = 0; i < iov_count; i++) {
		unsigned char *d = calloc(iov_length, sizeof(char));

		if (!d) {
			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
			goto unwind_iov;
		}
		iov[i].iov_base = d;
		iov[i].iov_len = iov_length;

		if (data && xmit) {
			int j;

			for (j = 0; j < iov_length; j++)
				d[j] = k++;
		}
	}

	msg->msg_iov = iov;
	msg->msg_iovlen = iov_count;

	return 0;
unwind_iov:
	/* msg->msg_iov has not been assigned yet, so unwind through the local
	 * array and release the array itself as well.
	 */
	for (i--; i >= 0 ; i--)
		free(iov[i].iov_base);
	free(iov);
	return -ENOMEM;
}
510 
511 static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
512 {
513 	int i, j = 0, bytes_cnt = 0;
514 	unsigned char k = 0;
515 
516 	for (i = 0; i < msg->msg_iovlen; i++) {
517 		unsigned char *d = msg->msg_iov[i].iov_base;
518 
519 		/* Special case test for skb ingress + ktls */
520 		if (i == 0 && txmsg_ktls_skb) {
521 			if (msg->msg_iov[i].iov_len < 4)
522 				return -EIO;
523 			if (memcmp(d, "PASS", 4) != 0) {
524 				fprintf(stderr,
525 					"detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
526 					i, 0, d[0], d[1], d[2], d[3]);
527 				return -EIO;
528 			}
529 			j = 4; /* advance index past PASS header */
530 		}
531 
532 		for (; j < msg->msg_iov[i].iov_len && size; j++) {
533 			if (d[j] != k++) {
534 				fprintf(stderr,
535 					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
536 					i, j, d[j], k - 1, d[j+1], k);
537 				return -EIO;
538 			}
539 			bytes_cnt++;
540 			if (bytes_cnt == chunk_sz) {
541 				k = 0;
542 				bytes_cnt = 0;
543 			}
544 			size--;
545 		}
546 	}
547 	return 0;
548 }
549 
550 static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
551 		    struct msg_stats *s, bool tx,
552 		    struct sockmap_options *opt)
553 {
554 	struct msghdr msg = {0}, msg_peek = {0};
555 	int err, i, flags = MSG_NOSIGNAL;
556 	bool drop = opt->drop_expected;
557 	bool data = opt->data_test;
558 	int iov_alloc_length = iov_length;
559 
560 	if (!tx && opt->check_recved_len)
561 		iov_alloc_length *= 2;
562 
563 	err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, data, tx);
564 	if (err)
565 		goto out_errno;
566 	if (peek_flag) {
567 		err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx);
568 		if (err)
569 			goto out_errno;
570 	}
571 
572 	if (tx) {
573 		clock_gettime(CLOCK_MONOTONIC, &s->start);
574 		for (i = 0; i < cnt; i++) {
575 			int sent;
576 
577 			errno = 0;
578 			sent = sendmsg(fd, &msg, flags);
579 
580 			if (!drop && sent < 0) {
581 				perror("sendmsg loop error");
582 				goto out_errno;
583 			} else if (drop && sent >= 0) {
584 				fprintf(stderr,
585 					"sendmsg loop error expected: %i errno %i\n",
586 					sent, errno);
587 				errno = -EIO;
588 				goto out_errno;
589 			}
590 			if (sent > 0)
591 				s->bytes_sent += sent;
592 		}
593 		clock_gettime(CLOCK_MONOTONIC, &s->end);
594 	} else {
595 		int slct, recvp = 0, recv, max_fd = fd;
596 		float total_bytes, txmsg_pop_total;
597 		int fd_flags = O_NONBLOCK;
598 		struct timeval timeout;
599 		fd_set w;
600 
601 		fcntl(fd, fd_flags);
602 		/* Account for pop bytes noting each iteration of apply will
603 		 * call msg_pop_data helper so we need to account for this
604 		 * by calculating the number of apply iterations. Note user
605 		 * of the tool can create cases where no data is sent by
606 		 * manipulating pop/push/pull/etc. For example txmsg_apply 1
607 		 * with txmsg_pop 1 will try to apply 1B at a time but each
608 		 * iteration will then pop 1B so no data will ever be sent.
609 		 * This is really only useful for testing edge cases in code
610 		 * paths.
611 		 */
612 		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
613 		if (txmsg_apply)
614 			txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
615 		else
616 			txmsg_pop_total = txmsg_pop * cnt;
617 		total_bytes -= txmsg_pop_total;
618 		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
619 		if (err < 0)
620 			perror("recv start time");
621 		while (s->bytes_recvd < total_bytes) {
622 			if (txmsg_cork) {
623 				timeout.tv_sec = 0;
624 				timeout.tv_usec = 300000;
625 			} else {
626 				timeout.tv_sec = 3;
627 				timeout.tv_usec = 0;
628 			}
629 
630 			/* FD sets */
631 			FD_ZERO(&w);
632 			FD_SET(fd, &w);
633 
634 			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
635 			if (slct == -1) {
636 				perror("select()");
637 				clock_gettime(CLOCK_MONOTONIC, &s->end);
638 				goto out_errno;
639 			} else if (!slct) {
640 				if (opt->verbose)
641 					fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total);
642 				errno = -EIO;
643 				clock_gettime(CLOCK_MONOTONIC, &s->end);
644 				goto out_errno;
645 			}
646 
647 			errno = 0;
648 			if (peek_flag) {
649 				flags |= MSG_PEEK;
650 				recvp = recvmsg(fd, &msg_peek, flags);
651 				if (recvp < 0) {
652 					if (errno != EWOULDBLOCK) {
653 						clock_gettime(CLOCK_MONOTONIC, &s->end);
654 						goto out_errno;
655 					}
656 				}
657 				flags = 0;
658 			}
659 
660 			recv = recvmsg(fd, &msg, flags);
661 			if (recv < 0) {
662 				if (errno != EWOULDBLOCK) {
663 					clock_gettime(CLOCK_MONOTONIC, &s->end);
664 					perror("recv failed()");
665 					goto out_errno;
666 				}
667 			}
668 
669 			s->bytes_recvd += recv;
670 
671 			if (opt->check_recved_len && s->bytes_recvd > total_bytes) {
672 				errno = EMSGSIZE;
673 				fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n",
674 						s->bytes_recvd, total_bytes);
675 				goto out_errno;
676 			}
677 
678 			if (data) {
679 				int chunk_sz = opt->sendpage ?
680 						iov_length * cnt :
681 						iov_length * iov_count;
682 
683 				errno = msg_verify_data(&msg, recv, chunk_sz);
684 				if (errno) {
685 					perror("data verify msg failed");
686 					goto out_errno;
687 				}
688 				if (recvp) {
689 					errno = msg_verify_data(&msg_peek,
690 								recvp,
691 								chunk_sz);
692 					if (errno) {
693 						perror("data verify msg_peek failed");
694 						goto out_errno;
695 					}
696 				}
697 			}
698 		}
699 		clock_gettime(CLOCK_MONOTONIC, &s->end);
700 	}
701 
702 	msg_free_iov(&msg);
703 	msg_free_iov(&msg_peek);
704 	return err;
705 out_errno:
706 	msg_free_iov(&msg);
707 	msg_free_iov(&msg_peek);
708 	return errno;
709 }
710 
/* Divisor used by the throughput printouts to convert B/s into GB/s. */
static float giga = 1000000000;
712 
713 static inline float sentBps(struct msg_stats s)
714 {
715 	return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
716 }
717 
718 static inline float recvdBps(struct msg_stats s)
719 {
720 	return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
721 }
722 
723 static int sendmsg_test(struct sockmap_options *opt)
724 {
725 	float sent_Bps = 0, recvd_Bps = 0;
726 	int rx_fd, txpid, rxpid, err = 0;
727 	struct msg_stats s = {0};
728 	int iov_count = opt->iov_count;
729 	int iov_buf = opt->iov_length;
730 	int rx_status, tx_status;
731 	int cnt = opt->rate;
732 
733 	errno = 0;
734 
735 	if (opt->base)
736 		rx_fd = p1;
737 	else
738 		rx_fd = p2;
739 
740 	if (ktls) {
741 		/* Redirecting into non-TLS socket which sends into a TLS
742 		 * socket is not a valid test. So in this case lets not
743 		 * enable kTLS but still run the test.
744 		 */
745 		if (!txmsg_redir || txmsg_ingress) {
746 			err = sockmap_init_ktls(opt->verbose, rx_fd);
747 			if (err)
748 				return err;
749 		}
750 		err = sockmap_init_ktls(opt->verbose, c1);
751 		if (err)
752 			return err;
753 	}
754 
755 	rxpid = fork();
756 	if (rxpid == 0) {
757 		if (txmsg_pop || txmsg_start_pop)
758 			iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
759 		if (opt->drop_expected || txmsg_ktls_skb_drop)
760 			_exit(0);
761 
762 		if (!iov_buf) /* zero bytes sent case */
763 			_exit(0);
764 
765 		if (opt->sendpage)
766 			iov_count = 1;
767 		err = msg_loop(rx_fd, iov_count, iov_buf,
768 			       cnt, &s, false, opt);
769 		if (opt->verbose > 1)
770 			fprintf(stderr,
771 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
772 				iov_count, iov_buf, cnt, err);
773 		if (s.end.tv_sec - s.start.tv_sec) {
774 			sent_Bps = sentBps(s);
775 			recvd_Bps = recvdBps(s);
776 		}
777 		if (opt->verbose > 1)
778 			fprintf(stdout,
779 				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
780 				s.bytes_sent, sent_Bps, sent_Bps/giga,
781 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga,
782 				peek_flag ? "(peek_msg)" : "");
783 		if (err && txmsg_cork)
784 			err = 0;
785 		exit(err ? 1 : 0);
786 	} else if (rxpid == -1) {
787 		perror("msg_loop_rx");
788 		return errno;
789 	}
790 
791 	txpid = fork();
792 	if (txpid == 0) {
793 		if (opt->sendpage)
794 			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
795 		else
796 			err = msg_loop(c1, iov_count, iov_buf,
797 				       cnt, &s, true, opt);
798 
799 		if (err)
800 			fprintf(stderr,
801 				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
802 				iov_count, iov_buf, cnt, err);
803 		if (s.end.tv_sec - s.start.tv_sec) {
804 			sent_Bps = sentBps(s);
805 			recvd_Bps = recvdBps(s);
806 		}
807 		if (opt->verbose > 1)
808 			fprintf(stdout,
809 				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
810 				s.bytes_sent, sent_Bps, sent_Bps/giga,
811 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
812 		exit(err ? 1 : 0);
813 	} else if (txpid == -1) {
814 		perror("msg_loop_tx");
815 		return errno;
816 	}
817 
818 	assert(waitpid(rxpid, &rx_status, 0) == rxpid);
819 	assert(waitpid(txpid, &tx_status, 0) == txpid);
820 	if (WIFEXITED(rx_status)) {
821 		err = WEXITSTATUS(rx_status);
822 		if (err) {
823 			fprintf(stderr, "rx thread exited with err %d.\n", err);
824 			goto out;
825 		}
826 	}
827 	if (WIFEXITED(tx_status)) {
828 		err = WEXITSTATUS(tx_status);
829 		if (err)
830 			fprintf(stderr, "tx thread exited with err %d.\n", err);
831 	}
832 out:
833 	return err;
834 }
835 
836 static int forever_ping_pong(int rate, struct sockmap_options *opt)
837 {
838 	struct timeval timeout;
839 	char buf[1024] = {0};
840 	int sc;
841 
842 	timeout.tv_sec = 10;
843 	timeout.tv_usec = 0;
844 
845 	/* Ping/Pong data from client to server */
846 	sc = send(c1, buf, sizeof(buf), 0);
847 	if (sc < 0) {
848 		perror("send failed()");
849 		return sc;
850 	}
851 
852 	do {
853 		int s, rc, i, max_fd = p2;
854 		fd_set w;
855 
856 		/* FD sets */
857 		FD_ZERO(&w);
858 		FD_SET(c1, &w);
859 		FD_SET(c2, &w);
860 		FD_SET(p1, &w);
861 		FD_SET(p2, &w);
862 
863 		s = select(max_fd + 1, &w, NULL, NULL, &timeout);
864 		if (s == -1) {
865 			perror("select()");
866 			break;
867 		} else if (!s) {
868 			fprintf(stderr, "unexpected timeout\n");
869 			break;
870 		}
871 
872 		for (i = 0; i <= max_fd && s > 0; ++i) {
873 			if (!FD_ISSET(i, &w))
874 				continue;
875 
876 			s--;
877 
878 			rc = recv(i, buf, sizeof(buf), 0);
879 			if (rc < 0) {
880 				if (errno != EWOULDBLOCK) {
881 					perror("recv failed()");
882 					return rc;
883 				}
884 			}
885 
886 			if (rc == 0) {
887 				close(i);
888 				break;
889 			}
890 
891 			sc = send(i, buf, rc, 0);
892 			if (sc < 0) {
893 				perror("send failed()");
894 				return sc;
895 			}
896 		}
897 
898 		if (rate)
899 			sleep(rate);
900 
901 		if (opt->verbose) {
902 			printf(".");
903 			fflush(stdout);
904 
905 		}
906 	} while (running);
907 
908 	return 0;
909 }
910 
/* Test modes understood by run_options(). */
enum {
	/* Not handled in the visible part of run_options(). */
	SELFTESTS,
	/* Runs forever_ping_pong(). */
	PING_PONG,
	/* sendmsg_test() with sendmsg(), receiving on p2. */
	SENDMSG,
	/* Like SENDMSG but receiving on p1 and without attaching the BPF programs. */
	BASE,
	/* Like SENDPAGE but receiving on p1 and without attaching the BPF programs. */
	BASE_SENDPAGE,
	/* sendmsg_test() with sendfile(), receiving on p2. */
	SENDPAGE,
};
919 
920 static int run_options(struct sockmap_options *options, int cg_fd,  int test)
921 {
922 	int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
923 
924 	/* If base test skip BPF setup */
925 	if (test == BASE || test == BASE_SENDPAGE)
926 		goto run;
927 
928 	/* Attach programs to sockmap */
929 	if (!txmsg_omit_skb_parser) {
930 		err = bpf_prog_attach(prog_fd[0], map_fd[0],
931 				      BPF_SK_SKB_STREAM_PARSER, 0);
932 		if (err) {
933 			fprintf(stderr,
934 				"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
935 				prog_fd[0], map_fd[0], err, strerror(errno));
936 			return err;
937 		}
938 	}
939 
940 	err = bpf_prog_attach(prog_fd[1], map_fd[0],
941 				BPF_SK_SKB_STREAM_VERDICT, 0);
942 	if (err) {
943 		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
944 			err, strerror(errno));
945 		return err;
946 	}
947 
948 	/* Attach programs to TLS sockmap */
949 	if (txmsg_ktls_skb) {
950 		if (!txmsg_omit_skb_parser) {
951 			err = bpf_prog_attach(prog_fd[0], map_fd[8],
952 					      BPF_SK_SKB_STREAM_PARSER, 0);
953 			if (err) {
954 				fprintf(stderr,
955 					"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
956 					prog_fd[0], map_fd[8], err, strerror(errno));
957 				return err;
958 			}
959 		}
960 
961 		err = bpf_prog_attach(prog_fd[2], map_fd[8],
962 				      BPF_SK_SKB_STREAM_VERDICT, 0);
963 		if (err) {
964 			fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
965 				err, strerror(errno));
966 			return err;
967 		}
968 	}
969 
970 	/* Attach to cgroups */
971 	err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
972 	if (err) {
973 		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
974 			err, strerror(errno));
975 		return err;
976 	}
977 
978 run:
979 	err = sockmap_init_sockets(options->verbose);
980 	if (err) {
981 		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
982 		goto out;
983 	}
984 
985 	/* Attach txmsg program to sockmap */
986 	if (txmsg_pass)
987 		tx_prog_fd = prog_fd[4];
988 	else if (txmsg_redir)
989 		tx_prog_fd = prog_fd[5];
990 	else if (txmsg_apply)
991 		tx_prog_fd = prog_fd[6];
992 	else if (txmsg_cork)
993 		tx_prog_fd = prog_fd[7];
994 	else if (txmsg_drop)
995 		tx_prog_fd = prog_fd[8];
996 	else
997 		tx_prog_fd = 0;
998 
999 	if (tx_prog_fd) {
1000 		int redir_fd, i = 0;
1001 
1002 		err = bpf_prog_attach(tx_prog_fd,
1003 				      map_fd[1], BPF_SK_MSG_VERDICT, 0);
1004 		if (err) {
1005 			fprintf(stderr,
1006 				"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
1007 				err, strerror(errno));
1008 			goto out;
1009 		}
1010 
1011 		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
1012 		if (err) {
1013 			fprintf(stderr,
1014 				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
1015 				err, strerror(errno));
1016 			goto out;
1017 		}
1018 
1019 		if (txmsg_redir)
1020 			redir_fd = c2;
1021 		else
1022 			redir_fd = c1;
1023 
1024 		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
1025 		if (err) {
1026 			fprintf(stderr,
1027 				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
1028 				err, strerror(errno));
1029 			goto out;
1030 		}
1031 
1032 		if (txmsg_apply) {
1033 			err = bpf_map_update_elem(map_fd[3],
1034 						  &i, &txmsg_apply, BPF_ANY);
1035 			if (err) {
1036 				fprintf(stderr,
1037 					"ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
1038 					err, strerror(errno));
1039 				goto out;
1040 			}
1041 		}
1042 
1043 		if (txmsg_cork) {
1044 			err = bpf_map_update_elem(map_fd[4],
1045 						  &i, &txmsg_cork, BPF_ANY);
1046 			if (err) {
1047 				fprintf(stderr,
1048 					"ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
1049 					err, strerror(errno));
1050 				goto out;
1051 			}
1052 		}
1053 
1054 		if (txmsg_start) {
1055 			err = bpf_map_update_elem(map_fd[5],
1056 						  &i, &txmsg_start, BPF_ANY);
1057 			if (err) {
1058 				fprintf(stderr,
1059 					"ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
1060 					err, strerror(errno));
1061 				goto out;
1062 			}
1063 		}
1064 
1065 		if (txmsg_end) {
1066 			i = 1;
1067 			err = bpf_map_update_elem(map_fd[5],
1068 						  &i, &txmsg_end, BPF_ANY);
1069 			if (err) {
1070 				fprintf(stderr,
1071 					"ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
1072 					err, strerror(errno));
1073 				goto out;
1074 			}
1075 		}
1076 
1077 		if (txmsg_start_push) {
1078 			i = 2;
1079 			err = bpf_map_update_elem(map_fd[5],
1080 						  &i, &txmsg_start_push, BPF_ANY);
1081 			if (err) {
1082 				fprintf(stderr,
1083 					"ERROR: bpf_map_update_elem (txmsg_start_push):  %d (%s)\n",
1084 					err, strerror(errno));
1085 				goto out;
1086 			}
1087 		}
1088 
1089 		if (txmsg_end_push) {
1090 			i = 3;
1091 			err = bpf_map_update_elem(map_fd[5],
1092 						  &i, &txmsg_end_push, BPF_ANY);
1093 			if (err) {
1094 				fprintf(stderr,
1095 					"ERROR: bpf_map_update_elem %i@%i (txmsg_end_push):  %d (%s)\n",
1096 					txmsg_end_push, i, err, strerror(errno));
1097 				goto out;
1098 			}
1099 		}
1100 
1101 		if (txmsg_start_pop) {
1102 			i = 4;
1103 			err = bpf_map_update_elem(map_fd[5],
1104 						  &i, &txmsg_start_pop, BPF_ANY);
1105 			if (err) {
1106 				fprintf(stderr,
1107 					"ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop):  %d (%s)\n",
1108 					txmsg_start_pop, i, err, strerror(errno));
1109 				goto out;
1110 			}
1111 		} else {
1112 			i = 4;
1113 			bpf_map_update_elem(map_fd[5],
1114 						  &i, &txmsg_start_pop, BPF_ANY);
1115 		}
1116 
1117 		if (txmsg_pop) {
1118 			i = 5;
1119 			err = bpf_map_update_elem(map_fd[5],
1120 						  &i, &txmsg_pop, BPF_ANY);
1121 			if (err) {
1122 				fprintf(stderr,
1123 					"ERROR: bpf_map_update_elem %i@%i (txmsg_pop):  %d (%s)\n",
1124 					txmsg_pop, i, err, strerror(errno));
1125 				goto out;
1126 			}
1127 		} else {
1128 			i = 5;
1129 			bpf_map_update_elem(map_fd[5],
1130 					    &i, &txmsg_pop, BPF_ANY);
1131 
1132 		}
1133 
1134 		if (txmsg_ingress) {
1135 			int in = BPF_F_INGRESS;
1136 
1137 			i = 0;
1138 			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
1139 			if (err) {
1140 				fprintf(stderr,
1141 					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1142 					err, strerror(errno));
1143 			}
1144 			i = 1;
1145 			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
1146 			if (err) {
1147 				fprintf(stderr,
1148 					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
1149 					err, strerror(errno));
1150 			}
1151 			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
1152 			if (err) {
1153 				fprintf(stderr,
1154 					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
1155 					err, strerror(errno));
1156 			}
1157 
1158 			i = 2;
1159 			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
1160 			if (err) {
1161 				fprintf(stderr,
1162 					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
1163 					err, strerror(errno));
1164 			}
1165 		}
1166 
1167 		if (txmsg_ktls_skb) {
1168 			int ingress = BPF_F_INGRESS;
1169 
1170 			i = 0;
1171 			err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
1172 			if (err) {
1173 				fprintf(stderr,
1174 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1175 					err, strerror(errno));
1176 			}
1177 
1178 			if (txmsg_ktls_skb_redir) {
1179 				i = 1;
1180 				err = bpf_map_update_elem(map_fd[7],
1181 							  &i, &ingress, BPF_ANY);
1182 				if (err) {
1183 					fprintf(stderr,
1184 						"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1185 						err, strerror(errno));
1186 				}
1187 			}
1188 
1189 			if (txmsg_ktls_skb_drop) {
1190 				i = 1;
1191 				err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
1192 			}
1193 		}
1194 
1195 		if (txmsg_redir_skb) {
1196 			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
1197 					p2 : p1;
1198 			int ingress = BPF_F_INGRESS;
1199 
1200 			i = 0;
1201 			err = bpf_map_update_elem(map_fd[7],
1202 						  &i, &ingress, BPF_ANY);
1203 			if (err) {
1204 				fprintf(stderr,
1205 					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1206 					err, strerror(errno));
1207 			}
1208 
1209 			i = 3;
1210 			err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
1211 			if (err) {
1212 				fprintf(stderr,
1213 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1214 					err, strerror(errno));
1215 			}
1216 		}
1217 	}
1218 
1219 	if (skb_use_parser) {
1220 		i = 2;
1221 		err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY);
1222 	}
1223 
1224 	if (txmsg_drop)
1225 		options->drop_expected = true;
1226 
1227 	if (test == PING_PONG)
1228 		err = forever_ping_pong(options->rate, options);
1229 	else if (test == SENDMSG) {
1230 		options->base = false;
1231 		options->sendpage = false;
1232 		err = sendmsg_test(options);
1233 	} else if (test == SENDPAGE) {
1234 		options->base = false;
1235 		options->sendpage = true;
1236 		err = sendmsg_test(options);
1237 	} else if (test == BASE) {
1238 		options->base = true;
1239 		options->sendpage = false;
1240 		err = sendmsg_test(options);
1241 	} else if (test == BASE_SENDPAGE) {
1242 		options->base = true;
1243 		options->sendpage = true;
1244 		err = sendmsg_test(options);
1245 	} else
1246 		fprintf(stderr, "unknown test\n");
1247 out:
	/* Detach and zero all the maps */
1249 	bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
1250 	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
1251 	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
1252 	bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
1253 	bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
1254 
1255 	if (tx_prog_fd >= 0)
1256 		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
1257 
1258 	for (i = 0; i < 8; i++) {
1259 		key = next_key = 0;
1260 		bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1261 		while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
1262 			bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1263 			key = next_key;
1264 		}
1265 	}
1266 
1267 	close(s1);
1268 	close(s2);
1269 	close(p1);
1270 	close(p2);
1271 	close(c1);
1272 	close(c2);
1273 	return err;
1274 }
1275 
1276 static char *test_to_str(int test)
1277 {
1278 	switch (test) {
1279 	case SENDMSG:
1280 		return "sendmsg";
1281 	case SENDPAGE:
1282 		return "sendpage";
1283 	}
1284 	return "unknown";
1285 }
1286 
/* Append @src to the NUL-terminated string in @dst without letting the
 * result outgrow @dst_cap bytes (terminator included). Whatever does not
 * fit is dropped; when only the terminator would fit, @dst is left alone.
 */
static void append_str(char *dst, const char *src, size_t dst_cap)
{
	size_t used = strlen(dst);
	size_t room = dst_cap - used;

	/* room counts the terminator, so 1 means no payload byte fits */
	if (room <= 1)
		return;

	/* snprintf() emits at most room - 1 characters plus the zero byte */
	snprintf(dst + used, room, "%s", src);
}
1296 
1297 #define OPTSTRING 60
1298 static void test_options(char *options)
1299 {
1300 	char tstr[OPTSTRING];
1301 
1302 	memset(options, 0, OPTSTRING);
1303 
1304 	if (txmsg_pass)
1305 		append_str(options, "pass,", OPTSTRING);
1306 	if (txmsg_redir)
1307 		append_str(options, "redir,", OPTSTRING);
1308 	if (txmsg_drop)
1309 		append_str(options, "drop,", OPTSTRING);
1310 	if (txmsg_apply) {
1311 		snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
1312 		append_str(options, tstr, OPTSTRING);
1313 	}
1314 	if (txmsg_cork) {
1315 		snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
1316 		append_str(options, tstr, OPTSTRING);
1317 	}
1318 	if (txmsg_start) {
1319 		snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
1320 		append_str(options, tstr, OPTSTRING);
1321 	}
1322 	if (txmsg_end) {
1323 		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
1324 		append_str(options, tstr, OPTSTRING);
1325 	}
1326 	if (txmsg_start_pop) {
1327 		snprintf(tstr, OPTSTRING, "pop (%d,%d),",
1328 			 txmsg_start_pop, txmsg_start_pop + txmsg_pop);
1329 		append_str(options, tstr, OPTSTRING);
1330 	}
1331 	if (txmsg_ingress)
1332 		append_str(options, "ingress,", OPTSTRING);
1333 	if (txmsg_redir_skb)
1334 		append_str(options, "redir_skb,", OPTSTRING);
1335 	if (txmsg_ktls_skb)
1336 		append_str(options, "ktls_skb,", OPTSTRING);
1337 	if (ktls)
1338 		append_str(options, "ktls,", OPTSTRING);
1339 	if (peek_flag)
1340 		append_str(options, "peek,", OPTSTRING);
1341 }
1342 
1343 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
1344 {
1345 	char *options = calloc(OPTSTRING, sizeof(char));
1346 	int err;
1347 
1348 	if (test == SENDPAGE)
1349 		opt->sendpage = true;
1350 	else
1351 		opt->sendpage = false;
1352 
1353 	if (txmsg_drop)
1354 		opt->drop_expected = true;
1355 	else
1356 		opt->drop_expected = false;
1357 
1358 	test_options(options);
1359 
1360 	if (opt->verbose) {
1361 		fprintf(stdout,
1362 			" [TEST %i]: (%i, %i, %i, %s, %s): ",
1363 			test_cnt, opt->rate, opt->iov_count, opt->iov_length,
1364 			test_to_str(test), options);
1365 		fflush(stdout);
1366 	}
1367 	err = run_options(opt, cgrp, test);
1368 	if (opt->verbose)
1369 		fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED");
1370 	test_cnt++;
1371 	!err ? passed++ : failed++;
1372 	free(options);
1373 	return err;
1374 }
1375 
1376 static void test_exec(int cgrp, struct sockmap_options *opt)
1377 {
1378 	int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME);
1379 	int err;
1380 
1381 	if (type == 0) {
1382 		test_start();
1383 		err = __test_exec(cgrp, SENDMSG, opt);
1384 		if (err)
1385 			test_fail();
1386 	} else {
1387 		test_start();
1388 		err = __test_exec(cgrp, SENDPAGE, opt);
1389 		if (err)
1390 			test_fail();
1391 	}
1392 }
1393 
1394 static void test_send_one(struct sockmap_options *opt, int cgrp)
1395 {
1396 	opt->iov_length = 1;
1397 	opt->iov_count = 1;
1398 	opt->rate = 1;
1399 	test_exec(cgrp, opt);
1400 
1401 	opt->iov_length = 1;
1402 	opt->iov_count = 1024;
1403 	opt->rate = 1;
1404 	test_exec(cgrp, opt);
1405 
1406 	opt->iov_length = 1024;
1407 	opt->iov_count = 1;
1408 	opt->rate = 1;
1409 	test_exec(cgrp, opt);
1410 
1411 }
1412 
1413 static void test_send_many(struct sockmap_options *opt, int cgrp)
1414 {
1415 	opt->iov_length = 3;
1416 	opt->iov_count = 1;
1417 	opt->rate = 512;
1418 	test_exec(cgrp, opt);
1419 
1420 	opt->rate = 100;
1421 	opt->iov_count = 1;
1422 	opt->iov_length = 5;
1423 	test_exec(cgrp, opt);
1424 }
1425 
1426 static void test_send_large(struct sockmap_options *opt, int cgrp)
1427 {
1428 	opt->iov_length = 256;
1429 	opt->iov_count = 1024;
1430 	opt->rate = 2;
1431 	test_exec(cgrp, opt);
1432 }
1433 
/* Run the whole send matrix (one, many, large) for the current globals,
 * then yield the CPU once.
 */
static void test_send(struct sockmap_options *opt, int cgrp)
{
	test_send_one(opt, cgrp);
	test_send_many(opt, cgrp);
	test_send_large(opt, cgrp);

	sched_yield();
}
1441 
1442 static void test_txmsg_pass(int cgrp, struct sockmap_options *opt)
1443 {
1444 	/* Test small and large iov_count values with pass/redir/apply/cork */
1445 	txmsg_pass = 1;
1446 	test_send(opt, cgrp);
1447 }
1448 
1449 static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
1450 {
1451 	txmsg_redir = 1;
1452 	test_send(opt, cgrp);
1453 }
1454 
1455 static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
1456 {
1457 	txmsg_drop = 1;
1458 	test_send(opt, cgrp);
1459 }
1460 
1461 static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
1462 {
1463 	txmsg_pass = txmsg_drop = 0;
1464 	txmsg_ingress = txmsg_redir = 1;
1465 	test_send(opt, cgrp);
1466 }
1467 
/* skb subtest: seven test_exec() runs with data verification forced on.
 *
 * The first three run with ktls and txmsg_ktls_skb enabled (plain, with
 * txmsg_ktls_skb_drop, with txmsg_ktls_skb_redir). The last four run with
 * txmsg_omit_skb_parser set.
 *
 * opt->data_test and the global ktls flag are saved on entry and restored
 * before returning.
 * NOTE(review): txmsg_ktls_skb_redir is still 1 when this returns -
 * confirm that whatever runs before the next subtest resets it.
 */
static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
{
	bool data = opt->data_test;
	int k = ktls;

	opt->data_test = true;
	ktls = 1;

	/* Clear the verdict globals, then enable only ktls_skb and pass */
	txmsg_pass = txmsg_drop = 0;
	txmsg_ingress = txmsg_redir = 0;
	txmsg_ktls_skb = 1;
	txmsg_pass = 1;

	/* Using data verification so ensure iov layout is
	 * expected from test receiver side. e.g. has enough
	 * bytes to write test code.
	 */
	opt->iov_length = 100;
	opt->iov_count = 1;
	opt->rate = 1;
	test_exec(cgrp, opt);

	/* Same run with the ktls skb drop flag set */
	txmsg_ktls_skb_drop = 1;
	test_exec(cgrp, opt);

	/* Swap drop for redirect */
	txmsg_ktls_skb_drop = 0;
	txmsg_ktls_skb_redir = 1;
	test_exec(cgrp, opt);
	txmsg_ktls_skb_redir = 0;

	/* Tests that omit skb_parser */
	txmsg_omit_skb_parser = 1;
	ktls = 0;
	txmsg_ktls_skb = 0;
	test_exec(cgrp, opt);

	txmsg_ktls_skb_drop = 1;
	test_exec(cgrp, opt);
	txmsg_ktls_skb_drop = 0;

	txmsg_ktls_skb_redir = 1;
	test_exec(cgrp, opt);

	/* Repeat the last configuration with ktls switched back on */
	ktls = 1;
	test_exec(cgrp, opt);
	txmsg_omit_skb_parser = 0;

	/* Put back what the caller had */
	opt->data_test = data;
	ktls = k;
}
1518 
1519 /* Test cork with hung data. This tests poor usage patterns where
1520  * cork can leave data on the ring if user program is buggy and
1521  * doesn't flush them somehow. They do take some time however
1522  * because they wait for a timeout. Test pass, redir and cork with
1523  * apply logic. Use cork size of 4097 with send_large to avoid
1524  * aligning cork size with send size.
1525  */
1526 static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
1527 {
1528 	txmsg_pass = 1;
1529 	txmsg_redir = 0;
1530 	txmsg_cork = 4097;
1531 	txmsg_apply = 4097;
1532 	test_send_large(opt, cgrp);
1533 
1534 	txmsg_pass = 0;
1535 	txmsg_redir = 1;
1536 	txmsg_apply = 0;
1537 	txmsg_cork = 4097;
1538 	test_send_large(opt, cgrp);
1539 
1540 	txmsg_pass = 0;
1541 	txmsg_redir = 1;
1542 	txmsg_apply = 4097;
1543 	txmsg_cork = 4097;
1544 	test_send_large(opt, cgrp);
1545 }
1546 
1547 static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
1548 {
1549 	/* Test basic start/end */
1550 	txmsg_start = 1;
1551 	txmsg_end = 2;
1552 	test_send(opt, cgrp);
1553 
1554 	/* Test >4k pull */
1555 	txmsg_start = 4096;
1556 	txmsg_end = 9182;
1557 	test_send_large(opt, cgrp);
1558 
1559 	/* Test pull + redirect */
1560 	txmsg_redir = 0;
1561 	txmsg_start = 1;
1562 	txmsg_end = 2;
1563 	test_send(opt, cgrp);
1564 
1565 	/* Test pull + cork */
1566 	txmsg_redir = 0;
1567 	txmsg_cork = 512;
1568 	txmsg_start = 1;
1569 	txmsg_end = 2;
1570 	test_send_many(opt, cgrp);
1571 
1572 	/* Test pull + cork + redirect */
1573 	txmsg_redir = 1;
1574 	txmsg_cork = 512;
1575 	txmsg_start = 1;
1576 	txmsg_end = 2;
1577 	test_send_many(opt, cgrp);
1578 }
1579 
/* Pop-data subtest: exercise the txmsg_start_pop/txmsg_pop globals on their
 * own, with values past 4k, and combined with redirect and cork.
 *
 * test_options() prints the pair as (txmsg_start_pop,
 * txmsg_start_pop + txmsg_pop). The first two steps leave txmsg_redir and
 * txmsg_cork at whatever value they had on entry.
 */
static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
{
	/* Test basic pop */
	txmsg_start_pop = 1;
	txmsg_pop = 2;
	test_send_many(opt, cgrp);

	/* Test pop with >4k */
	txmsg_start_pop = 4096;
	txmsg_pop = 4096;
	test_send_large(opt, cgrp);

	/* Test pop + redirect */
	txmsg_redir = 1;
	txmsg_start_pop = 1;
	txmsg_pop = 2;
	test_send_many(opt, cgrp);

	/* Test pop + cork */
	txmsg_redir = 0;
	txmsg_cork = 512;
	txmsg_start_pop = 1;
	txmsg_pop = 2;
	test_send_many(opt, cgrp);

	/* Test pop + redirect + cork */
	txmsg_redir = 1;
	txmsg_cork = 4;
	txmsg_start_pop = 1;
	txmsg_pop = 2;
	test_send_many(opt, cgrp);
}
1612 
/* Push-data subtest: exercise the txmsg_start_push/txmsg_end_push globals on
 * their own, with 4096-byte values, and combined with redirect and cork.
 *
 * The first two steps leave txmsg_redir and txmsg_cork at whatever value
 * they had on entry.
 */
static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
{
	/* Test basic push */
	txmsg_start_push = 1;
	txmsg_end_push = 1;
	test_send(opt, cgrp);

	/* Test push 4kB >4k */
	txmsg_start_push = 4096;
	txmsg_end_push = 4096;
	test_send_large(opt, cgrp);

	/* Test push + redirect */
	txmsg_redir = 1;
	txmsg_start_push = 1;
	txmsg_end_push = 2;
	test_send_many(opt, cgrp);

	/* Test push + cork */
	txmsg_redir = 0;
	txmsg_cork = 512;
	txmsg_start_push = 1;
	txmsg_end_push = 2;
	test_send_many(opt, cgrp);
}
1638 
1639 static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
1640 {
1641 	txmsg_start_push = 1;
1642 	txmsg_end_push = 10;
1643 	txmsg_start_pop = 5;
1644 	txmsg_pop = 4;
1645 	test_send_large(opt, cgrp);
1646 }
1647 
1648 static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
1649 {
1650 	txmsg_pass = 1;
1651 	txmsg_redir = 0;
1652 	txmsg_apply = 1;
1653 	txmsg_cork = 0;
1654 	test_send_one(opt, cgrp);
1655 
1656 	txmsg_pass = 0;
1657 	txmsg_redir = 1;
1658 	txmsg_apply = 1;
1659 	txmsg_cork = 0;
1660 	test_send_one(opt, cgrp);
1661 
1662 	txmsg_pass = 1;
1663 	txmsg_redir = 0;
1664 	txmsg_apply = 1024;
1665 	txmsg_cork = 0;
1666 	test_send_large(opt, cgrp);
1667 
1668 	txmsg_pass = 0;
1669 	txmsg_redir = 1;
1670 	txmsg_apply = 1024;
1671 	txmsg_cork = 0;
1672 	test_send_large(opt, cgrp);
1673 }
1674 
1675 static void test_txmsg_cork(int cgrp, struct sockmap_options *opt)
1676 {
1677 	txmsg_pass = 1;
1678 	txmsg_redir = 0;
1679 	txmsg_apply = 0;
1680 	txmsg_cork = 1;
1681 	test_send(opt, cgrp);
1682 
1683 	txmsg_pass = 1;
1684 	txmsg_redir = 0;
1685 	txmsg_apply = 1;
1686 	txmsg_cork = 1;
1687 	test_send(opt, cgrp);
1688 }
1689 
1690 static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt)
1691 {
1692 	txmsg_pass = 1;
1693 	skb_use_parser = 512;
1694 	if (ktls == 1)
1695 		skb_use_parser = 570;
1696 	opt->iov_length = 256;
1697 	opt->iov_count = 1;
1698 	opt->rate = 2;
1699 	test_exec(cgrp, opt);
1700 }
1701 
1702 static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt)
1703 {
1704 	if (ktls == 1)
1705 		return;
1706 	skb_use_parser = 10;
1707 	opt->iov_length = 20;
1708 	opt->iov_count = 1;
1709 	opt->rate = 1;
1710 	opt->check_recved_len = true;
1711 	test_exec(cgrp, opt);
1712 	opt->check_recved_len = false;
1713 }
1714 
/* Names of the maps looked up in the BPF object. populate_progs() stores
 * the fd of map_names[i] in map_fd[i], so the index here is the index used
 * for map_fd[] everywhere else.
 */
char *map_names[] = {
	[0] = "sock_map",
	[1] = "sock_map_txmsg",
	[2] = "sock_map_redir",
	[3] = "sock_apply_bytes",
	[4] = "sock_cork_bytes",
	[5] = "sock_bytes",
	[6] = "sock_redir_flags",
	[7] = "sock_skb_opts",
	[8] = "tls_sock_map",
};
1726 
1727 int prog_attach_type[] = {
1728 	BPF_SK_SKB_STREAM_PARSER,
1729 	BPF_SK_SKB_STREAM_VERDICT,
1730 	BPF_SK_SKB_STREAM_VERDICT,
1731 	BPF_CGROUP_SOCK_OPS,
1732 	BPF_SK_MSG_VERDICT,
1733 	BPF_SK_MSG_VERDICT,
1734 	BPF_SK_MSG_VERDICT,
1735 	BPF_SK_MSG_VERDICT,
1736 	BPF_SK_MSG_VERDICT,
1737 	BPF_SK_MSG_VERDICT,
1738 	BPF_SK_MSG_VERDICT,
1739 };
1740 
1741 int prog_type[] = {
1742 	BPF_PROG_TYPE_SK_SKB,
1743 	BPF_PROG_TYPE_SK_SKB,
1744 	BPF_PROG_TYPE_SK_SKB,
1745 	BPF_PROG_TYPE_SOCK_OPS,
1746 	BPF_PROG_TYPE_SK_MSG,
1747 	BPF_PROG_TYPE_SK_MSG,
1748 	BPF_PROG_TYPE_SK_MSG,
1749 	BPF_PROG_TYPE_SK_MSG,
1750 	BPF_PROG_TYPE_SK_MSG,
1751 	BPF_PROG_TYPE_SK_MSG,
1752 	BPF_PROG_TYPE_SK_MSG,
1753 };
1754 
1755 static int populate_progs(char *bpf_file)
1756 {
1757 	struct bpf_program *prog;
1758 	struct bpf_object *obj;
1759 	int i = 0;
1760 	long err;
1761 
1762 	obj = bpf_object__open(bpf_file);
1763 	err = libbpf_get_error(obj);
1764 	if (err) {
1765 		char err_buf[256];
1766 
1767 		libbpf_strerror(err, err_buf, sizeof(err_buf));
1768 		printf("Unable to load eBPF objects in file '%s' : %s\n",
1769 		       bpf_file, err_buf);
1770 		return -1;
1771 	}
1772 
1773 	bpf_object__for_each_program(prog, obj) {
1774 		bpf_program__set_type(prog, prog_type[i]);
1775 		bpf_program__set_expected_attach_type(prog,
1776 						      prog_attach_type[i]);
1777 		i++;
1778 	}
1779 
1780 	i = bpf_object__load(obj);
1781 	i = 0;
1782 	bpf_object__for_each_program(prog, obj) {
1783 		prog_fd[i] = bpf_program__fd(prog);
1784 		i++;
1785 	}
1786 
1787 	for (i = 0; i < ARRAY_SIZE(map_fd); i++) {
1788 		maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
1789 		map_fd[i] = bpf_map__fd(maps[i]);
1790 		if (map_fd[i] < 0) {
1791 			fprintf(stderr, "load_bpf_file: (%i) %s\n",
1792 				map_fd[i], strerror(errno));
1793 			return -1;
1794 		}
1795 	}
1796 
1797 	return 0;
1798 }
1799 
1800 struct _test test[] = {
1801 	{"txmsg test passthrough", test_txmsg_pass},
1802 	{"txmsg test redirect", test_txmsg_redir},
1803 	{"txmsg test drop", test_txmsg_drop},
1804 	{"txmsg test ingress redirect", test_txmsg_ingress_redir},
1805 	{"txmsg test skb", test_txmsg_skb},
1806 	{"txmsg test apply", test_txmsg_apply},
1807 	{"txmsg test cork", test_txmsg_cork},
1808 	{"txmsg test hanging corks", test_txmsg_cork_hangs},
1809 	{"txmsg test push_data", test_txmsg_push},
1810 	{"txmsg test pull-data", test_txmsg_pull},
1811 	{"txmsg test pop-data", test_txmsg_pop},
1812 	{"txmsg test push/pop data", test_txmsg_push_pop},
1813 	{"txmsg test ingress parser", test_txmsg_ingress_parser},
1814 	{"txmsg test ingress parser2", test_txmsg_ingress_parser2},
1815 };
1816 
1817 static int check_whitelist(struct _test *t, struct sockmap_options *opt)
1818 {
1819 	char *entry, *ptr;
1820 
1821 	if (!opt->whitelist)
1822 		return 0;
1823 	ptr = strdup(opt->whitelist);
1824 	if (!ptr)
1825 		return -ENOMEM;
1826 	entry = strtok(ptr, ",");
1827 	while (entry) {
1828 		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1829 		    strstr(opt->map, entry) != 0 ||
1830 		    strstr(t->title, entry) != 0)
1831 			return 0;
1832 		entry = strtok(NULL, ",");
1833 	}
1834 	return -EINVAL;
1835 }
1836 
1837 static int check_blacklist(struct _test *t, struct sockmap_options *opt)
1838 {
1839 	char *entry, *ptr;
1840 
1841 	if (!opt->blacklist)
1842 		return -EINVAL;
1843 	ptr = strdup(opt->blacklist);
1844 	if (!ptr)
1845 		return -ENOMEM;
1846 	entry = strtok(ptr, ",");
1847 	while (entry) {
1848 		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1849 		    strstr(opt->map, entry) != 0 ||
1850 		    strstr(t->title, entry) != 0)
1851 			return 0;
1852 		entry = strtok(NULL, ",");
1853 	}
1854 	return -EINVAL;
1855 }
1856 
1857 static int __test_selftests(int cg_fd, struct sockmap_options *opt)
1858 {
1859 	int i, err;
1860 
1861 	err = populate_progs(opt->map);
1862 	if (err < 0) {
1863 		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
1864 		return err;
1865 	}
1866 
1867 	/* Tests basic commands and APIs */
1868 	for (i = 0; i < ARRAY_SIZE(test); i++) {
1869 		struct _test t = test[i];
1870 
1871 		if (check_whitelist(&t, opt) != 0)
1872 			continue;
1873 		if (check_blacklist(&t, opt) == 0)
1874 			continue;
1875 
1876 		test_start_subtest(&t, opt);
1877 		t.tester(cg_fd, opt);
1878 		test_end_subtest();
1879 	}
1880 
1881 	return err;
1882 }
1883 
1884 static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt)
1885 {
1886 	opt->map = BPF_SOCKMAP_FILENAME;
1887 	__test_selftests(cg_fd, opt);
1888 }
1889 
1890 static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
1891 {
1892 	opt->map = BPF_SOCKHASH_FILENAME;
1893 	__test_selftests(cg_fd, opt);
1894 }
1895 
1896 static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
1897 {
1898 	opt->map = BPF_SOCKHASH_FILENAME;
1899 	opt->prepend = "ktls";
1900 	ktls = 1;
1901 	__test_selftests(cg_fd, opt);
1902 	ktls = 0;
1903 }
1904 
/* Run all three selftest groups (sockmap, sockhash, ktls) in that order and
 * print the results. Always returns 0.
 */
static int test_selftest(int cg_fd, struct sockmap_options *opt)
{
	test_selftests_sockmap(cg_fd, opt);
	test_selftests_sockhash(cg_fd, opt);
	test_selftests_ktls(cg_fd, opt);

	test_print_results();

	return 0;
}
1914 
1915 int main(int argc, char **argv)
1916 {
1917 	int iov_count = 1, length = 1024, rate = 1;
1918 	struct sockmap_options options = {0};
1919 	int opt, longindex, err, cg_fd = 0;
1920 	char *bpf_file = BPF_SOCKMAP_FILENAME;
1921 	int test = SELFTESTS;
1922 	bool cg_created = 0;
1923 
1924 	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:",
1925 				  long_options, &longindex)) != -1) {
1926 		switch (opt) {
1927 		case 's':
1928 			txmsg_start = atoi(optarg);
1929 			break;
1930 		case 'e':
1931 			txmsg_end = atoi(optarg);
1932 			break;
1933 		case 'p':
1934 			txmsg_start_push = atoi(optarg);
1935 			break;
1936 		case 'q':
1937 			txmsg_end_push = atoi(optarg);
1938 			break;
1939 		case 'w':
1940 			txmsg_start_pop = atoi(optarg);
1941 			break;
1942 		case 'x':
1943 			txmsg_pop = atoi(optarg);
1944 			break;
1945 		case 'a':
1946 			txmsg_apply = atoi(optarg);
1947 			break;
1948 		case 'k':
1949 			txmsg_cork = atoi(optarg);
1950 			break;
1951 		case 'c':
1952 			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
1953 			if (cg_fd < 0) {
1954 				fprintf(stderr,
1955 					"ERROR: (%i) open cg path failed: %s\n",
1956 					cg_fd, optarg);
1957 				return cg_fd;
1958 			}
1959 			break;
1960 		case 'r':
1961 			rate = atoi(optarg);
1962 			break;
1963 		case 'v':
1964 			options.verbose = 1;
1965 			if (optarg)
1966 				options.verbose = atoi(optarg);
1967 			break;
1968 		case 'i':
1969 			iov_count = atoi(optarg);
1970 			break;
1971 		case 'l':
1972 			length = atoi(optarg);
1973 			break;
1974 		case 'd':
1975 			options.data_test = true;
1976 			break;
1977 		case 't':
1978 			if (strcmp(optarg, "ping") == 0) {
1979 				test = PING_PONG;
1980 			} else if (strcmp(optarg, "sendmsg") == 0) {
1981 				test = SENDMSG;
1982 			} else if (strcmp(optarg, "base") == 0) {
1983 				test = BASE;
1984 			} else if (strcmp(optarg, "base_sendpage") == 0) {
1985 				test = BASE_SENDPAGE;
1986 			} else if (strcmp(optarg, "sendpage") == 0) {
1987 				test = SENDPAGE;
1988 			} else {
1989 				usage(argv);
1990 				return -1;
1991 			}
1992 			break;
1993 		case 'n':
1994 			options.whitelist = strdup(optarg);
1995 			if (!options.whitelist)
1996 				return -ENOMEM;
1997 			break;
1998 		case 'b':
1999 			options.blacklist = strdup(optarg);
2000 			if (!options.blacklist)
2001 				return -ENOMEM;
2002 		case 0:
2003 			break;
2004 		case 'h':
2005 		default:
2006 			usage(argv);
2007 			return -1;
2008 		}
2009 	}
2010 
2011 	if (!cg_fd) {
2012 		cg_fd = cgroup_setup_and_join(CG_PATH);
2013 		if (cg_fd < 0)
2014 			return cg_fd;
2015 		cg_created = 1;
2016 	}
2017 
2018 	/* Use libbpf 1.0 API mode */
2019 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
2020 
2021 	if (test == SELFTESTS) {
2022 		err = test_selftest(cg_fd, &options);
2023 		goto out;
2024 	}
2025 
2026 	err = populate_progs(bpf_file);
2027 	if (err) {
2028 		fprintf(stderr, "populate program: (%s) %s\n",
2029 			bpf_file, strerror(errno));
2030 		return 1;
2031 	}
2032 	running = 1;
2033 
2034 	/* catch SIGINT */
2035 	signal(SIGINT, running_handler);
2036 
2037 	options.iov_count = iov_count;
2038 	options.iov_length = length;
2039 	options.rate = rate;
2040 
2041 	err = run_options(&options, cg_fd, test);
2042 out:
2043 	if (options.whitelist)
2044 		free(options.whitelist);
2045 	if (options.blacklist)
2046 		free(options.blacklist);
2047 	if (cg_created)
2048 		cleanup_cgroup_environment();
2049 	close(cg_fd);
2050 	return err;
2051 }
2052 
2053 void running_handler(int a)
2054 {
2055 	running = 0;
2056 }
2057