1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 
5 #include <stddef.h>
6 #include <arpa/inet.h>
7 #include <error.h>
8 #include <errno.h>
9 #include <net/if.h>
10 #include <linux/in.h>
11 #include <linux/netlink.h>
12 #include <linux/rtnetlink.h>
13 #include <netinet/if_ether.h>
14 #include <netinet/ip.h>
15 #include <netinet/ip6.h>
16 #include <netinet/udp.h>
17 #include <stdbool.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/ioctl.h>
23 #include <sys/socket.h>
24 #include <sys/stat.h>
25 #include <sys/time.h>
26 #include <sys/types.h>
27 #include <unistd.h>
28 
/* Fallback definitions, used only when the included headers lack them */
#ifndef ETH_MAX_MTU
#define ETH_MAX_MTU	0xFFFFU
#endif

/* Option name passed with SOL_UDP to select the GSO segment size */
#ifndef UDP_SEGMENT
#define UDP_SEGMENT		103
#endif

/* MTU that the test programs on the device or on the route */
#define CONST_MTU_TEST	1500

/* Network header plus UDP header, per address family */
#define CONST_HDRLEN_V4		(sizeof(struct iphdr) + sizeof(struct udphdr))
#define CONST_HDRLEN_V6		(sizeof(struct ip6_hdr) + sizeof(struct udphdr))

/* UDP payload bytes that fit into one CONST_MTU_TEST sized packet */
#define CONST_MSS_V4		(CONST_MTU_TEST - CONST_HDRLEN_V4)
#define CONST_MSS_V6		(CONST_MTU_TEST - CONST_HDRLEN_V6)

/* Number of whole MSS sized segments contained in ETH_MAX_MTU bytes */
#define CONST_MAX_SEGS_V4	(ETH_MAX_MTU / CONST_MSS_V4)
#define CONST_MAX_SEGS_V6	(ETH_MAX_MTU / CONST_MSS_V6)
47 
48 static bool		cfg_do_ipv4;
49 static bool		cfg_do_ipv6;
50 static bool		cfg_do_connected;
51 static bool		cfg_do_connectionless;
52 static bool		cfg_do_msgmore;
53 static bool		cfg_do_setsockopt;
54 static int		cfg_specific_test_id = -1;
55 
56 static const char	cfg_ifname[] = "lo";
57 static unsigned short	cfg_port = 9000;
58 
59 static char buf[ETH_MAX_MTU];
60 
/* One send/receive scenario; an entry with tlen == 0 terminates a table */
struct testcase {
	/* number of bytes handed to send(); may be larger than one mss */
	int tlen;
	/* true when the send() is expected to be rejected */
	bool tfail;
	/* segment size requested from GSO; 0 leaves GSO off */
	int gso_len;
	/* recv(): how many full-mss datagrams must arrive */
	int r_num_mss;
	/* recv(): length of the trailing short datagram, 0 if none */
	int r_len_last;
};
68 
/* IPv4 test destination: loopback base address plus two, i.e. 127.0.0.3 */
const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
/* IPv6 test destination: the loopback address */
const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
71 
72 struct testcase testcases_v4[] = {
73 	{
74 		/* no GSO: send a single byte */
75 		.tlen = 1,
76 		.r_len_last = 1,
77 	},
78 	{
79 		/* no GSO: send a single MSS */
80 		.tlen = CONST_MSS_V4,
81 		.r_num_mss = 1,
82 	},
83 	{
84 		/* no GSO: send a single MSS + 1B: fail */
85 		.tlen = CONST_MSS_V4 + 1,
86 		.tfail = true,
87 	},
88 	{
89 		/* send a single MSS: will fail with GSO, because the segment
90 		 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
91 		 */
92 		.tlen = CONST_MSS_V4,
93 		.gso_len = CONST_MSS_V4,
94 		.tfail = true,
95 		.r_num_mss = 1,
96 	},
97 	{
98 		/* send a single MSS + 1B */
99 		.tlen = CONST_MSS_V4 + 1,
100 		.gso_len = CONST_MSS_V4,
101 		.r_num_mss = 1,
102 		.r_len_last = 1,
103 	},
104 	{
105 		/* send exactly 2 MSS */
106 		.tlen = CONST_MSS_V4 * 2,
107 		.gso_len = CONST_MSS_V4,
108 		.r_num_mss = 2,
109 	},
110 	{
111 		/* send 2 MSS + 1B */
112 		.tlen = (CONST_MSS_V4 * 2) + 1,
113 		.gso_len = CONST_MSS_V4,
114 		.r_num_mss = 2,
115 		.r_len_last = 1,
116 	},
117 	{
118 		/* send MAX segs */
119 		.tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
120 		.gso_len = CONST_MSS_V4,
121 		.r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
122 	},
123 
124 	{
125 		/* send MAX bytes */
126 		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
127 		.gso_len = CONST_MSS_V4,
128 		.r_num_mss = CONST_MAX_SEGS_V4,
129 		.r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
130 			      (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
131 	},
132 	{
133 		/* send MAX + 1: fail */
134 		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
135 		.gso_len = CONST_MSS_V4,
136 		.tfail = true,
137 	},
138 	{
139 		/* EOL */
140 	}
141 };
142 
143 #ifndef IP6_MAX_MTU
144 #define IP6_MAX_MTU	(ETH_MAX_MTU + sizeof(struct ip6_hdr))
145 #endif
146 
147 struct testcase testcases_v6[] = {
148 	{
149 		/* no GSO: send a single byte */
150 		.tlen = 1,
151 		.r_len_last = 1,
152 	},
153 	{
154 		/* no GSO: send a single MSS */
155 		.tlen = CONST_MSS_V6,
156 		.r_num_mss = 1,
157 	},
158 	{
159 		/* no GSO: send a single MSS + 1B: fail */
160 		.tlen = CONST_MSS_V6 + 1,
161 		.tfail = true,
162 	},
163 	{
164 		/* send a single MSS: will fail with GSO, because the segment
165 		 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
166 		 */
167 		.tlen = CONST_MSS_V6,
168 		.gso_len = CONST_MSS_V6,
169 		.tfail = true,
170 		.r_num_mss = 1,
171 	},
172 	{
173 		/* send a single MSS + 1B */
174 		.tlen = CONST_MSS_V6 + 1,
175 		.gso_len = CONST_MSS_V6,
176 		.r_num_mss = 1,
177 		.r_len_last = 1,
178 	},
179 	{
180 		/* send exactly 2 MSS */
181 		.tlen = CONST_MSS_V6 * 2,
182 		.gso_len = CONST_MSS_V6,
183 		.r_num_mss = 2,
184 	},
185 	{
186 		/* send 2 MSS + 1B */
187 		.tlen = (CONST_MSS_V6 * 2) + 1,
188 		.gso_len = CONST_MSS_V6,
189 		.r_num_mss = 2,
190 		.r_len_last = 1,
191 	},
192 	{
193 		/* send MAX segs */
194 		.tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
195 		.gso_len = CONST_MSS_V6,
196 		.r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
197 	},
198 
199 	{
200 		/* send MAX bytes */
201 		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
202 		.gso_len = CONST_MSS_V6,
203 		.r_num_mss = CONST_MAX_SEGS_V6,
204 		.r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
205 			      (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
206 	},
207 	{
208 		/* send MAX + 1: fail */
209 		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
210 		.gso_len = CONST_MSS_V6,
211 		.tfail = true,
212 	},
213 	{
214 		/* EOL */
215 	}
216 };
217 
218 static unsigned int get_device_mtu(int fd, const char *ifname)
219 {
220 	struct ifreq ifr;
221 
222 	memset(&ifr, 0, sizeof(ifr));
223 
224 	strcpy(ifr.ifr_name, ifname);
225 
226 	if (ioctl(fd, SIOCGIFMTU, &ifr))
227 		error(1, errno, "ioctl get mtu");
228 
229 	return ifr.ifr_mtu;
230 }
231 
232 static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
233 {
234 	struct ifreq ifr;
235 
236 	memset(&ifr, 0, sizeof(ifr));
237 
238 	ifr.ifr_mtu = mtu;
239 	strcpy(ifr.ifr_name, ifname);
240 
241 	if (ioctl(fd, SIOCSIFMTU, &ifr))
242 		error(1, errno, "ioctl set mtu");
243 }
244 
245 static void set_device_mtu(int fd, int mtu)
246 {
247 	int val;
248 
249 	val = get_device_mtu(fd, cfg_ifname);
250 	fprintf(stderr, "device mtu (orig): %u\n", val);
251 
252 	__set_device_mtu(fd, cfg_ifname, mtu);
253 	val = get_device_mtu(fd, cfg_ifname);
254 	if (val != mtu)
255 		error(1, 0, "unable to set device mtu to %u\n", val);
256 
257 	fprintf(stderr, "device mtu (test): %u\n", val);
258 }
259 
/*
 * Switch socket @fd to the "DO" path MTU discovery mode, using the IPv4
 * option when @is_ipv4 is true and the IPv6 option otherwise.
 * Terminates the program if setsockopt() fails.
 */
static void set_pmtu_discover(int fd, bool is_ipv4)
{
	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
	const int optname = is_ipv4 ? IP_MTU_DISCOVER : IPV6_MTU_DISCOVER;
	int mode = is_ipv4 ? IP_PMTUDISC_DO : IPV6_PMTUDISC_DO;

	if (setsockopt(fd, level, optname, &mode, sizeof(mode)) != 0)
		error(1, errno, "setsockopt path mtu");
}
277 
/*
 * Query the path MTU known to socket @fd through the IP_MTU (IPv4) or
 * IPV6_MTU (IPv6) socket option, log it to stderr and return it.
 * Terminates the program if getsockopt() fails.
 */
static unsigned int get_path_mtu(int fd, bool is_ipv4)
{
	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
	const int optname = is_ipv4 ? IP_MTU : IPV6_MTU;
	unsigned int pmtu;
	socklen_t optlen = sizeof(pmtu);

	if (getsockopt(fd, level, optname, &pmtu, &optlen) != 0)
		error(1, errno, "getsockopt mtu");

	fprintf(stderr, "path mtu (read):  %u\n", pmtu);
	return pmtu;
}
297 
298 /* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
299 static void set_route_mtu(int mtu, bool is_ipv4)
300 {
301 	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
302 	struct nlmsghdr *nh;
303 	struct rtattr *rta;
304 	struct rtmsg *rt;
305 	char data[NLMSG_ALIGN(sizeof(*nh)) +
306 		  NLMSG_ALIGN(sizeof(*rt)) +
307 		  NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
308 		  NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
309 		  NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
310 	int fd, ret, alen, off = 0;
311 
312 	alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
313 
314 	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
315 	if (fd == -1)
316 		error(1, errno, "socket netlink");
317 
318 	memset(data, 0, sizeof(data));
319 
320 	nh = (void *)data;
321 	nh->nlmsg_type = RTM_NEWROUTE;
322 	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
323 	off += NLMSG_ALIGN(sizeof(*nh));
324 
325 	rt = (void *)(data + off);
326 	rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
327 	rt->rtm_table = RT_TABLE_MAIN;
328 	rt->rtm_dst_len = alen << 3;
329 	rt->rtm_protocol = RTPROT_BOOT;
330 	rt->rtm_scope = RT_SCOPE_UNIVERSE;
331 	rt->rtm_type = RTN_UNICAST;
332 	off += NLMSG_ALIGN(sizeof(*rt));
333 
334 	rta = (void *)(data + off);
335 	rta->rta_type = RTA_DST;
336 	rta->rta_len = RTA_LENGTH(alen);
337 	if (is_ipv4)
338 		memcpy(RTA_DATA(rta), &addr4, alen);
339 	else
340 		memcpy(RTA_DATA(rta), &addr6, alen);
341 	off += NLMSG_ALIGN(rta->rta_len);
342 
343 	rta = (void *)(data + off);
344 	rta->rta_type = RTA_OIF;
345 	rta->rta_len = RTA_LENGTH(sizeof(int));
346 	*((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
347 	off += NLMSG_ALIGN(rta->rta_len);
348 
349 	/* MTU is a subtype in a metrics type */
350 	rta = (void *)(data + off);
351 	rta->rta_type = RTA_METRICS;
352 	rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
353 	off += NLMSG_ALIGN(rta->rta_len);
354 
355 	/* now fill MTU subtype. Note that it fits within above rta_len */
356 	rta = (void *)(((char *) rta) + RTA_LENGTH(0));
357 	rta->rta_type = RTAX_MTU;
358 	rta->rta_len = RTA_LENGTH(sizeof(int));
359 	*((int *)(RTA_DATA(rta))) = mtu;
360 
361 	nh->nlmsg_len = off;
362 
363 	ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
364 	if (ret != off)
365 		error(1, errno, "send netlink: %uB != %uB\n", ret, off);
366 
367 	if (close(fd))
368 		error(1, errno, "close netlink");
369 
370 	fprintf(stderr, "route mtu (test): %u\n", mtu);
371 }
372 
/*
 * Send @msg on socket @fd with sendmsg() and the given @flags.
 *
 * Returns true when the whole first iovec was sent, and false when the
 * kernel rejected the send with EMSGSIZE or ENOMEM (the failures the test
 * tables anticipate). Any other error, a short send, or non-zero returned
 * msg_flags terminates the program.
 */
static bool __send_one(int fd, struct msghdr *msg, int flags)
{
	ssize_t ret;

	ret = sendmsg(fd, msg, flags);
	if (ret == -1 && (errno == EMSGSIZE || errno == ENOMEM))
		return false;
	if (ret == -1)
		error(1, errno, "sendmsg");
	/* ret is non-negative here, so the unsigned comparison is exact */
	if ((size_t)ret != msg->msg_iov->iov_len)
		error(1, 0, "sendmsg: %zd != %zu", ret, msg->msg_iov->iov_len);
	if (msg->msg_flags)
		error(1, 0, "sendmsg: return flags 0x%x",
		      (unsigned int)msg->msg_flags);

	return true;
}
389 
390 static bool send_one(int fd, int len, int gso_len,
391 		     struct sockaddr *addr, socklen_t alen)
392 {
393 	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
394 	struct msghdr msg = {0};
395 	struct iovec iov = {0};
396 	struct cmsghdr *cm;
397 
398 	iov.iov_base = buf;
399 	iov.iov_len = len;
400 
401 	msg.msg_iov = &iov;
402 	msg.msg_iovlen = 1;
403 
404 	msg.msg_name = addr;
405 	msg.msg_namelen = alen;
406 
407 	if (gso_len && !cfg_do_setsockopt) {
408 		msg.msg_control = control;
409 		msg.msg_controllen = sizeof(control);
410 
411 		cm = CMSG_FIRSTHDR(&msg);
412 		cm->cmsg_level = SOL_UDP;
413 		cm->cmsg_type = UDP_SEGMENT;
414 		cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
415 		*((uint16_t *) CMSG_DATA(cm)) = gso_len;
416 	}
417 
418 	/* If MSG_MORE, send 1 byte followed by remainder */
419 	if (cfg_do_msgmore && len > 1) {
420 		iov.iov_len = 1;
421 		if (!__send_one(fd, &msg, MSG_MORE))
422 			error(1, 0, "send 1B failed");
423 
424 		iov.iov_base++;
425 		iov.iov_len = len - 1;
426 	}
427 
428 	return __send_one(fd, &msg, 0);
429 }
430 
431 static int recv_one(int fd, int flags)
432 {
433 	int ret;
434 
435 	ret = recv(fd, buf, sizeof(buf), flags);
436 	if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
437 		return 0;
438 	if (ret == -1)
439 		error(1, errno, "recv");
440 
441 	return ret;
442 }
443 
444 static void run_one(struct testcase *test, int fdt, int fdr,
445 		    struct sockaddr *addr, socklen_t alen)
446 {
447 	int i, ret, val, mss;
448 	bool sent;
449 
450 	fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
451 			addr->sa_family == AF_INET ? 4 : 6,
452 			test->tlen, test->gso_len,
453 			test->tfail ? "(fail)" : "");
454 
455 	val = test->gso_len;
456 	if (cfg_do_setsockopt) {
457 		if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
458 			error(1, errno, "setsockopt udp segment");
459 	}
460 
461 	sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
462 	if (sent && test->tfail)
463 		error(1, 0, "send succeeded while expecting failure");
464 	if (!sent && !test->tfail)
465 		error(1, 0, "send failed while expecting success");
466 	if (!sent)
467 		return;
468 
469 	mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
470 
471 	/* Recv all full MSS datagrams */
472 	for (i = 0; i < test->r_num_mss; i++) {
473 		ret = recv_one(fdr, 0);
474 		if (ret != mss)
475 			error(1, 0, "recv.%d: %d != %d", i, ret, mss);
476 	}
477 
478 	/* Recv the non-full last datagram, if tlen was not a multiple of mss */
479 	if (test->r_len_last) {
480 		ret = recv_one(fdr, 0);
481 		if (ret != test->r_len_last)
482 			error(1, 0, "recv.%d: %d != %d (last)",
483 			      i, ret, test->r_len_last);
484 	}
485 
486 	/* Verify received all data */
487 	ret = recv_one(fdr, MSG_DONTWAIT);
488 	if (ret)
489 		error(1, 0, "recv: unexpected datagram");
490 }
491 
492 static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
493 {
494 	struct testcase *tests, *test;
495 
496 	tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
497 
498 	for (test = tests; test->tlen; test++) {
499 		/* if a specific test is given, then skip all others */
500 		if (cfg_specific_test_id == -1 ||
501 		    cfg_specific_test_id == test - tests)
502 			run_one(test, fdt, fdr, addr, alen);
503 	}
504 }
505 
506 static void run_test(struct sockaddr *addr, socklen_t alen)
507 {
508 	struct timeval tv = { .tv_usec = 100 * 1000 };
509 	int fdr, fdt, val;
510 
511 	fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
512 	if (fdr == -1)
513 		error(1, errno, "socket r");
514 
515 	if (bind(fdr, addr, alen))
516 		error(1, errno, "bind");
517 
518 	/* Have tests fail quickly instead of hang */
519 	if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
520 		error(1, errno, "setsockopt rcv timeout");
521 
522 	fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
523 	if (fdt == -1)
524 		error(1, errno, "socket t");
525 
526 	/* Do not fragment these datagrams: only succeed if GSO works */
527 	set_pmtu_discover(fdt, addr->sa_family == AF_INET);
528 
529 	if (cfg_do_connectionless) {
530 		set_device_mtu(fdt, CONST_MTU_TEST);
531 		run_all(fdt, fdr, addr, alen);
532 	}
533 
534 	if (cfg_do_connected) {
535 		set_device_mtu(fdt, CONST_MTU_TEST + 100);
536 		set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
537 
538 		if (connect(fdt, addr, alen))
539 			error(1, errno, "connect");
540 
541 		val = get_path_mtu(fdt, addr->sa_family == AF_INET);
542 		if (val != CONST_MTU_TEST)
543 			error(1, 0, "bad path mtu %u\n", val);
544 
545 		run_all(fdt, fdr, addr, 0 /* use connected addr */);
546 	}
547 
548 	if (close(fdt))
549 		error(1, errno, "close t");
550 	if (close(fdr))
551 		error(1, errno, "close r");
552 }
553 
554 static void run_test_v4(void)
555 {
556 	struct sockaddr_in addr = {0};
557 
558 	addr.sin_family = AF_INET;
559 	addr.sin_port = htons(cfg_port);
560 	addr.sin_addr = addr4;
561 
562 	run_test((void *)&addr, sizeof(addr));
563 }
564 
565 static void run_test_v6(void)
566 {
567 	struct sockaddr_in6 addr = {0};
568 
569 	addr.sin6_family = AF_INET6;
570 	addr.sin6_port = htons(cfg_port);
571 	addr.sin6_addr = addr6;
572 
573 	run_test((void *)&addr, sizeof(addr));
574 }
575 
576 static void parse_opts(int argc, char **argv)
577 {
578 	int c;
579 
580 	while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
581 		switch (c) {
582 		case '4':
583 			cfg_do_ipv4 = true;
584 			break;
585 		case '6':
586 			cfg_do_ipv6 = true;
587 			break;
588 		case 'c':
589 			cfg_do_connected = true;
590 			break;
591 		case 'C':
592 			cfg_do_connectionless = true;
593 			break;
594 		case 'm':
595 			cfg_do_msgmore = true;
596 			break;
597 		case 's':
598 			cfg_do_setsockopt = true;
599 			break;
600 		case 't':
601 			cfg_specific_test_id = strtoul(optarg, NULL, 0);
602 			break;
603 		default:
604 			error(1, 0, "%s: parse error", argv[0]);
605 		}
606 	}
607 }
608 
609 int main(int argc, char **argv)
610 {
611 	parse_opts(argc, argv);
612 
613 	if (cfg_do_ipv4)
614 		run_test_v4();
615 	if (cfg_do_ipv6)
616 		run_test_v6();
617 
618 	fprintf(stderr, "OK\n");
619 	return 0;
620 }
621