// SPDX-License-Identifier: GPL-2.0

/*
 * UDP segmentation offload (UDP_SEGMENT) send test.
 *
 * For each entry in a per-address-family table, send one buffer from a
 * transmit socket to a receive socket bound to a loopback address, with
 * the device (and optionally route) MTU forced to a known value, and
 * verify the number and sizes of the datagrams that arrive.
 *
 * Options:
 *   -4 / -6   run the IPv4 / IPv6 table
 *   -C / -c   run with an unconnected / connected transmit socket
 *   -m        split each send into a 1 byte MSG_MORE send plus remainder
 *   -s        pass the segment size via setsockopt instead of a cmsg
 *   -t <id>   run only the test at index <id> in the table
 */

#define _GNU_SOURCE

#include <stddef.h>
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <net/if.h>
#include <linux/in.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#ifndef ETH_MAX_MTU
#define ETH_MAX_MTU	0xFFFFU
#endif

#ifndef UDP_SEGMENT
#define UDP_SEGMENT		103
#endif

#ifndef UDP_MAX_SEGMENTS
#define UDP_MAX_SEGMENTS	(1 << 6UL)
#endif

/* MTU that the tests force onto the device or route */
#define CONST_MTU_TEST	1500

#define CONST_HDRLEN_V4	(sizeof(struct iphdr) + sizeof(struct udphdr))
#define CONST_HDRLEN_V6	(sizeof(struct ip6_hdr) + sizeof(struct udphdr))

/* Largest UDP payload that fits in one packet at CONST_MTU_TEST */
#define CONST_MSS_V4		(CONST_MTU_TEST - CONST_HDRLEN_V4)
#define CONST_MSS_V6		(CONST_MTU_TEST - CONST_HDRLEN_V6)

#define CONST_MAX_SEGS_V4	(ETH_MAX_MTU / CONST_MSS_V4)
#define CONST_MAX_SEGS_V6	(ETH_MAX_MTU / CONST_MSS_V6)

static bool		cfg_do_ipv4;
static bool		cfg_do_ipv6;
static bool		cfg_do_connected;
static bool		cfg_do_connectionless;
static bool		cfg_do_msgmore;
static bool		cfg_do_setsockopt;
static int		cfg_specific_test_id = -1;	/* -1: run all tests */

static const char	cfg_ifname[] = "lo";
static unsigned short	cfg_port = 9000;

/* Shared send and receive buffer; contents are never inspected */
static char buf[ETH_MAX_MTU];

struct testcase {
	int tlen;		/* send() buffer size, may exceed mss */
	bool tfail;		/* send() call is expected to fail */
	int gso_len;		/* mss after applying gso */
	int r_num_mss;		/* recv(): number of calls of full mss */
	int r_len_last;		/* recv(): size of last non-mss dgram, if any */
};

/* Destination addresses: ::1 and INADDR_LOOPBACK + 2 (127.0.0.3) */
const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };

/* IPv4 test table; run_all() stops at the first entry with tlen == 0 */
struct testcase testcases_v4[] = {
	{
		/* no GSO: send a single byte */
		.tlen = 1,
		.r_len_last = 1,
	},
	{
		/* no GSO: send a single MSS */
		.tlen = CONST_MSS_V4,
		.r_num_mss = 1,
	},
	{
		/* no GSO: send a single MSS + 1B: fail */
		.tlen = CONST_MSS_V4 + 1,
		.tfail = true,
	},
	{
		/* send a single MSS: will fall back to no GSO */
		.tlen = CONST_MSS_V4,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = 1,
	},
	{
		/* send a single MSS + 1B */
		.tlen = CONST_MSS_V4 + 1,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = 1,
		.r_len_last = 1,
	},
	{
		/* send exactly 2 MSS */
		.tlen = CONST_MSS_V4 * 2,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = 2,
	},
	{
		/* send 2 MSS + 1B */
		.tlen = (CONST_MSS_V4 * 2) + 1,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send MAX segs */
		.tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
	},

	{
		/* send MAX bytes */
		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = CONST_MAX_SEGS_V4,
		.r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
			      (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
	},
	{
		/* send MAX + 1: fail */
		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
		.gso_len = CONST_MSS_V4,
		.tfail = true,
	},
	{
		/* send a single 1B MSS: will fall back to no GSO */
		.tlen = 1,
		.gso_len = 1,
		.r_num_mss = 1,
	},
	{
		/* send 2 1B segments */
		.tlen = 2,
		.gso_len = 1,
		.r_num_mss = 2,
	},
	{
		/* send 2B + 2B + 1B segments */
		.tlen = 5,
		.gso_len = 2,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send max number of min sized segments */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
		.gso_len = 1,
		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
	},
	{
		/* send max number + 1 of min sized segments: fail */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
		.gso_len = 1,
		.tfail = true,
	},
	{
		/* EOL */
	}
};

#ifndef IP6_MAX_MTU
#define IP6_MAX_MTU	(ETH_MAX_MTU + sizeof(struct ip6_hdr))
#endif

/* IPv6 test table; run_all() stops at the first entry with tlen == 0 */
struct testcase testcases_v6[] = {
	{
		/* no GSO: send a single byte */
		.tlen = 1,
		.r_len_last = 1,
	},
	{
		/* no GSO: send a single MSS */
		.tlen = CONST_MSS_V6,
		.r_num_mss = 1,
	},
	{
		/* no GSO: send a single MSS + 1B: fail */
		.tlen = CONST_MSS_V6 + 1,
		.tfail = true,
	},
	{
		/* send a single MSS: will fall back to no GSO */
		.tlen = CONST_MSS_V6,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = 1,
	},
	{
		/* send a single MSS + 1B */
		.tlen = CONST_MSS_V6 + 1,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = 1,
		.r_len_last = 1,
	},
	{
		/* send exactly 2 MSS */
		.tlen = CONST_MSS_V6 * 2,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = 2,
	},
	{
		/* send 2 MSS + 1B */
		.tlen = (CONST_MSS_V6 * 2) + 1,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send MAX segs */
		.tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
	},

	{
		/* send MAX bytes */
		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = CONST_MAX_SEGS_V6,
		.r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
			      (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
	},
	{
		/* send MAX + 1: fail */
		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
		.gso_len = CONST_MSS_V6,
		.tfail = true,
	},
	{
		/* send a single 1B MSS: will fall back to no GSO */
		.tlen = 1,
		.gso_len = 1,
		.r_num_mss = 1,
	},
	{
		/* send 2 1B segments */
		.tlen = 2,
		.gso_len = 1,
		.r_num_mss = 2,
	},
	{
		/* send 2B + 2B + 1B segments */
		.tlen = 5,
		.gso_len = 2,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send max number of min sized segments */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
		.gso_len = 1,
		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
	},
	{
		/* send max number + 1 of min sized segments: fail */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
		.gso_len = 1,
		.tfail = true,
	},
	{
		/* EOL */
	}
};

/*
 * Read the MTU of interface @ifname with SIOCGIFMTU issued on socket @fd.
 * Exits the program if the ioctl fails.
 */
static unsigned int get_device_mtu(int fd, const char *ifname)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));

	/* bounded copy: ifr_name is a fixed-size array */
	snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifname);

	if (ioctl(fd, SIOCGIFMTU, &ifr))
		error(1, errno, "ioctl get mtu");

	return ifr.ifr_mtu;
}

/*
 * Set the MTU of interface @ifname to @mtu with SIOCSIFMTU issued on
 * socket @fd. Exits the program if the ioctl fails.
 */
static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));

	ifr.ifr_mtu = mtu;
	/* bounded copy: ifr_name is a fixed-size array */
	snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifname);

	if (ioctl(fd, SIOCSIFMTU, &ifr))
		error(1, errno, "ioctl set mtu");
}

/*
 * Set the MTU of cfg_ifname to @mtu and read it back to confirm the
 * change took effect. Logs the original and new values to stderr.
 */
static void set_device_mtu(int fd, int mtu)
{
	int val;

	val = get_device_mtu(fd, cfg_ifname);
	fprintf(stderr, "device mtu (orig): %u\n", val);

	__set_device_mtu(fd, cfg_ifname, mtu);
	val = get_device_mtu(fd, cfg_ifname);
	if (val != mtu)
		/* report the value that was requested, not the one read back */
		error(1, 0, "unable to set device mtu to %u\n", mtu);

	fprintf(stderr, "device mtu (test): %u\n", val);
}

/*
 * Set the path MTU discovery mode of socket @fd to IP_PMTUDISC_DO
 * (IPv4) or IPV6_PMTUDISC_DO (IPv6).
 */
static void set_pmtu_discover(int fd, bool is_ipv4)
{
	int level, name, val;

	if (is_ipv4) {
		level	= SOL_IP;
		name	= IP_MTU_DISCOVER;
		val	= IP_PMTUDISC_DO;
	} else {
		level	= SOL_IPV6;
		name	= IPV6_MTU_DISCOVER;
		val	= IPV6_PMTUDISC_DO;
	}

	if (setsockopt(fd, level, name, &val, sizeof(val)))
		error(1, errno, "setsockopt path mtu");
}

/*
 * Return the value of the IP_MTU / IPV6_MTU socket option of @fd and
 * log it to stderr. run_test() calls this only after connect().
 */
static unsigned int get_path_mtu(int fd, bool is_ipv4)
{
	socklen_t vallen;
	unsigned int mtu;
	int ret;

	vallen = sizeof(mtu);
	if (is_ipv4)
		ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
	else
		ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);

	if (ret)
		error(1, errno, "getsockopt mtu");

	fprintf(stderr, "path mtu (read): %u\n", mtu);
	return mtu;
}

/*
 * Install a host route to addr4 or addr6 via cfg_ifname that carries an
 * MTU metric of @mtu, by sending one RTM_NEWROUTE netlink message.
 *
 * very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32")
 */
static void set_route_mtu(int mtu, bool is_ipv4)
{
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct nlmsghdr *nh;
	struct rtattr *rta;
	struct rtmsg *rt;
	/* sized for the larger (IPv6) destination address */
	char data[NLMSG_ALIGN(sizeof(*nh)) +
		  NLMSG_ALIGN(sizeof(*rt)) +
		  NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
		  NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
		  NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
	int fd, ret, alen, oif, off = 0;
	unsigned int ifindex;

	alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);

	/* look the output interface up instead of assuming its index */
	ifindex = if_nametoindex(cfg_ifname);
	if (!ifindex)
		error(1, errno, "if_nametoindex");
	oif = ifindex;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd == -1)
		error(1, errno, "socket netlink");

	memset(data, 0, sizeof(data));

	/* netlink header */
	nh = (void *)data;
	nh->nlmsg_type = RTM_NEWROUTE;
	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
	off += NLMSG_ALIGN(sizeof(*nh));

	/* route message: full-length prefix, i.e. a host route */
	rt = (void *)(data + off);
	rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
	rt->rtm_table = RT_TABLE_MAIN;
	rt->rtm_dst_len = alen << 3;
	rt->rtm_protocol = RTPROT_BOOT;
	rt->rtm_scope = RT_SCOPE_UNIVERSE;
	rt->rtm_type = RTN_UNICAST;
	off += NLMSG_ALIGN(sizeof(*rt));

	/* destination address */
	rta = (void *)(data + off);
	rta->rta_type = RTA_DST;
	rta->rta_len = RTA_LENGTH(alen);
	if (is_ipv4)
		memcpy(RTA_DATA(rta), &addr4, alen);
	else
		memcpy(RTA_DATA(rta), &addr6, alen);
	off += NLMSG_ALIGN(rta->rta_len);

	/* output interface */
	rta = (void *)(data + off);
	rta->rta_type = RTA_OIF;
	rta->rta_len = RTA_LENGTH(sizeof(int));
	memcpy(RTA_DATA(rta), &oif, sizeof(oif));
	off += NLMSG_ALIGN(rta->rta_len);

	/* MTU is a subtype in a metrics type */
	rta = (void *)(data + off);
	rta->rta_type = RTA_METRICS;
	rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
	off += NLMSG_ALIGN(rta->rta_len);

	/* now fill MTU subtype. Note that it fits within above rta_len */
	rta = (void *)(((char *) rta) + RTA_LENGTH(0));
	rta->rta_type = RTAX_MTU;
	rta->rta_len = RTA_LENGTH(sizeof(int));
	memcpy(RTA_DATA(rta), &mtu, sizeof(mtu));

	nh->nlmsg_len = off;

	ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
	if (ret != off)
		/* ret may be -1, so print both values as signed */
		error(1, errno, "send netlink: %dB != %dB\n", ret, off);

	if (close(fd))
		error(1, errno, "close netlink");

	fprintf(stderr, "route mtu (test): %u\n", mtu);
}

/*
 * Issue one sendmsg() on @fd.
 *
 * Returns false if the call fails with EMSGSIZE, ENOMEM or EINVAL, which
 * the caller treats as a rejected send. Returns true if the full first
 * iovec was sent. Any other failure, a short send, or non-zero returned
 * msg_flags exits the program.
 */
static bool __send_one(int fd, struct msghdr *msg, int flags)
{
	int ret;

	ret = sendmsg(fd, msg, flags);
	if (ret == -1 &&
	    (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
		return false;
	if (ret == -1)
		error(1, errno, "sendmsg");
	/* ret is non-negative here, so the cast cannot change its value */
	if ((size_t)ret != msg->msg_iov->iov_len)
		error(1, 0, "sendto: %d != %zu", ret, msg->msg_iov->iov_len);
	if (msg->msg_flags)
		error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);

	return true;
}

/*
 * Send @len bytes of buf on @fd to @addr (length @alen).
 *
 * If @gso_len is non-zero and the segment size is not being configured
 * through setsockopt (-s), it is attached as a SOL_UDP/UDP_SEGMENT
 * control message. With -m the payload is sent as one byte with MSG_MORE
 * followed by the remainder.
 *
 * Returns the result of the final __send_one() call.
 */
static bool send_one(int fd, int len, int gso_len,
		     struct sockaddr *addr, socklen_t alen)
{
	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
	struct msghdr msg = {0};
	struct iovec iov = {0};
	struct cmsghdr *cm;

	iov.iov_base = buf;
	iov.iov_len = len;

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	msg.msg_name = addr;
	msg.msg_namelen = alen;

	if (gso_len && !cfg_do_setsockopt) {
		uint16_t gso_size = gso_len;

		msg.msg_control = control;
		msg.msg_controllen = sizeof(control);

		cm = CMSG_FIRSTHDR(&msg);
		cm->cmsg_level = SOL_UDP;
		cm->cmsg_type = UDP_SEGMENT;
		cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
		/* copy rather than store through a cast pointer */
		memcpy(CMSG_DATA(cm), &gso_size, sizeof(gso_size));
	}

	/* If MSG_MORE, send 1 byte followed by remainder */
	if (cfg_do_msgmore && len > 1) {
		iov.iov_len = 1;
		if (!__send_one(fd, &msg, MSG_MORE))
			error(1, 0, "send 1B failed");

		/* iov_base is void *, so advance via the char buffer */
		iov.iov_base = buf + 1;
		iov.iov_len = len - 1;
	}

	return __send_one(fd, &msg, 0);
}

/*
 * Receive one datagram from @fd into buf.
 *
 * Returns the datagram length, or 0 if MSG_DONTWAIT was passed in @flags
 * and recv() failed with EAGAIN. Any other failure exits the program.
 */
static int recv_one(int fd, int flags)
{
	int ret;

	ret = recv(fd, buf, sizeof(buf), flags);
	if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
		return 0;
	if (ret == -1)
		error(1, errno, "recv");

	return ret;
}

/*
 * Run a single testcase: send on @fdt, then check on @fdr that exactly
 * the expected sequence of datagram sizes was received.
 */
static void run_one(struct testcase *test, int fdt, int fdr,
		    struct sockaddr *addr, socklen_t alen)
{
	int i, ret, val, mss;
	bool sent;

	fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
			addr->sa_family == AF_INET ? 4 : 6,
			test->tlen, test->gso_len,
			test->tfail ? "(fail)" : "");

	/* in setsockopt mode the option is rewritten for every test */
	val = test->gso_len;
	if (cfg_do_setsockopt) {
		if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
			error(1, errno, "setsockopt udp segment");
	}

	sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
	if (sent && test->tfail)
		error(1, 0, "send succeeded while expecting failure");
	if (!sent && !test->tfail)
		error(1, 0, "send failed while expecting success");
	if (!sent)
		return;

	if (test->gso_len)
		mss = test->gso_len;
	else
		mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;

	/* Recv all full MSS datagrams */
	for (i = 0; i < test->r_num_mss; i++) {
		ret = recv_one(fdr, 0);
		if (ret != mss)
			error(1, 0, "recv.%d: %d != %d", i, ret, mss);
	}

	/* Recv the non-full last datagram, if tlen was not a multiple of mss */
	if (test->r_len_last) {
		ret = recv_one(fdr, 0);
		if (ret != test->r_len_last)
			error(1, 0, "recv.%d: %d != %d (last)",
			      i, ret, test->r_len_last);
	}

	/* Verify received all data */
	ret = recv_one(fdr, MSG_DONTWAIT);
	if (ret)
		error(1, 0, "recv: unexpected datagram");
}

/*
 * Run every testcase in the table matching @addr's family, or only the
 * one whose index equals cfg_specific_test_id when that is not -1.
 */
static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
{
	struct testcase *tests, *test;

	tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;

	/* the table ends at the first entry whose tlen is zero */
	for (test = tests; test->tlen; test++) {
		/* if a specific test is given, then skip all others */
		if (cfg_specific_test_id == -1 ||
		    cfg_specific_test_id == test - tests)
			run_one(test, fdt, fdr, addr, alen);
	}
}

/*
 * Create a receive socket bound to @addr and a transmit socket, then run
 * the test table in the configured modes:
 *   -C: device MTU set to CONST_MTU_TEST, unconnected sends to @addr
 *   -c: device MTU set to CONST_MTU_TEST + 100 with a route MTU of
 *       CONST_MTU_TEST, sends on a connected socket
 */
static void run_test(struct sockaddr *addr, socklen_t alen)
{
	struct timeval tv = { .tv_usec = 100 * 1000 };
	int fdr, fdt, val;

	fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
	if (fdr == -1)
		error(1, errno, "socket r");

	if (bind(fdr, addr, alen))
		error(1, errno, "bind");

	/* Have tests fail quickly instead of hang */
	if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
		error(1, errno, "setsockopt rcv timeout");

	fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
	if (fdt == -1)
		error(1, errno, "socket t");

	/* Do not fragment these datagrams: only succeed if GSO works */
	set_pmtu_discover(fdt, addr->sa_family == AF_INET);

	if (cfg_do_connectionless) {
		set_device_mtu(fdt, CONST_MTU_TEST);
		run_all(fdt, fdr, addr, alen);
	}

	if (cfg_do_connected) {
		set_device_mtu(fdt, CONST_MTU_TEST + 100);
		set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);

		if (connect(fdt, addr, alen))
			error(1, errno, "connect");

		val = get_path_mtu(fdt, addr->sa_family == AF_INET);
		if (val != CONST_MTU_TEST)
			error(1, 0, "bad path mtu %u\n", val);

		run_all(fdt, fdr, addr, 0 /* use connected addr */);
	}

	if (close(fdt))
		error(1, errno, "close t");
	if (close(fdr))
		error(1, errno, "close r");
}

/* Run the tests over IPv4 against addr4:cfg_port */
static void run_test_v4(void)
{
	struct sockaddr_in addr = {0};

	addr.sin_family = AF_INET;
	addr.sin_port = htons(cfg_port);
	addr.sin_addr = addr4;

	run_test((void *)&addr, sizeof(addr));
}

/* Run the tests over IPv6 against addr6:cfg_port */
static void run_test_v6(void)
{
	struct sockaddr_in6 addr = {0};

	addr.sin6_family = AF_INET6;
	addr.sin6_port = htons(cfg_port);
	addr.sin6_addr = addr6;

	run_test((void *)&addr, sizeof(addr));
}

/*
 * Parse command line options into the cfg_* globals. Exits the program
 * on an unknown option or a malformed -t argument.
 */
static void parse_opts(int argc, char **argv)
{
	char *end;
	long id;
	int c;

	while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
		switch (c) {
		case '4':
			cfg_do_ipv4 = true;
			break;
		case '6':
			cfg_do_ipv6 = true;
			break;
		case 'c':
			cfg_do_connected = true;
			break;
		case 'C':
			cfg_do_connectionless = true;
			break;
		case 'm':
			cfg_do_msgmore = true;
			break;
		case 's':
			cfg_do_setsockopt = true;
			break;
		case 't':
			/*
			 * Reject non-numeric or out-of-range ids rather than
			 * silently running a different test; -1 keeps its
			 * meaning of "run all tests".
			 */
			errno = 0;
			id = strtol(optarg, &end, 0);
			if (errno || end == optarg || *end != '\0' ||
			    id < -1 || id > INT_MAX)
				error(1, 0, "%s: invalid test id '%s'",
				      argv[0], optarg);
			cfg_specific_test_id = id;
			break;
		default:
			error(1, 0, "%s: parse error", argv[0]);
		}
	}
}

/* Run the selected address families; prints "OK" if nothing failed */
int main(int argc, char **argv)
{
	parse_opts(argc, argv);

	if (cfg_do_ipv4)
		run_test_v4();
	if (cfg_do_ipv6)
		run_test_v6();

	fprintf(stderr, "OK\n");
	return 0;
}