// SPDX-License-Identifier: GPL-2.0

/*
 * UDP segmentation offload (UDP_SEGMENT) send/receive test.
 *
 * For each entry in a per-address-family table, send one buffer from a
 * transmit socket to a receive socket bound to a loopback address and check
 * that the receiver sees the expected number of gso_len-sized datagrams,
 * followed by an optional shorter trailing datagram.
 *
 * Options: -4/-6 select the address family, -C/-c select the connectionless
 * and connected variants, -m splits each send with MSG_MORE, -s configures
 * the segment size with setsockopt() instead of a control message, and
 * -t <id> runs a single table entry.
 */

#define _GNU_SOURCE

#include <stddef.h>
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <net/if.h>
#include <linux/in.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#ifndef ETH_MAX_MTU
#define ETH_MAX_MTU	0xFFFFU
#endif

#ifndef UDP_SEGMENT
#define UDP_SEGMENT		103
#endif

#ifndef UDP_MAX_SEGMENTS
#define UDP_MAX_SEGMENTS	(1 << 6UL)
#endif

#define CONST_MTU_TEST	1500

#define CONST_HDRLEN_V4		(sizeof(struct iphdr) + sizeof(struct udphdr))
#define CONST_HDRLEN_V6		(sizeof(struct ip6_hdr) + sizeof(struct udphdr))

#define CONST_MSS_V4		(CONST_MTU_TEST - CONST_HDRLEN_V4)
#define CONST_MSS_V6		(CONST_MTU_TEST - CONST_HDRLEN_V6)

#define CONST_MAX_SEGS_V4	(ETH_MAX_MTU / CONST_MSS_V4)
#define CONST_MAX_SEGS_V6	(ETH_MAX_MTU / CONST_MSS_V6)

static bool		cfg_do_ipv4;
static bool		cfg_do_ipv6;
static bool		cfg_do_connected;
static bool		cfg_do_connectionless;
static bool		cfg_do_msgmore;
static bool		cfg_do_setsockopt;
static int		cfg_specific_test_id = -1;

static const char	cfg_ifname[] = "lo";
static unsigned short	cfg_port = 9000;

/* Shared payload buffer: source for every send and target for every recv */
static char buf[ETH_MAX_MTU];

struct testcase {
	int tlen;		/* send() buffer size, may exceed mss */
	bool tfail;		/* send() call is expected to fail */
	int gso_len;		/* mss after applying gso */
	int r_num_mss;		/* recv(): number of calls of full mss */
	int r_len_last;		/* recv(): size of last non-mss dgram, if any */
};

const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };

/* IPv4 cases; the table ends at the first entry with tlen == 0 */
struct testcase testcases_v4[] = {
	{
		/* no GSO: send a single byte */
		.tlen = 1,
		.r_len_last = 1,
	},
	{
		/* no GSO: send a single MSS */
		.tlen = CONST_MSS_V4,
		.r_num_mss = 1,
	},
	{
		/* no GSO: send a single MSS + 1B: fail */
		.tlen = CONST_MSS_V4 + 1,
		.tfail = true,
	},
	{
		/* send a single MSS: will fail with GSO, because the segment
		 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
		 */
		.tlen = CONST_MSS_V4,
		.gso_len = CONST_MSS_V4,
		.tfail = true,
		.r_num_mss = 1,
	},
	{
		/* send a single MSS + 1B */
		.tlen = CONST_MSS_V4 + 1,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = 1,
		.r_len_last = 1,
	},
	{
		/* send exactly 2 MSS */
		.tlen = CONST_MSS_V4 * 2,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = 2,
	},
	{
		/* send 2 MSS + 1B */
		.tlen = (CONST_MSS_V4 * 2) + 1,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send MAX segs */
		.tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
	},

	{
		/* send MAX bytes */
		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = CONST_MAX_SEGS_V4,
		.r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
			      (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
	},
	{
		/* send MAX + 1: fail */
		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
		.gso_len = CONST_MSS_V4,
		.tfail = true,
	},
	{
		/* send a single 1B MSS: will fail, see single MSS above */
		.tlen = 1,
		.gso_len = 1,
		.tfail = true,
		.r_num_mss = 1,
	},
	{
		/* send 2 1B segments */
		.tlen = 2,
		.gso_len = 1,
		.r_num_mss = 2,
	},
	{
		/* send 2B + 2B + 1B segments */
		.tlen = 5,
		.gso_len = 2,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send max number of min sized segments */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
		.gso_len = 1,
		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
	},
	{
		/* send max number + 1 of min sized segments: fail */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
		.gso_len = 1,
		.tfail = true,
	},
	{
		/* EOL */
	}
};

#ifndef IP6_MAX_MTU
#define IP6_MAX_MTU	(ETH_MAX_MTU + sizeof(struct ip6_hdr))
#endif

/* IPv6 cases; the table ends at the first entry with tlen == 0 */
struct testcase testcases_v6[] = {
	{
		/* no GSO: send a single byte */
		.tlen = 1,
		.r_len_last = 1,
	},
	{
		/* no GSO: send a single MSS */
		.tlen = CONST_MSS_V6,
		.r_num_mss = 1,
	},
	{
		/* no GSO: send a single MSS + 1B: fail */
		.tlen = CONST_MSS_V6 + 1,
		.tfail = true,
	},
	{
		/* send a single MSS: will fail with GSO, because the segment
		 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
		 */
		.tlen = CONST_MSS_V6,
		.gso_len = CONST_MSS_V6,
		.tfail = true,
		.r_num_mss = 1,
	},
	{
		/* send a single MSS + 1B */
		.tlen = CONST_MSS_V6 + 1,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = 1,
		.r_len_last = 1,
	},
	{
		/* send exactly 2 MSS */
		.tlen = CONST_MSS_V6 * 2,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = 2,
	},
	{
		/* send 2 MSS + 1B */
		.tlen = (CONST_MSS_V6 * 2) + 1,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send MAX segs */
		.tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
	},

	{
		/* send MAX bytes */
		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = CONST_MAX_SEGS_V6,
		.r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
			      (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
	},
	{
		/* send MAX + 1: fail */
		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
		.gso_len = CONST_MSS_V6,
		.tfail = true,
	},
	{
		/* send a single 1B MSS: will fail, see single MSS above */
		.tlen = 1,
		.gso_len = 1,
		.tfail = true,
		.r_num_mss = 1,
	},
	{
		/* send 2 1B segments */
		.tlen = 2,
		.gso_len = 1,
		.r_num_mss = 2,
	},
	{
		/* send 2B + 2B + 1B segments */
		.tlen = 5,
		.gso_len = 2,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send max number of min sized segments */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
		.gso_len = 1,
		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
	},
	{
		/* send max number + 1 of min sized segments: fail */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
		.gso_len = 1,
		.tfail = true,
	},
	{
		/* EOL */
	}
};

/*
 * Copy @ifname into @ifr->ifr_name, exiting if it does not fit including
 * the terminating NUL. The caller is expected to have zeroed @ifr.
 */
static void ifr_set_name(struct ifreq *ifr, const char *ifname)
{
	size_t len = strlen(ifname);

	if (len >= sizeof(ifr->ifr_name))
		error(1, 0, "interface name too long: %s", ifname);

	memcpy(ifr->ifr_name, ifname, len + 1);
}

/* Return the MTU of device @ifname as reported by SIOCGIFMTU on @fd */
static unsigned int get_device_mtu(int fd, const char *ifname)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));

	ifr_set_name(&ifr, ifname);

	if (ioctl(fd, SIOCGIFMTU, &ifr))
		error(1, errno, "ioctl get mtu");

	return ifr.ifr_mtu;
}

/* Set the MTU of device @ifname to @mtu with SIOCSIFMTU; exit on failure */
static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));

	ifr.ifr_mtu = mtu;
	ifr_set_name(&ifr, ifname);

	if (ioctl(fd, SIOCSIFMTU, &ifr))
		error(1, errno, "ioctl set mtu");
}

/*
 * Set the MTU of cfg_ifname to @mtu and read it back to verify.
 * Logs the MTU found before and after the change to stderr.
 */
static void set_device_mtu(int fd, int mtu)
{
	unsigned int val;

	val = get_device_mtu(fd, cfg_ifname);
	fprintf(stderr, "device mtu (orig): %u\n", val);

	__set_device_mtu(fd, cfg_ifname, mtu);
	val = get_device_mtu(fd, cfg_ifname);
	if (val != (unsigned int)mtu)
		/* report the MTU that was requested, not the one read back */
		error(1, 0, "unable to set device mtu to %u\n",
		      (unsigned int)mtu);

	fprintf(stderr, "device mtu (test): %u\n", val);
}

/* Select the *_PMTUDISC_DO path MTU discovery mode on @fd */
static void set_pmtu_discover(int fd, bool is_ipv4)
{
	int level, name, val;

	if (is_ipv4) {
		level	= SOL_IP;
		name	= IP_MTU_DISCOVER;
		val	= IP_PMTUDISC_DO;
	} else {
		level	= SOL_IPV6;
		name	= IPV6_MTU_DISCOVER;
		val	= IPV6_PMTUDISC_DO;
	}

	if (setsockopt(fd, level, name, &val, sizeof(val)))
		error(1, errno, "setsockopt path mtu");
}

/* Read, log and return the IP_MTU / IPV6_MTU socket option of @fd */
static unsigned int get_path_mtu(int fd, bool is_ipv4)
{
	socklen_t vallen;
	unsigned int mtu;
	int ret;

	vallen = sizeof(mtu);
	if (is_ipv4)
		ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
	else
		ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);

	if (ret)
		error(1, errno, "getsockopt mtu");

	fprintf(stderr, "path mtu (read): %u\n", mtu);
	return mtu;
}

/*
 * very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32")
 *
 * Builds one RTM_NEWROUTE netlink message for the test address (addr4 or
 * addr6, as a host route) via cfg_ifname with an RTAX_MTU metric of @mtu,
 * and sends it on a NETLINK_ROUTE socket. No reply is read.
 */
static void set_route_mtu(int mtu, bool is_ipv4)
{
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct nlmsghdr *nh;
	struct rtattr *rta;
	struct rtmsg *rt;
	/* sized for the larger (IPv6) address so both families fit */
	char data[NLMSG_ALIGN(sizeof(*nh)) +
		  NLMSG_ALIGN(sizeof(*rt)) +
		  NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
		  NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
		  NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
	unsigned int ifindex;
	int fd, ret, alen, off = 0;

	alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);

	/* resolve the output interface instead of assuming index 1 */
	ifindex = if_nametoindex(cfg_ifname);
	if (!ifindex)
		error(1, errno, "if_nametoindex %s", cfg_ifname);

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd == -1)
		error(1, errno, "socket netlink");

	memset(data, 0, sizeof(data));

	nh = (void *)data;
	nh->nlmsg_type = RTM_NEWROUTE;
	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
	off += NLMSG_ALIGN(sizeof(*nh));

	rt = (void *)(data + off);
	rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
	rt->rtm_table = RT_TABLE_MAIN;
	rt->rtm_dst_len = alen << 3;	/* prefix length in bits: host route */
	rt->rtm_protocol = RTPROT_BOOT;
	rt->rtm_scope = RT_SCOPE_UNIVERSE;
	rt->rtm_type = RTN_UNICAST;
	off += NLMSG_ALIGN(sizeof(*rt));

	rta = (void *)(data + off);
	rta->rta_type = RTA_DST;
	rta->rta_len = RTA_LENGTH(alen);
	if (is_ipv4)
		memcpy(RTA_DATA(rta), &addr4, alen);
	else
		memcpy(RTA_DATA(rta), &addr6, alen);
	off += NLMSG_ALIGN(rta->rta_len);

	rta = (void *)(data + off);
	rta->rta_type = RTA_OIF;
	rta->rta_len = RTA_LENGTH(sizeof(int));
	*((int *)(RTA_DATA(rta))) = ifindex;
	off += NLMSG_ALIGN(rta->rta_len);

	/* MTU is a subtype in a metrics type */
	rta = (void *)(data + off);
	rta->rta_type = RTA_METRICS;
	rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
	off += NLMSG_ALIGN(rta->rta_len);

	/* now fill MTU subtype. Note that it fits within above rta_len */
	rta = (void *)(((char *) rta) + RTA_LENGTH(0));
	rta->rta_type = RTAX_MTU;
	rta->rta_len = RTA_LENGTH(sizeof(int));
	*((int *)(RTA_DATA(rta))) = mtu;

	nh->nlmsg_len = off;

	ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
	if (ret != off)
		error(1, errno, "send netlink: %dB != %dB\n", ret, off);

	if (close(fd))
		error(1, errno, "close netlink");

	fprintf(stderr, "route mtu (test): %u\n", mtu);
}

/*
 * Call sendmsg() once.
 *
 * Returns false when sendmsg() fails with EMSGSIZE, ENOMEM or EINVAL, which
 * callers treat as a rejected send. Any other failure, a short write
 * relative to the first iovec, or non-zero msg_flags afterwards is fatal.
 */
static bool __send_one(int fd, struct msghdr *msg, int flags)
{
	int ret;

	ret = sendmsg(fd, msg, flags);
	if (ret == -1 &&
	    (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
		return false;
	if (ret == -1)
		error(1, errno, "sendmsg");
	/* ret is non-negative here, so the conversion is value-preserving */
	if ((size_t)ret != msg->msg_iov->iov_len)
		error(1, 0, "sendto: %d != %zu", ret, msg->msg_iov->iov_len);
	if (msg->msg_flags)
		error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);

	return true;
}

/*
 * Send @len bytes of buf to @addr on @fd.
 *
 * When @gso_len is non-zero and the segment size is not being configured
 * through setsockopt (cfg_do_setsockopt), it is passed as a UDP_SEGMENT
 * control message. With cfg_do_msgmore and @len > 1 the payload is sent as
 * a 1 byte MSG_MORE write followed by the remainder.
 *
 * Returns the result of the final __send_one() call.
 */
static bool send_one(int fd, int len, int gso_len,
		     struct sockaddr *addr, socklen_t alen)
{
	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
	struct msghdr msg = {0};
	struct iovec iov = {0};
	struct cmsghdr *cm;

	iov.iov_base = buf;
	iov.iov_len = len;

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	msg.msg_name = addr;
	msg.msg_namelen = alen;

	if (gso_len && !cfg_do_setsockopt) {
		uint16_t seg_size = gso_len;

		msg.msg_control = control;
		msg.msg_controllen = sizeof(control);

		cm = CMSG_FIRSTHDR(&msg);
		cm->cmsg_level = SOL_UDP;
		cm->cmsg_type = UDP_SEGMENT;
		cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
		/* copy rather than store through a type-punned pointer */
		memcpy(CMSG_DATA(cm), &seg_size, sizeof(seg_size));
	}

	/* If MSG_MORE, send 1 byte followed by remainder */
	if (cfg_do_msgmore && len > 1) {
		iov.iov_len = 1;
		if (!__send_one(fd, &msg, MSG_MORE))
			error(1, 0, "send 1B failed");

		/* iov_base is void *: advance via the typed buffer instead */
		iov.iov_base = buf + 1;
		iov.iov_len = len - 1;
	}

	return __send_one(fd, &msg, 0);
}

/*
 * Receive one datagram into buf and return its length.
 * Returns 0 if @flags has MSG_DONTWAIT and recv() fails with EAGAIN;
 * any other recv() failure is fatal.
 */
static int recv_one(int fd, int flags)
{
	int ret;

	ret = recv(fd, buf, sizeof(buf), flags);
	if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
		return 0;
	if (ret == -1)
		error(1, errno, "recv");

	return ret;
}

/*
 * Run a single test case: send on @fdt, check the send outcome against
 * test->tfail and, for a successful send, check the datagrams that arrive
 * on @fdr against test->r_num_mss and test->r_len_last.
 */
static void run_one(struct testcase *test, int fdt, int fdr,
		    struct sockaddr *addr, socklen_t alen)
{
	int i, ret, val, mss;
	bool sent;

	fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
			addr->sa_family == AF_INET ? 4 : 6,
			test->tlen, test->gso_len,
			test->tfail ? "(fail)" : "");

	/* set on every case, so a gso_len of 0 overwrites an earlier value */
	val = test->gso_len;
	if (cfg_do_setsockopt) {
		if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
			error(1, errno, "setsockopt udp segment");
	}

	sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
	if (sent && test->tfail)
		error(1, 0, "send succeeded while expecting failure");
	if (!sent && !test->tfail)
		error(1, 0, "send failed while expecting success");
	if (!sent)
		return;

	if (test->gso_len)
		mss = test->gso_len;
	else
		mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;

	/* Recv all full MSS datagrams */
	for (i = 0; i < test->r_num_mss; i++) {
		ret = recv_one(fdr, 0);
		if (ret != mss)
			error(1, 0, "recv.%d: %d != %d", i, ret, mss);
	}

	/* Recv the non-full last datagram, if tlen was not a multiple of mss */
	if (test->r_len_last) {
		ret = recv_one(fdr, 0);
		if (ret != test->r_len_last)
			error(1, 0, "recv.%d: %d != %d (last)",
			      i, ret, test->r_len_last);
	}

	/* Verify received all data */
	ret = recv_one(fdr, MSG_DONTWAIT);
	if (ret)
		error(1, 0, "recv: unexpected datagram");
}

/*
 * Run every case of the table matching addr->sa_family, or only the case
 * whose index equals cfg_specific_test_id when that is not -1.
 */
static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
{
	struct testcase *tests, *test;

	tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;

	for (test = tests; test->tlen; test++) {
		/* if a specific test is given, then skip all others */
		if (cfg_specific_test_id == -1 ||
		    cfg_specific_test_id == test - tests)
			run_one(test, fdt, fdr, addr, alen);
	}
}

/*
 * Create a receive socket bound to @addr and a transmit socket, then run
 * the connectionless and/or connected variants as selected by
 * cfg_do_connectionless and cfg_do_connected.
 */
static void run_test(struct sockaddr *addr, socklen_t alen)
{
	struct timeval tv = { .tv_usec = 100 * 1000 };
	int fdr, fdt, val;

	fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
	if (fdr == -1)
		error(1, errno, "socket r");

	if (bind(fdr, addr, alen))
		error(1, errno, "bind");

	/* Have tests fail quickly instead of hang */
	if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
		error(1, errno, "setsockopt rcv timeout");

	fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
	if (fdt == -1)
		error(1, errno, "socket t");

	/* Do not fragment these datagrams: only succeed if GSO works */
	set_pmtu_discover(fdt, addr->sa_family == AF_INET);

	if (cfg_do_connectionless) {
		set_device_mtu(fdt, CONST_MTU_TEST);
		run_all(fdt, fdr, addr, alen);
	}

	if (cfg_do_connected) {
		/* device MTU above the test MTU; the route MTU sets the limit */
		set_device_mtu(fdt, CONST_MTU_TEST + 100);
		set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);

		if (connect(fdt, addr, alen))
			error(1, errno, "connect");

		val = get_path_mtu(fdt, addr->sa_family == AF_INET);
		if (val != CONST_MTU_TEST)
			error(1, 0, "bad path mtu %u\n", val);

		run_all(fdt, fdr, addr, 0 /* use connected addr */);
	}

	if (close(fdt))
		error(1, errno, "close t");
	if (close(fdr))
		error(1, errno, "close r");
}

/* Run the tests over IPv4 against addr4:cfg_port */
static void run_test_v4(void)
{
	struct sockaddr_in addr = {0};

	addr.sin_family = AF_INET;
	addr.sin_port = htons(cfg_port);
	addr.sin_addr = addr4;

	run_test((void *)&addr, sizeof(addr));
}

/* Run the tests over IPv6 against addr6:cfg_port */
static void run_test_v6(void)
{
	struct sockaddr_in6 addr = {0};

	addr.sin6_family = AF_INET6;
	addr.sin6_port = htons(cfg_port);
	addr.sin6_addr = addr6;

	run_test((void *)&addr, sizeof(addr));
}

/*
 * Parse command line options into the cfg_* globals.
 * Exits on an unknown option or on a -t argument that is not a complete
 * non-negative number representable as an int.
 */
static void parse_opts(int argc, char **argv)
{
	int c;

	while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
		switch (c) {
		case '4':
			cfg_do_ipv4 = true;
			break;
		case '6':
			cfg_do_ipv6 = true;
			break;
		case 'c':
			cfg_do_connected = true;
			break;
		case 'C':
			cfg_do_connectionless = true;
			break;
		case 'm':
			cfg_do_msgmore = true;
			break;
		case 's':
			cfg_do_setsockopt = true;
			break;
		case 't': {
			unsigned long id;
			char *end;

			/* reject garbage instead of silently running test 0 */
			errno = 0;
			id = strtoul(optarg, &end, 0);
			if (errno || end == optarg || *end != '\0' ||
			    id > INT_MAX)
				error(1, 0, "%s: invalid test id '%s'",
				      argv[0], optarg);
			cfg_specific_test_id = id;
			break;
		}
		default:
			error(1, 0, "%s: parse error", argv[0]);
		}
	}
}

int main(int argc, char **argv)
{
	parse_opts(argc, argv);

	if (cfg_do_ipv4)
		run_test_v4();
	if (cfg_do_ipv6)
		run_test_v6();

	fprintf(stderr, "OK\n");
	return 0;
}