// SPDX-License-Identifier: GPL-2.0

/*
 * UDP GSO (UDP_SEGMENT) send-path test.
 *
 * Sends datagrams of various sizes over the loopback device, with and
 * without a GSO segment size, and verifies both whether the send succeeds
 * and how the payload is split up on the receive side.
 *
 * Options:
 *   -4 / -6   run the IPv4 / IPv6 test table
 *   -C        run with an unconnected sender (device MTU limits the size)
 *   -c        run with a connected sender (route MTU limits the size)
 *   -m        split each send in two using MSG_MORE
 *   -s        pass the segment size with setsockopt() instead of a cmsg
 *   -t <id>   run only the test with this index in the table
 */

#define _GNU_SOURCE

#include <stddef.h>
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <net/if.h>
#include <linux/in.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#ifndef ETH_MAX_MTU
#define ETH_MAX_MTU	0xFFFFU
#endif

#ifndef UDP_SEGMENT
#define UDP_SEGMENT		103
#endif

#ifndef UDP_MAX_SEGMENTS
#define UDP_MAX_SEGMENTS	(1 << 6UL)
#endif

#define CONST_MTU_TEST	1500

#define CONST_HDRLEN_V4		(sizeof(struct iphdr) + sizeof(struct udphdr))
#define CONST_HDRLEN_V6		(sizeof(struct ip6_hdr) + sizeof(struct udphdr))

#define CONST_MSS_V4		(CONST_MTU_TEST - CONST_HDRLEN_V4)
#define CONST_MSS_V6		(CONST_MTU_TEST - CONST_HDRLEN_V6)

#define CONST_MAX_SEGS_V4	(ETH_MAX_MTU / CONST_MSS_V4)
#define CONST_MAX_SEGS_V6	(ETH_MAX_MTU / CONST_MSS_V6)

static bool		cfg_do_ipv4;
static bool		cfg_do_ipv6;
static bool		cfg_do_connected;
static bool		cfg_do_connectionless;
static bool		cfg_do_msgmore;
static bool		cfg_do_setsockopt;
static int		cfg_specific_test_id = -1;

static const char	cfg_ifname[] = "lo";
static unsigned short	cfg_port = 9000;

/* shared payload buffer, used for both sending and receiving */
static char buf[ETH_MAX_MTU];

struct testcase {
	int tlen;		/* send() buffer size, may exceed mss */
	bool tfail;		/* send() call is expected to fail */
	int gso_len;		/* mss after applying gso */
	int r_num_mss;		/* recv(): number of calls of full mss */
	int r_len_last;		/* recv(): size of last non-mss dgram, if any */
};

const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };

/* IPv4 test table; terminated by an all-zero entry (tlen == 0) */
struct testcase testcases_v4[] = {
	{
		/* no GSO: send a single byte */
		.tlen = 1,
		.r_len_last = 1,
	},
	{
		/* no GSO: send a single MSS */
		.tlen = CONST_MSS_V4,
		.r_num_mss = 1,
	},
	{
		/* no GSO: send a single MSS + 1B: fail */
		.tlen = CONST_MSS_V4 + 1,
		.tfail = true,
	},
	{
		/* send a single MSS: will fail with GSO, because the segment
		 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
		 */
		.tlen = CONST_MSS_V4,
		.gso_len = CONST_MSS_V4,
		.tfail = true,
		.r_num_mss = 1,
	},
	{
		/* send a single MSS + 1B */
		.tlen = CONST_MSS_V4 + 1,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = 1,
		.r_len_last = 1,
	},
	{
		/* send exactly 2 MSS */
		.tlen = CONST_MSS_V4 * 2,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = 2,
	},
	{
		/* send 2 MSS + 1B */
		.tlen = (CONST_MSS_V4 * 2) + 1,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send MAX segs */
		.tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
	},

	{
		/* send MAX bytes */
		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
		.gso_len = CONST_MSS_V4,
		.r_num_mss = CONST_MAX_SEGS_V4,
		.r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
			      (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
	},
	{
		/* send MAX + 1: fail */
		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
		.gso_len = CONST_MSS_V4,
		.tfail = true,
	},
	{
		/* send a single 1B MSS: will fail, see single MSS above */
		.tlen = 1,
		.gso_len = 1,
		.tfail = true,
		.r_num_mss = 1,
	},
	{
		/* send 2 1B segments */
		.tlen = 2,
		.gso_len = 1,
		.r_num_mss = 2,
	},
	{
		/* send 2B + 2B + 1B segments */
		.tlen = 5,
		.gso_len = 2,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send max number of min sized segments */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
		.gso_len = 1,
		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
	},
	{
		/* send max number + 1 of min sized segments: fail */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
		.gso_len = 1,
		.tfail = true,
	},
	{
		/* EOL */
	}
};

#ifndef IP6_MAX_MTU
#define IP6_MAX_MTU	(ETH_MAX_MTU + sizeof(struct ip6_hdr))
#endif

/* IPv6 test table; terminated by an all-zero entry (tlen == 0) */
struct testcase testcases_v6[] = {
	{
		/* no GSO: send a single byte */
		.tlen = 1,
		.r_len_last = 1,
	},
	{
		/* no GSO: send a single MSS */
		.tlen = CONST_MSS_V6,
		.r_num_mss = 1,
	},
	{
		/* no GSO: send a single MSS + 1B: fail */
		.tlen = CONST_MSS_V6 + 1,
		.tfail = true,
	},
	{
		/* send a single MSS: will fail with GSO, because the segment
		 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
		 */
		.tlen = CONST_MSS_V6,
		.gso_len = CONST_MSS_V6,
		.tfail = true,
		.r_num_mss = 1,
	},
	{
		/* send a single MSS + 1B */
		.tlen = CONST_MSS_V6 + 1,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = 1,
		.r_len_last = 1,
	},
	{
		/* send exactly 2 MSS */
		.tlen = CONST_MSS_V6 * 2,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = 2,
	},
	{
		/* send 2 MSS + 1B */
		.tlen = (CONST_MSS_V6 * 2) + 1,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send MAX segs */
		.tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
	},

	{
		/* send MAX bytes */
		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
		.gso_len = CONST_MSS_V6,
		.r_num_mss = CONST_MAX_SEGS_V6,
		.r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
			      (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
	},
	{
		/* send MAX + 1: fail */
		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
		.gso_len = CONST_MSS_V6,
		.tfail = true,
	},
	{
		/* send a single 1B MSS: will fail, see single MSS above */
		.tlen = 1,
		.gso_len = 1,
		.tfail = true,
		.r_num_mss = 1,
	},
	{
		/* send 2 1B segments */
		.tlen = 2,
		.gso_len = 1,
		.r_num_mss = 2,
	},
	{
		/* send 2B + 2B + 1B segments */
		.tlen = 5,
		.gso_len = 2,
		.r_num_mss = 2,
		.r_len_last = 1,
	},
	{
		/* send max number of min sized segments */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
		.gso_len = 1,
		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
	},
	{
		/* send max number + 1 of min sized segments: fail */
		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
		.gso_len = 1,
		.tfail = true,
	},
	{
		/* EOL */
	}
};

/* Zero @ifr and store @ifname in it; exits if the name does not fit. */
static void init_ifreq(struct ifreq *ifr, const char *ifname)
{
	int len;

	memset(ifr, 0, sizeof(*ifr));

	/* bounded copy: ifr_name is a fixed-size array */
	len = snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%s", ifname);
	if (len < 0 || (size_t)len >= sizeof(ifr->ifr_name))
		error(1, 0, "interface name too long: %s", ifname);
}

/* Return the MTU of device @ifname, queried via SIOCGIFMTU on socket @fd. */
static unsigned int get_device_mtu(int fd, const char *ifname)
{
	struct ifreq ifr;

	init_ifreq(&ifr, ifname);

	if (ioctl(fd, SIOCGIFMTU, &ifr))
		error(1, errno, "ioctl get mtu");

	return ifr.ifr_mtu;
}

/* Set the MTU of device @ifname to @mtu via SIOCSIFMTU on socket @fd. */
static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
{
	struct ifreq ifr;

	init_ifreq(&ifr, ifname);
	ifr.ifr_mtu = mtu;

	if (ioctl(fd, SIOCSIFMTU, &ifr))
		error(1, errno, "ioctl set mtu");
}

/*
 * Set the MTU of the test device (cfg_ifname) to @mtu and read it back to
 * verify that the change took effect. Logs the value before and after.
 */
static void set_device_mtu(int fd, int mtu)
{
	unsigned int val;

	val = get_device_mtu(fd, cfg_ifname);
	fprintf(stderr, "device mtu (orig): %u\n", val);

	__set_device_mtu(fd, cfg_ifname, mtu);
	val = get_device_mtu(fd, cfg_ifname);
	if (val != (unsigned int)mtu)
		error(1, 0, "unable to set device mtu to %d (device reports %u)\n",
		      mtu, val);

	fprintf(stderr, "device mtu (test): %u\n", val);
}

/* Set the socket's path MTU discovery mode to IP_PMTUDISC_DO (or v6 equiv). */
static void set_pmtu_discover(int fd, bool is_ipv4)
{
	int level, name, val;

	if (is_ipv4) {
		level	= SOL_IP;
		name	= IP_MTU_DISCOVER;
		val	= IP_PMTUDISC_DO;
	} else {
		level	= SOL_IPV6;
		name	= IPV6_MTU_DISCOVER;
		val	= IPV6_PMTUDISC_DO;
	}

	if (setsockopt(fd, level, name, &val, sizeof(val)))
		error(1, errno, "setsockopt path mtu");
}

/* Read, log and return the socket's IP_MTU / IPV6_MTU value. */
static unsigned int get_path_mtu(int fd, bool is_ipv4)
{
	socklen_t vallen;
	unsigned int mtu;
	int ret;

	vallen = sizeof(mtu);
	if (is_ipv4)
		ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
	else
		ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);

	if (ret)
		error(1, errno, "getsockopt mtu");


	fprintf(stderr, "path mtu (read): %u\n", mtu);
	return mtu;
}

/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
static void set_route_mtu(int mtu, bool is_ipv4)
{
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct nlmsghdr *nh;
	struct rtattr *rta;
	struct rtmsg *rt;
	/* sized for the larger (IPv6) destination address */
	char data[NLMSG_ALIGN(sizeof(*nh)) +
		  NLMSG_ALIGN(sizeof(*rt)) +
		  NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
		  NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
		  NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
	int fd, ret, alen, oif, off = 0;
	unsigned int ifindex;

	alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);

	/* look the output device up by name instead of assuming index 1 */
	ifindex = if_nametoindex(cfg_ifname);
	if (!ifindex)
		error(1, errno, "if_nametoindex %s", cfg_ifname);
	oif = ifindex;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd == -1)
		error(1, errno, "socket netlink");

	memset(data, 0, sizeof(data));

	nh = (void *)data;
	nh->nlmsg_type = RTM_NEWROUTE;
	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
	off += NLMSG_ALIGN(sizeof(*nh));

	rt = (void *)(data + off);
	rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
	rt->rtm_table = RT_TABLE_MAIN;
	rt->rtm_dst_len = alen << 3;	/* host route: prefix covers all bits */
	rt->rtm_protocol = RTPROT_BOOT;
	rt->rtm_scope = RT_SCOPE_UNIVERSE;
	rt->rtm_type = RTN_UNICAST;
	off += NLMSG_ALIGN(sizeof(*rt));

	rta = (void *)(data + off);
	rta->rta_type = RTA_DST;
	rta->rta_len = RTA_LENGTH(alen);
	if (is_ipv4)
		memcpy(RTA_DATA(rta), &addr4, alen);
	else
		memcpy(RTA_DATA(rta), &addr6, alen);
	off += NLMSG_ALIGN(rta->rta_len);

	rta = (void *)(data + off);
	rta->rta_type = RTA_OIF;
	rta->rta_len = RTA_LENGTH(sizeof(int));
	memcpy(RTA_DATA(rta), &oif, sizeof(oif));
	off += NLMSG_ALIGN(rta->rta_len);

	/* MTU is a subtype in a metrics type */
	rta = (void *)(data + off);
	rta->rta_type = RTA_METRICS;
	rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
	off += NLMSG_ALIGN(rta->rta_len);

	/* now fill MTU subtype. Note that it fits within above rta_len */
	rta = (void *)(((char *) rta) + RTA_LENGTH(0));
	rta->rta_type = RTAX_MTU;
	rta->rta_len = RTA_LENGTH(sizeof(int));
	memcpy(RTA_DATA(rta), &mtu, sizeof(mtu));

	nh->nlmsg_len = off;

	ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
	if (ret != off)
		error(1, errno, "send netlink: %dB != %dB\n", ret, off);

	if (close(fd))
		error(1, errno, "close netlink");

	fprintf(stderr, "route mtu (test): %u\n", mtu);
}

/*
 * Issue one sendmsg() call.
 *
 * Returns false if the call failed with EMSGSIZE, ENOMEM or EINVAL, which
 * the test tables treat as an expected kind of failure, and true if the
 * whole first iovec was sent. Any other outcome terminates the program.
 */
static bool __send_one(int fd, struct msghdr *msg, int flags)
{
	int ret;

	ret = sendmsg(fd, msg, flags);
	if (ret == -1 &&
	    (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
		return false;
	if (ret == -1)
		error(1, errno, "sendmsg");
	/* ret is non-negative here, so the conversion is safe */
	if ((size_t)ret != msg->msg_iov->iov_len)
		error(1, 0, "sendto: %d != %zu", ret, msg->msg_iov->iov_len);
	if (msg->msg_flags)
		error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);

	return true;
}

/*
 * Send @len bytes from the global buffer to @addr.
 *
 * If @gso_len is non-zero and the segment size is not configured through
 * setsockopt (-s), it is passed as a UDP_SEGMENT control message. With -m,
 * the payload is split into a 1 byte MSG_MORE send plus the remainder.
 *
 * Returns the result of the final __send_one() call.
 */
static bool send_one(int fd, int len, int gso_len,
		     struct sockaddr *addr, socklen_t alen)
{
	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
	struct msghdr msg = {0};
	struct iovec iov = {0};
	struct cmsghdr *cm;
	uint16_t gso_size;

	iov.iov_base = buf;
	iov.iov_len = len;

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	msg.msg_name = addr;
	msg.msg_namelen = alen;

	if (gso_len && !cfg_do_setsockopt) {
		msg.msg_control = control;
		msg.msg_controllen = sizeof(control);

		cm = CMSG_FIRSTHDR(&msg);
		cm->cmsg_level = SOL_UDP;
		cm->cmsg_type = UDP_SEGMENT;
		cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));

		/* copy rather than store through a casted pointer */
		gso_size = gso_len;
		memcpy(CMSG_DATA(cm), &gso_size, sizeof(gso_size));
	}

	/* If MSG_MORE, send 1 byte followed by remainder */
	if (cfg_do_msgmore && len > 1) {
		iov.iov_len = 1;
		if (!__send_one(fd, &msg, MSG_MORE))
			error(1, 0, "send 1B failed");

		/* iov_base is a void pointer: advance via the char buffer */
		iov.iov_base = buf + 1;
		iov.iov_len = len - 1;
	}

	return __send_one(fd, &msg, 0);
}

/*
 * Receive one datagram into the global buffer and return its length.
 * With MSG_DONTWAIT set in @flags, an empty queue (EAGAIN) returns 0.
 */
static int recv_one(int fd, int flags)
{
	int ret;

	ret = recv(fd, buf, sizeof(buf), flags);
	if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
		return 0;
	if (ret == -1)
		error(1, errno, "recv");

	return ret;
}

/*
 * Run a single testcase: send on @fdt, check the send outcome against
 * test->tfail and, if data was sent, verify on @fdr that exactly the
 * expected sequence of datagram sizes arrives.
 */
static void run_one(struct testcase *test, int fdt, int fdr,
		    struct sockaddr *addr, socklen_t alen)
{
	int i, ret, val, mss;
	bool sent;

	fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
			addr->sa_family == AF_INET ? 4 : 6,
			test->tlen, test->gso_len,
			test->tfail ? "(fail)" : "");

	val = test->gso_len;
	if (cfg_do_setsockopt) {
		if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
			error(1, errno, "setsockopt udp segment");
	}

	sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
	if (sent && test->tfail)
		error(1, 0, "send succeeded while expecting failure");
	if (!sent && !test->tfail)
		error(1, 0, "send failed while expecting success");
	if (!sent)
		return;

	if (test->gso_len)
		mss = test->gso_len;
	else
		mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;


	/* Recv all full MSS datagrams */
	for (i = 0; i < test->r_num_mss; i++) {
		ret = recv_one(fdr, 0);
		if (ret != mss)
			error(1, 0, "recv.%d: %d != %d", i, ret, mss);
	}

	/* Recv the non-full last datagram, if tlen was not a multiple of mss */
	if (test->r_len_last) {
		ret = recv_one(fdr, 0);
		if (ret != test->r_len_last)
			error(1, 0, "recv.%d: %d != %d (last)",
			      i, ret, test->r_len_last);
	}

	/* Verify received all data */
	ret = recv_one(fdr, MSG_DONTWAIT);
	if (ret)
		error(1, 0, "recv: unexpected datagram");
}

/*
 * Run every testcase of the table matching the address family, or only
 * the one selected with -t.
 */
static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
{
	struct testcase *tests, *test;

	tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;

	for (test = tests; test->tlen; test++) {
		/* if a specific test is given, then skip all others */
		if (cfg_specific_test_id == -1 ||
		    cfg_specific_test_id == test - tests)
			run_one(test, fdt, fdr, addr, alen);
	}
}

/*
 * Create a receiver bound to @addr and a sender socket, then run the test
 * table in the connectionless (-C) and/or connected (-c) configuration.
 */
static void run_test(struct sockaddr *addr, socklen_t alen)
{
	struct timeval tv = { .tv_usec = 100 * 1000 };
	unsigned int mtu;
	int fdr, fdt;

	fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
	if (fdr == -1)
		error(1, errno, "socket r");

	if (bind(fdr, addr, alen))
		error(1, errno, "bind");

	/* Have tests fail quickly instead of hang */
	if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
		error(1, errno, "setsockopt rcv timeout");

	fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
	if (fdt == -1)
		error(1, errno, "socket t");

	/* Do not fragment these datagrams: only succeed if GSO works */
	set_pmtu_discover(fdt, addr->sa_family == AF_INET);

	if (cfg_do_connectionless) {
		set_device_mtu(fdt, CONST_MTU_TEST);
		run_all(fdt, fdr, addr, alen);
	}

	if (cfg_do_connected) {
		/* device MTU is larger, so the route MTU is the limit */
		set_device_mtu(fdt, CONST_MTU_TEST + 100);
		set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);

		if (connect(fdt, addr, alen))
			error(1, errno, "connect");

		mtu = get_path_mtu(fdt, addr->sa_family == AF_INET);
		if (mtu != CONST_MTU_TEST)
			error(1, 0, "bad path mtu %u\n", mtu);

		run_all(fdt, fdr, addr, 0 /* use connected addr */);
	}

	if (close(fdt))
		error(1, errno, "close t");
	if (close(fdr))
		error(1, errno, "close r");
}

/* Run the tests against the IPv4 test address (addr4) on cfg_port. */
static void run_test_v4(void)
{
	struct sockaddr_in addr = {0};

	addr.sin_family = AF_INET;
	addr.sin_port = htons(cfg_port);
	addr.sin_addr = addr4;

	run_test((void *)&addr, sizeof(addr));
}

/* Run the tests against the IPv6 test address (addr6) on cfg_port. */
static void run_test_v6(void)
{
	struct sockaddr_in6 addr = {0};

	addr.sin6_family = AF_INET6;
	addr.sin6_port = htons(cfg_port);
	addr.sin6_addr = addr6;

	run_test((void *)&addr, sizeof(addr));
}

/*
 * Parse the argument of -t: a non-negative test index in any base
 * strtoul() accepts with base 0. Exits on malformed or out-of-range input.
 */
static int parse_test_id(const char *arg)
{
	unsigned long id;
	char *end;

	errno = 0;
	id = strtoul(arg, &end, 0);
	if (errno || end == arg || *end != '\0' || id > INT_MAX)
		error(1, 0, "-t: invalid test id '%s'", arg);

	return id;
}

/* Parse command line options into the cfg_* globals. */
static void parse_opts(int argc, char **argv)
{
	int c;

	while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
		switch (c) {
		case '4':
			cfg_do_ipv4 = true;
			break;
		case '6':
			cfg_do_ipv6 = true;
			break;
		case 'c':
			cfg_do_connected = true;
			break;
		case 'C':
			cfg_do_connectionless = true;
			break;
		case 'm':
			cfg_do_msgmore = true;
			break;
		case 's':
			cfg_do_setsockopt = true;
			break;
		case 't':
			cfg_specific_test_id = parse_test_id(optarg);
			break;
		default:
			error(1, 0, "%s: parse error", argv[0]);
		}
	}
}

int main(int argc, char **argv)
{
	parse_opts(argc, argv);

	if (cfg_do_ipv4)
		run_test_v4();
	if (cfg_do_ipv6)
		run_test_v6();

	fprintf(stderr, "OK\n");
	return 0;
}