1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 5 #include <stddef.h> 6 #include <arpa/inet.h> 7 #include <error.h> 8 #include <errno.h> 9 #include <net/if.h> 10 #include <linux/in.h> 11 #include <linux/netlink.h> 12 #include <linux/rtnetlink.h> 13 #include <netinet/if_ether.h> 14 #include <netinet/ip.h> 15 #include <netinet/ip6.h> 16 #include <netinet/udp.h> 17 #include <stdbool.h> 18 #include <stdlib.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include <sys/ioctl.h> 23 #include <sys/socket.h> 24 #include <sys/stat.h> 25 #include <sys/time.h> 26 #include <sys/types.h> 27 #include <unistd.h> 28 29 #ifndef ETH_MAX_MTU 30 #define ETH_MAX_MTU 0xFFFFU 31 #endif 32 33 #ifndef UDP_SEGMENT 34 #define UDP_SEGMENT 103 35 #endif 36 37 #define CONST_MTU_TEST 1500 38 39 #define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr)) 40 #define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr)) 41 42 #define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4) 43 #define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6) 44 45 #define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4) 46 #define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6) 47 48 static bool cfg_do_ipv4; 49 static bool cfg_do_ipv6; 50 static bool cfg_do_connected; 51 static bool cfg_do_connectionless; 52 static bool cfg_do_msgmore; 53 static bool cfg_do_setsockopt; 54 static int cfg_specific_test_id = -1; 55 56 static const char cfg_ifname[] = "lo"; 57 static unsigned short cfg_port = 9000; 58 59 static char buf[ETH_MAX_MTU]; 60 61 struct testcase { 62 int tlen; /* send() buffer size, may exceed mss */ 63 bool tfail; /* send() call is expected to fail */ 64 int gso_len; /* mss after applying gso */ 65 int r_num_mss; /* recv(): number of calls of full mss */ 66 int r_len_last; /* recv(): size of last non-mss dgram, if any */ 67 }; 68 69 const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; 70 const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; 71 72 struct testcase testcases_v4[] = { 73 { 74 /* no GSO: send a single byte */ 75 .tlen = 1, 76 .r_len_last = 1, 77 }, 78 { 79 /* no GSO: send a single MSS */ 80 .tlen = CONST_MSS_V4, 81 .r_num_mss = 1, 82 }, 83 { 84 /* no GSO: send a single MSS + 1B: fail */ 85 .tlen = CONST_MSS_V4 + 1, 86 .tfail = true, 87 }, 88 { 89 /* send a single MSS: will fail with GSO, because the segment 90 * logic in udp4_ufo_fragment demands a gso skb to be > MTU 91 */ 92 .tlen = CONST_MSS_V4, 93 .gso_len = CONST_MSS_V4, 94 .tfail = true, 95 .r_num_mss = 1, 96 }, 97 { 98 /* send a single MSS + 1B */ 99 .tlen = CONST_MSS_V4 + 1, 100 .gso_len = CONST_MSS_V4, 101 .r_num_mss = 1, 102 .r_len_last = 1, 103 }, 104 { 105 /* send exactly 2 MSS */ 106 .tlen = CONST_MSS_V4 * 2, 107 .gso_len = CONST_MSS_V4, 108 .r_num_mss = 2, 109 }, 110 { 111 /* send 2 MSS + 1B */ 112 .tlen = (CONST_MSS_V4 * 2) + 1, 113 .gso_len = CONST_MSS_V4, 114 .r_num_mss = 2, 115 .r_len_last = 1, 116 }, 117 { 118 /* send MAX segs */ 119 .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4, 120 .gso_len = CONST_MSS_V4, 121 .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4), 122 }, 123 124 { 125 /* send MAX bytes */ 126 .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4, 127 .gso_len = CONST_MSS_V4, 128 .r_num_mss = CONST_MAX_SEGS_V4, 129 .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 - 130 (CONST_MAX_SEGS_V4 * CONST_MSS_V4), 131 }, 132 { 133 /* send MAX + 1: fail */ 134 .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1, 135 .gso_len = CONST_MSS_V4, 136 .tfail = true, 137 }, 138 { 139 /* EOL */ 140 } 141 }; 142 143 #ifndef IP6_MAX_MTU 144 #define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr)) 145 #endif 146 147 struct testcase testcases_v6[] = { 148 { 149 /* no GSO: send a single byte */ 150 .tlen = 1, 151 .r_len_last = 1, 152 }, 153 { 154 /* no GSO: send a single MSS */ 155 .tlen = CONST_MSS_V6, 156 .r_num_mss = 1, 157 }, 158 { 159 /* no GSO: send a single MSS + 1B: fail */ 160 .tlen = CONST_MSS_V6 + 1, 161 .tfail = true, 162 }, 163 { 164 /* send a single MSS: will fail with GSO, because the segment 165 * logic in udp4_ufo_fragment demands a gso skb to be > MTU 166 */ 167 .tlen = CONST_MSS_V6, 168 .gso_len = CONST_MSS_V6, 169 .tfail = true, 170 .r_num_mss = 1, 171 }, 172 { 173 /* send a single MSS + 1B */ 174 .tlen = CONST_MSS_V6 + 1, 175 .gso_len = CONST_MSS_V6, 176 .r_num_mss = 1, 177 .r_len_last = 1, 178 }, 179 { 180 /* send exactly 2 MSS */ 181 .tlen = CONST_MSS_V6 * 2, 182 .gso_len = CONST_MSS_V6, 183 .r_num_mss = 2, 184 }, 185 { 186 /* send 2 MSS + 1B */ 187 .tlen = (CONST_MSS_V6 * 2) + 1, 188 .gso_len = CONST_MSS_V6, 189 .r_num_mss = 2, 190 .r_len_last = 1, 191 }, 192 { 193 /* send MAX segs */ 194 .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6, 195 .gso_len = CONST_MSS_V6, 196 .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6), 197 }, 198 199 { 200 /* send MAX bytes */ 201 .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6, 202 .gso_len = CONST_MSS_V6, 203 .r_num_mss = CONST_MAX_SEGS_V6, 204 .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 - 205 (CONST_MAX_SEGS_V6 * CONST_MSS_V6), 206 }, 207 { 208 /* send MAX + 1: fail */ 209 .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1, 210 .gso_len = CONST_MSS_V6, 211 .tfail = true, 212 }, 213 { 214 /* EOL */ 215 } 216 }; 217 218 static unsigned int get_device_mtu(int fd, const char *ifname) 219 { 220 struct ifreq ifr; 221 222 memset(&ifr, 0, sizeof(ifr)); 223 224 strcpy(ifr.ifr_name, ifname); 225 226 if (ioctl(fd, SIOCGIFMTU, &ifr)) 227 error(1, errno, "ioctl get mtu"); 228 229 return ifr.ifr_mtu; 230 } 231 232 static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu) 233 { 234 struct ifreq ifr; 235 236 memset(&ifr, 0, sizeof(ifr)); 237 238 ifr.ifr_mtu = mtu; 239 strcpy(ifr.ifr_name, ifname); 240 241 if (ioctl(fd, SIOCSIFMTU, &ifr)) 242 error(1, errno, "ioctl set mtu"); 243 } 244 245 static void set_device_mtu(int fd, int mtu) 246 { 247 int val; 248 249 val = get_device_mtu(fd, cfg_ifname); 250 fprintf(stderr, "device mtu (orig): %u\n", val); 251 252 __set_device_mtu(fd, cfg_ifname, mtu); 253 val = get_device_mtu(fd, cfg_ifname); 254 if (val != mtu) 255 error(1, 0, "unable to set device mtu to %u\n", val); 256 257 fprintf(stderr, "device mtu (test): %u\n", val); 258 } 259 260 static void set_pmtu_discover(int fd, bool is_ipv4) 261 { 262 int level, name, val; 263 264 if (is_ipv4) { 265 level = SOL_IP; 266 name = IP_MTU_DISCOVER; 267 val = IP_PMTUDISC_DO; 268 } else { 269 level = SOL_IPV6; 270 name = IPV6_MTU_DISCOVER; 271 val = IPV6_PMTUDISC_DO; 272 } 273 274 if (setsockopt(fd, level, name, &val, sizeof(val))) 275 error(1, errno, "setsockopt path mtu"); 276 } 277 278 static unsigned int get_path_mtu(int fd, bool is_ipv4) 279 { 280 socklen_t vallen; 281 unsigned int mtu; 282 int ret; 283 284 vallen = sizeof(mtu); 285 if (is_ipv4) 286 ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen); 287 else 288 ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen); 289 290 if (ret) 291 error(1, errno, "getsockopt mtu"); 292 293 294 fprintf(stderr, "path mtu (read): %u\n", mtu); 295 return mtu; 296 } 297 298 /* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */ 299 static void set_route_mtu(int mtu, bool is_ipv4) 300 { 301 struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; 302 struct nlmsghdr *nh; 303 struct rtattr *rta; 304 struct rtmsg *rt; 305 char data[NLMSG_ALIGN(sizeof(*nh)) + 306 NLMSG_ALIGN(sizeof(*rt)) + 307 NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) + 308 NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) + 309 NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))]; 310 int fd, ret, alen, off = 0; 311 312 alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6); 313 314 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 315 if (fd == -1) 316 error(1, errno, "socket netlink"); 317 318 memset(data, 0, sizeof(data)); 319 320 nh = (void *)data; 321 nh->nlmsg_type = RTM_NEWROUTE; 322 nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; 323 off += NLMSG_ALIGN(sizeof(*nh)); 324 325 rt = (void *)(data + off); 326 rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6; 327 rt->rtm_table = RT_TABLE_MAIN; 328 rt->rtm_dst_len = alen << 3; 329 rt->rtm_protocol = RTPROT_BOOT; 330 rt->rtm_scope = RT_SCOPE_UNIVERSE; 331 rt->rtm_type = RTN_UNICAST; 332 off += NLMSG_ALIGN(sizeof(*rt)); 333 334 rta = (void *)(data + off); 335 rta->rta_type = RTA_DST; 336 rta->rta_len = RTA_LENGTH(alen); 337 if (is_ipv4) 338 memcpy(RTA_DATA(rta), &addr4, alen); 339 else 340 memcpy(RTA_DATA(rta), &addr6, alen); 341 off += NLMSG_ALIGN(rta->rta_len); 342 343 rta = (void *)(data + off); 344 rta->rta_type = RTA_OIF; 345 rta->rta_len = RTA_LENGTH(sizeof(int)); 346 *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo"); 347 off += NLMSG_ALIGN(rta->rta_len); 348 349 /* MTU is a subtype in a metrics type */ 350 rta = (void *)(data + off); 351 rta->rta_type = RTA_METRICS; 352 rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)); 353 off += NLMSG_ALIGN(rta->rta_len); 354 355 /* now fill MTU subtype. Note that it fits within above rta_len */ 356 rta = (void *)(((char *) rta) + RTA_LENGTH(0)); 357 rta->rta_type = RTAX_MTU; 358 rta->rta_len = RTA_LENGTH(sizeof(int)); 359 *((int *)(RTA_DATA(rta))) = mtu; 360 361 nh->nlmsg_len = off; 362 363 ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr)); 364 if (ret != off) 365 error(1, errno, "send netlink: %uB != %uB\n", ret, off); 366 367 if (close(fd)) 368 error(1, errno, "close netlink"); 369 370 fprintf(stderr, "route mtu (test): %u\n", mtu); 371 } 372 373 static bool __send_one(int fd, struct msghdr *msg, int flags) 374 { 375 int ret; 376 377 ret = sendmsg(fd, msg, flags); 378 if (ret == -1 && (errno == EMSGSIZE || errno == ENOMEM)) 379 return false; 380 if (ret == -1) 381 error(1, errno, "sendmsg"); 382 if (ret != msg->msg_iov->iov_len) 383 error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len); 384 if (msg->msg_flags) 385 error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags); 386 387 return true; 388 } 389 390 static bool send_one(int fd, int len, int gso_len, 391 struct sockaddr *addr, socklen_t alen) 392 { 393 char control[CMSG_SPACE(sizeof(uint16_t))] = {0}; 394 struct msghdr msg = {0}; 395 struct iovec iov = {0}; 396 struct cmsghdr *cm; 397 398 iov.iov_base = buf; 399 iov.iov_len = len; 400 401 msg.msg_iov = &iov; 402 msg.msg_iovlen = 1; 403 404 msg.msg_name = addr; 405 msg.msg_namelen = alen; 406 407 if (gso_len && !cfg_do_setsockopt) { 408 msg.msg_control = control; 409 msg.msg_controllen = sizeof(control); 410 411 cm = CMSG_FIRSTHDR(&msg); 412 cm->cmsg_level = SOL_UDP; 413 cm->cmsg_type = UDP_SEGMENT; 414 cm->cmsg_len = CMSG_LEN(sizeof(uint16_t)); 415 *((uint16_t *) CMSG_DATA(cm)) = gso_len; 416 } 417 418 /* If MSG_MORE, send 1 byte followed by remainder */ 419 if (cfg_do_msgmore && len > 1) { 420 iov.iov_len = 1; 421 if (!__send_one(fd, &msg, MSG_MORE)) 422 error(1, 0, "send 1B failed"); 423 424 iov.iov_base++; 425 iov.iov_len = len - 1; 426 } 427 428 return __send_one(fd, &msg, 0); 429 } 430 431 static int recv_one(int fd, int flags) 432 { 433 int ret; 434 435 ret = recv(fd, buf, sizeof(buf), flags); 436 if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT)) 437 return 0; 438 if (ret == -1) 439 error(1, errno, "recv"); 440 441 return ret; 442 } 443 444 static void run_one(struct testcase *test, int fdt, int fdr, 445 struct sockaddr *addr, socklen_t alen) 446 { 447 int i, ret, val, mss; 448 bool sent; 449 450 fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", 451 addr->sa_family == AF_INET ? 4 : 6, 452 test->tlen, test->gso_len, 453 test->tfail ? "(fail)" : ""); 454 455 val = test->gso_len; 456 if (cfg_do_setsockopt) { 457 if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) 458 error(1, errno, "setsockopt udp segment"); 459 } 460 461 sent = send_one(fdt, test->tlen, test->gso_len, addr, alen); 462 if (sent && test->tfail) 463 error(1, 0, "send succeeded while expecting failure"); 464 if (!sent && !test->tfail) 465 error(1, 0, "send failed while expecting success"); 466 if (!sent) 467 return; 468 469 mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6; 470 471 /* Recv all full MSS datagrams */ 472 for (i = 0; i < test->r_num_mss; i++) { 473 ret = recv_one(fdr, 0); 474 if (ret != mss) 475 error(1, 0, "recv.%d: %d != %d", i, ret, mss); 476 } 477 478 /* Recv the non-full last datagram, if tlen was not a multiple of mss */ 479 if (test->r_len_last) { 480 ret = recv_one(fdr, 0); 481 if (ret != test->r_len_last) 482 error(1, 0, "recv.%d: %d != %d (last)", 483 i, ret, test->r_len_last); 484 } 485 486 /* Verify received all data */ 487 ret = recv_one(fdr, MSG_DONTWAIT); 488 if (ret) 489 error(1, 0, "recv: unexpected datagram"); 490 } 491 492 static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen) 493 { 494 struct testcase *tests, *test; 495 496 tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6; 497 498 for (test = tests; test->tlen; test++) { 499 /* if a specific test is given, then skip all others */ 500 if (cfg_specific_test_id == -1 || 501 cfg_specific_test_id == test - tests) 502 run_one(test, fdt, fdr, addr, alen); 503 } 504 } 505 506 static void run_test(struct sockaddr *addr, socklen_t alen) 507 { 508 struct timeval tv = { .tv_usec = 100 * 1000 }; 509 int fdr, fdt, val; 510 511 fdr = socket(addr->sa_family, SOCK_DGRAM, 0); 512 if (fdr == -1) 513 error(1, errno, "socket r"); 514 515 if (bind(fdr, addr, alen)) 516 error(1, errno, "bind"); 517 518 /* Have tests fail quickly instead of hang */ 519 if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) 520 error(1, errno, "setsockopt rcv timeout"); 521 522 fdt = socket(addr->sa_family, SOCK_DGRAM, 0); 523 if (fdt == -1) 524 error(1, errno, "socket t"); 525 526 /* Do not fragment these datagrams: only succeed if GSO works */ 527 set_pmtu_discover(fdt, addr->sa_family == AF_INET); 528 529 if (cfg_do_connectionless) { 530 set_device_mtu(fdt, CONST_MTU_TEST); 531 run_all(fdt, fdr, addr, alen); 532 } 533 534 if (cfg_do_connected) { 535 set_device_mtu(fdt, CONST_MTU_TEST + 100); 536 set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET); 537 538 if (connect(fdt, addr, alen)) 539 error(1, errno, "connect"); 540 541 val = get_path_mtu(fdt, addr->sa_family == AF_INET); 542 if (val != CONST_MTU_TEST) 543 error(1, 0, "bad path mtu %u\n", val); 544 545 run_all(fdt, fdr, addr, 0 /* use connected addr */); 546 } 547 548 if (close(fdt)) 549 error(1, errno, "close t"); 550 if (close(fdr)) 551 error(1, errno, "close r"); 552 } 553 554 static void run_test_v4(void) 555 { 556 struct sockaddr_in addr = {0}; 557 558 addr.sin_family = AF_INET; 559 addr.sin_port = htons(cfg_port); 560 addr.sin_addr = addr4; 561 562 run_test((void *)&addr, sizeof(addr)); 563 } 564 565 static void run_test_v6(void) 566 { 567 struct sockaddr_in6 addr = {0}; 568 569 addr.sin6_family = AF_INET6; 570 addr.sin6_port = htons(cfg_port); 571 addr.sin6_addr = addr6; 572 573 run_test((void *)&addr, sizeof(addr)); 574 } 575 576 static void parse_opts(int argc, char **argv) 577 { 578 int c; 579 580 while ((c = getopt(argc, argv, "46cCmst:")) != -1) { 581 switch (c) { 582 case '4': 583 cfg_do_ipv4 = true; 584 break; 585 case '6': 586 cfg_do_ipv6 = true; 587 break; 588 case 'c': 589 cfg_do_connected = true; 590 break; 591 case 'C': 592 cfg_do_connectionless = true; 593 break; 594 case 'm': 595 cfg_do_msgmore = true; 596 break; 597 case 's': 598 cfg_do_setsockopt = true; 599 break; 600 case 't': 601 cfg_specific_test_id = strtoul(optarg, NULL, 0); 602 break; 603 default: 604 error(1, 0, "%s: parse error", argv[0]); 605 } 606 } 607 } 608 609 int main(int argc, char **argv) 610 { 611 parse_opts(argc, argv); 612 613 if (cfg_do_ipv4) 614 run_test_v4(); 615 if (cfg_do_ipv6) 616 run_test_v6(); 617 618 fprintf(stderr, "OK\n"); 619 return 0; 620 } 621