1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 5 #include <arpa/inet.h> 6 #include <errno.h> 7 #include <error.h> 8 #include <linux/errqueue.h> 9 #include <linux/net_tstamp.h> 10 #include <netinet/if_ether.h> 11 #include <netinet/in.h> 12 #include <netinet/ip.h> 13 #include <netinet/ip6.h> 14 #include <netinet/udp.h> 15 #include <poll.h> 16 #include <sched.h> 17 #include <signal.h> 18 #include <stdbool.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include <sys/socket.h> 23 #include <sys/time.h> 24 #include <sys/poll.h> 25 #include <sys/types.h> 26 #include <unistd.h> 27 28 #include "../kselftest.h" 29 30 #ifndef ETH_MAX_MTU 31 #define ETH_MAX_MTU 0xFFFFU 32 #endif 33 34 #ifndef UDP_SEGMENT 35 #define UDP_SEGMENT 103 36 #endif 37 38 #ifndef SO_ZEROCOPY 39 #define SO_ZEROCOPY 60 40 #endif 41 42 #ifndef SO_EE_ORIGIN_ZEROCOPY 43 #define SO_EE_ORIGIN_ZEROCOPY 5 44 #endif 45 46 #ifndef MSG_ZEROCOPY 47 #define MSG_ZEROCOPY 0x4000000 48 #endif 49 50 #ifndef ENOTSUPP 51 #define ENOTSUPP 524 52 #endif 53 54 #define NUM_PKT 100 55 56 static bool cfg_cache_trash; 57 static int cfg_cpu = -1; 58 static int cfg_connected = true; 59 static int cfg_family = PF_UNSPEC; 60 static uint16_t cfg_mss; 61 static int cfg_payload_len = (1472 * 42); 62 static int cfg_port = 8000; 63 static int cfg_runtime_ms = -1; 64 static bool cfg_poll; 65 static bool cfg_segment; 66 static bool cfg_sendmmsg; 67 static bool cfg_tcp; 68 static uint32_t cfg_tx_ts = SOF_TIMESTAMPING_TX_SOFTWARE; 69 static bool cfg_tx_tstamp; 70 static bool cfg_audit; 71 static bool cfg_verbose; 72 static bool cfg_zerocopy; 73 static int cfg_msg_nr; 74 static uint16_t cfg_gso_size; 75 static unsigned long total_num_msgs; 76 static unsigned long total_num_sends; 77 static unsigned long stat_tx_ts; 78 static unsigned long stat_tx_ts_errors; 79 static unsigned long tstart; 80 static unsigned long tend; 81 static unsigned long stat_zcopies; 82 83 static socklen_t cfg_alen; 84 static struct sockaddr_storage cfg_dst_addr; 85 86 static bool interrupted; 87 static char buf[NUM_PKT][ETH_MAX_MTU]; 88 89 static void sigint_handler(int signum) 90 { 91 if (signum == SIGINT) 92 interrupted = true; 93 } 94 95 static unsigned long gettimeofday_ms(void) 96 { 97 struct timeval tv; 98 99 gettimeofday(&tv, NULL); 100 return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); 101 } 102 103 static int set_cpu(int cpu) 104 { 105 cpu_set_t mask; 106 107 CPU_ZERO(&mask); 108 CPU_SET(cpu, &mask); 109 if (sched_setaffinity(0, sizeof(mask), &mask)) 110 error(1, 0, "setaffinity %d", cpu); 111 112 return 0; 113 } 114 115 static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) 116 { 117 struct sockaddr_in6 *addr6 = (void *) sockaddr; 118 struct sockaddr_in *addr4 = (void *) sockaddr; 119 120 switch (domain) { 121 case PF_INET: 122 addr4->sin_family = AF_INET; 123 addr4->sin_port = htons(cfg_port); 124 if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) 125 error(1, 0, "ipv4 parse error: %s", str_addr); 126 break; 127 case PF_INET6: 128 addr6->sin6_family = AF_INET6; 129 addr6->sin6_port = htons(cfg_port); 130 if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) 131 error(1, 0, "ipv6 parse error: %s", str_addr); 132 break; 133 default: 134 error(1, 0, "illegal domain"); 135 } 136 } 137 138 static void flush_cmsg(struct cmsghdr *cmsg) 139 { 140 struct sock_extended_err *err; 141 struct scm_timestamping *tss; 142 __u32 lo; 143 __u32 hi; 144 int i; 145 146 switch (cmsg->cmsg_level) { 147 case SOL_SOCKET: 148 if (cmsg->cmsg_type == SO_TIMESTAMPING) { 149 i = (cfg_tx_ts == SOF_TIMESTAMPING_TX_HARDWARE) ? 2 : 0; 150 tss = (struct scm_timestamping *)CMSG_DATA(cmsg); 151 if (tss->ts[i].tv_sec == 0) 152 stat_tx_ts_errors++; 153 } else { 154 error(1, 0, "unknown SOL_SOCKET cmsg type=%u\n", 155 cmsg->cmsg_type); 156 } 157 break; 158 case SOL_IP: 159 case SOL_IPV6: 160 switch (cmsg->cmsg_type) { 161 case IP_RECVERR: 162 case IPV6_RECVERR: 163 { 164 err = (struct sock_extended_err *)CMSG_DATA(cmsg); 165 switch (err->ee_origin) { 166 case SO_EE_ORIGIN_TIMESTAMPING: 167 /* Got a TX timestamp from error queue */ 168 stat_tx_ts++; 169 break; 170 case SO_EE_ORIGIN_ICMP: 171 case SO_EE_ORIGIN_ICMP6: 172 if (cfg_verbose) 173 fprintf(stderr, 174 "received ICMP error: type=%u, code=%u\n", 175 err->ee_type, err->ee_code); 176 break; 177 case SO_EE_ORIGIN_ZEROCOPY: 178 { 179 lo = err->ee_info; 180 hi = err->ee_data; 181 /* range of IDs acknowledged */ 182 stat_zcopies += hi - lo + 1; 183 break; 184 } 185 case SO_EE_ORIGIN_LOCAL: 186 if (cfg_verbose) 187 fprintf(stderr, 188 "received packet with local origin: %u\n", 189 err->ee_origin); 190 break; 191 default: 192 error(0, 1, "received packet with origin: %u", 193 err->ee_origin); 194 } 195 break; 196 } 197 default: 198 error(0, 1, "unknown IP msg type=%u\n", 199 cmsg->cmsg_type); 200 break; 201 } 202 break; 203 default: 204 error(0, 1, "unknown cmsg level=%u\n", 205 cmsg->cmsg_level); 206 } 207 } 208 209 static void flush_errqueue_recv(int fd) 210 { 211 char control[CMSG_SPACE(sizeof(struct scm_timestamping)) + 212 CMSG_SPACE(sizeof(struct sock_extended_err)) + 213 CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; 214 struct msghdr msg = {0}; 215 struct cmsghdr *cmsg; 216 int ret; 217 218 while (1) { 219 msg.msg_control = control; 220 msg.msg_controllen = sizeof(control); 221 ret = recvmsg(fd, &msg, MSG_ERRQUEUE); 222 if (ret == -1 && errno == EAGAIN) 223 break; 224 if (ret == -1) 225 error(1, errno, "errqueue"); 226 if (msg.msg_flags != MSG_ERRQUEUE) 227 error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags); 228 if (cfg_audit) { 229 for (cmsg = CMSG_FIRSTHDR(&msg); 230 cmsg; 231 cmsg = CMSG_NXTHDR(&msg, cmsg)) 232 flush_cmsg(cmsg); 233 } 234 msg.msg_flags = 0; 235 } 236 } 237 238 static void flush_errqueue(int fd, const bool do_poll) 239 { 240 if (do_poll) { 241 struct pollfd fds = {0}; 242 int ret; 243 244 fds.fd = fd; 245 ret = poll(&fds, 1, 500); 246 if (ret == 0) { 247 if (cfg_verbose) 248 fprintf(stderr, "poll timeout\n"); 249 } else if (ret < 0) { 250 error(1, errno, "poll"); 251 } 252 } 253 254 flush_errqueue_recv(fd); 255 } 256 257 static int send_tcp(int fd, char *data) 258 { 259 int ret, done = 0, count = 0; 260 261 while (done < cfg_payload_len) { 262 ret = send(fd, data + done, cfg_payload_len - done, 263 cfg_zerocopy ? MSG_ZEROCOPY : 0); 264 if (ret == -1) 265 error(1, errno, "write"); 266 267 done += ret; 268 count++; 269 } 270 271 return count; 272 } 273 274 static int send_udp(int fd, char *data) 275 { 276 int ret, total_len, len, count = 0; 277 278 total_len = cfg_payload_len; 279 280 while (total_len) { 281 len = total_len < cfg_mss ? total_len : cfg_mss; 282 283 ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0, 284 cfg_connected ? NULL : (void *)&cfg_dst_addr, 285 cfg_connected ? 0 : cfg_alen); 286 if (ret == -1) 287 error(1, errno, "write"); 288 if (ret != len) 289 error(1, errno, "write: %uB != %uB\n", ret, len); 290 291 total_len -= len; 292 count++; 293 } 294 295 return count; 296 } 297 298 static void send_ts_cmsg(struct cmsghdr *cm) 299 { 300 uint32_t *valp; 301 302 cm->cmsg_level = SOL_SOCKET; 303 cm->cmsg_type = SO_TIMESTAMPING; 304 cm->cmsg_len = CMSG_LEN(sizeof(cfg_tx_ts)); 305 valp = (void *)CMSG_DATA(cm); 306 *valp = cfg_tx_ts; 307 } 308 309 static int send_udp_sendmmsg(int fd, char *data) 310 { 311 char control[CMSG_SPACE(sizeof(cfg_tx_ts))] = {0}; 312 const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN; 313 struct mmsghdr mmsgs[max_nr_msg]; 314 struct iovec iov[max_nr_msg]; 315 unsigned int off = 0, left; 316 size_t msg_controllen = 0; 317 int i = 0, ret; 318 319 memset(mmsgs, 0, sizeof(mmsgs)); 320 321 if (cfg_tx_tstamp) { 322 struct msghdr msg = {0}; 323 struct cmsghdr *cmsg; 324 325 msg.msg_control = control; 326 msg.msg_controllen = sizeof(control); 327 cmsg = CMSG_FIRSTHDR(&msg); 328 send_ts_cmsg(cmsg); 329 msg_controllen += CMSG_SPACE(sizeof(cfg_tx_ts)); 330 } 331 332 left = cfg_payload_len; 333 while (left) { 334 if (i == max_nr_msg) 335 error(1, 0, "sendmmsg: exceeds max_nr_msg"); 336 337 iov[i].iov_base = data + off; 338 iov[i].iov_len = cfg_mss < left ? cfg_mss : left; 339 340 mmsgs[i].msg_hdr.msg_iov = iov + i; 341 mmsgs[i].msg_hdr.msg_iovlen = 1; 342 343 mmsgs[i].msg_hdr.msg_name = (void *)&cfg_dst_addr; 344 mmsgs[i].msg_hdr.msg_namelen = cfg_alen; 345 if (msg_controllen) { 346 mmsgs[i].msg_hdr.msg_control = control; 347 mmsgs[i].msg_hdr.msg_controllen = msg_controllen; 348 } 349 350 off += iov[i].iov_len; 351 left -= iov[i].iov_len; 352 i++; 353 } 354 355 ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0); 356 if (ret == -1) 357 error(1, errno, "sendmmsg"); 358 359 return ret; 360 } 361 362 static void send_udp_segment_cmsg(struct cmsghdr *cm) 363 { 364 uint16_t *valp; 365 366 cm->cmsg_level = SOL_UDP; 367 cm->cmsg_type = UDP_SEGMENT; 368 cm->cmsg_len = CMSG_LEN(sizeof(cfg_gso_size)); 369 valp = (void *)CMSG_DATA(cm); 370 *valp = cfg_gso_size; 371 } 372 373 static int send_udp_segment(int fd, char *data) 374 { 375 char control[CMSG_SPACE(sizeof(cfg_gso_size)) + 376 CMSG_SPACE(sizeof(cfg_tx_ts))] = {0}; 377 struct msghdr msg = {0}; 378 struct iovec iov = {0}; 379 size_t msg_controllen; 380 struct cmsghdr *cmsg; 381 int ret; 382 383 iov.iov_base = data; 384 iov.iov_len = cfg_payload_len; 385 386 msg.msg_iov = &iov; 387 msg.msg_iovlen = 1; 388 389 msg.msg_control = control; 390 msg.msg_controllen = sizeof(control); 391 cmsg = CMSG_FIRSTHDR(&msg); 392 send_udp_segment_cmsg(cmsg); 393 msg_controllen = CMSG_SPACE(sizeof(cfg_mss)); 394 if (cfg_tx_tstamp) { 395 cmsg = CMSG_NXTHDR(&msg, cmsg); 396 send_ts_cmsg(cmsg); 397 msg_controllen += CMSG_SPACE(sizeof(cfg_tx_ts)); 398 } 399 400 msg.msg_controllen = msg_controllen; 401 msg.msg_name = (void *)&cfg_dst_addr; 402 msg.msg_namelen = cfg_alen; 403 404 ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0); 405 if (ret == -1) 406 error(1, errno, "sendmsg"); 407 if (ret != iov.iov_len) 408 error(1, 0, "sendmsg: %u != %llu\n", ret, 409 (unsigned long long)iov.iov_len); 410 411 return 1; 412 } 413 414 static void usage(const char *filepath) 415 { 416 error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", 417 filepath); 418 } 419 420 static void parse_opts(int argc, char **argv) 421 { 422 int max_len, hdrlen; 423 int c; 424 425 while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) { 426 switch (c) { 427 case '4': 428 if (cfg_family != PF_UNSPEC) 429 error(1, 0, "Pass one of -4 or -6"); 430 cfg_family = PF_INET; 431 cfg_alen = sizeof(struct sockaddr_in); 432 break; 433 case '6': 434 if (cfg_family != PF_UNSPEC) 435 error(1, 0, "Pass one of -4 or -6"); 436 cfg_family = PF_INET6; 437 cfg_alen = sizeof(struct sockaddr_in6); 438 break; 439 case 'a': 440 cfg_audit = true; 441 break; 442 case 'c': 443 cfg_cache_trash = true; 444 break; 445 case 'C': 446 cfg_cpu = strtol(optarg, NULL, 0); 447 break; 448 case 'D': 449 setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); 450 break; 451 case 'l': 452 cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; 453 break; 454 case 'm': 455 cfg_sendmmsg = true; 456 break; 457 case 'M': 458 cfg_msg_nr = strtoul(optarg, NULL, 10); 459 break; 460 case 'p': 461 cfg_port = strtoul(optarg, NULL, 0); 462 break; 463 case 'P': 464 cfg_poll = true; 465 break; 466 case 's': 467 cfg_payload_len = strtoul(optarg, NULL, 0); 468 break; 469 case 'S': 470 cfg_gso_size = strtoul(optarg, NULL, 0); 471 cfg_segment = true; 472 break; 473 case 'H': 474 cfg_tx_ts = SOF_TIMESTAMPING_TX_HARDWARE; 475 cfg_tx_tstamp = true; 476 break; 477 case 't': 478 cfg_tcp = true; 479 break; 480 case 'T': 481 cfg_tx_tstamp = true; 482 break; 483 case 'u': 484 cfg_connected = false; 485 break; 486 case 'v': 487 cfg_verbose = true; 488 break; 489 case 'z': 490 cfg_zerocopy = true; 491 break; 492 } 493 } 494 495 if (optind != argc) 496 usage(argv[0]); 497 498 if (cfg_family == PF_UNSPEC) 499 error(1, 0, "must pass one of -4 or -6"); 500 if (cfg_tcp && !cfg_connected) 501 error(1, 0, "connectionless tcp makes no sense"); 502 if (cfg_segment && cfg_sendmmsg) 503 error(1, 0, "cannot combine segment offload and sendmmsg"); 504 if (cfg_tx_tstamp && !(cfg_segment || cfg_sendmmsg)) 505 error(1, 0, "Options -T and -H require either -S or -m option"); 506 507 if (cfg_family == PF_INET) 508 hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr); 509 else 510 hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr); 511 512 cfg_mss = ETH_DATA_LEN - hdrlen; 513 max_len = ETH_MAX_MTU - hdrlen; 514 if (!cfg_gso_size) 515 cfg_gso_size = cfg_mss; 516 517 if (cfg_payload_len > max_len) 518 error(1, 0, "payload length %u exceeds max %u", 519 cfg_payload_len, max_len); 520 } 521 522 static void set_pmtu_discover(int fd, bool is_ipv4) 523 { 524 int level, name, val; 525 526 if (is_ipv4) { 527 level = SOL_IP; 528 name = IP_MTU_DISCOVER; 529 val = IP_PMTUDISC_DO; 530 } else { 531 level = SOL_IPV6; 532 name = IPV6_MTU_DISCOVER; 533 val = IPV6_PMTUDISC_DO; 534 } 535 536 if (setsockopt(fd, level, name, &val, sizeof(val))) 537 error(1, errno, "setsockopt path mtu"); 538 } 539 540 static void set_tx_timestamping(int fd) 541 { 542 int val = SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID | 543 SOF_TIMESTAMPING_OPT_TSONLY; 544 545 if (cfg_tx_ts == SOF_TIMESTAMPING_TX_SOFTWARE) 546 val |= SOF_TIMESTAMPING_SOFTWARE; 547 else 548 val |= SOF_TIMESTAMPING_RAW_HARDWARE; 549 550 if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val))) 551 error(1, errno, "setsockopt tx timestamping"); 552 } 553 554 static void print_audit_report(unsigned long num_msgs, unsigned long num_sends) 555 { 556 unsigned long tdelta; 557 558 tdelta = tend - tstart; 559 if (!tdelta) 560 return; 561 562 fprintf(stderr, "Summary over %lu.%03lu seconds...\n", 563 tdelta / 1000, tdelta % 1000); 564 fprintf(stderr, 565 "sum %s tx: %6lu MB/s %10lu calls (%lu/s) %10lu msgs (%lu/s)\n", 566 cfg_tcp ? "tcp" : "udp", 567 ((num_msgs * cfg_payload_len) >> 10) / tdelta, 568 num_sends, num_sends * 1000 / tdelta, 569 num_msgs, num_msgs * 1000 / tdelta); 570 571 if (cfg_tx_tstamp) { 572 if (stat_tx_ts_errors) 573 error(1, 0, 574 "Expected clean TX Timestamps: %9lu msgs received %6lu errors", 575 stat_tx_ts, stat_tx_ts_errors); 576 if (stat_tx_ts != num_sends) 577 error(1, 0, 578 "Unexpected number of TX Timestamps: %9lu expected %9lu received", 579 num_sends, stat_tx_ts); 580 fprintf(stderr, 581 "Tx Timestamps: %19lu received %17lu errors\n", 582 stat_tx_ts, stat_tx_ts_errors); 583 } 584 585 if (cfg_zerocopy) { 586 if (stat_zcopies != num_sends) 587 error(1, 0, "Unexpected number of Zerocopy completions: %9lu expected %9lu received", 588 num_sends, stat_zcopies); 589 fprintf(stderr, 590 "Zerocopy acks: %19lu\n", 591 stat_zcopies); 592 } 593 } 594 595 static void print_report(unsigned long num_msgs, unsigned long num_sends) 596 { 597 fprintf(stderr, 598 "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n", 599 cfg_tcp ? "tcp" : "udp", 600 (num_msgs * cfg_payload_len) >> 20, 601 num_sends, num_msgs); 602 603 if (cfg_audit) { 604 total_num_msgs += num_msgs; 605 total_num_sends += num_sends; 606 } 607 } 608 609 int main(int argc, char **argv) 610 { 611 unsigned long num_msgs, num_sends; 612 unsigned long tnow, treport, tstop; 613 int fd, i, val, ret; 614 615 parse_opts(argc, argv); 616 617 if (cfg_cpu > 0) 618 set_cpu(cfg_cpu); 619 620 for (i = 0; i < sizeof(buf[0]); i++) 621 buf[0][i] = 'a' + (i % 26); 622 for (i = 1; i < NUM_PKT; i++) 623 memcpy(buf[i], buf[0], sizeof(buf[0])); 624 625 signal(SIGINT, sigint_handler); 626 627 fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); 628 if (fd == -1) 629 error(1, errno, "socket"); 630 631 if (cfg_zerocopy) { 632 val = 1; 633 634 ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 635 &val, sizeof(val)); 636 if (ret) { 637 if (errno == ENOPROTOOPT || errno == ENOTSUPP) { 638 fprintf(stderr, "SO_ZEROCOPY not supported"); 639 exit(KSFT_SKIP); 640 } 641 error(1, errno, "setsockopt zerocopy"); 642 } 643 } 644 645 if (cfg_connected && 646 connect(fd, (void *)&cfg_dst_addr, cfg_alen)) 647 error(1, errno, "connect"); 648 649 if (cfg_segment) 650 set_pmtu_discover(fd, cfg_family == PF_INET); 651 652 if (cfg_tx_tstamp) 653 set_tx_timestamping(fd); 654 655 num_msgs = num_sends = 0; 656 tnow = gettimeofday_ms(); 657 tstart = tnow; 658 tend = tnow; 659 tstop = tnow + cfg_runtime_ms; 660 treport = tnow + 1000; 661 662 i = 0; 663 do { 664 if (cfg_tcp) 665 num_sends += send_tcp(fd, buf[i]); 666 else if (cfg_segment) 667 num_sends += send_udp_segment(fd, buf[i]); 668 else if (cfg_sendmmsg) 669 num_sends += send_udp_sendmmsg(fd, buf[i]); 670 else 671 num_sends += send_udp(fd, buf[i]); 672 num_msgs++; 673 if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp) 674 flush_errqueue(fd, cfg_poll); 675 676 if (cfg_msg_nr && num_msgs >= cfg_msg_nr) 677 break; 678 679 tnow = gettimeofday_ms(); 680 if (tnow >= treport) { 681 print_report(num_msgs, num_sends); 682 num_msgs = num_sends = 0; 683 treport = tnow + 1000; 684 } 685 686 /* cold cache when writing buffer */ 687 if (cfg_cache_trash) 688 i = ++i < NUM_PKT ? i : 0; 689 690 } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); 691 692 if (cfg_zerocopy || cfg_tx_tstamp) 693 flush_errqueue(fd, true); 694 695 if (close(fd)) 696 error(1, errno, "close"); 697 698 if (cfg_audit) { 699 tend = tnow; 700 total_num_msgs += num_msgs; 701 total_num_sends += num_sends; 702 print_audit_report(total_num_msgs, total_num_sends); 703 } 704 705 return 0; 706 } 707