1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 5 #include <arpa/inet.h> 6 #include <errno.h> 7 #include <error.h> 8 #include <linux/errqueue.h> 9 #include <linux/net_tstamp.h> 10 #include <netinet/if_ether.h> 11 #include <netinet/in.h> 12 #include <netinet/ip.h> 13 #include <netinet/ip6.h> 14 #include <netinet/udp.h> 15 #include <poll.h> 16 #include <sched.h> 17 #include <signal.h> 18 #include <stdbool.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 #include <sys/socket.h> 23 #include <sys/time.h> 24 #include <sys/poll.h> 25 #include <sys/types.h> 26 #include <unistd.h> 27 28 #include "../kselftest.h" 29 30 #ifndef ETH_MAX_MTU 31 #define ETH_MAX_MTU 0xFFFFU 32 #endif 33 34 #ifndef UDP_SEGMENT 35 #define UDP_SEGMENT 103 36 #endif 37 38 #ifndef SO_ZEROCOPY 39 #define SO_ZEROCOPY 60 40 #endif 41 42 #ifndef SO_EE_ORIGIN_ZEROCOPY 43 #define SO_EE_ORIGIN_ZEROCOPY 5 44 #endif 45 46 #ifndef MSG_ZEROCOPY 47 #define MSG_ZEROCOPY 0x4000000 48 #endif 49 50 #ifndef ENOTSUPP 51 #define ENOTSUPP 524 52 #endif 53 54 #define NUM_PKT 100 55 56 static bool cfg_cache_trash; 57 static int cfg_cpu = -1; 58 static int cfg_connected = true; 59 static int cfg_family = PF_UNSPEC; 60 static uint16_t cfg_mss; 61 static int cfg_payload_len = (1472 * 42); 62 static int cfg_port = 8000; 63 static int cfg_runtime_ms = -1; 64 static bool cfg_poll; 65 static bool cfg_segment; 66 static bool cfg_sendmmsg; 67 static bool cfg_tcp; 68 static uint32_t cfg_tx_ts = SOF_TIMESTAMPING_TX_SOFTWARE; 69 static bool cfg_tx_tstamp; 70 static bool cfg_audit; 71 static bool cfg_verbose; 72 static bool cfg_zerocopy; 73 static int cfg_msg_nr; 74 static uint16_t cfg_gso_size; 75 static unsigned long total_num_msgs; 76 static unsigned long total_num_sends; 77 static unsigned long stat_tx_ts; 78 static unsigned long stat_tx_ts_errors; 79 static unsigned long tstart; 80 static unsigned long tend; 81 static unsigned long stat_zcopies; 82 83 static socklen_t cfg_alen; 84 static struct sockaddr_storage cfg_dst_addr; 85 86 static bool interrupted; 87 static char buf[NUM_PKT][ETH_MAX_MTU]; 88 89 static void sigint_handler(int signum) 90 { 91 if (signum == SIGINT) 92 interrupted = true; 93 } 94 95 static unsigned long gettimeofday_ms(void) 96 { 97 struct timeval tv; 98 99 gettimeofday(&tv, NULL); 100 return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); 101 } 102 103 static int set_cpu(int cpu) 104 { 105 cpu_set_t mask; 106 107 CPU_ZERO(&mask); 108 CPU_SET(cpu, &mask); 109 if (sched_setaffinity(0, sizeof(mask), &mask)) 110 error(1, 0, "setaffinity %d", cpu); 111 112 return 0; 113 } 114 115 static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) 116 { 117 struct sockaddr_in6 *addr6 = (void *) sockaddr; 118 struct sockaddr_in *addr4 = (void *) sockaddr; 119 120 switch (domain) { 121 case PF_INET: 122 addr4->sin_family = AF_INET; 123 addr4->sin_port = htons(cfg_port); 124 if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) 125 error(1, 0, "ipv4 parse error: %s", str_addr); 126 break; 127 case PF_INET6: 128 addr6->sin6_family = AF_INET6; 129 addr6->sin6_port = htons(cfg_port); 130 if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) 131 error(1, 0, "ipv6 parse error: %s", str_addr); 132 break; 133 default: 134 error(1, 0, "illegal domain"); 135 } 136 } 137 138 static void flush_cmsg(struct cmsghdr *cmsg) 139 { 140 struct sock_extended_err *err; 141 struct scm_timestamping *tss; 142 __u32 lo; 143 __u32 hi; 144 int i; 145 146 switch (cmsg->cmsg_level) { 147 case SOL_SOCKET: 148 if (cmsg->cmsg_type == SO_TIMESTAMPING) { 149 i = (cfg_tx_ts == SOF_TIMESTAMPING_TX_HARDWARE) ? 2 : 0; 150 tss = (struct scm_timestamping *)CMSG_DATA(cmsg); 151 if (tss->ts[i].tv_sec == 0) 152 stat_tx_ts_errors++; 153 } else { 154 error(1, 0, "unknown SOL_SOCKET cmsg type=%u\n", 155 cmsg->cmsg_type); 156 } 157 break; 158 case SOL_IP: 159 case SOL_IPV6: 160 switch (cmsg->cmsg_type) { 161 case IP_RECVERR: 162 case IPV6_RECVERR: 163 { 164 err = (struct sock_extended_err *)CMSG_DATA(cmsg); 165 switch (err->ee_origin) { 166 case SO_EE_ORIGIN_TIMESTAMPING: 167 /* Got a TX timestamp from error queue */ 168 stat_tx_ts++; 169 break; 170 case SO_EE_ORIGIN_ICMP: 171 case SO_EE_ORIGIN_ICMP6: 172 if (cfg_verbose) 173 fprintf(stderr, 174 "received ICMP error: type=%u, code=%u\n", 175 err->ee_type, err->ee_code); 176 break; 177 case SO_EE_ORIGIN_ZEROCOPY: 178 { 179 lo = err->ee_info; 180 hi = err->ee_data; 181 /* range of IDs acknowledged */ 182 stat_zcopies += hi - lo + 1; 183 break; 184 } 185 case SO_EE_ORIGIN_LOCAL: 186 if (cfg_verbose) 187 fprintf(stderr, 188 "received packet with local origin: %u\n", 189 err->ee_origin); 190 break; 191 default: 192 error(0, 1, "received packet with origin: %u", 193 err->ee_origin); 194 } 195 break; 196 } 197 default: 198 error(0, 1, "unknown IP msg type=%u\n", 199 cmsg->cmsg_type); 200 break; 201 } 202 break; 203 default: 204 error(0, 1, "unknown cmsg level=%u\n", 205 cmsg->cmsg_level); 206 } 207 } 208 209 static void flush_errqueue_recv(int fd) 210 { 211 char control[CMSG_SPACE(sizeof(struct scm_timestamping)) + 212 CMSG_SPACE(sizeof(struct sock_extended_err)) + 213 CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; 214 struct msghdr msg = {0}; 215 struct cmsghdr *cmsg; 216 int ret; 217 218 while (1) { 219 msg.msg_control = control; 220 msg.msg_controllen = sizeof(control); 221 ret = recvmsg(fd, &msg, MSG_ERRQUEUE); 222 if (ret == -1 && errno == EAGAIN) 223 break; 224 if (ret == -1) 225 error(1, errno, "errqueue"); 226 if (msg.msg_flags != MSG_ERRQUEUE) 227 error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags); 228 if (cfg_audit) { 229 for (cmsg = CMSG_FIRSTHDR(&msg); 230 cmsg; 231 cmsg = CMSG_NXTHDR(&msg, cmsg)) 232 flush_cmsg(cmsg); 233 } 234 msg.msg_flags = 0; 235 } 236 } 237 238 static void flush_errqueue(int fd, const bool do_poll) 239 { 240 if (do_poll) { 241 struct pollfd fds = {0}; 242 int ret; 243 244 fds.fd = fd; 245 ret = poll(&fds, 1, 500); 246 if (ret == 0) { 247 if (cfg_verbose) 248 fprintf(stderr, "poll timeout\n"); 249 } else if (ret < 0) { 250 error(1, errno, "poll"); 251 } 252 } 253 254 flush_errqueue_recv(fd); 255 } 256 257 static int send_tcp(int fd, char *data) 258 { 259 int ret, done = 0, count = 0; 260 261 while (done < cfg_payload_len) { 262 ret = send(fd, data + done, cfg_payload_len - done, 263 cfg_zerocopy ? MSG_ZEROCOPY : 0); 264 if (ret == -1) 265 error(1, errno, "write"); 266 267 done += ret; 268 count++; 269 } 270 271 return count; 272 } 273 274 static int send_udp(int fd, char *data) 275 { 276 int ret, total_len, len, count = 0; 277 278 total_len = cfg_payload_len; 279 280 while (total_len) { 281 len = total_len < cfg_mss ? total_len : cfg_mss; 282 283 ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0, 284 cfg_connected ? NULL : (void *)&cfg_dst_addr, 285 cfg_connected ? 0 : cfg_alen); 286 if (ret == -1) 287 error(1, errno, "write"); 288 if (ret != len) 289 error(1, errno, "write: %uB != %uB\n", ret, len); 290 291 total_len -= len; 292 count++; 293 } 294 295 return count; 296 } 297 298 static void send_ts_cmsg(struct cmsghdr *cm) 299 { 300 uint32_t *valp; 301 302 cm->cmsg_level = SOL_SOCKET; 303 cm->cmsg_type = SO_TIMESTAMPING; 304 cm->cmsg_len = CMSG_LEN(sizeof(cfg_tx_ts)); 305 valp = (void *)CMSG_DATA(cm); 306 *valp = cfg_tx_ts; 307 } 308 309 static int send_udp_sendmmsg(int fd, char *data) 310 { 311 char control[CMSG_SPACE(sizeof(cfg_tx_ts))] = {0}; 312 const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN; 313 struct mmsghdr mmsgs[max_nr_msg]; 314 struct iovec iov[max_nr_msg]; 315 unsigned int off = 0, left; 316 size_t msg_controllen = 0; 317 int i = 0, ret; 318 319 memset(mmsgs, 0, sizeof(mmsgs)); 320 321 if (cfg_tx_tstamp) { 322 struct msghdr msg = {0}; 323 struct cmsghdr *cmsg; 324 325 msg.msg_control = control; 326 msg.msg_controllen = sizeof(control); 327 cmsg = CMSG_FIRSTHDR(&msg); 328 send_ts_cmsg(cmsg); 329 msg_controllen += CMSG_SPACE(sizeof(cfg_tx_ts)); 330 } 331 332 left = cfg_payload_len; 333 while (left) { 334 if (i == max_nr_msg) 335 error(1, 0, "sendmmsg: exceeds max_nr_msg"); 336 337 iov[i].iov_base = data + off; 338 iov[i].iov_len = cfg_mss < left ? cfg_mss : left; 339 340 mmsgs[i].msg_hdr.msg_iov = iov + i; 341 mmsgs[i].msg_hdr.msg_iovlen = 1; 342 343 mmsgs[i].msg_hdr.msg_name = (void *)&cfg_dst_addr; 344 mmsgs[i].msg_hdr.msg_namelen = cfg_alen; 345 if (msg_controllen) { 346 mmsgs[i].msg_hdr.msg_control = control; 347 mmsgs[i].msg_hdr.msg_controllen = msg_controllen; 348 } 349 350 off += iov[i].iov_len; 351 left -= iov[i].iov_len; 352 i++; 353 } 354 355 ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0); 356 if (ret == -1) 357 error(1, errno, "sendmmsg"); 358 359 return ret; 360 } 361 362 static void send_udp_segment_cmsg(struct cmsghdr *cm) 363 { 364 uint16_t *valp; 365 366 cm->cmsg_level = SOL_UDP; 367 cm->cmsg_type = UDP_SEGMENT; 368 cm->cmsg_len = CMSG_LEN(sizeof(cfg_gso_size)); 369 valp = (void *)CMSG_DATA(cm); 370 *valp = cfg_gso_size; 371 } 372 373 static int send_udp_segment(int fd, char *data) 374 { 375 char control[CMSG_SPACE(sizeof(cfg_gso_size)) + 376 CMSG_SPACE(sizeof(cfg_tx_ts))] = {0}; 377 struct msghdr msg = {0}; 378 struct iovec iov = {0}; 379 size_t msg_controllen; 380 struct cmsghdr *cmsg; 381 int ret; 382 383 iov.iov_base = data; 384 iov.iov_len = cfg_payload_len; 385 386 msg.msg_iov = &iov; 387 msg.msg_iovlen = 1; 388 389 msg.msg_control = control; 390 msg.msg_controllen = sizeof(control); 391 cmsg = CMSG_FIRSTHDR(&msg); 392 send_udp_segment_cmsg(cmsg); 393 msg_controllen = CMSG_SPACE(sizeof(cfg_mss)); 394 if (cfg_tx_tstamp) { 395 cmsg = CMSG_NXTHDR(&msg, cmsg); 396 send_ts_cmsg(cmsg); 397 msg_controllen += CMSG_SPACE(sizeof(cfg_tx_ts)); 398 } 399 400 msg.msg_controllen = msg_controllen; 401 msg.msg_name = (void *)&cfg_dst_addr; 402 msg.msg_namelen = cfg_alen; 403 404 ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0); 405 if (ret == -1) 406 error(1, errno, "sendmsg"); 407 if (ret != iov.iov_len) 408 error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len); 409 410 return 1; 411 } 412 413 static void usage(const char *filepath) 414 { 415 error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", 416 filepath); 417 } 418 419 static void parse_opts(int argc, char **argv) 420 { 421 int max_len, hdrlen; 422 int c; 423 424 while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) { 425 switch (c) { 426 case '4': 427 if (cfg_family != PF_UNSPEC) 428 error(1, 0, "Pass one of -4 or -6"); 429 cfg_family = PF_INET; 430 cfg_alen = sizeof(struct sockaddr_in); 431 break; 432 case '6': 433 if (cfg_family != PF_UNSPEC) 434 error(1, 0, "Pass one of -4 or -6"); 435 cfg_family = PF_INET6; 436 cfg_alen = sizeof(struct sockaddr_in6); 437 break; 438 case 'a': 439 cfg_audit = true; 440 break; 441 case 'c': 442 cfg_cache_trash = true; 443 break; 444 case 'C': 445 cfg_cpu = strtol(optarg, NULL, 0); 446 break; 447 case 'D': 448 setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); 449 break; 450 case 'l': 451 cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; 452 break; 453 case 'm': 454 cfg_sendmmsg = true; 455 break; 456 case 'M': 457 cfg_msg_nr = strtoul(optarg, NULL, 10); 458 break; 459 case 'p': 460 cfg_port = strtoul(optarg, NULL, 0); 461 break; 462 case 'P': 463 cfg_poll = true; 464 break; 465 case 's': 466 cfg_payload_len = strtoul(optarg, NULL, 0); 467 break; 468 case 'S': 469 cfg_gso_size = strtoul(optarg, NULL, 0); 470 cfg_segment = true; 471 break; 472 case 'H': 473 cfg_tx_ts = SOF_TIMESTAMPING_TX_HARDWARE; 474 cfg_tx_tstamp = true; 475 break; 476 case 't': 477 cfg_tcp = true; 478 break; 479 case 'T': 480 cfg_tx_tstamp = true; 481 break; 482 case 'u': 483 cfg_connected = false; 484 break; 485 case 'v': 486 cfg_verbose = true; 487 break; 488 case 'z': 489 cfg_zerocopy = true; 490 break; 491 } 492 } 493 494 if (optind != argc) 495 usage(argv[0]); 496 497 if (cfg_family == PF_UNSPEC) 498 error(1, 0, "must pass one of -4 or -6"); 499 if (cfg_tcp && !cfg_connected) 500 error(1, 0, "connectionless tcp makes no sense"); 501 if (cfg_segment && cfg_sendmmsg) 502 error(1, 0, "cannot combine segment offload and sendmmsg"); 503 if (cfg_tx_tstamp && !(cfg_segment || cfg_sendmmsg)) 504 error(1, 0, "Options -T and -H require either -S or -m option"); 505 506 if (cfg_family == PF_INET) 507 hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr); 508 else 509 hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr); 510 511 cfg_mss = ETH_DATA_LEN - hdrlen; 512 max_len = ETH_MAX_MTU - hdrlen; 513 if (!cfg_gso_size) 514 cfg_gso_size = cfg_mss; 515 516 if (cfg_payload_len > max_len) 517 error(1, 0, "payload length %u exceeds max %u", 518 cfg_payload_len, max_len); 519 } 520 521 static void set_pmtu_discover(int fd, bool is_ipv4) 522 { 523 int level, name, val; 524 525 if (is_ipv4) { 526 level = SOL_IP; 527 name = IP_MTU_DISCOVER; 528 val = IP_PMTUDISC_DO; 529 } else { 530 level = SOL_IPV6; 531 name = IPV6_MTU_DISCOVER; 532 val = IPV6_PMTUDISC_DO; 533 } 534 535 if (setsockopt(fd, level, name, &val, sizeof(val))) 536 error(1, errno, "setsockopt path mtu"); 537 } 538 539 static void set_tx_timestamping(int fd) 540 { 541 int val = SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID | 542 SOF_TIMESTAMPING_OPT_TSONLY; 543 544 if (cfg_tx_ts == SOF_TIMESTAMPING_TX_SOFTWARE) 545 val |= SOF_TIMESTAMPING_SOFTWARE; 546 else 547 val |= SOF_TIMESTAMPING_RAW_HARDWARE; 548 549 if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val))) 550 error(1, errno, "setsockopt tx timestamping"); 551 } 552 553 static void print_audit_report(unsigned long num_msgs, unsigned long num_sends) 554 { 555 unsigned long tdelta; 556 557 tdelta = tend - tstart; 558 if (!tdelta) 559 return; 560 561 fprintf(stderr, "Summary over %lu.%03lu seconds...\n", 562 tdelta / 1000, tdelta % 1000); 563 fprintf(stderr, 564 "sum %s tx: %6lu MB/s %10lu calls (%lu/s) %10lu msgs (%lu/s)\n", 565 cfg_tcp ? "tcp" : "udp", 566 ((num_msgs * cfg_payload_len) >> 10) / tdelta, 567 num_sends, num_sends * 1000 / tdelta, 568 num_msgs, num_msgs * 1000 / tdelta); 569 570 if (cfg_tx_tstamp) { 571 if (stat_tx_ts_errors) 572 error(1, 0, 573 "Expected clean TX Timestamps: %9lu msgs received %6lu errors", 574 stat_tx_ts, stat_tx_ts_errors); 575 if (stat_tx_ts != num_sends) 576 error(1, 0, 577 "Unexpected number of TX Timestamps: %9lu expected %9lu received", 578 num_sends, stat_tx_ts); 579 fprintf(stderr, 580 "Tx Timestamps: %19lu received %17lu errors\n", 581 stat_tx_ts, stat_tx_ts_errors); 582 } 583 584 if (cfg_zerocopy) { 585 if (stat_zcopies != num_sends) 586 error(1, 0, "Unexpected number of Zerocopy completions: %9lu expected %9lu received", 587 num_sends, stat_zcopies); 588 fprintf(stderr, 589 "Zerocopy acks: %19lu\n", 590 stat_zcopies); 591 } 592 } 593 594 static void print_report(unsigned long num_msgs, unsigned long num_sends) 595 { 596 fprintf(stderr, 597 "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n", 598 cfg_tcp ? "tcp" : "udp", 599 (num_msgs * cfg_payload_len) >> 20, 600 num_sends, num_msgs); 601 602 if (cfg_audit) { 603 total_num_msgs += num_msgs; 604 total_num_sends += num_sends; 605 } 606 } 607 608 int main(int argc, char **argv) 609 { 610 unsigned long num_msgs, num_sends; 611 unsigned long tnow, treport, tstop; 612 int fd, i, val, ret; 613 614 parse_opts(argc, argv); 615 616 if (cfg_cpu > 0) 617 set_cpu(cfg_cpu); 618 619 for (i = 0; i < sizeof(buf[0]); i++) 620 buf[0][i] = 'a' + (i % 26); 621 for (i = 1; i < NUM_PKT; i++) 622 memcpy(buf[i], buf[0], sizeof(buf[0])); 623 624 signal(SIGINT, sigint_handler); 625 626 fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); 627 if (fd == -1) 628 error(1, errno, "socket"); 629 630 if (cfg_zerocopy) { 631 val = 1; 632 633 ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 634 &val, sizeof(val)); 635 if (ret) { 636 if (errno == ENOPROTOOPT || errno == ENOTSUPP) { 637 fprintf(stderr, "SO_ZEROCOPY not supported"); 638 exit(KSFT_SKIP); 639 } 640 error(1, errno, "setsockopt zerocopy"); 641 } 642 } 643 644 if (cfg_connected && 645 connect(fd, (void *)&cfg_dst_addr, cfg_alen)) 646 error(1, errno, "connect"); 647 648 if (cfg_segment) 649 set_pmtu_discover(fd, cfg_family == PF_INET); 650 651 if (cfg_tx_tstamp) 652 set_tx_timestamping(fd); 653 654 num_msgs = num_sends = 0; 655 tnow = gettimeofday_ms(); 656 tstart = tnow; 657 tend = tnow; 658 tstop = tnow + cfg_runtime_ms; 659 treport = tnow + 1000; 660 661 i = 0; 662 do { 663 if (cfg_tcp) 664 num_sends += send_tcp(fd, buf[i]); 665 else if (cfg_segment) 666 num_sends += send_udp_segment(fd, buf[i]); 667 else if (cfg_sendmmsg) 668 num_sends += send_udp_sendmmsg(fd, buf[i]); 669 else 670 num_sends += send_udp(fd, buf[i]); 671 num_msgs++; 672 if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp) 673 flush_errqueue(fd, cfg_poll); 674 675 if (cfg_msg_nr && num_msgs >= cfg_msg_nr) 676 break; 677 678 tnow = gettimeofday_ms(); 679 if (tnow >= treport) { 680 print_report(num_msgs, num_sends); 681 num_msgs = num_sends = 0; 682 treport = tnow + 1000; 683 } 684 685 /* cold cache when writing buffer */ 686 if (cfg_cache_trash) 687 i = ++i < NUM_PKT ? i : 0; 688 689 } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); 690 691 if (cfg_zerocopy || cfg_tx_tstamp) 692 flush_errqueue(fd, true); 693 694 if (close(fd)) 695 error(1, errno, "close"); 696 697 if (cfg_audit) { 698 tend = tnow; 699 total_num_msgs += num_msgs; 700 total_num_sends += num_sends; 701 print_audit_report(total_num_msgs, total_num_sends); 702 } 703 704 return 0; 705 } 706