1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * This testsuite provides conformance testing for GRO coalescing. 4 * 5 * Test cases: 6 * 1.data 7 * Data packets of the same size and same header setup with correct 8 * sequence numbers coalesce. The one exception being the last data 9 * packet coalesced: it can be smaller than the rest and coalesced 10 * as long as it is in the same flow. 11 * 2.ack 12 * Pure ACK does not coalesce. 13 * 3.flags 14 * Specific test cases: no packets with PSH, SYN, URG, RST set will 15 * be coalesced. 16 * 4.tcp 17 * Packets with incorrect checksum, non-consecutive seqno and 18 * different TCP header options shouldn't coalesce. Nit: given that 19 * some extension headers have paddings, such as timestamp, headers 20 * that are padding differently would not be coalesced. 21 * 5.ip: 22 * Packets with different (ECN, TTL, TOS) header, ip options or 23 * ip fragments (ipv6) shouldn't coalesce. 24 * 6.large: 25 * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce. 26 * 27 * MSS is defined as 4096 - header because if it is too small 28 * (i.e. 1500 MTU - header), it will result in many packets, 29 * increasing the "large" test case's flakiness. This is because 30 * due to time sensitivity in the coalescing window, the receiver 31 * may not coalesce all of the packets. 32 * 33 * Note the timing issue applies to all of the test cases, so some 34 * flakiness is to be expected. 35 * 36 */ 37 38 #define _GNU_SOURCE 39 40 #include <arpa/inet.h> 41 #include <errno.h> 42 #include <error.h> 43 #include <getopt.h> 44 #include <linux/filter.h> 45 #include <linux/if_packet.h> 46 #include <linux/ipv6.h> 47 #include <net/ethernet.h> 48 #include <net/if.h> 49 #include <netinet/in.h> 50 #include <netinet/ip.h> 51 #include <netinet/ip6.h> 52 #include <netinet/tcp.h> 53 #include <stdbool.h> 54 #include <stddef.h> 55 #include <stdio.h> 56 #include <stdarg.h> 57 #include <string.h> 58 #include <unistd.h> 59 60 #include "../kselftest.h" 61 62 #define DPORT 8000 63 #define SPORT 1500 64 #define PAYLOAD_LEN 100 65 #define NUM_PACKETS 4 66 #define START_SEQ 100 67 #define START_ACK 100 68 #define ETH_P_NONE 0 69 #define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) 70 #define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) 71 #define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) 72 #define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) 73 #define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) 74 75 static const char *addr6_src = "fdaa::2"; 76 static const char *addr6_dst = "fdaa::1"; 77 static const char *addr4_src = "192.168.1.200"; 78 static const char *addr4_dst = "192.168.1.100"; 79 static int proto = -1; 80 static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN]; 81 static char *testname = "data"; 82 static char *ifname = "eth0"; 83 static char *smac = "aa:00:00:00:00:02"; 84 static char *dmac = "aa:00:00:00:00:01"; 85 static bool verbose; 86 static bool tx_socket = true; 87 static int tcp_offset = -1; 88 static int total_hdr_len = -1; 89 static int ethhdr_proto = -1; 90 91 static void vlog(const char *fmt, ...) 92 { 93 va_list args; 94 95 if (verbose) { 96 va_start(args, fmt); 97 vfprintf(stderr, fmt, args); 98 va_end(args); 99 } 100 } 101 102 static void setup_sock_filter(int fd) 103 { 104 const int dport_off = tcp_offset + offsetof(struct tcphdr, dest); 105 const int ethproto_off = offsetof(struct ethhdr, h_proto); 106 int optlen = 0; 107 int ipproto_off; 108 int next_off; 109 110 if (proto == PF_INET) 111 next_off = offsetof(struct iphdr, protocol); 112 else 113 next_off = offsetof(struct ipv6hdr, nexthdr); 114 ipproto_off = ETH_HLEN + next_off; 115 116 if (strcmp(testname, "ip") == 0) { 117 if (proto == PF_INET) 118 optlen = sizeof(struct ip_timestamp); 119 else 120 optlen = sizeof(struct ip6_frag); 121 } 122 123 struct sock_filter filter[] = { 124 BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off), 125 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7), 126 BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off), 127 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5), 128 BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off), 129 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0), 130 BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen), 131 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1), 132 BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF), 133 BPF_STMT(BPF_RET + BPF_K, 0), 134 }; 135 136 struct sock_fprog bpf = { 137 .len = ARRAY_SIZE(filter), 138 .filter = filter, 139 }; 140 141 if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0) 142 error(1, errno, "error setting filter"); 143 } 144 145 static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum) 146 { 147 uint16_t *words = data; 148 int i; 149 150 for (i = 0; i < len / 2; i++) 151 sum += words[i]; 152 if (len & 1) 153 sum += ((char *)data)[len - 1]; 154 return sum; 155 } 156 157 static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) 158 { 159 sum = checksum_nofold(data, len, sum); 160 while (sum > 0xFFFF) 161 sum = (sum & 0xFFFF) + (sum >> 16); 162 return ~sum; 163 } 164 165 static uint16_t tcp_checksum(void *buf, int payload_len) 166 { 167 struct pseudo_header6 { 168 struct in6_addr saddr; 169 struct in6_addr daddr; 170 uint16_t protocol; 171 uint16_t payload_len; 172 } ph6; 173 struct pseudo_header4 { 174 struct in_addr saddr; 175 struct in_addr daddr; 176 uint16_t protocol; 177 uint16_t payload_len; 178 } ph4; 179 uint32_t sum = 0; 180 181 if (proto == PF_INET6) { 182 if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1) 183 error(1, errno, "inet_pton6 source ip pseudo"); 184 if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1) 185 error(1, errno, "inet_pton6 dest ip pseudo"); 186 ph6.protocol = htons(IPPROTO_TCP); 187 ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len); 188 189 sum = checksum_nofold(&ph6, sizeof(ph6), 0); 190 } else if (proto == PF_INET) { 191 if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1) 192 error(1, errno, "inet_pton source ip pseudo"); 193 if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1) 194 error(1, errno, "inet_pton dest ip pseudo"); 195 ph4.protocol = htons(IPPROTO_TCP); 196 ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len); 197 198 sum = checksum_nofold(&ph4, sizeof(ph4), 0); 199 } 200 201 return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum); 202 } 203 204 static void read_MAC(uint8_t *mac_addr, char *mac) 205 { 206 if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 207 &mac_addr[0], &mac_addr[1], &mac_addr[2], 208 &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) 209 error(1, 0, "sscanf"); 210 } 211 212 static void fill_datalinklayer(void *buf) 213 { 214 struct ethhdr *eth = buf; 215 216 memcpy(eth->h_dest, dst_mac, ETH_ALEN); 217 memcpy(eth->h_source, src_mac, ETH_ALEN); 218 eth->h_proto = ethhdr_proto; 219 } 220 221 static void fill_networklayer(void *buf, int payload_len) 222 { 223 struct ipv6hdr *ip6h = buf; 224 struct iphdr *iph = buf; 225 226 if (proto == PF_INET6) { 227 memset(ip6h, 0, sizeof(*ip6h)); 228 229 ip6h->version = 6; 230 ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); 231 ip6h->nexthdr = IPPROTO_TCP; 232 ip6h->hop_limit = 8; 233 if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) 234 error(1, errno, "inet_pton source ip6"); 235 if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1) 236 error(1, errno, "inet_pton dest ip6"); 237 } else if (proto == PF_INET) { 238 memset(iph, 0, sizeof(*iph)); 239 240 iph->version = 4; 241 iph->ihl = 5; 242 iph->ttl = 8; 243 iph->protocol = IPPROTO_TCP; 244 iph->tot_len = htons(sizeof(struct tcphdr) + 245 payload_len + sizeof(struct iphdr)); 246 iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ 247 if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1) 248 error(1, errno, "inet_pton source ip"); 249 if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1) 250 error(1, errno, "inet_pton dest ip"); 251 iph->check = checksum_fold(buf, sizeof(struct iphdr), 0); 252 } 253 } 254 255 static void fill_transportlayer(void *buf, int seq_offset, int ack_offset, 256 int payload_len, int fin) 257 { 258 struct tcphdr *tcph = buf; 259 260 memset(tcph, 0, sizeof(*tcph)); 261 262 tcph->source = htons(SPORT); 263 tcph->dest = htons(DPORT); 264 tcph->seq = ntohl(START_SEQ + seq_offset); 265 tcph->ack_seq = ntohl(START_ACK + ack_offset); 266 tcph->ack = 1; 267 tcph->fin = fin; 268 tcph->doff = 5; 269 tcph->window = htons(TCP_MAXWIN); 270 tcph->urg_ptr = 0; 271 tcph->check = tcp_checksum(tcph, payload_len); 272 } 273 274 static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr) 275 { 276 int ret = -1; 277 278 ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr)); 279 if (ret == -1) 280 error(1, errno, "sendto failure"); 281 if (ret != len) 282 error(1, errno, "sendto wrong length"); 283 } 284 285 static void create_packet(void *buf, int seq_offset, int ack_offset, 286 int payload_len, int fin) 287 { 288 memset(buf, 0, total_hdr_len); 289 memset(buf + total_hdr_len, 'a', payload_len); 290 fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, 291 payload_len, fin); 292 fill_networklayer(buf + ETH_HLEN, payload_len); 293 fill_datalinklayer(buf); 294 } 295 296 /* send one extra flag, not first and not last pkt */ 297 static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, 298 int rst, int urg) 299 { 300 static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN]; 301 static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; 302 int payload_len, pkt_size, flag, i; 303 struct tcphdr *tcph; 304 305 payload_len = PAYLOAD_LEN * psh; 306 pkt_size = total_hdr_len + payload_len; 307 flag = NUM_PACKETS / 2; 308 309 create_packet(flag_buf, flag * payload_len, 0, payload_len, 0); 310 311 tcph = (struct tcphdr *)(flag_buf + tcp_offset); 312 tcph->psh = psh; 313 tcph->syn = syn; 314 tcph->rst = rst; 315 tcph->urg = urg; 316 tcph->check = 0; 317 tcph->check = tcp_checksum(tcph, payload_len); 318 319 for (i = 0; i < NUM_PACKETS + 1; i++) { 320 if (i == flag) { 321 write_packet(fd, flag_buf, pkt_size, daddr); 322 continue; 323 } 324 create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); 325 write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); 326 } 327 } 328 329 /* Test for data of same length, smaller than previous 330 * and of different lengths 331 */ 332 static void send_data_pkts(int fd, struct sockaddr_ll *daddr, 333 int payload_len1, int payload_len2) 334 { 335 static char buf[ETH_HLEN + IP_MAXPACKET]; 336 337 create_packet(buf, 0, 0, payload_len1, 0); 338 write_packet(fd, buf, total_hdr_len + payload_len1, daddr); 339 create_packet(buf, payload_len1, 0, payload_len2, 0); 340 write_packet(fd, buf, total_hdr_len + payload_len2, daddr); 341 } 342 343 /* If incoming segments make tracked segment length exceed 344 * legal IP datagram length, do not coalesce 345 */ 346 static void send_large(int fd, struct sockaddr_ll *daddr, int remainder) 347 { 348 static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS]; 349 static char last[TOTAL_HDR_LEN + MSS]; 350 static char new_seg[TOTAL_HDR_LEN + MSS]; 351 int i; 352 353 for (i = 0; i < NUM_LARGE_PKT; i++) 354 create_packet(pkts[i], i * MSS, 0, MSS, 0); 355 create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0); 356 create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0); 357 358 for (i = 0; i < NUM_LARGE_PKT; i++) 359 write_packet(fd, pkts[i], total_hdr_len + MSS, daddr); 360 write_packet(fd, last, total_hdr_len + remainder, daddr); 361 write_packet(fd, new_seg, total_hdr_len + remainder, daddr); 362 } 363 364 /* Pure acks and dup acks don't coalesce */ 365 static void send_ack(int fd, struct sockaddr_ll *daddr) 366 { 367 static char buf[MAX_HDR_LEN]; 368 369 create_packet(buf, 0, 0, 0, 0); 370 write_packet(fd, buf, total_hdr_len, daddr); 371 write_packet(fd, buf, total_hdr_len, daddr); 372 create_packet(buf, 0, 1, 0, 0); 373 write_packet(fd, buf, total_hdr_len, daddr); 374 } 375 376 static void recompute_packet(char *buf, char *no_ext, int extlen) 377 { 378 struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset); 379 struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); 380 struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); 381 382 memmove(buf, no_ext, total_hdr_len); 383 memmove(buf + total_hdr_len + extlen, 384 no_ext + total_hdr_len, PAYLOAD_LEN); 385 386 tcphdr->doff = tcphdr->doff + (extlen / 4); 387 tcphdr->check = 0; 388 tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen); 389 if (proto == PF_INET) { 390 iph->tot_len = htons(ntohs(iph->tot_len) + extlen); 391 iph->check = 0; 392 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); 393 } else { 394 ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); 395 } 396 } 397 398 static void tcp_write_options(char *buf, int kind, int ts) 399 { 400 struct tcp_option_ts { 401 uint8_t kind; 402 uint8_t len; 403 uint32_t tsval; 404 uint32_t tsecr; 405 } *opt_ts = (void *)buf; 406 struct tcp_option_window { 407 uint8_t kind; 408 uint8_t len; 409 uint8_t shift; 410 } *opt_window = (void *)buf; 411 412 switch (kind) { 413 case TCPOPT_NOP: 414 buf[0] = TCPOPT_NOP; 415 break; 416 case TCPOPT_WINDOW: 417 memset(opt_window, 0, sizeof(struct tcp_option_window)); 418 opt_window->kind = TCPOPT_WINDOW; 419 opt_window->len = TCPOLEN_WINDOW; 420 opt_window->shift = 0; 421 break; 422 case TCPOPT_TIMESTAMP: 423 memset(opt_ts, 0, sizeof(struct tcp_option_ts)); 424 opt_ts->kind = TCPOPT_TIMESTAMP; 425 opt_ts->len = TCPOLEN_TIMESTAMP; 426 opt_ts->tsval = ts; 427 opt_ts->tsecr = 0; 428 break; 429 default: 430 error(1, 0, "unimplemented TCP option"); 431 break; 432 } 433 } 434 435 /* TCP with options is always a permutation of {TS, NOP, NOP}. 436 * Implement different orders to verify coalescing stops. 437 */ 438 static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order) 439 { 440 switch (order) { 441 case 0: 442 tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); 443 tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0); 444 tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */, 445 TCPOPT_TIMESTAMP, ts); 446 break; 447 case 1: 448 tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); 449 tcp_write_options(buf + total_hdr_len + 1, 450 TCPOPT_TIMESTAMP, ts); 451 tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP, 452 TCPOPT_NOP, 0); 453 break; 454 case 2: 455 tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts); 456 tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1, 457 TCPOPT_NOP, 0); 458 tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2, 459 TCPOPT_NOP, 0); 460 break; 461 default: 462 error(1, 0, "unknown order"); 463 break; 464 } 465 recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA); 466 } 467 468 /* Packets with invalid checksum don't coalesce. */ 469 static void send_changed_checksum(int fd, struct sockaddr_ll *daddr) 470 { 471 static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; 472 struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); 473 int pkt_size = total_hdr_len + PAYLOAD_LEN; 474 475 create_packet(buf, 0, 0, PAYLOAD_LEN, 0); 476 write_packet(fd, buf, pkt_size, daddr); 477 478 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); 479 tcph->check = tcph->check - 1; 480 write_packet(fd, buf, pkt_size, daddr); 481 } 482 483 /* Packets with non-consecutive sequence number don't coalesce.*/ 484 static void send_changed_seq(int fd, struct sockaddr_ll *daddr) 485 { 486 static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; 487 struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); 488 int pkt_size = total_hdr_len + PAYLOAD_LEN; 489 490 create_packet(buf, 0, 0, PAYLOAD_LEN, 0); 491 write_packet(fd, buf, pkt_size, daddr); 492 493 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); 494 tcph->seq = ntohl(htonl(tcph->seq) + 1); 495 tcph->check = 0; 496 tcph->check = tcp_checksum(tcph, PAYLOAD_LEN); 497 write_packet(fd, buf, pkt_size, daddr); 498 } 499 500 /* Packet with different timestamp option or different timestamps 501 * don't coalesce. 502 */ 503 static void send_changed_ts(int fd, struct sockaddr_ll *daddr) 504 { 505 static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; 506 static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; 507 int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; 508 509 create_packet(buf, 0, 0, PAYLOAD_LEN, 0); 510 add_standard_tcp_options(extpkt, buf, 0, 0); 511 write_packet(fd, extpkt, pkt_size, daddr); 512 513 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); 514 add_standard_tcp_options(extpkt, buf, 0, 0); 515 write_packet(fd, extpkt, pkt_size, daddr); 516 517 create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); 518 add_standard_tcp_options(extpkt, buf, 100, 0); 519 write_packet(fd, extpkt, pkt_size, daddr); 520 521 create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); 522 add_standard_tcp_options(extpkt, buf, 100, 1); 523 write_packet(fd, extpkt, pkt_size, daddr); 524 525 create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0); 526 add_standard_tcp_options(extpkt, buf, 100, 2); 527 write_packet(fd, extpkt, pkt_size, daddr); 528 } 529 530 /* Packet with different tcp options don't coalesce. */ 531 static void send_diff_opt(int fd, struct sockaddr_ll *daddr) 532 { 533 static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; 534 static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; 535 static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG]; 536 int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; 537 int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG; 538 539 create_packet(buf, 0, 0, PAYLOAD_LEN, 0); 540 add_standard_tcp_options(extpkt1, buf, 0, 0); 541 write_packet(fd, extpkt1, extpkt1_size, daddr); 542 543 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); 544 add_standard_tcp_options(extpkt1, buf, 0, 0); 545 write_packet(fd, extpkt1, extpkt1_size, daddr); 546 547 create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); 548 tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0); 549 tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0); 550 recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1); 551 write_packet(fd, extpkt2, extpkt2_size, daddr); 552 } 553 554 static void add_ipv4_ts_option(void *buf, void *optpkt) 555 { 556 struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset); 557 int optlen = sizeof(struct ip_timestamp); 558 struct iphdr *iph; 559 560 if (optlen % 4) 561 error(1, 0, "ipv4 timestamp length is not a multiple of 4B"); 562 563 ts->ipt_code = IPOPT_TS; 564 ts->ipt_len = optlen; 565 ts->ipt_ptr = 5; 566 ts->ipt_flg = IPOPT_TS_TSONLY; 567 568 memcpy(optpkt, buf, tcp_offset); 569 memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset, 570 sizeof(struct tcphdr) + PAYLOAD_LEN); 571 572 iph = (struct iphdr *)(optpkt + ETH_HLEN); 573 iph->ihl = 5 + (optlen / 4); 574 iph->tot_len = htons(ntohs(iph->tot_len) + optlen); 575 iph->check = 0; 576 iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0); 577 } 578 579 /* IPv4 options shouldn't coalesce */ 580 static void send_ip_options(int fd, struct sockaddr_ll *daddr) 581 { 582 static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; 583 static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)]; 584 int optlen = sizeof(struct ip_timestamp); 585 int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen; 586 587 create_packet(buf, 0, 0, PAYLOAD_LEN, 0); 588 write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); 589 590 create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); 591 add_ipv4_ts_option(buf, optpkt); 592 write_packet(fd, optpkt, pkt_size, daddr); 593 594 create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); 595 write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); 596 } 597 598 /* IPv4 fragments shouldn't coalesce */ 599 static void send_fragment4(int fd, struct sockaddr_ll *daddr) 600 { 601 static char buf[IP_MAXPACKET]; 602 struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); 603 int pkt_size = total_hdr_len + PAYLOAD_LEN; 604 605 create_packet(buf, 0, 0, PAYLOAD_LEN, 0); 606 write_packet(fd, buf, pkt_size, daddr); 607 608 /* Once fragmented, packet would retain the total_len. 609 * Tcp header is prepared as if rest of data is in follow-up frags, 610 * but follow up frags aren't actually sent. 611 */ 612 memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); 613 fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); 614 fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); 615 fill_datalinklayer(buf); 616 617 iph->frag_off = htons(0x6000); // DF = 1, MF = 1 618 iph->check = 0; 619 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); 620 write_packet(fd, buf, pkt_size, daddr); 621 } 622 623 /* IPv4 packets with different ttl don't coalesce.*/ 624 static void send_changed_ttl(int fd, struct sockaddr_ll *daddr) 625 { 626 int pkt_size = total_hdr_len + PAYLOAD_LEN; 627 static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; 628 struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); 629 630 create_packet(buf, 0, 0, PAYLOAD_LEN, 0); 631 write_packet(fd, buf, pkt_size, daddr); 632 633 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); 634 iph->ttl = 7; 635 iph->check = 0; 636 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); 637 write_packet(fd, buf, pkt_size, daddr); 638 } 639 640 /* Packets with different tos don't coalesce.*/ 641 static void send_changed_tos(int fd, struct sockaddr_ll *daddr) 642 { 643 int pkt_size = total_hdr_len + PAYLOAD_LEN; 644 static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; 645 struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); 646 struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); 647 648 create_packet(buf, 0, 0, PAYLOAD_LEN, 0); 649 write_packet(fd, buf, pkt_size, daddr); 650 651 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); 652 if (proto == PF_INET) { 653 iph->tos = 1; 654 iph->check = 0; 655 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); 656 } else if (proto == PF_INET6) { 657 ip6h->priority = 0xf; 658 } 659 write_packet(fd, buf, pkt_size, daddr); 660 } 661 662 /* Packets with different ECN don't coalesce.*/ 663 static void send_changed_ECN(int fd, struct sockaddr_ll *daddr) 664 { 665 int pkt_size = total_hdr_len + PAYLOAD_LEN; 666 static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; 667 struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); 668 669 create_packet(buf, 0, 0, PAYLOAD_LEN, 0); 670 write_packet(fd, buf, pkt_size, daddr); 671 672 create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); 673 if (proto == PF_INET) { 674 buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10 675 iph->check = 0; 676 iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); 677 } else { 678 buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10 679 } 680 write_packet(fd, buf, pkt_size, daddr); 681 } 682 683 /* IPv6 fragments and packets with extensions don't coalesce.*/ 684 static void send_fragment6(int fd, struct sockaddr_ll *daddr) 685 { 686 static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; 687 static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN + 688 sizeof(struct ip6_frag)]; 689 struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); 690 struct ip6_frag *frag = (void *)(extpkt + tcp_offset); 691 int extlen = sizeof(struct ip6_frag); 692 int bufpkt_len = total_hdr_len + PAYLOAD_LEN; 693 int extpkt_len = bufpkt_len + extlen; 694 int i; 695 696 for (i = 0; i < 2; i++) { 697 create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0); 698 write_packet(fd, buf, bufpkt_len, daddr); 699 } 700 701 create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); 702 memset(extpkt, 0, extpkt_len); 703 704 ip6h->nexthdr = IPPROTO_FRAGMENT; 705 ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); 706 frag->ip6f_nxt = IPPROTO_TCP; 707 708 memcpy(extpkt, buf, tcp_offset); 709 memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset, 710 sizeof(struct tcphdr) + PAYLOAD_LEN); 711 write_packet(fd, extpkt, extpkt_len, daddr); 712 713 create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); 714 write_packet(fd, buf, bufpkt_len, daddr); 715 } 716 717 static void bind_packetsocket(int fd) 718 { 719 struct sockaddr_ll daddr = {}; 720 721 daddr.sll_family = AF_PACKET; 722 daddr.sll_protocol = ethhdr_proto; 723 daddr.sll_ifindex = if_nametoindex(ifname); 724 if (daddr.sll_ifindex == 0) 725 error(1, errno, "if_nametoindex"); 726 727 if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0) 728 error(1, errno, "could not bind socket"); 729 } 730 731 static void set_timeout(int fd) 732 { 733 struct timeval timeout; 734 735 timeout.tv_sec = 3; 736 timeout.tv_usec = 0; 737 if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, 738 sizeof(timeout)) < 0) 739 error(1, errno, "cannot set timeout, setsockopt failed"); 740 } 741 742 static void check_recv_pkts(int fd, int *correct_payload, 743 int correct_num_pkts) 744 { 745 static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; 746 struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); 747 struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); 748 struct tcphdr *tcph; 749 bool bad_packet = false; 750 int tcp_ext_len = 0; 751 int ip_ext_len = 0; 752 int pkt_size = -1; 753 int data_len = 0; 754 int num_pkt = 0; 755 int i; 756 757 vlog("Expected {"); 758 for (i = 0; i < correct_num_pkts; i++) 759 vlog("%d ", correct_payload[i]); 760 vlog("}, Total %d packets\nReceived {", correct_num_pkts); 761 762 while (1) { 763 pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); 764 if (pkt_size < 0) 765 error(1, errno, "could not receive"); 766 767 if (iph->version == 4) 768 ip_ext_len = (iph->ihl - 5) * 4; 769 else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) 770 ip_ext_len = sizeof(struct ip6_frag); 771 772 tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); 773 774 if (tcph->fin) 775 break; 776 777 tcp_ext_len = (tcph->doff - 5) * 4; 778 data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len; 779 /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3. 780 * Ipv4/tcp packets without at least 6 bytes of data will be padded. 781 * Packet sockets are protocol agnostic, and will not trim the padding. 782 */ 783 if (pkt_size == ETH_ZLEN && iph->version == 4) { 784 data_len = ntohs(iph->tot_len) 785 - sizeof(struct tcphdr) - sizeof(struct iphdr); 786 } 787 vlog("%d ", data_len); 788 if (data_len != correct_payload[num_pkt]) { 789 vlog("[!=%d]", correct_payload[num_pkt]); 790 bad_packet = true; 791 } 792 num_pkt++; 793 } 794 vlog("}, Total %d packets.\n", num_pkt); 795 if (num_pkt != correct_num_pkts) 796 error(1, 0, "incorrect number of packets"); 797 if (bad_packet) 798 error(1, 0, "incorrect packet geometry"); 799 800 printf("Test succeeded\n\n"); 801 } 802 803 static void gro_sender(void) 804 { 805 static char fin_pkt[MAX_HDR_LEN]; 806 struct sockaddr_ll daddr = {}; 807 int txfd = -1; 808 809 txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW); 810 if (txfd < 0) 811 error(1, errno, "socket creation"); 812 813 memset(&daddr, 0, sizeof(daddr)); 814 daddr.sll_ifindex = if_nametoindex(ifname); 815 if (daddr.sll_ifindex == 0) 816 error(1, errno, "if_nametoindex"); 817 daddr.sll_family = AF_PACKET; 818 memcpy(daddr.sll_addr, dst_mac, ETH_ALEN); 819 daddr.sll_halen = ETH_ALEN; 820 create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1); 821 822 if (strcmp(testname, "data") == 0) { 823 send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN); 824 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 825 826 send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2); 827 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 828 829 send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); 830 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 831 } else if (strcmp(testname, "ack") == 0) { 832 send_ack(txfd, &daddr); 833 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 834 } else if (strcmp(testname, "flags") == 0) { 835 send_flags(txfd, &daddr, 1, 0, 0, 0); 836 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 837 838 send_flags(txfd, &daddr, 0, 1, 0, 0); 839 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 840 841 send_flags(txfd, &daddr, 0, 0, 1, 0); 842 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 843 844 send_flags(txfd, &daddr, 0, 0, 0, 1); 845 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 846 } else if (strcmp(testname, "tcp") == 0) { 847 send_changed_checksum(txfd, &daddr); 848 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 849 850 send_changed_seq(txfd, &daddr); 851 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 852 853 send_changed_ts(txfd, &daddr); 854 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 855 856 send_diff_opt(txfd, &daddr); 857 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 858 } else if (strcmp(testname, "ip") == 0) { 859 send_changed_ECN(txfd, &daddr); 860 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 861 862 send_changed_tos(txfd, &daddr); 863 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 864 if (proto == PF_INET) { 865 /* Modified packets may be received out of order. 866 * Sleep function added to enforce test boundaries 867 * so that fin pkts are not received prior to other pkts. 868 */ 869 sleep(1); 870 send_changed_ttl(txfd, &daddr); 871 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 872 873 sleep(1); 874 send_ip_options(txfd, &daddr); 875 sleep(1); 876 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 877 878 sleep(1); 879 send_fragment4(txfd, &daddr); 880 sleep(1); 881 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 882 } else if (proto == PF_INET6) { 883 send_fragment6(txfd, &daddr); 884 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 885 } 886 } else if (strcmp(testname, "large") == 0) { 887 /* 20 is the difference between min iphdr size 888 * and min ipv6hdr size. Like MAX_HDR_SIZE, 889 * MAX_PAYLOAD is defined with the larger header of the two. 890 */ 891 int offset = proto == PF_INET ? 20 : 0; 892 int remainder = (MAX_PAYLOAD + offset) % MSS; 893 894 send_large(txfd, &daddr, remainder); 895 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 896 897 send_large(txfd, &daddr, remainder + 1); 898 write_packet(txfd, fin_pkt, total_hdr_len, &daddr); 899 } else { 900 error(1, 0, "Unknown testcase"); 901 } 902 903 if (close(txfd)) 904 error(1, errno, "socket close"); 905 } 906 907 static void gro_receiver(void) 908 { 909 static int correct_payload[NUM_PACKETS]; 910 int rxfd = -1; 911 912 rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE)); 913 if (rxfd < 0) 914 error(1, 0, "socket creation"); 915 setup_sock_filter(rxfd); 916 set_timeout(rxfd); 917 bind_packetsocket(rxfd); 918 919 memset(correct_payload, 0, sizeof(correct_payload)); 920 921 if (strcmp(testname, "data") == 0) { 922 printf("pure data packet of same size: "); 923 correct_payload[0] = PAYLOAD_LEN * 2; 924 check_recv_pkts(rxfd, correct_payload, 1); 925 926 printf("large data packets followed by a smaller one: "); 927 correct_payload[0] = PAYLOAD_LEN * 1.5; 928 check_recv_pkts(rxfd, correct_payload, 1); 929 930 printf("small data packets followed by a larger one: "); 931 correct_payload[0] = PAYLOAD_LEN / 2; 932 correct_payload[1] = PAYLOAD_LEN; 933 check_recv_pkts(rxfd, correct_payload, 2); 934 } else if (strcmp(testname, "ack") == 0) { 935 printf("duplicate ack and pure ack: "); 936 check_recv_pkts(rxfd, correct_payload, 3); 937 } else if (strcmp(testname, "flags") == 0) { 938 correct_payload[0] = PAYLOAD_LEN * 3; 939 correct_payload[1] = PAYLOAD_LEN * 2; 940 941 printf("psh flag ends coalescing: "); 942 check_recv_pkts(rxfd, correct_payload, 2); 943 944 correct_payload[0] = PAYLOAD_LEN * 2; 945 correct_payload[1] = 0; 946 correct_payload[2] = PAYLOAD_LEN * 2; 947 printf("syn flag ends coalescing: "); 948 check_recv_pkts(rxfd, correct_payload, 3); 949 950 printf("rst flag ends coalescing: "); 951 check_recv_pkts(rxfd, correct_payload, 3); 952 953 printf("urg flag ends coalescing: "); 954 check_recv_pkts(rxfd, correct_payload, 3); 955 } else if (strcmp(testname, "tcp") == 0) { 956 correct_payload[0] = PAYLOAD_LEN; 957 correct_payload[1] = PAYLOAD_LEN; 958 correct_payload[2] = PAYLOAD_LEN; 959 correct_payload[3] = PAYLOAD_LEN; 960 961 printf("changed checksum does not coalesce: "); 962 check_recv_pkts(rxfd, correct_payload, 2); 963 964 printf("Wrong Seq number doesn't coalesce: "); 965 check_recv_pkts(rxfd, correct_payload, 2); 966 967 printf("Different timestamp doesn't coalesce: "); 968 correct_payload[0] = PAYLOAD_LEN * 2; 969 check_recv_pkts(rxfd, correct_payload, 4); 970 971 printf("Different options doesn't coalesce: "); 972 correct_payload[0] = PAYLOAD_LEN * 2; 973 check_recv_pkts(rxfd, correct_payload, 2); 974 } else if (strcmp(testname, "ip") == 0) { 975 correct_payload[0] = PAYLOAD_LEN; 976 correct_payload[1] = PAYLOAD_LEN; 977 978 printf("different ECN doesn't coalesce: "); 979 check_recv_pkts(rxfd, correct_payload, 2); 980 981 printf("different tos doesn't coalesce: "); 982 check_recv_pkts(rxfd, correct_payload, 2); 983 984 if (proto == PF_INET) { 985 printf("different ttl doesn't coalesce: "); 986 check_recv_pkts(rxfd, correct_payload, 2); 987 988 printf("ip options doesn't coalesce: "); 989 correct_payload[2] = PAYLOAD_LEN; 990 check_recv_pkts(rxfd, correct_payload, 3); 991 992 printf("fragmented ip4 doesn't coalesce: "); 993 check_recv_pkts(rxfd, correct_payload, 2); 994 } else if (proto == PF_INET6) { 995 /* GRO doesn't check for ipv6 hop limit when flushing. 996 * Hence no corresponding test to the ipv4 case. 997 */ 998 printf("fragmented ip6 doesn't coalesce: "); 999 correct_payload[0] = PAYLOAD_LEN * 2; 1000 check_recv_pkts(rxfd, correct_payload, 2); 1001 } 1002 } else if (strcmp(testname, "large") == 0) { 1003 int offset = proto == PF_INET ? 20 : 0; 1004 int remainder = (MAX_PAYLOAD + offset) % MSS; 1005 1006 correct_payload[0] = (MAX_PAYLOAD + offset); 1007 correct_payload[1] = remainder; 1008 printf("Shouldn't coalesce if exceed IP max pkt size: "); 1009 check_recv_pkts(rxfd, correct_payload, 2); 1010 1011 /* last segment sent individually, doesn't start new segment */ 1012 correct_payload[0] = correct_payload[0] - remainder; 1013 correct_payload[1] = remainder + 1; 1014 correct_payload[2] = remainder + 1; 1015 check_recv_pkts(rxfd, correct_payload, 3); 1016 } else { 1017 error(1, 0, "Test case error, should never trigger"); 1018 } 1019 1020 if (close(rxfd)) 1021 error(1, 0, "socket close"); 1022 } 1023 1024 static void parse_args(int argc, char **argv) 1025 { 1026 static const struct option opts[] = { 1027 { "daddr", required_argument, NULL, 'd' }, 1028 { "dmac", required_argument, NULL, 'D' }, 1029 { "iface", required_argument, NULL, 'i' }, 1030 { "ipv4", no_argument, NULL, '4' }, 1031 { "ipv6", no_argument, NULL, '6' }, 1032 { "rx", no_argument, NULL, 'r' }, 1033 { "saddr", required_argument, NULL, 's' }, 1034 { "smac", required_argument, NULL, 'S' }, 1035 { "test", required_argument, NULL, 't' }, 1036 { "verbose", no_argument, NULL, 'v' }, 1037 { 0, 0, 0, 0 } 1038 }; 1039 int c; 1040 1041 while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) { 1042 switch (c) { 1043 case '4': 1044 proto = PF_INET; 1045 ethhdr_proto = htons(ETH_P_IP); 1046 break; 1047 case '6': 1048 proto = PF_INET6; 1049 ethhdr_proto = htons(ETH_P_IPV6); 1050 break; 1051 case 'd': 1052 addr4_dst = addr6_dst = optarg; 1053 break; 1054 case 'D': 1055 dmac = optarg; 1056 break; 1057 case 'i': 1058 ifname = optarg; 1059 break; 1060 case 'r': 1061 tx_socket = false; 1062 break; 1063 case 's': 1064 addr4_src = addr6_src = optarg; 1065 break; 1066 case 'S': 1067 smac = optarg; 1068 break; 1069 case 't': 1070 testname = optarg; 1071 break; 1072 case 'v': 1073 verbose = true; 1074 break; 1075 default: 1076 error(1, 0, "%s invalid option %c\n", __func__, c); 1077 break; 1078 } 1079 } 1080 } 1081 1082 int main(int argc, char **argv) 1083 { 1084 parse_args(argc, argv); 1085 1086 if (proto == PF_INET) { 1087 tcp_offset = ETH_HLEN + sizeof(struct iphdr); 1088 total_hdr_len = tcp_offset + sizeof(struct tcphdr); 1089 } else if (proto == PF_INET6) { 1090 tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr); 1091 total_hdr_len = MAX_HDR_LEN; 1092 } else { 1093 error(1, 0, "Protocol family is not ipv4 or ipv6"); 1094 } 1095 1096 read_MAC(src_mac, smac); 1097 read_MAC(dst_mac, dmac); 1098 1099 if (tx_socket) 1100 gro_sender(); 1101 else 1102 gro_receiver(); 1103 1104 fprintf(stderr, "Gro::%s test passed.\n", testname); 1105 return 0; 1106 } 1107