xref: /openbmc/linux/tools/testing/selftests/net/csum.c (revision c4c3c32d)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP.
4  *
5  * The test runs on two machines to exercise the NIC. For this reason it
6  * is not integrated in kselftests.
7  *
8  *     CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS))
9  *
10  * Rx:
11  *
12  * The sender sends packets with a known checksum field using PF_INET(6)
13  * SOCK_RAW sockets.
14  *
15  * good packet: $CMD [-t]
16  * bad packet:  $CMD [-t] -E
17  *
18  * The receiver reads UDP packets with a UDP socket. This is not an
19  * option for TCP packets ('-t'). Optionally insert an iptables filter
20  * to avoid these entering the real protocol stack.
21  *
22  * The receiver also reads all packets with a PF_PACKET socket, to
23  * observe whether both good and bad packets arrive on the host. And to
24  * read the optional TP_STATUS_CSUM_VALID bit. This requires setting
25  * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY.
26  *
27  * Tx:
28  *
29  * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx
30  * checksum offload.
31  *
32  * The sender can send packets with a UDP socket.
33  *
34  * Optionally crafts a packet that sums up to zero to verify that the
35  * device writes negative zero 0xFFFF in this case to distinguish from
36  * 0x0000 (checksum disabled), as required by RFC 768. Hit this case
37  * by choosing a specific source port.
38  *
39  * good packet: $CMD -U
40  * zero csum:   $CMD -U -Z
41  *
42  * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR,
43  * to cover more protocols. PF_PACKET requires passing src and dst mac
44  * addresses.
45  *
46  * good packet: $CMD -s $smac -d $dmac -P [-t]
47  *
48  * Argument '-z' sends UDP packets with a 0x0000 checksum disabled field,
49  * to verify that the NIC passes these packets unmodified.
50  *
51  * Argument '-e' adds a transport mode encapsulation header between
52  * network and transport header. This will fail for devices that parse
53  * headers. Should work on devices that implement protocol agnostic tx
54  * checksum offload (NETIF_F_HW_CSUM).
55  *
56  * Argument '-r $SEED' optionally randomizes header, payload and length
57  * to increase coverage between packets sent. SEED 1 further chooses a
58  * different seed for each run (and logs this for reproducibility). It
59  * is advised to enable this for extra coverage in continuous testing.
60  */
61 
62 #define _GNU_SOURCE
63 
64 #include <arpa/inet.h>
65 #include <asm/byteorder.h>
66 #include <errno.h>
67 #include <error.h>
68 #include <linux/filter.h>
69 #include <linux/if_packet.h>
70 #include <linux/ipv6.h>
71 #include <linux/virtio_net.h>
72 #include <net/ethernet.h>
73 #include <net/if.h>
74 #include <netinet/if_ether.h>
75 #include <netinet/in.h>
76 #include <netinet/ip.h>
77 #include <netinet/ip6.h>
78 #include <netinet/tcp.h>
79 #include <netinet/udp.h>
80 #include <poll.h>
81 #include <sched.h>
82 #include <stdbool.h>
83 #include <stddef.h>
84 #include <stdint.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <sys/socket.h>
89 #include <sys/stat.h>
90 #include <sys/time.h>
91 #include <sys/types.h>
92 #include <unistd.h>
93 
94 static bool cfg_bad_csum;	/* -E: invert the transport checksum that is sent/expected */
95 static int cfg_family = PF_INET6;	/* -4 / -6 */
96 static int cfg_num_pkt = 4;	/* -n: packets to send, and minimum expected on rx */
97 static bool cfg_do_rx = true;	/* cleared by -T (tx only) */
98 static bool cfg_do_tx = true;	/* cleared by -R (rx only) */
99 static bool cfg_encap;	/* -e: insert udp + udp_encap_hdr before a tcp header */
100 static char *cfg_ifname = "eth0";	/* -i */
101 static char *cfg_mac_dst;	/* -d: required when sending with -P */
102 static char *cfg_mac_src;	/* -s: required when sending with -P */
103 static int cfg_proto = IPPROTO_UDP;	/* -t / -u */
104 static int cfg_payload_char = 'a';	/* fill byte when not randomizing */
105 static int cfg_payload_len = 100;	/* -l: at most MAX_PAYLOAD_LEN */
106 static uint16_t cfg_port_dst = 34000;
107 static uint16_t cfg_port_src = 33000;	/* rewritten by the sender when -Z is set */
108 static uint16_t cfg_port_src_encap = 33001;	/* outer udp source port with -e */
109 static unsigned int cfg_random_seed;	/* -r: 0 disables, 1 picks a time-based seed */
110 static int cfg_rcvbuf = 1 << 22;	/* be able to queue large cfg_num_pkt */
111 static bool cfg_send_pfpacket;	/* -P: send with PF_PACKET + PACKET_VNET_HDR */
112 static bool cfg_send_udp;	/* -U: send with a real udp socket */
113 static int cfg_timeout_ms = 2000;	/* -L takes seconds, stored as milliseconds */
114 static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */
115 static bool cfg_zero_sum;     /* create packet that adds up to zero */
116 
117 static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET};	/* -D with -4 */
118 static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET};	/* -S with -4 */
119 static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6};	/* -D with -6 */
120 static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6};	/* -S with -6 */
121 
122 #define ENC_HEADER_LEN	(sizeof(struct udphdr) + sizeof(struct udp_encap_hdr))	/* outer udp + demo encap header */
123 #define MAX_HEADER_LEN	(sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr))	/* largest header stack built here */
124 #define MAX_PAYLOAD_LEN 1024	/* upper bound enforced on cfg_payload_len */
125 
126 /* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */
127 struct udp_encap_hdr {
128 	uint8_t nexthdr;	/* protocol that follows; sender writes IPPROTO_TCP */
129 	uint8_t padding[3];	/* never written explicitly by the sender */
130 };
131 
132 /* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse */
133 static void *iph_addr_p;	/* points at saddr of the IP header last built or parsed */
134 
/* Return the current time of day, expressed in milliseconds. */
static unsigned long gettimeofday_ms(void)
{
	struct timeval now;
	unsigned long msec;

	gettimeofday(&now, NULL);

	msec = now.tv_sec * 1000UL;
	msec += now.tv_usec / 1000UL;

	return msec;
}
142 
/* Accumulate the unfolded 16-bit one's-complement sum of a buffer.
 *
 * @data: first byte to sum (not dereferenced when @len is 0)
 * @len:  number of bytes to sum
 * @sum:  running sum to continue from
 *
 * Each 16-bit word is read in memory order into a native uint16_t. An odd
 * trailing byte is treated as the first byte of a zero-padded word, so the
 * folded result is the same on the wire for either host byte order.
 *
 * Returns the 32-bit accumulator; carries are not folded here.
 */
static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum)
{
	size_t nwords = len / 2;
	size_t i;

	for (i = 0; i < nwords; i++) {
		uint16_t word;

		/* memcpy: data is not guaranteed to be uint16_t aligned */
		memcpy(&word, data + 2 * i, sizeof(word));
		sum += word;
	}

	if (len & 1) {
		uint16_t last = 0;

		/* place the byte where it sits in memory, pad with zero */
		memcpy(&last, data + len - 1, 1);
		sum += last;
	}

	return sum;
}
156 
/* Sum @len bytes at @data on top of @sum, fold the carries down to 16 bits
 * and return the one's complement of the result.
 */
static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
{
	uint32_t acc = checksum_nofold(data, len, sum);

	/* keep adding the carries back in until none remain */
	while (acc >> 16)
		acc = (acc >> 16) + (acc & 0xFFFF);

	return ~acc;
}
166 
167 static uint16_t checksum(void *th, uint16_t proto, size_t len)
168 {
169 	uint32_t sum;
170 	int alen;
171 
172 	alen = cfg_family == PF_INET6 ? 32 : 8;
173 
174 	sum = checksum_nofold(iph_addr_p, alen, 0);
175 	sum += htons(proto);
176 	sum += htons(len);
177 
178 	/* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */
179 	if (cfg_do_tx && cfg_send_pfpacket)
180 		return ~checksum_fold(NULL, 0, sum);
181 	else
182 		return checksum_fold(th, len, sum);
183 }
184 
185 static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len)
186 {
187 	struct iphdr *iph = _iph;
188 
189 	memset(iph, 0, sizeof(*iph));
190 
191 	iph->version = 4;
192 	iph->ihl = 5;
193 	iph->ttl = 8;
194 	iph->protocol = proto;
195 	iph->saddr = cfg_saddr4.sin_addr.s_addr;
196 	iph->daddr = cfg_daddr4.sin_addr.s_addr;
197 	iph->tot_len = htons(sizeof(*iph) + len);
198 	iph->check = checksum_fold(iph, sizeof(*iph), 0);
199 
200 	iph_addr_p = &iph->saddr;
201 
202 	return iph + 1;
203 }
204 
205 static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len)
206 {
207 	struct ipv6hdr *ip6h = _ip6h;
208 
209 	memset(ip6h, 0, sizeof(*ip6h));
210 
211 	ip6h->version = 6;
212 	ip6h->payload_len = htons(len);
213 	ip6h->nexthdr = proto;
214 	ip6h->hop_limit = 64;
215 	ip6h->saddr = cfg_saddr6.sin6_addr;
216 	ip6h->daddr = cfg_daddr6.sin6_addr;
217 
218 	iph_addr_p = &ip6h->saddr;
219 
220 	return ip6h + 1;
221 }
222 
223 static void *build_packet_udp(void *_uh)
224 {
225 	struct udphdr *uh = _uh;
226 
227 	uh->source = htons(cfg_port_src);
228 	uh->dest = htons(cfg_port_dst);
229 	uh->len = htons(sizeof(*uh) + cfg_payload_len);
230 	uh->check = 0;
231 
232 	/* choose source port so that uh->check adds up to zero */
233 	if (cfg_zero_sum) {
234 		uh->source = 0;
235 		uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);
236 
237 		fprintf(stderr, "tx: changing sport: %hu -> %hu\n",
238 			cfg_port_src, ntohs(uh->source));
239 		cfg_port_src = ntohs(uh->source);
240 	}
241 
242 	if (cfg_zero_disable)
243 		uh->check = 0;
244 	else
245 		uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);
246 
247 	if (cfg_bad_csum)
248 		uh->check = ~uh->check;
249 
250 	fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check);
251 	return uh + 1;
252 }
253 
254 static void *build_packet_tcp(void *_th)
255 {
256 	struct tcphdr *th = _th;
257 
258 	th->source = htons(cfg_port_src);
259 	th->dest = htons(cfg_port_dst);
260 	th->doff = 5;
261 	th->check = 0;
262 
263 	th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len);
264 
265 	if (cfg_bad_csum)
266 		th->check = ~th->check;
267 
268 	fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check);
269 	return th + 1;
270 }
271 
272 static char *build_packet_udp_encap(void *_uh)
273 {
274 	struct udphdr *uh = _uh;
275 	struct udp_encap_hdr *eh = _uh + sizeof(*uh);
276 
277 	/* outer dst == inner dst, to simplify BPF filter
278 	 * outer src != inner src, to demultiplex on recv
279 	 */
280 	uh->dest = htons(cfg_port_dst);
281 	uh->source = htons(cfg_port_src_encap);
282 	uh->check = 0;
283 	uh->len = htons(sizeof(*uh) +
284 			sizeof(*eh) +
285 			sizeof(struct tcphdr) +
286 			cfg_payload_len);
287 
288 	eh->nexthdr = IPPROTO_TCP;
289 
290 	return build_packet_tcp(eh + 1);
291 }
292 
293 static char *build_packet(char *buf, int max_len, int *len)
294 {
295 	uint8_t proto;
296 	char *off;
297 	int tlen;
298 
299 	if (cfg_random_seed) {
300 		int *buf32 = (void *)buf;
301 		int i;
302 
303 		for (i = 0; i < (max_len / sizeof(int)); i++)
304 			buf32[i] = rand();
305 	} else {
306 		memset(buf, cfg_payload_char, max_len);
307 	}
308 
309 	if (cfg_proto == IPPROTO_UDP)
310 		tlen = sizeof(struct udphdr) + cfg_payload_len;
311 	else
312 		tlen = sizeof(struct tcphdr) + cfg_payload_len;
313 
314 	if (cfg_encap) {
315 		proto = IPPROTO_UDP;
316 		tlen += ENC_HEADER_LEN;
317 	} else {
318 		proto = cfg_proto;
319 	}
320 
321 	if (cfg_family == PF_INET)
322 		off = build_packet_ipv4(buf, proto, tlen);
323 	else
324 		off = build_packet_ipv6(buf, proto, tlen);
325 
326 	if (cfg_encap)
327 		off = build_packet_udp_encap(off);
328 	else if (cfg_proto == IPPROTO_UDP)
329 		off = build_packet_udp(off);
330 	else
331 		off = build_packet_tcp(off);
332 
333 	/* only pass the payload, but still compute headers for cfg_zero_sum */
334 	if (cfg_send_udp) {
335 		*len = cfg_payload_len;
336 		return off;
337 	}
338 
339 	*len = off - buf + cfg_payload_len;
340 	return buf;
341 }
342 
343 static int open_inet(int ipproto, int protocol)
344 {
345 	int fd;
346 
347 	fd = socket(cfg_family, ipproto, protocol);
348 	if (fd == -1)
349 		error(1, errno, "socket inet");
350 
351 	if (cfg_family == PF_INET6) {
352 		/* may have been updated by cfg_zero_sum */
353 		cfg_saddr6.sin6_port = htons(cfg_port_src);
354 
355 		if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6)))
356 			error(1, errno, "bind dgram 6");
357 		if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
358 			error(1, errno, "connect dgram 6");
359 	} else {
360 		/* may have been updated by cfg_zero_sum */
361 		cfg_saddr4.sin_port = htons(cfg_port_src);
362 
363 		if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4)))
364 			error(1, errno, "bind dgram 4");
365 		if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
366 			error(1, errno, "connect dgram 4");
367 	}
368 
369 	return fd;
370 }
371 
/* Open a raw PF_PACKET socket with PACKET_VNET_HDR enabled, so that each
 * send can be prefixed with a struct virtio_net_hdr.
 * Exits on failure. Returns the socket.
 */
static int open_packet(void)
{
	int fd, one = 1;

	fd = socket(PF_PACKET, SOCK_RAW, 0);
	if (fd == -1)
		error(1, errno, "socket packet");

	if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
		error(1, errno, "setsockopt packet_vnet_hdr");

	return fd;
}
385 
/* Write @len bytes from @buf to @fd; exit on error or on a short write. */
static void send_inet(int fd, const char *buf, int len)
{
	const int written = write(fd, buf, len);

	if (written == -1)
		error(1, errno, "write");
	else if (written != len)
		error(1, 0, "write: %d", written);
}
396 
/* Parse a colon-separated hexadecimal MAC address string @str into the six
 * bytes at @eth. Exits if fewer than six fields can be parsed.
 */
static void eth_str_to_addr(const char *str, unsigned char *eth)
{
	int parsed;

	parsed = sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
			eth, eth + 1, eth + 2, eth + 3, eth + 4, eth + 5);
	if (parsed == 6)
		return;

	error(1, 0, "cannot parse mac addr %s", str);
}
403 
404 static void send_packet(int fd, const char *buf, int len)
405 {
406 	struct virtio_net_hdr vh = {0};
407 	struct sockaddr_ll addr = {0};
408 	struct msghdr msg = {0};
409 	struct ethhdr eth;
410 	struct iovec iov[3];
411 	int ret;
412 
413 	addr.sll_family = AF_PACKET;
414 	addr.sll_halen = ETH_ALEN;
415 	addr.sll_ifindex = if_nametoindex(cfg_ifname);
416 	if (!addr.sll_ifindex)
417 		error(1, errno, "if_nametoindex %s", cfg_ifname);
418 
419 	vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
420 	if (cfg_family == PF_INET6) {
421 		vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
422 		addr.sll_protocol = htons(ETH_P_IPV6);
423 	} else {
424 		vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr);
425 		addr.sll_protocol = htons(ETH_P_IP);
426 	}
427 
428 	if (cfg_encap)
429 		vh.csum_start += ENC_HEADER_LEN;
430 
431 	if (cfg_proto == IPPROTO_TCP) {
432 		vh.csum_offset = __builtin_offsetof(struct tcphdr, check);
433 		vh.hdr_len = vh.csum_start + sizeof(struct tcphdr);
434 	} else {
435 		vh.csum_offset = __builtin_offsetof(struct udphdr, check);
436 		vh.hdr_len = vh.csum_start + sizeof(struct udphdr);
437 	}
438 
439 	eth_str_to_addr(cfg_mac_src, eth.h_source);
440 	eth_str_to_addr(cfg_mac_dst, eth.h_dest);
441 	eth.h_proto = addr.sll_protocol;
442 
443 	iov[0].iov_base = &vh;
444 	iov[0].iov_len = sizeof(vh);
445 
446 	iov[1].iov_base = &eth;
447 	iov[1].iov_len = sizeof(eth);
448 
449 	iov[2].iov_base = (void *)buf;
450 	iov[2].iov_len = len;
451 
452 	msg.msg_iov = iov;
453 	msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);
454 
455 	msg.msg_name = &addr;
456 	msg.msg_namelen = sizeof(addr);
457 
458 	ret = sendmsg(fd, &msg, 0);
459 	if (ret == -1)
460 		error(1, errno, "sendmsg packet");
461 	if (ret != sizeof(vh) + sizeof(eth) + len)
462 		error(1, errno, "sendmsg packet: %u", ret);
463 }
464 
465 static int recv_prepare_udp(void)
466 {
467 	int fd;
468 
469 	fd = socket(cfg_family, SOCK_DGRAM, 0);
470 	if (fd == -1)
471 		error(1, errno, "socket r");
472 
473 	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
474 		       &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
475 		error(1, errno, "setsockopt SO_RCVBUF r");
476 
477 	if (cfg_family == PF_INET6) {
478 		if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
479 			error(1, errno, "bind r");
480 	} else {
481 		if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
482 			error(1, errno, "bind r");
483 	}
484 
485 	return fd;
486 }
487 
488 /* Filter out all traffic that is not cfg_proto with our destination port.
489  *
490  * Otherwise background noise may cause PF_PACKET receive queue overflow,
491  * dropping the expected packets and failing the test.
492  */
493 static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport)
494 {
495 	struct sock_filter filter[] = {
496 		BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
497 		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
498 		BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr),
499 		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2),
500 		BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
501 		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0),
502 		BPF_STMT(BPF_RET + BPF_K, 0),
503 		BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
504 	};
505 	struct sock_fprog prog = {};
506 
507 	prog.filter = filter;
508 	prog.len = sizeof(filter) / sizeof(struct sock_filter);
509 	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
510 		error(1, errno, "setsockopt filter");
511 }
512 
513 static void recv_prepare_packet_filter(int fd)
514 {
515 	const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
516 
517 	if (cfg_family == AF_INET)
518 		__recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol),
519 					     sizeof(struct iphdr) + off_dport);
520 	else
521 		__recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr),
522 					     sizeof(struct ipv6hdr) + off_dport);
523 }
524 
525 static void recv_prepare_packet_bind(int fd)
526 {
527 	struct sockaddr_ll laddr = {0};
528 
529 	laddr.sll_family = AF_PACKET;
530 
531 	if (cfg_family == PF_INET)
532 		laddr.sll_protocol = htons(ETH_P_IP);
533 	else
534 		laddr.sll_protocol = htons(ETH_P_IPV6);
535 
536 	laddr.sll_ifindex = if_nametoindex(cfg_ifname);
537 	if (!laddr.sll_ifindex)
538 		error(1, 0, "if_nametoindex %s", cfg_ifname);
539 
540 	if (bind(fd, (void *)&laddr, sizeof(laddr)))
541 		error(1, errno, "bind pf_packet");
542 }
543 
544 static int recv_prepare_packet(void)
545 {
546 	int fd, one = 1;
547 
548 	fd = socket(PF_PACKET, SOCK_DGRAM, 0);
549 	if (fd == -1)
550 		error(1, errno, "socket p");
551 
552 	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
553 		       &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
554 		error(1, errno, "setsockopt SO_RCVBUF p");
555 
556 	/* enable auxdata to recv checksum status (valid vs unknown) */
557 	if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one)))
558 		error(1, errno, "setsockopt auxdata");
559 
560 	/* install filter to restrict packet flow to match */
561 	recv_prepare_packet_filter(fd);
562 
563 	/* bind to address family to start packet flow */
564 	recv_prepare_packet_bind(fd);
565 
566 	return fd;
567 }
568 
569 static int recv_udp(int fd)
570 {
571 	static char buf[MAX_PAYLOAD_LEN];
572 	int ret, count = 0;
573 
574 	while (1) {
575 		ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
576 		if (ret == -1 && errno == EAGAIN)
577 			break;
578 		if (ret == -1)
579 			error(1, errno, "recv r");
580 
581 		fprintf(stderr, "rx: udp: len=%u\n", ret);
582 		count++;
583 	}
584 
585 	return count;
586 }
587 
588 static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field)
589 {
590 	uint16_t csum;
591 
592 	csum = checksum(th, cfg_proto, len);
593 
594 	fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n",
595 		sport, len, csum_field, csum);
596 
597 	/* csum must be zero unless cfg_bad_csum indicates bad csum */
598 	if (csum && !cfg_bad_csum) {
599 		fprintf(stderr, "pkt: bad csum\n");
600 		return 1;
601 	} else if (cfg_bad_csum && !csum) {
602 		fprintf(stderr, "pkt: good csum, while bad expected\n");
603 		return 1;
604 	}
605 
606 	if (cfg_zero_sum && csum_field != 0xFFFF) {
607 		fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field);
608 		return 1;
609 	}
610 
611 	return 0;
612 }
613 
614 static int recv_verify_packet_tcp(void *th, int len)
615 {
616 	struct tcphdr *tcph = th;
617 
618 	if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst))
619 		return -1;
620 
621 	return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check);
622 }
623 
624 static int recv_verify_packet_udp_encap(void *th, int len)
625 {
626 	struct udp_encap_hdr *eh = th;
627 
628 	if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP)
629 		return -1;
630 
631 	return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh));
632 }
633 
634 static int recv_verify_packet_udp(void *th, int len)
635 {
636 	struct udphdr *udph = th;
637 
638 	if (len < sizeof(*udph))
639 		return -1;
640 
641 	if (udph->dest != htons(cfg_port_dst))
642 		return -1;
643 
644 	if (udph->source == htons(cfg_port_src_encap))
645 		return recv_verify_packet_udp_encap(udph + 1,
646 						    len - sizeof(*udph));
647 
648 	return recv_verify_csum(th, len, ntohs(udph->source), udph->check);
649 }
650 
651 static int recv_verify_packet_ipv4(void *nh, int len)
652 {
653 	struct iphdr *iph = nh;
654 	uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
655 
656 	if (len < sizeof(*iph) || iph->protocol != proto)
657 		return -1;
658 
659 	iph_addr_p = &iph->saddr;
660 	if (proto == IPPROTO_TCP)
661 		return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
662 	else
663 		return recv_verify_packet_udp(iph + 1, len - sizeof(*iph));
664 }
665 
666 static int recv_verify_packet_ipv6(void *nh, int len)
667 {
668 	struct ipv6hdr *ip6h = nh;
669 	uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
670 
671 	if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
672 		return -1;
673 
674 	iph_addr_p = &ip6h->saddr;
675 
676 	if (proto == IPPROTO_TCP)
677 		return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
678 	else
679 		return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
680 }
681 
/* Report whether the PACKET_AUXDATA control message attached to @msg has
 * TP_STATUS_CSUM_VALID set. Exits if the control data is truncated, holds
 * any other message, has an unexpected size, or is missing.
 */
static bool recv_verify_packet_csum(struct msghdr *msg)
{
	const size_t want_len = CMSG_LEN(sizeof(struct tpacket_auxdata));
	struct tpacket_auxdata *auxdata = NULL;
	struct cmsghdr *cm;

	if (msg->msg_flags & MSG_CTRUNC)
		error(1, 0, "cmsg: truncated");

	cm = CMSG_FIRSTHDR(msg);
	while (cm) {
		if (cm->cmsg_level != SOL_PACKET ||
		    cm->cmsg_type != PACKET_AUXDATA)
			error(1, 0, "cmsg: level=%d type=%d\n",
			      cm->cmsg_level, cm->cmsg_type);

		if (cm->cmsg_len != want_len)
			error(1, 0, "cmsg: len=%lu expected=%lu",
			      cm->cmsg_len, want_len);

		auxdata = (void *)CMSG_DATA(cm);
		cm = CMSG_NXTHDR(msg, cm);
	}

	if (auxdata == NULL)
		error(1, 0, "cmsg: no auxdata");

	return auxdata->tp_status & TP_STATUS_CSUM_VALID;
}
709 
710 static int recv_packet(int fd)
711 {
712 	static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
713 	unsigned long total = 0, bad_csums = 0, bad_validations = 0;
714 	char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
715 	struct pkt *buf = (void *)_buf;
716 	struct msghdr msg = {0};
717 	struct iovec iov;
718 	int len, ret;
719 
720 	iov.iov_base = _buf;
721 	iov.iov_len = sizeof(_buf);
722 
723 	msg.msg_iov = &iov;
724 	msg.msg_iovlen = 1;
725 
726 	msg.msg_control = ctrl;
727 	msg.msg_controllen = sizeof(ctrl);
728 
729 	while (1) {
730 		msg.msg_flags = 0;
731 
732 		len = recvmsg(fd, &msg, MSG_DONTWAIT);
733 		if (len == -1 && errno == EAGAIN)
734 			break;
735 		if (len == -1)
736 			error(1, errno, "recv p");
737 
738 		if (cfg_family == PF_INET6)
739 			ret = recv_verify_packet_ipv6(buf, len);
740 		else
741 			ret = recv_verify_packet_ipv4(buf, len);
742 
743 		if (ret == -1 /* skip: non-matching */)
744 			continue;
745 
746 		total++;
747 		if (ret == 1)
748 			bad_csums++;
749 
750 		/* Fail if kernel returns valid for known bad csum.
751 		 * Do not fail if kernel does not validate a good csum:
752 		 * Absence of validation does not imply invalid.
753 		 */
754 		if (recv_verify_packet_csum(&msg) && cfg_bad_csum) {
755 			fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n");
756 			bad_validations++;
757 		}
758 	}
759 
760 	if (bad_csums || bad_validations)
761 		error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n",
762 		      total, bad_csums, bad_validations);
763 
764 	return total;
765 }
766 
767 static void parse_args(int argc, char *const argv[])
768 {
769 	const char *daddr = NULL, *saddr = NULL;
770 	int c;
771 
772 	while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) {
773 		switch (c) {
774 		case '4':
775 			cfg_family = PF_INET;
776 			break;
777 		case '6':
778 			cfg_family = PF_INET6;
779 			break;
780 		case 'd':
781 			cfg_mac_dst = optarg;
782 			break;
783 		case 'D':
784 			daddr = optarg;
785 			break;
786 		case 'e':
787 			cfg_encap = true;
788 			break;
789 		case 'E':
790 			cfg_bad_csum = true;
791 			break;
792 		case 'i':
793 			cfg_ifname = optarg;
794 			break;
795 		case 'l':
796 			cfg_payload_len = strtol(optarg, NULL, 0);
797 			break;
798 		case 'L':
799 			cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000;
800 			break;
801 		case 'n':
802 			cfg_num_pkt = strtol(optarg, NULL, 0);
803 			break;
804 		case 'r':
805 			cfg_random_seed = strtol(optarg, NULL, 0);
806 			break;
807 		case 'P':
808 			cfg_send_pfpacket = true;
809 			break;
810 		case 'R':
811 			/* only Rx: used with two machine tests */
812 			cfg_do_tx = false;
813 			break;
814 		case 's':
815 			cfg_mac_src = optarg;
816 			break;
817 		case 'S':
818 			saddr = optarg;
819 			break;
820 		case 't':
821 			cfg_proto = IPPROTO_TCP;
822 			break;
823 		case 'T':
824 			/* only Tx: used with two machine tests */
825 			cfg_do_rx = false;
826 			break;
827 		case 'u':
828 			cfg_proto = IPPROTO_UDP;
829 			break;
830 		case 'U':
831 			/* send using real udp socket,
832 			 * to exercise tx checksum offload
833 			 */
834 			cfg_send_udp = true;
835 			break;
836 		case 'z':
837 			cfg_zero_disable = true;
838 			break;
839 		case 'Z':
840 			cfg_zero_sum = true;
841 			break;
842 		default:
843 			error(1, 0, "unknown arg %c", c);
844 		}
845 	}
846 
847 	if (!daddr || !saddr)
848 		error(1, 0, "Must pass -D <daddr> and -S <saddr>");
849 
850 	if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst))
851 		error(1, 0, "Transmit with pf_packet requires mac addresses");
852 
853 	if (cfg_payload_len > MAX_PAYLOAD_LEN)
854 		error(1, 0, "Payload length exceeds max");
855 
856 	if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable))
857 		error(1, 0, "Only UDP supports zero csum");
858 
859 	if (cfg_zero_sum && !cfg_send_udp)
860 		error(1, 0, "Zero checksum conversion requires -U for tx csum offload");
861 	if (cfg_zero_sum && cfg_bad_csum)
862 		error(1, 0, "Cannot combine zero checksum conversion and invalid checksum");
863 	if (cfg_zero_sum && cfg_random_seed)
864 		error(1, 0, "Cannot combine zero checksum conversion with randomization");
865 
866 	if (cfg_family == PF_INET6) {
867 		cfg_saddr6.sin6_port = htons(cfg_port_src);
868 		cfg_daddr6.sin6_port = htons(cfg_port_dst);
869 
870 		if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1)
871 			error(1, errno, "Cannot parse ipv6 -D");
872 		if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1)
873 			error(1, errno, "Cannot parse ipv6 -S");
874 	} else {
875 		cfg_saddr4.sin_port = htons(cfg_port_src);
876 		cfg_daddr4.sin_port = htons(cfg_port_dst);
877 
878 		if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1)
879 			error(1, errno, "Cannot parse ipv4 -D");
880 		if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1)
881 			error(1, errno, "Cannot parse ipv4 -S");
882 	}
883 
884 	if (cfg_do_tx && cfg_random_seed) {
885 		/* special case: time-based seed */
886 		if (cfg_random_seed == 1)
887 			cfg_random_seed = (unsigned int)gettimeofday_ms();
888 		srand(cfg_random_seed);
889 		fprintf(stderr, "randomization seed: %u\n", cfg_random_seed);
890 	}
891 }
892 
893 static void do_tx(void)
894 {
895 	static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
896 	char *buf;
897 	int fd, len, i;
898 
899 	buf = build_packet(_buf, sizeof(_buf), &len);
900 
901 	if (cfg_send_pfpacket)
902 		fd = open_packet();
903 	else if (cfg_send_udp)
904 		fd = open_inet(SOCK_DGRAM, 0);
905 	else
906 		fd = open_inet(SOCK_RAW, IPPROTO_RAW);
907 
908 	for (i = 0; i < cfg_num_pkt; i++) {
909 		if (cfg_send_pfpacket)
910 			send_packet(fd, buf, len);
911 		else
912 			send_inet(fd, buf, len);
913 
914 		/* randomize each packet individually to increase coverage */
915 		if (cfg_random_seed) {
916 			cfg_payload_len = rand() % MAX_PAYLOAD_LEN;
917 			buf = build_packet(_buf, sizeof(_buf), &len);
918 		}
919 	}
920 
921 	if (close(fd))
922 		error(1, errno, "close tx");
923 }
924 
925 static void do_rx(int fdp, int fdr)
926 {
927 	unsigned long count_udp = 0, count_pkt = 0;
928 	long tleft, tstop;
929 	struct pollfd pfd;
930 
931 	tstop = gettimeofday_ms() + cfg_timeout_ms;
932 	tleft = cfg_timeout_ms;
933 
934 	do {
935 		pfd.events = POLLIN;
936 		pfd.fd = fdp;
937 		if (poll(&pfd, 1, tleft) == -1)
938 			error(1, errno, "poll");
939 
940 		if (pfd.revents & POLLIN)
941 			count_pkt += recv_packet(fdp);
942 
943 		if (cfg_proto == IPPROTO_UDP)
944 			count_udp += recv_udp(fdr);
945 
946 		tleft = tstop - gettimeofday_ms();
947 	} while (tleft > 0);
948 
949 	if (close(fdr))
950 		error(1, errno, "close r");
951 	if (close(fdp))
952 		error(1, errno, "close p");
953 
954 	if (count_pkt < cfg_num_pkt)
955 		error(1, 0, "rx: missing packets at pf_packet: %lu < %u",
956 		      count_pkt, cfg_num_pkt);
957 
958 	if (cfg_proto == IPPROTO_UDP) {
959 		if (cfg_bad_csum && count_udp)
960 			error(1, 0, "rx: unexpected packets at udp");
961 		if (!cfg_bad_csum && !count_udp)
962 			error(1, 0, "rx: missing packets at udp");
963 	}
964 }
965 
966 int main(int argc, char *const argv[])
967 {
968 	int fdp = -1, fdr = -1;		/* -1 to silence -Wmaybe-uninitialized */
969 
970 	parse_args(argc, argv);
971 
972 	/* open receive sockets before transmitting */
973 	if (cfg_do_rx) {
974 		fdp = recv_prepare_packet();
975 		fdr = recv_prepare_udp();
976 	}
977 
978 	if (cfg_do_tx)
979 		do_tx();
980 
981 	if (cfg_do_rx)
982 		do_rx(fdp, fdr);
983 
984 	fprintf(stderr, "OK\n");
985 	return 0;
986 }
987