// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2013 Red Hat, Inc.
 * Author: Daniel Borkmann <dborkman@redhat.com>
 *         Chetan Loke <loke.chetan@gmail.com> (TPACKET_V3 usage example)
 *
 * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior.
 *
 * Control:
 *   Test the setup of the TPACKET socket with different patterns that are
 *   known to fail (TODO) resp. succeed (OK).
 *
 * Datapath:
 *   Open a pair of packet sockets and send resp. receive an a priori known
 *   packet pattern across the sockets and check if it was received resp.
 *   sent correctly. Fanout in combination with RX_RING is currently not
 *   tested here.
 *
 *   The test currently runs for
 *   - TPACKET_V1: RX_RING, TX_RING
 *   - TPACKET_V2: RX_RING, TX_RING
 *   - TPACKET_V3: RX_RING, TX_RING
 */
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <sys/socket.h>
30 #include <sys/mman.h>
31 #include <linux/if_packet.h>
32 #include <linux/filter.h>
33 #include <ctype.h>
34 #include <fcntl.h>
35 #include <unistd.h>
36 #include <bits/wordsize.h>
37 #include <net/ethernet.h>
38 #include <netinet/ip.h>
39 #include <arpa/inet.h>
40 #include <stdint.h>
41 #include <string.h>
42 #include <assert.h>
43 #include <net/if.h>
44 #include <inttypes.h>
45 #include <poll.h>
46 
47 #include "psock_lib.h"
48 
49 #include "../kselftest.h"
50 
/* Assert that @cond is false; only defined when nothing else provided it. */
#if !defined(bug_on)
# define bug_on(cond)		assert(!(cond))
#endif

/* Attribute: align the annotated member to TPACKET_ALIGNMENT bytes. */
#if !defined(__aligned_tpacket)
# define __aligned_tpacket	__attribute__((aligned(TPACKET_ALIGNMENT)))
#endif

/* Attribute: align the annotated member to TPACKET_ALIGN(x) bytes. */
#if !defined(__align_tpacket)
# define __align_tpacket(x)	__attribute__((aligned(TPACKET_ALIGN(x))))
#endif
62 
/* Number of packets sent per sub-test. */
#define NUM_PACKETS		100
/* Round x up to the next multiple of 8. */
#define ALIGN_8(x)		(((x) + 7) & ~7)
65 
/*
 * Bookkeeping for one mmap()ed TPACKET ring together with the request
 * that was used to create it.
 */
struct ring {
	struct iovec *rd;	/* one entry per frame (V1/V2) or block (V3) */
	uint8_t *mm_space;	/* start of the mmap()ed ring memory */
	size_t mm_len;		/* length of the mapping in bytes */
	size_t rd_len;		/* size of the rd array in bytes */
	struct sockaddr_ll ll;	/* address the ring socket is bound to */
	/* version specific routine that runs the actual test */
	void (*walk)(int sock, struct ring *ring);
	int type;		/* PACKET_RX_RING or PACKET_TX_RING */
	int rd_num;		/* number of entries in rd */
	int flen;		/* byte length covered by one rd entry */
	int version;		/* TPACKET_V1, TPACKET_V2 or TPACKET_V3 */
	union {
		struct tpacket_req  req;	/* request used for V1/V2 */
		struct tpacket_req3 req3;	/* request used for V3 */
	};
};
78 
/*
 * View of the descriptor found at the start of a TPACKET_V3 block;
 * walk_v3_rx() overlays it on every block of the mapped ring.
 */
struct block_desc {
	uint32_t version;
	uint32_t offset_to_priv;
	/* block status, packet count, block length, sequence number, ... */
	struct tpacket_hdr_v1 h1;
};
84 
85 union frame_map {
86 	struct {
87 		struct tpacket_hdr tp_h __aligned_tpacket;
88 		struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr));
89 	} *v1;
90 	struct {
91 		struct tpacket2_hdr tp_h __aligned_tpacket;
92 		struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr));
93 	} *v2;
94 	void *raw;
95 };
96 
/* Packet counter shared by the ring walkers; reset by setup_ring(). */
static unsigned int total_packets;
/* Byte counter shared by the ring walkers; reset by setup_ring(). */
static unsigned int total_bytes;
98 
/*
 * Open a raw PF_PACKET socket and select TPACKET version @ver on it.
 *
 * Returns the socket descriptor. Exits the process with status 1 if the
 * socket cannot be created or the version cannot be set.
 */
static int pfsocket(int ver)
{
	int fd;

	fd = socket(PF_PACKET, SOCK_RAW, 0);
	if (fd == -1) {
		perror("socket");
		exit(1);
	}

	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver,
		       sizeof(ver)) == -1) {
		perror("setsockopt");
		exit(1);
	}

	return fd;
}
115 
status_bar_update(void)116 static void status_bar_update(void)
117 {
118 	if (total_packets % 10 == 0) {
119 		fprintf(stderr, ".");
120 		fflush(stderr);
121 	}
122 }
123 
/*
 * Sanity-check a captured frame.
 *
 * @pay: start of the Ethernet header
 * @len: number of valid bytes at @pay
 *
 * Exits the process with status 1 when the frame is shorter than an
 * Ethernet header or when its EtherType is not ETH_P_IP.
 */
static void test_payload(void *pay, size_t len)
{
	const struct ethhdr *hdr = pay;

	if (len < sizeof(*hdr)) {
		fprintf(stderr, "test_payload: packet too small: %zu bytes!\n",
			len);
		exit(1);
	}

	if (hdr->h_proto == htons(ETH_P_IP))
		return;

	fprintf(stderr, "test_payload: wrong ethernet type: 0x%x!\n",
		ntohs(hdr->h_proto));
	exit(1);
}
140 
create_payload(void * pay,size_t * len)141 static void create_payload(void *pay, size_t *len)
142 {
143 	int i;
144 	struct ethhdr *eth = pay;
145 	struct iphdr *ip = pay + sizeof(*eth);
146 
147 	/* Lets create some broken crap, that still passes
148 	 * our BPF filter.
149 	 */
150 
151 	*len = DATA_LEN + 42;
152 
153 	memset(pay, 0xff, ETH_ALEN * 2);
154 	eth->h_proto = htons(ETH_P_IP);
155 
156 	for (i = 0; i < sizeof(*ip); ++i)
157 		((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand();
158 
159 	ip->ihl = 5;
160 	ip->version = 4;
161 	ip->protocol = 0x11;
162 	ip->frag_off = 0;
163 	ip->ttl = 64;
164 	ip->tot_len = htons((uint16_t) *len - sizeof(*eth));
165 
166 	ip->saddr = htonl(INADDR_LOOPBACK);
167 	ip->daddr = htonl(INADDR_LOOPBACK);
168 
169 	memset(pay + sizeof(*eth) + sizeof(*ip),
170 	       DATA_CHAR, DATA_LEN);
171 }
172 
/* Return 1 if TP_STATUS_USER is set in the V1 frame header, else 0. */
static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr)
{
	const unsigned long status = hdr->tp_status;

	return (status & TP_STATUS_USER) == TP_STATUS_USER;
}
177 
/*
 * Mark a V1 RX frame as TP_STATUS_KERNEL, followed by a full memory
 * barrier.
 */
static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_KERNEL;

	__sync_synchronize();
}
183 
/* Return 1 if TP_STATUS_USER is set in the V2 frame header, else 0. */
static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr)
{
	const uint32_t status = hdr->tp_status;

	return (status & TP_STATUS_USER) == TP_STATUS_USER;
}
188 
/*
 * Mark a V2 RX frame as TP_STATUS_KERNEL, followed by a full memory
 * barrier.
 */
static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_KERNEL;

	__sync_synchronize();
}
194 
/*
 * Version dispatcher: report whether the RX frame at @base has
 * TP_STATUS_USER set. @version must be TPACKET_V1 or TPACKET_V2; any
 * other value trips bug_on() and yields 0.
 */
static inline int __v1_v2_rx_kernel_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		return __v1_rx_kernel_ready(base);
	if (version == TPACKET_V2)
		return __v2_rx_kernel_ready(base);

	bug_on(1);
	return 0;
}
207 
/*
 * Version dispatcher: mark the RX frame at @base as TP_STATUS_KERNEL.
 * Versions other than TPACKET_V1/TPACKET_V2 are silently ignored.
 */
static inline void __v1_v2_rx_user_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		__v1_rx_user_ready(base);
	else if (version == TPACKET_V2)
		__v2_rx_user_ready(base);
}
219 
walk_v1_v2_rx(int sock,struct ring * ring)220 static void walk_v1_v2_rx(int sock, struct ring *ring)
221 {
222 	struct pollfd pfd;
223 	int udp_sock[2];
224 	union frame_map ppd;
225 	unsigned int frame_num = 0;
226 
227 	bug_on(ring->type != PACKET_RX_RING);
228 
229 	pair_udp_open(udp_sock, PORT_BASE);
230 
231 	memset(&pfd, 0, sizeof(pfd));
232 	pfd.fd = sock;
233 	pfd.events = POLLIN | POLLERR;
234 	pfd.revents = 0;
235 
236 	pair_udp_send(udp_sock, NUM_PACKETS);
237 
238 	while (total_packets < NUM_PACKETS * 2) {
239 		while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base,
240 					       ring->version)) {
241 			ppd.raw = ring->rd[frame_num].iov_base;
242 
243 			switch (ring->version) {
244 			case TPACKET_V1:
245 				test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac,
246 					     ppd.v1->tp_h.tp_snaplen);
247 				total_bytes += ppd.v1->tp_h.tp_snaplen;
248 				break;
249 
250 			case TPACKET_V2:
251 				test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac,
252 					     ppd.v2->tp_h.tp_snaplen);
253 				total_bytes += ppd.v2->tp_h.tp_snaplen;
254 				break;
255 			}
256 
257 			status_bar_update();
258 			total_packets++;
259 
260 			__v1_v2_rx_user_ready(ppd.raw, ring->version);
261 
262 			frame_num = (frame_num + 1) % ring->rd_num;
263 		}
264 
265 		poll(&pfd, 1, 1);
266 	}
267 
268 	pair_udp_close(udp_sock);
269 
270 	if (total_packets != 2 * NUM_PACKETS) {
271 		fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
272 			ring->version, total_packets, NUM_PACKETS);
273 		exit(1);
274 	}
275 
276 	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
277 }
278 
/*
 * Return 1 if the V1 TX frame has neither TP_STATUS_SEND_REQUEST nor
 * TP_STATUS_SENDING set, else 0.
 */
static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr)
{
	const unsigned long busy = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;

	return (hdr->tp_status & busy) == 0;
}
283 
/*
 * Mark a V1 TX frame as TP_STATUS_SEND_REQUEST, followed by a full
 * memory barrier.
 */
static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_SEND_REQUEST;

	__sync_synchronize();
}
289 
/*
 * Return 1 if the V2 TX frame has neither TP_STATUS_SEND_REQUEST nor
 * TP_STATUS_SENDING set, else 0.
 */
static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr)
{
	const uint32_t busy = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;

	return (hdr->tp_status & busy) == 0;
}
294 
/*
 * Mark a V2 TX frame as TP_STATUS_SEND_REQUEST, followed by a full
 * memory barrier.
 */
static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_SEND_REQUEST;

	__sync_synchronize();
}
300 
/*
 * Return 1 if the V3 TX frame has neither TP_STATUS_SEND_REQUEST nor
 * TP_STATUS_SENDING set, else 0.
 */
static inline int __v3_tx_kernel_ready(struct tpacket3_hdr *hdr)
{
	const uint32_t busy = TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING;

	return (hdr->tp_status & busy) == 0;
}
305 
/*
 * Mark a V3 TX frame as TP_STATUS_SEND_REQUEST, followed by a full
 * memory barrier.
 */
static inline void __v3_tx_user_ready(struct tpacket3_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_SEND_REQUEST;

	__sync_synchronize();
}
311 
/*
 * Version dispatcher: report whether the TX frame at @base is free of a
 * pending or in-flight send. An unknown @version trips bug_on() and
 * yields 0.
 */
static inline int __tx_kernel_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		return __v1_tx_kernel_ready(base);
	if (version == TPACKET_V2)
		return __v2_tx_kernel_ready(base);
	if (version == TPACKET_V3)
		return __v3_tx_kernel_ready(base);

	bug_on(1);
	return 0;
}
326 
/*
 * Version dispatcher: mark the TX frame at @base as
 * TP_STATUS_SEND_REQUEST. Unknown versions are silently ignored.
 */
static inline void __tx_user_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		__v1_tx_user_ready(base);
	else if (version == TPACKET_V2)
		__v2_tx_user_ready(base);
	else if (version == TPACKET_V3)
		__v3_tx_user_ready(base);
}
341 
/*
 * Set the PACKET_LOSS socket option to 1 on @sock.
 * Exits the process with status 1 if setsockopt() fails.
 */
static void __v1_v2_set_packet_loss_discard(int sock)
{
	int enable = 1;

	if (setsockopt(sock, SOL_PACKET, PACKET_LOSS, &enable,
		       sizeof(enable)) == -1) {
		perror("setsockopt");
		exit(1);
	}
}
353 
get_next_frame(struct ring * ring,int n)354 static inline void *get_next_frame(struct ring *ring, int n)
355 {
356 	uint8_t *f0 = ring->rd[0].iov_base;
357 
358 	switch (ring->version) {
359 	case TPACKET_V1:
360 	case TPACKET_V2:
361 		return ring->rd[n].iov_base;
362 	case TPACKET_V3:
363 		return f0 + (n * ring->req3.tp_frame_size);
364 	default:
365 		bug_on(1);
366 	}
367 }
368 
walk_tx(int sock,struct ring * ring)369 static void walk_tx(int sock, struct ring *ring)
370 {
371 	struct pollfd pfd;
372 	int rcv_sock, ret;
373 	size_t packet_len;
374 	union frame_map ppd;
375 	char packet[1024];
376 	unsigned int frame_num = 0, got = 0;
377 	struct sockaddr_ll ll = {
378 		.sll_family = PF_PACKET,
379 		.sll_halen = ETH_ALEN,
380 	};
381 	int nframes;
382 
383 	/* TPACKET_V{1,2} sets up the ring->rd* related variables based
384 	 * on frames (e.g., rd_num is tp_frame_nr) whereas V3 sets these
385 	 * up based on blocks (e.g, rd_num is  tp_block_nr)
386 	 */
387 	if (ring->version <= TPACKET_V2)
388 		nframes = ring->rd_num;
389 	else
390 		nframes = ring->req3.tp_frame_nr;
391 
392 	bug_on(ring->type != PACKET_TX_RING);
393 	bug_on(nframes < NUM_PACKETS);
394 
395 	rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
396 	if (rcv_sock == -1) {
397 		perror("socket");
398 		exit(1);
399 	}
400 
401 	pair_udp_setfilter(rcv_sock);
402 
403 	ll.sll_ifindex = if_nametoindex("lo");
404 	ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll));
405 	if (ret == -1) {
406 		perror("bind");
407 		exit(1);
408 	}
409 
410 	memset(&pfd, 0, sizeof(pfd));
411 	pfd.fd = sock;
412 	pfd.events = POLLOUT | POLLERR;
413 	pfd.revents = 0;
414 
415 	total_packets = NUM_PACKETS;
416 	create_payload(packet, &packet_len);
417 
418 	while (total_packets > 0) {
419 		void *next = get_next_frame(ring, frame_num);
420 
421 		while (__tx_kernel_ready(next, ring->version) &&
422 		       total_packets > 0) {
423 			ppd.raw = next;
424 
425 			switch (ring->version) {
426 			case TPACKET_V1:
427 				ppd.v1->tp_h.tp_snaplen = packet_len;
428 				ppd.v1->tp_h.tp_len = packet_len;
429 
430 				memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN -
431 				       sizeof(struct sockaddr_ll), packet,
432 				       packet_len);
433 				total_bytes += ppd.v1->tp_h.tp_snaplen;
434 				break;
435 
436 			case TPACKET_V2:
437 				ppd.v2->tp_h.tp_snaplen = packet_len;
438 				ppd.v2->tp_h.tp_len = packet_len;
439 
440 				memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN -
441 				       sizeof(struct sockaddr_ll), packet,
442 				       packet_len);
443 				total_bytes += ppd.v2->tp_h.tp_snaplen;
444 				break;
445 			case TPACKET_V3: {
446 				struct tpacket3_hdr *tx = next;
447 
448 				tx->tp_snaplen = packet_len;
449 				tx->tp_len = packet_len;
450 				tx->tp_next_offset = 0;
451 
452 				memcpy((uint8_t *)tx + TPACKET3_HDRLEN -
453 				       sizeof(struct sockaddr_ll), packet,
454 				       packet_len);
455 				total_bytes += tx->tp_snaplen;
456 				break;
457 			}
458 			}
459 
460 			status_bar_update();
461 			total_packets--;
462 
463 			__tx_user_ready(next, ring->version);
464 
465 			frame_num = (frame_num + 1) % nframes;
466 		}
467 
468 		poll(&pfd, 1, 1);
469 	}
470 
471 	bug_on(total_packets != 0);
472 
473 	ret = sendto(sock, NULL, 0, 0, NULL, 0);
474 	if (ret == -1) {
475 		perror("sendto");
476 		exit(1);
477 	}
478 
479 	while ((ret = recvfrom(rcv_sock, packet, sizeof(packet),
480 			       0, NULL, NULL)) > 0 &&
481 	       total_packets < NUM_PACKETS) {
482 		got += ret;
483 		test_payload(packet, ret);
484 
485 		status_bar_update();
486 		total_packets++;
487 	}
488 
489 	close(rcv_sock);
490 
491 	if (total_packets != NUM_PACKETS) {
492 		fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
493 			ring->version, total_packets, NUM_PACKETS);
494 		exit(1);
495 	}
496 
497 	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got);
498 }
499 
walk_v1_v2(int sock,struct ring * ring)500 static void walk_v1_v2(int sock, struct ring *ring)
501 {
502 	if (ring->type == PACKET_RX_RING)
503 		walk_v1_v2_rx(sock, ring);
504 	else
505 		walk_tx(sock, ring);
506 }
507 
508 static uint64_t __v3_prev_block_seq_num = 0;
509 
__v3_test_block_seq_num(struct block_desc * pbd)510 void __v3_test_block_seq_num(struct block_desc *pbd)
511 {
512 	if (__v3_prev_block_seq_num + 1 != pbd->h1.seq_num) {
513 		fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected "
514 			"seq:%"PRIu64" != actual seq:%"PRIu64"\n",
515 			__v3_prev_block_seq_num, __v3_prev_block_seq_num + 1,
516 			(uint64_t) pbd->h1.seq_num);
517 		exit(1);
518 	}
519 
520 	__v3_prev_block_seq_num = pbd->h1.seq_num;
521 }
522 
__v3_test_block_len(struct block_desc * pbd,uint32_t bytes,int block_num)523 static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
524 {
525 	if (pbd->h1.num_pkts && bytes != pbd->h1.blk_len) {
526 		fprintf(stderr, "\nblock:%u with %upackets, expected "
527 			"len:%u != actual len:%u\n", block_num,
528 			pbd->h1.num_pkts, bytes, pbd->h1.blk_len);
529 		exit(1);
530 	}
531 }
532 
__v3_test_block_header(struct block_desc * pbd,const int block_num)533 static void __v3_test_block_header(struct block_desc *pbd, const int block_num)
534 {
535 	if ((pbd->h1.block_status & TP_STATUS_USER) == 0) {
536 		fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num);
537 		exit(1);
538 	}
539 
540 	__v3_test_block_seq_num(pbd);
541 }
542 
__v3_walk_block(struct block_desc * pbd,const int block_num)543 static void __v3_walk_block(struct block_desc *pbd, const int block_num)
544 {
545 	int num_pkts = pbd->h1.num_pkts, i;
546 	unsigned long bytes = 0, bytes_with_padding = ALIGN_8(sizeof(*pbd));
547 	struct tpacket3_hdr *ppd;
548 
549 	__v3_test_block_header(pbd, block_num);
550 
551 	ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
552 				       pbd->h1.offset_to_first_pkt);
553 
554 	for (i = 0; i < num_pkts; ++i) {
555 		bytes += ppd->tp_snaplen;
556 
557 		if (ppd->tp_next_offset)
558 			bytes_with_padding += ppd->tp_next_offset;
559 		else
560 			bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac);
561 
562 		test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen);
563 
564 		status_bar_update();
565 		total_packets++;
566 
567 		ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset);
568 		__sync_synchronize();
569 	}
570 
571 	__v3_test_block_len(pbd, bytes_with_padding, block_num);
572 	total_bytes += bytes;
573 }
574 
__v3_flush_block(struct block_desc * pbd)575 void __v3_flush_block(struct block_desc *pbd)
576 {
577 	pbd->h1.block_status = TP_STATUS_KERNEL;
578 	__sync_synchronize();
579 }
580 
walk_v3_rx(int sock,struct ring * ring)581 static void walk_v3_rx(int sock, struct ring *ring)
582 {
583 	unsigned int block_num = 0;
584 	struct pollfd pfd;
585 	struct block_desc *pbd;
586 	int udp_sock[2];
587 
588 	bug_on(ring->type != PACKET_RX_RING);
589 
590 	pair_udp_open(udp_sock, PORT_BASE);
591 
592 	memset(&pfd, 0, sizeof(pfd));
593 	pfd.fd = sock;
594 	pfd.events = POLLIN | POLLERR;
595 	pfd.revents = 0;
596 
597 	pair_udp_send(udp_sock, NUM_PACKETS);
598 
599 	while (total_packets < NUM_PACKETS * 2) {
600 		pbd = (struct block_desc *) ring->rd[block_num].iov_base;
601 
602 		while ((pbd->h1.block_status & TP_STATUS_USER) == 0)
603 			poll(&pfd, 1, 1);
604 
605 		__v3_walk_block(pbd, block_num);
606 		__v3_flush_block(pbd);
607 
608 		block_num = (block_num + 1) % ring->rd_num;
609 	}
610 
611 	pair_udp_close(udp_sock);
612 
613 	if (total_packets != 2 * NUM_PACKETS) {
614 		fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n",
615 			total_packets, NUM_PACKETS);
616 		exit(1);
617 	}
618 
619 	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
620 }
621 
walk_v3(int sock,struct ring * ring)622 static void walk_v3(int sock, struct ring *ring)
623 {
624 	if (ring->type == PACKET_RX_RING)
625 		walk_v3_rx(sock, ring);
626 	else
627 		walk_tx(sock, ring);
628 }
629 
__v1_v2_fill(struct ring * ring,unsigned int blocks)630 static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
631 {
632 	ring->req.tp_block_size = getpagesize() << 2;
633 	ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7;
634 	ring->req.tp_block_nr = blocks;
635 
636 	ring->req.tp_frame_nr = ring->req.tp_block_size /
637 				ring->req.tp_frame_size *
638 				ring->req.tp_block_nr;
639 
640 	ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr;
641 	ring->walk = walk_v1_v2;
642 	ring->rd_num = ring->req.tp_frame_nr;
643 	ring->flen = ring->req.tp_frame_size;
644 }
645 
__v3_fill(struct ring * ring,unsigned int blocks,int type)646 static void __v3_fill(struct ring *ring, unsigned int blocks, int type)
647 {
648 	if (type == PACKET_RX_RING) {
649 		ring->req3.tp_retire_blk_tov = 64;
650 		ring->req3.tp_sizeof_priv = 0;
651 		ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
652 	}
653 	ring->req3.tp_block_size = getpagesize() << 2;
654 	ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
655 	ring->req3.tp_block_nr = blocks;
656 
657 	ring->req3.tp_frame_nr = ring->req3.tp_block_size /
658 				 ring->req3.tp_frame_size *
659 				 ring->req3.tp_block_nr;
660 
661 	ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr;
662 	ring->walk = walk_v3;
663 	ring->rd_num = ring->req3.tp_block_nr;
664 	ring->flen = ring->req3.tp_block_size;
665 }
666 
setup_ring(int sock,struct ring * ring,int version,int type)667 static void setup_ring(int sock, struct ring *ring, int version, int type)
668 {
669 	int ret = 0;
670 	unsigned int blocks = 256;
671 
672 	ring->type = type;
673 	ring->version = version;
674 
675 	switch (version) {
676 	case TPACKET_V1:
677 	case TPACKET_V2:
678 		if (type == PACKET_TX_RING)
679 			__v1_v2_set_packet_loss_discard(sock);
680 		__v1_v2_fill(ring, blocks);
681 		ret = setsockopt(sock, SOL_PACKET, type, &ring->req,
682 				 sizeof(ring->req));
683 		break;
684 
685 	case TPACKET_V3:
686 		__v3_fill(ring, blocks, type);
687 		ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
688 				 sizeof(ring->req3));
689 		break;
690 	}
691 
692 	if (ret == -1) {
693 		perror("setsockopt");
694 		exit(1);
695 	}
696 
697 	ring->rd_len = ring->rd_num * sizeof(*ring->rd);
698 	ring->rd = malloc(ring->rd_len);
699 	if (ring->rd == NULL) {
700 		perror("malloc");
701 		exit(1);
702 	}
703 
704 	total_packets = 0;
705 	total_bytes = 0;
706 }
707 
mmap_ring(int sock,struct ring * ring)708 static void mmap_ring(int sock, struct ring *ring)
709 {
710 	int i;
711 
712 	ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE,
713 			      MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0);
714 	if (ring->mm_space == MAP_FAILED) {
715 		perror("mmap");
716 		exit(1);
717 	}
718 
719 	memset(ring->rd, 0, ring->rd_len);
720 	for (i = 0; i < ring->rd_num; ++i) {
721 		ring->rd[i].iov_base = ring->mm_space + (i * ring->flen);
722 		ring->rd[i].iov_len = ring->flen;
723 	}
724 }
725 
bind_ring(int sock,struct ring * ring)726 static void bind_ring(int sock, struct ring *ring)
727 {
728 	int ret;
729 
730 	pair_udp_setfilter(sock);
731 
732 	ring->ll.sll_family = PF_PACKET;
733 	ring->ll.sll_protocol = htons(ETH_P_ALL);
734 	ring->ll.sll_ifindex = if_nametoindex("lo");
735 	ring->ll.sll_hatype = 0;
736 	ring->ll.sll_pkttype = 0;
737 	ring->ll.sll_halen = 0;
738 
739 	ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll));
740 	if (ret == -1) {
741 		perror("bind");
742 		exit(1);
743 	}
744 }
745 
walk_ring(int sock,struct ring * ring)746 static void walk_ring(int sock, struct ring *ring)
747 {
748 	ring->walk(sock, ring);
749 }
750 
unmap_ring(int sock,struct ring * ring)751 static void unmap_ring(int sock, struct ring *ring)
752 {
753 	munmap(ring->mm_space, ring->mm_len);
754 	free(ring->rd);
755 }
756 
/*
 * Estimate the kernel's bit width from /proc/kallsyms.
 *
 * Reads the beginning of the file, counts the characters in front of the
 * first whitespace and returns that count times four, i.e. the leading
 * token is treated as a hexadecimal address (assumption - confirm against
 * the kallsyms format).
 *
 * Exits with status 1 if the file cannot be opened or read.
 */
static int test_kernel_bit_width(void)
{
	char in[512];
	int num = 0, fd;
	ssize_t ret;

	fd = open("/proc/kallsyms", O_RDONLY);
	if (fd == -1) {
		perror("open");
		exit(1);
	}

	ret = read(fd, in, sizeof(in));
	if (ret <= 0) {
		perror("read");
		exit(1);
	}

	close(fd);

	/*
	 * The buffer is not NUL-terminated, so stop at the number of bytes
	 * actually read. isspace() needs an unsigned char value.
	 */
	while (num < ret && !isspace((unsigned char) in[num]))
		num++;

	return num * 4;
}
785 
/* Return the bit width this program was built for (__WORDSIZE). */
static int test_user_bit_width(void)
{
	const int bits = __WORDSIZE;

	return bits;
}
790 
/* Printable name for each TPACKET version, indexed by version constant. */
static const char *tpacket_str[] = {
	[TPACKET_V1] = "TPACKET_V1",
	[TPACKET_V2] = "TPACKET_V2",
	[TPACKET_V3] = "TPACKET_V3",
};
796 
/*
 * Printable name for each ring type, indexed by the socket option
 * constant; all other slots are NULL.
 */
static const char *type_str[] = {
	[PACKET_RX_RING] = "PACKET_RX_RING",
	[PACKET_TX_RING] = "PACKET_TX_RING",
};
801 
test_tpacket(int version,int type)802 static int test_tpacket(int version, int type)
803 {
804 	int sock;
805 	struct ring ring;
806 
807 	fprintf(stderr, "test: %s with %s ", tpacket_str[version],
808 		type_str[type]);
809 	fflush(stderr);
810 
811 	if (version == TPACKET_V1 &&
812 	    test_kernel_bit_width() != test_user_bit_width()) {
813 		fprintf(stderr, "test: skip %s %s since user and kernel "
814 			"space have different bit width\n",
815 			tpacket_str[version], type_str[type]);
816 		return KSFT_SKIP;
817 	}
818 
819 	sock = pfsocket(version);
820 	memset(&ring, 0, sizeof(ring));
821 	setup_ring(sock, &ring, version, type);
822 	mmap_ring(sock, &ring);
823 	bind_ring(sock, &ring);
824 	walk_ring(sock, &ring);
825 	unmap_ring(sock, &ring);
826 	close(sock);
827 
828 	fprintf(stderr, "\n");
829 	return 0;
830 }
831 
main(void)832 int main(void)
833 {
834 	int ret = 0;
835 
836 	ret |= test_tpacket(TPACKET_V1, PACKET_RX_RING);
837 	ret |= test_tpacket(TPACKET_V1, PACKET_TX_RING);
838 
839 	ret |= test_tpacket(TPACKET_V2, PACKET_RX_RING);
840 	ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING);
841 
842 	ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING);
843 	ret |= test_tpacket(TPACKET_V3, PACKET_TX_RING);
844 
845 	if (ret)
846 		return 1;
847 
848 	printf("OK. All tests passed\n");
849 	return 0;
850 }
851