1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2017 Facebook
3 #include <stddef.h>
4 #include <stdbool.h>
5 #include <string.h>
6 #include <linux/pkt_cls.h>
7 #include <linux/bpf.h>
8 #include <linux/in.h>
9 #include <linux/if_ether.h>
10 #include <linux/ip.h>
11 #include <linux/ipv6.h>
12 #include <linux/icmp.h>
13 #include <linux/icmpv6.h>
14 #include <linux/tcp.h>
15 #include <linux/udp.h>
16 #include "bpf_helpers.h"
17 
/* Rotate a 32-bit word left by @shift bits.
 *
 * Both shift counts are reduced modulo 32. Shifting a 32-bit value by 32 or
 * more is undefined behaviour in C, so without the mask a @shift of 32 or
 * above would be undefined; with it the result is the rotation by
 * @shift % 32 (a rotation by 0 or 32 returns @word unchanged).
 */
static __u32 rol32(__u32 word, unsigned int shift)
{
	return (word << (shift & 31)) | (word >> ((-shift) & 31));
}
22 
/* Copy-paste of jhash from the kernel sources, to make sure LLVM
 * can compile it into a valid sequence of BPF instructions.
 */

/* __jhash_mix - mix three 32-bit state words in place.
 *
 * a, b and c must be modifiable lvalues: each one is read and assigned
 * several times by the expansion. The macro expands to a bare brace block,
 * so it is meant to be used as a statement of its own.
 */
#define __jhash_mix(a, b, c)			\
{						\
	a -= c;  a ^= rol32(c, 4);  c += b;	\
	b -= a;  b ^= rol32(a, 6);  a += c;	\
	c -= b;  c ^= rol32(b, 8);  b += a;	\
	a -= c;  a ^= rol32(c, 16); c += b;	\
	b -= a;  b ^= rol32(a, 19); a += c;	\
	c -= b;  c ^= rol32(b, 4);  b += a;	\
}
35 
/* __jhash_final - final mixing of the three 32-bit state words, in place.
 *
 * a, b and c must be modifiable lvalues. The callers in this file take the
 * hash value from c after the macro has run. Like __jhash_mix(), this
 * expands to a bare brace block.
 */
#define __jhash_final(a, b, c)			\
{						\
	c ^= b; c -= rol32(b, 14);		\
	a ^= c; a -= rol32(c, 11);		\
	b ^= a; b -= rol32(a, 25);		\
	c ^= b; c -= rol32(b, 16);		\
	a ^= c; a -= rol32(c, 4);		\
	b ^= a; b -= rol32(a, 14);		\
	c ^= b; c -= rol32(b, 24);		\
}
46 
/* Constant folded into the initial hash state by jhash() and jhash_2words() */
#define JHASH_INITVAL		0xdeadbeef

/* Kernel-style short name; the hash helpers below use it as their word type */
typedef unsigned int u32;
50 
/* jhash - hash an arbitrary byte sequence to a 32-bit value.
 * @key:     start of the bytes to hash
 * @length:  number of bytes at @key
 * @initval: caller-chosen seed folded into the initial state
 *
 * While more than 12 bytes remain, 12 bytes are loaded as three u32 words
 * through pointer casts and mixed with __jhash_mix(). The remaining 1..12
 * bytes (0 only for an empty key) are added one byte at a time by the
 * switch below and finished with __jhash_final(). A @length of 0 skips the
 * final mix, so the initial state word is returned as is.
 *
 * Returns the c word of the state.
 */
static __attribute__ ((noinline))
u32 jhash(const void *key, u32 length, u32 initval)
{
	u32 a, b, c;
	const unsigned char *k = key;

	/* all three state words start from the same seed */
	a = b = c = JHASH_INITVAL + length + initval;

	/* consume full 12-byte groups, always leaving at least one byte */
	while (length > 12) {
		a += *(u32 *)(k);
		b += *(u32 *)(k + 4);
		c += *(u32 *)(k + 8);
		__jhash_mix(a, b, c);
		length -= 12;
		k += 12;
	}
	/* Every case intentionally falls through to the next one, so entering
	 * at case N adds bytes k[N-1] down to k[0] to the state.
	 */
	switch (length) {
	case 12: c += (u32)k[11]<<24;
	case 11: c += (u32)k[10]<<16;
	case 10: c += (u32)k[9]<<8;
	case 9:  c += k[8];
	case 8:  b += (u32)k[7]<<24;
	case 7:  b += (u32)k[6]<<16;
	case 6:  b += (u32)k[5]<<8;
	case 5:  b += k[4];
	case 4:  a += (u32)k[3]<<24;
	case 3:  a += (u32)k[2]<<16;
	case 2:  a += (u32)k[1]<<8;
	case 1:  a += k[0];
		 __jhash_final(a, b, c);
	case 0: /* Nothing left to add */
		break;
	}

	return c;
}
87 
88 static __attribute__ ((noinline))
89 u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
90 {
91 	a += initval;
92 	b += initval;
93 	c += initval;
94 	__jhash_final(a, b, c);
95 	return c;
96 }
97 
98 static __attribute__ ((noinline))
99 u32 jhash_2words(u32 a, u32 b, u32 initval)
100 {
101 	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
102 }
103 
/* Flow identifier filled in by the header parsers. It is the key type of
 * lru_cache and the input of get_packet_hash().
 */
struct flow_key {
	union {
		__be32 src;		/* IPv4 source address slot */
		__be32 srcv6[4];	/* IPv6 source address slot */
	};
	union {
		__be32 dst;		/* IPv4 destination address slot */
		__be32 dstv6[4];	/* IPv6 destination address slot */
	};
	union {
		__u32 ports;		/* both ports viewed as one word (hashed) */
		__u16 port16[2];	/* [0] and [1] as stored by parse_tcp()/parse_udp() */
	};
	__u8 proto;			/* L4 protocol number taken from the IP header */
};
119 
/* Parsed view of one packet as it moves through process_packet(). */
struct packet_description {
	struct flow_key flow;
	/* bit 0: set by parse_icmp()/parse_icmpv6() when the flow was taken
	 *        from the IP header embedded in an ICMP message
	 * bit 1: set by parse_tcp() when the TCP SYN flag is set
	 */
	__u8 flags;
};
124 
/* Value type of ctl_array. The code visible here only reads the mac member:
 * encap_v4()/encap_v6() copy it into the destination MAC of the new frame.
 */
struct ctl_value {
	union {
		__u64 value;
		__u32 ifindex;
		__u8 mac[6];
	};
};
132 
/* Key type of vip_map. process_packet() fills the address, port and proto
 * members from the parsed flow and leaves family at zero.
 */
struct vip_definition {
	union {
		__be32 vip;		/* IPv4 destination address of the packet */
		__be32 vipv6[4];	/* IPv6 destination address of the packet */
	};
	__u16 port;			/* flow.port16[1], or 0 for the fallback lookup */
	__u16 family;
	__u8 proto;			/* L4 protocol number */
};
142 
/* Value type of vip_map. */
struct vip_meta {
	/* Bits tested in this file:
	 * bit 0: process_packet() zeroes flow.port16[0]
	 * bit 1: the LRU connection table is neither consulted nor updated
	 * bit 2: get_packet_dst() hashes the 16-byte source address
	 * bit 3: get_packet_dst() copies port16[1] into port16[0] and clears
	 *        the source address before hashing
	 * bit 4: flow.port16[1] is kept when the VIP matched with port 0
	 */
	__u32 flags;
	/* used to compute the ch_rings key and as an index into stats */
	__u32 vip_num;
};
147 
/* Value type of lru_cache: remembers which backend a flow was sent to. */
struct real_pos_lru {
	__u32 pos;	/* key into the reals map */
	__u64 atime;	/* bpf_ktime_get_ns() timestamp; only maintained for UDP flows */
};
152 
/* Value type of the reals map: one backend ("real") address. */
struct real_definition {
	union {
		__be32 dst;		/* IPv4 address of the backend */
		__be32 dstv6[4];	/* IPv6 address of the backend */
	};
	__u8 flags;			/* bit 0 set: process_packet() uses encap_v6() */
};
160 
/* Value type of the stats map: a pair of 64-bit counters whose meaning
 * depends on which stats slot they live in.
 */
struct lb_stats {
	__u64 v2;
	__u64 v1;
};
165 
/* VIP table: process_packet() looks the packet's destination up here, first
 * with the real destination port and then again with port 0.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 512);
	__type(key, struct vip_definition);
	__type(value, struct vip_meta);
} vip_map SEC(".maps");
172 
/* Connection table: maps a flow to the backend position chosen for it.
 * Read by connection_table_lookup() and written by get_packet_dst().
 * NOTE(review): 1U << 1 matches BPF_F_NO_COMMON_LRU in linux/bpf.h - confirm.
 */
struct {
	__uint(type, BPF_MAP_TYPE_LRU_HASH);
	__uint(max_entries, 300);
	__uint(map_flags, 1U << 1);
	__type(key, struct flow_key);
	__type(value, struct real_pos_lru);
} lru_cache SEC(".maps");
180 
/* Hash rings: get_packet_dst() reads slot 2 * vip_num + hash % 2 and uses
 * the stored value as a key into the reals map.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 12 * 655);
	__type(key, __u32);
	__type(value, __u32);
} ch_rings SEC(".maps");
187 
/* Backend table, indexed by the position read from ch_rings or lru_cache. */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 40);
	__type(key, __u32);
	__type(value, struct real_definition);
} reals SEC(".maps");
194 
/* Per-CPU counters. This file indexes the map by vip_num and additionally
 * uses the fixed slots 512 (stats_key in process_packet()), 513
 * (lru_stats_key) and 514 (conn_rate_key in get_packet_dst()).
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 515);
	__type(key, __u32);
	__type(value, struct lb_stats);
} stats SEC(".maps");
201 
/* Control values. process_packet() reads slot 0 and passes it to the encap
 * helpers, which use its mac member as the new destination MAC.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 16);
	__type(key, __u32);
	__type(value, struct ctl_value);
} ctl_array SEC(".maps");
208 
/* Ethernet header as laid out at the start of the frame. eth_proto is
 * compared against the raw constants 8 and 56710, which are 0x0800 and
 * 0x86DD with their two bytes swapped.
 */
struct eth_hdr {
	unsigned char eth_dest[6];
	unsigned char eth_source[6];
	unsigned short eth_proto;
};
214 
215 static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
216 {
217 	__u64 off = sizeof(struct eth_hdr);
218 	if (is_ipv6) {
219 		off += sizeof(struct ipv6hdr);
220 		if (is_icmp)
221 			off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
222 	} else {
223 		off += sizeof(struct iphdr);
224 		if (is_icmp)
225 			off += sizeof(struct icmphdr) + sizeof(struct iphdr);
226 	}
227 	return off;
228 }
229 
230 static __attribute__ ((noinline))
231 bool parse_udp(void *data, void *data_end,
232 	       bool is_ipv6, struct packet_description *pckt)
233 {
234 
235 	bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
236 	__u64 off = calc_offset(is_ipv6, is_icmp);
237 	struct udphdr *udp;
238 	udp = data + off;
239 
240 	if (udp + 1 > data_end)
241 		return 0;
242 	if (!is_icmp) {
243 		pckt->flow.port16[0] = udp->source;
244 		pckt->flow.port16[1] = udp->dest;
245 	} else {
246 		pckt->flow.port16[0] = udp->dest;
247 		pckt->flow.port16[1] = udp->source;
248 	}
249 	return 1;
250 }
251 
252 static __attribute__ ((noinline))
253 bool parse_tcp(void *data, void *data_end,
254 	       bool is_ipv6, struct packet_description *pckt)
255 {
256 
257 	bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
258 	__u64 off = calc_offset(is_ipv6, is_icmp);
259 	struct tcphdr *tcp;
260 
261 	tcp = data + off;
262 	if (tcp + 1 > data_end)
263 		return 0;
264 	if (tcp->syn)
265 		pckt->flags |= (1 << 1);
266 	if (!is_icmp) {
267 		pckt->flow.port16[0] = tcp->source;
268 		pckt->flow.port16[1] = tcp->dest;
269 	} else {
270 		pckt->flow.port16[0] = tcp->dest;
271 		pckt->flow.port16[1] = tcp->source;
272 	}
273 	return 1;
274 }
275 
/* Prepend an outer IPv6 header to the packet (IPv6 encapsulation).
 * @xdp:       XDP context whose head is moved
 * @cval:      control value; its mac member becomes the new destination MAC
 * @pckt:      parsed flow, used to derive the last word of the outer source
 * @dst:       backend whose dstv6 becomes the outer destination address
 * @pkt_bytes: length value of the original packet, added to
 *             sizeof(struct ipv6hdr) for the outer payload_len
 *
 * Returns 1 on success, 0 if the head could not be moved or the resized
 * packet fails the bounds checks.
 */
static __attribute__ ((noinline))
bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
	      struct packet_description *pckt,
	      struct real_definition *dst, __u32 pkt_bytes)
{
	struct eth_hdr *new_eth;
	struct eth_hdr *old_eth;
	struct ipv6hdr *ip6h;
	__u32 ip_suffix;
	void *data_end;
	void *data;

	/* negative delta: move the packet start back by one IPv6 header */
	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
		return 0;
	/* packet pointers must be reloaded after the head was moved */
	data = (void *)(long)xdp->data;
	data_end = (void *)(long)xdp->data_end;
	new_eth = data;
	ip6h = data + sizeof(struct eth_hdr);
	/* the original Ethernet header now sits one IPv6 header further in */
	old_eth = data + sizeof(struct ipv6hdr);
	if (new_eth + 1 > data_end ||
	    old_eth + 1 > data_end || ip6h + 1 > data_end)
		return 0;
	memcpy(new_eth->eth_dest, cval->mac, 6);
	/* the old destination MAC becomes the source of the new frame */
	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
	new_eth->eth_proto = 56710;
	ip6h->version = 6;
	ip6h->priority = 0;
	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));

	ip6h->nexthdr = IPPROTO_IPV6;
	ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
	ip6h->payload_len =
	    __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
	ip6h->hop_limit = 4;

	/* fixed outer source address; only the last word varies per flow */
	ip6h->saddr.in6_u.u6_addr32[0] = 1;
	ip6h->saddr.in6_u.u6_addr32[1] = 2;
	ip6h->saddr.in6_u.u6_addr32[2] = 3;
	ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
	memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
	return 1;
}
318 
/* Build an outer IPv4 (IPIP) header in front of the packet, compute its
 * checksum, then move the head forward again by the same amount.
 * @xdp:       XDP context whose head is moved
 * @cval:      control value; its mac member becomes the new destination MAC
 * @pckt:      parsed flow, used to derive the outer source address
 * @dst:       backend whose dst is mixed into the outer source address
 * @pkt_bytes: length value of the original packet, added to
 *             sizeof(struct iphdr) for the outer tot_len
 *
 * Returns 1 on success, 0 if either head adjustment fails or the resized
 * packet fails the bounds checks.
 */
static __attribute__ ((noinline))
bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
	      struct packet_description *pckt,
	      struct real_definition *dst, __u32 pkt_bytes)
{

	__u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
	struct eth_hdr *new_eth;
	struct eth_hdr *old_eth;
	__u16 *next_iph_u16;
	struct iphdr *iph;
	__u32 csum = 0;
	void *data_end;
	void *data;

	ip_suffix <<= 15;
	ip_suffix ^= pckt->flow.src;
	/* negative delta: move the packet start back by one IPv4 header */
	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
		return 0;
	/* packet pointers must be reloaded after the head was moved */
	data = (void *)(long)xdp->data;
	data_end = (void *)(long)xdp->data_end;
	new_eth = data;
	iph = data + sizeof(struct eth_hdr);
	/* the original Ethernet header now sits one IPv4 header further in */
	old_eth = data + sizeof(struct iphdr);
	if (new_eth + 1 > data_end ||
	    old_eth + 1 > data_end || iph + 1 > data_end)
		return 0;
	memcpy(new_eth->eth_dest, cval->mac, 6);
	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
	new_eth->eth_proto = 8;
	iph->version = 4;
	iph->ihl = 5;
	iph->frag_off = 0;
	iph->protocol = IPPROTO_IPIP;
	iph->check = 0;
	iph->tos = 1;
	iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
	/* don't update iph->daddr, since it will overwrite old eth_proto
	 * and multiple iterations of bpf_prog_run() will fail
	 */

	iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
	iph->ttl = 4;

	/* sum the header as 16-bit words and fold the carry once */
	next_iph_u16 = (__u16 *) iph;
#pragma clang loop unroll(full)
	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
		csum += *next_iph_u16++;
	iph->check = ~((csum & 0xffff) + (csum >> 16));
	/* positive delta: move the packet start forward again */
	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
		return 0;
	return 1;
}
372 
373 static __attribute__ ((noinline))
374 bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
375 {
376 	struct eth_hdr *new_eth;
377 	struct eth_hdr *old_eth;
378 
379 	old_eth = *data;
380 	new_eth = *data + sizeof(struct ipv6hdr);
381 	memcpy(new_eth->eth_source, old_eth->eth_source, 6);
382 	memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
383 	if (inner_v4)
384 		new_eth->eth_proto = 8;
385 	else
386 		new_eth->eth_proto = 56710;
387 	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
388 		return 0;
389 	*data = (void *)(long)xdp->data;
390 	*data_end = (void *)(long)xdp->data_end;
391 	return 1;
392 }
393 
394 static __attribute__ ((noinline))
395 bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
396 {
397 	struct eth_hdr *new_eth;
398 	struct eth_hdr *old_eth;
399 
400 	old_eth = *data;
401 	new_eth = *data + sizeof(struct iphdr);
402 	memcpy(new_eth->eth_source, old_eth->eth_source, 6);
403 	memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
404 	new_eth->eth_proto = 8;
405 	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
406 		return 0;
407 	*data = (void *)(long)xdp->data;
408 	*data_end = (void *)(long)xdp->data_end;
409 	return 1;
410 }
411 
412 static __attribute__ ((noinline))
413 int swap_mac_and_send(void *data, void *data_end)
414 {
415 	unsigned char tmp_mac[6];
416 	struct eth_hdr *eth;
417 
418 	eth = data;
419 	memcpy(tmp_mac, eth->eth_source, 6);
420 	memcpy(eth->eth_source, eth->eth_dest, 6);
421 	memcpy(eth->eth_dest, tmp_mac, 6);
422 	return XDP_TX;
423 }
424 
425 static __attribute__ ((noinline))
426 int send_icmp_reply(void *data, void *data_end)
427 {
428 	struct icmphdr *icmp_hdr;
429 	__u16 *next_iph_u16;
430 	__u32 tmp_addr = 0;
431 	struct iphdr *iph;
432 	__u32 csum1 = 0;
433 	__u32 csum = 0;
434 	__u64 off = 0;
435 
436 	if (data + sizeof(struct eth_hdr)
437 	     + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
438 		return XDP_DROP;
439 	off += sizeof(struct eth_hdr);
440 	iph = data + off;
441 	off += sizeof(struct iphdr);
442 	icmp_hdr = data + off;
443 	icmp_hdr->type = 0;
444 	icmp_hdr->checksum += 0x0007;
445 	iph->ttl = 4;
446 	tmp_addr = iph->daddr;
447 	iph->daddr = iph->saddr;
448 	iph->saddr = tmp_addr;
449 	iph->check = 0;
450 	next_iph_u16 = (__u16 *) iph;
451 #pragma clang loop unroll(full)
452 	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
453 		csum += *next_iph_u16++;
454 	iph->check = ~((csum & 0xffff) + (csum >> 16));
455 	return swap_mac_and_send(data, data_end);
456 }
457 
458 static __attribute__ ((noinline))
459 int send_icmp6_reply(void *data, void *data_end)
460 {
461 	struct icmp6hdr *icmp_hdr;
462 	struct ipv6hdr *ip6h;
463 	__be32 tmp_addr[4];
464 	__u64 off = 0;
465 
466 	if (data + sizeof(struct eth_hdr)
467 	     + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
468 		return XDP_DROP;
469 	off += sizeof(struct eth_hdr);
470 	ip6h = data + off;
471 	off += sizeof(struct ipv6hdr);
472 	icmp_hdr = data + off;
473 	icmp_hdr->icmp6_type = 129;
474 	icmp_hdr->icmp6_cksum -= 0x0001;
475 	ip6h->hop_limit = 4;
476 	memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
477 	memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
478 	memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
479 	return swap_mac_and_send(data, data_end);
480 }
481 
482 static __attribute__ ((noinline))
483 int parse_icmpv6(void *data, void *data_end, __u64 off,
484 		 struct packet_description *pckt)
485 {
486 	struct icmp6hdr *icmp_hdr;
487 	struct ipv6hdr *ip6h;
488 
489 	icmp_hdr = data + off;
490 	if (icmp_hdr + 1 > data_end)
491 		return XDP_DROP;
492 	if (icmp_hdr->icmp6_type == 128)
493 		return send_icmp6_reply(data, data_end);
494 	if (icmp_hdr->icmp6_type != 3)
495 		return XDP_PASS;
496 	off += sizeof(struct icmp6hdr);
497 	ip6h = data + off;
498 	if (ip6h + 1 > data_end)
499 		return XDP_DROP;
500 	pckt->flow.proto = ip6h->nexthdr;
501 	pckt->flags |= (1 << 0);
502 	memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
503 	memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
504 	return -1;
505 }
506 
507 static __attribute__ ((noinline))
508 int parse_icmp(void *data, void *data_end, __u64 off,
509 	       struct packet_description *pckt)
510 {
511 	struct icmphdr *icmp_hdr;
512 	struct iphdr *iph;
513 
514 	icmp_hdr = data + off;
515 	if (icmp_hdr + 1 > data_end)
516 		return XDP_DROP;
517 	if (icmp_hdr->type == 8)
518 		return send_icmp_reply(data, data_end);
519 	if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
520 		return XDP_PASS;
521 	off += sizeof(struct icmphdr);
522 	iph = data + off;
523 	if (iph + 1 > data_end)
524 		return XDP_DROP;
525 	if (iph->ihl != 5)
526 		return XDP_DROP;
527 	pckt->flow.proto = iph->protocol;
528 	pckt->flags |= (1 << 0);
529 	pckt->flow.src = iph->daddr;
530 	pckt->flow.dst = iph->saddr;
531 	return -1;
532 }
533 
534 static __attribute__ ((noinline))
535 __u32 get_packet_hash(struct packet_description *pckt,
536 		      bool hash_16bytes)
537 {
538 	if (hash_16bytes)
539 		return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
540 				    pckt->flow.ports, 24);
541 	else
542 		return jhash_2words(pckt->flow.src, pckt->flow.ports,
543 				    24);
544 }
545 
/* Pick a backend for a flow that has no usable connection-table entry.
 * @real:     out; set to the chosen entry of the reals map
 * @pckt:     parsed flow; may be modified before hashing (see flag bit 3)
 * @vip_info: metadata of the matched VIP
 * @is_ipv6:  selects the 16-byte hash unless overridden by flag bit 2
 * @lru_map:  connection table that may be updated with the choice
 *
 * Returns 1 with *@real set on success, 0 if the hash is not one of the two
 * expected test values or a map lookup fails.
 */
__attribute__ ((noinline))
static bool get_packet_dst(struct real_definition **real,
			   struct packet_description *pckt,
			   struct vip_meta *vip_info,
			   bool is_ipv6, void *lru_map)
{
	struct real_pos_lru new_dst_lru = { };
	bool hash_16bytes = is_ipv6;
	__u32 *real_pos, hash, key;
	__u64 cur_time;

	if (vip_info->flags & (1 << 2))
		hash_16bytes = 1;
	if (vip_info->flags & (1 << 3)) {
		/* hash on the destination port only: drop the source identity */
		pckt->flow.port16[0] = pckt->flow.port16[1];
		memset(pckt->flow.srcv6, 0, 16);
	}
	hash = get_packet_hash(pckt, hash_16bytes);
	/* only the two hashes produced by the test packets are accepted */
	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
		return 0;
	/* ring slot -> position in reals -> backend definition */
	key = 2 * vip_info->vip_num + hash % 2;
	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
	if (!real_pos)
		return 0;
	key = *real_pos;
	*real = bpf_map_lookup_elem(&reals, &key);
	if (!(*real))
		return 0;
	if (!(vip_info->flags & (1 << 1))) {
		__u32 conn_rate_key = 512 + 2;
		struct lb_stats *conn_rate_stats =
		    bpf_map_lookup_elem(&stats, &conn_rate_key);

		if (!conn_rate_stats)
			return 1;
		cur_time = bpf_ktime_get_ns();
		/* v2 holds the start time of the current window, v1 its count */
		if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
			conn_rate_stats->v1 = 1;
			conn_rate_stats->v2 = cur_time;
		} else {
			conn_rate_stats->v1 += 1;
			/* after the increment this holds unless v1 wrapped to 0,
			 * so the LRU update below is skipped on this branch
			 */
			if (conn_rate_stats->v1 >= 1)
				return 1;
		}
		/* remember the choice; atime stays 0 for non-UDP flows */
		if (pckt->flow.proto == IPPROTO_UDP)
			new_dst_lru.atime = cur_time;
		new_dst_lru.pos = key;
		bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
	}
	return 1;
}
598 
599 __attribute__ ((noinline))
600 static void connection_table_lookup(struct real_definition **real,
601 				    struct packet_description *pckt,
602 				    void *lru_map)
603 {
604 
605 	struct real_pos_lru *dst_lru;
606 	__u64 cur_time;
607 	__u32 key;
608 
609 	dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
610 	if (!dst_lru)
611 		return;
612 	if (pckt->flow.proto == IPPROTO_UDP) {
613 		cur_time = bpf_ktime_get_ns();
614 		if (cur_time - dst_lru->atime > 300000)
615 			return;
616 		dst_lru->atime = cur_time;
617 	}
618 	key = dst_lru->pos;
619 	*real = bpf_map_lookup_elem(&reals, &key);
620 }
621 
622 /* don't believe your eyes!
623  * below function has 6 arguments whereas bpf and llvm allow maximum of 5
624  * but since it's _static_ llvm can optimize one argument away
625  */
626 __attribute__ ((noinline))
627 static int process_l3_headers_v6(struct packet_description *pckt,
628 				 __u8 *protocol, __u64 off,
629 				 __u16 *pkt_bytes, void *data,
630 				 void *data_end)
631 {
632 	struct ipv6hdr *ip6h;
633 	__u64 iph_len;
634 	int action;
635 
636 	ip6h = data + off;
637 	if (ip6h + 1 > data_end)
638 		return XDP_DROP;
639 	iph_len = sizeof(struct ipv6hdr);
640 	*protocol = ip6h->nexthdr;
641 	pckt->flow.proto = *protocol;
642 	*pkt_bytes = __builtin_bswap16(ip6h->payload_len);
643 	off += iph_len;
644 	if (*protocol == 45) {
645 		return XDP_DROP;
646 	} else if (*protocol == 59) {
647 		action = parse_icmpv6(data, data_end, off, pckt);
648 		if (action >= 0)
649 			return action;
650 	} else {
651 		memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
652 		memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
653 	}
654 	return -1;
655 }
656 
657 __attribute__ ((noinline))
658 static int process_l3_headers_v4(struct packet_description *pckt,
659 				 __u8 *protocol, __u64 off,
660 				 __u16 *pkt_bytes, void *data,
661 				 void *data_end)
662 {
663 	struct iphdr *iph;
664 	__u64 iph_len;
665 	int action;
666 
667 	iph = data + off;
668 	if (iph + 1 > data_end)
669 		return XDP_DROP;
670 	if (iph->ihl != 5)
671 		return XDP_DROP;
672 	*protocol = iph->protocol;
673 	pckt->flow.proto = *protocol;
674 	*pkt_bytes = __builtin_bswap16(iph->tot_len);
675 	off += 20;
676 	if (iph->frag_off & 65343)
677 		return XDP_DROP;
678 	if (*protocol == IPPROTO_ICMP) {
679 		action = parse_icmp(data, data_end, off, pckt);
680 		if (action >= 0)
681 			return action;
682 	} else {
683 		pckt->flow.src = iph->saddr;
684 		pckt->flow.dst = iph->daddr;
685 	}
686 	return -1;
687 }
688 
/* Main per-packet path: parse L3/L4, match the destination against vip_map,
 * pick a backend (connection table first, hash ring otherwise), encapsulate
 * the packet towards it and update the counters.
 * @data:     start of the frame
 * @off:      offset of the L3 header within the frame
 * @data_end: end of the frame
 * @is_ipv6:  whether the L3 header is IPv6
 * @xdp:      XDP context, needed by the encap helpers
 *
 * Returns an XDP action. Note that a packet that makes it through the whole
 * function still ends in XDP_DROP after its first four bytes have been
 * overwritten with the chosen backend's IPv4 address slot.
 */
__attribute__ ((noinline))
static int process_packet(void *data, __u64 off, void *data_end,
			  bool is_ipv6, struct xdp_md *xdp)
{

	struct real_definition *dst = NULL;
	struct packet_description pckt = { };
	struct vip_definition vip = { };
	struct lb_stats *data_stats;
	struct eth_hdr *eth = data;
	void *lru_map = &lru_cache;
	struct vip_meta *vip_info;
	__u32 lru_stats_key = 513;
	__u32 mac_addr_pos = 0;
	__u32 stats_key = 512;
	struct ctl_value *cval;
	__u16 pkt_bytes;
	__u64 iph_len;
	__u8 protocol;
	__u32 vip_num;
	int action;

	/* L3: a non-negative result is a final XDP action */
	if (is_ipv6)
		action = process_l3_headers_v6(&pckt, &protocol, off,
					       &pkt_bytes, data, data_end);
	else
		action = process_l3_headers_v4(&pckt, &protocol, off,
					       &pkt_bytes, data, data_end);
	if (action >= 0)
		return action;
	/* the ICMP parsers may have replaced the protocol with the embedded one */
	protocol = pckt.flow.proto;
	/* L4: only TCP and UDP are balanced */
	if (protocol == IPPROTO_TCP) {
		if (!parse_tcp(data, data_end, is_ipv6, &pckt))
			return XDP_DROP;
	} else if (protocol == IPPROTO_UDP) {
		if (!parse_udp(data, data_end, is_ipv6, &pckt))
			return XDP_DROP;
	} else {
		return XDP_TX;
	}

	/* VIP lookup: exact port first, then port 0 as a fallback */
	if (is_ipv6)
		memcpy(vip.vipv6, pckt.flow.dstv6, 16);
	else
		vip.vip = pckt.flow.dst;
	vip.port = pckt.flow.port16[1];
	vip.proto = pckt.flow.proto;
	vip_info = bpf_map_lookup_elem(&vip_map, &vip);
	if (!vip_info) {
		vip.port = 0;
		vip_info = bpf_map_lookup_elem(&vip_map, &vip);
		if (!vip_info)
			return XDP_PASS;
		if (!(vip_info->flags & (1 << 4)))
			pckt.flow.port16[1] = 0;
	}
	/* frames longer than 1400 bytes are not handled */
	if (data_end - data > 1400)
		return XDP_DROP;
	data_stats = bpf_map_lookup_elem(&stats, &stats_key);
	if (!data_stats)
		return XDP_DROP;
	data_stats->v1 += 1;
	/* dst is still NULL here, so this branch is always taken */
	if (!dst) {
		if (vip_info->flags & (1 << 0))
			pckt.flow.port16[0] = 0;
		/* try the connection table unless this is a SYN or the VIP
		 * opted out of it
		 */
		if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
			connection_table_lookup(&dst, &pckt, lru_map);
		if (dst)
			goto out;
		/* connection-table miss accounting for TCP: v1 counts SYNs,
		 * v2 counts non-SYN packets
		 */
		if (pckt.flow.proto == IPPROTO_TCP) {
			struct lb_stats *lru_stats =
			    bpf_map_lookup_elem(&stats, &lru_stats_key);

			if (!lru_stats)
				return XDP_DROP;
			if (pckt.flags & (1 << 1))
				lru_stats->v1 += 1;
			else
				lru_stats->v2 += 1;
		}
		if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
			return XDP_DROP;
		data_stats->v2 += 1;
	}
out:
	/* slot 0 of ctl_array supplies the destination MAC for the new frame */
	cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
	if (!cval)
		return XDP_DROP;
	if (dst->flags & (1 << 0)) {
		if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
			return XDP_DROP;
	} else {
		if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
			return XDP_DROP;
	}
	/* per-VIP counters: v1 packets, v2 bytes */
	vip_num = vip_info->vip_num;
	data_stats = bpf_map_lookup_elem(&stats, &vip_num);
	if (!data_stats)
		return XDP_DROP;
	data_stats->v1 += 1;
	data_stats->v2 += pkt_bytes;

	/* the encap helpers moved the head, so reload the packet pointers */
	data = (void *)(long)xdp->data;
	data_end = (void *)(long)xdp->data_end;
	if (data + 4 > data_end)
		return XDP_DROP;
	/* NOTE(review): presumably lets the test harness read back which
	 * backend was chosen - confirm against the user-space side
	 */
	*(u32 *)data = dst->dst;
	return XDP_DROP;
}
798 
799 __attribute__ ((section("xdp-test"), used))
800 int balancer_ingress(struct xdp_md *ctx)
801 {
802 	void *data = (void *)(long)ctx->data;
803 	void *data_end = (void *)(long)ctx->data_end;
804 	struct eth_hdr *eth = data;
805 	__u32 eth_proto;
806 	__u32 nh_off;
807 
808 	nh_off = sizeof(struct eth_hdr);
809 	if (data + nh_off > data_end)
810 		return XDP_DROP;
811 	eth_proto = eth->eth_proto;
812 	if (eth_proto == 8)
813 		return process_packet(data, nh_off, data_end, 0, ctx);
814 	else if (eth_proto == 56710)
815 		return process_packet(data, nh_off, data_end, 1, ctx);
816 	else
817 		return XDP_DROP;
818 }
819 
/* Object metadata placed in the dedicated "license" and "version" ELF
 * sections; "used" keeps the compiler from discarding the symbols.
 */
char _license[] __attribute__ ((section("license"), used)) = "GPL";
int _version __attribute__ ((section("version"), used)) = 1;
822