1bd4aed0eSJiong Wang // SPDX-License-Identifier: GPL-2.0 2bd4aed0eSJiong Wang // Copyright (c) 2017 Facebook 3bd4aed0eSJiong Wang #include <stddef.h> 4bd4aed0eSJiong Wang #include <stdbool.h> 5bd4aed0eSJiong Wang #include <string.h> 6bd4aed0eSJiong Wang #include <linux/pkt_cls.h> 7bd4aed0eSJiong Wang #include <linux/bpf.h> 8bd4aed0eSJiong Wang #include <linux/in.h> 9bd4aed0eSJiong Wang #include <linux/if_ether.h> 10bd4aed0eSJiong Wang #include <linux/ip.h> 11bd4aed0eSJiong Wang #include <linux/ipv6.h> 12bd4aed0eSJiong Wang #include <linux/icmp.h> 13bd4aed0eSJiong Wang #include <linux/icmpv6.h> 14bd4aed0eSJiong Wang #include <linux/tcp.h> 15bd4aed0eSJiong Wang #include <linux/udp.h> 163e689141SToke Høiland-Jørgensen #include <bpf/bpf_helpers.h> 173e689141SToke Høiland-Jørgensen #include <bpf/bpf_endian.h> 18bd4aed0eSJiong Wang 19bd4aed0eSJiong Wang static __u32 rol32(__u32 word, unsigned int shift) 20bd4aed0eSJiong Wang { 21bd4aed0eSJiong Wang return (word << shift) | (word >> ((-shift) & 31)); 22bd4aed0eSJiong Wang } 23bd4aed0eSJiong Wang 24bd4aed0eSJiong Wang /* copy paste of jhash from kernel sources to make sure llvm 25bd4aed0eSJiong Wang * can compile it into valid sequence of bpf instructions 26bd4aed0eSJiong Wang */ 27bd4aed0eSJiong Wang #define __jhash_mix(a, b, c) \ 28bd4aed0eSJiong Wang { \ 29bd4aed0eSJiong Wang a -= c; a ^= rol32(c, 4); c += b; \ 30bd4aed0eSJiong Wang b -= a; b ^= rol32(a, 6); a += c; \ 31bd4aed0eSJiong Wang c -= b; c ^= rol32(b, 8); b += a; \ 32bd4aed0eSJiong Wang a -= c; a ^= rol32(c, 16); c += b; \ 33bd4aed0eSJiong Wang b -= a; b ^= rol32(a, 19); a += c; \ 34bd4aed0eSJiong Wang c -= b; c ^= rol32(b, 4); b += a; \ 35bd4aed0eSJiong Wang } 36bd4aed0eSJiong Wang 37bd4aed0eSJiong Wang #define __jhash_final(a, b, c) \ 38bd4aed0eSJiong Wang { \ 39bd4aed0eSJiong Wang c ^= b; c -= rol32(b, 14); \ 40bd4aed0eSJiong Wang a ^= c; a -= rol32(c, 11); \ 41bd4aed0eSJiong Wang b ^= a; b -= rol32(a, 25); \ 42bd4aed0eSJiong Wang c ^= b; c -= rol32(b, 16); \ 43bd4aed0eSJiong Wang a ^= c; a -= rol32(c, 4); \ 44bd4aed0eSJiong Wang b ^= a; b -= rol32(a, 14); \ 45bd4aed0eSJiong Wang c ^= b; c -= rol32(b, 24); \ 46bd4aed0eSJiong Wang } 47bd4aed0eSJiong Wang 48bd4aed0eSJiong Wang #define JHASH_INITVAL 0xdeadbeef 49bd4aed0eSJiong Wang 50bd4aed0eSJiong Wang typedef unsigned int u32; 51bd4aed0eSJiong Wang 52bd4aed0eSJiong Wang static __attribute__ ((noinline)) 53bd4aed0eSJiong Wang u32 jhash(const void *key, u32 length, u32 initval) 54bd4aed0eSJiong Wang { 55bd4aed0eSJiong Wang u32 a, b, c; 56bd4aed0eSJiong Wang const unsigned char *k = key; 57bd4aed0eSJiong Wang 58bd4aed0eSJiong Wang a = b = c = JHASH_INITVAL + length + initval; 59bd4aed0eSJiong Wang 60bd4aed0eSJiong Wang while (length > 12) { 61bd4aed0eSJiong Wang a += *(u32 *)(k); 62bd4aed0eSJiong Wang b += *(u32 *)(k + 4); 63bd4aed0eSJiong Wang c += *(u32 *)(k + 8); 64bd4aed0eSJiong Wang __jhash_mix(a, b, c); 65bd4aed0eSJiong Wang length -= 12; 66bd4aed0eSJiong Wang k += 12; 67bd4aed0eSJiong Wang } 68bd4aed0eSJiong Wang switch (length) { 69bd4aed0eSJiong Wang case 12: c += (u32)k[11]<<24; 70bd4aed0eSJiong Wang case 11: c += (u32)k[10]<<16; 71bd4aed0eSJiong Wang case 10: c += (u32)k[9]<<8; 72bd4aed0eSJiong Wang case 9: c += k[8]; 73bd4aed0eSJiong Wang case 8: b += (u32)k[7]<<24; 74bd4aed0eSJiong Wang case 7: b += (u32)k[6]<<16; 75bd4aed0eSJiong Wang case 6: b += (u32)k[5]<<8; 76bd4aed0eSJiong Wang case 5: b += k[4]; 77bd4aed0eSJiong Wang case 4: a += (u32)k[3]<<24; 78bd4aed0eSJiong Wang case 3: a += (u32)k[2]<<16; 79bd4aed0eSJiong Wang case 2: a += (u32)k[1]<<8; 80bd4aed0eSJiong Wang case 1: a += k[0]; 81bd4aed0eSJiong Wang __jhash_final(a, b, c); 82bd4aed0eSJiong Wang case 0: /* Nothing left to add */ 83bd4aed0eSJiong Wang break; 84bd4aed0eSJiong Wang } 85bd4aed0eSJiong Wang 86bd4aed0eSJiong Wang return c; 87bd4aed0eSJiong Wang } 88bd4aed0eSJiong Wang 89e528d1c0SAlexei Starovoitov __attribute__ ((noinline)) 90bd4aed0eSJiong Wang u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) 91bd4aed0eSJiong Wang { 92bd4aed0eSJiong Wang a += initval; 93bd4aed0eSJiong Wang b += initval; 94bd4aed0eSJiong Wang c += initval; 95bd4aed0eSJiong Wang __jhash_final(a, b, c); 96bd4aed0eSJiong Wang return c; 97bd4aed0eSJiong Wang } 98bd4aed0eSJiong Wang 99e528d1c0SAlexei Starovoitov __attribute__ ((noinline)) 100bd4aed0eSJiong Wang u32 jhash_2words(u32 a, u32 b, u32 initval) 101bd4aed0eSJiong Wang { 102bd4aed0eSJiong Wang return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); 103bd4aed0eSJiong Wang } 104bd4aed0eSJiong Wang 105bd4aed0eSJiong Wang struct flow_key { 106bd4aed0eSJiong Wang union { 107bd4aed0eSJiong Wang __be32 src; 108bd4aed0eSJiong Wang __be32 srcv6[4]; 109bd4aed0eSJiong Wang }; 110bd4aed0eSJiong Wang union { 111bd4aed0eSJiong Wang __be32 dst; 112bd4aed0eSJiong Wang __be32 dstv6[4]; 113bd4aed0eSJiong Wang }; 114bd4aed0eSJiong Wang union { 115bd4aed0eSJiong Wang __u32 ports; 116bd4aed0eSJiong Wang __u16 port16[2]; 117bd4aed0eSJiong Wang }; 118bd4aed0eSJiong Wang __u8 proto; 119bd4aed0eSJiong Wang }; 120bd4aed0eSJiong Wang 121bd4aed0eSJiong Wang struct packet_description { 122bd4aed0eSJiong Wang struct flow_key flow; 123bd4aed0eSJiong Wang __u8 flags; 124bd4aed0eSJiong Wang }; 125bd4aed0eSJiong Wang 126bd4aed0eSJiong Wang struct ctl_value { 127bd4aed0eSJiong Wang union { 128bd4aed0eSJiong Wang __u64 value; 129bd4aed0eSJiong Wang __u32 ifindex; 130bd4aed0eSJiong Wang __u8 mac[6]; 131bd4aed0eSJiong Wang }; 132bd4aed0eSJiong Wang }; 133bd4aed0eSJiong Wang 134bd4aed0eSJiong Wang struct vip_definition { 135bd4aed0eSJiong Wang union { 136bd4aed0eSJiong Wang __be32 vip; 137bd4aed0eSJiong Wang __be32 vipv6[4]; 138bd4aed0eSJiong Wang }; 139bd4aed0eSJiong Wang __u16 port; 140bd4aed0eSJiong Wang __u16 family; 141bd4aed0eSJiong Wang __u8 proto; 142bd4aed0eSJiong Wang }; 143bd4aed0eSJiong Wang 144bd4aed0eSJiong Wang struct vip_meta { 145bd4aed0eSJiong Wang __u32 flags; 146bd4aed0eSJiong Wang __u32 vip_num; 147bd4aed0eSJiong Wang }; 148bd4aed0eSJiong Wang 149bd4aed0eSJiong Wang struct real_pos_lru { 150bd4aed0eSJiong Wang __u32 pos; 151bd4aed0eSJiong Wang __u64 atime; 152bd4aed0eSJiong Wang }; 153bd4aed0eSJiong Wang 154bd4aed0eSJiong Wang struct real_definition { 155bd4aed0eSJiong Wang union { 156bd4aed0eSJiong Wang __be32 dst; 157bd4aed0eSJiong Wang __be32 dstv6[4]; 158bd4aed0eSJiong Wang }; 159bd4aed0eSJiong Wang __u8 flags; 160bd4aed0eSJiong Wang }; 161bd4aed0eSJiong Wang 162bd4aed0eSJiong Wang struct lb_stats { 163bd4aed0eSJiong Wang __u64 v2; 164bd4aed0eSJiong Wang __u64 v1; 165bd4aed0eSJiong Wang }; 166bd4aed0eSJiong Wang 167df0b7792SAndrii Nakryiko struct { 168bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_HASH); 169bc7430ccSAndrii Nakryiko __uint(max_entries, 512); 170bc7430ccSAndrii Nakryiko __type(key, struct vip_definition); 171bc7430ccSAndrii Nakryiko __type(value, struct vip_meta); 172bc7430ccSAndrii Nakryiko } vip_map SEC(".maps"); 173bd4aed0eSJiong Wang 174df0b7792SAndrii Nakryiko struct { 175bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_LRU_HASH); 176bc7430ccSAndrii Nakryiko __uint(max_entries, 300); 177bc7430ccSAndrii Nakryiko __uint(map_flags, 1U << 1); 178bc7430ccSAndrii Nakryiko __type(key, struct flow_key); 179bc7430ccSAndrii Nakryiko __type(value, struct real_pos_lru); 180bc7430ccSAndrii Nakryiko } lru_cache SEC(".maps"); 181bd4aed0eSJiong Wang 182df0b7792SAndrii Nakryiko struct { 183bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_ARRAY); 184bc7430ccSAndrii Nakryiko __uint(max_entries, 12 * 655); 185bc7430ccSAndrii Nakryiko __type(key, __u32); 186bc7430ccSAndrii Nakryiko __type(value, __u32); 187bc7430ccSAndrii Nakryiko } ch_rings SEC(".maps"); 188bd4aed0eSJiong Wang 189df0b7792SAndrii Nakryiko struct { 190bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_ARRAY); 191bc7430ccSAndrii Nakryiko __uint(max_entries, 40); 192bc7430ccSAndrii Nakryiko __type(key, __u32); 193bc7430ccSAndrii Nakryiko __type(value, struct real_definition); 194bc7430ccSAndrii Nakryiko } reals SEC(".maps"); 195bd4aed0eSJiong Wang 196df0b7792SAndrii Nakryiko struct { 197bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 198bc7430ccSAndrii Nakryiko __uint(max_entries, 515); 199bc7430ccSAndrii Nakryiko __type(key, __u32); 200bc7430ccSAndrii Nakryiko __type(value, struct lb_stats); 201bc7430ccSAndrii Nakryiko } stats SEC(".maps"); 202bd4aed0eSJiong Wang 203df0b7792SAndrii Nakryiko struct { 204bc7430ccSAndrii Nakryiko __uint(type, BPF_MAP_TYPE_ARRAY); 205bc7430ccSAndrii Nakryiko __uint(max_entries, 16); 206bc7430ccSAndrii Nakryiko __type(key, __u32); 207bc7430ccSAndrii Nakryiko __type(value, struct ctl_value); 208bc7430ccSAndrii Nakryiko } ctl_array SEC(".maps"); 209bd4aed0eSJiong Wang 210bd4aed0eSJiong Wang struct eth_hdr { 211bd4aed0eSJiong Wang unsigned char eth_dest[6]; 212bd4aed0eSJiong Wang unsigned char eth_source[6]; 213bd4aed0eSJiong Wang unsigned short eth_proto; 214bd4aed0eSJiong Wang }; 215bd4aed0eSJiong Wang 216bd4aed0eSJiong Wang static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) 217bd4aed0eSJiong Wang { 218bd4aed0eSJiong Wang __u64 off = sizeof(struct eth_hdr); 219bd4aed0eSJiong Wang if (is_ipv6) { 220bd4aed0eSJiong Wang off += sizeof(struct ipv6hdr); 221bd4aed0eSJiong Wang if (is_icmp) 222bd4aed0eSJiong Wang off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr); 223bd4aed0eSJiong Wang } else { 224bd4aed0eSJiong Wang off += sizeof(struct iphdr); 225bd4aed0eSJiong Wang if (is_icmp) 226bd4aed0eSJiong Wang off += sizeof(struct icmphdr) + sizeof(struct iphdr); 227bd4aed0eSJiong Wang } 228bd4aed0eSJiong Wang return off; 229bd4aed0eSJiong Wang } 230bd4aed0eSJiong Wang 231bd4aed0eSJiong Wang static __attribute__ ((noinline)) 232bd4aed0eSJiong Wang bool parse_udp(void *data, void *data_end, 233bd4aed0eSJiong Wang bool is_ipv6, struct packet_description *pckt) 234bd4aed0eSJiong Wang { 235bd4aed0eSJiong Wang 236bd4aed0eSJiong Wang bool is_icmp = !((pckt->flags & (1 << 0)) == 0); 237bd4aed0eSJiong Wang __u64 off = calc_offset(is_ipv6, is_icmp); 238bd4aed0eSJiong Wang struct udphdr *udp; 239bd4aed0eSJiong Wang udp = data + off; 240bd4aed0eSJiong Wang 241bd4aed0eSJiong Wang if (udp + 1 > data_end) 242bd4aed0eSJiong Wang return 0; 243bd4aed0eSJiong Wang if (!is_icmp) { 244bd4aed0eSJiong Wang pckt->flow.port16[0] = udp->source; 245bd4aed0eSJiong Wang pckt->flow.port16[1] = udp->dest; 246bd4aed0eSJiong Wang } else { 247bd4aed0eSJiong Wang pckt->flow.port16[0] = udp->dest; 248bd4aed0eSJiong Wang pckt->flow.port16[1] = udp->source; 249bd4aed0eSJiong Wang } 250bd4aed0eSJiong Wang return 1; 251bd4aed0eSJiong Wang } 252bd4aed0eSJiong Wang 253bd4aed0eSJiong Wang static __attribute__ ((noinline)) 254bd4aed0eSJiong Wang bool parse_tcp(void *data, void *data_end, 255bd4aed0eSJiong Wang bool is_ipv6, struct packet_description *pckt) 256bd4aed0eSJiong Wang { 257bd4aed0eSJiong Wang 258bd4aed0eSJiong Wang bool is_icmp = !((pckt->flags & (1 << 0)) == 0); 259bd4aed0eSJiong Wang __u64 off = calc_offset(is_ipv6, is_icmp); 260bd4aed0eSJiong Wang struct tcphdr *tcp; 261bd4aed0eSJiong Wang 262bd4aed0eSJiong Wang tcp = data + off; 263bd4aed0eSJiong Wang if (tcp + 1 > data_end) 264bd4aed0eSJiong Wang return 0; 265bd4aed0eSJiong Wang if (tcp->syn) 266bd4aed0eSJiong Wang pckt->flags |= (1 << 1); 267bd4aed0eSJiong Wang if (!is_icmp) { 268bd4aed0eSJiong Wang pckt->flow.port16[0] = tcp->source; 269bd4aed0eSJiong Wang pckt->flow.port16[1] = tcp->dest; 270bd4aed0eSJiong Wang } else { 271bd4aed0eSJiong Wang pckt->flow.port16[0] = tcp->dest; 272bd4aed0eSJiong Wang pckt->flow.port16[1] = tcp->source; 273bd4aed0eSJiong Wang } 274bd4aed0eSJiong Wang return 1; 275bd4aed0eSJiong Wang } 276bd4aed0eSJiong Wang 277bd4aed0eSJiong Wang static __attribute__ ((noinline)) 278bd4aed0eSJiong Wang bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, 279bd4aed0eSJiong Wang struct packet_description *pckt, 280bd4aed0eSJiong Wang struct real_definition *dst, __u32 pkt_bytes) 281bd4aed0eSJiong Wang { 282bd4aed0eSJiong Wang struct eth_hdr *new_eth; 283bd4aed0eSJiong Wang struct eth_hdr *old_eth; 284bd4aed0eSJiong Wang struct ipv6hdr *ip6h; 285bd4aed0eSJiong Wang __u32 ip_suffix; 286bd4aed0eSJiong Wang void *data_end; 287bd4aed0eSJiong Wang void *data; 288bd4aed0eSJiong Wang 289bd4aed0eSJiong Wang if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) 290bd4aed0eSJiong Wang return 0; 291bd4aed0eSJiong Wang data = (void *)(long)xdp->data; 292bd4aed0eSJiong Wang data_end = (void *)(long)xdp->data_end; 293bd4aed0eSJiong Wang new_eth = data; 294bd4aed0eSJiong Wang ip6h = data + sizeof(struct eth_hdr); 295bd4aed0eSJiong Wang old_eth = data + sizeof(struct ipv6hdr); 296bd4aed0eSJiong Wang if (new_eth + 1 > data_end || 297bd4aed0eSJiong Wang old_eth + 1 > data_end || ip6h + 1 > data_end) 298bd4aed0eSJiong Wang return 0; 299bd4aed0eSJiong Wang memcpy(new_eth->eth_dest, cval->mac, 6); 300bd4aed0eSJiong Wang memcpy(new_eth->eth_source, old_eth->eth_dest, 6); 301bd4aed0eSJiong Wang new_eth->eth_proto = 56710; 302bd4aed0eSJiong Wang ip6h->version = 6; 303bd4aed0eSJiong Wang ip6h->priority = 0; 304bd4aed0eSJiong Wang memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); 305bd4aed0eSJiong Wang 306bd4aed0eSJiong Wang ip6h->nexthdr = IPPROTO_IPV6; 307bd4aed0eSJiong Wang ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; 308bd4aed0eSJiong Wang ip6h->payload_len = 30959fd3486SIlya Leoshkevich bpf_htons(pkt_bytes + sizeof(struct ipv6hdr)); 310bd4aed0eSJiong Wang ip6h->hop_limit = 4; 311bd4aed0eSJiong Wang 312bd4aed0eSJiong Wang ip6h->saddr.in6_u.u6_addr32[0] = 1; 313bd4aed0eSJiong Wang ip6h->saddr.in6_u.u6_addr32[1] = 2; 314bd4aed0eSJiong Wang ip6h->saddr.in6_u.u6_addr32[2] = 3; 315bd4aed0eSJiong Wang ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; 316bd4aed0eSJiong Wang memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); 317bd4aed0eSJiong Wang return 1; 318bd4aed0eSJiong Wang } 319bd4aed0eSJiong Wang 320bd4aed0eSJiong Wang static __attribute__ ((noinline)) 321bd4aed0eSJiong Wang bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, 322bd4aed0eSJiong Wang struct packet_description *pckt, 323bd4aed0eSJiong Wang struct real_definition *dst, __u32 pkt_bytes) 324bd4aed0eSJiong Wang { 325bd4aed0eSJiong Wang 32659fd3486SIlya Leoshkevich __u32 ip_suffix = bpf_ntohs(pckt->flow.port16[0]); 327bd4aed0eSJiong Wang struct eth_hdr *new_eth; 328bd4aed0eSJiong Wang struct eth_hdr *old_eth; 329bd4aed0eSJiong Wang __u16 *next_iph_u16; 330bd4aed0eSJiong Wang struct iphdr *iph; 331bd4aed0eSJiong Wang __u32 csum = 0; 332bd4aed0eSJiong Wang void *data_end; 333bd4aed0eSJiong Wang void *data; 334bd4aed0eSJiong Wang 335bd4aed0eSJiong Wang ip_suffix <<= 15; 336bd4aed0eSJiong Wang ip_suffix ^= pckt->flow.src; 337bd4aed0eSJiong Wang if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) 338bd4aed0eSJiong Wang return 0; 339bd4aed0eSJiong Wang data = (void *)(long)xdp->data; 340bd4aed0eSJiong Wang data_end = (void *)(long)xdp->data_end; 341bd4aed0eSJiong Wang new_eth = data; 342bd4aed0eSJiong Wang iph = data + sizeof(struct eth_hdr); 343bd4aed0eSJiong Wang old_eth = data + sizeof(struct iphdr); 344bd4aed0eSJiong Wang if (new_eth + 1 > data_end || 345bd4aed0eSJiong Wang old_eth + 1 > data_end || iph + 1 > data_end) 346bd4aed0eSJiong Wang return 0; 347bd4aed0eSJiong Wang memcpy(new_eth->eth_dest, cval->mac, 6); 348bd4aed0eSJiong Wang memcpy(new_eth->eth_source, old_eth->eth_dest, 6); 349bd4aed0eSJiong Wang new_eth->eth_proto = 8; 350bd4aed0eSJiong Wang iph->version = 4; 351bd4aed0eSJiong Wang iph->ihl = 5; 352bd4aed0eSJiong Wang iph->frag_off = 0; 353bd4aed0eSJiong Wang iph->protocol = IPPROTO_IPIP; 354bd4aed0eSJiong Wang iph->check = 0; 355bd4aed0eSJiong Wang iph->tos = 1; 35659fd3486SIlya Leoshkevich iph->tot_len = bpf_htons(pkt_bytes + sizeof(struct iphdr)); 357bd4aed0eSJiong Wang /* don't update iph->daddr, since it will overwrite old eth_proto 358bd4aed0eSJiong Wang * and multiple iterations of bpf_prog_run() will fail 359bd4aed0eSJiong Wang */ 360bd4aed0eSJiong Wang 361bd4aed0eSJiong Wang iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst; 362bd4aed0eSJiong Wang iph->ttl = 4; 363bd4aed0eSJiong Wang 364bd4aed0eSJiong Wang next_iph_u16 = (__u16 *) iph; 365bd4aed0eSJiong Wang #pragma clang loop unroll(full) 366bd4aed0eSJiong Wang for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) 367bd4aed0eSJiong Wang csum += *next_iph_u16++; 368bd4aed0eSJiong Wang iph->check = ~((csum & 0xffff) + (csum >> 16)); 369bd4aed0eSJiong Wang if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) 370bd4aed0eSJiong Wang return 0; 371bd4aed0eSJiong Wang return 1; 372bd4aed0eSJiong Wang } 373bd4aed0eSJiong Wang 374bd4aed0eSJiong Wang static __attribute__ ((noinline)) 375bd4aed0eSJiong Wang bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) 376bd4aed0eSJiong Wang { 377bd4aed0eSJiong Wang struct eth_hdr *new_eth; 378bd4aed0eSJiong Wang struct eth_hdr *old_eth; 379bd4aed0eSJiong Wang 380bd4aed0eSJiong Wang old_eth = *data; 381bd4aed0eSJiong Wang new_eth = *data + sizeof(struct ipv6hdr); 382bd4aed0eSJiong Wang memcpy(new_eth->eth_source, old_eth->eth_source, 6); 383bd4aed0eSJiong Wang memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); 384bd4aed0eSJiong Wang if (inner_v4) 385bd4aed0eSJiong Wang new_eth->eth_proto = 8; 386bd4aed0eSJiong Wang else 387bd4aed0eSJiong Wang new_eth->eth_proto = 56710; 388bd4aed0eSJiong Wang if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) 389bd4aed0eSJiong Wang return 0; 390bd4aed0eSJiong Wang *data = (void *)(long)xdp->data; 391bd4aed0eSJiong Wang *data_end = (void *)(long)xdp->data_end; 392bd4aed0eSJiong Wang return 1; 393bd4aed0eSJiong Wang } 394bd4aed0eSJiong Wang 395bd4aed0eSJiong Wang static __attribute__ ((noinline)) 396bd4aed0eSJiong Wang bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) 397bd4aed0eSJiong Wang { 398bd4aed0eSJiong Wang struct eth_hdr *new_eth; 399bd4aed0eSJiong Wang struct eth_hdr *old_eth; 400bd4aed0eSJiong Wang 401bd4aed0eSJiong Wang old_eth = *data; 402bd4aed0eSJiong Wang new_eth = *data + sizeof(struct iphdr); 403bd4aed0eSJiong Wang memcpy(new_eth->eth_source, old_eth->eth_source, 6); 404bd4aed0eSJiong Wang memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); 405bd4aed0eSJiong Wang new_eth->eth_proto = 8; 406bd4aed0eSJiong Wang if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) 407bd4aed0eSJiong Wang return 0; 408bd4aed0eSJiong Wang *data = (void *)(long)xdp->data; 409bd4aed0eSJiong Wang *data_end = (void *)(long)xdp->data_end; 410bd4aed0eSJiong Wang return 1; 411bd4aed0eSJiong Wang } 412bd4aed0eSJiong Wang 413bd4aed0eSJiong Wang static __attribute__ ((noinline)) 414bd4aed0eSJiong Wang int swap_mac_and_send(void *data, void *data_end) 415bd4aed0eSJiong Wang { 416bd4aed0eSJiong Wang unsigned char tmp_mac[6]; 417bd4aed0eSJiong Wang struct eth_hdr *eth; 418bd4aed0eSJiong Wang 419bd4aed0eSJiong Wang eth = data; 420bd4aed0eSJiong Wang memcpy(tmp_mac, eth->eth_source, 6); 421bd4aed0eSJiong Wang memcpy(eth->eth_source, eth->eth_dest, 6); 422bd4aed0eSJiong Wang memcpy(eth->eth_dest, tmp_mac, 6); 423bd4aed0eSJiong Wang return XDP_TX; 424bd4aed0eSJiong Wang } 425bd4aed0eSJiong Wang 426bd4aed0eSJiong Wang static __attribute__ ((noinline)) 427bd4aed0eSJiong Wang int send_icmp_reply(void *data, void *data_end) 428bd4aed0eSJiong Wang { 429bd4aed0eSJiong Wang struct icmphdr *icmp_hdr; 430bd4aed0eSJiong Wang __u16 *next_iph_u16; 431bd4aed0eSJiong Wang __u32 tmp_addr = 0; 432bd4aed0eSJiong Wang struct iphdr *iph; 433bd4aed0eSJiong Wang __u32 csum1 = 0; 434bd4aed0eSJiong Wang __u32 csum = 0; 435bd4aed0eSJiong Wang __u64 off = 0; 436bd4aed0eSJiong Wang 437bd4aed0eSJiong Wang if (data + sizeof(struct eth_hdr) 438bd4aed0eSJiong Wang + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end) 439bd4aed0eSJiong Wang return XDP_DROP; 440bd4aed0eSJiong Wang off += sizeof(struct eth_hdr); 441bd4aed0eSJiong Wang iph = data + off; 442bd4aed0eSJiong Wang off += sizeof(struct iphdr); 443bd4aed0eSJiong Wang icmp_hdr = data + off; 444bd4aed0eSJiong Wang icmp_hdr->type = 0; 445bd4aed0eSJiong Wang icmp_hdr->checksum += 0x0007; 446bd4aed0eSJiong Wang iph->ttl = 4; 447bd4aed0eSJiong Wang tmp_addr = iph->daddr; 448bd4aed0eSJiong Wang iph->daddr = iph->saddr; 449bd4aed0eSJiong Wang iph->saddr = tmp_addr; 450bd4aed0eSJiong Wang iph->check = 0; 451bd4aed0eSJiong Wang next_iph_u16 = (__u16 *) iph; 452bd4aed0eSJiong Wang #pragma clang loop unroll(full) 453bd4aed0eSJiong Wang for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) 454bd4aed0eSJiong Wang csum += *next_iph_u16++; 455bd4aed0eSJiong Wang iph->check = ~((csum & 0xffff) + (csum >> 16)); 456bd4aed0eSJiong Wang return swap_mac_and_send(data, data_end); 457bd4aed0eSJiong Wang } 458bd4aed0eSJiong Wang 459bd4aed0eSJiong Wang static __attribute__ ((noinline)) 460bd4aed0eSJiong Wang int send_icmp6_reply(void *data, void *data_end) 461bd4aed0eSJiong Wang { 462bd4aed0eSJiong Wang struct icmp6hdr *icmp_hdr; 463bd4aed0eSJiong Wang struct ipv6hdr *ip6h; 464bd4aed0eSJiong Wang __be32 tmp_addr[4]; 465bd4aed0eSJiong Wang __u64 off = 0; 466bd4aed0eSJiong Wang 467bd4aed0eSJiong Wang if (data + sizeof(struct eth_hdr) 468bd4aed0eSJiong Wang + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end) 469bd4aed0eSJiong Wang return XDP_DROP; 470bd4aed0eSJiong Wang off += sizeof(struct eth_hdr); 471bd4aed0eSJiong Wang ip6h = data + off; 472bd4aed0eSJiong Wang off += sizeof(struct ipv6hdr); 473bd4aed0eSJiong Wang icmp_hdr = data + off; 474bd4aed0eSJiong Wang icmp_hdr->icmp6_type = 129; 475bd4aed0eSJiong Wang icmp_hdr->icmp6_cksum -= 0x0001; 476bd4aed0eSJiong Wang ip6h->hop_limit = 4; 477bd4aed0eSJiong Wang memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); 478bd4aed0eSJiong Wang memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16); 479bd4aed0eSJiong Wang memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16); 480bd4aed0eSJiong Wang return swap_mac_and_send(data, data_end); 481bd4aed0eSJiong Wang } 482bd4aed0eSJiong Wang 483bd4aed0eSJiong Wang static __attribute__ ((noinline)) 484bd4aed0eSJiong Wang int parse_icmpv6(void *data, void *data_end, __u64 off, 485bd4aed0eSJiong Wang struct packet_description *pckt) 486bd4aed0eSJiong Wang { 487bd4aed0eSJiong Wang struct icmp6hdr *icmp_hdr; 488bd4aed0eSJiong Wang struct ipv6hdr *ip6h; 489bd4aed0eSJiong Wang 490bd4aed0eSJiong Wang icmp_hdr = data + off; 491bd4aed0eSJiong Wang if (icmp_hdr + 1 > data_end) 492bd4aed0eSJiong Wang return XDP_DROP; 493bd4aed0eSJiong Wang if (icmp_hdr->icmp6_type == 128) 494bd4aed0eSJiong Wang return send_icmp6_reply(data, data_end); 495bd4aed0eSJiong Wang if (icmp_hdr->icmp6_type != 3) 496bd4aed0eSJiong Wang return XDP_PASS; 497bd4aed0eSJiong Wang off += sizeof(struct icmp6hdr); 498bd4aed0eSJiong Wang ip6h = data + off; 499bd4aed0eSJiong Wang if (ip6h + 1 > data_end) 500bd4aed0eSJiong Wang return XDP_DROP; 501bd4aed0eSJiong Wang pckt->flow.proto = ip6h->nexthdr; 502bd4aed0eSJiong Wang pckt->flags |= (1 << 0); 503bd4aed0eSJiong Wang memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); 504bd4aed0eSJiong Wang memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16); 505bd4aed0eSJiong Wang return -1; 506bd4aed0eSJiong Wang } 507bd4aed0eSJiong Wang 508bd4aed0eSJiong Wang static __attribute__ ((noinline)) 509bd4aed0eSJiong Wang int parse_icmp(void *data, void *data_end, __u64 off, 510bd4aed0eSJiong Wang struct packet_description *pckt) 511bd4aed0eSJiong Wang { 512bd4aed0eSJiong Wang struct icmphdr *icmp_hdr; 513bd4aed0eSJiong Wang struct iphdr *iph; 514bd4aed0eSJiong Wang 515bd4aed0eSJiong Wang icmp_hdr = data + off; 516bd4aed0eSJiong Wang if (icmp_hdr + 1 > data_end) 517bd4aed0eSJiong Wang return XDP_DROP; 518bd4aed0eSJiong Wang if (icmp_hdr->type == 8) 519bd4aed0eSJiong Wang return send_icmp_reply(data, data_end); 520bd4aed0eSJiong Wang if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) 521bd4aed0eSJiong Wang return XDP_PASS; 522bd4aed0eSJiong Wang off += sizeof(struct icmphdr); 523bd4aed0eSJiong Wang iph = data + off; 524bd4aed0eSJiong Wang if (iph + 1 > data_end) 525bd4aed0eSJiong Wang return XDP_DROP; 526bd4aed0eSJiong Wang if (iph->ihl != 5) 527bd4aed0eSJiong Wang return XDP_DROP; 528bd4aed0eSJiong Wang pckt->flow.proto = iph->protocol; 529bd4aed0eSJiong Wang pckt->flags |= (1 << 0); 530bd4aed0eSJiong Wang pckt->flow.src = iph->daddr; 531bd4aed0eSJiong Wang pckt->flow.dst = iph->saddr; 532bd4aed0eSJiong Wang return -1; 533bd4aed0eSJiong Wang } 534bd4aed0eSJiong Wang 535bd4aed0eSJiong Wang static __attribute__ ((noinline)) 536bd4aed0eSJiong Wang __u32 get_packet_hash(struct packet_description *pckt, 537bd4aed0eSJiong Wang bool hash_16bytes) 538bd4aed0eSJiong Wang { 539bd4aed0eSJiong Wang if (hash_16bytes) 540bd4aed0eSJiong Wang return jhash_2words(jhash(pckt->flow.srcv6, 16, 12), 541bd4aed0eSJiong Wang pckt->flow.ports, 24); 542bd4aed0eSJiong Wang else 543bd4aed0eSJiong Wang return jhash_2words(pckt->flow.src, pckt->flow.ports, 544bd4aed0eSJiong Wang 24); 545bd4aed0eSJiong Wang } 546bd4aed0eSJiong Wang 547bd4aed0eSJiong Wang __attribute__ ((noinline)) 548bd4aed0eSJiong Wang static bool get_packet_dst(struct real_definition **real, 549bd4aed0eSJiong Wang struct packet_description *pckt, 550bd4aed0eSJiong Wang struct vip_meta *vip_info, 551bd4aed0eSJiong Wang bool is_ipv6, void *lru_map) 552bd4aed0eSJiong Wang { 553bd4aed0eSJiong Wang struct real_pos_lru new_dst_lru = { }; 554bd4aed0eSJiong Wang bool hash_16bytes = is_ipv6; 555bd4aed0eSJiong Wang __u32 *real_pos, hash, key; 556bd4aed0eSJiong Wang __u64 cur_time; 557bd4aed0eSJiong Wang 558bd4aed0eSJiong Wang if (vip_info->flags & (1 << 2)) 559bd4aed0eSJiong Wang hash_16bytes = 1; 560bd4aed0eSJiong Wang if (vip_info->flags & (1 << 3)) { 561bd4aed0eSJiong Wang pckt->flow.port16[0] = pckt->flow.port16[1]; 562bd4aed0eSJiong Wang memset(pckt->flow.srcv6, 0, 16); 563bd4aed0eSJiong Wang } 564bd4aed0eSJiong Wang hash = get_packet_hash(pckt, hash_16bytes); 565bd4aed0eSJiong Wang if (hash != 0x358459b7 /* jhash of ipv4 packet */ && 566bd4aed0eSJiong Wang hash != 0x2f4bc6bb /* jhash of ipv6 packet */) 567bd4aed0eSJiong Wang return 0; 568bd4aed0eSJiong Wang key = 2 * vip_info->vip_num + hash % 2; 569bd4aed0eSJiong Wang real_pos = bpf_map_lookup_elem(&ch_rings, &key); 570bd4aed0eSJiong Wang if (!real_pos) 571bd4aed0eSJiong Wang return 0; 572bd4aed0eSJiong Wang key = *real_pos; 573bd4aed0eSJiong Wang *real = bpf_map_lookup_elem(&reals, &key); 574bd4aed0eSJiong Wang if (!(*real)) 575bd4aed0eSJiong Wang return 0; 576bd4aed0eSJiong Wang if (!(vip_info->flags & (1 << 1))) { 577bd4aed0eSJiong Wang __u32 conn_rate_key = 512 + 2; 578bd4aed0eSJiong Wang struct lb_stats *conn_rate_stats = 579bd4aed0eSJiong Wang bpf_map_lookup_elem(&stats, &conn_rate_key); 580bd4aed0eSJiong Wang 581bd4aed0eSJiong Wang if (!conn_rate_stats) 582bd4aed0eSJiong Wang return 1; 583bd4aed0eSJiong Wang cur_time = bpf_ktime_get_ns(); 584bd4aed0eSJiong Wang if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { 585bd4aed0eSJiong Wang conn_rate_stats->v1 = 1; 586bd4aed0eSJiong Wang conn_rate_stats->v2 = cur_time; 587bd4aed0eSJiong Wang } else { 588bd4aed0eSJiong Wang conn_rate_stats->v1 += 1; 589bd4aed0eSJiong Wang if (conn_rate_stats->v1 >= 1) 590bd4aed0eSJiong Wang return 1; 591bd4aed0eSJiong Wang } 592bd4aed0eSJiong Wang if (pckt->flow.proto == IPPROTO_UDP) 593bd4aed0eSJiong Wang new_dst_lru.atime = cur_time; 594bd4aed0eSJiong Wang new_dst_lru.pos = key; 595bd4aed0eSJiong Wang bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); 596bd4aed0eSJiong Wang } 597bd4aed0eSJiong Wang return 1; 598bd4aed0eSJiong Wang } 599bd4aed0eSJiong Wang 600bd4aed0eSJiong Wang __attribute__ ((noinline)) 601bd4aed0eSJiong Wang static void connection_table_lookup(struct real_definition **real, 602bd4aed0eSJiong Wang struct packet_description *pckt, 603bd4aed0eSJiong Wang void *lru_map) 604bd4aed0eSJiong Wang { 605bd4aed0eSJiong Wang 606bd4aed0eSJiong Wang struct real_pos_lru *dst_lru; 607bd4aed0eSJiong Wang __u64 cur_time; 608bd4aed0eSJiong Wang __u32 key; 609bd4aed0eSJiong Wang 610bd4aed0eSJiong Wang dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow); 611bd4aed0eSJiong Wang if (!dst_lru) 612bd4aed0eSJiong Wang return; 613bd4aed0eSJiong Wang if (pckt->flow.proto == IPPROTO_UDP) { 614bd4aed0eSJiong Wang cur_time = bpf_ktime_get_ns(); 615bd4aed0eSJiong Wang if (cur_time - dst_lru->atime > 300000) 616bd4aed0eSJiong Wang return; 617bd4aed0eSJiong Wang dst_lru->atime = cur_time; 618bd4aed0eSJiong Wang } 619bd4aed0eSJiong Wang key = dst_lru->pos; 620bd4aed0eSJiong Wang *real = bpf_map_lookup_elem(&reals, &key); 621bd4aed0eSJiong Wang } 622bd4aed0eSJiong Wang 623bd4aed0eSJiong Wang /* don't believe your eyes! 624bd4aed0eSJiong Wang * below function has 6 arguments whereas bpf and llvm allow maximum of 5 625bd4aed0eSJiong Wang * but since it's _static_ llvm can optimize one argument away 626bd4aed0eSJiong Wang */ 627bd4aed0eSJiong Wang __attribute__ ((noinline)) 628bd4aed0eSJiong Wang static int process_l3_headers_v6(struct packet_description *pckt, 629bd4aed0eSJiong Wang __u8 *protocol, __u64 off, 630bd4aed0eSJiong Wang __u16 *pkt_bytes, void *data, 631bd4aed0eSJiong Wang void *data_end) 632bd4aed0eSJiong Wang { 633bd4aed0eSJiong Wang struct ipv6hdr *ip6h; 634bd4aed0eSJiong Wang __u64 iph_len; 635bd4aed0eSJiong Wang int action; 636bd4aed0eSJiong Wang 637bd4aed0eSJiong Wang ip6h = data + off; 638bd4aed0eSJiong Wang if (ip6h + 1 > data_end) 639bd4aed0eSJiong Wang return XDP_DROP; 640bd4aed0eSJiong Wang iph_len = sizeof(struct ipv6hdr); 641bd4aed0eSJiong Wang *protocol = ip6h->nexthdr; 642bd4aed0eSJiong Wang pckt->flow.proto = *protocol; 64359fd3486SIlya Leoshkevich *pkt_bytes = bpf_ntohs(ip6h->payload_len); 644bd4aed0eSJiong Wang off += iph_len; 645bd4aed0eSJiong Wang if (*protocol == 45) { 646bd4aed0eSJiong Wang return XDP_DROP; 647bd4aed0eSJiong Wang } else if (*protocol == 59) { 648bd4aed0eSJiong Wang action = parse_icmpv6(data, data_end, off, pckt); 649bd4aed0eSJiong Wang if (action >= 0) 650bd4aed0eSJiong Wang return action; 651bd4aed0eSJiong Wang } else { 652bd4aed0eSJiong Wang memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16); 653bd4aed0eSJiong Wang memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16); 654bd4aed0eSJiong Wang } 655bd4aed0eSJiong Wang return -1; 656bd4aed0eSJiong Wang } 657bd4aed0eSJiong Wang 658bd4aed0eSJiong Wang __attribute__ ((noinline)) 659bd4aed0eSJiong Wang static int process_l3_headers_v4(struct packet_description *pckt, 660bd4aed0eSJiong Wang __u8 *protocol, __u64 off, 661bd4aed0eSJiong Wang __u16 *pkt_bytes, void *data, 662bd4aed0eSJiong Wang void *data_end) 663bd4aed0eSJiong Wang { 664bd4aed0eSJiong Wang struct iphdr *iph; 665bd4aed0eSJiong Wang __u64 iph_len; 666bd4aed0eSJiong Wang int action; 667bd4aed0eSJiong Wang 668bd4aed0eSJiong Wang iph = data + off; 669bd4aed0eSJiong Wang if (iph + 1 > data_end) 670bd4aed0eSJiong Wang return XDP_DROP; 671bd4aed0eSJiong Wang if (iph->ihl != 5) 672bd4aed0eSJiong Wang return XDP_DROP; 673bd4aed0eSJiong Wang *protocol = iph->protocol; 674bd4aed0eSJiong Wang pckt->flow.proto = *protocol; 67559fd3486SIlya Leoshkevich *pkt_bytes = bpf_ntohs(iph->tot_len); 676bd4aed0eSJiong Wang off += 20; 677bd4aed0eSJiong Wang if (iph->frag_off & 65343) 678bd4aed0eSJiong Wang return XDP_DROP; 679bd4aed0eSJiong Wang if (*protocol == IPPROTO_ICMP) { 680bd4aed0eSJiong Wang action = parse_icmp(data, data_end, off, pckt); 681bd4aed0eSJiong Wang if (action >= 0) 682bd4aed0eSJiong Wang return action; 683bd4aed0eSJiong Wang } else { 684bd4aed0eSJiong Wang pckt->flow.src = iph->saddr; 685bd4aed0eSJiong Wang pckt->flow.dst = iph->daddr; 686bd4aed0eSJiong Wang } 687bd4aed0eSJiong Wang return -1; 688bd4aed0eSJiong Wang } 689bd4aed0eSJiong Wang 690bd4aed0eSJiong Wang __attribute__ ((noinline)) 691bd4aed0eSJiong Wang static int process_packet(void *data, __u64 off, void *data_end, 692bd4aed0eSJiong Wang bool is_ipv6, struct xdp_md *xdp) 693bd4aed0eSJiong Wang { 694bd4aed0eSJiong Wang 695bd4aed0eSJiong Wang struct real_definition *dst = NULL; 696bd4aed0eSJiong Wang struct packet_description pckt = { }; 697bd4aed0eSJiong Wang struct vip_definition vip = { }; 698bd4aed0eSJiong Wang struct lb_stats *data_stats; 699bd4aed0eSJiong Wang struct eth_hdr *eth = data; 700bd4aed0eSJiong Wang void *lru_map = &lru_cache; 701bd4aed0eSJiong Wang struct vip_meta *vip_info; 702bd4aed0eSJiong Wang __u32 lru_stats_key = 513; 703bd4aed0eSJiong Wang __u32 mac_addr_pos = 0; 704bd4aed0eSJiong Wang __u32 stats_key = 512; 705bd4aed0eSJiong Wang struct ctl_value *cval; 706bd4aed0eSJiong Wang __u16 pkt_bytes; 707bd4aed0eSJiong Wang __u64 iph_len; 708bd4aed0eSJiong Wang __u8 protocol; 709bd4aed0eSJiong Wang __u32 vip_num; 710bd4aed0eSJiong Wang int action; 711bd4aed0eSJiong Wang 712bd4aed0eSJiong Wang if (is_ipv6) 713bd4aed0eSJiong Wang action = process_l3_headers_v6(&pckt, &protocol, off, 714bd4aed0eSJiong Wang &pkt_bytes, data, data_end); 715bd4aed0eSJiong Wang else 716bd4aed0eSJiong Wang action = process_l3_headers_v4(&pckt, &protocol, off, 717bd4aed0eSJiong Wang &pkt_bytes, data, data_end); 718bd4aed0eSJiong Wang if (action >= 0) 719bd4aed0eSJiong Wang return action; 720bd4aed0eSJiong Wang protocol = pckt.flow.proto; 721bd4aed0eSJiong Wang if (protocol == IPPROTO_TCP) { 722bd4aed0eSJiong Wang if (!parse_tcp(data, data_end, is_ipv6, &pckt)) 723bd4aed0eSJiong Wang return XDP_DROP; 724bd4aed0eSJiong Wang } else if (protocol == IPPROTO_UDP) { 725bd4aed0eSJiong Wang if (!parse_udp(data, data_end, is_ipv6, &pckt)) 726bd4aed0eSJiong Wang return XDP_DROP; 727bd4aed0eSJiong Wang } else { 728bd4aed0eSJiong Wang return XDP_TX; 729bd4aed0eSJiong Wang } 730bd4aed0eSJiong Wang 731bd4aed0eSJiong Wang if (is_ipv6) 732bd4aed0eSJiong Wang memcpy(vip.vipv6, pckt.flow.dstv6, 16); 733bd4aed0eSJiong Wang else 734bd4aed0eSJiong Wang vip.vip = pckt.flow.dst; 735bd4aed0eSJiong Wang vip.port = pckt.flow.port16[1]; 736bd4aed0eSJiong Wang vip.proto = pckt.flow.proto; 737bd4aed0eSJiong Wang vip_info = bpf_map_lookup_elem(&vip_map, &vip); 738bd4aed0eSJiong Wang if (!vip_info) { 739bd4aed0eSJiong Wang vip.port = 0; 740bd4aed0eSJiong Wang vip_info = bpf_map_lookup_elem(&vip_map, &vip); 741bd4aed0eSJiong Wang if (!vip_info) 742bd4aed0eSJiong Wang return XDP_PASS; 743bd4aed0eSJiong Wang if (!(vip_info->flags & (1 << 4))) 744bd4aed0eSJiong Wang pckt.flow.port16[1] = 0; 745bd4aed0eSJiong Wang } 746bd4aed0eSJiong Wang if (data_end - data > 1400) 747bd4aed0eSJiong Wang return XDP_DROP; 748bd4aed0eSJiong Wang data_stats = bpf_map_lookup_elem(&stats, &stats_key); 749bd4aed0eSJiong Wang if (!data_stats) 750bd4aed0eSJiong Wang return XDP_DROP; 751bd4aed0eSJiong Wang data_stats->v1 += 1; 752bd4aed0eSJiong Wang if (!dst) { 753bd4aed0eSJiong Wang if (vip_info->flags & (1 << 0)) 754bd4aed0eSJiong Wang pckt.flow.port16[0] = 0; 755bd4aed0eSJiong Wang if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) 756bd4aed0eSJiong Wang connection_table_lookup(&dst, &pckt, lru_map); 757bd4aed0eSJiong Wang if (dst) 758bd4aed0eSJiong Wang goto out; 759bd4aed0eSJiong Wang if (pckt.flow.proto == IPPROTO_TCP) { 760bd4aed0eSJiong Wang struct lb_stats *lru_stats = 761bd4aed0eSJiong Wang bpf_map_lookup_elem(&stats, &lru_stats_key); 762bd4aed0eSJiong Wang 763bd4aed0eSJiong Wang if (!lru_stats) 764bd4aed0eSJiong Wang return XDP_DROP; 765bd4aed0eSJiong Wang if (pckt.flags & (1 << 1)) 766bd4aed0eSJiong Wang lru_stats->v1 += 1; 767bd4aed0eSJiong Wang else 768bd4aed0eSJiong Wang lru_stats->v2 += 1; 769bd4aed0eSJiong Wang } 770bd4aed0eSJiong Wang if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map)) 771bd4aed0eSJiong Wang return XDP_DROP; 772bd4aed0eSJiong Wang data_stats->v2 += 1; 773bd4aed0eSJiong Wang } 774bd4aed0eSJiong Wang out: 775bd4aed0eSJiong Wang cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); 776bd4aed0eSJiong Wang if (!cval) 777bd4aed0eSJiong Wang return XDP_DROP; 778bd4aed0eSJiong Wang if (dst->flags & (1 << 0)) { 779bd4aed0eSJiong Wang if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes)) 780bd4aed0eSJiong Wang return XDP_DROP; 781bd4aed0eSJiong Wang } else { 782bd4aed0eSJiong Wang if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes)) 783bd4aed0eSJiong Wang return XDP_DROP; 784bd4aed0eSJiong Wang } 785bd4aed0eSJiong Wang vip_num = vip_info->vip_num; 786bd4aed0eSJiong Wang data_stats = bpf_map_lookup_elem(&stats, &vip_num); 787bd4aed0eSJiong Wang if (!data_stats) 788bd4aed0eSJiong Wang return XDP_DROP; 789bd4aed0eSJiong Wang data_stats->v1 += 1; 790bd4aed0eSJiong Wang data_stats->v2 += pkt_bytes; 791bd4aed0eSJiong Wang 792bd4aed0eSJiong Wang data = (void *)(long)xdp->data; 793bd4aed0eSJiong Wang data_end = (void *)(long)xdp->data_end; 794bd4aed0eSJiong Wang if (data + 4 > data_end) 795bd4aed0eSJiong Wang return XDP_DROP; 796bd4aed0eSJiong Wang *(u32 *)data = dst->dst; 797bd4aed0eSJiong Wang return XDP_DROP; 798bd4aed0eSJiong Wang } 799bd4aed0eSJiong Wang 800bd4aed0eSJiong Wang __attribute__ ((section("xdp-test"), used)) 801bd4aed0eSJiong Wang int balancer_ingress(struct xdp_md *ctx) 802bd4aed0eSJiong Wang { 803bd4aed0eSJiong Wang void *data = (void *)(long)ctx->data; 804bd4aed0eSJiong Wang void *data_end = (void *)(long)ctx->data_end; 805bd4aed0eSJiong Wang struct eth_hdr *eth = data; 806bd4aed0eSJiong Wang __u32 eth_proto; 807bd4aed0eSJiong Wang __u32 nh_off; 808bd4aed0eSJiong Wang 809bd4aed0eSJiong Wang nh_off = sizeof(struct eth_hdr); 810bd4aed0eSJiong Wang if (data + nh_off > data_end) 811bd4aed0eSJiong Wang return XDP_DROP; 81259fd3486SIlya Leoshkevich eth_proto = bpf_ntohs(eth->eth_proto); 81359fd3486SIlya Leoshkevich if (eth_proto == ETH_P_IP) 814bd4aed0eSJiong Wang return process_packet(data, nh_off, data_end, 0, ctx); 81559fd3486SIlya Leoshkevich else if (eth_proto == ETH_P_IPV6) 816bd4aed0eSJiong Wang return process_packet(data, nh_off, data_end, 1, ctx); 817bd4aed0eSJiong Wang else 818bd4aed0eSJiong Wang return XDP_DROP; 819bd4aed0eSJiong Wang } 820bd4aed0eSJiong Wang 821bd4aed0eSJiong Wang char _license[] __attribute__ ((section("license"), used)) = "GPL"; 822bd4aed0eSJiong Wang int _version __attribute__ ((section("version"), used)) = 1; 823