// SPDX-License-Identifier: GPL-2.0
/*
 * Check if we can migrate child sockets.
 *
 *   1. If reuse_md->migrating_sk is NULL (SYN packet),
 *        return SK_PASS without selecting a listener.
 *   2. If reuse_md->migrating_sk is not NULL (socket migration),
 *        select a listener (reuseport_map[migrate_map[cookie]]).
 *
 * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
 */
12*c9d0bdefSKuniyuki Iwashima 
13*c9d0bdefSKuniyuki Iwashima #include <stddef.h>
14*c9d0bdefSKuniyuki Iwashima #include <string.h>
15*c9d0bdefSKuniyuki Iwashima #include <linux/bpf.h>
16*c9d0bdefSKuniyuki Iwashima #include <linux/if_ether.h>
17*c9d0bdefSKuniyuki Iwashima #include <linux/ip.h>
18*c9d0bdefSKuniyuki Iwashima #include <linux/ipv6.h>
19*c9d0bdefSKuniyuki Iwashima #include <linux/tcp.h>
20*c9d0bdefSKuniyuki Iwashima #include <linux/in.h>
21*c9d0bdefSKuniyuki Iwashima #include <bpf/bpf_endian.h>
22*c9d0bdefSKuniyuki Iwashima #include <bpf/bpf_helpers.h>
23*c9d0bdefSKuniyuki Iwashima 
24*c9d0bdefSKuniyuki Iwashima struct {
25*c9d0bdefSKuniyuki Iwashima 	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
26*c9d0bdefSKuniyuki Iwashima 	__uint(max_entries, 256);
27*c9d0bdefSKuniyuki Iwashima 	__type(key, int);
28*c9d0bdefSKuniyuki Iwashima 	__type(value, __u64);
29*c9d0bdefSKuniyuki Iwashima } reuseport_map SEC(".maps");
30*c9d0bdefSKuniyuki Iwashima 
31*c9d0bdefSKuniyuki Iwashima struct {
32*c9d0bdefSKuniyuki Iwashima 	__uint(type, BPF_MAP_TYPE_HASH);
33*c9d0bdefSKuniyuki Iwashima 	__uint(max_entries, 256);
34*c9d0bdefSKuniyuki Iwashima 	__type(key, __u64);
35*c9d0bdefSKuniyuki Iwashima 	__type(value, int);
36*c9d0bdefSKuniyuki Iwashima } migrate_map SEC(".maps");
37*c9d0bdefSKuniyuki Iwashima 
38*c9d0bdefSKuniyuki Iwashima int migrated_at_close = 0;
39*c9d0bdefSKuniyuki Iwashima int migrated_at_close_fastopen = 0;
40*c9d0bdefSKuniyuki Iwashima int migrated_at_send_synack = 0;
41*c9d0bdefSKuniyuki Iwashima int migrated_at_recv_ack = 0;
42*c9d0bdefSKuniyuki Iwashima __be16 server_port;
43*c9d0bdefSKuniyuki Iwashima 
44*c9d0bdefSKuniyuki Iwashima SEC("xdp")
drop_ack(struct xdp_md * xdp)45*c9d0bdefSKuniyuki Iwashima int drop_ack(struct xdp_md *xdp)
46*c9d0bdefSKuniyuki Iwashima {
47*c9d0bdefSKuniyuki Iwashima 	void *data_end = (void *)(long)xdp->data_end;
48*c9d0bdefSKuniyuki Iwashima 	void *data = (void *)(long)xdp->data;
49*c9d0bdefSKuniyuki Iwashima 	struct ethhdr *eth = data;
50*c9d0bdefSKuniyuki Iwashima 	struct tcphdr *tcp = NULL;
51*c9d0bdefSKuniyuki Iwashima 
52*c9d0bdefSKuniyuki Iwashima 	if (eth + 1 > data_end)
53*c9d0bdefSKuniyuki Iwashima 		goto pass;
54*c9d0bdefSKuniyuki Iwashima 
55*c9d0bdefSKuniyuki Iwashima 	switch (bpf_ntohs(eth->h_proto)) {
56*c9d0bdefSKuniyuki Iwashima 	case ETH_P_IP: {
57*c9d0bdefSKuniyuki Iwashima 		struct iphdr *ip = (struct iphdr *)(eth + 1);
58*c9d0bdefSKuniyuki Iwashima 
59*c9d0bdefSKuniyuki Iwashima 		if (ip + 1 > data_end)
60*c9d0bdefSKuniyuki Iwashima 			goto pass;
61*c9d0bdefSKuniyuki Iwashima 
62*c9d0bdefSKuniyuki Iwashima 		if (ip->protocol != IPPROTO_TCP)
63*c9d0bdefSKuniyuki Iwashima 			goto pass;
64*c9d0bdefSKuniyuki Iwashima 
65*c9d0bdefSKuniyuki Iwashima 		tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4);
66*c9d0bdefSKuniyuki Iwashima 		break;
67*c9d0bdefSKuniyuki Iwashima 	}
68*c9d0bdefSKuniyuki Iwashima 	case ETH_P_IPV6: {
69*c9d0bdefSKuniyuki Iwashima 		struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1);
70*c9d0bdefSKuniyuki Iwashima 
71*c9d0bdefSKuniyuki Iwashima 		if (ipv6 + 1 > data_end)
72*c9d0bdefSKuniyuki Iwashima 			goto pass;
73*c9d0bdefSKuniyuki Iwashima 
74*c9d0bdefSKuniyuki Iwashima 		if (ipv6->nexthdr != IPPROTO_TCP)
75*c9d0bdefSKuniyuki Iwashima 			goto pass;
76*c9d0bdefSKuniyuki Iwashima 
77*c9d0bdefSKuniyuki Iwashima 		tcp = (struct tcphdr *)(ipv6 + 1);
78*c9d0bdefSKuniyuki Iwashima 		break;
79*c9d0bdefSKuniyuki Iwashima 	}
80*c9d0bdefSKuniyuki Iwashima 	default:
81*c9d0bdefSKuniyuki Iwashima 		goto pass;
82*c9d0bdefSKuniyuki Iwashima 	}
83*c9d0bdefSKuniyuki Iwashima 
84*c9d0bdefSKuniyuki Iwashima 	if (tcp + 1 > data_end)
85*c9d0bdefSKuniyuki Iwashima 		goto pass;
86*c9d0bdefSKuniyuki Iwashima 
87*c9d0bdefSKuniyuki Iwashima 	if (tcp->dest != server_port)
88*c9d0bdefSKuniyuki Iwashima 		goto pass;
89*c9d0bdefSKuniyuki Iwashima 
90*c9d0bdefSKuniyuki Iwashima 	if (!tcp->syn && tcp->ack)
91*c9d0bdefSKuniyuki Iwashima 		return XDP_DROP;
92*c9d0bdefSKuniyuki Iwashima 
93*c9d0bdefSKuniyuki Iwashima pass:
94*c9d0bdefSKuniyuki Iwashima 	return XDP_PASS;
95*c9d0bdefSKuniyuki Iwashima }
96*c9d0bdefSKuniyuki Iwashima 
97*c9d0bdefSKuniyuki Iwashima SEC("sk_reuseport/migrate")
migrate_reuseport(struct sk_reuseport_md * reuse_md)98*c9d0bdefSKuniyuki Iwashima int migrate_reuseport(struct sk_reuseport_md *reuse_md)
99*c9d0bdefSKuniyuki Iwashima {
100*c9d0bdefSKuniyuki Iwashima 	int *key, flags = 0, state, err;
101*c9d0bdefSKuniyuki Iwashima 	__u64 cookie;
102*c9d0bdefSKuniyuki Iwashima 
103*c9d0bdefSKuniyuki Iwashima 	if (!reuse_md->migrating_sk)
104*c9d0bdefSKuniyuki Iwashima 		return SK_PASS;
105*c9d0bdefSKuniyuki Iwashima 
106*c9d0bdefSKuniyuki Iwashima 	state = reuse_md->migrating_sk->state;
107*c9d0bdefSKuniyuki Iwashima 	cookie = bpf_get_socket_cookie(reuse_md->sk);
108*c9d0bdefSKuniyuki Iwashima 
109*c9d0bdefSKuniyuki Iwashima 	key = bpf_map_lookup_elem(&migrate_map, &cookie);
110*c9d0bdefSKuniyuki Iwashima 	if (!key)
111*c9d0bdefSKuniyuki Iwashima 		return SK_DROP;
112*c9d0bdefSKuniyuki Iwashima 
113*c9d0bdefSKuniyuki Iwashima 	err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags);
114*c9d0bdefSKuniyuki Iwashima 	if (err)
115*c9d0bdefSKuniyuki Iwashima 		return SK_PASS;
116*c9d0bdefSKuniyuki Iwashima 
117*c9d0bdefSKuniyuki Iwashima 	switch (state) {
118*c9d0bdefSKuniyuki Iwashima 	case BPF_TCP_ESTABLISHED:
119*c9d0bdefSKuniyuki Iwashima 		__sync_fetch_and_add(&migrated_at_close, 1);
120*c9d0bdefSKuniyuki Iwashima 		break;
121*c9d0bdefSKuniyuki Iwashima 	case BPF_TCP_SYN_RECV:
122*c9d0bdefSKuniyuki Iwashima 		__sync_fetch_and_add(&migrated_at_close_fastopen, 1);
123*c9d0bdefSKuniyuki Iwashima 		break;
124*c9d0bdefSKuniyuki Iwashima 	case BPF_TCP_NEW_SYN_RECV:
125*c9d0bdefSKuniyuki Iwashima 		if (!reuse_md->len)
126*c9d0bdefSKuniyuki Iwashima 			__sync_fetch_and_add(&migrated_at_send_synack, 1);
127*c9d0bdefSKuniyuki Iwashima 		else
128*c9d0bdefSKuniyuki Iwashima 			__sync_fetch_and_add(&migrated_at_recv_ack, 1);
129*c9d0bdefSKuniyuki Iwashima 		break;
130*c9d0bdefSKuniyuki Iwashima 	}
131*c9d0bdefSKuniyuki Iwashima 
132*c9d0bdefSKuniyuki Iwashima 	return SK_PASS;
133*c9d0bdefSKuniyuki Iwashima }
134*c9d0bdefSKuniyuki Iwashima 
135*c9d0bdefSKuniyuki Iwashima char _license[] SEC("license") = "GPL";
136