1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Cloudflare Ltd.
3 // Copyright (c) 2020 Isovalent, Inc.
4 
5 #include <stddef.h>
6 #include <stdbool.h>
7 #include <string.h>
8 #include <linux/bpf.h>
9 #include <linux/if_ether.h>
10 #include <linux/in.h>
11 #include <linux/ip.h>
12 #include <linux/ipv6.h>
13 #include <linux/pkt_cls.h>
14 #include <linux/tcp.h>
15 #include <sys/socket.h>
16 #include <bpf/bpf_helpers.h>
17 #include <bpf/bpf_endian.h>
18 
19 /* Pin map under /sys/fs/bpf/tc/globals/<map name> */
20 #define PIN_GLOBAL_NS 2
21 
22 /* Must match struct bpf_elf_map layout from iproute2 */
23 struct {
24 	__u32 type;
25 	__u32 size_key;
26 	__u32 size_value;
27 	__u32 max_elem;
28 	__u32 flags;
29 	__u32 id;
30 	__u32 pinning;
31 } server_map SEC("maps") = {
32 	.type = BPF_MAP_TYPE_SOCKMAP,
33 	.size_key = sizeof(int),
34 	.size_value  = sizeof(__u64),
35 	.max_elem = 1,
36 	.pinning = PIN_GLOBAL_NS,
37 };
38 
39 char _license[] SEC("license") = "GPL";
40 
41 /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
42 static inline struct bpf_sock_tuple *
43 get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
44 {
45 	void *data_end = (void *)(long)skb->data_end;
46 	void *data = (void *)(long)skb->data;
47 	struct bpf_sock_tuple *result;
48 	struct ethhdr *eth;
49 	__u64 tuple_len;
50 	__u8 proto = 0;
51 	__u64 ihl_len;
52 
53 	eth = (struct ethhdr *)(data);
54 	if (eth + 1 > data_end)
55 		return NULL;
56 
57 	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
58 		struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
59 
60 		if (iph + 1 > data_end)
61 			return NULL;
62 		if (iph->ihl != 5)
63 			/* Options are not supported */
64 			return NULL;
65 		ihl_len = iph->ihl * 4;
66 		proto = iph->protocol;
67 		*ipv4 = true;
68 		result = (struct bpf_sock_tuple *)&iph->saddr;
69 	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
70 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
71 
72 		if (ip6h + 1 > data_end)
73 			return NULL;
74 		ihl_len = sizeof(*ip6h);
75 		proto = ip6h->nexthdr;
76 		*ipv4 = false;
77 		result = (struct bpf_sock_tuple *)&ip6h->saddr;
78 	} else {
79 		return (struct bpf_sock_tuple *)data;
80 	}
81 
82 	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
83 		return NULL;
84 
85 	*tcp = (proto == IPPROTO_TCP);
86 	return result;
87 }
88 
89 static inline int
90 handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
91 {
92 	struct bpf_sock_tuple ln = {0};
93 	struct bpf_sock *sk;
94 	const int zero = 0;
95 	size_t tuple_len;
96 	__be16 dport;
97 	int ret;
98 
99 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
100 	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
101 		return TC_ACT_SHOT;
102 
103 	sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
104 	if (sk)
105 		goto assign;
106 
107 	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
108 	if (dport != bpf_htons(4321))
109 		return TC_ACT_OK;
110 
111 	sk = bpf_map_lookup_elem(&server_map, &zero);
112 	if (!sk)
113 		return TC_ACT_SHOT;
114 
115 assign:
116 	ret = bpf_sk_assign(skb, sk, 0);
117 	bpf_sk_release(sk);
118 	return ret;
119 }
120 
121 static inline int
122 handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
123 {
124 	struct bpf_sock_tuple ln = {0};
125 	struct bpf_sock *sk;
126 	const int zero = 0;
127 	size_t tuple_len;
128 	__be16 dport;
129 	int ret;
130 
131 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
132 	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
133 		return TC_ACT_SHOT;
134 
135 	sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
136 	if (sk) {
137 		if (sk->state != BPF_TCP_LISTEN)
138 			goto assign;
139 		bpf_sk_release(sk);
140 	}
141 
142 	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
143 	if (dport != bpf_htons(4321))
144 		return TC_ACT_OK;
145 
146 	sk = bpf_map_lookup_elem(&server_map, &zero);
147 	if (!sk)
148 		return TC_ACT_SHOT;
149 
150 	if (sk->state != BPF_TCP_LISTEN) {
151 		bpf_sk_release(sk);
152 		return TC_ACT_SHOT;
153 	}
154 
155 assign:
156 	ret = bpf_sk_assign(skb, sk, 0);
157 	bpf_sk_release(sk);
158 	return ret;
159 }
160 
161 SEC("tc")
162 int bpf_sk_assign_test(struct __sk_buff *skb)
163 {
164 	struct bpf_sock_tuple *tuple, ln = {0};
165 	bool ipv4 = false;
166 	bool tcp = false;
167 	int tuple_len;
168 	int ret = 0;
169 
170 	tuple = get_tuple(skb, &ipv4, &tcp);
171 	if (!tuple)
172 		return TC_ACT_SHOT;
173 
174 	/* Note that the verifier socket return type for bpf_skc_lookup_tcp()
175 	 * differs from bpf_sk_lookup_udp(), so even though the C-level type is
176 	 * the same here, if we try to share the implementations they will
177 	 * fail to verify because we're crossing pointer types.
178 	 */
179 	if (tcp)
180 		ret = handle_tcp(skb, tuple, ipv4);
181 	else
182 		ret = handle_udp(skb, tuple, ipv4);
183 
184 	return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
185 }
186