1 // SPDX-License-Identifier: GPL-2.0
2 #include <string.h>
3 #include <linux/tcp.h>
4 #include <linux/bpf.h>
5 #include <netinet/in.h>
6 #include <bpf/bpf_helpers.h>
7 
8 char _license[] SEC("license") = "GPL";
9 
10 int page_size = 0; /* userspace should set it */
11 
/* Fall back to IPPROTO_TCP when the included headers do not define SOL_TCP. */
#if !defined(SOL_TCP)
#define SOL_TCP IPPROTO_TCP
#endif

/* Socket option level that the programs in this file serve from per-socket
 * storage; every level not explicitly handled before it is rejected.
 */
#define SOL_CUSTOM			0xdeadbeef
17 
18 struct sockopt_sk {
19 	__u8 val;
20 };
21 
22 struct {
23 	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
24 	__uint(map_flags, BPF_F_NO_PREALLOC);
25 	__type(key, int);
26 	__type(value, struct sockopt_sk);
27 } socket_storage_map SEC(".maps");
28 
29 SEC("cgroup/getsockopt")
30 int _getsockopt(struct bpf_sockopt *ctx)
31 {
32 	__u8 *optval_end = ctx->optval_end;
33 	__u8 *optval = ctx->optval;
34 	struct sockopt_sk *storage;
35 	struct bpf_sock *sk;
36 
37 	/* Bypass AF_NETLINK. */
38 	sk = ctx->sk;
39 	if (sk && sk->family == AF_NETLINK)
40 		return 1;
41 
42 	/* Make sure bpf_get_netns_cookie is callable.
43 	 */
44 	if (bpf_get_netns_cookie(NULL) == 0)
45 		return 0;
46 
47 	if (bpf_get_netns_cookie(ctx) == 0)
48 		return 0;
49 
50 	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
51 		/* Not interested in SOL_IP:IP_TOS;
52 		 * let next BPF program in the cgroup chain or kernel
53 		 * handle it.
54 		 */
55 		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
56 		return 1;
57 	}
58 
59 	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
60 		/* Not interested in SOL_SOCKET:SO_SNDBUF;
61 		 * let next BPF program in the cgroup chain or kernel
62 		 * handle it.
63 		 */
64 		return 1;
65 	}
66 
67 	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
68 		/* Not interested in SOL_TCP:TCP_CONGESTION;
69 		 * let next BPF program in the cgroup chain or kernel
70 		 * handle it.
71 		 */
72 		return 1;
73 	}
74 
75 	if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
76 		/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
77 		 * It has a custom implementation for performance
78 		 * reasons.
79 		 */
80 
81 		/* Check that optval contains address (__u64) */
82 		if (optval + sizeof(__u64) > optval_end)
83 			return 0; /* bounds check */
84 
85 		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
86 			return 0; /* unexpected data */
87 
88 		return 1;
89 	}
90 
91 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
92 		if (optval + 1 > optval_end)
93 			return 0; /* bounds check */
94 
95 		ctx->retval = 0; /* Reset system call return value to zero */
96 
97 		/* Always export 0x55 */
98 		optval[0] = 0x55;
99 		ctx->optlen = 1;
100 
101 		/* Userspace buffer is PAGE_SIZE * 2, but BPF
102 		 * program can only see the first PAGE_SIZE
103 		 * bytes of data.
104 		 */
105 		if (optval_end - optval != page_size)
106 			return 0; /* unexpected data size */
107 
108 		return 1;
109 	}
110 
111 	if (ctx->level != SOL_CUSTOM)
112 		return 0; /* deny everything except custom level */
113 
114 	if (optval + 1 > optval_end)
115 		return 0; /* bounds check */
116 
117 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
118 				     BPF_SK_STORAGE_GET_F_CREATE);
119 	if (!storage)
120 		return 0; /* couldn't get sk storage */
121 
122 	if (!ctx->retval)
123 		return 0; /* kernel should not have handled
124 			   * SOL_CUSTOM, something is wrong!
125 			   */
126 	ctx->retval = 0; /* Reset system call return value to zero */
127 
128 	optval[0] = storage->val;
129 	ctx->optlen = 1;
130 
131 	return 1;
132 }
133 
134 SEC("cgroup/setsockopt")
135 int _setsockopt(struct bpf_sockopt *ctx)
136 {
137 	__u8 *optval_end = ctx->optval_end;
138 	__u8 *optval = ctx->optval;
139 	struct sockopt_sk *storage;
140 	struct bpf_sock *sk;
141 
142 	/* Bypass AF_NETLINK. */
143 	sk = ctx->sk;
144 	if (sk && sk->family == AF_NETLINK)
145 		return 1;
146 
147 	/* Make sure bpf_get_netns_cookie is callable.
148 	 */
149 	if (bpf_get_netns_cookie(NULL) == 0)
150 		return 0;
151 
152 	if (bpf_get_netns_cookie(ctx) == 0)
153 		return 0;
154 
155 	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
156 		/* Not interested in SOL_IP:IP_TOS;
157 		 * let next BPF program in the cgroup chain or kernel
158 		 * handle it.
159 		 */
160 		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
161 		return 1;
162 	}
163 
164 	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
165 		/* Overwrite SO_SNDBUF value */
166 
167 		if (optval + sizeof(__u32) > optval_end)
168 			return 0; /* bounds check */
169 
170 		*(__u32 *)optval = 0x55AA;
171 		ctx->optlen = 4;
172 
173 		return 1;
174 	}
175 
176 	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
177 		/* Always use cubic */
178 
179 		if (optval + 5 > optval_end)
180 			return 0; /* bounds check */
181 
182 		memcpy(optval, "cubic", 5);
183 		ctx->optlen = 5;
184 
185 		return 1;
186 	}
187 
188 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
189 		/* Original optlen is larger than PAGE_SIZE. */
190 		if (ctx->optlen != page_size * 2)
191 			return 0; /* unexpected data size */
192 
193 		if (optval + 1 > optval_end)
194 			return 0; /* bounds check */
195 
196 		/* Make sure we can trim the buffer. */
197 		optval[0] = 0;
198 		ctx->optlen = 1;
199 
200 		/* Usepace buffer is PAGE_SIZE * 2, but BPF
201 		 * program can only see the first PAGE_SIZE
202 		 * bytes of data.
203 		 */
204 		if (optval_end - optval != page_size)
205 			return 0; /* unexpected data size */
206 
207 		return 1;
208 	}
209 
210 	if (ctx->level != SOL_CUSTOM)
211 		return 0; /* deny everything except custom level */
212 
213 	if (optval + 1 > optval_end)
214 		return 0; /* bounds check */
215 
216 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
217 				     BPF_SK_STORAGE_GET_F_CREATE);
218 	if (!storage)
219 		return 0; /* couldn't get sk storage */
220 
221 	storage->val = optval[0];
222 	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
223 			   * setsockopt handler.
224 			   */
225 
226 	return 1;
227 }
228