1 // SPDX-License-Identifier: GPL-2.0
2 #include <string.h>
3 #include <linux/tcp.h>
4 #include <linux/bpf.h>
5 #include <netinet/in.h>
6 #include <bpf/bpf_helpers.h>
7 
/* License string for this object, placed in the "license" section by SEC(). */
8 char _license[] SEC("license") = "GPL";
9 
/* Expected page size in bytes. It is 0 here and is meant to be filled in from
 * outside before the programs run. Both programs compare it with the length
 * of the optval window they can see (optval_end - optval), and _setsockopt
 * also checks the caller's optlen against twice this value.
 */
10 int page_size = 0; /* userspace should set it */
11 
/* Fall back to IPPROTO_TCP when the included headers do not define SOL_TCP. */
12 #ifndef SOL_TCP
13 #define SOL_TCP IPPROTO_TCP
14 #endif
15 
/* Socket option level that the programs below serve entirely by themselves.
 * Any level they do not explicitly recognise, other than this one, is
 * rejected.
 */
16 #define SOL_CUSTOM			0xdeadbeef
17 
/* Per-socket state kept in socket_storage_map. */
18 struct sockopt_sk {
19 	__u8 val; /* byte saved by _setsockopt and reported by _getsockopt for SOL_CUSTOM */
20 };
21 
/* Socket storage map whose values are struct sockopt_sk. Both programs reach
 * their entry through bpf_sk_storage_get() on ctx->sk, creating it on first
 * use (BPF_SK_STORAGE_GET_F_CREATE).
 */
22 struct {
23 	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
24 	__uint(map_flags, BPF_F_NO_PREALLOC);
25 	__type(key, int);
26 	__type(value, struct sockopt_sk);
27 } socket_storage_map SEC(".maps");
28 
29 SEC("cgroup/getsockopt")
30 int _getsockopt(struct bpf_sockopt *ctx)
31 {
32 	__u8 *optval_end = ctx->optval_end;
33 	__u8 *optval = ctx->optval;
34 	struct sockopt_sk *storage;
35 
36 	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
37 		/* Not interested in SOL_IP:IP_TOS;
38 		 * let next BPF program in the cgroup chain or kernel
39 		 * handle it.
40 		 */
41 		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
42 		return 1;
43 	}
44 
45 	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
46 		/* Not interested in SOL_SOCKET:SO_SNDBUF;
47 		 * let next BPF program in the cgroup chain or kernel
48 		 * handle it.
49 		 */
50 		return 1;
51 	}
52 
53 	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
54 		/* Not interested in SOL_TCP:TCP_CONGESTION;
55 		 * let next BPF program in the cgroup chain or kernel
56 		 * handle it.
57 		 */
58 		return 1;
59 	}
60 
61 	if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
62 		/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
63 		 * It has a custom implementation for performance
64 		 * reasons.
65 		 */
66 
67 		if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
68 			return 0; /* EPERM, bounds check */
69 
70 		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
71 			return 0; /* EPERM, unexpected data */
72 
73 		return 1;
74 	}
75 
76 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
77 		if (optval + 1 > optval_end)
78 			return 0; /* EPERM, bounds check */
79 
80 		ctx->retval = 0; /* Reset system call return value to zero */
81 
82 		/* Always export 0x55 */
83 		optval[0] = 0x55;
84 		ctx->optlen = 1;
85 
86 		/* Userspace buffer is PAGE_SIZE * 2, but BPF
87 		 * program can only see the first PAGE_SIZE
88 		 * bytes of data.
89 		 */
90 		if (optval_end - optval != page_size)
91 			return 0; /* EPERM, unexpected data size */
92 
93 		return 1;
94 	}
95 
96 	if (ctx->level != SOL_CUSTOM)
97 		return 0; /* EPERM, deny everything except custom level */
98 
99 	if (optval + 1 > optval_end)
100 		return 0; /* EPERM, bounds check */
101 
102 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
103 				     BPF_SK_STORAGE_GET_F_CREATE);
104 	if (!storage)
105 		return 0; /* EPERM, couldn't get sk storage */
106 
107 	if (!ctx->retval)
108 		return 0; /* EPERM, kernel should not have handled
109 			   * SOL_CUSTOM, something is wrong!
110 			   */
111 	ctx->retval = 0; /* Reset system call return value to zero */
112 
113 	optval[0] = storage->val;
114 	ctx->optlen = 1;
115 
116 	return 1;
117 }
118 
119 SEC("cgroup/setsockopt")
120 int _setsockopt(struct bpf_sockopt *ctx)
121 {
122 	__u8 *optval_end = ctx->optval_end;
123 	__u8 *optval = ctx->optval;
124 	struct sockopt_sk *storage;
125 
126 	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
127 		/* Not interested in SOL_IP:IP_TOS;
128 		 * let next BPF program in the cgroup chain or kernel
129 		 * handle it.
130 		 */
131 		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
132 		return 1;
133 	}
134 
135 	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
136 		/* Overwrite SO_SNDBUF value */
137 
138 		if (optval + sizeof(__u32) > optval_end)
139 			return 0; /* EPERM, bounds check */
140 
141 		*(__u32 *)optval = 0x55AA;
142 		ctx->optlen = 4;
143 
144 		return 1;
145 	}
146 
147 	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
148 		/* Always use cubic */
149 
150 		if (optval + 5 > optval_end)
151 			return 0; /* EPERM, bounds check */
152 
153 		memcpy(optval, "cubic", 5);
154 		ctx->optlen = 5;
155 
156 		return 1;
157 	}
158 
159 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
160 		/* Original optlen is larger than PAGE_SIZE. */
161 		if (ctx->optlen != page_size * 2)
162 			return 0; /* EPERM, unexpected data size */
163 
164 		if (optval + 1 > optval_end)
165 			return 0; /* EPERM, bounds check */
166 
167 		/* Make sure we can trim the buffer. */
168 		optval[0] = 0;
169 		ctx->optlen = 1;
170 
171 		/* Usepace buffer is PAGE_SIZE * 2, but BPF
172 		 * program can only see the first PAGE_SIZE
173 		 * bytes of data.
174 		 */
175 		if (optval_end - optval != page_size)
176 			return 0; /* EPERM, unexpected data size */
177 
178 		return 1;
179 	}
180 
181 	if (ctx->level != SOL_CUSTOM)
182 		return 0; /* EPERM, deny everything except custom level */
183 
184 	if (optval + 1 > optval_end)
185 		return 0; /* EPERM, bounds check */
186 
187 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
188 				     BPF_SK_STORAGE_GET_F_CREATE);
189 	if (!storage)
190 		return 0; /* EPERM, couldn't get sk storage */
191 
192 	storage->val = optval[0];
193 	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
194 			   * setsockopt handler.
195 			   */
196 
197 	return 1;
198 }
199