1 // SPDX-License-Identifier: GPL-2.0
2 #include <string.h>
3 #include <linux/tcp.h>
4 #include <linux/bpf.h>
5 #include <netinet/in.h>
6 #include <bpf/bpf_helpers.h>
7 
/* License string emitted into the "license" ELF section of the object. */
char _license[] SEC("license") = "GPL";
9 
/* Fallback page size, used only when the included headers do not
 * provide PAGE_SIZE. The IP_FREEBIND branches below compare the
 * visible option buffer length against this value.
 */
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif

/* Fallback for headers that do not define SOL_TCP. */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
#endif

/* Socket option level that the programs below treat as their own:
 * any other level not matched explicitly is denied, and options at
 * this level are served from per-socket storage.
 */
#define SOL_CUSTOM			0xdeadbeef
19 
/* Value type of socket_storage_map: one byte per socket.
 * _setsockopt() stores the first option byte here for SOL_CUSTOM and
 * _getsockopt() copies it back out.
 */
struct sockopt_sk {
	__u8 val;	/* last byte stored through a SOL_CUSTOM setsockopt */
};
23 
/* Socket-local storage map (BPF_MAP_TYPE_SK_STORAGE) holding one
 * struct sockopt_sk per socket. Entries are looked up, and created on
 * demand, via bpf_sk_storage_get() in the programs below.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct sockopt_sk);
} socket_storage_map SEC(".maps");
30 
31 SEC("cgroup/getsockopt")
32 int _getsockopt(struct bpf_sockopt *ctx)
33 {
34 	__u8 *optval_end = ctx->optval_end;
35 	__u8 *optval = ctx->optval;
36 	struct sockopt_sk *storage;
37 
38 	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
39 		/* Not interested in SOL_IP:IP_TOS;
40 		 * let next BPF program in the cgroup chain or kernel
41 		 * handle it.
42 		 */
43 		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
44 		return 1;
45 	}
46 
47 	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
48 		/* Not interested in SOL_SOCKET:SO_SNDBUF;
49 		 * let next BPF program in the cgroup chain or kernel
50 		 * handle it.
51 		 */
52 		return 1;
53 	}
54 
55 	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
56 		/* Not interested in SOL_TCP:TCP_CONGESTION;
57 		 * let next BPF program in the cgroup chain or kernel
58 		 * handle it.
59 		 */
60 		return 1;
61 	}
62 
63 	if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
64 		/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
65 		 * It has a custom implementation for performance
66 		 * reasons.
67 		 */
68 
69 		if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
70 			return 0; /* EPERM, bounds check */
71 
72 		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
73 			return 0; /* EPERM, unexpected data */
74 
75 		return 1;
76 	}
77 
78 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
79 		if (optval + 1 > optval_end)
80 			return 0; /* EPERM, bounds check */
81 
82 		ctx->retval = 0; /* Reset system call return value to zero */
83 
84 		/* Always export 0x55 */
85 		optval[0] = 0x55;
86 		ctx->optlen = 1;
87 
88 		/* Userspace buffer is PAGE_SIZE * 2, but BPF
89 		 * program can only see the first PAGE_SIZE
90 		 * bytes of data.
91 		 */
92 		if (optval_end - optval != PAGE_SIZE)
93 			return 0; /* EPERM, unexpected data size */
94 
95 		return 1;
96 	}
97 
98 	if (ctx->level != SOL_CUSTOM)
99 		return 0; /* EPERM, deny everything except custom level */
100 
101 	if (optval + 1 > optval_end)
102 		return 0; /* EPERM, bounds check */
103 
104 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
105 				     BPF_SK_STORAGE_GET_F_CREATE);
106 	if (!storage)
107 		return 0; /* EPERM, couldn't get sk storage */
108 
109 	if (!ctx->retval)
110 		return 0; /* EPERM, kernel should not have handled
111 			   * SOL_CUSTOM, something is wrong!
112 			   */
113 	ctx->retval = 0; /* Reset system call return value to zero */
114 
115 	optval[0] = storage->val;
116 	ctx->optlen = 1;
117 
118 	return 1;
119 }
120 
121 SEC("cgroup/setsockopt")
122 int _setsockopt(struct bpf_sockopt *ctx)
123 {
124 	__u8 *optval_end = ctx->optval_end;
125 	__u8 *optval = ctx->optval;
126 	struct sockopt_sk *storage;
127 
128 	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
129 		/* Not interested in SOL_IP:IP_TOS;
130 		 * let next BPF program in the cgroup chain or kernel
131 		 * handle it.
132 		 */
133 		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
134 		return 1;
135 	}
136 
137 	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
138 		/* Overwrite SO_SNDBUF value */
139 
140 		if (optval + sizeof(__u32) > optval_end)
141 			return 0; /* EPERM, bounds check */
142 
143 		*(__u32 *)optval = 0x55AA;
144 		ctx->optlen = 4;
145 
146 		return 1;
147 	}
148 
149 	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
150 		/* Always use cubic */
151 
152 		if (optval + 5 > optval_end)
153 			return 0; /* EPERM, bounds check */
154 
155 		memcpy(optval, "cubic", 5);
156 		ctx->optlen = 5;
157 
158 		return 1;
159 	}
160 
161 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
162 		/* Original optlen is larger than PAGE_SIZE. */
163 		if (ctx->optlen != PAGE_SIZE * 2)
164 			return 0; /* EPERM, unexpected data size */
165 
166 		if (optval + 1 > optval_end)
167 			return 0; /* EPERM, bounds check */
168 
169 		/* Make sure we can trim the buffer. */
170 		optval[0] = 0;
171 		ctx->optlen = 1;
172 
173 		/* Usepace buffer is PAGE_SIZE * 2, but BPF
174 		 * program can only see the first PAGE_SIZE
175 		 * bytes of data.
176 		 */
177 		if (optval_end - optval != PAGE_SIZE)
178 			return 0; /* EPERM, unexpected data size */
179 
180 		return 1;
181 	}
182 
183 	if (ctx->level != SOL_CUSTOM)
184 		return 0; /* EPERM, deny everything except custom level */
185 
186 	if (optval + 1 > optval_end)
187 		return 0; /* EPERM, bounds check */
188 
189 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
190 				     BPF_SK_STORAGE_GET_F_CREATE);
191 	if (!storage)
192 		return 0; /* EPERM, couldn't get sk storage */
193 
194 	storage->val = optval[0];
195 	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
196 			   * setsockopt handler.
197 			   */
198 
199 	return 1;
200 }
201