1 // SPDX-License-Identifier: GPL-2.0
2 #include <string.h>
3 #include <linux/tcp.h>
4 #include <linux/bpf.h>
5 #include <netinet/in.h>
6 #include <bpf/bpf_helpers.h>
7 
/* License string for this BPF object, tagged for the "license" section. */
char _license[] SEC("license") = "GPL";
/* Version word tagged for the "version" section; fixed at 1.
 * NOTE(review): whether the loader still consumes this section depends on
 * the libbpf/kernel version in use -- confirm before relying on it.
 */
__u32 _version SEC("version") = 1;
10 
/* Fallback for builds whose headers do not define PAGE_SIZE. The
 * IP_FREEBIND branches of both programs compare the visible optval
 * window (and, for setsockopt, the incoming optlen) against this value.
 */
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif
14 
/* Fallback: when the included headers do not define SOL_TCP, use
 * IPPROTO_TCP as the TCP socket level.
 */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
#endif
18 
/* Made-up socket level handled entirely by the programs in this file:
 * _setsockopt stores one byte per socket for it and _getsockopt returns
 * that byte. Any other level not special-cased by a program is rejected.
 */
#define SOL_CUSTOM			0xdeadbeef
20 
/* Per-socket state kept in socket_storage_map. */
struct sockopt_sk {
	/* Byte saved by _setsockopt and handed back by _getsockopt for
	 * the SOL_CUSTOM level.
	 */
	__u8 val;
};
24 
/* Socket storage map (BPF_MAP_TYPE_SK_STORAGE, BPF_F_NO_PREALLOC) whose
 * value is one struct sockopt_sk. Both programs look their socket up
 * with BPF_SK_STORAGE_GET_F_CREATE, i.e. they ask for the entry to be
 * created when it does not exist yet.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct sockopt_sk);
} socket_storage_map SEC(".maps");
31 
32 SEC("cgroup/getsockopt")
33 int _getsockopt(struct bpf_sockopt *ctx)
34 {
35 	__u8 *optval_end = ctx->optval_end;
36 	__u8 *optval = ctx->optval;
37 	struct sockopt_sk *storage;
38 
39 	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
40 		/* Not interested in SOL_IP:IP_TOS;
41 		 * let next BPF program in the cgroup chain or kernel
42 		 * handle it.
43 		 */
44 		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
45 		return 1;
46 	}
47 
48 	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
49 		/* Not interested in SOL_SOCKET:SO_SNDBUF;
50 		 * let next BPF program in the cgroup chain or kernel
51 		 * handle it.
52 		 */
53 		return 1;
54 	}
55 
56 	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
57 		/* Not interested in SOL_TCP:TCP_CONGESTION;
58 		 * let next BPF program in the cgroup chain or kernel
59 		 * handle it.
60 		 */
61 		return 1;
62 	}
63 
64 	if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
65 		/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
66 		 * It has a custom implementation for performance
67 		 * reasons.
68 		 */
69 
70 		if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
71 			return 0; /* EPERM, bounds check */
72 
73 		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
74 			return 0; /* EPERM, unexpected data */
75 
76 		return 1;
77 	}
78 
79 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
80 		if (optval + 1 > optval_end)
81 			return 0; /* EPERM, bounds check */
82 
83 		ctx->retval = 0; /* Reset system call return value to zero */
84 
85 		/* Always export 0x55 */
86 		optval[0] = 0x55;
87 		ctx->optlen = 1;
88 
89 		/* Userspace buffer is PAGE_SIZE * 2, but BPF
90 		 * program can only see the first PAGE_SIZE
91 		 * bytes of data.
92 		 */
93 		if (optval_end - optval != PAGE_SIZE)
94 			return 0; /* EPERM, unexpected data size */
95 
96 		return 1;
97 	}
98 
99 	if (ctx->level != SOL_CUSTOM)
100 		return 0; /* EPERM, deny everything except custom level */
101 
102 	if (optval + 1 > optval_end)
103 		return 0; /* EPERM, bounds check */
104 
105 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
106 				     BPF_SK_STORAGE_GET_F_CREATE);
107 	if (!storage)
108 		return 0; /* EPERM, couldn't get sk storage */
109 
110 	if (!ctx->retval)
111 		return 0; /* EPERM, kernel should not have handled
112 			   * SOL_CUSTOM, something is wrong!
113 			   */
114 	ctx->retval = 0; /* Reset system call return value to zero */
115 
116 	optval[0] = storage->val;
117 	ctx->optlen = 1;
118 
119 	return 1;
120 }
121 
122 SEC("cgroup/setsockopt")
123 int _setsockopt(struct bpf_sockopt *ctx)
124 {
125 	__u8 *optval_end = ctx->optval_end;
126 	__u8 *optval = ctx->optval;
127 	struct sockopt_sk *storage;
128 
129 	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
130 		/* Not interested in SOL_IP:IP_TOS;
131 		 * let next BPF program in the cgroup chain or kernel
132 		 * handle it.
133 		 */
134 		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
135 		return 1;
136 	}
137 
138 	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
139 		/* Overwrite SO_SNDBUF value */
140 
141 		if (optval + sizeof(__u32) > optval_end)
142 			return 0; /* EPERM, bounds check */
143 
144 		*(__u32 *)optval = 0x55AA;
145 		ctx->optlen = 4;
146 
147 		return 1;
148 	}
149 
150 	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
151 		/* Always use cubic */
152 
153 		if (optval + 5 > optval_end)
154 			return 0; /* EPERM, bounds check */
155 
156 		memcpy(optval, "cubic", 5);
157 		ctx->optlen = 5;
158 
159 		return 1;
160 	}
161 
162 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
163 		/* Original optlen is larger than PAGE_SIZE. */
164 		if (ctx->optlen != PAGE_SIZE * 2)
165 			return 0; /* EPERM, unexpected data size */
166 
167 		if (optval + 1 > optval_end)
168 			return 0; /* EPERM, bounds check */
169 
170 		/* Make sure we can trim the buffer. */
171 		optval[0] = 0;
172 		ctx->optlen = 1;
173 
174 		/* Usepace buffer is PAGE_SIZE * 2, but BPF
175 		 * program can only see the first PAGE_SIZE
176 		 * bytes of data.
177 		 */
178 		if (optval_end - optval != PAGE_SIZE)
179 			return 0; /* EPERM, unexpected data size */
180 
181 		return 1;
182 	}
183 
184 	if (ctx->level != SOL_CUSTOM)
185 		return 0; /* EPERM, deny everything except custom level */
186 
187 	if (optval + 1 > optval_end)
188 		return 0; /* EPERM, bounds check */
189 
190 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
191 				     BPF_SK_STORAGE_GET_F_CREATE);
192 	if (!storage)
193 		return 0; /* EPERM, couldn't get sk storage */
194 
195 	storage->val = optval[0];
196 	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
197 			   * setsockopt handler.
198 			   */
199 
200 	return 1;
201 }
202