// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include <linux/tcp.h>
#include <linux/bpf.h>
#include <netinet/in.h>
#include <bpf/bpf_helpers.h>

88a027dc0SStanislav Fomichev char _license[] SEC("license") = "GPL";
98a027dc0SStanislav Fomichev 
10361d3202SYauheni Kaliuta int page_size = 0; /* userspace should set it */
11a0cb12b0SStanislav Fomichev 
129cacf81fSStanislav Fomichev #ifndef SOL_TCP
139cacf81fSStanislav Fomichev #define SOL_TCP IPPROTO_TCP
149cacf81fSStanislav Fomichev #endif
159cacf81fSStanislav Fomichev 
168a027dc0SStanislav Fomichev #define SOL_CUSTOM			0xdeadbeef
178a027dc0SStanislav Fomichev 
/* Value type of socket_storage_map: one byte kept per socket.
 * _setsockopt() stores the first optval byte of a SOL_CUSTOM call here
 * and _getsockopt() reports it back.
 */
struct sockopt_sk {
	__u8 val;
};

2236b5d471SAndrii Nakryiko struct {
2336b5d471SAndrii Nakryiko 	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
2436b5d471SAndrii Nakryiko 	__uint(map_flags, BPF_F_NO_PREALLOC);
2536b5d471SAndrii Nakryiko 	__type(key, int);
2636b5d471SAndrii Nakryiko 	__type(value, struct sockopt_sk);
2736b5d471SAndrii Nakryiko } socket_storage_map SEC(".maps");
288a027dc0SStanislav Fomichev 
298a027dc0SStanislav Fomichev SEC("cgroup/getsockopt")
_getsockopt(struct bpf_sockopt * ctx)308a027dc0SStanislav Fomichev int _getsockopt(struct bpf_sockopt *ctx)
318a027dc0SStanislav Fomichev {
328a027dc0SStanislav Fomichev 	__u8 *optval_end = ctx->optval_end;
338a027dc0SStanislav Fomichev 	__u8 *optval = ctx->optval;
348a027dc0SStanislav Fomichev 	struct sockopt_sk *storage;
35833d67ecSStanislav Fomichev 	struct bpf_sock *sk;
36833d67ecSStanislav Fomichev 
37833d67ecSStanislav Fomichev 	/* Bypass AF_NETLINK. */
38833d67ecSStanislav Fomichev 	sk = ctx->sk;
39833d67ecSStanislav Fomichev 	if (sk && sk->family == AF_NETLINK)
40*e01b4a72SStanislav Fomichev 		goto out;
418a027dc0SStanislav Fomichev 
426a3a3dccSStanislav Fomichev 	/* Make sure bpf_get_netns_cookie is callable.
436a3a3dccSStanislav Fomichev 	 */
446a3a3dccSStanislav Fomichev 	if (bpf_get_netns_cookie(NULL) == 0)
456a3a3dccSStanislav Fomichev 		return 0;
466a3a3dccSStanislav Fomichev 
476a3a3dccSStanislav Fomichev 	if (bpf_get_netns_cookie(ctx) == 0)
486a3a3dccSStanislav Fomichev 		return 0;
496a3a3dccSStanislav Fomichev 
50a0cb12b0SStanislav Fomichev 	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
518a027dc0SStanislav Fomichev 		/* Not interested in SOL_IP:IP_TOS;
528a027dc0SStanislav Fomichev 		 * let next BPF program in the cgroup chain or kernel
538a027dc0SStanislav Fomichev 		 * handle it.
548a027dc0SStanislav Fomichev 		 */
55*e01b4a72SStanislav Fomichev 		goto out;
56a0cb12b0SStanislav Fomichev 	}
578a027dc0SStanislav Fomichev 
588a027dc0SStanislav Fomichev 	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
598a027dc0SStanislav Fomichev 		/* Not interested in SOL_SOCKET:SO_SNDBUF;
608a027dc0SStanislav Fomichev 		 * let next BPF program in the cgroup chain or kernel
618a027dc0SStanislav Fomichev 		 * handle it.
628a027dc0SStanislav Fomichev 		 */
63*e01b4a72SStanislav Fomichev 		goto out;
648a027dc0SStanislav Fomichev 	}
658a027dc0SStanislav Fomichev 
66fd5ef31fSStanislav Fomichev 	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
67fd5ef31fSStanislav Fomichev 		/* Not interested in SOL_TCP:TCP_CONGESTION;
68fd5ef31fSStanislav Fomichev 		 * let next BPF program in the cgroup chain or kernel
69fd5ef31fSStanislav Fomichev 		 * handle it.
70fd5ef31fSStanislav Fomichev 		 */
71*e01b4a72SStanislav Fomichev 		goto out;
72fd5ef31fSStanislav Fomichev 	}
73fd5ef31fSStanislav Fomichev 
749cacf81fSStanislav Fomichev 	if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
759cacf81fSStanislav Fomichev 		/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
769cacf81fSStanislav Fomichev 		 * It has a custom implementation for performance
779cacf81fSStanislav Fomichev 		 * reasons.
789cacf81fSStanislav Fomichev 		 */
799cacf81fSStanislav Fomichev 
80fc1ca955SFelix Maurer 		/* Check that optval contains address (__u64) */
81fc1ca955SFelix Maurer 		if (optval + sizeof(__u64) > optval_end)
821080ef5cSYiFei Zhu 			return 0; /* bounds check */
839cacf81fSStanislav Fomichev 
849cacf81fSStanislav Fomichev 		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
851080ef5cSYiFei Zhu 			return 0; /* unexpected data */
869cacf81fSStanislav Fomichev 
87*e01b4a72SStanislav Fomichev 		goto out;
889cacf81fSStanislav Fomichev 	}
899cacf81fSStanislav Fomichev 
90a0cb12b0SStanislav Fomichev 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
91a0cb12b0SStanislav Fomichev 		if (optval + 1 > optval_end)
921080ef5cSYiFei Zhu 			return 0; /* bounds check */
93a0cb12b0SStanislav Fomichev 
94a0cb12b0SStanislav Fomichev 		ctx->retval = 0; /* Reset system call return value to zero */
95a0cb12b0SStanislav Fomichev 
96a0cb12b0SStanislav Fomichev 		/* Always export 0x55 */
97a0cb12b0SStanislav Fomichev 		optval[0] = 0x55;
98a0cb12b0SStanislav Fomichev 		ctx->optlen = 1;
99a0cb12b0SStanislav Fomichev 
100a0cb12b0SStanislav Fomichev 		/* Userspace buffer is PAGE_SIZE * 2, but BPF
101a0cb12b0SStanislav Fomichev 		 * program can only see the first PAGE_SIZE
102a0cb12b0SStanislav Fomichev 		 * bytes of data.
103a0cb12b0SStanislav Fomichev 		 */
104361d3202SYauheni Kaliuta 		if (optval_end - optval != page_size)
1051080ef5cSYiFei Zhu 			return 0; /* unexpected data size */
106a0cb12b0SStanislav Fomichev 
107a0cb12b0SStanislav Fomichev 		return 1;
108a0cb12b0SStanislav Fomichev 	}
109a0cb12b0SStanislav Fomichev 
1108a027dc0SStanislav Fomichev 	if (ctx->level != SOL_CUSTOM)
1111080ef5cSYiFei Zhu 		return 0; /* deny everything except custom level */
1128a027dc0SStanislav Fomichev 
1138a027dc0SStanislav Fomichev 	if (optval + 1 > optval_end)
1141080ef5cSYiFei Zhu 		return 0; /* bounds check */
1158a027dc0SStanislav Fomichev 
1168a027dc0SStanislav Fomichev 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
1178a027dc0SStanislav Fomichev 				     BPF_SK_STORAGE_GET_F_CREATE);
1188a027dc0SStanislav Fomichev 	if (!storage)
1191080ef5cSYiFei Zhu 		return 0; /* couldn't get sk storage */
1208a027dc0SStanislav Fomichev 
1218a027dc0SStanislav Fomichev 	if (!ctx->retval)
1221080ef5cSYiFei Zhu 		return 0; /* kernel should not have handled
1238a027dc0SStanislav Fomichev 			   * SOL_CUSTOM, something is wrong!
1248a027dc0SStanislav Fomichev 			   */
1258a027dc0SStanislav Fomichev 	ctx->retval = 0; /* Reset system call return value to zero */
1268a027dc0SStanislav Fomichev 
1278a027dc0SStanislav Fomichev 	optval[0] = storage->val;
1288a027dc0SStanislav Fomichev 	ctx->optlen = 1;
1298a027dc0SStanislav Fomichev 
1308a027dc0SStanislav Fomichev 	return 1;
131*e01b4a72SStanislav Fomichev 
132*e01b4a72SStanislav Fomichev out:
133*e01b4a72SStanislav Fomichev 	/* optval larger than PAGE_SIZE use kernel's buffer. */
134*e01b4a72SStanislav Fomichev 	if (ctx->optlen > page_size)
135*e01b4a72SStanislav Fomichev 		ctx->optlen = 0;
136*e01b4a72SStanislav Fomichev 	return 1;
1378a027dc0SStanislav Fomichev }
1388a027dc0SStanislav Fomichev 
1398a027dc0SStanislav Fomichev SEC("cgroup/setsockopt")
_setsockopt(struct bpf_sockopt * ctx)1408a027dc0SStanislav Fomichev int _setsockopt(struct bpf_sockopt *ctx)
1418a027dc0SStanislav Fomichev {
1428a027dc0SStanislav Fomichev 	__u8 *optval_end = ctx->optval_end;
1438a027dc0SStanislav Fomichev 	__u8 *optval = ctx->optval;
1448a027dc0SStanislav Fomichev 	struct sockopt_sk *storage;
145833d67ecSStanislav Fomichev 	struct bpf_sock *sk;
146833d67ecSStanislav Fomichev 
147833d67ecSStanislav Fomichev 	/* Bypass AF_NETLINK. */
148833d67ecSStanislav Fomichev 	sk = ctx->sk;
149833d67ecSStanislav Fomichev 	if (sk && sk->family == AF_NETLINK)
150*e01b4a72SStanislav Fomichev 		goto out;
1518a027dc0SStanislav Fomichev 
1526a3a3dccSStanislav Fomichev 	/* Make sure bpf_get_netns_cookie is callable.
1536a3a3dccSStanislav Fomichev 	 */
1546a3a3dccSStanislav Fomichev 	if (bpf_get_netns_cookie(NULL) == 0)
1556a3a3dccSStanislav Fomichev 		return 0;
1566a3a3dccSStanislav Fomichev 
1576a3a3dccSStanislav Fomichev 	if (bpf_get_netns_cookie(ctx) == 0)
1586a3a3dccSStanislav Fomichev 		return 0;
1596a3a3dccSStanislav Fomichev 
160a0cb12b0SStanislav Fomichev 	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
1618a027dc0SStanislav Fomichev 		/* Not interested in SOL_IP:IP_TOS;
1628a027dc0SStanislav Fomichev 		 * let next BPF program in the cgroup chain or kernel
1638a027dc0SStanislav Fomichev 		 * handle it.
1648a027dc0SStanislav Fomichev 		 */
165a0cb12b0SStanislav Fomichev 		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
1668a027dc0SStanislav Fomichev 		return 1;
167a0cb12b0SStanislav Fomichev 	}
1688a027dc0SStanislav Fomichev 
1698a027dc0SStanislav Fomichev 	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
1708a027dc0SStanislav Fomichev 		/* Overwrite SO_SNDBUF value */
1718a027dc0SStanislav Fomichev 
1728a027dc0SStanislav Fomichev 		if (optval + sizeof(__u32) > optval_end)
1731080ef5cSYiFei Zhu 			return 0; /* bounds check */
1748a027dc0SStanislav Fomichev 
1758a027dc0SStanislav Fomichev 		*(__u32 *)optval = 0x55AA;
1768a027dc0SStanislav Fomichev 		ctx->optlen = 4;
1778a027dc0SStanislav Fomichev 
1788a027dc0SStanislav Fomichev 		return 1;
1798a027dc0SStanislav Fomichev 	}
1808a027dc0SStanislav Fomichev 
181fd5ef31fSStanislav Fomichev 	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
182fd5ef31fSStanislav Fomichev 		/* Always use cubic */
183fd5ef31fSStanislav Fomichev 
184fd5ef31fSStanislav Fomichev 		if (optval + 5 > optval_end)
1851080ef5cSYiFei Zhu 			return 0; /* bounds check */
186fd5ef31fSStanislav Fomichev 
187fd5ef31fSStanislav Fomichev 		memcpy(optval, "cubic", 5);
188fd5ef31fSStanislav Fomichev 		ctx->optlen = 5;
189fd5ef31fSStanislav Fomichev 
190fd5ef31fSStanislav Fomichev 		return 1;
191fd5ef31fSStanislav Fomichev 	}
192fd5ef31fSStanislav Fomichev 
193a0cb12b0SStanislav Fomichev 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
194a0cb12b0SStanislav Fomichev 		/* Original optlen is larger than PAGE_SIZE. */
195361d3202SYauheni Kaliuta 		if (ctx->optlen != page_size * 2)
1961080ef5cSYiFei Zhu 			return 0; /* unexpected data size */
197a0cb12b0SStanislav Fomichev 
198a0cb12b0SStanislav Fomichev 		if (optval + 1 > optval_end)
1991080ef5cSYiFei Zhu 			return 0; /* bounds check */
200a0cb12b0SStanislav Fomichev 
201a0cb12b0SStanislav Fomichev 		/* Make sure we can trim the buffer. */
202a0cb12b0SStanislav Fomichev 		optval[0] = 0;
203a0cb12b0SStanislav Fomichev 		ctx->optlen = 1;
204a0cb12b0SStanislav Fomichev 
205a0cb12b0SStanislav Fomichev 		/* Usepace buffer is PAGE_SIZE * 2, but BPF
206a0cb12b0SStanislav Fomichev 		 * program can only see the first PAGE_SIZE
207a0cb12b0SStanislav Fomichev 		 * bytes of data.
208a0cb12b0SStanislav Fomichev 		 */
209361d3202SYauheni Kaliuta 		if (optval_end - optval != page_size)
2101080ef5cSYiFei Zhu 			return 0; /* unexpected data size */
211a0cb12b0SStanislav Fomichev 
212a0cb12b0SStanislav Fomichev 		return 1;
213a0cb12b0SStanislav Fomichev 	}
214a0cb12b0SStanislav Fomichev 
2158a027dc0SStanislav Fomichev 	if (ctx->level != SOL_CUSTOM)
2161080ef5cSYiFei Zhu 		return 0; /* deny everything except custom level */
2178a027dc0SStanislav Fomichev 
2188a027dc0SStanislav Fomichev 	if (optval + 1 > optval_end)
2191080ef5cSYiFei Zhu 		return 0; /* bounds check */
2208a027dc0SStanislav Fomichev 
2218a027dc0SStanislav Fomichev 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
2228a027dc0SStanislav Fomichev 				     BPF_SK_STORAGE_GET_F_CREATE);
2238a027dc0SStanislav Fomichev 	if (!storage)
2241080ef5cSYiFei Zhu 		return 0; /* couldn't get sk storage */
2258a027dc0SStanislav Fomichev 
2268a027dc0SStanislav Fomichev 	storage->val = optval[0];
2278a027dc0SStanislav Fomichev 	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
2288a027dc0SStanislav Fomichev 			   * setsockopt handler.
2298a027dc0SStanislav Fomichev 			   */
2308a027dc0SStanislav Fomichev 
2318a027dc0SStanislav Fomichev 	return 1;
232*e01b4a72SStanislav Fomichev 
233*e01b4a72SStanislav Fomichev out:
234*e01b4a72SStanislav Fomichev 	/* optval larger than PAGE_SIZE use kernel's buffer. */
235*e01b4a72SStanislav Fomichev 	if (ctx->optlen > page_size)
236*e01b4a72SStanislav Fomichev 		ctx->optlen = 0;
237*e01b4a72SStanislav Fomichev 	return 1;
2388a027dc0SStanislav Fomichev }
239