1bd4aed0eSJiong Wang // SPDX-License-Identifier: GPL-2.0
2bd4aed0eSJiong Wang // Copyright (c) 2018 Facebook
3bd4aed0eSJiong Wang 
4bd4aed0eSJiong Wang #include <string.h>
5bd4aed0eSJiong Wang 
6bd4aed0eSJiong Wang #include <linux/stddef.h>
7bd4aed0eSJiong Wang #include <linux/bpf.h>
8bd4aed0eSJiong Wang #include <linux/in.h>
9bd4aed0eSJiong Wang #include <linux/in6.h>
102eb68040SJames Hilliard #include <linux/tcp.h>
119c441fe4SFerenc Fejes #include <linux/if.h>
129c441fe4SFerenc Fejes #include <errno.h>
13bd4aed0eSJiong Wang 
143e689141SToke Høiland-Jørgensen #include <bpf/bpf_helpers.h>
153e689141SToke Høiland-Jørgensen #include <bpf/bpf_endian.h>
16bd4aed0eSJiong Wang 
172eb68040SJames Hilliard #include "bpf_tcp_helpers.h"
182eb68040SJames Hilliard 
/* Source address the socket is bound to in do_bind(): 127.0.0.4 (host order). */
#define SRC_REWRITE_IP4		0x7f000004U
/* Destination address connect() is rewritten to: 127.0.0.1 (host order). */
#define DST_REWRITE_IP4		0x7f000001U
/* Destination port connect() is rewritten to (host order). */
#define DST_REWRITE_PORT4	4444

/* Fallback definitions, used only when the included headers lack them. */
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif

#ifndef TCP_NOTSENT_LOWAT
#define TCP_NOTSENT_LOWAT 25
#endif

#ifndef IFNAMSIZ
#define IFNAMSIZ 16
#endif

35*63d78b7eSYonghong Song __attribute__ ((noinline)) __weak
do_bind(struct bpf_sock_addr * ctx)361d8a0af5SToke Høiland-Jørgensen int do_bind(struct bpf_sock_addr *ctx)
371d8a0af5SToke Høiland-Jørgensen {
381d8a0af5SToke Høiland-Jørgensen 	struct sockaddr_in sa = {};
391d8a0af5SToke Høiland-Jørgensen 
401d8a0af5SToke Høiland-Jørgensen 	sa.sin_family = AF_INET;
411d8a0af5SToke Høiland-Jørgensen 	sa.sin_port = bpf_htons(0);
421d8a0af5SToke Høiland-Jørgensen 	sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
431d8a0af5SToke Høiland-Jørgensen 
441d8a0af5SToke Høiland-Jørgensen 	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
451d8a0af5SToke Høiland-Jørgensen 		return 0;
461d8a0af5SToke Høiland-Jørgensen 
471d8a0af5SToke Høiland-Jørgensen 	return 1;
481d8a0af5SToke Høiland-Jørgensen }
491d8a0af5SToke Høiland-Jørgensen 
verify_cc(struct bpf_sock_addr * ctx,char expected[TCP_CA_NAME_MAX])50beecf11bSStanislav Fomichev static __inline int verify_cc(struct bpf_sock_addr *ctx,
51beecf11bSStanislav Fomichev 			      char expected[TCP_CA_NAME_MAX])
52beecf11bSStanislav Fomichev {
53beecf11bSStanislav Fomichev 	char buf[TCP_CA_NAME_MAX];
54beecf11bSStanislav Fomichev 	int i;
55beecf11bSStanislav Fomichev 
56beecf11bSStanislav Fomichev 	if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
57beecf11bSStanislav Fomichev 		return 1;
58beecf11bSStanislav Fomichev 
59beecf11bSStanislav Fomichev 	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
60beecf11bSStanislav Fomichev 		if (buf[i] != expected[i])
61beecf11bSStanislav Fomichev 			return 1;
62beecf11bSStanislav Fomichev 		if (buf[i] == 0)
63beecf11bSStanislav Fomichev 			break;
64beecf11bSStanislav Fomichev 	}
65beecf11bSStanislav Fomichev 
66beecf11bSStanislav Fomichev 	return 0;
67beecf11bSStanislav Fomichev }
68beecf11bSStanislav Fomichev 
set_cc(struct bpf_sock_addr * ctx)69beecf11bSStanislav Fomichev static __inline int set_cc(struct bpf_sock_addr *ctx)
70beecf11bSStanislav Fomichev {
7157dc6f3bSStanislav Fomichev 	char reno[TCP_CA_NAME_MAX] = "reno";
72beecf11bSStanislav Fomichev 	char cubic[TCP_CA_NAME_MAX] = "cubic";
73beecf11bSStanislav Fomichev 
7457dc6f3bSStanislav Fomichev 	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
75beecf11bSStanislav Fomichev 		return 1;
7657dc6f3bSStanislav Fomichev 	if (verify_cc(ctx, reno))
77beecf11bSStanislav Fomichev 		return 1;
78beecf11bSStanislav Fomichev 
79beecf11bSStanislav Fomichev 	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
80beecf11bSStanislav Fomichev 		return 1;
81beecf11bSStanislav Fomichev 	if (verify_cc(ctx, cubic))
82beecf11bSStanislav Fomichev 		return 1;
83beecf11bSStanislav Fomichev 
84beecf11bSStanislav Fomichev 	return 0;
85beecf11bSStanislav Fomichev }
86beecf11bSStanislav Fomichev 
bind_to_device(struct bpf_sock_addr * ctx)879c441fe4SFerenc Fejes static __inline int bind_to_device(struct bpf_sock_addr *ctx)
889c441fe4SFerenc Fejes {
899c441fe4SFerenc Fejes 	char veth1[IFNAMSIZ] = "test_sock_addr1";
909c441fe4SFerenc Fejes 	char veth2[IFNAMSIZ] = "test_sock_addr2";
919c441fe4SFerenc Fejes 	char missing[IFNAMSIZ] = "nonexistent_dev";
929c441fe4SFerenc Fejes 	char del_bind[IFNAMSIZ] = "";
939c441fe4SFerenc Fejes 
949c441fe4SFerenc Fejes 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
959c441fe4SFerenc Fejes 				&veth1, sizeof(veth1)))
969c441fe4SFerenc Fejes 		return 1;
979c441fe4SFerenc Fejes 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
989c441fe4SFerenc Fejes 				&veth2, sizeof(veth2)))
999c441fe4SFerenc Fejes 		return 1;
1009c441fe4SFerenc Fejes 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
1019c441fe4SFerenc Fejes 				&missing, sizeof(missing)) != -ENODEV)
1029c441fe4SFerenc Fejes 		return 1;
1039c441fe4SFerenc Fejes 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
1049c441fe4SFerenc Fejes 				&del_bind, sizeof(del_bind)))
1059c441fe4SFerenc Fejes 		return 1;
1069c441fe4SFerenc Fejes 
1079c441fe4SFerenc Fejes 	return 0;
1089c441fe4SFerenc Fejes }
1099c441fe4SFerenc Fejes 
set_keepalive(struct bpf_sock_addr * ctx)110f9bcf968SDmitry Yakunin static __inline int set_keepalive(struct bpf_sock_addr *ctx)
111f9bcf968SDmitry Yakunin {
112f9bcf968SDmitry Yakunin 	int zero = 0, one = 1;
113f9bcf968SDmitry Yakunin 
114f9bcf968SDmitry Yakunin 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
115f9bcf968SDmitry Yakunin 		return 1;
116f9bcf968SDmitry Yakunin 	if (ctx->type == SOCK_STREAM) {
117f9bcf968SDmitry Yakunin 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
118f9bcf968SDmitry Yakunin 			return 1;
119f9bcf968SDmitry Yakunin 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
120f9bcf968SDmitry Yakunin 			return 1;
121f9bcf968SDmitry Yakunin 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
122f9bcf968SDmitry Yakunin 			return 1;
123f9bcf968SDmitry Yakunin 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
124f9bcf968SDmitry Yakunin 			return 1;
125f9bcf968SDmitry Yakunin 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
126f9bcf968SDmitry Yakunin 			return 1;
127f9bcf968SDmitry Yakunin 	}
128f9bcf968SDmitry Yakunin 	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
129f9bcf968SDmitry Yakunin 		return 1;
130f9bcf968SDmitry Yakunin 
131f9bcf968SDmitry Yakunin 	return 0;
132f9bcf968SDmitry Yakunin }
133f9bcf968SDmitry Yakunin 
set_notsent_lowat(struct bpf_sock_addr * ctx)134eca43ee6SNikita V. Shirokov static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
135eca43ee6SNikita V. Shirokov {
136eca43ee6SNikita V. Shirokov 	int lowat = 65535;
137eca43ee6SNikita V. Shirokov 
138eca43ee6SNikita V. Shirokov 	if (ctx->type == SOCK_STREAM) {
139eca43ee6SNikita V. Shirokov 		if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
140eca43ee6SNikita V. Shirokov 			return 1;
141eca43ee6SNikita V. Shirokov 	}
142eca43ee6SNikita V. Shirokov 
143eca43ee6SNikita V. Shirokov 	return 0;
144eca43ee6SNikita V. Shirokov }
145eca43ee6SNikita V. Shirokov 
146bd4aed0eSJiong Wang SEC("cgroup/connect4")
connect_v4_prog(struct bpf_sock_addr * ctx)147bd4aed0eSJiong Wang int connect_v4_prog(struct bpf_sock_addr *ctx)
148bd4aed0eSJiong Wang {
149bd4aed0eSJiong Wang 	struct bpf_sock_tuple tuple = {};
150bd4aed0eSJiong Wang 	struct bpf_sock *sk;
151bd4aed0eSJiong Wang 
152bd4aed0eSJiong Wang 	/* Verify that new destination is available. */
153bd4aed0eSJiong Wang 	memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr));
154bd4aed0eSJiong Wang 	memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport));
155bd4aed0eSJiong Wang 
156bd4aed0eSJiong Wang 	tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
157bd4aed0eSJiong Wang 	tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
158bd4aed0eSJiong Wang 
1599c441fe4SFerenc Fejes 	/* Bind to device and unbind it. */
1609c441fe4SFerenc Fejes 	if (bind_to_device(ctx))
1619c441fe4SFerenc Fejes 		return 0;
1629c441fe4SFerenc Fejes 
163f9bcf968SDmitry Yakunin 	if (set_keepalive(ctx))
164f9bcf968SDmitry Yakunin 		return 0;
165f9bcf968SDmitry Yakunin 
166eca43ee6SNikita V. Shirokov 	if (set_notsent_lowat(ctx))
167eca43ee6SNikita V. Shirokov 		return 0;
168eca43ee6SNikita V. Shirokov 
169bd4aed0eSJiong Wang 	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
170bd4aed0eSJiong Wang 		return 0;
171bd4aed0eSJiong Wang 	else if (ctx->type == SOCK_STREAM)
172bd4aed0eSJiong Wang 		sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
173bd4aed0eSJiong Wang 				       BPF_F_CURRENT_NETNS, 0);
174bd4aed0eSJiong Wang 	else
175bd4aed0eSJiong Wang 		sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4),
176bd4aed0eSJiong Wang 				       BPF_F_CURRENT_NETNS, 0);
177bd4aed0eSJiong Wang 
178bd4aed0eSJiong Wang 	if (!sk)
179bd4aed0eSJiong Wang 		return 0;
180bd4aed0eSJiong Wang 
181bd4aed0eSJiong Wang 	if (sk->src_ip4 != tuple.ipv4.daddr ||
182bd4aed0eSJiong Wang 	    sk->src_port != DST_REWRITE_PORT4) {
183bd4aed0eSJiong Wang 		bpf_sk_release(sk);
184bd4aed0eSJiong Wang 		return 0;
185bd4aed0eSJiong Wang 	}
186bd4aed0eSJiong Wang 
187bd4aed0eSJiong Wang 	bpf_sk_release(sk);
188bd4aed0eSJiong Wang 
189beecf11bSStanislav Fomichev 	/* Rewrite congestion control. */
190beecf11bSStanislav Fomichev 	if (ctx->type == SOCK_STREAM && set_cc(ctx))
191beecf11bSStanislav Fomichev 		return 0;
192beecf11bSStanislav Fomichev 
193bd4aed0eSJiong Wang 	/* Rewrite destination. */
194bd4aed0eSJiong Wang 	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
195bd4aed0eSJiong Wang 	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
196bd4aed0eSJiong Wang 
1971d8a0af5SToke Høiland-Jørgensen 	return do_bind(ctx) ? 1 : 0;
198bd4aed0eSJiong Wang }
199bd4aed0eSJiong Wang 
200bd4aed0eSJiong Wang char _license[] SEC("license") = "GPL";
201