// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
3bd4aed0eSJiong Wang
4bd4aed0eSJiong Wang #include <string.h>
5bd4aed0eSJiong Wang
6bd4aed0eSJiong Wang #include <linux/stddef.h>
7bd4aed0eSJiong Wang #include <linux/bpf.h>
8bd4aed0eSJiong Wang #include <linux/in.h>
9bd4aed0eSJiong Wang #include <linux/in6.h>
102eb68040SJames Hilliard #include <linux/tcp.h>
119c441fe4SFerenc Fejes #include <linux/if.h>
129c441fe4SFerenc Fejes #include <errno.h>
13bd4aed0eSJiong Wang
143e689141SToke Høiland-Jørgensen #include <bpf/bpf_helpers.h>
153e689141SToke Høiland-Jørgensen #include <bpf/bpf_endian.h>
16bd4aed0eSJiong Wang
172eb68040SJames Hilliard #include "bpf_tcp_helpers.h"
182eb68040SJames Hilliard
/* Source address the socket is bound to by do_bind(): 0x7f000004 is 127.0.0.4. */
#define SRC_REWRITE_IP4		0x7f000004U
/* Destination the connect() is rewritten to: 0x7f000001 is 127.0.0.1, port 4444. */
#define DST_REWRITE_IP4		0x7f000001U
#define DST_REWRITE_PORT4	4444

/*
 * Fallback definitions, used only when the headers included above do not
 * already provide these names.
 */
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif

#ifndef TCP_NOTSENT_LOWAT
#define TCP_NOTSENT_LOWAT 25
#endif

#ifndef IFNAMSIZ
#define IFNAMSIZ 16
#endif
349c441fe4SFerenc Fejes
35*63d78b7eSYonghong Song __attribute__ ((noinline)) __weak
do_bind(struct bpf_sock_addr * ctx)361d8a0af5SToke Høiland-Jørgensen int do_bind(struct bpf_sock_addr *ctx)
371d8a0af5SToke Høiland-Jørgensen {
381d8a0af5SToke Høiland-Jørgensen struct sockaddr_in sa = {};
391d8a0af5SToke Høiland-Jørgensen
401d8a0af5SToke Høiland-Jørgensen sa.sin_family = AF_INET;
411d8a0af5SToke Høiland-Jørgensen sa.sin_port = bpf_htons(0);
421d8a0af5SToke Høiland-Jørgensen sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
431d8a0af5SToke Høiland-Jørgensen
441d8a0af5SToke Høiland-Jørgensen if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
451d8a0af5SToke Høiland-Jørgensen return 0;
461d8a0af5SToke Høiland-Jørgensen
471d8a0af5SToke Høiland-Jørgensen return 1;
481d8a0af5SToke Høiland-Jørgensen }
491d8a0af5SToke Høiland-Jørgensen
verify_cc(struct bpf_sock_addr * ctx,char expected[TCP_CA_NAME_MAX])50beecf11bSStanislav Fomichev static __inline int verify_cc(struct bpf_sock_addr *ctx,
51beecf11bSStanislav Fomichev char expected[TCP_CA_NAME_MAX])
52beecf11bSStanislav Fomichev {
53beecf11bSStanislav Fomichev char buf[TCP_CA_NAME_MAX];
54beecf11bSStanislav Fomichev int i;
55beecf11bSStanislav Fomichev
56beecf11bSStanislav Fomichev if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
57beecf11bSStanislav Fomichev return 1;
58beecf11bSStanislav Fomichev
59beecf11bSStanislav Fomichev for (i = 0; i < TCP_CA_NAME_MAX; i++) {
60beecf11bSStanislav Fomichev if (buf[i] != expected[i])
61beecf11bSStanislav Fomichev return 1;
62beecf11bSStanislav Fomichev if (buf[i] == 0)
63beecf11bSStanislav Fomichev break;
64beecf11bSStanislav Fomichev }
65beecf11bSStanislav Fomichev
66beecf11bSStanislav Fomichev return 0;
67beecf11bSStanislav Fomichev }
68beecf11bSStanislav Fomichev
set_cc(struct bpf_sock_addr * ctx)69beecf11bSStanislav Fomichev static __inline int set_cc(struct bpf_sock_addr *ctx)
70beecf11bSStanislav Fomichev {
7157dc6f3bSStanislav Fomichev char reno[TCP_CA_NAME_MAX] = "reno";
72beecf11bSStanislav Fomichev char cubic[TCP_CA_NAME_MAX] = "cubic";
73beecf11bSStanislav Fomichev
7457dc6f3bSStanislav Fomichev if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
75beecf11bSStanislav Fomichev return 1;
7657dc6f3bSStanislav Fomichev if (verify_cc(ctx, reno))
77beecf11bSStanislav Fomichev return 1;
78beecf11bSStanislav Fomichev
79beecf11bSStanislav Fomichev if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
80beecf11bSStanislav Fomichev return 1;
81beecf11bSStanislav Fomichev if (verify_cc(ctx, cubic))
82beecf11bSStanislav Fomichev return 1;
83beecf11bSStanislav Fomichev
84beecf11bSStanislav Fomichev return 0;
85beecf11bSStanislav Fomichev }
86beecf11bSStanislav Fomichev
bind_to_device(struct bpf_sock_addr * ctx)879c441fe4SFerenc Fejes static __inline int bind_to_device(struct bpf_sock_addr *ctx)
889c441fe4SFerenc Fejes {
899c441fe4SFerenc Fejes char veth1[IFNAMSIZ] = "test_sock_addr1";
909c441fe4SFerenc Fejes char veth2[IFNAMSIZ] = "test_sock_addr2";
919c441fe4SFerenc Fejes char missing[IFNAMSIZ] = "nonexistent_dev";
929c441fe4SFerenc Fejes char del_bind[IFNAMSIZ] = "";
939c441fe4SFerenc Fejes
949c441fe4SFerenc Fejes if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
959c441fe4SFerenc Fejes &veth1, sizeof(veth1)))
969c441fe4SFerenc Fejes return 1;
979c441fe4SFerenc Fejes if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
989c441fe4SFerenc Fejes &veth2, sizeof(veth2)))
999c441fe4SFerenc Fejes return 1;
1009c441fe4SFerenc Fejes if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
1019c441fe4SFerenc Fejes &missing, sizeof(missing)) != -ENODEV)
1029c441fe4SFerenc Fejes return 1;
1039c441fe4SFerenc Fejes if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
1049c441fe4SFerenc Fejes &del_bind, sizeof(del_bind)))
1059c441fe4SFerenc Fejes return 1;
1069c441fe4SFerenc Fejes
1079c441fe4SFerenc Fejes return 0;
1089c441fe4SFerenc Fejes }
1099c441fe4SFerenc Fejes
set_keepalive(struct bpf_sock_addr * ctx)110f9bcf968SDmitry Yakunin static __inline int set_keepalive(struct bpf_sock_addr *ctx)
111f9bcf968SDmitry Yakunin {
112f9bcf968SDmitry Yakunin int zero = 0, one = 1;
113f9bcf968SDmitry Yakunin
114f9bcf968SDmitry Yakunin if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
115f9bcf968SDmitry Yakunin return 1;
116f9bcf968SDmitry Yakunin if (ctx->type == SOCK_STREAM) {
117f9bcf968SDmitry Yakunin if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
118f9bcf968SDmitry Yakunin return 1;
119f9bcf968SDmitry Yakunin if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
120f9bcf968SDmitry Yakunin return 1;
121f9bcf968SDmitry Yakunin if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
122f9bcf968SDmitry Yakunin return 1;
123f9bcf968SDmitry Yakunin if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
124f9bcf968SDmitry Yakunin return 1;
125f9bcf968SDmitry Yakunin if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
126f9bcf968SDmitry Yakunin return 1;
127f9bcf968SDmitry Yakunin }
128f9bcf968SDmitry Yakunin if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
129f9bcf968SDmitry Yakunin return 1;
130f9bcf968SDmitry Yakunin
131f9bcf968SDmitry Yakunin return 0;
132f9bcf968SDmitry Yakunin }
133f9bcf968SDmitry Yakunin
set_notsent_lowat(struct bpf_sock_addr * ctx)134eca43ee6SNikita V. Shirokov static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
135eca43ee6SNikita V. Shirokov {
136eca43ee6SNikita V. Shirokov int lowat = 65535;
137eca43ee6SNikita V. Shirokov
138eca43ee6SNikita V. Shirokov if (ctx->type == SOCK_STREAM) {
139eca43ee6SNikita V. Shirokov if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
140eca43ee6SNikita V. Shirokov return 1;
141eca43ee6SNikita V. Shirokov }
142eca43ee6SNikita V. Shirokov
143eca43ee6SNikita V. Shirokov return 0;
144eca43ee6SNikita V. Shirokov }
145eca43ee6SNikita V. Shirokov
146bd4aed0eSJiong Wang SEC("cgroup/connect4")
connect_v4_prog(struct bpf_sock_addr * ctx)147bd4aed0eSJiong Wang int connect_v4_prog(struct bpf_sock_addr *ctx)
148bd4aed0eSJiong Wang {
149bd4aed0eSJiong Wang struct bpf_sock_tuple tuple = {};
150bd4aed0eSJiong Wang struct bpf_sock *sk;
151bd4aed0eSJiong Wang
152bd4aed0eSJiong Wang /* Verify that new destination is available. */
153bd4aed0eSJiong Wang memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr));
154bd4aed0eSJiong Wang memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport));
155bd4aed0eSJiong Wang
156bd4aed0eSJiong Wang tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
157bd4aed0eSJiong Wang tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
158bd4aed0eSJiong Wang
1599c441fe4SFerenc Fejes /* Bind to device and unbind it. */
1609c441fe4SFerenc Fejes if (bind_to_device(ctx))
1619c441fe4SFerenc Fejes return 0;
1629c441fe4SFerenc Fejes
163f9bcf968SDmitry Yakunin if (set_keepalive(ctx))
164f9bcf968SDmitry Yakunin return 0;
165f9bcf968SDmitry Yakunin
166eca43ee6SNikita V. Shirokov if (set_notsent_lowat(ctx))
167eca43ee6SNikita V. Shirokov return 0;
168eca43ee6SNikita V. Shirokov
169bd4aed0eSJiong Wang if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
170bd4aed0eSJiong Wang return 0;
171bd4aed0eSJiong Wang else if (ctx->type == SOCK_STREAM)
172bd4aed0eSJiong Wang sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
173bd4aed0eSJiong Wang BPF_F_CURRENT_NETNS, 0);
174bd4aed0eSJiong Wang else
175bd4aed0eSJiong Wang sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4),
176bd4aed0eSJiong Wang BPF_F_CURRENT_NETNS, 0);
177bd4aed0eSJiong Wang
178bd4aed0eSJiong Wang if (!sk)
179bd4aed0eSJiong Wang return 0;
180bd4aed0eSJiong Wang
181bd4aed0eSJiong Wang if (sk->src_ip4 != tuple.ipv4.daddr ||
182bd4aed0eSJiong Wang sk->src_port != DST_REWRITE_PORT4) {
183bd4aed0eSJiong Wang bpf_sk_release(sk);
184bd4aed0eSJiong Wang return 0;
185bd4aed0eSJiong Wang }
186bd4aed0eSJiong Wang
187bd4aed0eSJiong Wang bpf_sk_release(sk);
188bd4aed0eSJiong Wang
189beecf11bSStanislav Fomichev /* Rewrite congestion control. */
190beecf11bSStanislav Fomichev if (ctx->type == SOCK_STREAM && set_cc(ctx))
191beecf11bSStanislav Fomichev return 0;
192beecf11bSStanislav Fomichev
193bd4aed0eSJiong Wang /* Rewrite destination. */
194bd4aed0eSJiong Wang ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
195bd4aed0eSJiong Wang ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
196bd4aed0eSJiong Wang
1971d8a0af5SToke Høiland-Jørgensen return do_bind(ctx) ? 1 : 0;
198bd4aed0eSJiong Wang }
199bd4aed0eSJiong Wang
200bd4aed0eSJiong Wang char _license[] SEC("license") = "GPL";
201