1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __BPF_TCP_HELPERS_H
3 #define __BPF_TCP_HELPERS_H
4 
5 #include <stdbool.h>
6 #include <linux/types.h>
7 #include <bpf/bpf_helpers.h>
8 #include <bpf/bpf_core_read.h>
9 #include <bpf/bpf_tracing.h>
10 
/*
 * Declare a BPF program for a struct_ops callback.
 *
 * Expands to SEC("struct_ops/<name>") followed by BPF_PROG(name, args), so
 * the section name is derived from the program name by stringification.
 * SEC() and BPF_PROG() are provided by the bpf headers included above.
 */
#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
14 
/* Value of bpf_jiffies64() truncated to its low 32 bits. */
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
16 
/*
 * Minimal declaration of sock_common: only skc_state is listed here.
 * The preserve_access_index attribute asks the compiler to emit relocatable
 * (CO-RE) field accesses instead of relying on the offsets of this layout.
 */
struct sock_common {
	unsigned char	skc_state;
} __attribute__((preserve_access_index));
20 
/* Values stored in sock::sk_pacing_status. */
enum sk_pacing {
	SK_PACING_NONE		= 0,
	SK_PACING_NEEDED	= 1,
	SK_PACING_FQ		= 2,
};
26 
/*
 * Minimal declaration of struct sock with only the fields used by the
 * helpers/programs including this header. Accesses are relocatable via
 * preserve_access_index, so the field offsets here are not authoritative.
 */
struct sock {
	struct sock_common	__sk_common;
	unsigned long		sk_pacing_rate;
	__u32			sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));
32 
/*
 * Minimal declaration of inet_sock; it embeds struct sock as its first
 * member, which is what lets the cast helpers below reinterpret a
 * struct sock pointer as one of the wrapping socket types.
 */
struct inet_sock {
	struct sock		sk;
} __attribute__((preserve_access_index));
36 
/*
 * Minimal declaration of inet_connection_sock (first member: inet_sock).
 *
 * icsk_ca_state, icsk_ca_setsockopt and icsk_ca_dst_locked share one byte
 * as 6/1/1-bit bitfields. icsk_ca_priv is a 104-byte scratch area declared
 * as __u64 elements; inet_csk_ca() hands out a pointer to it.
 */
struct inet_connection_sock {
	struct inet_sock	  icsk_inet;
	__u8			  icsk_ca_state:6,
				  icsk_ca_setsockopt:1,
				  icsk_ca_dst_locked:1;
	struct {
		__u8		  pending;
	} icsk_ack;
	__u64			  icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));
47 
/*
 * Minimal declaration of tcp_sock (first member: inet_connection_sock).
 * Only the fields listed here are visible to code including this header;
 * accesses are relocatable through preserve_access_index.
 */
struct tcp_sock {
	struct inet_connection_sock	inet_conn;

	__u32	rcv_nxt;
	__u32	snd_nxt;
	__u32	snd_una;
	__u8	ecn_flags;
	__u32	delivered;
	__u32	delivered_ce;
	__u32	snd_cwnd;
	__u32	snd_cwnd_cnt;
	__u32	snd_cwnd_clamp;
	__u32	snd_ssthresh;
	/* One byte of single-bit flags. */
	__u8	syn_data:1,	/* SYN includes data */
		syn_fastopen:1,	/* SYN includes Fast Open option */
		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
		save_syn:1,	/* Save headers of SYN packet */
		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
		syn_smc:1;	/* SYN includes SMC */
	__u32	max_packets_out;
	__u32	lsndtime;
	__u32	prior_cwnd;
	__u64	tcp_mstamp;	/* most recent packet received/sent */
} __attribute__((preserve_access_index));
74 
/*
 * Reinterpret a struct sock pointer as the inet_connection_sock that embeds
 * it. This is a plain pointer cast: no checking is performed and the const
 * qualifier of the argument is dropped.
 */
static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	struct inet_connection_sock *icsk = (struct inet_connection_sock *)sk;

	return icsk;
}
79 
80 static __always_inline void *inet_csk_ca(const struct sock *sk)
81 {
82 	return (void *)inet_csk(sk)->icsk_ca_priv;
83 }
84 
/*
 * Reinterpret a struct sock pointer as the tcp_sock that embeds it.
 * This is a plain pointer cast: no checking is performed and the const
 * qualifier of the argument is dropped.
 */
static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	struct tcp_sock *tp = (struct tcp_sock *)sk;

	return tp;
}
89 
/*
 * Wrap-around aware comparison of two 32-bit sequence numbers.
 *
 * The unsigned difference seq1 - seq2 is reinterpreted as signed; a negative
 * result means seq1 precedes seq2. Equal values are not "before".
 */
static __always_inline bool before(__u32 seq1, __u32 seq2)
{
	__s32 delta = (__s32)(seq1 - seq2);

	return delta < 0;
}
/* after(a, b) is before(b, a): true when a follows b. */
#define after(seq2, seq1) 	before(seq1, seq2)
95 
/*
 * ECN state bits (distinct powers of two, so they can be OR-ed together).
 * NOTE(review): presumably these are the bits kept in tcp_sock::ecn_flags;
 * confirm against the users of this header.
 */
#define	TCP_ECN_OK		1
#define	TCP_ECN_QUEUE_CWR	2
#define	TCP_ECN_DEMAND_CWR	4
#define	TCP_ECN_SEEN		8
100 
/*
 * ACK scheduling state bits (distinct powers of two).
 * NOTE(review): presumably stored in inet_connection_sock::icsk_ack.pending;
 * confirm against the users of this header.
 */
enum inet_csk_ack_state_t {
	ICSK_ACK_SCHED	= 1,
	ICSK_ACK_TIMER  = 2,
	ICSK_ACK_PUSHED = 4,
	ICSK_ACK_PUSHED2 = 8,
	ICSK_ACK_NOW = 16	/* Send the next ACK immediately (once) */
};
108 
/* Event codes delivered to tcp_congestion_ops::cwnd_event(). */
enum tcp_ca_event {
	CA_EVENT_TX_START = 0,
	CA_EVENT_CWND_RESTART = 1,
	CA_EVENT_COMPLETE_CWR = 2,
	CA_EVENT_LOSS = 3,
	CA_EVENT_ECN_NO_CE = 4,
	CA_EVENT_ECN_IS_CE = 5,
};
117 
/*
 * Congestion-avoidance state values.
 * NOTE(review): presumably the values held in
 * inet_connection_sock::icsk_ca_state and passed as new_state to
 * tcp_congestion_ops::set_state(); confirm against the users of this header.
 */
enum tcp_ca_state {
	TCP_CA_Open = 0,
	TCP_CA_Disorder = 1,
	TCP_CA_CWR = 2,
	TCP_CA_Recovery = 3,
	TCP_CA_Loss = 4
};
125 
/*
 * Per-ACK sample handed (read-only) to tcp_congestion_ops::pkts_acked().
 * Field accesses are relocatable through preserve_access_index.
 */
struct ack_sample {
	__u32 pkts_acked;
	__s32 rtt_us;
	__u32 in_flight;
} __attribute__((preserve_access_index));
131 
/*
 * Delivery-rate sample handed (read-only) to
 * tcp_congestion_ops::cong_control(). Field accesses are relocatable through
 * preserve_access_index.
 */
struct rate_sample {
	__u64  prior_mstamp; /* starting timestamp for interval */
	__u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
	__s32  delivered;		/* number of packets delivered over interval */
	long interval_us;	/* time for tp->delivered to incr "delivered" */
	__u32 snd_interval_us;	/* snd interval for delivered packets */
	__u32 rcv_interval_us;	/* rcv interval for delivered packets */
	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
	int  losses;		/* number of packets marked lost upon ACK */
	__u32  acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
	__u32  prior_in_flight;	/* in flight before this ACK */
	bool is_app_limited;	/* is sample from packet with bubble in pipe? */
	bool is_retrans;	/* is sample from retransmission? */
	bool is_ack_delayed;	/* is this (likely) a delayed ACK? */
} __attribute__((preserve_access_index));
147 
/* Size in bytes of tcp_congestion_ops::name. */
#define TCP_CA_NAME_MAX		16
/*
 * NOTE(review): presumably a bit for tcp_congestion_ops::flags indicating
 * that the algorithm requires ECN; confirm against the users of this header.
 */
#define TCP_CONG_NEEDS_ECN	0x2
150 
/*
 * Table of congestion-control callbacks plus the algorithm name and flags.
 * Each callback receives the socket it operates on; the per-member comments
 * state which callbacks are required and which are optional.
 */
struct tcp_congestion_ops {
	/* algorithm name, at most TCP_CA_NAME_MAX bytes */
	char name[TCP_CA_NAME_MAX];
	__u32 flags;

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
	/* cleanup private data  (optional) */
	void (*release)(struct sock *sk);

	/* return slow start threshold (required) */
	__u32 (*ssthresh)(struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, __u8 new_state);
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	/* call when ack arrives (optional) */
	void (*in_ack_event)(struct sock *sk, __u32 flags);
	/* new value of cwnd after loss (required) */
	__u32  (*undo_cwnd)(struct sock *sk);
	/* hook for packet ack accounting (optional) */
	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
	/* override sysctl_tcp_min_tso_segs */
	__u32 (*min_tso_segs)(struct sock *sk);
	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
	__u32 (*sndbuf_expand)(struct sock *sk);
	/* call when packets are delivered to update cwnd and pacing rate,
	 * after all the ca_state processing. (optional)
	 */
	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
};
183 
/*
 * Smaller / larger of two values.
 *
 * These are plain conditional-expression macros: each argument may be
 * evaluated twice, so do not pass expressions with side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
/*
 * Smaller of x and y, ignoring a zero operand: if one of them is 0 the other
 * is returned, and 0 is returned only when both are 0. Each argument is
 * evaluated exactly once.
 *
 * __typeof__ is used instead of the bare typeof keyword so the macro also
 * compiles in strict ISO modes (e.g. -std=c11), where typeof is not a keyword.
 */
#define min_not_zero(x, y) ({			\
	__typeof__(x) __x = (x);		\
	__typeof__(y) __y = (y);		\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
190 
191 static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
192 {
193 	__u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
194 
195 	acked -= cwnd - tp->snd_cwnd;
196 	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
197 
198 	return acked;
199 }
200 
201 static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
202 {
203 	return tp->snd_cwnd < tp->snd_ssthresh;
204 }
205 
206 static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
207 {
208 	const struct tcp_sock *tp = tcp_sk(sk);
209 
210 	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
211 	if (tcp_in_slow_start(tp))
212 		return tp->snd_cwnd < 2 * tp->max_packets_out;
213 
214 	return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
215 }
216 
217 static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
218 {
219 	/* If credits accumulated at a higher w, apply them gently now. */
220 	if (tp->snd_cwnd_cnt >= w) {
221 		tp->snd_cwnd_cnt = 0;
222 		tp->snd_cwnd++;
223 	}
224 
225 	tp->snd_cwnd_cnt += acked;
226 	if (tp->snd_cwnd_cnt >= w) {
227 		__u32 delta = tp->snd_cwnd_cnt / w;
228 
229 		tp->snd_cwnd_cnt -= delta * w;
230 		tp->snd_cwnd += delta;
231 	}
232 	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
233 }
234 
235 #endif
236