/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BPF_TCP_HELPERS_H
#define __BPF_TCP_HELPERS_H

#include <stdbool.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
106de4a9c4SMartin KaFai Lau
/*
 * Declare a BPF program called @name and place it in the
 * "struct_ops/<name>" section. @args is handed to BPF_PROG() unchanged.
 */
#define BPF_STRUCT_OPS(name, args...) \
	SEC("struct_ops/"#name) \
	BPF_PROG(name, args)
146de4a9c4SMartin KaFai Lau
/*
 * Fallback definitions: each constant is only defined here when an earlier
 * include has not already provided it.
 */
#ifndef SOL_TCP
#define SOL_TCP 6
#endif

#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif

/* The value returned by bpf_jiffies64(), truncated to its low 32 bits. */
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
2409903869SMartin KaFai Lau
/*
 * Partial declaration of struct sock_common: only the two members below are
 * declared. preserve_access_index asks the compiler to record field accesses
 * so they can be relocated (BPF CO-RE).
 */
struct sock_common {
	unsigned char	skc_state;
	__u16		skc_num;
} __attribute__((preserve_access_index));
2909903869SMartin KaFai Lau
/* Values for the sk_pacing_status member of struct sock. */
enum sk_pacing {
	SK_PACING_NONE		= 0,
	SK_PACING_NEEDED	= 1,
	SK_PACING_FQ		= 2,
};
356de4a9c4SMartin KaFai Lau
3609903869SMartin KaFai Lau struct sock {
3709903869SMartin KaFai Lau struct sock_common __sk_common;
38700dcf0fSMartin KaFai Lau #define sk_state __sk_common.skc_state
396de4a9c4SMartin KaFai Lau unsigned long sk_pacing_rate;
406de4a9c4SMartin KaFai Lau __u32 sk_pacing_status; /* see enum sk_pacing */
4109903869SMartin KaFai Lau } __attribute__((preserve_access_index));
4209903869SMartin KaFai Lau
4309903869SMartin KaFai Lau struct inet_sock {
4409903869SMartin KaFai Lau struct sock sk;
4509903869SMartin KaFai Lau } __attribute__((preserve_access_index));
4609903869SMartin KaFai Lau
4709903869SMartin KaFai Lau struct inet_connection_sock {
4809903869SMartin KaFai Lau struct inet_sock icsk_inet;
4909903869SMartin KaFai Lau __u8 icsk_ca_state:6,
5009903869SMartin KaFai Lau icsk_ca_setsockopt:1,
5109903869SMartin KaFai Lau icsk_ca_dst_locked:1;
5209903869SMartin KaFai Lau struct {
5309903869SMartin KaFai Lau __u8 pending;
5409903869SMartin KaFai Lau } icsk_ack;
5509903869SMartin KaFai Lau __u64 icsk_ca_priv[104 / sizeof(__u64)];
5609903869SMartin KaFai Lau } __attribute__((preserve_access_index));
5709903869SMartin KaFai Lau
589a856caeSMartin KaFai Lau struct request_sock {
599a856caeSMartin KaFai Lau struct sock_common __req_common;
609a856caeSMartin KaFai Lau } __attribute__((preserve_access_index));
619a856caeSMartin KaFai Lau
6209903869SMartin KaFai Lau struct tcp_sock {
6309903869SMartin KaFai Lau struct inet_connection_sock inet_conn;
6409903869SMartin KaFai Lau
6509903869SMartin KaFai Lau __u32 rcv_nxt;
6609903869SMartin KaFai Lau __u32 snd_nxt;
6709903869SMartin KaFai Lau __u32 snd_una;
6855144f31SPrankur gupta __u32 window_clamp;
6909903869SMartin KaFai Lau __u8 ecn_flags;
7009903869SMartin KaFai Lau __u32 delivered;
7109903869SMartin KaFai Lau __u32 delivered_ce;
7209903869SMartin KaFai Lau __u32 snd_cwnd;
7309903869SMartin KaFai Lau __u32 snd_cwnd_cnt;
7409903869SMartin KaFai Lau __u32 snd_cwnd_clamp;
7509903869SMartin KaFai Lau __u32 snd_ssthresh;
7609903869SMartin KaFai Lau __u8 syn_data:1, /* SYN includes data */
7709903869SMartin KaFai Lau syn_fastopen:1, /* SYN includes Fast Open option */
7809903869SMartin KaFai Lau syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
7909903869SMartin KaFai Lau syn_fastopen_ch:1, /* Active TFO re-enabling probe */
8009903869SMartin KaFai Lau syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
8109903869SMartin KaFai Lau save_syn:1, /* Save headers of SYN packet */
8209903869SMartin KaFai Lau is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
8309903869SMartin KaFai Lau syn_smc:1; /* SYN includes SMC */
8409903869SMartin KaFai Lau __u32 max_packets_out;
8509903869SMartin KaFai Lau __u32 lsndtime;
8609903869SMartin KaFai Lau __u32 prior_cwnd;
876de4a9c4SMartin KaFai Lau __u64 tcp_mstamp; /* most recent packet received/sent */
888039d353SNicolas Rybowski bool is_mptcp;
8909903869SMartin KaFai Lau } __attribute__((preserve_access_index));
9009903869SMartin KaFai Lau
inet_csk(const struct sock * sk)9109903869SMartin KaFai Lau static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
9209903869SMartin KaFai Lau {
9309903869SMartin KaFai Lau return (struct inet_connection_sock *)sk;
9409903869SMartin KaFai Lau }
9509903869SMartin KaFai Lau
inet_csk_ca(const struct sock * sk)9609903869SMartin KaFai Lau static __always_inline void *inet_csk_ca(const struct sock *sk)
9709903869SMartin KaFai Lau {
9809903869SMartin KaFai Lau return (void *)inet_csk(sk)->icsk_ca_priv;
9909903869SMartin KaFai Lau }
10009903869SMartin KaFai Lau
tcp_sk(const struct sock * sk)10109903869SMartin KaFai Lau static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
10209903869SMartin KaFai Lau {
10309903869SMartin KaFai Lau return (struct tcp_sock *)sk;
10409903869SMartin KaFai Lau }
10509903869SMartin KaFai Lau
/*
 * Ordering test for 32-bit sequence numbers that tolerates wraparound.
 *
 * before(seq1, seq2) is true when seq1 - seq2, computed in unsigned 32-bit
 * arithmetic and then viewed as a signed 32-bit value, is negative.
 * after(seq2, seq1) is the same test with the operands swapped.
 */
static __always_inline bool before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;
}
#define after(seq2, seq1)	before(seq1, seq2)
11109903869SMartin KaFai Lau
/*
 * ECN flag bits; each value is a distinct single bit so they can be OR-ed.
 * NOTE(review): presumably stored in tcp_sock::ecn_flags - confirm with users.
 */
#define TCP_ECN_OK		1
#define TCP_ECN_QUEUE_CWR	2
#define TCP_ECN_DEMAND_CWR	4
#define TCP_ECN_SEEN		8
11609903869SMartin KaFai Lau
/*
 * ACK state flags; each enumerator is a distinct single bit.
 * NOTE(review): presumably kept in inet_connection_sock::icsk_ack.pending -
 * confirm against users.
 */
enum inet_csk_ack_state_t {
	ICSK_ACK_SCHED	= 1,
	ICSK_ACK_TIMER	= 2,
	ICSK_ACK_PUSHED	= 4,
	ICSK_ACK_PUSHED2 = 8,
	ICSK_ACK_NOW	= 16	/* Send the next ACK immediately (once) */
};
12409903869SMartin KaFai Lau
/* Event codes taken by the cwnd_event() hook of struct tcp_congestion_ops. */
enum tcp_ca_event {
	CA_EVENT_TX_START	= 0,
	CA_EVENT_CWND_RESTART	= 1,
	CA_EVENT_COMPLETE_CWR	= 2,
	CA_EVENT_LOSS		= 3,
	CA_EVENT_ECN_NO_CE	= 4,
	CA_EVENT_ECN_IS_CE	= 5,
};
13309903869SMartin KaFai Lau
/*
 * Sample handed to the pkts_acked() hook of struct tcp_congestion_ops.
 * rtt_us is signed, unlike the two counters.
 */
struct ack_sample {
	__u32	pkts_acked;
	__s32	rtt_us;
	__u32	in_flight;
} __attribute__((preserve_access_index));
13909903869SMartin KaFai Lau
/* Sample handed to the cong_control() hook of struct tcp_congestion_ops. */
struct rate_sample {
	__u64	prior_mstamp;	/* starting timestamp for interval */
	__u32	prior_delivered; /* tp->delivered at "prior_mstamp" */
	__s32	delivered;	/* number of packets delivered over interval */
	long	interval_us;	/* time for tp->delivered to incr "delivered" */
	__u32	snd_interval_us; /* snd interval for delivered packets */
	__u32	rcv_interval_us; /* rcv interval for delivered packets */
	long	rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
	int	losses;		/* number of packets marked lost upon ACK */
	__u32	acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
	__u32	prior_in_flight; /* in flight before this ACK */
	bool	is_app_limited;	/* is sample from packet with bubble in pipe? */
	bool	is_retrans;	/* is sample from retransmission? */
	bool	is_ack_delayed;	/* is this (likely) a delayed ACK? */
} __attribute__((preserve_access_index));
15509903869SMartin KaFai Lau
/*
 * Size of the name[] array in struct tcp_congestion_ops. Guarded so that a
 * value already supplied by an earlier definition is never redefined.
 */
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif
/* NOTE(review): presumably a bit for tcp_congestion_ops::flags - confirm. */
#define TCP_CONG_NEEDS_ECN 0x2
15809903869SMartin KaFai Lau
15909903869SMartin KaFai Lau struct tcp_congestion_ops {
16009903869SMartin KaFai Lau char name[TCP_CA_NAME_MAX];
16109903869SMartin KaFai Lau __u32 flags;
16209903869SMartin KaFai Lau
16309903869SMartin KaFai Lau /* initialize private data (optional) */
16409903869SMartin KaFai Lau void (*init)(struct sock *sk);
16509903869SMartin KaFai Lau /* cleanup private data (optional) */
16609903869SMartin KaFai Lau void (*release)(struct sock *sk);
16709903869SMartin KaFai Lau
16809903869SMartin KaFai Lau /* return slow start threshold (required) */
16909903869SMartin KaFai Lau __u32 (*ssthresh)(struct sock *sk);
17009903869SMartin KaFai Lau /* do new cwnd calculation (required) */
17109903869SMartin KaFai Lau void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
17209903869SMartin KaFai Lau /* call before changing ca_state (optional) */
17309903869SMartin KaFai Lau void (*set_state)(struct sock *sk, __u8 new_state);
17409903869SMartin KaFai Lau /* call when cwnd event occurs (optional) */
17509903869SMartin KaFai Lau void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
17609903869SMartin KaFai Lau /* call when ack arrives (optional) */
17709903869SMartin KaFai Lau void (*in_ack_event)(struct sock *sk, __u32 flags);
17809903869SMartin KaFai Lau /* new value of cwnd after loss (required) */
17909903869SMartin KaFai Lau __u32 (*undo_cwnd)(struct sock *sk);
18009903869SMartin KaFai Lau /* hook for packet ack accounting (optional) */
18109903869SMartin KaFai Lau void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
18209903869SMartin KaFai Lau /* override sysctl_tcp_min_tso_segs */
18309903869SMartin KaFai Lau __u32 (*min_tso_segs)(struct sock *sk);
18409903869SMartin KaFai Lau /* returns the multiplier used in tcp_sndbuf_expand (optional) */
18509903869SMartin KaFai Lau __u32 (*sndbuf_expand)(struct sock *sk);
18609903869SMartin KaFai Lau /* call when packets are delivered to update cwnd and pacing rate,
18709903869SMartin KaFai Lau * after all the ca_state processing. (optional)
18809903869SMartin KaFai Lau */
18909903869SMartin KaFai Lau void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
190a79e88ddSMartin KaFai Lau void *owner;
19109903869SMartin KaFai Lau };
19209903869SMartin KaFai Lau
/*
 * min()/max(): plain conditional expressions. Each argument may be evaluated
 * twice, so do not pass expressions with side effects.
 *
 * min_not_zero(): evaluates each argument exactly once and yields the
 * smaller of the two, ignoring an argument that is zero; yields zero only
 * when both are zero. Written with __typeof__ so it is also accepted when
 * the compiler runs in a strict ISO mode where plain typeof is not a keyword.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min_not_zero(x, y) ({			\
	__typeof__(x) __x = (x);		\
	__typeof__(y) __y = (y);		\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
19909903869SMartin KaFai Lau
tcp_in_slow_start(const struct tcp_sock * tp)20009903869SMartin KaFai Lau static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
20109903869SMartin KaFai Lau {
20209903869SMartin KaFai Lau return tp->snd_cwnd < tp->snd_ssthresh;
20309903869SMartin KaFai Lau }
20409903869SMartin KaFai Lau
tcp_is_cwnd_limited(const struct sock * sk)20509903869SMartin KaFai Lau static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
20609903869SMartin KaFai Lau {
20709903869SMartin KaFai Lau const struct tcp_sock *tp = tcp_sk(sk);
20809903869SMartin KaFai Lau
20909903869SMartin KaFai Lau /* If in slow start, ensure cwnd grows to twice what was ACKed. */
21009903869SMartin KaFai Lau if (tcp_in_slow_start(tp))
21109903869SMartin KaFai Lau return tp->snd_cwnd < 2 * tp->max_packets_out;
21209903869SMartin KaFai Lau
21309903869SMartin KaFai Lau return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
21409903869SMartin KaFai Lau }
21509903869SMartin KaFai Lau
/*
 * Compare two congestion-control names.
 *
 * At most TCP_CA_NAME_MAX bytes are examined. Returns false at the first
 * position where the bytes differ. Returns true when both strings end at
 * the same position, or when the first TCP_CA_NAME_MAX bytes all match
 * (anything beyond that limit is never looked at).
 */
static __always_inline bool tcp_cc_eq(const char *a, const char *b)
{
	int i;

	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
		if (a[i] != b[i])
			return false;
		/* Bytes are equal here, so a NUL in @a is also a NUL in @b. */
		if (!a[i])
			break;
	}

	return true;
}
229f2a6ee92SPrankur Gupta
23078e60bbbSMartin KaFai Lau extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
23178e60bbbSMartin KaFai Lau extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
23209903869SMartin KaFai Lau
2333bc48b56SGeliang Tang struct mptcp_sock {
2343bc48b56SGeliang Tang struct inet_connection_sock sk;
23502662234SGeliang Tang
23602662234SGeliang Tang __u32 token;
237*4f90d034SGeliang Tang struct sock *first;
238ccc090f4SGeliang Tang char ca_name[TCP_CA_NAME_MAX];
2393bc48b56SGeliang Tang } __attribute__((preserve_access_index));
2403bc48b56SGeliang Tang
#endif
242