1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef __BPF_TCP_HELPERS_H 3 #define __BPF_TCP_HELPERS_H 4 5 #include <stdbool.h> 6 #include <linux/types.h> 7 #include <bpf/bpf_helpers.h> 8 #include <bpf/bpf_core_read.h> 9 #include <bpf/bpf_tracing.h> 10 11 #define BPF_STRUCT_OPS(name, args...) \ 12 SEC("struct_ops/"#name) \ 13 BPF_PROG(name, args) 14 15 #define tcp_jiffies32 ((__u32)bpf_jiffies64()) 16 17 struct sock_common { 18 unsigned char skc_state; 19 } __attribute__((preserve_access_index)); 20 21 enum sk_pacing { 22 SK_PACING_NONE = 0, 23 SK_PACING_NEEDED = 1, 24 SK_PACING_FQ = 2, 25 }; 26 27 struct sock { 28 struct sock_common __sk_common; 29 unsigned long sk_pacing_rate; 30 __u32 sk_pacing_status; /* see enum sk_pacing */ 31 } __attribute__((preserve_access_index)); 32 33 struct inet_sock { 34 struct sock sk; 35 } __attribute__((preserve_access_index)); 36 37 struct inet_connection_sock { 38 struct inet_sock icsk_inet; 39 __u8 icsk_ca_state:6, 40 icsk_ca_setsockopt:1, 41 icsk_ca_dst_locked:1; 42 struct { 43 __u8 pending; 44 } icsk_ack; 45 __u64 icsk_ca_priv[104 / sizeof(__u64)]; 46 } __attribute__((preserve_access_index)); 47 48 struct tcp_sock { 49 struct inet_connection_sock inet_conn; 50 51 __u32 rcv_nxt; 52 __u32 snd_nxt; 53 __u32 snd_una; 54 __u8 ecn_flags; 55 __u32 delivered; 56 __u32 delivered_ce; 57 __u32 snd_cwnd; 58 __u32 snd_cwnd_cnt; 59 __u32 snd_cwnd_clamp; 60 __u32 snd_ssthresh; 61 __u8 syn_data:1, /* SYN includes data */ 62 syn_fastopen:1, /* SYN includes Fast Open option */ 63 syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ 64 syn_fastopen_ch:1, /* Active TFO re-enabling probe */ 65 syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ 66 save_syn:1, /* Save headers of SYN packet */ 67 is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ 68 syn_smc:1; /* SYN includes SMC */ 69 __u32 max_packets_out; 70 __u32 lsndtime; 71 __u32 prior_cwnd; 72 __u64 tcp_mstamp; /* most recent packet received/sent */ 73 } __attribute__((preserve_access_index)); 74 75 static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk) 76 { 77 return (struct inet_connection_sock *)sk; 78 } 79 80 static __always_inline void *inet_csk_ca(const struct sock *sk) 81 { 82 return (void *)inet_csk(sk)->icsk_ca_priv; 83 } 84 85 static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk) 86 { 87 return (struct tcp_sock *)sk; 88 } 89 90 static __always_inline bool before(__u32 seq1, __u32 seq2) 91 { 92 return (__s32)(seq1-seq2) < 0; 93 } 94 #define after(seq2, seq1) before(seq1, seq2) 95 96 #define TCP_ECN_OK 1 97 #define TCP_ECN_QUEUE_CWR 2 98 #define TCP_ECN_DEMAND_CWR 4 99 #define TCP_ECN_SEEN 8 100 101 enum inet_csk_ack_state_t { 102 ICSK_ACK_SCHED = 1, 103 ICSK_ACK_TIMER = 2, 104 ICSK_ACK_PUSHED = 4, 105 ICSK_ACK_PUSHED2 = 8, 106 ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ 107 }; 108 109 enum tcp_ca_event { 110 CA_EVENT_TX_START = 0, 111 CA_EVENT_CWND_RESTART = 1, 112 CA_EVENT_COMPLETE_CWR = 2, 113 CA_EVENT_LOSS = 3, 114 CA_EVENT_ECN_NO_CE = 4, 115 CA_EVENT_ECN_IS_CE = 5, 116 }; 117 118 enum tcp_ca_state { 119 TCP_CA_Open = 0, 120 TCP_CA_Disorder = 1, 121 TCP_CA_CWR = 2, 122 TCP_CA_Recovery = 3, 123 TCP_CA_Loss = 4 124 }; 125 126 struct ack_sample { 127 __u32 pkts_acked; 128 __s32 rtt_us; 129 __u32 in_flight; 130 } __attribute__((preserve_access_index)); 131 132 struct rate_sample { 133 __u64 prior_mstamp; /* starting timestamp for interval */ 134 __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ 135 __s32 delivered; /* number of packets delivered over interval */ 136 long interval_us; /* time for tp->delivered to incr "delivered" */ 137 __u32 snd_interval_us; /* snd interval for delivered packets */ 138 __u32 rcv_interval_us; /* rcv interval for delivered packets */ 139 long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ 140 int losses; /* number of packets marked lost upon ACK */ 141 __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ 142 __u32 prior_in_flight; /* in flight before this ACK */ 143 bool is_app_limited; /* is sample from packet with bubble in pipe? */ 144 bool is_retrans; /* is sample from retransmission? */ 145 bool is_ack_delayed; /* is this (likely) a delayed ACK? */ 146 } __attribute__((preserve_access_index)); 147 148 #define TCP_CA_NAME_MAX 16 149 #define TCP_CONG_NEEDS_ECN 0x2 150 151 struct tcp_congestion_ops { 152 char name[TCP_CA_NAME_MAX]; 153 __u32 flags; 154 155 /* initialize private data (optional) */ 156 void (*init)(struct sock *sk); 157 /* cleanup private data (optional) */ 158 void (*release)(struct sock *sk); 159 160 /* return slow start threshold (required) */ 161 __u32 (*ssthresh)(struct sock *sk); 162 /* do new cwnd calculation (required) */ 163 void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked); 164 /* call before changing ca_state (optional) */ 165 void (*set_state)(struct sock *sk, __u8 new_state); 166 /* call when cwnd event occurs (optional) */ 167 void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); 168 /* call when ack arrives (optional) */ 169 void (*in_ack_event)(struct sock *sk, __u32 flags); 170 /* new value of cwnd after loss (required) */ 171 __u32 (*undo_cwnd)(struct sock *sk); 172 /* hook for packet ack accounting (optional) */ 173 void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); 174 /* override sysctl_tcp_min_tso_segs */ 175 __u32 (*min_tso_segs)(struct sock *sk); 176 /* returns the multiplier used in tcp_sndbuf_expand (optional) */ 177 __u32 (*sndbuf_expand)(struct sock *sk); 178 /* call when packets are delivered to update cwnd and pacing rate, 179 * after all the ca_state processing. (optional) 180 */ 181 void (*cong_control)(struct sock *sk, const struct rate_sample *rs); 182 }; 183 184 #define min(a, b) ((a) < (b) ? (a) : (b)) 185 #define max(a, b) ((a) > (b) ? (a) : (b)) 186 #define min_not_zero(x, y) ({ \ 187 typeof(x) __x = (x); \ 188 typeof(y) __y = (y); \ 189 __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) 190 191 static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) 192 { 193 __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); 194 195 acked -= cwnd - tp->snd_cwnd; 196 tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); 197 198 return acked; 199 } 200 201 static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp) 202 { 203 return tp->snd_cwnd < tp->snd_ssthresh; 204 } 205 206 static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk) 207 { 208 const struct tcp_sock *tp = tcp_sk(sk); 209 210 /* If in slow start, ensure cwnd grows to twice what was ACKed. */ 211 if (tcp_in_slow_start(tp)) 212 return tp->snd_cwnd < 2 * tp->max_packets_out; 213 214 return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); 215 } 216 217 static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) 218 { 219 /* If credits accumulated at a higher w, apply them gently now. */ 220 if (tp->snd_cwnd_cnt >= w) { 221 tp->snd_cwnd_cnt = 0; 222 tp->snd_cwnd++; 223 } 224 225 tp->snd_cwnd_cnt += acked; 226 if (tp->snd_cwnd_cnt >= w) { 227 __u32 delta = tp->snd_cwnd_cnt / w; 228 229 tp->snd_cwnd_cnt -= delta * w; 230 tp->snd_cwnd += delta; 231 } 232 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); 233 } 234 235 #endif 236