1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef __BPF_TCP_HELPERS_H 3 #define __BPF_TCP_HELPERS_H 4 5 #include <stdbool.h> 6 #include <linux/types.h> 7 #include <bpf/bpf_helpers.h> 8 #include <bpf/bpf_core_read.h> 9 #include <bpf/bpf_tracing.h> 10 11 #define BPF_STRUCT_OPS(name, args...) \ 12 SEC("struct_ops/"#name) \ 13 BPF_PROG(name, args) 14 15 #ifndef SOL_TCP 16 #define SOL_TCP 6 17 #endif 18 19 #ifndef TCP_CA_NAME_MAX 20 #define TCP_CA_NAME_MAX 16 21 #endif 22 23 #define tcp_jiffies32 ((__u32)bpf_jiffies64()) 24 25 struct sock_common { 26 unsigned char skc_state; 27 __u16 skc_num; 28 } __attribute__((preserve_access_index)); 29 30 enum sk_pacing { 31 SK_PACING_NONE = 0, 32 SK_PACING_NEEDED = 1, 33 SK_PACING_FQ = 2, 34 }; 35 36 struct sock { 37 struct sock_common __sk_common; 38 #define sk_state __sk_common.skc_state 39 unsigned long sk_pacing_rate; 40 __u32 sk_pacing_status; /* see enum sk_pacing */ 41 } __attribute__((preserve_access_index)); 42 43 struct inet_sock { 44 struct sock sk; 45 } __attribute__((preserve_access_index)); 46 47 struct inet_connection_sock { 48 struct inet_sock icsk_inet; 49 __u8 icsk_ca_state:6, 50 icsk_ca_setsockopt:1, 51 icsk_ca_dst_locked:1; 52 struct { 53 __u8 pending; 54 } icsk_ack; 55 __u64 icsk_ca_priv[104 / sizeof(__u64)]; 56 } __attribute__((preserve_access_index)); 57 58 struct request_sock { 59 struct sock_common __req_common; 60 } __attribute__((preserve_access_index)); 61 62 struct tcp_sock { 63 struct inet_connection_sock inet_conn; 64 65 __u32 rcv_nxt; 66 __u32 snd_nxt; 67 __u32 snd_una; 68 __u32 window_clamp; 69 __u8 ecn_flags; 70 __u32 delivered; 71 __u32 delivered_ce; 72 __u32 snd_cwnd; 73 __u32 snd_cwnd_cnt; 74 __u32 snd_cwnd_clamp; 75 __u32 snd_ssthresh; 76 __u8 syn_data:1, /* SYN includes data */ 77 syn_fastopen:1, /* SYN includes Fast Open option */ 78 syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ 79 syn_fastopen_ch:1, /* Active TFO re-enabling probe */ 80 syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ 81 save_syn:1, /* Save headers of SYN packet */ 82 is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ 83 syn_smc:1; /* SYN includes SMC */ 84 __u32 max_packets_out; 85 __u32 lsndtime; 86 __u32 prior_cwnd; 87 __u64 tcp_mstamp; /* most recent packet received/sent */ 88 bool is_mptcp; 89 } __attribute__((preserve_access_index)); 90 91 static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk) 92 { 93 return (struct inet_connection_sock *)sk; 94 } 95 96 static __always_inline void *inet_csk_ca(const struct sock *sk) 97 { 98 return (void *)inet_csk(sk)->icsk_ca_priv; 99 } 100 101 static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk) 102 { 103 return (struct tcp_sock *)sk; 104 } 105 106 static __always_inline bool before(__u32 seq1, __u32 seq2) 107 { 108 return (__s32)(seq1-seq2) < 0; 109 } 110 #define after(seq2, seq1) before(seq1, seq2) 111 112 #define TCP_ECN_OK 1 113 #define TCP_ECN_QUEUE_CWR 2 114 #define TCP_ECN_DEMAND_CWR 4 115 #define TCP_ECN_SEEN 8 116 117 enum inet_csk_ack_state_t { 118 ICSK_ACK_SCHED = 1, 119 ICSK_ACK_TIMER = 2, 120 ICSK_ACK_PUSHED = 4, 121 ICSK_ACK_PUSHED2 = 8, 122 ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ 123 }; 124 125 enum tcp_ca_event { 126 CA_EVENT_TX_START = 0, 127 CA_EVENT_CWND_RESTART = 1, 128 CA_EVENT_COMPLETE_CWR = 2, 129 CA_EVENT_LOSS = 3, 130 CA_EVENT_ECN_NO_CE = 4, 131 CA_EVENT_ECN_IS_CE = 5, 132 }; 133 134 struct ack_sample { 135 __u32 pkts_acked; 136 __s32 rtt_us; 137 __u32 in_flight; 138 } __attribute__((preserve_access_index)); 139 140 struct rate_sample { 141 __u64 prior_mstamp; /* starting timestamp for interval */ 142 __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ 143 __s32 delivered; /* number of packets delivered over interval */ 144 long interval_us; /* time for tp->delivered to incr "delivered" */ 145 __u32 snd_interval_us; /* snd interval for delivered packets */ 146 __u32 rcv_interval_us; /* rcv interval for delivered packets */ 147 long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ 148 int losses; /* number of packets marked lost upon ACK */ 149 __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ 150 __u32 prior_in_flight; /* in flight before this ACK */ 151 bool is_app_limited; /* is sample from packet with bubble in pipe? */ 152 bool is_retrans; /* is sample from retransmission? */ 153 bool is_ack_delayed; /* is this (likely) a delayed ACK? */ 154 } __attribute__((preserve_access_index)); 155 156 #define TCP_CA_NAME_MAX 16 157 #define TCP_CONG_NEEDS_ECN 0x2 158 159 struct tcp_congestion_ops { 160 char name[TCP_CA_NAME_MAX]; 161 __u32 flags; 162 163 /* initialize private data (optional) */ 164 void (*init)(struct sock *sk); 165 /* cleanup private data (optional) */ 166 void (*release)(struct sock *sk); 167 168 /* return slow start threshold (required) */ 169 __u32 (*ssthresh)(struct sock *sk); 170 /* do new cwnd calculation (required) */ 171 void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked); 172 /* call before changing ca_state (optional) */ 173 void (*set_state)(struct sock *sk, __u8 new_state); 174 /* call when cwnd event occurs (optional) */ 175 void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); 176 /* call when ack arrives (optional) */ 177 void (*in_ack_event)(struct sock *sk, __u32 flags); 178 /* new value of cwnd after loss (required) */ 179 __u32 (*undo_cwnd)(struct sock *sk); 180 /* hook for packet ack accounting (optional) */ 181 void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); 182 /* override sysctl_tcp_min_tso_segs */ 183 __u32 (*min_tso_segs)(struct sock *sk); 184 /* returns the multiplier used in tcp_sndbuf_expand (optional) */ 185 __u32 (*sndbuf_expand)(struct sock *sk); 186 /* call when packets are delivered to update cwnd and pacing rate, 187 * after all the ca_state processing. (optional) 188 */ 189 void (*cong_control)(struct sock *sk, const struct rate_sample *rs); 190 void *owner; 191 }; 192 193 #define min(a, b) ((a) < (b) ? (a) : (b)) 194 #define max(a, b) ((a) > (b) ? (a) : (b)) 195 #define min_not_zero(x, y) ({ \ 196 typeof(x) __x = (x); \ 197 typeof(y) __y = (y); \ 198 __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) 199 200 static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp) 201 { 202 return tp->snd_cwnd < tp->snd_ssthresh; 203 } 204 205 static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk) 206 { 207 const struct tcp_sock *tp = tcp_sk(sk); 208 209 /* If in slow start, ensure cwnd grows to twice what was ACKed. */ 210 if (tcp_in_slow_start(tp)) 211 return tp->snd_cwnd < 2 * tp->max_packets_out; 212 213 return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); 214 } 215 216 static __always_inline bool tcp_cc_eq(const char *a, const char *b) 217 { 218 int i; 219 220 for (i = 0; i < TCP_CA_NAME_MAX; i++) { 221 if (a[i] != b[i]) 222 return false; 223 if (!a[i]) 224 break; 225 } 226 227 return true; 228 } 229 230 extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym; 231 extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym; 232 233 struct mptcp_sock { 234 struct inet_connection_sock sk; 235 236 __u32 token; 237 struct sock *first; 238 char ca_name[TCP_CA_NAME_MAX]; 239 } __attribute__((preserve_access_index)); 240 241 #endif 242