// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */

/*
 * cgroup_skb BPF programs that read struct bpf_sock / struct bpf_tcp_sock
 * fields of the socket attached to an skb and copy them into the global
 * variables defined below.
 *
 * Every return path yields CG_OK, so these programs never filter a packet.
 * When a check bails out through RET_LOG(), the source line of that check
 * is stored in linum_map under the program's own index.
 */

#include <linux/bpf.h>
#include <netinet/in.h>
#include <stdbool.h>

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_tcp_helpers.h"

/*
 * Keys into linum_map: one slot per program in this file.
 * __NR_BPF_LINUM_ARRAY_IDX is the slot count and sizes the map.
 */
enum bpf_linum_array_idx {
	EGRESS_LINUM_IDX,
	INGRESS_LINUM_IDX,
	READ_SK_DST_PORT_LINUM_IDX,
	__NR_BPF_LINUM_ARRAY_IDX,
};

/*
 * Array map written by RET_LOG(): the value is the __LINE__ of the
 * RET_LOG() call that was taken, the key is the program's linum_idx.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
	__type(key, __u32);
	__type(value, __u32);
} linum_map SEC(".maps");

/* Value type of both sk-storage maps below: a counter plus a spin lock. */
struct bpf_spinlock_cnt {
	struct bpf_spin_lock lock;
	__u32 cnt;
};

/*
 * Per-socket storage. egress_read_sock_fields() adds 1 to ->cnt on each
 * run that reaches the update, WITHOUT taking ->lock.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt SEC(".maps");

/*
 * Per-socket storage. egress_read_sock_fields() adds 10 to ->cnt on each
 * run that reaches the update, while holding ->lock.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");

/*
 * Snapshots filled in by the programs below:
 *   listen_sk / listen_tp - written by ingress_read_sock_fields()
 *   srv_sk / srv_tp       - written by egress_read_sock_fields() for the
 *                           socket whose src_port is the server port
 *   cli_sk / cli_tp       - written by egress_read_sock_fields() for the
 *                           socket whose dst_port is the server port
 *
 * srv_sa6 is only read in this file; its sin6_port selects the traffic
 * under test.
 * NOTE(review): presumably srv_sa6 is filled in by userspace before the
 * programs run - confirm against the loader.
 */
struct bpf_tcp_sock listen_tp = {};
struct sockaddr_in6 srv_sa6 = {};
struct bpf_tcp_sock cli_tp = {};
struct bpf_tcp_sock srv_tp = {};
struct bpf_sock listen_sk = {};
struct bpf_sock srv_sk = {};
struct bpf_sock cli_sk = {};
/* Written by egress_read_sock_fields() on the server-socket path only. */
__u64 parent_cg_id = 0;
__u64 child_cg_id = 0;
__u64 lsndtime = 0;

/*
 * Return true when the four 32-bit words at @a6 are 0, 0, 0 and
 * bpf_htonl(1), i.e. the IPv6 address ::1.
 */
static bool is_loopback6(__u32 *a6)
{
	return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
}

/*
 * Copy the bpf_sock fields listed below from @src to @dst, one
 * assignment per field (src_ip6/dst_ip6 one word at a time).
 * NOTE(review): presumably written field-by-field because @src is a
 * context pointer that cannot be block-copied - confirm before
 * replacing with a struct assignment or memcpy.
 */
static void skcpy(struct bpf_sock *dst,
		  const struct bpf_sock *src)
{
	dst->bound_dev_if = src->bound_dev_if;
	dst->family = src->family;
	dst->type = src->type;
	dst->protocol = src->protocol;
	dst->mark = src->mark;
	dst->priority = src->priority;
	dst->src_ip4 = src->src_ip4;
	dst->src_ip6[0] = src->src_ip6[0];
	dst->src_ip6[1] = src->src_ip6[1];
	dst->src_ip6[2] = src->src_ip6[2];
	dst->src_ip6[3] = src->src_ip6[3];
	dst->src_port = src->src_port;
	dst->dst_ip4 = src->dst_ip4;
	dst->dst_ip6[0] = src->dst_ip6[0];
	dst->dst_ip6[1] = src->dst_ip6[1];
	dst->dst_ip6[2] = src->dst_ip6[2];
	dst->dst_ip6[3] = src->dst_ip6[3];
	dst->dst_port = src->dst_port;
	dst->state = src->state;
}

/*
 * Copy the bpf_tcp_sock fields listed below from @src to @dst, one
 * assignment per field. Only the fields named here are copied; any
 * other member of @dst is left untouched.
 */
static void tpcpy(struct bpf_tcp_sock *dst,
		  const struct bpf_tcp_sock *src)
{
	dst->snd_cwnd = src->snd_cwnd;
	dst->srtt_us = src->srtt_us;
	dst->rtt_min = src->rtt_min;
	dst->snd_ssthresh = src->snd_ssthresh;
	dst->rcv_nxt = src->rcv_nxt;
	dst->snd_nxt = src->snd_nxt;
	dst->snd_una = src->snd_una;
	dst->mss_cache = src->mss_cache;
	dst->ecn_flags = src->ecn_flags;
	dst->rate_delivered = src->rate_delivered;
	dst->rate_interval_us = src->rate_interval_us;
	dst->packets_out = src->packets_out;
	dst->retrans_out = src->retrans_out;
	dst->total_retrans = src->total_retrans;
	dst->segs_in = src->segs_in;
	dst->data_segs_in = src->data_segs_in;
	dst->segs_out = src->segs_out;
	dst->data_segs_out = src->data_segs_out;
	dst->lost_out = src->lost_out;
	dst->sacked_out = src->sacked_out;
	dst->bytes_received = src->bytes_received;
	dst->bytes_acked = src->bytes_acked;
}

/* Always return CG_OK so that no pkt will be filtered out */
#define CG_OK 1

/*
 * Record the line of this RET_LOG() use in linum_map[linum_idx] and
 * return CG_OK from the ENCLOSING function.
 *
 * The macro is not self-contained: the caller must have locals named
 * `linum` and `linum_idx` (both __u32) in scope, with linum_idx already
 * set to the program's enum bpf_linum_array_idx value.
 */
#define RET_LOG() ({						\
	linum = __LINE__;					\
	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY);	\
	return CG_OK;						\
})

/*
 * Egress program: snapshot the server-side or client-side TCP socket.
 *
 * For an IPv6-loopback, non-listening TCP socket whose src_port or
 * dst_port matches srv_sa6.sin6_port, copy its bpf_sock and bpf_tcp_sock
 * fields into srv_sk/srv_tp or cli_sk/cli_tp, then bump the two
 * per-socket counters (cnt by 1, cnt10 by 10).
 *
 * On the server path it additionally stores lsndtime, child_cg_id and
 * parent_cg_id.
 *
 * Always returns CG_OK. Paths that are "not the traffic under test"
 * return silently; unexpected NULL/zero results return through RET_LOG()
 * and leave their line number in linum_map[EGRESS_LINUM_IDX].
 */
SEC("cgroup_skb/egress")
int egress_read_sock_fields(struct __sk_buff *skb)
{
	/* Initial value used when the client's storage is created below. */
	struct bpf_spinlock_cnt cli_cnt_init = { .lock = {}, .cnt = 0xeB9F };
	struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
	struct bpf_tcp_sock *tp, *tp_ret;
	struct bpf_sock *sk, *sk_ret;
	__u32 linum, linum_idx;		/* consumed by RET_LOG() */
	struct tcp_sock *ktp;

	linum_idx = EGRESS_LINUM_IDX;

	sk = skb->sk;
	if (!sk)
		RET_LOG();

	/* Skip anything that is not IPv6 loopback, and skip the listening
	 * socket, which is covered by the cgroup_skb/ingress test program.
	 */
	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
	    sk->state == BPF_TCP_LISTEN)
		return CG_OK;

	/* src_port is compared against bpf_ntohs(sin6_port), dst_port
	 * against sin6_port as-is.
	 * NOTE(review): i.e. src_port is treated as host byte order and
	 * dst_port as network byte order - keep the asymmetry.
	 */
	if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
		/* Server socket */
		sk_ret = &srv_sk;
		tp_ret = &srv_tp;
	} else if (sk->dst_port == srv_sa6.sin6_port) {
		/* Client socket */
		sk_ret = &cli_sk;
		tp_ret = &cli_tp;
	} else {
		/* Not the testing egress traffic */
		return CG_OK;
	}

	/* It must be a fullsock for cgroup_skb/egress prog */
	sk = bpf_sk_fullsock(sk);
	if (!sk)
		RET_LOG();

	/* Not the testing egress traffic */
	if (sk->protocol != IPPROTO_TCP)
		return CG_OK;

	tp = bpf_tcp_sock(sk);
	if (!tp)
		RET_LOG();

	/* Publish the snapshot into the globals selected above. */
	skcpy(sk_ret, sk);
	tpcpy(tp_ret, tp);

	if (sk_ret == &srv_sk) {
		/* Server path: go through the kernel tcp_sock pointer and
		 * hand that same pointer to the cgroup-id and sk-storage
		 * helpers.
		 */
		ktp = bpf_skc_to_tcp_sock(sk);

		if (!ktp)
			RET_LOG();

		lsndtime = ktp->lsndtime;

		/* A zero cgroup id is treated as a failure. */
		child_cg_id = bpf_sk_cgroup_id(ktp);
		if (!child_cg_id)
			RET_LOG();

		/* Second argument is the ancestor level requested. */
		parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
		if (!parent_cg_id)
			RET_LOG();

		/* The userspace has created it for srv sk, so no initial
		 * value and no create flag are passed here.
		 */
		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
						   0, 0);
	} else {
		/* Client path: request creation of the storage, passing
		 * cli_cnt_init as the initial value.
		 */
		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
						 &cli_cnt_init,
						 BPF_SK_STORAGE_GET_F_CREATE);
		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
						   sk, &cli_cnt_init,
						   BPF_SK_STORAGE_GET_F_CREATE);
	}

	if (!pkt_out_cnt || !pkt_out_cnt10)
		RET_LOG();

	/* Even though both cnt and cnt10 have a lock defined in their BTF,
	 * intentionally one cnt takes the lock while the other does not,
	 * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
	 */
	pkt_out_cnt->cnt += 1;
	bpf_spin_lock(&pkt_out_cnt10->lock);
	pkt_out_cnt10->cnt += 10;
	bpf_spin_unlock(&pkt_out_cnt10->lock);

	return CG_OK;
}

/*
 * Ingress program: snapshot the listening server socket.
 *
 * For an IPv6-loopback socket in BPF_TCP_LISTEN state whose src_port
 * equals bpf_ntohs(srv_sa6.sin6_port), copy its bpf_sock and
 * bpf_tcp_sock fields into listen_sk / listen_tp.
 *
 * Always returns CG_OK. Unexpected NULL results return through RET_LOG()
 * and leave their line number in linum_map[INGRESS_LINUM_IDX].
 */
SEC("cgroup_skb/ingress")
int ingress_read_sock_fields(struct __sk_buff *skb)
{
	struct bpf_tcp_sock *tp;
	__u32 linum, linum_idx;		/* consumed by RET_LOG() */
	struct bpf_sock *sk;

	linum_idx = INGRESS_LINUM_IDX;

	sk = skb->sk;
	if (!sk)
		RET_LOG();

	/* Not the testing ingress traffic to the server */
	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
	    sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
		return CG_OK;

	/* Only interested in the listening socket */
	if (sk->state != BPF_TCP_LISTEN)
		return CG_OK;

	/* It must be a fullsock for cgroup_skb/ingress prog */
	sk = bpf_sk_fullsock(sk);
	if (!sk)
		RET_LOG();

	tp = bpf_tcp_sock(sk);
	if (!tp)
		RET_LOG();

	skcpy(&listen_sk, sk);
	tpcpy(&listen_tp, tp);

	return CG_OK;
}

/*
 * The three helpers below read sk->dst_port through pointers of three
 * different widths (__u32, __u16, __u8) and check that each read sees
 * port 0xcafe. They are __noinline so each stays a separate function.
 *
 * NOTE: 4-byte load from bpf_sock at dst_port offset is quirky. It
 * gets rewritten by the access converter to a 2-byte load for
 * backward compatibility. Treating the load result as a be16 value
 * makes the code portable across little- and big-endian platforms.
 */
static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
{
	/* Deliberate __u32 read at the dst_port offset; see NOTE above. */
	__u32 *word = (__u32 *)&sk->dst_port;
	return word[0] == bpf_htons(0xcafe);
}

/* Read dst_port through a __u16 pointer; compare with bpf_htons(0xcafe). */
static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
{
	__u16 *half = (__u16 *)&sk->dst_port;
	return half[0] == bpf_htons(0xcafe);
}

/*
 * Read dst_port through a __u8 pointer, one byte at a time: the byte at
 * the lower address must be 0xca and the next one 0xfe.
 */
static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
{
	__u8 *byte = (__u8 *)&sk->dst_port;
	return byte[0] == 0xca && byte[1] == 0xfe;
}

/*
 * Egress program: for a socket in BPF_TCP_SYN_SENT state, verify that
 * dst_port reads as 0xcafe with 4-, 2- and 1-byte accesses.
 *
 * Always returns CG_OK. The first failing width returns through
 * RET_LOG(), leaving its line in linum_map[READ_SK_DST_PORT_LINUM_IDX].
 * NOTE(review): presumably the userspace side connects to port 0xcafe
 * for this check - confirm against the test driver.
 */
SEC("cgroup_skb/egress")
int read_sk_dst_port(struct __sk_buff *skb)
{
	__u32 linum, linum_idx;		/* consumed by RET_LOG() */
	struct bpf_sock *sk;

	linum_idx = READ_SK_DST_PORT_LINUM_IDX;

	sk = skb->sk;
	if (!sk)
		RET_LOG();

	/* Ignore everything but the SYN from the client socket */
	if (sk->state != BPF_TCP_SYN_SENT)
		return CG_OK;

	if (!sk_dst_port__load_word(sk))
		RET_LOG();
	if (!sk_dst_port__load_half(sk))
		RET_LOG();
	if (!sk_dst_port__load_byte(sk))
		RET_LOG();

	return CG_OK;
}

/* License string placed in the "license" section of the object. */
char _license[] SEC("license") = "GPL";