1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2019 Facebook */ 3 4 #include <linux/bpf.h> 5 #include <netinet/in.h> 6 #include <stdbool.h> 7 8 #include <bpf/bpf_helpers.h> 9 #include <bpf/bpf_endian.h> 10 #include "bpf_tcp_helpers.h" 11 12 enum bpf_linum_array_idx { 13 EGRESS_LINUM_IDX, 14 INGRESS_LINUM_IDX, 15 __NR_BPF_LINUM_ARRAY_IDX, 16 }; 17 18 struct { 19 __uint(type, BPF_MAP_TYPE_ARRAY); 20 __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX); 21 __type(key, __u32); 22 __type(value, __u32); 23 } linum_map SEC(".maps"); 24 25 struct bpf_spinlock_cnt { 26 struct bpf_spin_lock lock; 27 __u32 cnt; 28 }; 29 30 struct { 31 __uint(type, BPF_MAP_TYPE_SK_STORAGE); 32 __uint(map_flags, BPF_F_NO_PREALLOC); 33 __type(key, int); 34 __type(value, struct bpf_spinlock_cnt); 35 } sk_pkt_out_cnt SEC(".maps"); 36 37 struct { 38 __uint(type, BPF_MAP_TYPE_SK_STORAGE); 39 __uint(map_flags, BPF_F_NO_PREALLOC); 40 __type(key, int); 41 __type(value, struct bpf_spinlock_cnt); 42 } sk_pkt_out_cnt10 SEC(".maps"); 43 44 struct bpf_tcp_sock listen_tp = {}; 45 struct sockaddr_in6 srv_sa6 = {}; 46 struct bpf_tcp_sock cli_tp = {}; 47 struct bpf_tcp_sock srv_tp = {}; 48 struct bpf_sock listen_sk = {}; 49 struct bpf_sock srv_sk = {}; 50 struct bpf_sock cli_sk = {}; 51 __u64 parent_cg_id = 0; 52 __u64 child_cg_id = 0; 53 __u64 lsndtime = 0; 54 55 static bool is_loopback6(__u32 *a6) 56 { 57 return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); 58 } 59 60 static void skcpy(struct bpf_sock *dst, 61 const struct bpf_sock *src) 62 { 63 dst->bound_dev_if = src->bound_dev_if; 64 dst->family = src->family; 65 dst->type = src->type; 66 dst->protocol = src->protocol; 67 dst->mark = src->mark; 68 dst->priority = src->priority; 69 dst->src_ip4 = src->src_ip4; 70 dst->src_ip6[0] = src->src_ip6[0]; 71 dst->src_ip6[1] = src->src_ip6[1]; 72 dst->src_ip6[2] = src->src_ip6[2]; 73 dst->src_ip6[3] = src->src_ip6[3]; 74 dst->src_port = src->src_port; 75 dst->dst_ip4 = src->dst_ip4; 76 dst->dst_ip6[0] = src->dst_ip6[0]; 77 dst->dst_ip6[1] = src->dst_ip6[1]; 78 dst->dst_ip6[2] = src->dst_ip6[2]; 79 dst->dst_ip6[3] = src->dst_ip6[3]; 80 dst->dst_port = src->dst_port; 81 dst->state = src->state; 82 } 83 84 static void tpcpy(struct bpf_tcp_sock *dst, 85 const struct bpf_tcp_sock *src) 86 { 87 dst->snd_cwnd = src->snd_cwnd; 88 dst->srtt_us = src->srtt_us; 89 dst->rtt_min = src->rtt_min; 90 dst->snd_ssthresh = src->snd_ssthresh; 91 dst->rcv_nxt = src->rcv_nxt; 92 dst->snd_nxt = src->snd_nxt; 93 dst->snd_una = src->snd_una; 94 dst->mss_cache = src->mss_cache; 95 dst->ecn_flags = src->ecn_flags; 96 dst->rate_delivered = src->rate_delivered; 97 dst->rate_interval_us = src->rate_interval_us; 98 dst->packets_out = src->packets_out; 99 dst->retrans_out = src->retrans_out; 100 dst->total_retrans = src->total_retrans; 101 dst->segs_in = src->segs_in; 102 dst->data_segs_in = src->data_segs_in; 103 dst->segs_out = src->segs_out; 104 dst->data_segs_out = src->data_segs_out; 105 dst->lost_out = src->lost_out; 106 dst->sacked_out = src->sacked_out; 107 dst->bytes_received = src->bytes_received; 108 dst->bytes_acked = src->bytes_acked; 109 } 110 111 /* Always return CG_OK so that no pkt will be filtered out */ 112 #define CG_OK 1 113 114 #define RET_LOG() ({ \ 115 linum = __LINE__; \ 116 bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \ 117 return CG_OK; \ 118 }) 119 120 SEC("cgroup_skb/egress") 121 int egress_read_sock_fields(struct __sk_buff *skb) 122 { 123 struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; 124 struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; 125 struct bpf_tcp_sock *tp, *tp_ret; 126 struct bpf_sock *sk, *sk_ret; 127 __u32 linum, linum_idx; 128 struct tcp_sock *ktp; 129 130 linum_idx = EGRESS_LINUM_IDX; 131 132 sk = skb->sk; 133 if (!sk) 134 RET_LOG(); 135 136 /* Not the testing egress traffic or 137 * TCP_LISTEN (10) socket will be copied at the ingress side. 138 */ 139 if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) || 140 sk->state == 10) 141 return CG_OK; 142 143 if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) { 144 /* Server socket */ 145 sk_ret = &srv_sk; 146 tp_ret = &srv_tp; 147 } else if (sk->dst_port == srv_sa6.sin6_port) { 148 /* Client socket */ 149 sk_ret = &cli_sk; 150 tp_ret = &cli_tp; 151 } else { 152 /* Not the testing egress traffic */ 153 return CG_OK; 154 } 155 156 /* It must be a fullsock for cgroup_skb/egress prog */ 157 sk = bpf_sk_fullsock(sk); 158 if (!sk) 159 RET_LOG(); 160 161 /* Not the testing egress traffic */ 162 if (sk->protocol != IPPROTO_TCP) 163 return CG_OK; 164 165 tp = bpf_tcp_sock(sk); 166 if (!tp) 167 RET_LOG(); 168 169 skcpy(sk_ret, sk); 170 tpcpy(tp_ret, tp); 171 172 if (sk_ret == &srv_sk) { 173 ktp = bpf_skc_to_tcp_sock(sk); 174 175 if (!ktp) 176 RET_LOG(); 177 178 lsndtime = ktp->lsndtime; 179 180 child_cg_id = bpf_sk_cgroup_id(ktp); 181 if (!child_cg_id) 182 RET_LOG(); 183 184 parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2); 185 if (!parent_cg_id) 186 RET_LOG(); 187 188 /* The userspace has created it for srv sk */ 189 pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0); 190 pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp, 191 0, 0); 192 } else { 193 pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 194 &cli_cnt_init, 195 BPF_SK_STORAGE_GET_F_CREATE); 196 pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, 197 sk, &cli_cnt_init, 198 BPF_SK_STORAGE_GET_F_CREATE); 199 } 200 201 if (!pkt_out_cnt || !pkt_out_cnt10) 202 RET_LOG(); 203 204 /* Even both cnt and cnt10 have lock defined in their BTF, 205 * intentionally one cnt takes lock while one does not 206 * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE. 207 */ 208 pkt_out_cnt->cnt += 1; 209 bpf_spin_lock(&pkt_out_cnt10->lock); 210 pkt_out_cnt10->cnt += 10; 211 bpf_spin_unlock(&pkt_out_cnt10->lock); 212 213 return CG_OK; 214 } 215 216 SEC("cgroup_skb/ingress") 217 int ingress_read_sock_fields(struct __sk_buff *skb) 218 { 219 struct bpf_tcp_sock *tp; 220 __u32 linum, linum_idx; 221 struct bpf_sock *sk; 222 223 linum_idx = INGRESS_LINUM_IDX; 224 225 sk = skb->sk; 226 if (!sk) 227 RET_LOG(); 228 229 /* Not the testing ingress traffic to the server */ 230 if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) || 231 sk->src_port != bpf_ntohs(srv_sa6.sin6_port)) 232 return CG_OK; 233 234 /* Only interested in TCP_LISTEN */ 235 if (sk->state != 10) 236 return CG_OK; 237 238 /* It must be a fullsock for cgroup_skb/ingress prog */ 239 sk = bpf_sk_fullsock(sk); 240 if (!sk) 241 RET_LOG(); 242 243 tp = bpf_tcp_sock(sk); 244 if (!tp) 245 RET_LOG(); 246 247 skcpy(&listen_sk, sk); 248 tpcpy(&listen_tp, tp); 249 250 return CG_OK; 251 } 252 253 char _license[] SEC("license") = "GPL"; 254