1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019 Facebook */
3 
4 #include <linux/bpf.h>
5 #include <netinet/in.h>
6 #include <stdbool.h>
7 
8 #include <bpf/bpf_helpers.h>
9 #include <bpf/bpf_endian.h>
10 #include "bpf_tcp_helpers.h"
11 
/* Slots of linum_map: one per BPF program in this file that reports a
 * failing source line through RET_LOG().
 */
enum bpf_linum_array_idx {
	EGRESS_LINUM_IDX = 0,		/* egress_read_sock_fields() */
	INGRESS_LINUM_IDX,		/* ingress_read_sock_fields() */
	__NR_BPF_LINUM_ARRAY_IDX,	/* slot count, sizes linum_map; keep last */
};
17 
/* Array map with one __u32 value per enum bpf_linum_array_idx slot.
 * RET_LOG() writes the source line (__LINE__) of the check that failed
 * into the slot selected by the calling program's linum_idx.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
	__type(key, __u32);
	__type(value, __u32);
} linum_map SEC(".maps");
24 
/* A counter paired with a BPF spin lock.  Used as the value type of both
 * socket-storage maps declared below.
 */
struct bpf_spinlock_cnt {
	struct bpf_spin_lock lock;
	__u32 cnt;
};
29 
/* Socket-storage map whose counter egress_read_sock_fields() bumps by 1
 * for every packet it accounts.  That update is done without taking the
 * embedded lock.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt SEC(".maps");
36 
/* Socket-storage map whose counter egress_read_sock_fields() bumps by 10
 * for every packet it accounts.  That update is done while holding the
 * embedded lock (bpf_spin_lock()/bpf_spin_unlock()).
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");
43 
/* Global snapshots filled in by the programs below:
 *   listen_sk / listen_tp - written by ingress_read_sock_fields()
 *   srv_sk / srv_tp       - written by egress_read_sock_fields() (server)
 *   cli_sk / cli_tp       - written by egress_read_sock_fields() (client)
 *
 * srv_sa6 is only read in this file (its sin6_port selects the traffic to
 * look at), so it is presumably filled in from userspace - TODO confirm.
 */
struct bpf_tcp_sock listen_tp = {};
struct sockaddr_in6 srv_sa6 = {};
struct bpf_tcp_sock cli_tp = {};
struct bpf_tcp_sock srv_tp = {};
struct bpf_sock listen_sk = {};
struct bpf_sock srv_sk = {};
struct bpf_sock cli_sk = {};
/* The three values below are set on the server-socket path of
 * egress_read_sock_fields().
 */
__u64 parent_cg_id = 0;
__u64 child_cg_id = 0;
__u64 lsndtime = 0;
54 
55 static bool is_loopback6(__u32 *a6)
56 {
57 	return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
58 }
59 
60 static void skcpy(struct bpf_sock *dst,
61 		  const struct bpf_sock *src)
62 {
63 	dst->bound_dev_if = src->bound_dev_if;
64 	dst->family = src->family;
65 	dst->type = src->type;
66 	dst->protocol = src->protocol;
67 	dst->mark = src->mark;
68 	dst->priority = src->priority;
69 	dst->src_ip4 = src->src_ip4;
70 	dst->src_ip6[0] = src->src_ip6[0];
71 	dst->src_ip6[1] = src->src_ip6[1];
72 	dst->src_ip6[2] = src->src_ip6[2];
73 	dst->src_ip6[3] = src->src_ip6[3];
74 	dst->src_port = src->src_port;
75 	dst->dst_ip4 = src->dst_ip4;
76 	dst->dst_ip6[0] = src->dst_ip6[0];
77 	dst->dst_ip6[1] = src->dst_ip6[1];
78 	dst->dst_ip6[2] = src->dst_ip6[2];
79 	dst->dst_ip6[3] = src->dst_ip6[3];
80 	dst->dst_port = src->dst_port;
81 	dst->state = src->state;
82 }
83 
84 static void tpcpy(struct bpf_tcp_sock *dst,
85 		  const struct bpf_tcp_sock *src)
86 {
87 	dst->snd_cwnd = src->snd_cwnd;
88 	dst->srtt_us = src->srtt_us;
89 	dst->rtt_min = src->rtt_min;
90 	dst->snd_ssthresh = src->snd_ssthresh;
91 	dst->rcv_nxt = src->rcv_nxt;
92 	dst->snd_nxt = src->snd_nxt;
93 	dst->snd_una = src->snd_una;
94 	dst->mss_cache = src->mss_cache;
95 	dst->ecn_flags = src->ecn_flags;
96 	dst->rate_delivered = src->rate_delivered;
97 	dst->rate_interval_us = src->rate_interval_us;
98 	dst->packets_out = src->packets_out;
99 	dst->retrans_out = src->retrans_out;
100 	dst->total_retrans = src->total_retrans;
101 	dst->segs_in = src->segs_in;
102 	dst->data_segs_in = src->data_segs_in;
103 	dst->segs_out = src->segs_out;
104 	dst->data_segs_out = src->data_segs_out;
105 	dst->lost_out = src->lost_out;
106 	dst->sacked_out = src->sacked_out;
107 	dst->bytes_received = src->bytes_received;
108 	dst->bytes_acked = src->bytes_acked;
109 }
110 
/* Always return CG_OK so that no pkt will be filtered out */
#define CG_OK 1

/* Record the source line of a failed check in linum_map and leave the
 * calling program with CG_OK.
 *
 * The macro expands in the caller's scope and relies on two __u32 locals
 * being declared there under exactly these names:
 *   linum     - scratch variable that receives __LINE__
 *   linum_idx - the caller's slot in linum_map
 * It contains a `return`, so nothing after RET_LOG() runs.
 *
 * linum_map is a BPF_MAP_TYPE_ARRAY, whose elements always exist.  The
 * update therefore has to use BPF_ANY: with BPF_NOEXIST the kernel
 * rejects the update (-EEXIST) and the line number is never stored.
 */
#define RET_LOG() ({						\
	linum = __LINE__;					\
	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY);	\
	return CG_OK;						\
})
119 
120 SEC("cgroup_skb/egress")
121 int egress_read_sock_fields(struct __sk_buff *skb)
122 {
123 	struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
124 	struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
125 	struct bpf_tcp_sock *tp, *tp_ret;
126 	struct bpf_sock *sk, *sk_ret;
127 	__u32 linum, linum_idx;
128 	struct tcp_sock *ktp;
129 
130 	linum_idx = EGRESS_LINUM_IDX;
131 
132 	sk = skb->sk;
133 	if (!sk)
134 		RET_LOG();
135 
136 	/* Not the testing egress traffic or
137 	 * TCP_LISTEN (10) socket will be copied at the ingress side.
138 	 */
139 	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
140 	    sk->state == 10)
141 		return CG_OK;
142 
143 	if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
144 		/* Server socket */
145 		sk_ret = &srv_sk;
146 		tp_ret = &srv_tp;
147 	} else if (sk->dst_port == srv_sa6.sin6_port) {
148 		/* Client socket */
149 		sk_ret = &cli_sk;
150 		tp_ret = &cli_tp;
151 	} else {
152 		/* Not the testing egress traffic */
153 		return CG_OK;
154 	}
155 
156 	/* It must be a fullsock for cgroup_skb/egress prog */
157 	sk = bpf_sk_fullsock(sk);
158 	if (!sk)
159 		RET_LOG();
160 
161 	/* Not the testing egress traffic */
162 	if (sk->protocol != IPPROTO_TCP)
163 		return CG_OK;
164 
165 	tp = bpf_tcp_sock(sk);
166 	if (!tp)
167 		RET_LOG();
168 
169 	skcpy(sk_ret, sk);
170 	tpcpy(tp_ret, tp);
171 
172 	if (sk_ret == &srv_sk) {
173 		ktp = bpf_skc_to_tcp_sock(sk);
174 
175 		if (!ktp)
176 			RET_LOG();
177 
178 		lsndtime = ktp->lsndtime;
179 
180 		child_cg_id = bpf_sk_cgroup_id(ktp);
181 		if (!child_cg_id)
182 			RET_LOG();
183 
184 		parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
185 		if (!parent_cg_id)
186 			RET_LOG();
187 
188 		/* The userspace has created it for srv sk */
189 		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
190 		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
191 						   0, 0);
192 	} else {
193 		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
194 						 &cli_cnt_init,
195 						 BPF_SK_STORAGE_GET_F_CREATE);
196 		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
197 						   sk, &cli_cnt_init,
198 						   BPF_SK_STORAGE_GET_F_CREATE);
199 	}
200 
201 	if (!pkt_out_cnt || !pkt_out_cnt10)
202 		RET_LOG();
203 
204 	/* Even both cnt and cnt10 have lock defined in their BTF,
205 	 * intentionally one cnt takes lock while one does not
206 	 * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
207 	 */
208 	pkt_out_cnt->cnt += 1;
209 	bpf_spin_lock(&pkt_out_cnt10->lock);
210 	pkt_out_cnt10->cnt += 10;
211 	bpf_spin_unlock(&pkt_out_cnt10->lock);
212 
213 	return CG_OK;
214 }
215 
216 SEC("cgroup_skb/ingress")
217 int ingress_read_sock_fields(struct __sk_buff *skb)
218 {
219 	struct bpf_tcp_sock *tp;
220 	__u32 linum, linum_idx;
221 	struct bpf_sock *sk;
222 
223 	linum_idx = INGRESS_LINUM_IDX;
224 
225 	sk = skb->sk;
226 	if (!sk)
227 		RET_LOG();
228 
229 	/* Not the testing ingress traffic to the server */
230 	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
231 	    sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
232 		return CG_OK;
233 
234 	/* Only interested in TCP_LISTEN */
235 	if (sk->state != 10)
236 		return CG_OK;
237 
238 	/* It must be a fullsock for cgroup_skb/ingress prog */
239 	sk = bpf_sk_fullsock(sk);
240 	if (!sk)
241 		RET_LOG();
242 
243 	tp = bpf_tcp_sock(sk);
244 	if (!tp)
245 		RET_LOG();
246 
247 	skcpy(&listen_sk, sk);
248 	tpcpy(&listen_tp, tp);
249 
250 	return CG_OK;
251 }
252 
/* License string of this BPF object, emitted through SEC("license"). */
char _license[] SEC("license") = "GPL";
254