1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019 Facebook */
3 
4 #include <netinet/in.h>
5 #include <arpa/inet.h>
6 #include <unistd.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10 
11 #include <bpf/bpf.h>
12 #include <bpf/libbpf.h>
13 #include <linux/compiler.h>
14 
15 #include "network_helpers.h"
16 #include "cgroup_helpers.h"
17 #include "test_progs.h"
18 #include "test_sock_fields.skel.h"
19 
/* Keys used by this file when looking up entries in the linum map. */
enum bpf_linum_array_idx {
	EGRESS_LINUM_IDX = 0,
	INGRESS_LINUM_IDX = 1,
	__NR_BPF_LINUM_ARRAY_IDX = 2,	/* number of valid indices */
};
25 
26 struct bpf_spinlock_cnt {
27 	struct bpf_spin_lock lock;
28 	__u32 cnt;
29 };
30 
31 #define PARENT_CGROUP	"/test-bpf-sock-fields"
32 #define CHILD_CGROUP	"/test-bpf-sock-fields/child"
33 #define DATA "Hello BPF!"
34 #define DATA_LEN sizeof(DATA)
35 
36 static struct sockaddr_in6 srv_sa6, cli_sa6;
37 static int sk_pkt_out_cnt10_fd;
38 static struct test_sock_fields *skel;
39 static int sk_pkt_out_cnt_fd;
40 static __u64 parent_cg_id;
41 static __u64 child_cg_id;
42 static int linum_map_fd;
43 static __u32 duration;
44 
45 static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
46 static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
47 
48 static void print_sk(const struct bpf_sock *sk, const char *prefix)
49 {
50 	char src_ip4[24], dst_ip4[24];
51 	char src_ip6[64], dst_ip6[64];
52 
53 	inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
54 	inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
55 	inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
56 	inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
57 
58 	printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
59 	       "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
60 	       "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
61 	       prefix,
62 	       sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
63 	       sk->mark, sk->priority,
64 	       sk->src_ip4, src_ip4,
65 	       sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
66 	       src_ip6, sk->src_port,
67 	       sk->dst_ip4, dst_ip4,
68 	       sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
69 	       dst_ip6, ntohs(sk->dst_port));
70 }
71 
72 static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix)
73 {
74 	printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
75 	       "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
76 	       "rate_delivered:%u rate_interval_us:%u packets_out:%u "
77 	       "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
78 	       "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
79 	       "bytes_received:%llu bytes_acked:%llu\n",
80 	       prefix,
81 	       tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
82 	       tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
83 	       tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
84 	       tp->packets_out, tp->retrans_out, tp->total_retrans,
85 	       tp->segs_in, tp->data_segs_in, tp->segs_out,
86 	       tp->data_segs_out, tp->lost_out, tp->sacked_out,
87 	       tp->bytes_received, tp->bytes_acked);
88 }
89 
90 static void check_result(void)
91 {
92 	struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
93 	struct bpf_sock srv_sk, cli_sk, listen_sk;
94 	__u32 ingress_linum, egress_linum;
95 	int err;
96 
97 	err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
98 				  &egress_linum);
99 	CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
100 	      "err:%d errno:%d\n", err, errno);
101 
102 	err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
103 				  &ingress_linum);
104 	CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
105 	      "err:%d errno:%d\n", err, errno);
106 
107 	memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
108 	memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
109 	memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
110 	memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp));
111 	memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk));
112 	memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp));
113 
114 	print_sk(&listen_sk, "listen_sk");
115 	print_sk(&srv_sk, "srv_sk");
116 	print_sk(&cli_sk, "cli_sk");
117 	print_tp(&listen_tp, "listen_tp");
118 	print_tp(&srv_tp, "srv_tp");
119 	print_tp(&cli_tp, "cli_tp");
120 
121 	CHECK(listen_sk.state != 10 ||
122 	      listen_sk.family != AF_INET6 ||
123 	      listen_sk.protocol != IPPROTO_TCP ||
124 	      memcmp(listen_sk.src_ip6, &in6addr_loopback,
125 		     sizeof(listen_sk.src_ip6)) ||
126 	      listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
127 	      listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
128 	      listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
129 	      listen_sk.dst_port,
130 	      "listen_sk",
131 	      "Unexpected. Check listen_sk output. ingress_linum:%u\n",
132 	      ingress_linum);
133 
134 	CHECK(srv_sk.state == 10 ||
135 	      !srv_sk.state ||
136 	      srv_sk.family != AF_INET6 ||
137 	      srv_sk.protocol != IPPROTO_TCP ||
138 	      memcmp(srv_sk.src_ip6, &in6addr_loopback,
139 		     sizeof(srv_sk.src_ip6)) ||
140 	      memcmp(srv_sk.dst_ip6, &in6addr_loopback,
141 		     sizeof(srv_sk.dst_ip6)) ||
142 	      srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
143 	      srv_sk.dst_port != cli_sa6.sin6_port,
144 	      "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n",
145 	      egress_linum);
146 
147 	CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n");
148 
149 	CHECK(cli_sk.state == 10 ||
150 	      !cli_sk.state ||
151 	      cli_sk.family != AF_INET6 ||
152 	      cli_sk.protocol != IPPROTO_TCP ||
153 	      memcmp(cli_sk.src_ip6, &in6addr_loopback,
154 		     sizeof(cli_sk.src_ip6)) ||
155 	      memcmp(cli_sk.dst_ip6, &in6addr_loopback,
156 		     sizeof(cli_sk.dst_ip6)) ||
157 	      cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
158 	      cli_sk.dst_port != srv_sa6.sin6_port,
159 	      "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n",
160 	      egress_linum);
161 
162 	CHECK(listen_tp.data_segs_out ||
163 	      listen_tp.data_segs_in ||
164 	      listen_tp.total_retrans ||
165 	      listen_tp.bytes_acked,
166 	      "listen_tp",
167 	      "Unexpected. Check listen_tp output. ingress_linum:%u\n",
168 	      ingress_linum);
169 
170 	CHECK(srv_tp.data_segs_out != 2 ||
171 	      srv_tp.data_segs_in ||
172 	      srv_tp.snd_cwnd != 10 ||
173 	      srv_tp.total_retrans ||
174 	      srv_tp.bytes_acked < 2 * DATA_LEN,
175 	      "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n",
176 	      egress_linum);
177 
178 	CHECK(cli_tp.data_segs_out ||
179 	      cli_tp.data_segs_in != 2 ||
180 	      cli_tp.snd_cwnd != 10 ||
181 	      cli_tp.total_retrans ||
182 	      cli_tp.bytes_received < 2 * DATA_LEN,
183 	      "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n",
184 	      egress_linum);
185 
186 	CHECK(skel->bss->parent_cg_id != parent_cg_id,
187 	      "parent_cg_id", "%zu != %zu\n",
188 	      (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id);
189 
190 	CHECK(skel->bss->child_cg_id != child_cg_id,
191 	      "child_cg_id", "%zu != %zu\n",
192 	       (size_t)skel->bss->child_cg_id, (size_t)child_cg_id);
193 }
194 
195 static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
196 {
197 	struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
198 	int err;
199 
200 	pkt_out_cnt.cnt = ~0;
201 	pkt_out_cnt10.cnt = ~0;
202 	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
203 	if (!err)
204 		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
205 					  &pkt_out_cnt10);
206 
207 	/* The bpf prog only counts for fullsock and
208 	 * passive connection did not become fullsock until 3WHS
209 	 * had been finished, so the bpf prog only counted two data
210 	 * packet out.
211 	 */
212 	CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 ||
213 	      pkt_out_cnt10.cnt < 0xeB9F + 20,
214 	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
215 	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
216 	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
217 
218 	pkt_out_cnt.cnt = ~0;
219 	pkt_out_cnt10.cnt = ~0;
220 	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
221 	if (!err)
222 		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
223 					  &pkt_out_cnt10);
224 	/* Active connection is fullsock from the beginning.
225 	 * 1 SYN and 1 ACK during 3WHS
226 	 * 2 Acks on data packet.
227 	 *
228 	 * The bpf_prog initialized it to 0xeB9F.
229 	 */
230 	CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 ||
231 	      pkt_out_cnt10.cnt < 0xeB9F + 40,
232 	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
233 	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
234 	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
235 }
236 
237 static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
238 {
239 	struct bpf_spinlock_cnt scnt = {};
240 	int err;
241 
242 	scnt.cnt = pkt_out_cnt;
243 	err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
244 				  BPF_NOEXIST);
245 	if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
246 		  "err:%d errno:%d\n", err, errno))
247 		return err;
248 
249 	err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
250 				  BPF_NOEXIST);
251 	if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
252 		  "err:%d errno:%d\n", err, errno))
253 		return err;
254 
255 	return 0;
256 }
257 
258 static void test(void)
259 {
260 	int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i;
261 	socklen_t addrlen = sizeof(struct sockaddr_in6);
262 	char buf[DATA_LEN];
263 
264 	/* Prepare listen_fd */
265 	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
266 	/* start_server() has logged the error details */
267 	if (CHECK_FAIL(listen_fd == -1))
268 		goto done;
269 
270 	err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
271 	if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
272 		  errno))
273 		goto done;
274 	memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
275 
276 	cli_fd = connect_to_fd(listen_fd, 0);
277 	if (CHECK_FAIL(cli_fd == -1))
278 		goto done;
279 
280 	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
281 	if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n",
282 		  err, errno))
283 		goto done;
284 
285 	accept_fd = accept(listen_fd, NULL, NULL);
286 	if (CHECK(accept_fd == -1, "accept(listen_fd)",
287 		  "accept_fd:%d errno:%d\n",
288 		  accept_fd, errno))
289 		goto done;
290 
291 	if (init_sk_storage(accept_fd, 0xeB9F))
292 		goto done;
293 
294 	for (i = 0; i < 2; i++) {
295 		/* Send some data from accept_fd to cli_fd.
296 		 * MSG_EOR to stop kernel from coalescing two pkts.
297 		 */
298 		err = send(accept_fd, DATA, DATA_LEN, MSG_EOR);
299 		if (CHECK(err != DATA_LEN, "send(accept_fd)",
300 			  "err:%d errno:%d\n", err, errno))
301 			goto done;
302 
303 		err = recv(cli_fd, buf, DATA_LEN, 0);
304 		if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n",
305 			  err, errno))
306 			goto done;
307 	}
308 
309 	shutdown(cli_fd, SHUT_WR);
310 	err = recv(accept_fd, buf, 1, 0);
311 	if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n",
312 		  err, errno))
313 		goto done;
314 	shutdown(accept_fd, SHUT_WR);
315 	err = recv(cli_fd, buf, 1, 0);
316 	if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n",
317 		  err, errno))
318 		goto done;
319 	check_sk_pkt_out_cnt(accept_fd, cli_fd);
320 	check_result();
321 
322 done:
323 	if (accept_fd != -1)
324 		close(accept_fd);
325 	if (cli_fd != -1)
326 		close(cli_fd);
327 	if (listen_fd != -1)
328 		close(listen_fd);
329 }
330 
331 void serial_test_sock_fields(void)
332 {
333 	struct bpf_link *egress_link = NULL, *ingress_link = NULL;
334 	int parent_cg_fd = -1, child_cg_fd = -1;
335 
336 	/* Create a cgroup, get fd, and join it */
337 	parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
338 	if (CHECK_FAIL(parent_cg_fd < 0))
339 		return;
340 	parent_cg_id = get_cgroup_id(PARENT_CGROUP);
341 	if (CHECK_FAIL(!parent_cg_id))
342 		goto done;
343 
344 	child_cg_fd = test__join_cgroup(CHILD_CGROUP);
345 	if (CHECK_FAIL(child_cg_fd < 0))
346 		goto done;
347 	child_cg_id = get_cgroup_id(CHILD_CGROUP);
348 	if (CHECK_FAIL(!child_cg_id))
349 		goto done;
350 
351 	skel = test_sock_fields__open_and_load();
352 	if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
353 		goto done;
354 
355 	egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
356 						 child_cg_fd);
357 	if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
358 		goto done;
359 
360 	ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
361 						  child_cg_fd);
362 	if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
363 		goto done;
364 
365 	linum_map_fd = bpf_map__fd(skel->maps.linum_map);
366 	sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
367 	sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10);
368 
369 	test();
370 
371 done:
372 	bpf_link__destroy(egress_link);
373 	bpf_link__destroy(ingress_link);
374 	test_sock_fields__destroy(skel);
375 	if (child_cg_fd >= 0)
376 		close(child_cg_fd);
377 	if (parent_cg_fd >= 0)
378 		close(parent_cg_fd);
379 }
380