1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019 Facebook */
3
4 #define _GNU_SOURCE
5 #include <netinet/in.h>
6 #include <arpa/inet.h>
7 #include <unistd.h>
8 #include <sched.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12
13 #include <bpf/bpf.h>
14 #include <bpf/libbpf.h>
15 #include <linux/compiler.h>
16
17 #include "network_helpers.h"
18 #include "cgroup_helpers.h"
19 #include "test_progs.h"
20 #include "test_sock_fields.skel.h"
21
/*
 * Keys used by check_result() when looking up entries in linum_map.
 * The final enumerator is not a key; it is the number of keys above it.
 */
enum bpf_linum_array_idx {
	EGRESS_LINUM_IDX		= 0,
	INGRESS_LINUM_IDX		= 1,
	READ_SK_DST_PORT_LINUM_IDX	= 2,
	__NR_BPF_LINUM_ARRAY_IDX,	/* == 3 */
};
28
/*
 * Value buffer handed to bpf_map_lookup_elem()/bpf_map_update_elem() for
 * the sk_pkt_out_cnt and sk_pkt_out_cnt10 maps.
 *
 * NOTE(review): presumably mirrors the value type declared by the BPF
 * program for those maps -- confirm against the BPF source before changing
 * the field order or types.
 */
struct bpf_spinlock_cnt {
	/* Only ever zero-initialised by this file. */
	struct bpf_spin_lock lock;
	/* Counter seeded by init_sk_storage() and read back in check_sk_pkt_out_cnt(). */
	__u32 cnt;
};
33
/* Cgroup paths joined by the test; the child path extends the parent path. */
#define PARENT_CGROUP	"/test-bpf-sock-fields"
#define CHILD_CGROUP	PARENT_CGROUP "/child"

/* Payload sent on every round; DATA_LEN counts the terminating NUL too. */
#define DATA		"Hello BPF!"
#define DATA_LEN	(sizeof(DATA))
38
39 static struct sockaddr_in6 srv_sa6, cli_sa6;
40 static int sk_pkt_out_cnt10_fd;
41 static struct test_sock_fields *skel;
42 static int sk_pkt_out_cnt_fd;
43 static __u64 parent_cg_id;
44 static __u64 child_cg_id;
45 static int linum_map_fd;
46 static __u32 duration;
47
/*
 * Detach the calling process into a new network namespace and bring the
 * loopback device up with the "ip" command.
 *
 * Returns true only when both steps succeed.  The "ip" command is not run
 * when unshare() fails.
 */
static bool create_netns(void)
{
	return ASSERT_OK(unshare(CLONE_NEWNET), "create netns") &&
	       ASSERT_OK(system("ip link set dev lo up"), "bring up lo");
}
58
print_sk(const struct bpf_sock * sk,const char * prefix)59 static void print_sk(const struct bpf_sock *sk, const char *prefix)
60 {
61 char src_ip4[24], dst_ip4[24];
62 char src_ip6[64], dst_ip6[64];
63
64 inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
65 inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
66 inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
67 inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
68
69 printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
70 "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
71 "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
72 prefix,
73 sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
74 sk->mark, sk->priority,
75 sk->src_ip4, src_ip4,
76 sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
77 src_ip6, sk->src_port,
78 sk->dst_ip4, dst_ip4,
79 sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
80 dst_ip6, ntohs(sk->dst_port));
81 }
82
print_tp(const struct bpf_tcp_sock * tp,const char * prefix)83 static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix)
84 {
85 printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
86 "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
87 "rate_delivered:%u rate_interval_us:%u packets_out:%u "
88 "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
89 "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
90 "bytes_received:%llu bytes_acked:%llu\n",
91 prefix,
92 tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
93 tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
94 tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
95 tp->packets_out, tp->retrans_out, tp->total_retrans,
96 tp->segs_in, tp->data_segs_in, tp->segs_out,
97 tp->data_segs_out, tp->lost_out, tp->sacked_out,
98 tp->bytes_received, tp->bytes_acked);
99 }
100
check_result(void)101 static void check_result(void)
102 {
103 struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
104 struct bpf_sock srv_sk, cli_sk, listen_sk;
105 __u32 idx, ingress_linum, egress_linum, linum;
106 int err;
107
108 idx = EGRESS_LINUM_IDX;
109 err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
110 CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
111 "err:%d errno:%d\n", err, errno);
112
113 idx = INGRESS_LINUM_IDX;
114 err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
115 CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
116 "err:%d errno:%d\n", err, errno);
117
118 idx = READ_SK_DST_PORT_LINUM_IDX;
119 err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
120 ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
121 ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
122
123 memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
124 memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
125 memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
126 memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp));
127 memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk));
128 memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp));
129
130 print_sk(&listen_sk, "listen_sk");
131 print_sk(&srv_sk, "srv_sk");
132 print_sk(&cli_sk, "cli_sk");
133 print_tp(&listen_tp, "listen_tp");
134 print_tp(&srv_tp, "srv_tp");
135 print_tp(&cli_tp, "cli_tp");
136
137 CHECK(listen_sk.state != 10 ||
138 listen_sk.family != AF_INET6 ||
139 listen_sk.protocol != IPPROTO_TCP ||
140 memcmp(listen_sk.src_ip6, &in6addr_loopback,
141 sizeof(listen_sk.src_ip6)) ||
142 listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
143 listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
144 listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
145 listen_sk.dst_port,
146 "listen_sk",
147 "Unexpected. Check listen_sk output. ingress_linum:%u\n",
148 ingress_linum);
149
150 CHECK(srv_sk.state == 10 ||
151 !srv_sk.state ||
152 srv_sk.family != AF_INET6 ||
153 srv_sk.protocol != IPPROTO_TCP ||
154 memcmp(srv_sk.src_ip6, &in6addr_loopback,
155 sizeof(srv_sk.src_ip6)) ||
156 memcmp(srv_sk.dst_ip6, &in6addr_loopback,
157 sizeof(srv_sk.dst_ip6)) ||
158 srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
159 srv_sk.dst_port != cli_sa6.sin6_port,
160 "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n",
161 egress_linum);
162
163 CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n");
164
165 CHECK(cli_sk.state == 10 ||
166 !cli_sk.state ||
167 cli_sk.family != AF_INET6 ||
168 cli_sk.protocol != IPPROTO_TCP ||
169 memcmp(cli_sk.src_ip6, &in6addr_loopback,
170 sizeof(cli_sk.src_ip6)) ||
171 memcmp(cli_sk.dst_ip6, &in6addr_loopback,
172 sizeof(cli_sk.dst_ip6)) ||
173 cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
174 cli_sk.dst_port != srv_sa6.sin6_port,
175 "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n",
176 egress_linum);
177
178 CHECK(listen_tp.data_segs_out ||
179 listen_tp.data_segs_in ||
180 listen_tp.total_retrans ||
181 listen_tp.bytes_acked,
182 "listen_tp",
183 "Unexpected. Check listen_tp output. ingress_linum:%u\n",
184 ingress_linum);
185
186 CHECK(srv_tp.data_segs_out != 2 ||
187 srv_tp.data_segs_in ||
188 srv_tp.snd_cwnd != 10 ||
189 srv_tp.total_retrans ||
190 srv_tp.bytes_acked < 2 * DATA_LEN,
191 "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n",
192 egress_linum);
193
194 CHECK(cli_tp.data_segs_out ||
195 cli_tp.data_segs_in != 2 ||
196 cli_tp.snd_cwnd != 10 ||
197 cli_tp.total_retrans ||
198 cli_tp.bytes_received < 2 * DATA_LEN,
199 "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n",
200 egress_linum);
201
202 CHECK(skel->bss->parent_cg_id != parent_cg_id,
203 "parent_cg_id", "%zu != %zu\n",
204 (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id);
205
206 CHECK(skel->bss->child_cg_id != child_cg_id,
207 "child_cg_id", "%zu != %zu\n",
208 (size_t)skel->bss->child_cg_id, (size_t)child_cg_id);
209 }
210
check_sk_pkt_out_cnt(int accept_fd,int cli_fd)211 static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
212 {
213 struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
214 int err;
215
216 pkt_out_cnt.cnt = ~0;
217 pkt_out_cnt10.cnt = ~0;
218 err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
219 if (!err)
220 err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
221 &pkt_out_cnt10);
222
223 /* The bpf prog only counts for fullsock and
224 * passive connection did not become fullsock until 3WHS
225 * had been finished, so the bpf prog only counted two data
226 * packet out.
227 */
228 CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 ||
229 pkt_out_cnt10.cnt < 0xeB9F + 20,
230 "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
231 "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
232 err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
233
234 pkt_out_cnt.cnt = ~0;
235 pkt_out_cnt10.cnt = ~0;
236 err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
237 if (!err)
238 err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
239 &pkt_out_cnt10);
240 /* Active connection is fullsock from the beginning.
241 * 1 SYN and 1 ACK during 3WHS
242 * 2 Acks on data packet.
243 *
244 * The bpf_prog initialized it to 0xeB9F.
245 */
246 CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 ||
247 pkt_out_cnt10.cnt < 0xeB9F + 40,
248 "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
249 "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
250 err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
251 }
252
/*
 * Create the entries for socket @sk_fd in the sk_pkt_out_cnt and
 * sk_pkt_out_cnt10 maps, both starting at @pkt_out_cnt.
 *
 * BPF_NOEXIST is passed, so the update is expected to fail if an entry
 * already exists.  The second map is left alone when the first update
 * fails.
 *
 * Returns 0 on success, otherwise the error returned by the failing
 * bpf_map_update_elem() call.
 */
static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
{
	struct bpf_spinlock_cnt seed = { .cnt = pkt_out_cnt };
	int ret;

	ret = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &seed,
				  BPF_NOEXIST);
	if (CHECK(ret, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
		  "err:%d errno:%d\n", ret, errno))
		return ret;

	ret = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &seed,
				  BPF_NOEXIST);
	CHECK(ret, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
	      "err:%d errno:%d\n", ret, errno);

	/* ret is 0 here exactly when both updates succeeded. */
	return ret;
}
273
test(void)274 static void test(void)
275 {
276 int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i;
277 socklen_t addrlen = sizeof(struct sockaddr_in6);
278 char buf[DATA_LEN];
279
280 /* Prepare listen_fd */
281 listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
282 /* start_server() has logged the error details */
283 if (CHECK_FAIL(listen_fd == -1))
284 goto done;
285
286 err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
287 if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
288 errno))
289 goto done;
290 memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
291
292 cli_fd = connect_to_fd(listen_fd, 0);
293 if (CHECK_FAIL(cli_fd == -1))
294 goto done;
295
296 err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
297 if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n",
298 err, errno))
299 goto done;
300
301 accept_fd = accept(listen_fd, NULL, NULL);
302 if (CHECK(accept_fd == -1, "accept(listen_fd)",
303 "accept_fd:%d errno:%d\n",
304 accept_fd, errno))
305 goto done;
306
307 if (init_sk_storage(accept_fd, 0xeB9F))
308 goto done;
309
310 for (i = 0; i < 2; i++) {
311 /* Send some data from accept_fd to cli_fd.
312 * MSG_EOR to stop kernel from coalescing two pkts.
313 */
314 err = send(accept_fd, DATA, DATA_LEN, MSG_EOR);
315 if (CHECK(err != DATA_LEN, "send(accept_fd)",
316 "err:%d errno:%d\n", err, errno))
317 goto done;
318
319 err = recv(cli_fd, buf, DATA_LEN, 0);
320 if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n",
321 err, errno))
322 goto done;
323 }
324
325 shutdown(cli_fd, SHUT_WR);
326 err = recv(accept_fd, buf, 1, 0);
327 if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n",
328 err, errno))
329 goto done;
330 shutdown(accept_fd, SHUT_WR);
331 err = recv(cli_fd, buf, 1, 0);
332 if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n",
333 err, errno))
334 goto done;
335 check_sk_pkt_out_cnt(accept_fd, cli_fd);
336 check_result();
337
338 done:
339 if (accept_fd != -1)
340 close(accept_fd);
341 if (cli_fd != -1)
342 close(cli_fd);
343 if (listen_fd != -1)
344 close(listen_fd);
345 }
346
serial_test_sock_fields(void)347 void serial_test_sock_fields(void)
348 {
349 int parent_cg_fd = -1, child_cg_fd = -1;
350 struct bpf_link *link;
351
352 /* Use a dedicated netns to have a fixed listen port */
353 if (!create_netns())
354 return;
355
356 /* Create a cgroup, get fd, and join it */
357 parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
358 if (CHECK_FAIL(parent_cg_fd < 0))
359 return;
360 parent_cg_id = get_cgroup_id(PARENT_CGROUP);
361 if (CHECK_FAIL(!parent_cg_id))
362 goto done;
363
364 child_cg_fd = test__join_cgroup(CHILD_CGROUP);
365 if (CHECK_FAIL(child_cg_fd < 0))
366 goto done;
367 child_cg_id = get_cgroup_id(CHILD_CGROUP);
368 if (CHECK_FAIL(!child_cg_id))
369 goto done;
370
371 skel = test_sock_fields__open_and_load();
372 if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
373 goto done;
374
375 link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
376 if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
377 goto done;
378 skel->links.egress_read_sock_fields = link;
379
380 link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
381 if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
382 goto done;
383 skel->links.ingress_read_sock_fields = link;
384
385 link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
386 if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
387 goto done;
388 skel->links.read_sk_dst_port = link;
389
390 linum_map_fd = bpf_map__fd(skel->maps.linum_map);
391 sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
392 sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10);
393
394 test();
395
396 done:
397 test_sock_fields__destroy(skel);
398 if (child_cg_fd >= 0)
399 close(child_cg_fd);
400 if (parent_cg_fd >= 0)
401 close(parent_cg_fd);
402 }
403