1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2019 Facebook */ 3 4 #define _GNU_SOURCE 5 #include <netinet/in.h> 6 #include <arpa/inet.h> 7 #include <unistd.h> 8 #include <sched.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #include <errno.h> 12 13 #include <bpf/bpf.h> 14 #include <bpf/libbpf.h> 15 #include <linux/compiler.h> 16 17 #include "network_helpers.h" 18 #include "cgroup_helpers.h" 19 #include "test_progs.h" 20 #include "test_sock_fields.skel.h" 21 22 enum bpf_linum_array_idx { 23 EGRESS_LINUM_IDX, 24 INGRESS_LINUM_IDX, 25 READ_SK_DST_PORT_LINUM_IDX, 26 __NR_BPF_LINUM_ARRAY_IDX, 27 }; 28 29 struct bpf_spinlock_cnt { 30 struct bpf_spin_lock lock; 31 __u32 cnt; 32 }; 33 34 #define PARENT_CGROUP "/test-bpf-sock-fields" 35 #define CHILD_CGROUP "/test-bpf-sock-fields/child" 36 #define DATA "Hello BPF!" 37 #define DATA_LEN sizeof(DATA) 38 39 static struct sockaddr_in6 srv_sa6, cli_sa6; 40 static int sk_pkt_out_cnt10_fd; 41 static struct test_sock_fields *skel; 42 static int sk_pkt_out_cnt_fd; 43 static __u64 parent_cg_id; 44 static __u64 child_cg_id; 45 static int linum_map_fd; 46 static __u32 duration; 47 48 static bool create_netns(void) 49 { 50 if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) 51 return false; 52 53 if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo")) 54 return false; 55 56 return true; 57 } 58 59 static void print_sk(const struct bpf_sock *sk, const char *prefix) 60 { 61 char src_ip4[24], dst_ip4[24]; 62 char src_ip6[64], dst_ip6[64]; 63 64 inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); 65 inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); 66 inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); 67 inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); 68 69 printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " 70 "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " 71 "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", 72 prefix, 73 sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, 74 sk->mark, sk->priority, 75 sk->src_ip4, src_ip4, 76 sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], 77 src_ip6, sk->src_port, 78 sk->dst_ip4, dst_ip4, 79 sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], 80 dst_ip6, ntohs(sk->dst_port)); 81 } 82 83 static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix) 84 { 85 printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " 86 "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " 87 "rate_delivered:%u rate_interval_us:%u packets_out:%u " 88 "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " 89 "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " 90 "bytes_received:%llu bytes_acked:%llu\n", 91 prefix, 92 tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, 93 tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, 94 tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, 95 tp->packets_out, tp->retrans_out, tp->total_retrans, 96 tp->segs_in, tp->data_segs_in, tp->segs_out, 97 tp->data_segs_out, tp->lost_out, tp->sacked_out, 98 tp->bytes_received, tp->bytes_acked); 99 } 100 101 static void check_result(void) 102 { 103 struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; 104 struct bpf_sock srv_sk, cli_sk, listen_sk; 105 __u32 idx, ingress_linum, egress_linum, linum; 106 int err; 107 108 idx = EGRESS_LINUM_IDX; 109 err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum); 110 CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", 111 "err:%d errno:%d\n", err, errno); 112 113 idx = INGRESS_LINUM_IDX; 114 err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum); 115 CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", 116 "err:%d errno:%d\n", err, errno); 117 118 idx = READ_SK_DST_PORT_LINUM_IDX; 119 err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum); 120 ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)"); 121 ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line"); 122 123 memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); 124 memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp)); 125 memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk)); 126 memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp)); 127 memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk)); 128 memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp)); 129 130 print_sk(&listen_sk, "listen_sk"); 131 print_sk(&srv_sk, "srv_sk"); 132 print_sk(&cli_sk, "cli_sk"); 133 print_tp(&listen_tp, "listen_tp"); 134 print_tp(&srv_tp, "srv_tp"); 135 print_tp(&cli_tp, "cli_tp"); 136 137 CHECK(listen_sk.state != 10 || 138 listen_sk.family != AF_INET6 || 139 listen_sk.protocol != IPPROTO_TCP || 140 memcmp(listen_sk.src_ip6, &in6addr_loopback, 141 sizeof(listen_sk.src_ip6)) || 142 listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || 143 listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || 144 listen_sk.src_port != ntohs(srv_sa6.sin6_port) || 145 listen_sk.dst_port, 146 "listen_sk", 147 "Unexpected. Check listen_sk output. ingress_linum:%u\n", 148 ingress_linum); 149 150 CHECK(srv_sk.state == 10 || 151 !srv_sk.state || 152 srv_sk.family != AF_INET6 || 153 srv_sk.protocol != IPPROTO_TCP || 154 memcmp(srv_sk.src_ip6, &in6addr_loopback, 155 sizeof(srv_sk.src_ip6)) || 156 memcmp(srv_sk.dst_ip6, &in6addr_loopback, 157 sizeof(srv_sk.dst_ip6)) || 158 srv_sk.src_port != ntohs(srv_sa6.sin6_port) || 159 srv_sk.dst_port != cli_sa6.sin6_port, 160 "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n", 161 egress_linum); 162 163 CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n"); 164 165 CHECK(cli_sk.state == 10 || 166 !cli_sk.state || 167 cli_sk.family != AF_INET6 || 168 cli_sk.protocol != IPPROTO_TCP || 169 memcmp(cli_sk.src_ip6, &in6addr_loopback, 170 sizeof(cli_sk.src_ip6)) || 171 memcmp(cli_sk.dst_ip6, &in6addr_loopback, 172 sizeof(cli_sk.dst_ip6)) || 173 cli_sk.src_port != ntohs(cli_sa6.sin6_port) || 174 cli_sk.dst_port != srv_sa6.sin6_port, 175 "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n", 176 egress_linum); 177 178 CHECK(listen_tp.data_segs_out || 179 listen_tp.data_segs_in || 180 listen_tp.total_retrans || 181 listen_tp.bytes_acked, 182 "listen_tp", 183 "Unexpected. Check listen_tp output. ingress_linum:%u\n", 184 ingress_linum); 185 186 CHECK(srv_tp.data_segs_out != 2 || 187 srv_tp.data_segs_in || 188 srv_tp.snd_cwnd != 10 || 189 srv_tp.total_retrans || 190 srv_tp.bytes_acked < 2 * DATA_LEN, 191 "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n", 192 egress_linum); 193 194 CHECK(cli_tp.data_segs_out || 195 cli_tp.data_segs_in != 2 || 196 cli_tp.snd_cwnd != 10 || 197 cli_tp.total_retrans || 198 cli_tp.bytes_received < 2 * DATA_LEN, 199 "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n", 200 egress_linum); 201 202 CHECK(skel->bss->parent_cg_id != parent_cg_id, 203 "parent_cg_id", "%zu != %zu\n", 204 (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id); 205 206 CHECK(skel->bss->child_cg_id != child_cg_id, 207 "child_cg_id", "%zu != %zu\n", 208 (size_t)skel->bss->child_cg_id, (size_t)child_cg_id); 209 } 210 211 static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) 212 { 213 struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {}; 214 int err; 215 216 pkt_out_cnt.cnt = ~0; 217 pkt_out_cnt10.cnt = ~0; 218 err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); 219 if (!err) 220 err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, 221 &pkt_out_cnt10); 222 223 /* The bpf prog only counts for fullsock and 224 * passive connection did not become fullsock until 3WHS 225 * had been finished, so the bpf prog only counted two data 226 * packet out. 227 */ 228 CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 || 229 pkt_out_cnt10.cnt < 0xeB9F + 20, 230 "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", 231 "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n", 232 err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); 233 234 pkt_out_cnt.cnt = ~0; 235 pkt_out_cnt10.cnt = ~0; 236 err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); 237 if (!err) 238 err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, 239 &pkt_out_cnt10); 240 /* Active connection is fullsock from the beginning. 241 * 1 SYN and 1 ACK during 3WHS 242 * 2 Acks on data packet. 243 * 244 * The bpf_prog initialized it to 0xeB9F. 245 */ 246 CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 || 247 pkt_out_cnt10.cnt < 0xeB9F + 40, 248 "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", 249 "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n", 250 err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); 251 } 252 253 static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt) 254 { 255 struct bpf_spinlock_cnt scnt = {}; 256 int err; 257 258 scnt.cnt = pkt_out_cnt; 259 err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, 260 BPF_NOEXIST); 261 if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", 262 "err:%d errno:%d\n", err, errno)) 263 return err; 264 265 err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, 266 BPF_NOEXIST); 267 if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", 268 "err:%d errno:%d\n", err, errno)) 269 return err; 270 271 return 0; 272 } 273 274 static void test(void) 275 { 276 int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i; 277 socklen_t addrlen = sizeof(struct sockaddr_in6); 278 char buf[DATA_LEN]; 279 280 /* Prepare listen_fd */ 281 listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0); 282 /* start_server() has logged the error details */ 283 if (CHECK_FAIL(listen_fd == -1)) 284 goto done; 285 286 err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); 287 if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, 288 errno)) 289 goto done; 290 memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); 291 292 cli_fd = connect_to_fd(listen_fd, 0); 293 if (CHECK_FAIL(cli_fd == -1)) 294 goto done; 295 296 err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); 297 if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n", 298 err, errno)) 299 goto done; 300 301 accept_fd = accept(listen_fd, NULL, NULL); 302 if (CHECK(accept_fd == -1, "accept(listen_fd)", 303 "accept_fd:%d errno:%d\n", 304 accept_fd, errno)) 305 goto done; 306 307 if (init_sk_storage(accept_fd, 0xeB9F)) 308 goto done; 309 310 for (i = 0; i < 2; i++) { 311 /* Send some data from accept_fd to cli_fd. 312 * MSG_EOR to stop kernel from coalescing two pkts. 313 */ 314 err = send(accept_fd, DATA, DATA_LEN, MSG_EOR); 315 if (CHECK(err != DATA_LEN, "send(accept_fd)", 316 "err:%d errno:%d\n", err, errno)) 317 goto done; 318 319 err = recv(cli_fd, buf, DATA_LEN, 0); 320 if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n", 321 err, errno)) 322 goto done; 323 } 324 325 shutdown(cli_fd, SHUT_WR); 326 err = recv(accept_fd, buf, 1, 0); 327 if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", 328 err, errno)) 329 goto done; 330 shutdown(accept_fd, SHUT_WR); 331 err = recv(cli_fd, buf, 1, 0); 332 if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", 333 err, errno)) 334 goto done; 335 check_sk_pkt_out_cnt(accept_fd, cli_fd); 336 check_result(); 337 338 done: 339 if (accept_fd != -1) 340 close(accept_fd); 341 if (cli_fd != -1) 342 close(cli_fd); 343 if (listen_fd != -1) 344 close(listen_fd); 345 } 346 347 void serial_test_sock_fields(void) 348 { 349 int parent_cg_fd = -1, child_cg_fd = -1; 350 struct bpf_link *link; 351 352 /* Use a dedicated netns to have a fixed listen port */ 353 if (!create_netns()) 354 return; 355 356 /* Create a cgroup, get fd, and join it */ 357 parent_cg_fd = test__join_cgroup(PARENT_CGROUP); 358 if (CHECK_FAIL(parent_cg_fd < 0)) 359 return; 360 parent_cg_id = get_cgroup_id(PARENT_CGROUP); 361 if (CHECK_FAIL(!parent_cg_id)) 362 goto done; 363 364 child_cg_fd = test__join_cgroup(CHILD_CGROUP); 365 if (CHECK_FAIL(child_cg_fd < 0)) 366 goto done; 367 child_cg_id = get_cgroup_id(CHILD_CGROUP); 368 if (CHECK_FAIL(!child_cg_id)) 369 goto done; 370 371 skel = test_sock_fields__open_and_load(); 372 if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n")) 373 goto done; 374 375 link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd); 376 if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)")) 377 goto done; 378 skel->links.egress_read_sock_fields = link; 379 380 link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd); 381 if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)")) 382 goto done; 383 skel->links.ingress_read_sock_fields = link; 384 385 link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd); 386 if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port")) 387 goto done; 388 skel->links.read_sk_dst_port = link; 389 390 linum_map_fd = bpf_map__fd(skel->maps.linum_map); 391 sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt); 392 sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10); 393 394 test(); 395 396 done: 397 test_sock_fields__detach(skel); 398 test_sock_fields__destroy(skel); 399 if (child_cg_fd >= 0) 400 close(child_cg_fd); 401 if (parent_cg_fd >= 0) 402 close(parent_cg_fd); 403 } 404