// SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #include #include #include #include #include #include "bpf_flow.skel.h" #define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */ #ifndef IP_MF #define IP_MF 0x2000 #endif #define CHECK_FLOW_KEYS(desc, got, expected) \ _CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \ desc, \ topts.duration, \ "nhoff=%u/%u " \ "thoff=%u/%u " \ "addr_proto=0x%x/0x%x " \ "is_frag=%u/%u " \ "is_first_frag=%u/%u " \ "is_encap=%u/%u " \ "ip_proto=0x%x/0x%x " \ "n_proto=0x%x/0x%x " \ "flow_label=0x%x/0x%x " \ "sport=%u/%u " \ "dport=%u/%u\n", \ got.nhoff, expected.nhoff, \ got.thoff, expected.thoff, \ got.addr_proto, expected.addr_proto, \ got.is_frag, expected.is_frag, \ got.is_first_frag, expected.is_first_frag, \ got.is_encap, expected.is_encap, \ got.ip_proto, expected.ip_proto, \ got.n_proto, expected.n_proto, \ got.flow_label, expected.flow_label, \ got.sport, expected.sport, \ got.dport, expected.dport) struct ipv4_pkt { struct ethhdr eth; struct iphdr iph; struct tcphdr tcp; } __packed; struct ipip_pkt { struct ethhdr eth; struct iphdr iph; struct iphdr iph_inner; struct tcphdr tcp; } __packed; struct svlan_ipv4_pkt { struct ethhdr eth; __u16 vlan_tci; __u16 vlan_proto; struct iphdr iph; struct tcphdr tcp; } __packed; struct ipv6_pkt { struct ethhdr eth; struct ipv6hdr iph; struct tcphdr tcp; } __packed; struct ipv6_frag_pkt { struct ethhdr eth; struct ipv6hdr iph; struct frag_hdr { __u8 nexthdr; __u8 reserved; __be16 frag_off; __be32 identification; } ipf; struct tcphdr tcp; } __packed; struct dvlan_ipv6_pkt { struct ethhdr eth; __u16 vlan_tci; __u16 vlan_proto; __u16 vlan_tci2; __u16 vlan_proto2; struct ipv6hdr iph; struct tcphdr tcp; } __packed; struct test { const char *name; union { struct ipv4_pkt ipv4; struct svlan_ipv4_pkt svlan_ipv4; struct ipip_pkt ipip; struct ipv6_pkt ipv6; struct ipv6_frag_pkt ipv6_frag; struct dvlan_ipv6_pkt dvlan_ipv6; } pkt; struct bpf_flow_keys keys; __u32 flags; __u32 retval; }; #define VLAN_HLEN 4 static __u32 duration; struct test tests[] = { { .name = "ipv4", .pkt.ipv4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = IPPROTO_TCP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct iphdr), .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), .sport = 80, .dport = 8080, }, .retval = BPF_OK, }, { .name = "ipv6", .pkt.ipv6 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = IPPROTO_TCP, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct ipv6hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), .sport = 80, .dport = 8080, }, .retval = BPF_OK, }, { .name = "802.1q-ipv4", .pkt.svlan_ipv4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_8021Q), .vlan_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = IPPROTO_TCP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN + VLAN_HLEN, .thoff = ETH_HLEN + VLAN_HLEN + sizeof(struct iphdr), .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), .sport = 80, .dport = 8080, }, .retval = BPF_OK, }, { .name = "802.1ad-ipv6", .pkt.dvlan_ipv6 = { .eth.h_proto = __bpf_constant_htons(ETH_P_8021AD), .vlan_proto = __bpf_constant_htons(ETH_P_8021Q), .vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = IPPROTO_TCP, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN + VLAN_HLEN * 2, .thoff = ETH_HLEN + VLAN_HLEN * 2 + sizeof(struct ipv6hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), .sport = 80, .dport = 8080, }, .retval = BPF_OK, }, { .name = "ipv4-frag", .pkt.ipv4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = IPPROTO_TCP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .iph.frag_off = __bpf_constant_htons(IP_MF), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct iphdr), .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), .is_frag = true, .is_first_frag = true, .sport = 80, .dport = 8080, }, .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, .retval = BPF_OK, }, { .name = "ipv4-no-frag", .pkt.ipv4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = IPPROTO_TCP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .iph.frag_off = __bpf_constant_htons(IP_MF), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct iphdr), .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), .is_frag = true, .is_first_frag = true, }, .retval = BPF_OK, }, { .name = "ipv6-frag", .pkt.ipv6_frag = { .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = IPPROTO_FRAGMENT, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .ipf.nexthdr = IPPROTO_TCP, .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct frag_hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), .is_frag = true, .is_first_frag = true, .sport = 80, .dport = 8080, }, .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, .retval = BPF_OK, }, { .name = "ipv6-no-frag", .pkt.ipv6_frag = { .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = IPPROTO_FRAGMENT, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .ipf.nexthdr = IPPROTO_TCP, .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct frag_hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), .is_frag = true, .is_first_frag = true, }, .retval = BPF_OK, }, { .name = "ipv6-flow-label", .pkt.ipv6 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = IPPROTO_TCP, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .iph.flow_lbl = { 0xb, 0xee, 0xef }, .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct ipv6hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), .sport = 80, .dport = 8080, .flow_label = __bpf_constant_htonl(0xbeeef), }, .retval = BPF_OK, }, { .name = "ipv6-no-flow-label", .pkt.ipv6 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = IPPROTO_TCP, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .iph.flow_lbl = { 0xb, 0xee, 0xef }, .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct ipv6hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), .flow_label = __bpf_constant_htonl(0xbeeef), }, .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, .retval = BPF_OK, }, { .name = "ipv6-empty-flow-label", .pkt.ipv6 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = IPPROTO_TCP, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .iph.flow_lbl = { 0x00, 0x00, 0x00 }, .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct ipv6hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), .sport = 80, .dport = 8080, }, .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, .retval = BPF_OK, }, { .name = "ipip-encap", .pkt.ipip = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = IPPROTO_IPIP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .iph_inner.ihl = 5, .iph_inner.protocol = IPPROTO_TCP, .iph_inner.tot_len = __bpf_constant_htons(MAGIC_BYTES) - sizeof(struct iphdr), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct iphdr), .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), .is_encap = true, .sport = 80, .dport = 8080, }, .retval = BPF_OK, }, { .name = "ipip-no-encap", .pkt.ipip = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = IPPROTO_IPIP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .iph_inner.ihl = 5, .iph_inner.protocol = IPPROTO_TCP, .iph_inner.tot_len = __bpf_constant_htons(MAGIC_BYTES) - sizeof(struct iphdr), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, }, .keys = { .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP, .nhoff = ETH_HLEN, .thoff = ETH_HLEN + sizeof(struct iphdr), .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_IPIP, .n_proto = __bpf_constant_htons(ETH_P_IP), .is_encap = true, }, .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP, .retval = BPF_OK, }, { .name = "ipip-encap-dissector-continue", .pkt.ipip = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = IPPROTO_IPIP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .iph.saddr = __bpf_constant_htonl(FLOW_CONTINUE_SADDR), .iph_inner.ihl = 5, .iph_inner.protocol = IPPROTO_TCP, .iph_inner.tot_len = __bpf_constant_htons(MAGIC_BYTES) - sizeof(struct iphdr), .tcp.doff = 5, .tcp.source = 99, .tcp.dest = 9090, }, .retval = BPF_FLOW_DISSECTOR_CONTINUE, }, }; static int create_tap(const char *ifname) { struct ifreq ifr = { .ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS, }; int fd, ret; strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); fd = open("/dev/net/tun", O_RDWR); if (fd < 0) return -1; ret = ioctl(fd, TUNSETIFF, &ifr); if (ret) return -1; return fd; } static int tx_tap(int fd, void *pkt, size_t len) { struct iovec iov[] = { { .iov_len = len, .iov_base = pkt, }, }; return writev(fd, iov, ARRAY_SIZE(iov)); } static int ifup(const char *ifname) { struct ifreq ifr = {}; int sk, ret; strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); sk = socket(PF_INET, SOCK_DGRAM, 0); if (sk < 0) return -1; ret = ioctl(sk, SIOCGIFFLAGS, &ifr); if (ret) { close(sk); return -1; } ifr.ifr_flags |= IFF_UP; ret = ioctl(sk, SIOCSIFFLAGS, &ifr); if (ret) { close(sk); return -1; } close(sk); return 0; } static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array) { int i, err, map_fd, prog_fd; struct bpf_program *prog; char prog_name[32]; map_fd = bpf_map__fd(prog_array); if (map_fd < 0) return -1; for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i); prog = bpf_object__find_program_by_name(obj, prog_name); if (!prog) return -1; prog_fd = bpf_program__fd(prog); if (prog_fd < 0) return -1; err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); if (err) return -1; } return 0; } static void run_tests_skb_less(int tap_fd, struct bpf_map *keys) { int i, err, keys_fd; keys_fd = bpf_map__fd(keys); if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd)) return; for (i = 0; i < ARRAY_SIZE(tests); i++) { /* Keep in sync with 'flags' from eth_get_headlen. */ __u32 eth_get_headlen_flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG; LIBBPF_OPTS(bpf_test_run_opts, topts); struct bpf_flow_keys flow_keys = {}; __u32 key = (__u32)(tests[i].keys.sport) << 16 | tests[i].keys.dport; /* For skb-less case we can't pass input flags; run * only the tests that have a matching set of flags. */ if (tests[i].flags != eth_get_headlen_flags) continue; err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); /* check the stored flow_keys only if BPF_OK expected */ if (tests[i].retval != BPF_OK) continue; err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); ASSERT_OK(err, "bpf_map_lookup_elem"); CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); err = bpf_map_delete_elem(keys_fd, &key); ASSERT_OK(err, "bpf_map_delete_elem"); } } static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd) { int err, prog_fd; prog_fd = bpf_program__fd(skel->progs._dissect); if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd)) return; err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno)) return; run_tests_skb_less(tap_fd, skel->maps.last_dissection); err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno); } static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd) { struct bpf_link *link; int err, net_fd; net_fd = open("/proc/self/ns/net", O_RDONLY); if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno)) return; link = bpf_program__attach_netns(skel->progs._dissect, net_fd); if (!ASSERT_OK_PTR(link, "attach_netns")) goto out_close; run_tests_skb_less(tap_fd, skel->maps.last_dissection); err = bpf_link__destroy(link); CHECK(err, "bpf_link__destroy", "err %d\n", err); out_close: close(net_fd); } void test_flow_dissector(void) { int i, err, prog_fd, keys_fd = -1, tap_fd; struct bpf_flow *skel; skel = bpf_flow__open_and_load(); if (CHECK(!skel, "skel", "failed to open/load skeleton\n")) return; prog_fd = bpf_program__fd(skel->progs._dissect); if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd)) goto out_destroy_skel; keys_fd = bpf_map__fd(skel->maps.last_dissection); if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd)) goto out_destroy_skel; err = init_prog_array(skel->obj, skel->maps.jmp_table); if (CHECK(err, "init_prog_array", "err %d\n", err)) goto out_destroy_skel; for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_flow_keys flow_keys; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &tests[i].pkt, .data_size_in = sizeof(tests[i].pkt), .data_out = &flow_keys, ); static struct bpf_flow_keys ctx = {}; if (tests[i].flags) { topts.ctx_in = &ctx; topts.ctx_size_in = sizeof(ctx); ctx.flags = tests[i].flags; } err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); ASSERT_EQ(topts.retval, tests[i].retval, "test_run retval"); /* check the resulting flow_keys only if BPF_OK returned */ if (topts.retval != BPF_OK) continue; ASSERT_EQ(topts.data_size_out, sizeof(flow_keys), "test_run data_size_out"); CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); } /* Do the same tests but for skb-less flow dissector. * We use a known path in the net/tun driver that calls * eth_get_headlen and we manually export bpf_flow_keys * via BPF map in this case. */ tap_fd = create_tap("tap0"); CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno); err = ifup("tap0"); CHECK(err, "ifup", "err %d errno %d\n", err, errno); /* Test direct prog attachment */ test_skb_less_prog_attach(skel, tap_fd); /* Test indirect prog attachment via link */ test_skb_less_link_create(skel, tap_fd); close(tap_fd); out_destroy_skel: bpf_flow__destroy(skel); }