// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/capability.h>
#include <net/xdp_sock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/btf_ids.h>

#include "xsk.h"

/* Allocate a tracking node that ties @map_entry (one slot of @map) to the
 * socket that will be stored there.  The node is later linked onto the
 * socket's map_list (see xsk_map_sock_add()) so the socket can purge itself
 * from every map slot it occupies.  Takes a reference on the map and bumps
 * map->count, which xsk_map_mem_usage() reports as per-node overhead.
 */
static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
					       struct xdp_sock __rcu **map_entry)
{
	struct xsk_map_node *node;

	/* GFP_ATOMIC: the caller may hold a spinlock (see xsk_map_update_elem()) */
	node = bpf_map_kzalloc(&map->map, sizeof(*node),
			       GFP_ATOMIC | __GFP_NOWARN);
	if (!node)
		return ERR_PTR(-ENOMEM);

	bpf_map_inc(&map->map);
	atomic_inc(&map->count);

	node->map = map;
	node->map_entry = map_entry;
	return node;
}

/* Undo xsk_map_node_alloc(): drop the map reference, free the node and
 * decrement the map's node count.  @map is cached in a local because
 * node->map must not be read after the node has been kfree()d.
 */
static void xsk_map_node_free(struct xsk_map_node *node)
{
	struct xsk_map *map = node->map;

	bpf_map_put(&node->map->map);
	kfree(node);
	atomic_dec(&map->count);
}

/* Link @node onto @xs's list of map slots that currently hold @xs. */
static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
{
	spin_lock_bh(&xs->map_list_lock);
	list_add_tail(&node->node, &xs->map_list);
	spin_unlock_bh(&xs->map_list_lock);
}

/* Unlink and free every tracking node of @xs that refers to @map_entry.
 * Used when the slot is overwritten or cleared.
 */
static void xsk_map_sock_delete(struct xdp_sock *xs,
				struct xdp_sock __rcu **map_entry)
{
	struct xsk_map_node *n, *tmp;

	spin_lock_bh(&xs->map_list_lock);
	list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
		if (map_entry == n->map_entry) {
			list_del(&n->node);
			xsk_map_node_free(n);
		}
	}
	spin_unlock_bh(&xs->map_list_lock);
}

/* BPF_MAP_TYPE_XSKMAP constructor: validate the attributes (4-byte key and
 * value, only NUMA/RDONLY/WRONLY flags allowed) and allocate the map header
 * together with its flexible array of socket pointers in one area.
 */
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
	struct xsk_map *m;
	int numa_node;
	u64 size;

	if (!capable(CAP_NET_ADMIN))
		return ERR_PTR(-EPERM);

	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size != 4 ||
	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
		return ERR_PTR(-EINVAL);

	numa_node = bpf_map_attr_numa_node(attr);
	size = struct_size(m, xsk_map, attr->max_entries);

	m = bpf_map_area_alloc(size, numa_node);
	if (!m)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&m->map, attr);
	spin_lock_init(&m->lock);

	return &m->map;
}

/* Report the memory charged to this map: the map struct including its
 * entry array, plus one xsk_map_node per live entry (m->count tracks
 * the node population, see xsk_map_node_alloc()/xsk_map_node_free()).
 */
static u64 xsk_map_mem_usage(const struct bpf_map *map)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);

	return struct_size(m, xsk_map, map->max_entries) +
	       (u64)atomic_read(&m->count) * sizeof(struct xsk_map_node);
}

/* Map destructor.  Waits for in-flight softirq/XDP users via
 * synchronize_net() before releasing the map area.  Each populated entry
 * holds a map reference through its node (bpf_map_inc() in
 * xsk_map_node_alloc()), so by the time the last reference is dropped the
 * entries have already been removed.
 */
static void xsk_map_free(struct bpf_map *map)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);

	synchronize_net();
	bpf_map_area_free(m);
}

/* bpf(BPF_MAP_GET_NEXT_KEY): linear index iteration.  A NULL @key (mapped
 * to U32_MAX) or an out-of-range @key restarts iteration at index 0; the
 * last valid index returns -ENOENT to terminate it.
 */
static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = next_key;

	if (index >= m->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == m->map.max_entries - 1)
		return -ENOENT;
	*next = index + 1;
	return 0;
}

/* Emit inline BPF instructions for bpf_map_lookup_elem() on an xskmap so
 * the JITed program avoids a helper call: bounds-check the key, scale it
 * to a pointer-sized offset into the xsk_map[] array and load the entry
 * (ret is set to 0/NULL on an out-of-range key).
 */
static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
	struct bpf_insn *insn = insn_buf;

	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);		  /* ret = *(u32 *)key */
	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); /* out of range? */
	*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
	*insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
	*insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);			  /* miss: NULL */
	return insn - insn_buf;
}

/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
 * by local_bh_disable() (from XDP calls inside NAPI). The
 * rcu_read_lock_bh_held() below makes lockdep accept both.
 */
static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);

	if (key >= map->max_entries)
		return NULL;

	return rcu_dereference_check(m->xsk_map[key], rcu_read_lock_bh_held());
}

/* Lookup callback used from BPF program context. */
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
	return __xsk_map_lookup_elem(map, *(u32 *)key);
}

/* Lookup via the bpf() syscall is rejected — presumably because the
 * values are kernel socket pointers that have no meaning for (and must
 * not leak to) user space.
 */
static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* bpf(BPF_MAP_UPDATE_ELEM): store the AF_XDP socket behind fd *@value at
 * index *@key.  Honours BPF_NOEXIST/BPF_EXIST semantics, publishes the
 * new socket with rcu_assign_pointer() under m->lock, and records the
 * binding with an xsk_map_node so the socket can remove itself on close.
 */
static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
			       u64 map_flags)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct xdp_sock __rcu **map_entry;
	struct xdp_sock *xs, *old_xs;
	u32 i = *(u32 *)key, fd = *(u32 *)value;
	struct xsk_map_node *node;
	struct socket *sock;
	int err;

	if (unlikely(map_flags > BPF_EXIST))
		return -EINVAL;
	if (unlikely(i >= m->map.max_entries))
		return -E2BIG;

	sock = sockfd_lookup(fd, &err);
	if (!sock)
		return err;

	/* Only AF_XDP sockets can live in an xskmap */
	if (sock->sk->sk_family != PF_XDP) {
		sockfd_put(sock);
		return -EOPNOTSUPP;
	}

	xs = (struct xdp_sock *)sock->sk;

	map_entry = &m->xsk_map[i];
	/* Allocate the tracking node before taking m->lock */
	node = xsk_map_node_alloc(m, map_entry);
	if (IS_ERR(node)) {
		sockfd_put(sock);
		return PTR_ERR(node);
	}

	spin_lock_bh(&m->lock);
	old_xs = rcu_dereference_protected(*map_entry, lockdep_is_held(&m->lock));
	if (old_xs == xs) {
		/* Slot already holds this socket: nothing to do */
		err = 0;
		goto out;
	} else if (old_xs && map_flags == BPF_NOEXIST) {
		err = -EEXIST;
		goto out;
	} else if (!old_xs && map_flags == BPF_EXIST) {
		err = -ENOENT;
		goto out;
	}
	/* Register the node with the socket before publishing the pointer,
	 * so the socket knows about every slot a reader can observe.
	 */
	xsk_map_sock_add(xs, node);
	rcu_assign_pointer(*map_entry, xs);
	if (old_xs)
		xsk_map_sock_delete(old_xs, map_entry);
	spin_unlock_bh(&m->lock);
	sockfd_put(sock);
	return 0;

out:
	spin_unlock_bh(&m->lock);
	sockfd_put(sock);
	xsk_map_node_free(node);
	return err;
}

/* bpf(BPF_MAP_DELETE_ELEM): atomically clear slot *@key (xchg with NULL)
 * and unlink the displaced socket's tracking node, if any.
 */
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct xdp_sock __rcu **map_entry;
	struct xdp_sock *old_xs;
	int k = *(u32 *)key;

	if (k >= map->max_entries)
		return -EINVAL;

	spin_lock_bh(&m->lock);
	map_entry = &m->xsk_map[k];
	old_xs = unrcu_pointer(xchg(map_entry, NULL));
	if (old_xs)
		xsk_map_sock_delete(old_xs, map_entry);
	spin_unlock_bh(&m->lock);

	return 0;
}

/* bpf_redirect_map() backend for xskmaps: hand the frame to the socket
 * at @index via the generic XDP redirect helper.
 */
static int xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags)
{
	return __bpf_xdp_redirect_map(map, index, flags, 0,
				      __xsk_map_lookup_elem);
}

/* Non-static entry point (declared in xsk.h) used from the socket side:
 * clear @map_entry, but only if it still points at @xs — the slot may
 * have been re-assigned to another socket in the meantime.
 */
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
			     struct xdp_sock __rcu **map_entry)
{
	spin_lock_bh(&map->lock);
	if (rcu_access_pointer(*map_entry) == xs) {
		rcu_assign_pointer(*map_entry, NULL);
		xsk_map_sock_delete(xs, map_entry);
	}
	spin_unlock_bh(&map->lock);
}

/* Map-in-map compatibility check: besides the generic meta comparison,
 * inner xskmaps must also agree on max_entries.
 */
static bool xsk_map_meta_equal(const struct bpf_map *meta0,
			       const struct bpf_map *meta1)
{
	return meta0->max_entries == meta1->max_entries &&
	       bpf_map_meta_equal(meta0, meta1);
}
269134fede4SMartin KaFai Lau 270c317ab71SMenglong Dong BTF_ID_LIST_SINGLE(xsk_map_btf_ids, struct, xsk_map) 271d20a1676SBjörn Töpel const struct bpf_map_ops xsk_map_ops = { 272134fede4SMartin KaFai Lau .map_meta_equal = xsk_map_meta_equal, 273d20a1676SBjörn Töpel .map_alloc = xsk_map_alloc, 274d20a1676SBjörn Töpel .map_free = xsk_map_free, 275d20a1676SBjörn Töpel .map_get_next_key = xsk_map_get_next_key, 276d20a1676SBjörn Töpel .map_lookup_elem = xsk_map_lookup_elem, 277d20a1676SBjörn Töpel .map_gen_lookup = xsk_map_gen_lookup, 278d20a1676SBjörn Töpel .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only, 279d20a1676SBjörn Töpel .map_update_elem = xsk_map_update_elem, 280d20a1676SBjörn Töpel .map_delete_elem = xsk_map_delete_elem, 281d20a1676SBjörn Töpel .map_check_btf = map_check_no_btf, 282*b4fd0d67SYafang Shao .map_mem_usage = xsk_map_mem_usage, 283c317ab71SMenglong Dong .map_btf_id = &xsk_map_btf_ids[0], 284e6a4750fSBjörn Töpel .map_redirect = xsk_map_redirect, 285d20a1676SBjörn Töpel }; 286