// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/bpf.h>
#include <linux/capability.h>
#include <net/xdp_sock.h>
#include <linux/slab.h>
#include <linux/sched.h>

#include "xsk.h"

/* A node ties an XSK socket to one slot of one map, so the slot can be
 * cleared when the socket is released; see xsk_map_try_sock_delete().
 */
static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
					       struct xdp_sock __rcu **map_entry)
{
	struct xsk_map_node *node;

	node = bpf_map_kzalloc(&map->map, sizeof(*node),
			       GFP_ATOMIC | __GFP_NOWARN);
	if (!node)
		return ERR_PTR(-ENOMEM);

	bpf_map_inc(&map->map);

	node->map = map;
	node->map_entry = map_entry;
	return node;
}

static void xsk_map_node_free(struct xsk_map_node *node)
{
	bpf_map_put(&node->map->map);
	kfree(node);
}

static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
{
	spin_lock_bh(&xs->map_list_lock);
	list_add_tail(&node->node, &xs->map_list);
	spin_unlock_bh(&xs->map_list_lock);
}

static void xsk_map_sock_delete(struct xdp_sock *xs,
				struct xdp_sock __rcu **map_entry)
{
	struct xsk_map_node *n, *tmp;

	spin_lock_bh(&xs->map_list_lock);
	list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
		if (map_entry == n->map_entry) {
			list_del(&n->node);
			xsk_map_node_free(n);
		}
	}
	spin_unlock_bh(&xs->map_list_lock);
}

static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
	struct xsk_map *m;
	int numa_node;
	u64 size;

	if (!capable(CAP_NET_ADMIN))
		return ERR_PTR(-EPERM);

	/* Keys are queue indices and values are AF_XDP socket fds, both u32. */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size != 4 ||
	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
		return ERR_PTR(-EINVAL);

	numa_node = bpf_map_attr_numa_node(attr);
	size = struct_size(m, xsk_map, attr->max_entries);

	m = bpf_map_area_alloc(size, numa_node);
	if (!m)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&m->map, attr);
	spin_lock_init(&m->lock);

	return &m->map;
}

static void xsk_map_free(struct bpf_map *map)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);

	/* Wait for outstanding XDP-side readers before tearing down. */
	synchronize_net();
	bpf_map_area_free(m);
}

static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = next_key;

	if (index >= m->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == m->map.max_entries - 1)
		return -ENOENT;
	*next = index + 1;
	return 0;
}

/* Inline the array lookup into the calling BPF program at verification time,
 * avoiding a helper call per lookup.
 */
static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
	struct bpf_insn *insn = insn_buf;

	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
	*insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
	*insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
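/* For reference, the instruction sequence emitted above computes roughly the
 * following, inline in the calling program (a sketch only; "elem" is a
 * hypothetical name, not a kernel symbol):
 *
 *	struct xdp_sock *elem = NULL;
 *	if (index < map->max_entries)
 *		elem = READ_ONCE(m->xsk_map[index]);
 *
 * i.e. a bounds check (the BPF_JGE above), scaling the index to a pointer
 * offset (BPF_LSH plus the two BPF_ADDs), and a single load of the slot,
 * with NULL as the out-of-bounds result.
 */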
/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
 * by local_bh_disable() (from XDP calls inside NAPI). The
 * rcu_read_lock_bh_held() below makes lockdep accept both.
 */
static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);

	if (key >= map->max_entries)
		return NULL;

	return rcu_dereference_check(m->xsk_map[key], rcu_read_lock_bh_held());
}

static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
	return __xsk_map_lookup_elem(map, *(u32 *)key);
}

static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
{
	/* Values are socket pointers; do not expose them to user space. */
	return ERR_PTR(-EOPNOTSUPP);
}

static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
			       u64 map_flags)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct xdp_sock __rcu **map_entry;
	struct xdp_sock *xs, *old_xs;
	u32 i = *(u32 *)key, fd = *(u32 *)value;
	struct xsk_map_node *node;
	struct socket *sock;
	int err;

	if (unlikely(map_flags > BPF_EXIST))
		return -EINVAL;
	if (unlikely(i >= m->map.max_entries))
		return -E2BIG;

	/* The value is interpreted as the fd of an AF_XDP socket. */
	sock = sockfd_lookup(fd, &err);
	if (!sock)
		return err;

	if (sock->sk->sk_family != PF_XDP) {
		sockfd_put(sock);
		return -EOPNOTSUPP;
	}

	xs = (struct xdp_sock *)sock->sk;

	map_entry = &m->xsk_map[i];
	node = xsk_map_node_alloc(m, map_entry);
	if (IS_ERR(node)) {
		sockfd_put(sock);
		return PTR_ERR(node);
	}

	spin_lock_bh(&m->lock);
	old_xs = rcu_dereference_protected(*map_entry, lockdep_is_held(&m->lock));
	if (old_xs == xs) {
		err = 0;
		goto out;
	} else if (old_xs && map_flags == BPF_NOEXIST) {
		err = -EEXIST;
		goto out;
	} else if (!old_xs && map_flags == BPF_EXIST) {
		err = -ENOENT;
		goto out;
	}
	xsk_map_sock_add(xs, node);
	rcu_assign_pointer(*map_entry, xs);
	if (old_xs)
		xsk_map_sock_delete(old_xs, map_entry);
	spin_unlock_bh(&m->lock);
	sockfd_put(sock);
	return 0;

out:
	spin_unlock_bh(&m->lock);
	sockfd_put(sock);
	xsk_map_node_free(node);
	return err;
}

static int xsk_map_delete_elem(struct bpf_map *map, void *key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct xdp_sock __rcu **map_entry;
	struct xdp_sock *old_xs;
	int k = *(u32 *)key;

	if (k >= map->max_entries)
		return -EINVAL;

	spin_lock_bh(&m->lock);
	map_entry = &m->xsk_map[k];
	old_xs = unrcu_pointer(xchg(map_entry, NULL));
	if (old_xs)
		xsk_map_sock_delete(old_xs, map_entry);
	spin_unlock_bh(&m->lock);

	return 0;
}

static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
	return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
				      __xsk_map_lookup_elem);
}

/* Called when an XSK socket is released: clear any map slot that still points
 * at it, under the map lock so it cannot race with updates or deletes.
 */
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
			     struct xdp_sock __rcu **map_entry)
{
	spin_lock_bh(&map->lock);
	if (rcu_access_pointer(*map_entry) == xs) {
		rcu_assign_pointer(*map_entry, NULL);
		xsk_map_sock_delete(xs, map_entry);
	}
	spin_unlock_bh(&map->lock);
}

/* max_entries must also match because xsk_map_gen_lookup() bakes it into the
 * generated instructions, so inner maps of a map-in-map cannot differ in size.
 */
static bool xsk_map_meta_equal(const struct bpf_map *meta0,
			       const struct bpf_map *meta1)
{
	return meta0->max_entries == meta1->max_entries &&
	       bpf_map_meta_equal(meta0, meta1);
}
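/* Typical use from an XDP program (an illustrative sketch, not part of this
 * file; "xsks_map" and "xsk_redir" are hypothetical names, written in libbpf
 * BTF map-definition style):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_XSKMAP);
 *		__uint(max_entries, 64);
 *		__type(key, __u32);
 *		__type(value, __u32);
 *	} xsks_map SEC(".maps");
 *
 *	SEC("xdp")
 *	int xsk_redir(struct xdp_md *ctx)
 *	{
 *		__u32 qid = ctx->rx_queue_index;
 *
 *		return bpf_redirect_map(&xsks_map, qid, XDP_PASS);
 *	}
 *
 * bpf_redirect_map() on an XSKMAP lands in xsk_map_redirect() below, which
 * resolves the slot via __xsk_map_lookup_elem(); the XDP_PASS in the flags
 * argument is the fallback action when the slot is empty.
 */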
static int xsk_map_btf_id;
const struct bpf_map_ops xsk_map_ops = {
	.map_meta_equal = xsk_map_meta_equal,
	.map_alloc = xsk_map_alloc,
	.map_free = xsk_map_free,
	.map_get_next_key = xsk_map_get_next_key,
	.map_lookup_elem = xsk_map_lookup_elem,
	.map_gen_lookup = xsk_map_gen_lookup,
	.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
	.map_update_elem = xsk_map_update_elem,
	.map_delete_elem = xsk_map_delete_elem,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "xsk_map",
	.map_btf_id = &xsk_map_btf_id,
	.map_redirect = xsk_map_redirect,
};
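/* Control-plane side, from user space (an illustrative sketch; assumes libbpf
 * v0.7+ and an already-created AF_XDP socket fd "xsk_fd" — both assumptions,
 * not part of this file):
 *
 *	int map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map",
 *				    sizeof(__u32), sizeof(__u32), 64, NULL);
 *	__u32 qid = 0;
 *	int err = bpf_map_update_elem(map_fd, &qid, &xsk_fd, BPF_ANY);
 *
 * The update is served by xsk_map_update_elem() above: the value is treated
 * as a socket fd, resolved with sockfd_lookup(), and the xdp_sock pointer is
 * published into the slot under m->lock.
 */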