/* net/core/xdp.c
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 * Released under terms in GPL version 2. See COPYING.
 */
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <net/page_pool.h>

#include <net/xdp.h>

#define REG_STATE_NEW		0x0
#define REG_STATE_REGISTERED	0x1
#define REG_STATE_UNREGISTERED	0x2
#define REG_STATE_UNUSED	0x3

static DEFINE_IDA(mem_id_pool);
static DEFINE_MUTEX(mem_id_lock);
#define MEM_ID_MAX 0xFFFE
#define MEM_ID_MIN 1
static int mem_id_next = MEM_ID_MIN;

static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;

struct xdp_mem_allocator {
	struct xdp_mem_info mem;
	union {
		void *allocator;
		struct page_pool *page_pool;
		struct zero_copy_allocator *zc_alloc;
	};
	struct rhash_head node;
	struct rcu_head rcu;
};

static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 *k = data;
	const u32 key = *k;

	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
		     != sizeof(u32));

	/* Use cyclic increasing ID as direct hash key */
	return key;
}

static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct xdp_mem_allocator *xa = ptr;
	u32 mem_id = *(u32 *)arg->key;

	return xa->mem.id != mem_id;
}

static const struct rhashtable_params mem_id_rht_params = {
	.nelem_hint = 64,
	.head_offset = offsetof(struct xdp_mem_allocator, node),
	.key_offset = offsetof(struct xdp_mem_allocator, mem.id),
	.key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
	.max_size = MEM_ID_MAX,
	.min_size = 8,
	.automatic_shrinking = true,
	.hashfn = xdp_mem_id_hashfn,
	.obj_cmpfn = xdp_mem_id_cmp,
};

static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
{
	struct xdp_mem_allocator *xa;

	xa = container_of(rcu, struct xdp_mem_allocator, rcu);

	/* Allow this ID to be reused */
	ida_simple_remove(&mem_id_pool, xa->mem.id);

	/* Notice, driver is expected to free the *allocator,
	 * e.g. page_pool, and MUST also use RCU free.
	 */

	/* Poison memory */
	xa->mem.id = 0xFFFF;
	xa->mem.type = 0xF0F0;
	xa->allocator = (void *)0xDEAD9001;

	kfree(xa);
}
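
/* Hypothetical driver-side sketch (identifiers are illustrative, not
 * from this file): per the comment above, the allocator itself must be
 * freed by the driver, and only after an RCU grace period, since
 * in-flight frames may still look it up via mem_id_ht, e.g.:
 *
 *	xdp_rxq_info_unreg(&rq->xdp_rxq); // queues the RCU free above
 *	synchronize_rcu();                // or call_rcu() on driver state
 *	my_drv_free_page_pool(rq->page_pool);
 */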

static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
	struct xdp_mem_allocator *xa;
	int id = xdp_rxq->mem.id;
	int err;

	if (id == 0)
		return;

	mutex_lock(&mem_id_lock);

	xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
	if (!xa) {
		mutex_unlock(&mem_id_lock);
		return;
	}

	err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
	WARN_ON(err);

	call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);

	mutex_unlock(&mem_id_lock);
}

void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
	/* Simplify driver cleanup code paths, allow unreg "unused" */
	if (xdp_rxq->reg_state == REG_STATE_UNUSED)
		return;

	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");

	__xdp_rxq_info_unreg_mem_model(xdp_rxq);

	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
	xdp_rxq->dev = NULL;

	/* Reset mem info to defaults */
	xdp_rxq->mem.id = 0;
	xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);

static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
{
	memset(xdp_rxq, 0, sizeof(*xdp_rxq));
}

/* Returns 0 on success, negative on failure */
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		     struct net_device *dev, u32 queue_index)
{
	if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
		WARN(1, "Driver promised not to register this");
		return -EINVAL;
	}

	if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
		WARN(1, "Missing unregister, handled but fix driver");
		xdp_rxq_info_unreg(xdp_rxq);
	}

	if (!dev) {
		WARN(1, "Missing net_device from driver");
		return -ENODEV;
	}

	/* State either UNREGISTERED or NEW */
	xdp_rxq_info_init(xdp_rxq);
	xdp_rxq->dev = dev;
	xdp_rxq->queue_index = queue_index;

	xdp_rxq->reg_state = REG_STATE_REGISTERED;
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);

void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
	xdp_rxq->reg_state = REG_STATE_UNUSED;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);

bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
{
	return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
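
/* Typical rxq_info lifecycle in a driver (hypothetical sketch; the
 * rq/netdev names are illustrative, not from this file):
 *
 *	// RX queue setup
 *	err = xdp_rxq_info_reg(&rq->xdp_rxq, netdev, rq->index);
 *	if (err)
 *		return err;
 *
 *	// RX queue teardown
 *	xdp_rxq_info_unreg(&rq->xdp_rxq);
 *
 * A queue that will never run XDP can instead call
 * xdp_rxq_info_unused(), which turns a later unreg into a NOP but
 * makes a later xdp_rxq_info_reg() a driver bug.
 */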

static int __mem_id_init_hash_table(void)
{
	struct rhashtable *rht;
	int ret;

	if (unlikely(mem_id_init))
		return 0;

	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
	if (!rht)
		return -ENOMEM;

	ret = rhashtable_init(rht, &mem_id_rht_params);
	if (ret < 0) {
		kfree(rht);
		return ret;
	}
	mem_id_ht = rht;
	smp_mb(); /* mutex lock should provide enough pairing */
	mem_id_init = true;

	return 0;
}

/* Allocate a cyclic ID that maps to allocator pointer.
 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
 *
 * Caller must lock mem_id_lock.
 */
static int __mem_id_cyclic_get(gfp_t gfp)
{
	int retries = 1;
	int id;

again:
	id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
	if (id < 0) {
		if (id == -ENOSPC) {
			/* Cyclic allocator, reset next id */
			if (retries--) {
				mem_id_next = MEM_ID_MIN;
				goto again;
			}
		}
		return id; /* errno */
	}
	mem_id_next = id + 1;

	return id;
}

static bool __is_supported_mem_type(enum xdp_mem_type type)
{
	if (type == MEM_TYPE_PAGE_POOL)
		return is_page_pool_compiled_in();

	if (type >= MEM_TYPE_MAX)
		return false;

	return true;
}

int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
			       enum xdp_mem_type type, void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;
	gfp_t gfp = GFP_KERNEL;
	int id, errno, ret;
	void *ptr;

	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return -EFAULT;
	}

	if (!__is_supported_mem_type(type))
		return -EOPNOTSUPP;

	xdp_rxq->mem.type = type;

	if (!allocator) {
		/* Setup-time check: these types require an allocator */
		if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY)
			return -EINVAL;
		return 0;
	}

	/* Delay init of rhashtable to save memory if feature isn't used */
	if (!mem_id_init) {
		mutex_lock(&mem_id_lock);
		ret = __mem_id_init_hash_table();
		mutex_unlock(&mem_id_lock);
		if (ret < 0) {
			WARN_ON(1);
			return ret;
		}
	}

	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
	if (!xdp_alloc)
		return -ENOMEM;

	mutex_lock(&mem_id_lock);
	id = __mem_id_cyclic_get(gfp);
	if (id < 0) {
		errno = id;
		goto err;
	}
	xdp_rxq->mem.id = id;
	xdp_alloc->mem = xdp_rxq->mem;
	xdp_alloc->allocator = allocator;

	/* Insert allocator into ID lookup table */
	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
	if (IS_ERR(ptr)) {
		errno = PTR_ERR(ptr);
		goto err;
	}

	mutex_unlock(&mem_id_lock);

	return 0;
err:
	mutex_unlock(&mem_id_lock);
	kfree(xdp_alloc);
	return errno;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
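
/* Hypothetical driver sketch (illustrative names): after registering
 * the rxq_info, a page_pool based driver ties its pool to the queue:
 *
 *	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
 *					 MEM_TYPE_PAGE_POOL, rq->page_pool);
 *	if (err) {
 *		xdp_rxq_info_unreg(&rq->xdp_rxq);
 *		return err;
 *	}
 *
 * The mem.id assigned here travels inside each xdp_frame, which lets
 * __xdp_return() below find the page_pool again without holding any
 * reference to the originating RX queue.
 */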

/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection. The @napi_direct boolean
 * is used for those call sites, allowing for faster recycling
 * of xdp_frames/pages in those cases.
 */
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
			 unsigned long handle)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

	switch (mem->type) {
	case MEM_TYPE_PAGE_POOL:
		rcu_read_lock();
		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		page = virt_to_head_page(data);
		if (xa)
			page_pool_put_page(xa->page_pool, page, napi_direct);
		else
			put_page(page);
		rcu_read_unlock();
		break;
	case MEM_TYPE_PAGE_SHARED:
		page_frag_free(data);
		break;
	case MEM_TYPE_PAGE_ORDER0:
		page = virt_to_page(data); /* Assumes order-0 page */
		put_page(page);
		break;
	case MEM_TYPE_ZERO_COPY:
		/* NB! Only valid from an xdp_buff! */
		rcu_read_lock();
		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		xa->zc_alloc->free(xa->zc_alloc, handle);
		rcu_read_unlock();
		break;
	default:
		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
		break;
	}
}

void xdp_return_frame(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, false, 0);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);

void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, true, 0);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);

void xdp_return_buff(struct xdp_buff *xdp)
{
	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);

int xdp_attachment_query(struct xdp_attachment_info *info,
			 struct netdev_bpf *bpf)
{
	bpf->prog_id = info->prog ? info->prog->aux->id : 0;
	bpf->prog_flags = info->prog ? info->flags : 0;
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_attachment_query);

bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
			     struct netdev_bpf *bpf)
{
	if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
		NL_SET_ERR_MSG(bpf->extack,
			       "program loaded with different flags");
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);

void xdp_attachment_setup(struct xdp_attachment_info *info,
			  struct netdev_bpf *bpf)
{
	if (info->prog)
		bpf_prog_put(info->prog);
	info->prog = bpf->prog;
	info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);
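
/* Hypothetical ndo_bpf sketch (my_dev and its fields are illustrative,
 * not from this file) showing how the xdp_attachment helpers combine:
 *
 *	static int my_dev_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 *	{
 *		struct my_dev *md = netdev_priv(dev);
 *
 *		switch (bpf->command) {
 *		case XDP_SETUP_PROG:
 *			if (!xdp_attachment_flags_ok(&md->xdp, bpf))
 *				return -EBUSY;
 *			xdp_attachment_setup(&md->xdp, bpf);
 *			return 0;
 *		case XDP_QUERY_PROG:
 *			return xdp_attachment_query(&md->xdp, bpf);
 *		default:
 *			return -EINVAL;
 *		}
 *	}
 */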