1 /* net/core/xdp.c 2 * 3 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. 4 * Released under terms in GPL version 2. See COPYING. 5 */ 6 #include <linux/bpf.h> 7 #include <linux/filter.h> 8 #include <linux/types.h> 9 #include <linux/mm.h> 10 #include <linux/netdevice.h> 11 #include <linux/slab.h> 12 #include <linux/idr.h> 13 #include <linux/rhashtable.h> 14 #include <net/page_pool.h> 15 16 #include <net/xdp.h> 17 18 #define REG_STATE_NEW 0x0 19 #define REG_STATE_REGISTERED 0x1 20 #define REG_STATE_UNREGISTERED 0x2 21 #define REG_STATE_UNUSED 0x3 22 23 static DEFINE_IDA(mem_id_pool); 24 static DEFINE_MUTEX(mem_id_lock); 25 #define MEM_ID_MAX 0xFFFE 26 #define MEM_ID_MIN 1 27 static int mem_id_next = MEM_ID_MIN; 28 29 static bool mem_id_init; /* false */ 30 static struct rhashtable *mem_id_ht; 31 32 struct xdp_mem_allocator { 33 struct xdp_mem_info mem; 34 union { 35 void *allocator; 36 struct page_pool *page_pool; 37 struct zero_copy_allocator *zc_alloc; 38 }; 39 struct rhash_head node; 40 struct rcu_head rcu; 41 }; 42 43 static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed) 44 { 45 const u32 *k = data; 46 const u32 key = *k; 47 48 BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id) 49 != sizeof(u32)); 50 51 /* Use cyclic increasing ID as direct hash key */ 52 return key; 53 } 54 55 static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg, 56 const void *ptr) 57 { 58 const struct xdp_mem_allocator *xa = ptr; 59 u32 mem_id = *(u32 *)arg->key; 60 61 return xa->mem.id != mem_id; 62 } 63 64 static const struct rhashtable_params mem_id_rht_params = { 65 .nelem_hint = 64, 66 .head_offset = offsetof(struct xdp_mem_allocator, node), 67 .key_offset = offsetof(struct xdp_mem_allocator, mem.id), 68 .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id), 69 .max_size = MEM_ID_MAX, 70 .min_size = 8, 71 .automatic_shrinking = true, 72 .hashfn = xdp_mem_id_hashfn, 73 .obj_cmpfn = xdp_mem_id_cmp, 74 }; 75 76 static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu) 77 { 78 struct xdp_mem_allocator *xa; 79 80 xa = container_of(rcu, struct xdp_mem_allocator, rcu); 81 82 /* Allow this ID to be reused */ 83 ida_simple_remove(&mem_id_pool, xa->mem.id); 84 85 /* Notice, driver is expected to free the *allocator, 86 * e.g. page_pool, and MUST also use RCU free. 87 */ 88 89 /* Poison memory */ 90 xa->mem.id = 0xFFFF; 91 xa->mem.type = 0xF0F0; 92 xa->allocator = (void *)0xDEAD9001; 93 94 kfree(xa); 95 } 96 97 void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) 98 { 99 struct xdp_mem_allocator *xa; 100 int id = xdp_rxq->mem.id; 101 102 if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { 103 WARN(1, "Missing register, driver bug"); 104 return; 105 } 106 107 if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL && 108 xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) { 109 return; 110 } 111 112 if (id == 0) 113 return; 114 115 mutex_lock(&mem_id_lock); 116 117 xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params); 118 if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) 119 call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); 120 121 mutex_unlock(&mem_id_lock); 122 } 123 EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model); 124 125 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) 126 { 127 /* Simplify driver cleanup code paths, allow unreg "unused" */ 128 if (xdp_rxq->reg_state == REG_STATE_UNUSED) 129 return; 130 131 WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG"); 132 133 xdp_rxq_info_unreg_mem_model(xdp_rxq); 134 135 xdp_rxq->reg_state = REG_STATE_UNREGISTERED; 136 xdp_rxq->dev = NULL; 137 138 /* Reset mem info to defaults */ 139 xdp_rxq->mem.id = 0; 140 xdp_rxq->mem.type = 0; 141 } 142 EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg); 143 144 static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq) 145 { 146 memset(xdp_rxq, 0, sizeof(*xdp_rxq)); 147 } 148 149 /* Returns 0 on success, negative on failure */ 150 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, 151 struct net_device *dev, u32 queue_index) 152 { 153 if (xdp_rxq->reg_state == REG_STATE_UNUSED) { 154 WARN(1, "Driver promised not to register this"); 155 return -EINVAL; 156 } 157 158 if (xdp_rxq->reg_state == REG_STATE_REGISTERED) { 159 WARN(1, "Missing unregister, handled but fix driver"); 160 xdp_rxq_info_unreg(xdp_rxq); 161 } 162 163 if (!dev) { 164 WARN(1, "Missing net_device from driver"); 165 return -ENODEV; 166 } 167 168 /* State either UNREGISTERED or NEW */ 169 xdp_rxq_info_init(xdp_rxq); 170 xdp_rxq->dev = dev; 171 xdp_rxq->queue_index = queue_index; 172 173 xdp_rxq->reg_state = REG_STATE_REGISTERED; 174 return 0; 175 } 176 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg); 177 178 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq) 179 { 180 xdp_rxq->reg_state = REG_STATE_UNUSED; 181 } 182 EXPORT_SYMBOL_GPL(xdp_rxq_info_unused); 183 184 bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq) 185 { 186 return (xdp_rxq->reg_state == REG_STATE_REGISTERED); 187 } 188 EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg); 189 190 static int __mem_id_init_hash_table(void) 191 { 192 struct rhashtable *rht; 193 int ret; 194 195 if (unlikely(mem_id_init)) 196 return 0; 197 198 rht = kzalloc(sizeof(*rht), GFP_KERNEL); 199 if (!rht) 200 return -ENOMEM; 201 202 ret = rhashtable_init(rht, &mem_id_rht_params); 203 if (ret < 0) { 204 kfree(rht); 205 return ret; 206 } 207 mem_id_ht = rht; 208 smp_mb(); /* mutex lock should provide enough pairing */ 209 mem_id_init = true; 210 211 return 0; 212 } 213 214 /* Allocate a cyclic ID that maps to allocator pointer. 215 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html 216 * 217 * Caller must lock mem_id_lock. 218 */ 219 static int __mem_id_cyclic_get(gfp_t gfp) 220 { 221 int retries = 1; 222 int id; 223 224 again: 225 id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp); 226 if (id < 0) { 227 if (id == -ENOSPC) { 228 /* Cyclic allocator, reset next id */ 229 if (retries--) { 230 mem_id_next = MEM_ID_MIN; 231 goto again; 232 } 233 } 234 return id; /* errno */ 235 } 236 mem_id_next = id + 1; 237 238 return id; 239 } 240 241 static bool __is_supported_mem_type(enum xdp_mem_type type) 242 { 243 if (type == MEM_TYPE_PAGE_POOL) 244 return is_page_pool_compiled_in(); 245 246 if (type >= MEM_TYPE_MAX) 247 return false; 248 249 return true; 250 } 251 252 int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, 253 enum xdp_mem_type type, void *allocator) 254 { 255 struct xdp_mem_allocator *xdp_alloc; 256 gfp_t gfp = GFP_KERNEL; 257 int id, errno, ret; 258 void *ptr; 259 260 if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { 261 WARN(1, "Missing register, driver bug"); 262 return -EFAULT; 263 } 264 265 if (!__is_supported_mem_type(type)) 266 return -EOPNOTSUPP; 267 268 xdp_rxq->mem.type = type; 269 270 if (!allocator) { 271 if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY) 272 return -EINVAL; /* Setup time check page_pool req */ 273 return 0; 274 } 275 276 /* Delay init of rhashtable to save memory if feature isn't used */ 277 if (!mem_id_init) { 278 mutex_lock(&mem_id_lock); 279 ret = __mem_id_init_hash_table(); 280 mutex_unlock(&mem_id_lock); 281 if (ret < 0) { 282 WARN_ON(1); 283 return ret; 284 } 285 } 286 287 xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp); 288 if (!xdp_alloc) 289 return -ENOMEM; 290 291 mutex_lock(&mem_id_lock); 292 id = __mem_id_cyclic_get(gfp); 293 if (id < 0) { 294 errno = id; 295 goto err; 296 } 297 xdp_rxq->mem.id = id; 298 xdp_alloc->mem = xdp_rxq->mem; 299 xdp_alloc->allocator = allocator; 300 301 /* Insert allocator into ID lookup table */ 302 ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node); 303 if (IS_ERR(ptr)) { 304 errno = PTR_ERR(ptr); 305 goto err; 306 } 307 308 mutex_unlock(&mem_id_lock); 309 310 return 0; 311 err: 312 mutex_unlock(&mem_id_lock); 313 kfree(xdp_alloc); 314 return errno; 315 } 316 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); 317 318 /* XDP RX runs under NAPI protection, and in different delivery error 319 * scenarios (e.g. queue full), it is possible to return the xdp_frame 320 * while still leveraging this protection. The @napi_direct boolian 321 * is used for those calls sites. Thus, allowing for faster recycling 322 * of xdp_frames/pages in those cases. 323 */ 324 static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, 325 unsigned long handle) 326 { 327 struct xdp_mem_allocator *xa; 328 struct page *page; 329 330 switch (mem->type) { 331 case MEM_TYPE_PAGE_POOL: 332 rcu_read_lock(); 333 /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ 334 xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); 335 page = virt_to_head_page(data); 336 if (xa) { 337 napi_direct &= !xdp_return_frame_no_direct(); 338 page_pool_put_page(xa->page_pool, page, napi_direct); 339 } else { 340 put_page(page); 341 } 342 rcu_read_unlock(); 343 break; 344 case MEM_TYPE_PAGE_SHARED: 345 page_frag_free(data); 346 break; 347 case MEM_TYPE_PAGE_ORDER0: 348 page = virt_to_page(data); /* Assumes order0 page*/ 349 put_page(page); 350 break; 351 case MEM_TYPE_ZERO_COPY: 352 /* NB! Only valid from an xdp_buff! */ 353 rcu_read_lock(); 354 /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ 355 xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); 356 xa->zc_alloc->free(xa->zc_alloc, handle); 357 rcu_read_unlock(); 358 default: 359 /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ 360 break; 361 } 362 } 363 364 void xdp_return_frame(struct xdp_frame *xdpf) 365 { 366 __xdp_return(xdpf->data, &xdpf->mem, false, 0); 367 } 368 EXPORT_SYMBOL_GPL(xdp_return_frame); 369 370 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) 371 { 372 __xdp_return(xdpf->data, &xdpf->mem, true, 0); 373 } 374 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); 375 376 void xdp_return_buff(struct xdp_buff *xdp) 377 { 378 __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle); 379 } 380 EXPORT_SYMBOL_GPL(xdp_return_buff); 381 382 int xdp_attachment_query(struct xdp_attachment_info *info, 383 struct netdev_bpf *bpf) 384 { 385 bpf->prog_id = info->prog ? info->prog->aux->id : 0; 386 bpf->prog_flags = info->prog ? info->flags : 0; 387 return 0; 388 } 389 EXPORT_SYMBOL_GPL(xdp_attachment_query); 390 391 bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, 392 struct netdev_bpf *bpf) 393 { 394 if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) { 395 NL_SET_ERR_MSG(bpf->extack, 396 "program loaded with different flags"); 397 return false; 398 } 399 return true; 400 } 401 EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok); 402 403 void xdp_attachment_setup(struct xdp_attachment_info *info, 404 struct netdev_bpf *bpf) 405 { 406 if (info->prog) 407 bpf_prog_put(info->prog); 408 info->prog = bpf->prog; 409 info->flags = bpf->flags; 410 } 411 EXPORT_SYMBOL_GPL(xdp_attachment_setup); 412 413 struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) 414 { 415 unsigned int metasize, totsize; 416 void *addr, *data_to_copy; 417 struct xdp_frame *xdpf; 418 struct page *page; 419 420 /* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */ 421 metasize = xdp_data_meta_unsupported(xdp) ? 0 : 422 xdp->data - xdp->data_meta; 423 totsize = xdp->data_end - xdp->data + metasize; 424 425 if (sizeof(*xdpf) + totsize > PAGE_SIZE) 426 return NULL; 427 428 page = dev_alloc_page(); 429 if (!page) 430 return NULL; 431 432 addr = page_to_virt(page); 433 xdpf = addr; 434 memset(xdpf, 0, sizeof(*xdpf)); 435 436 addr += sizeof(*xdpf); 437 data_to_copy = metasize ? xdp->data_meta : xdp->data; 438 memcpy(addr, data_to_copy, totsize); 439 440 xdpf->data = addr + metasize; 441 xdpf->len = totsize - metasize; 442 xdpf->headroom = 0; 443 xdpf->metasize = metasize; 444 xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; 445 446 xdp_return_buff(xdp); 447 return xdpf; 448 } 449 EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame); 450