/* net/core/xdp.c
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 * Released under terms in GPL version 2. See COPYING.
 */
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <net/page_pool.h>

#include <net/xdp.h>

#define REG_STATE_NEW		0x0
#define REG_STATE_REGISTERED	0x1
#define REG_STATE_UNREGISTERED	0x2
#define REG_STATE_UNUSED	0x3

static DEFINE_IDA(mem_id_pool);
static DEFINE_MUTEX(mem_id_lock);
#define MEM_ID_MAX 0xFFFE
#define MEM_ID_MIN 1
static int mem_id_next = MEM_ID_MIN;

static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;

struct xdp_mem_allocator {
	struct xdp_mem_info mem;
	union {
		void *allocator;
		struct page_pool *page_pool;
		struct zero_copy_allocator *zc_alloc;
	};
	struct rhash_head node;
	struct rcu_head rcu;
};

static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 *k = data;
	const u32 key = *k;

	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
		     != sizeof(u32));

	/* Use cyclic increasing ID as direct hash key */
	return key;
}

static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct xdp_mem_allocator *xa = ptr;
	u32 mem_id = *(u32 *)arg->key;

	return xa->mem.id != mem_id;
}

static const struct rhashtable_params mem_id_rht_params = {
	.nelem_hint = 64,
	.head_offset = offsetof(struct xdp_mem_allocator, node),
	.key_offset = offsetof(struct xdp_mem_allocator, mem.id),
	.key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
	.max_size = MEM_ID_MAX,
	.min_size = 8,
	.automatic_shrinking = true,
	.hashfn = xdp_mem_id_hashfn,
	.obj_cmpfn = xdp_mem_id_cmp,
};

static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
{
	struct xdp_mem_allocator *xa;

	xa = container_of(rcu, struct xdp_mem_allocator, rcu);

	/* Allow this ID to be reused */
	ida_simple_remove(&mem_id_pool, xa->mem.id);

	/* Notice, driver is expected to free the *allocator,
	 * e.g. page_pool, and MUST also use RCU free.
	 */

	/* Poison memory */
	xa->mem.id = 0xFFFF;
	xa->mem.type = 0xF0F0;
	xa->allocator = (void *)0xDEAD9001;

	kfree(xa);
}

static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
	struct xdp_mem_allocator *xa;
	int id = xdp_rxq->mem.id;

	if (id == 0)
		return;

	mutex_lock(&mem_id_lock);

	xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
	if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
		call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);

	mutex_unlock(&mem_id_lock);
}

void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
	/* Simplify driver cleanup code paths, allow unreg "unused" */
	if (xdp_rxq->reg_state == REG_STATE_UNUSED)
		return;

	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");

	__xdp_rxq_info_unreg_mem_model(xdp_rxq);

	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
	xdp_rxq->dev = NULL;

	/* Reset mem info to defaults */
	xdp_rxq->mem.id = 0;
	xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);

static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
{
	memset(xdp_rxq, 0, sizeof(*xdp_rxq));
}

/* Returns 0 on success, negative on failure */
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		     struct net_device *dev, u32 queue_index)
{
	if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
		WARN(1, "Driver promised not to register this");
		return -EINVAL;
	}

	if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
		WARN(1, "Missing unregister, handled but fix driver");
		xdp_rxq_info_unreg(xdp_rxq);
	}

	if (!dev) {
		WARN(1, "Missing net_device from driver");
		return -ENODEV;
	}

	/* State either UNREGISTERED or NEW */
	xdp_rxq_info_init(xdp_rxq);
	xdp_rxq->dev = dev;
	xdp_rxq->queue_index = queue_index;

	xdp_rxq->reg_state = REG_STATE_REGISTERED;
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);

void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
	xdp_rxq->reg_state = REG_STATE_UNUSED;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);

bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
{
	return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
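/* Illustrative sketch (not part of this file): how a driver is expected to
 * use the registration API above during RX queue setup/teardown. The
 * structure and field names (mydrv_rx_ring, ring->xdp_rxq) are hypothetical
 * placeholders, not an API defined here.
 *
 *	static int mydrv_setup_rx_ring(struct mydrv_rx_ring *ring,
 *				       struct net_device *dev, u32 qidx)
 *	{
 *		int err;
 *
 *		err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, qidx);
 *		if (err)
 *			return err;
 *		// ... allocate buffers, register a memory model, etc. ...
 *		return 0;
 *	}
 *
 *	static void mydrv_free_rx_ring(struct mydrv_rx_ring *ring)
 *	{
 *		if (xdp_rxq_info_is_reg(&ring->xdp_rxq))
 *			xdp_rxq_info_unreg(&ring->xdp_rxq);
 *		// ... free buffers ...
 *	}
 *
 * A queue that will never carry XDP traffic can instead be marked with
 * xdp_rxq_info_unused(), which makes a later xdp_rxq_info_unreg() a no-op
 * but makes xdp_rxq_info_reg() on that queue a driver bug.
 */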
static int __mem_id_init_hash_table(void)
{
	struct rhashtable *rht;
	int ret;

	if (unlikely(mem_id_init))
		return 0;

	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
	if (!rht)
		return -ENOMEM;

	ret = rhashtable_init(rht, &mem_id_rht_params);
	if (ret < 0) {
		kfree(rht);
		return ret;
	}
	mem_id_ht = rht;
	smp_mb(); /* mutex lock should provide enough pairing */
	mem_id_init = true;

	return 0;
}

/* Allocate a cyclic ID that maps to allocator pointer.
 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
 *
 * Caller must lock mem_id_lock.
 */
static int __mem_id_cyclic_get(gfp_t gfp)
{
	int retries = 1;
	int id;

again:
	id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
	if (id < 0) {
		if (id == -ENOSPC) {
			/* Cyclic allocator, reset next id */
			if (retries--) {
				mem_id_next = MEM_ID_MIN;
				goto again;
			}
		}
		return id; /* errno */
	}
	mem_id_next = id + 1;

	return id;
}

static bool __is_supported_mem_type(enum xdp_mem_type type)
{
	if (type == MEM_TYPE_PAGE_POOL)
		return is_page_pool_compiled_in();

	if (type >= MEM_TYPE_MAX)
		return false;

	return true;
}

int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
			       enum xdp_mem_type type, void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;
	gfp_t gfp = GFP_KERNEL;
	int id, errno, ret;
	void *ptr;

	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return -EFAULT;
	}

	if (!__is_supported_mem_type(type))
		return -EOPNOTSUPP;

	xdp_rxq->mem.type = type;

	if (!allocator) {
		if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY)
			return -EINVAL; /* Setup time check page_pool req */
		return 0;
	}

	/* Delay init of rhashtable to save memory if feature isn't used */
	if (!mem_id_init) {
		mutex_lock(&mem_id_lock);
		ret = __mem_id_init_hash_table();
		mutex_unlock(&mem_id_lock);
		if (ret < 0) {
			WARN_ON(1);
			return ret;
		}
	}

	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
	if (!xdp_alloc)
		return -ENOMEM;

	mutex_lock(&mem_id_lock);
	id = __mem_id_cyclic_get(gfp);
	if (id < 0) {
		errno = id;
		goto err;
	}
	xdp_rxq->mem.id = id;
	xdp_alloc->mem = xdp_rxq->mem;
	xdp_alloc->allocator = allocator;

	/* Insert allocator into ID lookup table */
	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
	if (IS_ERR(ptr)) {
		errno = PTR_ERR(ptr);
		goto err;
	}

	mutex_unlock(&mem_id_lock);

	return 0;
err:
	mutex_unlock(&mem_id_lock);
	kfree(xdp_alloc);
	return errno;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
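/* Illustrative sketch (not part of this file): attaching a page_pool based
 * memory model to an already registered RX queue. The ring/pool variables
 * and the page_pool_params values are hypothetical example choices, and the
 * page_pool_create()/page_pool_destroy() pair is assumed from
 * <net/page_pool.h>.
 *
 *	struct page_pool_params pp_params = {
 *		.order = 0,
 *		.pool_size = ring_size,
 *		.nid = NUMA_NO_NODE,
 *		.dev = dev->dev.parent,
 *	};
 *	struct page_pool *pool;
 *	int err;
 *
 *	pool = page_pool_create(&pp_params);
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *
 *	err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 *					 MEM_TYPE_PAGE_POOL, pool);
 *	if (err) {
 *		page_pool_destroy(pool);
 *		return err;
 *	}
 *
 * On teardown the driver still owns freeing the page_pool itself (and must
 * do so RCU deferred, see __xdp_mem_allocator_rcu_free() above);
 * xdp_rxq_info_unreg() only removes the ID-to-allocator mapping.
 */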
/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection. The @napi_direct boolean
 * is used for those call sites, allowing for faster recycling of
 * xdp_frames/pages in those cases.
 */
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
			 unsigned long handle)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

	switch (mem->type) {
	case MEM_TYPE_PAGE_POOL:
		rcu_read_lock();
		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		page = virt_to_head_page(data);
		if (xa) {
			napi_direct &= !xdp_return_frame_no_direct();
			page_pool_put_page(xa->page_pool, page, napi_direct);
		} else {
			put_page(page);
		}
		rcu_read_unlock();
		break;
	case MEM_TYPE_PAGE_SHARED:
		page_frag_free(data);
		break;
	case MEM_TYPE_PAGE_ORDER0:
		page = virt_to_page(data); /* Assumes order0 page */
		put_page(page);
		break;
	case MEM_TYPE_ZERO_COPY:
		/* NB! Only valid from an xdp_buff! */
		rcu_read_lock();
		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		xa->zc_alloc->free(xa->zc_alloc, handle);
		rcu_read_unlock();
		break;
	default:
		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
		break;
	}
}

void xdp_return_frame(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, false, 0);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);

void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, true, 0);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);

void xdp_return_buff(struct xdp_buff *xdp)
{
	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);

int xdp_attachment_query(struct xdp_attachment_info *info,
			 struct netdev_bpf *bpf)
{
	bpf->prog_id = info->prog ? info->prog->aux->id : 0;
	bpf->prog_flags = info->prog ? info->flags : 0;
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_attachment_query);

bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
			     struct netdev_bpf *bpf)
{
	if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
		NL_SET_ERR_MSG(bpf->extack,
			       "program loaded with different flags");
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);

void xdp_attachment_setup(struct xdp_attachment_info *info,
			  struct netdev_bpf *bpf)
{
	if (info->prog)
		bpf_prog_put(info->prog);
	info->prog = bpf->prog;
	info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);
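/* Illustrative sketch (not part of this file): typical use of the
 * xdp_attachment_*() helpers from a driver's .ndo_bpf() XDP_SETUP_PROG
 * handler. The private struct, its xdp field, and the -EBUSY error choice
 * are hypothetical; the driver-specific program swap is elided.
 *
 *	static int mydrv_xdp_setup(struct net_device *dev,
 *				   struct netdev_bpf *bpf)
 *	{
 *		struct mydrv_priv *priv = netdev_priv(dev);
 *
 *		if (!xdp_attachment_flags_ok(&priv->xdp, bpf))
 *			return -EBUSY;
 *
 *		// ... swap the program into the datapath here ...
 *
 *		xdp_attachment_setup(&priv->xdp, bpf);
 *		return 0;
 *	}
 *
 * xdp_attachment_setup() stores bpf->prog (the reference handed over by the
 * netdev_bpf command) and releases the previously attached program, so the
 * caller must not drop that reference itself. xdp_attachment_query() fills
 * in bpf->prog_id and bpf->prog_flags for program queries.
 */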