/* net/core/xdp.c
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 * Released under terms in GPL version 2. See COPYING.
 */
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <net/page_pool.h>

#include <net/xdp.h>

#define REG_STATE_NEW		0x0
#define REG_STATE_REGISTERED	0x1
#define REG_STATE_UNREGISTERED	0x2
#define REG_STATE_UNUSED	0x3

static DEFINE_IDA(mem_id_pool);
static DEFINE_MUTEX(mem_id_lock);
#define MEM_ID_MAX 0xFFFE
#define MEM_ID_MIN 1
static int mem_id_next = MEM_ID_MIN;

static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;

struct xdp_mem_allocator {
	struct xdp_mem_info mem;
	union {
		void *allocator;
		struct page_pool *page_pool;
		struct zero_copy_allocator *zc_alloc;
	};
	struct rhash_head node;
	struct rcu_head rcu;
};

static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 *k = data;
	const u32 key = *k;

	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
		     != sizeof(u32));

	/* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
	return key << RHT_HASH_RESERVED_SPACE;
}

static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct xdp_mem_allocator *xa = ptr;
	u32 mem_id = *(u32 *)arg->key;

	return xa->mem.id != mem_id;
}

static const struct rhashtable_params mem_id_rht_params = {
	.nelem_hint = 64,
	.head_offset = offsetof(struct xdp_mem_allocator, node),
	.key_offset  = offsetof(struct xdp_mem_allocator, mem.id),
	.key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
	.max_size = MEM_ID_MAX,
	.min_size = 8,
	.automatic_shrinking = true,
	.hashfn    = xdp_mem_id_hashfn,
	.obj_cmpfn = xdp_mem_id_cmp,
};
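
/* Note: the return fast-path only carries the small xdp_mem_info
 * {type, id} (embedded in each xdp_frame and xdp_rxq_info); the mem_id_ht
 * table translates that id back to the xdp_mem_allocator, and thereby to
 * the page_pool or zero-copy allocator owning the memory, see
 * __xdp_return() below.
 */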

static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
{
	struct xdp_mem_allocator *xa;

	xa = container_of(rcu, struct xdp_mem_allocator, rcu);

	/* Allow this ID to be reused */
	ida_simple_remove(&mem_id_pool, xa->mem.id);

	/* Notice, driver is expected to free the *allocator,
	 * e.g. page_pool, and MUST also use RCU free.
	 */

	/* Poison memory */
	xa->mem.id = 0xFFFF;
	xa->mem.type = 0xF0F0;
	xa->allocator = (void *)0xDEAD9001;

	kfree(xa);
}

static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
	struct xdp_mem_allocator *xa;
	int id = xdp_rxq->mem.id;
	int err;

	if (id == 0)
		return;

	mutex_lock(&mem_id_lock);

	xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
	if (!xa) {
		mutex_unlock(&mem_id_lock);
		return;
	}

	err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
	WARN_ON(err);

	call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);

	mutex_unlock(&mem_id_lock);
}

void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
	/* Simplify driver cleanup code paths, allow unreg "unused" */
	if (xdp_rxq->reg_state == REG_STATE_UNUSED)
		return;

	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");

	__xdp_rxq_info_unreg_mem_model(xdp_rxq);

	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
	xdp_rxq->dev = NULL;

	/* Reset mem info to defaults */
	xdp_rxq->mem.id = 0;
	xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);

static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
{
	memset(xdp_rxq, 0, sizeof(*xdp_rxq));
}

/* Returns 0 on success, negative on failure */
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		     struct net_device *dev, u32 queue_index)
{
	if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
		WARN(1, "Driver promised not to register this");
		return -EINVAL;
	}

	if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
		WARN(1, "Missing unregister, handled but fix driver");
		xdp_rxq_info_unreg(xdp_rxq);
	}

	if (!dev) {
		WARN(1, "Missing net_device from driver");
		return -ENODEV;
	}

	/* State either UNREGISTERED or NEW */
	xdp_rxq_info_init(xdp_rxq);
	xdp_rxq->dev = dev;
	xdp_rxq->queue_index = queue_index;

	xdp_rxq->reg_state = REG_STATE_REGISTERED;
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);

void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
	xdp_rxq->reg_state = REG_STATE_UNUSED;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);

bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
{
	return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
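
/* Usage sketch (illustrative only; 'ring' and 'netdev' are hypothetical
 * driver-side variables): a driver registers rxq info while setting up an
 * RX ring, and unregisters it on teardown:
 *
 *	err = xdp_rxq_info_reg(&ring->xdp_rxq, netdev, ring->queue_index);
 *	if (err)
 *		goto unwind;
 *	...
 *	xdp_rxq_info_unreg(&ring->xdp_rxq);
 *
 * RX rings that will never be used for XDP can call xdp_rxq_info_unused()
 * instead, after which xdp_rxq_info_unreg() becomes a no-op.
 */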

static int __mem_id_init_hash_table(void)
{
	struct rhashtable *rht;
	int ret;

	if (unlikely(mem_id_init))
		return 0;

	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
	if (!rht)
		return -ENOMEM;

	ret = rhashtable_init(rht, &mem_id_rht_params);
	if (ret < 0) {
		kfree(rht);
		return ret;
	}
	mem_id_ht = rht;
	smp_mb(); /* mutex lock should provide enough pairing */
	mem_id_init = true;

	return 0;
}

/* Allocate a cyclic ID that maps to allocator pointer.
 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
 *
 * Caller must lock mem_id_lock.
 */
static int __mem_id_cyclic_get(gfp_t gfp)
{
	int retries = 1;
	int id;

again:
	id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
	if (id < 0) {
		if (id == -ENOSPC) {
			/* Cyclic allocator, reset next id */
			if (retries--) {
				mem_id_next = MEM_ID_MIN;
				goto again;
			}
		}
		return id; /* errno */
	}
	mem_id_next = id + 1;

	return id;
}

static bool __is_supported_mem_type(enum xdp_mem_type type)
{
	if (type == MEM_TYPE_PAGE_POOL)
		return is_page_pool_compiled_in();

	if (type >= MEM_TYPE_MAX)
		return false;

	return true;
}

int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
			       enum xdp_mem_type type, void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;
	gfp_t gfp = GFP_KERNEL;
	int id, errno, ret;
	void *ptr;

	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return -EFAULT;
	}

	if (!__is_supported_mem_type(type))
		return -EOPNOTSUPP;

	xdp_rxq->mem.type = type;

	if (!allocator) {
		if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY)
			return -EINVAL; /* Setup time check page_pool req */
		return 0;
	}

	/* Delay init of rhashtable to save memory if feature isn't used */
	if (!mem_id_init) {
		mutex_lock(&mem_id_lock);
		ret = __mem_id_init_hash_table();
		mutex_unlock(&mem_id_lock);
		if (ret < 0) {
			WARN_ON(1);
			return ret;
		}
	}

	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
	if (!xdp_alloc)
		return -ENOMEM;

	mutex_lock(&mem_id_lock);
	id = __mem_id_cyclic_get(gfp);
	if (id < 0) {
		errno = id;
		goto err;
	}
	xdp_rxq->mem.id = id;
	xdp_alloc->mem  = xdp_rxq->mem;
	xdp_alloc->allocator = allocator;

	/* Insert allocator into ID lookup table */
	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
	if (IS_ERR(ptr)) {
		errno = PTR_ERR(ptr);
		goto err;
	}

	mutex_unlock(&mem_id_lock);

	return 0;
err:
	mutex_unlock(&mem_id_lock);
	kfree(xdp_alloc);
	return errno;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
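
/* Usage sketch (illustrative only; 'ring' and 'pp_params' are hypothetical
 * driver-side variables): a driver backing its RX ring with a page_pool
 * ties the pool to the rxq, so the return path can recycle into it:
 *
 *	struct page_pool *pp = page_pool_create(&pp_params);
 *
 *	if (IS_ERR(pp))
 *		return PTR_ERR(pp);
 *	err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 *					 MEM_TYPE_PAGE_POOL, pp);
 *	if (err)
 *		page_pool_destroy(pp);
 */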

/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection. The @napi_direct boolean
 * is used for those call sites, allowing faster recycling of
 * xdp_frames/pages in those cases.
 */
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
			 unsigned long handle)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

	switch (mem->type) {
	case MEM_TYPE_PAGE_POOL:
		rcu_read_lock();
		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		page = virt_to_head_page(data);
		if (xa)
			page_pool_put_page(xa->page_pool, page, napi_direct);
		else
			put_page(page);
		rcu_read_unlock();
		break;
	case MEM_TYPE_PAGE_SHARED:
		page_frag_free(data);
		break;
	case MEM_TYPE_PAGE_ORDER0:
		page = virt_to_page(data); /* Assumes order0 page */
		put_page(page);
		break;
	case MEM_TYPE_ZERO_COPY:
		/* NB! Only valid from an xdp_buff! */
		rcu_read_lock();
		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		xa->zc_alloc->free(xa->zc_alloc, handle);
		rcu_read_unlock();
		break;
	default:
		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
		break;
	}
}

void xdp_return_frame(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, false, 0);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);

void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, true, 0);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);

void xdp_return_buff(struct xdp_buff *xdp)
{
	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
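
/* Usage sketch (illustrative only; the mydrv_* name is hypothetical): a
 * driver that transmitted redirected frames via ndo_xdp_xmit returns them
 * on TX completion, using the _rx_napi variant only when running under
 * NAPI protection:
 *
 *	static void mydrv_tx_complete(struct xdp_frame *xdpf, bool in_napi)
 *	{
 *		if (in_napi)
 *			xdp_return_frame_rx_napi(xdpf);
 *		else
 *			xdp_return_frame(xdpf);
 *	}
 */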