1 // SPDX-License-Identifier: GPL-2.0 2 /* XDP user-space packet buffer 3 * Copyright(c) 2018 Intel Corporation. 4 */ 5 6 #include <linux/init.h> 7 #include <linux/sched/mm.h> 8 #include <linux/sched/signal.h> 9 #include <linux/sched/task.h> 10 #include <linux/uaccess.h> 11 #include <linux/slab.h> 12 #include <linux/bpf.h> 13 #include <linux/mm.h> 14 #include <linux/netdevice.h> 15 #include <linux/rtnetlink.h> 16 #include <linux/idr.h> 17 #include <linux/vmalloc.h> 18 19 #include "xdp_umem.h" 20 #include "xsk_queue.h" 21 22 #define XDP_UMEM_MIN_CHUNK_SIZE 2048 23 24 static DEFINE_IDA(umem_ida); 25 26 static void xdp_umem_unpin_pages(struct xdp_umem *umem) 27 { 28 unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true); 29 30 kfree(umem->pgs); 31 umem->pgs = NULL; 32 } 33 34 static void xdp_umem_unaccount_pages(struct xdp_umem *umem) 35 { 36 if (umem->user) { 37 atomic_long_sub(umem->npgs, &umem->user->locked_vm); 38 free_uid(umem->user); 39 } 40 } 41 42 static void xdp_umem_addr_unmap(struct xdp_umem *umem) 43 { 44 vunmap(umem->addrs); 45 umem->addrs = NULL; 46 } 47 48 static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages, 49 u32 nr_pages) 50 { 51 umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); 52 if (!umem->addrs) 53 return -ENOMEM; 54 return 0; 55 } 56 57 static void xdp_umem_release(struct xdp_umem *umem) 58 { 59 umem->zc = false; 60 ida_simple_remove(&umem_ida, umem->id); 61 62 xdp_umem_addr_unmap(umem); 63 xdp_umem_unpin_pages(umem); 64 65 xdp_umem_unaccount_pages(umem); 66 kfree(umem); 67 } 68 69 static void xdp_umem_release_deferred(struct work_struct *work) 70 { 71 struct xdp_umem *umem = container_of(work, struct xdp_umem, work); 72 73 xdp_umem_release(umem); 74 } 75 76 void xdp_get_umem(struct xdp_umem *umem) 77 { 78 refcount_inc(&umem->users); 79 } 80 81 void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup) 82 { 83 if (!umem) 84 return; 85 86 if (refcount_dec_and_test(&umem->users)) { 87 if (defer_cleanup) { 88 INIT_WORK(&umem->work, xdp_umem_release_deferred); 89 schedule_work(&umem->work); 90 } else { 91 xdp_umem_release(umem); 92 } 93 } 94 } 95 96 static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) 97 { 98 unsigned int gup_flags = FOLL_WRITE; 99 long npgs; 100 int err; 101 102 umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), 103 GFP_KERNEL | __GFP_NOWARN); 104 if (!umem->pgs) 105 return -ENOMEM; 106 107 mmap_read_lock(current->mm); 108 npgs = pin_user_pages(address, umem->npgs, 109 gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL); 110 mmap_read_unlock(current->mm); 111 112 if (npgs != umem->npgs) { 113 if (npgs >= 0) { 114 umem->npgs = npgs; 115 err = -ENOMEM; 116 goto out_pin; 117 } 118 err = npgs; 119 goto out_pgs; 120 } 121 return 0; 122 123 out_pin: 124 xdp_umem_unpin_pages(umem); 125 out_pgs: 126 kfree(umem->pgs); 127 umem->pgs = NULL; 128 return err; 129 } 130 131 static int xdp_umem_account_pages(struct xdp_umem *umem) 132 { 133 unsigned long lock_limit, new_npgs, old_npgs; 134 135 if (capable(CAP_IPC_LOCK)) 136 return 0; 137 138 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 139 umem->user = get_uid(current_user()); 140 141 do { 142 old_npgs = atomic_long_read(&umem->user->locked_vm); 143 new_npgs = old_npgs + umem->npgs; 144 if (new_npgs > lock_limit) { 145 free_uid(umem->user); 146 umem->user = NULL; 147 return -ENOBUFS; 148 } 149 } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs, 150 new_npgs) != old_npgs); 151 return 0; 152 } 153 154 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) 155 { 156 u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom; 157 bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; 158 u64 npgs, addr = mr->addr, size = mr->len; 159 unsigned int chunks, chunks_rem; 160 int err; 161 162 if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { 163 /* Strictly speaking we could support this, if: 164 * - huge pages, or* 165 * - using an IOMMU, or 166 * - making sure the memory area is consecutive 167 * but for now, we simply say "computer says no". 168 */ 169 return -EINVAL; 170 } 171 172 if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) 173 return -EINVAL; 174 175 if (!unaligned_chunks && !is_power_of_2(chunk_size)) 176 return -EINVAL; 177 178 if (!PAGE_ALIGNED(addr)) { 179 /* Memory area has to be page size aligned. For 180 * simplicity, this might change. 181 */ 182 return -EINVAL; 183 } 184 185 if ((addr + size) < addr) 186 return -EINVAL; 187 188 npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem); 189 if (npgs_rem) 190 npgs++; 191 if (npgs > U32_MAX) 192 return -EINVAL; 193 194 chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem); 195 if (chunks == 0) 196 return -EINVAL; 197 198 if (!unaligned_chunks && chunks_rem) 199 return -EINVAL; 200 201 if (headroom >= chunk_size - XDP_PACKET_HEADROOM) 202 return -EINVAL; 203 204 umem->size = size; 205 umem->headroom = headroom; 206 umem->chunk_size = chunk_size; 207 umem->chunks = chunks; 208 umem->npgs = (u32)npgs; 209 umem->pgs = NULL; 210 umem->user = NULL; 211 umem->flags = mr->flags; 212 213 INIT_LIST_HEAD(&umem->xsk_dma_list); 214 refcount_set(&umem->users, 1); 215 216 err = xdp_umem_account_pages(umem); 217 if (err) 218 return err; 219 220 err = xdp_umem_pin_pages(umem, (unsigned long)addr); 221 if (err) 222 goto out_account; 223 224 err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs); 225 if (err) 226 goto out_unpin; 227 228 return 0; 229 230 out_unpin: 231 xdp_umem_unpin_pages(umem); 232 out_account: 233 xdp_umem_unaccount_pages(umem); 234 return err; 235 } 236 237 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) 238 { 239 struct xdp_umem *umem; 240 int err; 241 242 umem = kzalloc(sizeof(*umem), GFP_KERNEL); 243 if (!umem) 244 return ERR_PTR(-ENOMEM); 245 246 err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL); 247 if (err < 0) { 248 kfree(umem); 249 return ERR_PTR(err); 250 } 251 umem->id = err; 252 253 err = xdp_umem_reg(umem, mr); 254 if (err) { 255 ida_simple_remove(&umem_ida, umem->id); 256 kfree(umem); 257 return ERR_PTR(err); 258 } 259 260 return umem; 261 } 262