// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

static DEFINE_IDA(umem_ida);

/* Unpin the umem's backing pages, marking them dirty, and free the page array. */
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kvfree(umem->pgs);
	umem->pgs = NULL;
}

/* Undo the RLIMIT_MEMLOCK accounting done at registration time. */
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

static void xdp_umem_addr_unmap(struct xdp_umem *umem)
{
	vunmap(umem->addrs);
	umem->addrs = NULL;
}

/* Map the pinned pages into one contiguous kernel virtual address range. */
static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
			     u32 nr_pages)
{
	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!umem->addrs)
		return -ENOMEM;
	return 0;
}

/* Tear down everything set up by xdp_umem_reg(). */
static void xdp_umem_release(struct xdp_umem *umem)
{
	umem->zc = false;
	ida_free(&umem_ida, umem->id);

	xdp_umem_addr_unmap(umem);
	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		if (defer_cleanup) {
			INIT_WORK(&umem->work, xdp_umem_release_deferred);
			schedule_work(&umem->work);
		} else {
			xdp_umem_release(umem);
		}
	}
}

/* Pin the user pages backing the umem for the long term (FOLL_LONGTERM), so
 * they stay resident while the umem is in use.
 */
static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0]);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kvfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

/* Charge the pinned pages to the owner's RLIMIT_MEMLOCK, unless the caller
 * has CAP_IPC_LOCK.
 */
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

/* Validate a registration request and pin, account and map the user memory. */
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	u64 addr = mr->addr, size = mr->len;
	u32 chunks_rem, npgs_rem;
	u64 chunks, npgs;
	int err;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
		return -EINVAL;

	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
	if (npgs_rem)
		npgs++;
	if (npgs > U32_MAX)
		return -EINVAL;

	chunks = div_u64_rem(size, chunk_size, &chunks_rem);
	if (!chunks || chunks > U32_MAX)
		return -EINVAL;

	if (!unaligned_chunks && chunks_rem)
		return -EINVAL;

	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;

	INIT_LIST_HEAD(&umem->xsk_dma_list);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
	if (err)
		goto out_unpin;

	return 0;

out_unpin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = ida_alloc(&umem_ida, GFP_KERNEL);
	if (err < 0) {
		kfree(umem);
		return ERR_PTR(err);
	}
	umem->id = err;

	err = xdp_umem_reg(umem, mr);
	if (err) {
		ida_free(&umem_ida, umem->id);
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}
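
/*
 * Usage sketch (illustrative only, not part of this file or of the kernel
 * build): the checks in xdp_umem_reg() above correspond to a user-space
 * registration like the one below, issued via the XDP_UMEM_REG setsockopt on
 * an AF_XDP socket. The struct xdp_umem_reg fields follow the uapi header
 * <linux/if_xdp.h>; NUM_FRAMES and FRAME_SIZE are arbitrary example values,
 * AF_XDP/SOL_XDP availability depends on the kernel and libc headers in use,
 * and error handling is omitted for brevity.
 *
 *	#include <linux/if_xdp.h>
 *	#include <sys/socket.h>
 *	#include <stdint.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	#define NUM_FRAMES	4096
 *	#define FRAME_SIZE	2048	(power of two, at most one page)
 *
 *	int fd = socket(AF_XDP, SOCK_RAW, 0);
 *	void *buf;
 *
 *	(page-aligned backing memory: xdp_umem_reg() rejects unaligned addr)
 *	posix_memalign(&buf, getpagesize(), (size_t)NUM_FRAMES * FRAME_SIZE);
 *
 *	struct xdp_umem_reg mr = {
 *		.addr = (__u64)(uintptr_t)buf,
 *		.len = (__u64)NUM_FRAMES * FRAME_SIZE,
 *		.chunk_size = FRAME_SIZE,
 *		.headroom = 0,
 *		.flags = 0,
 *	};
 *	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 */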