1 // SPDX-License-Identifier: GPL-2.0 2 /* XDP user-space packet buffer 3 * Copyright(c) 2018 Intel Corporation. 4 */ 5 6 #include <linux/init.h> 7 #include <linux/sched/mm.h> 8 #include <linux/sched/signal.h> 9 #include <linux/sched/task.h> 10 #include <linux/uaccess.h> 11 #include <linux/slab.h> 12 #include <linux/bpf.h> 13 #include <linux/mm.h> 14 #include <linux/netdevice.h> 15 #include <linux/rtnetlink.h> 16 #include <linux/idr.h> 17 #include <linux/vmalloc.h> 18 19 #include "xdp_umem.h" 20 #include "xsk_queue.h" 21 22 #define XDP_UMEM_MIN_CHUNK_SIZE 2048 23 24 static DEFINE_IDA(umem_ida); 25 26 static void xdp_umem_unpin_pages(struct xdp_umem *umem) 27 { 28 unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true); 29 30 kvfree(umem->pgs); 31 umem->pgs = NULL; 32 } 33 34 static void xdp_umem_unaccount_pages(struct xdp_umem *umem) 35 { 36 if (umem->user) { 37 atomic_long_sub(umem->npgs, &umem->user->locked_vm); 38 free_uid(umem->user); 39 } 40 } 41 42 static void xdp_umem_addr_unmap(struct xdp_umem *umem) 43 { 44 vunmap(umem->addrs); 45 umem->addrs = NULL; 46 } 47 48 static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages, 49 u32 nr_pages) 50 { 51 umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); 52 if (!umem->addrs) 53 return -ENOMEM; 54 return 0; 55 } 56 57 static void xdp_umem_release(struct xdp_umem *umem) 58 { 59 umem->zc = false; 60 ida_simple_remove(&umem_ida, umem->id); 61 62 xdp_umem_addr_unmap(umem); 63 xdp_umem_unpin_pages(umem); 64 65 xdp_umem_unaccount_pages(umem); 66 kfree(umem); 67 } 68 69 static void xdp_umem_release_deferred(struct work_struct *work) 70 { 71 struct xdp_umem *umem = container_of(work, struct xdp_umem, work); 72 73 xdp_umem_release(umem); 74 } 75 76 void xdp_get_umem(struct xdp_umem *umem) 77 { 78 refcount_inc(&umem->users); 79 } 80 81 void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup) 82 { 83 if (!umem) 84 return; 85 86 if (refcount_dec_and_test(&umem->users)) { 87 if (defer_cleanup) { 88 INIT_WORK(&umem->work, xdp_umem_release_deferred); 89 schedule_work(&umem->work); 90 } else { 91 xdp_umem_release(umem); 92 } 93 } 94 } 95 96 static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) 97 { 98 unsigned int gup_flags = FOLL_WRITE; 99 long npgs; 100 int err; 101 102 umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN); 103 if (!umem->pgs) 104 return -ENOMEM; 105 106 mmap_read_lock(current->mm); 107 npgs = pin_user_pages(address, umem->npgs, 108 gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL); 109 mmap_read_unlock(current->mm); 110 111 if (npgs != umem->npgs) { 112 if (npgs >= 0) { 113 umem->npgs = npgs; 114 err = -ENOMEM; 115 goto out_pin; 116 } 117 err = npgs; 118 goto out_pgs; 119 } 120 return 0; 121 122 out_pin: 123 xdp_umem_unpin_pages(umem); 124 out_pgs: 125 kvfree(umem->pgs); 126 umem->pgs = NULL; 127 return err; 128 } 129 130 static int xdp_umem_account_pages(struct xdp_umem *umem) 131 { 132 unsigned long lock_limit, new_npgs, old_npgs; 133 134 if (capable(CAP_IPC_LOCK)) 135 return 0; 136 137 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 138 umem->user = get_uid(current_user()); 139 140 do { 141 old_npgs = atomic_long_read(&umem->user->locked_vm); 142 new_npgs = old_npgs + umem->npgs; 143 if (new_npgs > lock_limit) { 144 free_uid(umem->user); 145 umem->user = NULL; 146 return -ENOBUFS; 147 } 148 } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs, 149 new_npgs) != old_npgs); 150 return 0; 151 } 152 153 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) 154 { 155 u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom; 156 bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; 157 u64 npgs, addr = mr->addr, size = mr->len; 158 unsigned int chunks, chunks_rem; 159 int err; 160 161 if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { 162 /* Strictly speaking we could support this, if: 163 * - huge pages, or* 164 * - using an IOMMU, or 165 * - making sure the memory area is consecutive 166 * but for now, we simply say "computer says no". 167 */ 168 return -EINVAL; 169 } 170 171 if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) 172 return -EINVAL; 173 174 if (!unaligned_chunks && !is_power_of_2(chunk_size)) 175 return -EINVAL; 176 177 if (!PAGE_ALIGNED(addr)) { 178 /* Memory area has to be page size aligned. For 179 * simplicity, this might change. 180 */ 181 return -EINVAL; 182 } 183 184 if ((addr + size) < addr) 185 return -EINVAL; 186 187 npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem); 188 if (npgs_rem) 189 npgs++; 190 if (npgs > U32_MAX) 191 return -EINVAL; 192 193 chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem); 194 if (chunks == 0) 195 return -EINVAL; 196 197 if (!unaligned_chunks && chunks_rem) 198 return -EINVAL; 199 200 if (headroom >= chunk_size - XDP_PACKET_HEADROOM) 201 return -EINVAL; 202 203 umem->size = size; 204 umem->headroom = headroom; 205 umem->chunk_size = chunk_size; 206 umem->chunks = chunks; 207 umem->npgs = (u32)npgs; 208 umem->pgs = NULL; 209 umem->user = NULL; 210 umem->flags = mr->flags; 211 212 INIT_LIST_HEAD(&umem->xsk_dma_list); 213 refcount_set(&umem->users, 1); 214 215 err = xdp_umem_account_pages(umem); 216 if (err) 217 return err; 218 219 err = xdp_umem_pin_pages(umem, (unsigned long)addr); 220 if (err) 221 goto out_account; 222 223 err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs); 224 if (err) 225 goto out_unpin; 226 227 return 0; 228 229 out_unpin: 230 xdp_umem_unpin_pages(umem); 231 out_account: 232 xdp_umem_unaccount_pages(umem); 233 return err; 234 } 235 236 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) 237 { 238 struct xdp_umem *umem; 239 int err; 240 241 umem = kzalloc(sizeof(*umem), GFP_KERNEL); 242 if (!umem) 243 return ERR_PTR(-ENOMEM); 244 245 err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL); 246 if (err < 0) { 247 kfree(umem); 248 return ERR_PTR(err); 249 } 250 umem->id = err; 251 252 err = xdp_umem_reg(umem, mr); 253 if (err) { 254 ida_simple_remove(&umem_ida, umem->id); 255 kfree(umem); 256 return ERR_PTR(err); 257 } 258 259 return umem; 260 } 261