1 // SPDX-License-Identifier: GPL-2.0 2 /* XDP user-space packet buffer 3 * Copyright(c) 2018 Intel Corporation. 4 */ 5 6 #include <linux/init.h> 7 #include <linux/sched/mm.h> 8 #include <linux/sched/signal.h> 9 #include <linux/sched/task.h> 10 #include <linux/uaccess.h> 11 #include <linux/slab.h> 12 #include <linux/bpf.h> 13 #include <linux/mm.h> 14 #include <linux/netdevice.h> 15 #include <linux/rtnetlink.h> 16 #include <linux/idr.h> 17 #include <linux/vmalloc.h> 18 19 #include "xdp_umem.h" 20 #include "xsk_queue.h" 21 22 #define XDP_UMEM_MIN_CHUNK_SIZE 2048 23 24 static DEFINE_IDA(umem_ida); 25 26 static void xdp_umem_unpin_pages(struct xdp_umem *umem) 27 { 28 unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true); 29 30 kfree(umem->pgs); 31 umem->pgs = NULL; 32 } 33 34 static void xdp_umem_unaccount_pages(struct xdp_umem *umem) 35 { 36 if (umem->user) { 37 atomic_long_sub(umem->npgs, &umem->user->locked_vm); 38 free_uid(umem->user); 39 } 40 } 41 42 static void xdp_umem_addr_unmap(struct xdp_umem *umem) 43 { 44 vunmap(umem->addrs); 45 umem->addrs = NULL; 46 } 47 48 static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages, 49 u32 nr_pages) 50 { 51 umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); 52 if (!umem->addrs) 53 return -ENOMEM; 54 return 0; 55 } 56 57 static void xdp_umem_release(struct xdp_umem *umem) 58 { 59 umem->zc = false; 60 ida_simple_remove(&umem_ida, umem->id); 61 62 xdp_umem_addr_unmap(umem); 63 xdp_umem_unpin_pages(umem); 64 65 xdp_umem_unaccount_pages(umem); 66 kfree(umem); 67 } 68 69 void xdp_get_umem(struct xdp_umem *umem) 70 { 71 refcount_inc(&umem->users); 72 } 73 74 void xdp_put_umem(struct xdp_umem *umem) 75 { 76 if (!umem) 77 return; 78 79 if (refcount_dec_and_test(&umem->users)) 80 xdp_umem_release(umem); 81 } 82 83 static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) 84 { 85 unsigned int gup_flags = FOLL_WRITE; 86 long npgs; 87 int err; 88 89 umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), 90 GFP_KERNEL | __GFP_NOWARN); 91 if (!umem->pgs) 92 return -ENOMEM; 93 94 mmap_read_lock(current->mm); 95 npgs = pin_user_pages(address, umem->npgs, 96 gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL); 97 mmap_read_unlock(current->mm); 98 99 if (npgs != umem->npgs) { 100 if (npgs >= 0) { 101 umem->npgs = npgs; 102 err = -ENOMEM; 103 goto out_pin; 104 } 105 err = npgs; 106 goto out_pgs; 107 } 108 return 0; 109 110 out_pin: 111 xdp_umem_unpin_pages(umem); 112 out_pgs: 113 kfree(umem->pgs); 114 umem->pgs = NULL; 115 return err; 116 } 117 118 static int xdp_umem_account_pages(struct xdp_umem *umem) 119 { 120 unsigned long lock_limit, new_npgs, old_npgs; 121 122 if (capable(CAP_IPC_LOCK)) 123 return 0; 124 125 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 126 umem->user = get_uid(current_user()); 127 128 do { 129 old_npgs = atomic_long_read(&umem->user->locked_vm); 130 new_npgs = old_npgs + umem->npgs; 131 if (new_npgs > lock_limit) { 132 free_uid(umem->user); 133 umem->user = NULL; 134 return -ENOBUFS; 135 } 136 } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs, 137 new_npgs) != old_npgs); 138 return 0; 139 } 140 141 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) 142 { 143 u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom; 144 bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; 145 u64 npgs, addr = mr->addr, size = mr->len; 146 unsigned int chunks, chunks_rem; 147 int err; 148 149 if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { 150 /* Strictly speaking we could support this, if: 151 * - huge pages, or* 152 * - using an IOMMU, or 153 * - making sure the memory area is consecutive 154 * but for now, we simply say "computer says no". 155 */ 156 return -EINVAL; 157 } 158 159 if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) 160 return -EINVAL; 161 162 if (!unaligned_chunks && !is_power_of_2(chunk_size)) 163 return -EINVAL; 164 165 if (!PAGE_ALIGNED(addr)) { 166 /* Memory area has to be page size aligned. For 167 * simplicity, this might change. 168 */ 169 return -EINVAL; 170 } 171 172 if ((addr + size) < addr) 173 return -EINVAL; 174 175 npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem); 176 if (npgs_rem) 177 npgs++; 178 if (npgs > U32_MAX) 179 return -EINVAL; 180 181 chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem); 182 if (chunks == 0) 183 return -EINVAL; 184 185 if (!unaligned_chunks && chunks_rem) 186 return -EINVAL; 187 188 if (headroom >= chunk_size - XDP_PACKET_HEADROOM) 189 return -EINVAL; 190 191 umem->size = size; 192 umem->headroom = headroom; 193 umem->chunk_size = chunk_size; 194 umem->chunks = chunks; 195 umem->npgs = (u32)npgs; 196 umem->pgs = NULL; 197 umem->user = NULL; 198 umem->flags = mr->flags; 199 200 INIT_LIST_HEAD(&umem->xsk_dma_list); 201 refcount_set(&umem->users, 1); 202 203 err = xdp_umem_account_pages(umem); 204 if (err) 205 return err; 206 207 err = xdp_umem_pin_pages(umem, (unsigned long)addr); 208 if (err) 209 goto out_account; 210 211 err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs); 212 if (err) 213 goto out_unpin; 214 215 return 0; 216 217 out_unpin: 218 xdp_umem_unpin_pages(umem); 219 out_account: 220 xdp_umem_unaccount_pages(umem); 221 return err; 222 } 223 224 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) 225 { 226 struct xdp_umem *umem; 227 int err; 228 229 umem = kzalloc(sizeof(*umem), GFP_KERNEL); 230 if (!umem) 231 return ERR_PTR(-ENOMEM); 232 233 err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL); 234 if (err < 0) { 235 kfree(umem); 236 return ERR_PTR(err); 237 } 238 umem->id = err; 239 240 err = xdp_umem_reg(umem, mr); 241 if (err) { 242 ida_simple_remove(&umem_ida, umem->id); 243 kfree(umem); 244 return ERR_PTR(err); 245 } 246 247 return umem; 248 } 249