// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

/* Smallest chunk (frame) size a umem may be registered with; the upper
 * bound of PAGE_SIZE is enforced in xdp_umem_reg().
 */
#define XDP_UMEM_MIN_CHUNK_SIZE 2048

/* Allocator for the unique per-umem ids handed out in xdp_umem_create(). */
static DEFINE_IDA(umem_ida);

/* Unpin the user pages backing the umem (dirty-marking them via
 * unpin_user_pages_dirty_lock()) and free the page pointer array.
 */
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kfree(umem->pgs);
	umem->pgs = NULL;
}

/* Return the umem's pages to the owning user's locked_vm accounting.
 * umem->user is NULL when no accounting was done (creator had
 * CAP_IPC_LOCK, see xdp_umem_account_pages()), making this a no-op.
 */
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

/* Tear down the kernel virtual mapping created by xdp_umem_addr_map(). */
static void xdp_umem_addr_unmap(struct xdp_umem *umem)
{
	vunmap(umem->addrs);
	umem->addrs = NULL;
}

/* Map the pinned user pages into one contiguous kernel virtual range so
 * the umem can be addressed linearly through umem->addrs.
 *
 * Returns 0 on success, -ENOMEM if the vmap fails.
 */
static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
			     u32 nr_pages)
{
	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!umem->addrs)
		return -ENOMEM;
	return 0;
}

/* Final teardown, run when the last reference is dropped: release the
 * id and undo, in reverse order of setup, the mapping, pinning and
 * accounting done by xdp_umem_reg(), then free the umem itself.
 */
static void xdp_umem_release(struct xdp_umem *umem)
{
	umem->zc = false;
	ida_simple_remove(&umem_ida, umem->id);

	xdp_umem_addr_unmap(umem);
	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

/* Take an additional reference on the umem. */
void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

/* Drop a reference; the final put releases the umem. NULL is a no-op. */
void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users))
		xdp_umem_release(umem);
}

/* Pin umem->npgs pages of the current process, starting at @address,
 * into umem->pgs. FOLL_LONGTERM keeps them resident for the lifetime of
 * the umem; FOLL_WRITE because packet data is written into them.
 *
 * Returns 0 on success or a negative errno; on any failure nothing is
 * left pinned and umem->pgs is NULL.
 */
static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
			    GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			/* Partial pin: shrink umem->npgs first so the
			 * unpin below releases exactly what got pinned.
			 */
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		/* npgs < 0: pin_user_pages() returned an error code. */
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	/* When we fell through from out_pin, umem->pgs was already freed
	 * and NULLed by xdp_umem_unpin_pages(); kfree(NULL) is a no-op.
	 */
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

/* Charge umem->npgs pages against the current user's RLIMIT_MEMLOCK.
 * Skipped entirely (and umem->user left NULL) for CAP_IPC_LOCK holders.
 * The cmpxchg loop makes the check-and-add atomic so concurrent callers
 * cannot jointly exceed the limit.
 *
 * Returns 0 on success, -ENOBUFS when the limit would be exceeded.
 */
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

/* Validate the user-supplied registration request @mr and populate
 * @umem from it: sanity-check chunk size, flags, alignment and
 * headroom, then account, pin and vmap the user memory.
 *
 * Returns 0 on success or a negative errno with all partial setup
 * unwound.
 */
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	u64 npgs, addr = mr->addr, size = mr->len;
	unsigned int chunks, chunks_per_page;
	int err;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or*
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	/* Reject any flag bits we do not understand. */
	if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
		return -EINVAL;

	/* Aligned mode requires power-of-2 chunk sizes. */
	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change.
		 */
		return -EINVAL;
	}

	/* Reject regions that wrap around the end of the address space. */
	if ((addr + size) < addr)
		return -EINVAL;

	npgs = size >> PAGE_SHIFT;
	if (npgs > U32_MAX)
		return -EINVAL;

	chunks = (unsigned int)div_u64(size, chunk_size);
	if (chunks == 0)
		return -EINVAL;

	if (!unaligned_chunks) {
		/* Aligned mode: the area must hold at least one full
		 * page worth of chunks and divide evenly into pages.
		 */
		chunks_per_page = PAGE_SIZE / chunk_size;
		if (chunks < chunks_per_page || chunks % chunks_per_page)
			return -EINVAL;
	}

	/* headroom may be at most chunk_size - XDP_PACKET_HEADROOM - 1,
	 * leaving at least one byte of payload space per chunk.
	 */
	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = (u32)npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;

	INIT_LIST_HEAD(&umem->xsk_dma_list);
	refcount_set(&umem->users, 1);

	/* Charge the memlock limit before pinning any pages. */
	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
	if (err)
		goto out_unpin;

	return 0;

out_unpin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}

/* Allocate a umem, assign it a unique id from umem_ida, and register
 * the user memory described by @mr.
 *
 * Returns the new umem (holding one reference, set in xdp_umem_reg())
 * or an ERR_PTR() on failure.
 */
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	/* ida_simple_get() returns the new id, or a negative errno. */
	err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
	if (err < 0) {
		kfree(umem);
		return ERR_PTR(err);
	}
	umem->id = err;

	err = xdp_umem_reg(umem, mr);
	if (err) {
		ida_simple_remove(&umem_ida, umem->id);
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}