// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

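/* Add an XDP socket to the list of sockets sharing this umem. Writers
 * serialize on xsk_list_lock; readers traverse the list under RCU, so
 * a new entry becomes visible to them without blocking.
 */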
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_add_rcu(&xs->list, &umem->xsk_list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

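/* Remove an XDP socket from the umem's socket list. If the umem is in
 * zero-copy mode, wait out a grace period with synchronize_net() so
 * that anyone still walking the list under RCU is done with the
 * socket before the caller tears it down.
 */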
void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	if (xs->dev) {
		spin_lock_irqsave(&umem->xsk_list_lock, flags);
		list_del_rcu(&xs->list);
		spin_unlock_irqrestore(&umem->xsk_list_lock, flags);

		if (umem->zc)
			synchronize_net();
	}
}

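/* Try to bind the umem to a device queue for zero-copy. XDP_COPY skips
 * the device entirely, and XDP_ZEROCOPY turns any failure below into a
 * hard error; with neither flag set, failures silently fall back to
 * copy mode. The driver must implement both ndo_bpf and
 * ndo_xsk_async_xmit to be offered the umem.
 */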
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
			u32 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err;

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	if (force_copy)
		return 0;

	dev_hold(dev);

	if (dev->netdev_ops->ndo_bpf && dev->netdev_ops->ndo_xsk_async_xmit) {
		bpf.command = XDP_QUERY_XSK_UMEM;

		rtnl_lock();
		err = dev->netdev_ops->ndo_bpf(dev, &bpf);
		rtnl_unlock();

		if (err) {
			dev_put(dev);
			return force_zc ? -ENOTSUPP : 0;
		}

		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = umem;
		bpf.xsk.queue_id = queue_id;

		rtnl_lock();
		err = dev->netdev_ops->ndo_bpf(dev, &bpf);
		rtnl_unlock();

		if (err) {
			dev_put(dev);
			return force_zc ? err : 0; /* fail or fallback */
		}

		umem->dev = dev;
		umem->queue_id = queue_id;
		umem->zc = true;
		return 0;
	}

	dev_put(dev);
	return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
}

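/* Undo xdp_umem_assign_dev(): tell the driver to drop the umem for the
 * queue and release the device reference taken at bind time.
 */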
static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
	struct netdev_bpf bpf;
	int err;

	if (umem->dev) {
		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = NULL;
		bpf.xsk.queue_id = umem->queue_id;

		rtnl_lock();
		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
		rtnl_unlock();

		if (err)
			WARN(1, "failed to disable umem!\n");

		dev_put(umem->dev);
		umem->dev = NULL;
	}
}

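/* Release the pages pinned by xdp_umem_pin_pages(). They were pinned
 * writable, so mark them dirty before dropping the references.
 */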
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	for (i = 0; i < umem->npgs; i++) {
		struct page *page = umem->pgs[i];

		set_page_dirty_lock(page);
		put_page(page);
	}

	kfree(umem->pgs);
	umem->pgs = NULL;
}

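/* Return the pinned pages to the owning user's RLIMIT_MEMLOCK budget
 * and drop the user_struct reference taken in xdp_umem_account_pages().
 */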
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

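/* Final teardown, run once the last reference to the umem is gone:
 * detach from the device, destroy the fill and completion queues,
 * unpin and unaccount the pages, and free the umem itself.
 */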
static void xdp_umem_release(struct xdp_umem *umem)
{
	struct task_struct *task;
	struct mm_struct *mm;

	xdp_umem_clear_dev(umem);

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xdp_umem_unpin_pages(umem);

	task = get_pid_task(umem->pid, PIDTYPE_PID);
	put_pid(umem->pid);
	if (!task)
		goto out;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (mm)
		mmput(mm);

out:
	/* The page array and the locked_vm accounting must be released
	 * even when the owning task or its mm is already gone.
	 */
	kfree(umem->pages);
	umem->pages = NULL;

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

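/* Work item callback that runs the actual umem release; see
 * xdp_put_umem() below for why the teardown is deferred.
 */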
static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

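/* Drop a reference to the umem. The final teardown is pushed to a
 * workqueue because the release path sleeps (rtnl_lock, mmput,
 * synchronize_net), while the last reference may be dropped from a
 * context that cannot.
 */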
void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}
}

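/* Pin the user memory area so the pages stay resident while the umem
 * is in use. On a partial pin, umem->npgs is trimmed to the number of
 * pages actually pinned so the unwind releases exactly those.
 */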
static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
			    GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	npgs = get_user_pages(umem->address, umem->npgs,
			      gup_flags, &umem->pgs[0], NULL);
	up_write(&current->mm->mmap_sem);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

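/* Charge the pinned pages against the calling user's RLIMIT_MEMLOCK.
 * The cmpxchg loop updates locked_vm atomically, so concurrent
 * registrations by the same user cannot overshoot the limit.
 * Privileged (CAP_IPC_LOCK) callers are exempt.
 */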
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

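/* Validate a registration request and set up the umem: the chunk size
 * must be a power of two between XDP_UMEM_MIN_CHUNK_SIZE and PAGE_SIZE,
 * the area must be page aligned, and chunks may not straddle a page.
 * For example, a 4 MiB area with 2048-byte chunks on 4 KiB pages gives
 * 2048 chunks, two per page.
 *
 * Userspace reaches this through the XDP_UMEM_REG setsockopt on an
 * AF_XDP socket. A minimal sketch of that call (not part of this file;
 * error handling omitted, buf assumed page aligned):
 *
 *	struct xdp_umem_reg mr = {
 *		.addr = (__u64)(unsigned long)buf,
 *		.len = 4UL << 20,
 *		.chunk_size = 2048,
 *		.headroom = 0,
 *	};
 *	int fd = socket(AF_XDP, SOCK_RAW, 0);
 *	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 */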
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	unsigned int chunks, chunks_per_page;
	u64 addr = mr->addr, size = mr->len;
	int size_chk, err, i;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. We keep it
		 * simple for now; this might change.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	chunks = (unsigned int)div_u64(size, chunk_size);
	if (chunks == 0)
		return -EINVAL;

	chunks_per_page = PAGE_SIZE / chunk_size;
	if (chunks < chunks_per_page || chunks % chunks_per_page)
		return -EINVAL;

	headroom = ALIGN(headroom, 64);

	size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
	if (size_chk < 0)
		return -EINVAL;

	umem->pid = get_task_pid(current, PIDTYPE_PID);
	umem->address = (unsigned long)addr;
	umem->props.chunk_mask = ~((u64)chunk_size - 1);
	umem->props.size = size;
	umem->headroom = headroom;
	umem->chunk_size_nohr = chunk_size - headroom;
	umem->npgs = size / PAGE_SIZE;
	umem->pgs = NULL;
	umem->user = NULL;
	INIT_LIST_HEAD(&umem->xsk_list);
	spin_lock_init(&umem->xsk_list_lock);

	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		goto out;

	err = xdp_umem_pin_pages(umem);
	if (err)
		goto out_account;

	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
	if (!umem->pages) {
		err = -ENOMEM;
		goto out_pin;
	}

	for (i = 0; i < umem->npgs; i++)
		umem->pages[i].addr = page_address(umem->pgs[i]);

	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
out:
	put_pid(umem->pid);
	return err;
}

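/* Allocate and register a umem from a registration request. On any
 * failure the umem is freed again (xdp_umem_reg() unwinds its own
 * state), so callers never see a half-built umem.
 */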
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = xdp_umem_reg(umem, mr);
	if (err) {
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}

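/* Both a fill queue and a completion queue must be in place before the
 * umem can be bound to a device queue.
 */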
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}