xdp_umem.c (150f29f5e6ea55d8a7d368b162a4e9947a95d2f5) vs. xdp_umem.c (2b1667e54caf95e1e4249d9068eea7a3089a5229)
1// SPDX-License-Identifier: GPL-2.0
2/* XDP user-space packet buffer
3 * Copyright(c) 2018 Intel Corporation.
4 */
5
6#include <linux/init.h>
7#include <linux/sched/mm.h>
8#include <linux/sched/signal.h>

--- 9 unchanged lines hidden ---

18
19#include "xdp_umem.h"
20#include "xsk_queue.h"
21
22#define XDP_UMEM_MIN_CHUNK_SIZE 2048
23
24static DEFINE_IDA(umem_ida);
25
26void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
27{
28 unsigned long flags;
29
30 if (!xs->tx)
31 return;
32
33 spin_lock_irqsave(&umem->xsk_tx_list_lock, flags);
34 list_add_rcu(&xs->list, &umem->xsk_tx_list);
35 spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags);
36}
37
38void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
39{
40 unsigned long flags;
41
42 if (!xs->tx)
43 return;
44
45 spin_lock_irqsave(&umem->xsk_tx_list_lock, flags);
46 list_del_rcu(&xs->list);
47 spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags);
48}
49
50/* The umem is stored both in the _rx struct and the _tx struct as we do
51 * not know if the device has more tx queues than rx, or the opposite.
52 * This might also change during run time.
53 */
54static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
55 u16 queue_id)
56{
57 if (queue_id >= max_t(unsigned int,
58 dev->real_num_rx_queues,
59 dev->real_num_tx_queues))
60 return -EINVAL;
61
62 if (queue_id < dev->real_num_rx_queues)
63 dev->_rx[queue_id].umem = umem;
64 if (queue_id < dev->real_num_tx_queues)
65 dev->_tx[queue_id].umem = umem;
66
67 return 0;
68}
69
70struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
71 u16 queue_id)
72{
73 if (queue_id < dev->real_num_rx_queues)
74 return dev->_rx[queue_id].umem;
75 if (queue_id < dev->real_num_tx_queues)
76 return dev->_tx[queue_id].umem;
77
78 return NULL;
79}
80EXPORT_SYMBOL(xdp_get_umem_from_qid);
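
The comment above xdp_reg_umem_at_qid() explains why the umem is mirrored into both the _rx and _tx queue structs, and xdp_get_umem_from_qid() is exported so drivers can read that per-queue binding back. The following is a minimal, hypothetical driver-side sketch, not code from this file; only xdp_get_umem_from_qid() itself is taken from here, the function name and header choice are assumptions.

	#include <linux/netdevice.h>
	#include <net/xdp_sock.h>	/* net/xdp_sock_drv.h on newer kernels */

	/* Hypothetical driver-side sketch (not part of this file): ask the
	 * core whether an AF_XDP umem is currently registered for a queue,
	 * using the helper exported above.
	 */
	static bool example_queue_has_umem(struct net_device *dev, u16 queue_id)
	{
		return xdp_get_umem_from_qid(dev, queue_id) != NULL;
	}

Drivers typically perform a lookup like this when (re)building their rings, to decide whether a given queue should be configured for AF_XDP zero-copy.
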
81
82static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
83{
84 if (queue_id < dev->real_num_rx_queues)
85 dev->_rx[queue_id].umem = NULL;
86 if (queue_id < dev->real_num_tx_queues)
87 dev->_tx[queue_id].umem = NULL;
88}
89
90int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
91 u16 queue_id, u16 flags)
92{
93 bool force_zc, force_copy;
94 struct netdev_bpf bpf;
95 int err = 0;
96
97 ASSERT_RTNL();
98
99 force_zc = flags & XDP_ZEROCOPY;
100 force_copy = flags & XDP_COPY;
101
102 if (force_zc && force_copy)
103 return -EINVAL;
104
105 if (xdp_get_umem_from_qid(dev, queue_id))
106 return -EBUSY;
107
108 err = xdp_reg_umem_at_qid(dev, umem, queue_id);
109 if (err)
110 return err;
111
112 umem->dev = dev;
113 umem->queue_id = queue_id;
114
115 if (flags & XDP_USE_NEED_WAKEUP) {
116 umem->flags |= XDP_UMEM_USES_NEED_WAKEUP;
117 /* Tx needs to be explicitly woken up the first time.
118 * This also covers drivers that do not implement the feature;
119 * user space will always have to call sendto() for those.
120 */
121 xsk_set_tx_need_wakeup(umem);
122 }
123
124 dev_hold(dev);
125
126 if (force_copy)
127 /* For copy-mode, we are done. */
128 return 0;
129
130 if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) {
131 err = -EOPNOTSUPP;
132 goto err_unreg_umem;
133 }
134
135 bpf.command = XDP_SETUP_XSK_UMEM;
136 bpf.xsk.umem = umem;
137 bpf.xsk.queue_id = queue_id;
138
139 err = dev->netdev_ops->ndo_bpf(dev, &bpf);
140 if (err)
141 goto err_unreg_umem;
142
143 umem->zc = true;
144 return 0;
145
146err_unreg_umem:
147 if (!force_zc)
148 err = 0; /* fallback to copy mode */
149 if (err)
150 xdp_clear_umem_at_qid(dev, queue_id);
151 return err;
152}
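
The need_wakeup comment just above spells out the user-space side of the contract: Tx has to be kicked with sendto(), always on drivers that do not implement the feature and only on demand otherwise. Below is a minimal user-space sketch of that kick, assuming libbpf's xsk.h helpers (xsk_ring_prod__needs_wakeup(), xsk_socket__fd()) and a socket bound with XDP_USE_NEED_WAKEUP; kick_tx() is a hypothetical helper name, not an API.

	#include <sys/socket.h>
	#include <bpf/xsk.h>	/* <xdp/xsk.h> when using libxdp */

	/* Hypothetical helper: kick the kernel Tx path when required. */
	static void kick_tx(struct xsk_socket *xsk, struct xsk_ring_prod *tx)
	{
		/* Assumes the socket was bound with XDP_USE_NEED_WAKEUP;
		 * without that flag user space must call sendto()
		 * unconditionally after queueing on the Tx ring.
		 */
		if (!xsk_ring_prod__needs_wakeup(tx))
			return;

		sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
	}

On drivers that lack the feature, the xsk_set_tx_need_wakeup() call above leaves the wakeup flag permanently set, so the same helper simply ends up issuing sendto() every time.
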
153
154void xdp_umem_clear_dev(struct xdp_umem *umem)
155{
156 struct netdev_bpf bpf;
157 int err;
158
159 ASSERT_RTNL();
160
161 if (!umem->dev)
162 return;
163
164 if (umem->zc) {
165 bpf.command = XDP_SETUP_XSK_UMEM;
166 bpf.xsk.umem = NULL;
167 bpf.xsk.queue_id = umem->queue_id;
168
169 err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
170
171 if (err)
172 WARN(1, "failed to disable umem!\n");
173 }
174
175 xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
176
177 dev_put(umem->dev);
178 umem->dev = NULL;
179 umem->zc = false;
180}
181
26static void xdp_umem_unpin_pages(struct xdp_umem *umem)
27{
28 unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
29
30 kfree(umem->pgs);
31 umem->pgs = NULL;
32}
33
34static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
35{
36 if (umem->user) {
37 atomic_long_sub(umem->npgs, &umem->user->locked_vm);
38 free_uid(umem->user);
39 }
40}
41
182static void xdp_umem_unpin_pages(struct xdp_umem *umem)
183{
184 unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
185
186 kfree(umem->pgs);
187 umem->pgs = NULL;
188}
189
190static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
191{
192 if (umem->user) {
193 atomic_long_sub(umem->npgs, &umem->user->locked_vm);
194 free_uid(umem->user);
195 }
196}
197
42static void xdp_umem_addr_unmap(struct xdp_umem *umem)
43{
44 vunmap(umem->addrs);
45 umem->addrs = NULL;
46}
47
48static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
49 u32 nr_pages)
50{
51 umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
52 if (!umem->addrs)
53 return -ENOMEM;
54 return 0;
55}
56
57static void xdp_umem_release(struct xdp_umem *umem)
58{
198static void xdp_umem_release(struct xdp_umem *umem)
199{
59 umem->zc = false;
200 rtnl_lock();
201 xdp_umem_clear_dev(umem);
202 rtnl_unlock();
203
60 ida_simple_remove(&umem_ida, umem->id);
61
204 ida_simple_remove(&umem_ida, umem->id);
205
62 xdp_umem_addr_unmap(umem);
206 if (umem->fq) {
207 xskq_destroy(umem->fq);
208 umem->fq = NULL;
209 }
210
211 if (umem->cq) {
212 xskq_destroy(umem->cq);
213 umem->cq = NULL;
214 }
215
216 xp_destroy(umem->pool);
63 xdp_umem_unpin_pages(umem);
64
65 xdp_umem_unaccount_pages(umem);
66 kfree(umem);
67}
68
217 xdp_umem_unpin_pages(umem);
218
219 xdp_umem_unaccount_pages(umem);
220 kfree(umem);
221}
222
223static void xdp_umem_release_deferred(struct work_struct *work)
224{
225 struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
226
227 xdp_umem_release(umem);
228}
229
69void xdp_get_umem(struct xdp_umem *umem)
70{
71 refcount_inc(&umem->users);
72}
73
74void xdp_put_umem(struct xdp_umem *umem)
75{
76 if (!umem)
77 return;
78
230void xdp_get_umem(struct xdp_umem *umem)
231{
232 refcount_inc(&umem->users);
233}
234
235void xdp_put_umem(struct xdp_umem *umem)
236{
237 if (!umem)
238 return;
239
79 if (refcount_dec_and_test(&umem->users))
80 xdp_umem_release(umem);
240 if (refcount_dec_and_test(&umem->users)) {
241 INIT_WORK(&umem->work, xdp_umem_release_deferred);
242 schedule_work(&umem->work);
243 }
81}
82
83static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
84{
85 unsigned int gup_flags = FOLL_WRITE;
86 long npgs;
87 int err;
88

--- 46 unchanged lines hidden ---

135 }
136 } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
137 new_npgs) != old_npgs);
138 return 0;
139}
140
141static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
142{
244}
245
246static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
247{
248 unsigned int gup_flags = FOLL_WRITE;
249 long npgs;
250 int err;
251

--- 46 unchanged lines hidden ---

298 }
299 } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
300 new_npgs) != old_npgs);
301 return 0;
302}
303
304static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
305{
306 u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom;
143 bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
307 bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
144 u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
145 u64 npgs, addr = mr->addr, size = mr->len;
308 u64 npgs, addr = mr->addr, size = mr->len;
146 unsigned int chunks, chunks_per_page;
309 unsigned int chunks, chunks_rem;
147 int err;
148
149 if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
150 /* Strictly speaking we could support this, if:
151 * - huge pages, or
152 * - using an IOMMU, or
153 * - making sure the memory area is consecutive
154 * but for now, we simply say "computer says no".
155 */
156 return -EINVAL;
157 }
158
310 int err;
311
312 if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
313 /* Strictly speaking we could support this, if:
314 * - huge pages, or
315 * - using an IOMMU, or
316 * - making sure the memory area is consecutive
317 * but for now, we simply say "computer says no".
318 */
319 return -EINVAL;
320 }
321
159 if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
322 if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG |
323 XDP_UMEM_USES_NEED_WAKEUP))
160 return -EINVAL;
161
162 if (!unaligned_chunks && !is_power_of_2(chunk_size))
163 return -EINVAL;
164
165 if (!PAGE_ALIGNED(addr)) {
166 /* Memory area has to be page size aligned. This
167 * restriction is for simplicity and might change.
168 */
169 return -EINVAL;
170 }
171
172 if ((addr + size) < addr)
173 return -EINVAL;
174
324 return -EINVAL;
325
326 if (!unaligned_chunks && !is_power_of_2(chunk_size))
327 return -EINVAL;
328
329 if (!PAGE_ALIGNED(addr)) {
330 /* Memory area has to be page size aligned. This
331 * restriction is for simplicity and might change.
332 */
333 return -EINVAL;
334 }
335
336 if ((addr + size) < addr)
337 return -EINVAL;
338
175 npgs = size >> PAGE_SHIFT;
339 npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
340 if (npgs_rem)
341 npgs++;
176 if (npgs > U32_MAX)
177 return -EINVAL;
178
342 if (npgs > U32_MAX)
343 return -EINVAL;
344
179 chunks = (unsigned int)div_u64(size, chunk_size);
345 chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem);
180 if (chunks == 0)
181 return -EINVAL;
182
346 if (chunks == 0)
347 return -EINVAL;
348
183 if (!unaligned_chunks) {
184 chunks_per_page = PAGE_SIZE / chunk_size;
185 if (chunks < chunks_per_page || chunks % chunks_per_page)
186 return -EINVAL;
187 }
349 if (!unaligned_chunks && chunks_rem)
350 return -EINVAL;
188
189 if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
190 return -EINVAL;
191
192 umem->size = size;
193 umem->headroom = headroom;
194 umem->chunk_size = chunk_size;
351
352 if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
353 return -EINVAL;
354
355 umem->size = size;
356 umem->headroom = headroom;
357 umem->chunk_size = chunk_size;
195 umem->chunks = chunks;
196 umem->npgs = (u32)npgs;
197 umem->pgs = NULL;
198 umem->user = NULL;
199 umem->flags = mr->flags;
358 umem->npgs = (u32)npgs;
359 umem->pgs = NULL;
360 umem->user = NULL;
361 umem->flags = mr->flags;
362 INIT_LIST_HEAD(&umem->xsk_tx_list);
363 spin_lock_init(&umem->xsk_tx_list_lock);
200
364
201 INIT_LIST_HEAD(&umem->xsk_dma_list);
202 refcount_set(&umem->users, 1);
203
204 err = xdp_umem_account_pages(umem);
205 if (err)
206 return err;
207
208 err = xdp_umem_pin_pages(umem, (unsigned long)addr);
209 if (err)
210 goto out_account;
211
365 refcount_set(&umem->users, 1);
366
367 err = xdp_umem_account_pages(umem);
368 if (err)
369 return err;
370
371 err = xdp_umem_pin_pages(umem, (unsigned long)addr);
372 if (err)
373 goto out_account;
374
212 err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
213 if (err)
214 goto out_unpin;
215
375 umem->pool = xp_create(umem->pgs, umem->npgs, chunks, chunk_size,
376 headroom, size, unaligned_chunks);
377 if (!umem->pool) {
378 err = -ENOMEM;
379 goto out_pin;
380 }
216 return 0;
217
381 return 0;
382
218out_unpin:
383out_pin:
219 xdp_umem_unpin_pages(umem);
220out_account:
221 xdp_umem_unaccount_pages(umem);
222 return err;
223}
224
225struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
226{

--- 15 unchanged lines hidden ---

242 if (err) {
243 ida_simple_remove(&umem_ida, umem->id);
244 kfree(umem);
245 return ERR_PTR(err);
246 }
247
248 return umem;
249}
384 xdp_umem_unpin_pages(umem);
385out_account:
386 xdp_umem_unaccount_pages(umem);
387 return err;
388}
389
390struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
391{

--- 15 unchanged lines hidden (view full) ---

407 if (err) {
408 ida_simple_remove(&umem_ida, umem->id);
409 kfree(umem);
410 return ERR_PTR(err);
411 }
412
413 return umem;
414}
415
416bool xdp_umem_validate_queues(struct xdp_umem *umem)
417{
418 return umem->fq && umem->cq;
419}
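
For reference, the checks in xdp_umem_reg() in both revisions above correspond to what user space submits through the XDP_UMEM_REG setsockopt: a page-aligned buffer, a chunk_size between XDP_UMEM_MIN_CHUNK_SIZE (2048) and PAGE_SIZE that must be a power of two unless XDP_UMEM_UNALIGNED_CHUNK_FLAG is set, and a headroom smaller than chunk_size - XDP_PACKET_HEADROOM. A minimal user-space registration sketch under those constraints follows; the function name and the frame count/size are illustrative assumptions, not values taken from this file.

	#include <linux/if_xdp.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <unistd.h>

	#define NUM_FRAMES	4096
	#define FRAME_SIZE	2048	/* power of two, >= XDP_UMEM_MIN_CHUNK_SIZE */

	/* Hypothetical example: create an AF_XDP socket and register a umem
	 * that satisfies the constraints enforced by xdp_umem_reg().
	 */
	static int register_umem_example(void)
	{
		struct xdp_umem_reg mr;
		void *bufs;
		int fd;

		fd = socket(AF_XDP, SOCK_RAW, 0);
		if (fd < 0)
			return -1;

		/* xdp_umem_reg() rejects areas that are not page aligned. */
		if (posix_memalign(&bufs, (size_t)getpagesize(),
				   (size_t)NUM_FRAMES * FRAME_SIZE)) {
			close(fd);
			return -1;
		}

		memset(&mr, 0, sizeof(mr));
		mr.addr = (uintptr_t)bufs;
		mr.len = (__u64)NUM_FRAMES * FRAME_SIZE;
		mr.chunk_size = FRAME_SIZE;	/* aligned mode: power of two */
		mr.headroom = 0;		/* < chunk_size - XDP_PACKET_HEADROOM */
		mr.flags = 0;			/* no XDP_UMEM_UNALIGNED_CHUNK_FLAG */

		if (setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr))) {
			free(bufs);
			close(fd);
			return -1;
		}

		return fd;	/* fill/completion rings and bind() come next */
	}

AF_XDP, SOL_XDP and struct xdp_umem_reg come from the kernel UAPI and libc socket headers; whether a given toolchain exposes them depends on the installed header versions, so treat this as a sketch rather than a drop-in recipe. Setting up the fill and completion rings (XDP_UMEM_FILL_RING, XDP_UMEM_COMPLETION_RING) and calling bind() on the socket follow after registration.
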