--- xsk.c (c4655761d3cf62bf5f86650e79349c1bfa5c6285)
+++ xsk.c (1c1efc2af158869795d3334a12fed2afd9c51539)
 // SPDX-License-Identifier: GPL-2.0
 /* XDP sockets
  *
  * AF_XDP sockets allows a channel between XDP programs and userspace
  * applications.
  * Copyright(c) 2018 Intel Corporation.
  *
  * Author(s): Björn Töpel <bjorn.topel@intel.com>

--- 91 unchanged lines hidden ---

 EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);

 bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
 {
         return pool->umem->flags & XDP_UMEM_USES_NEED_WAKEUP;
 }
 EXPORT_SYMBOL(xsk_uses_need_wakeup);

+struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
+                                            u16 queue_id)
+{
+        if (queue_id < dev->real_num_rx_queues)
+                return dev->_rx[queue_id].pool;
+        if (queue_id < dev->real_num_tx_queues)
+                return dev->_tx[queue_id].pool;
+
+        return NULL;
+}
+EXPORT_SYMBOL(xsk_get_pool_from_qid);
+
+void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
+{
+        if (queue_id < dev->real_num_rx_queues)
+                dev->_rx[queue_id].pool = NULL;
+        if (queue_id < dev->real_num_tx_queues)
+                dev->_tx[queue_id].pool = NULL;
+}
+
+/* The buffer pool is stored both in the _rx struct and the _tx struct as we do
+ * not know if the device has more tx queues than rx, or the opposite.
+ * This might also change during run time.
+ */
+int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
+                        u16 queue_id)
+{
+        if (queue_id >= max_t(unsigned int,
+                              dev->real_num_rx_queues,
+                              dev->real_num_tx_queues))
+                return -EINVAL;
+
+        if (queue_id < dev->real_num_rx_queues)
+                dev->_rx[queue_id].pool = pool;
+        if (queue_id < dev->real_num_tx_queues)
+                dev->_tx[queue_id].pool = pool;
+
+        return 0;
+}
+
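The three helpers above make up the per-queue registry that zero-copy drivers consult. As a rough sketch of the driver side, a hypothetical wakeup handler could look as follows (my_drv_xsk_wakeup() and its error choice are illustrative assumptions; only xsk_get_pool_from_qid() comes from this file):

        static int my_drv_xsk_wakeup(struct net_device *dev, u32 queue_id,
                                     u32 flags)
        {
                /* Look up the pool an AF_XDP socket registered for this queue. */
                struct xsk_buff_pool *pool = xsk_get_pool_from_qid(dev, queue_id);

                if (!pool)
                        return -ENXIO; /* no socket bound to this queue */

                /* Kick the queue's NAPI here so its rings get serviced. */
                return 0;
        }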
 void xp_release(struct xdp_buff_xsk *xskb)
 {
         xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
 }

 static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
 {
         u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;

--- 160 unchanged lines hidden ---

 bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
 {
         struct xdp_umem *umem = pool->umem;
         struct xdp_sock *xs;

         rcu_read_lock();
         list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
-                if (!xskq_cons_peek_desc(xs->tx, desc, umem)) {
+                if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
                         xs->tx->queue_empty_descs++;
                         continue;
                 }

                 /* This is the backpressure mechanism for the Tx path.
                  * Reserve space in the completion queue and only proceed
                  * if there is space in it. This avoids having to implement
                  * any buffering in the Tx path.

--- 51 unchanged lines hidden ---
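The reservation step the comment describes continues in the hidden lines; the shape of the pattern is roughly the following sketch (xskq_prod_reserve_addr() is the completion-ring producer helper in net/xdp/xsk_queue.h; treat the surrounding control flow as illustrative, not as the hidden code itself):

        /* Dequeue a Tx descriptor only after reserving its future
         * completion slot; if the completion ring is full, back off
         * instead of buffering the descriptor anywhere.
         */
        if (xskq_prod_reserve_addr(umem->cq, desc->addr))
                goto out;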

         struct sk_buff *skb;
         int err = 0;

         mutex_lock(&xs->mutex);

         if (xs->queue_id >= xs->dev->real_num_tx_queues)
                 goto out;

-        while (xskq_cons_peek_desc(xs->tx, &desc, xs->umem)) {
+        while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
                 char *buffer;
                 u64 addr;
                 u32 len;

                 if (max_batch-- == 0) {
                         err = -EAGAIN;
                         goto out;
                 }

--- 301 unchanged lines hidden ---

                         goto out_unlock;
                 }
                 if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
                         err = -EINVAL;
                         sockfd_put(sock);
                         goto out_unlock;
                 }

+                /* Share the buffer pool with the other socket. */
+                xp_get_pool(umem_xs->pool);
+                xs->pool = umem_xs->pool;
                 xdp_get_umem(umem_xs->umem);
                 WRITE_ONCE(xs->umem, umem_xs->umem);
                 sockfd_put(sock);
         } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
                 err = -EINVAL;
                 goto out_unlock;
         } else {
                 /* This xsk has its own umem. */
-                err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
-                if (err)
+                xdp_umem_assign_dev(xs->umem, dev, qid);
+                xs->pool = xp_create_and_assign_umem(xs, xs->umem);
+                if (!xs->pool) {
+                        err = -ENOMEM;
+                        xdp_umem_clear_dev(xs->umem);
                         goto out_unlock;
+                }
+
+                err = xp_assign_dev(xs->pool, dev, qid, flags);
+                if (err) {
+                        xp_destroy(xs->pool);
+                        xs->pool = NULL;
+                        xdp_umem_clear_dev(xs->umem);
+                        goto out_unlock;
+                }
         }

         xs->dev = dev;
         xs->zc = xs->umem->zc;
         xs->queue_id = qid;
         xdp_add_sk_umem(xs->umem, xs);

 out_unlock:

--- 75 unchanged lines hidden ---
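The sharing branch earlier in this hunk (the dev/queue_id equality check followed by xp_get_pool()) is what a second userspace socket exercises when it binds with XDP_SHARED_UMEM. A sketch of that caller, assuming ifindex, queue_id and first_fd are set up elsewhere (struct sockaddr_xdp and its fields are the regular AF_XDP UAPI from linux/if_xdp.h):

        #include <stdio.h>
        #include <sys/socket.h>
        #include <linux/if_xdp.h>

        struct sockaddr_xdp sxdp = {
                .sxdp_family = AF_XDP,
                .sxdp_flags = XDP_SHARED_UMEM,
                .sxdp_ifindex = ifindex,         /* must match the first socket */
                .sxdp_queue_id = queue_id,       /* likewise, or bind() fails with EINVAL */
                .sxdp_shared_umem_fd = first_fd, /* socket that already owns the umem */
        };

        if (bind(second_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) < 0)
                perror("bind");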

         }

         umem = xdp_umem_create(&mr);
         if (IS_ERR(umem)) {
                 mutex_unlock(&xs->mutex);
                 return PTR_ERR(umem);
         }

-                xs->pool = umem->pool;
-
                 /* Make sure umem is ready before it can be seen by others */
                 smp_wmb();
                 WRITE_ONCE(xs->umem, umem);
                 mutex_unlock(&xs->mutex);
                 return 0;
         }
         case XDP_UMEM_FILL_RING:
         case XDP_UMEM_COMPLETION_RING:

--- 13 unchanged lines hidden ---
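The smp_wmb()/WRITE_ONCE() pair above publishes the umem only after all of its fields have been initialized, so every reader has to pair with it. Roughly, the consuming side looks like this sketch (it mirrors the READ_ONCE()/smp_rmb() pairing this file uses when dereferencing xs->umem, e.g. in xsk_mmap(); the error value is illustrative):

        struct xdp_umem *umem = READ_ONCE(xs->umem);

        if (!umem)
                return -EINVAL;

        /* Matches the smp_wmb() before WRITE_ONCE(xs->umem, umem):
         * after this barrier the umem's fields are seen fully
         * initialized.
         */
        smp_rmb();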

                 mutex_unlock(&xs->mutex);
                 return -EINVAL;
         }

         q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
                 &xs->umem->cq;
         err = xsk_init_queue(entries, q, true);
         if (optname == XDP_UMEM_FILL_RING)
-                xp_set_fq(xs->umem->pool, *q);
+                xp_set_fq(xs->pool, *q);
         mutex_unlock(&xs->mutex);
         return err;
 }
 default:
         break;
 }

 return -ENOPROTOOPT;

--- 211 unchanged lines hidden ---

                 mutex_lock(&xs->mutex);
                 if (xs->dev == dev) {
                         sk->sk_err = ENETDOWN;
                         if (!sock_flag(sk, SOCK_DEAD))
                                 sk->sk_error_report(sk);

                         xsk_unbind_dev(xs);

-                        /* Clear device references in umem. */
+                        /* Clear device references. */
+                        xp_clear_dev(xs->pool);
                         xdp_umem_clear_dev(xs->umem);
                 }
                 mutex_unlock(&xs->mutex);
         }
         mutex_unlock(&net->xdp.lock);
         break;
 }
 return NOTIFY_DONE;

--- 28 unchanged lines hidden ---


 static void xsk_destruct(struct sock *sk)
 {
         struct xdp_sock *xs = xdp_sk(sk);

         if (!sock_flag(sk, SOCK_DEAD))
                 return;

-        xdp_put_umem(xs->umem);
+        xp_put_pool(xs->pool);

         sk_refcnt_debug_dec(sk);
 }

 static int xsk_create(struct net *net, struct socket *sock, int protocol,
                       int kern)
 {
-        struct sock *sk;
         struct xdp_sock *xs;
+        struct sock *sk;

         if (!ns_capable(net->user_ns, CAP_NET_RAW))
                 return -EPERM;
         if (sock->type != SOCK_RAW)
                 return -ESOCKTNOSUPPORT;

         if (protocol)
                 return -EPROTONOSUPPORT;

--- 100 unchanged lines hidden ---