xref: /openbmc/linux/net/xdp/xsk.c (revision a17922de)
// SPDX-License-Identifier: GPL-2.0
/* XDP sockets
 *
 * AF_XDP sockets allow a channel between XDP programs and userspace
 * applications.
 * Copyright(c) 2018 Intel Corporation.
 *
 * Author(s): Björn Töpel <bjorn.topel@intel.com>
 *	      Magnus Karlsson <magnus.karlsson@intel.com>
 */

#define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__

#include <linux/if_xdp.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <net/xdp_sock.h>
#include <net/xdp.h>

#include "xsk_queue.h"
#include "xdp_umem.h"

#define TX_BATCH_SIZE 16

static struct xdp_sock *xdp_sk(struct sock *sk)
{
	return (struct xdp_sock *)sk;
}

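/* Used by the XDP redirect path (via the XSKMAP) to check that the
 * socket is fully set up: an Rx ring and a umem with a fill queue
 * must be present before packets can be redirected to it.
 */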
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
	return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
		READ_ONCE(xs->umem->fq);
}

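/* Fill queue accessors exported for zero-copy capable drivers:
 * peek returns the next buffer address posted by user space,
 * discard consumes the entry that was just peeked.
 */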
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
	return xskq_peek_addr(umem->fq, addr);
}
EXPORT_SYMBOL(xsk_umem_peek_addr);

void xsk_umem_discard_addr(struct xdp_umem *umem)
{
	xskq_discard_addr(umem->fq);
}
EXPORT_SYMBOL(xsk_umem_discard_addr);

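/* Copy-mode receive: grab a buffer address from the fill queue, copy
 * the packet payload into the umem and post a descriptor on the Rx
 * ring. The frame is dropped (and accounted) if no fill entry is
 * available or the packet does not fit in a chunk.
 */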
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	void *buffer;
	u64 addr;
	int err;

	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
	    len > xs->umem->chunk_size_nohr) {
		xs->rx_dropped++;
		return -ENOSPC;
	}

	addr += xs->umem->headroom;

	buffer = xdp_umem_get_data(xs->umem, addr);
	memcpy(buffer, xdp->data, len);
	err = xskq_produce_batch_desc(xs->rx, addr, len);
	if (!err) {
		xskq_discard_addr(xs->umem->fq);
		xdp_return_buff(xdp);
		return 0;
	}

	xs->rx_dropped++;
	return err;
}

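/* Zero-copy receive: the frame already lives in the umem, so only the
 * buffer handle and length need to be posted on the Rx ring.
 */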
static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len);

	if (err) {
		xdp_return_buff(xdp);
		xs->rx_dropped++;
	}

	return err;
}

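/* xsk_rcv() and xsk_flush() are called from the XDP_REDIRECT path in
 * NAPI context. Descriptors are batched on the Rx ring and only made
 * visible to user space when xsk_flush() bumps the producer pointer
 * and wakes the socket.
 */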
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	u32 len;

	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
		return -EINVAL;

	len = xdp->data_end - xdp->data;

	return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
}

void xsk_flush(struct xdp_sock *xs)
{
	xskq_produce_flush_desc(xs->rx);
	xs->sk.sk_data_ready(&xs->sk);
}

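/* Receive path for generic (skb-based) XDP: same copy scheme as
 * __xsk_rcv(), but each packet is flushed to user space immediately
 * since there is no driver flush point to batch against.
 */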
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	u32 len = xdp->data_end - xdp->data;
	void *buffer;
	u64 addr;
	int err;

	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
		return -EINVAL;

	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
	    len > xs->umem->chunk_size_nohr) {
		xs->rx_dropped++;
		return -ENOSPC;
	}

	addr += xs->umem->headroom;

	buffer = xdp_umem_get_data(xs->umem, addr);
	memcpy(buffer, xdp->data, len);
	err = xskq_produce_batch_desc(xs->rx, addr, len);
	if (!err) {
		xskq_discard_addr(xs->umem->fq);
		xsk_flush(xs);
		return 0;
	}

	xs->rx_dropped++;
	return err;
}

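/* Zero-copy Tx helpers for drivers: xsk_umem_complete_tx() publishes
 * nb_entries previously reserved completions on the completion ring,
 * and xsk_umem_consume_tx_done() wakes any socket waiting to produce
 * more Tx descriptors.
 */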
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
{
	xskq_produce_flush_addr_n(umem->cq, nb_entries);
}
EXPORT_SYMBOL(xsk_umem_complete_tx);

void xsk_umem_consume_tx_done(struct xdp_umem *umem)
{
	struct xdp_sock *xs;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
		xs->sk.sk_write_space(&xs->sk);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(xsk_umem_consume_tx_done);

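/* Called by zero-copy drivers to fetch the next frame to transmit.
 * A completion ring entry is reserved (lazily) up front so that the
 * completion cannot fail later; the Tx descriptor is consumed only
 * after the DMA address and length have been handed to the driver.
 */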
bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len)
{
	struct xdp_desc desc;
	struct xdp_sock *xs;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
		if (!xskq_peek_desc(xs->tx, &desc))
			continue;

		if (xskq_produce_addr_lazy(umem->cq, desc.addr))
			goto out;

		*dma = xdp_umem_get_dma(umem, desc.addr);
		*len = desc.len;

		xskq_discard_desc(xs->tx);
		rcu_read_unlock();
		return true;
	}

out:
	rcu_read_unlock();
	return false;
}
EXPORT_SYMBOL(xsk_umem_consume_tx);

static int xsk_zc_xmit(struct sock *sk)
{
	struct xdp_sock *xs = xdp_sk(sk);
	struct net_device *dev = xs->dev;

	return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
}

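/* skb destructor for copy-mode Tx: once the skb is freed, the frame
 * address is posted on the completion ring so user space can reuse
 * the umem chunk. The spinlock serializes completions against each
 * other since destructors may run from different contexts.
 */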
static void xsk_destruct_skb(struct sk_buff *skb)
{
	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
	struct xdp_sock *xs = xdp_sk(skb->sk);
	unsigned long flags;

	spin_lock_irqsave(&xs->tx_completion_lock, flags);
	WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
	spin_unlock_irqrestore(&xs->tx_completion_lock, flags);

	sock_wfree(skb);
}

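/* Copy-mode transmit: up to TX_BATCH_SIZE descriptors are consumed
 * from the Tx ring per call. For each one, completion ring space is
 * reserved first, the frame is copied into a freshly allocated skb
 * and sent directly on the bound queue with dev_direct_xmit().
 */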
static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
			    size_t total_len)
{
	u32 max_batch = TX_BATCH_SIZE;
	struct xdp_sock *xs = xdp_sk(sk);
	bool sent_frame = false;
	struct xdp_desc desc;
	struct sk_buff *skb;
	int err = 0;

	mutex_lock(&xs->mutex);

	while (xskq_peek_desc(xs->tx, &desc)) {
		char *buffer;
		u64 addr;
		u32 len;

		if (max_batch-- == 0) {
			err = -EAGAIN;
			goto out;
		}

		if (xskq_reserve_addr(xs->umem->cq))
			goto out;

		if (xs->queue_id >= xs->dev->real_num_tx_queues)
			goto out;

		len = desc.len;
		skb = sock_alloc_send_skb(sk, len, 1, &err);
		if (unlikely(!skb)) {
			err = -EAGAIN;
			goto out;
		}

		skb_put(skb, len);
		addr = desc.addr;
		buffer = xdp_umem_get_data(xs->umem, addr);
		err = skb_store_bits(skb, 0, buffer, len);
		if (unlikely(err)) {
			kfree_skb(skb);
			goto out;
		}

		skb->dev = xs->dev;
		skb->priority = sk->sk_priority;
		skb->mark = sk->sk_mark;
		skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
		skb->destructor = xsk_destruct_skb;

		err = dev_direct_xmit(skb, xs->queue_id);
		xskq_discard_desc(xs->tx);
		/* Ignore NET_XMIT_CN as packet might have been sent */
		if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
			/* SKB completed but not sent */
			err = -EBUSY;
			goto out;
		}

		sent_frame = true;
	}

out:
	if (sent_frame)
		sk->sk_write_space(sk);

	mutex_unlock(&xs->mutex);
	return err;
}

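/* sendmsg() on an AF_XDP socket only kicks transmission of what is
 * already on the Tx ring; no data is taken from the msghdr. Blocking
 * mode is not supported in this version.
 */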
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);

	if (unlikely(!xs->dev))
		return -ENXIO;
	if (unlikely(!(xs->dev->flags & IFF_UP)))
		return -ENETDOWN;
	if (unlikely(!xs->tx))
		return -ENOBUFS;
	if (need_wait)
		return -EOPNOTSUPP;

	return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
}

static unsigned int xsk_poll(struct file *file, struct socket *sock,
			     struct poll_table_struct *wait)
{
	unsigned int mask = datagram_poll(file, sock, wait);
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);

	if (xs->rx && !xskq_empty_desc(xs->rx))
		mask |= POLLIN | POLLRDNORM;
	if (xs->tx && !xskq_full_desc(xs->tx))
		mask |= POLLOUT | POLLWRNORM;

	return mask;
}

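/* Allocate a descriptor or umem ring. The queue pointer is published
 * only after the ring is fully initialized; smp_wmb() orders the
 * initialization against the store that makes it visible.
 */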
static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
			  bool umem_queue)
{
	struct xsk_queue *q;

	if (entries == 0 || *queue || !is_power_of_2(entries))
		return -EINVAL;

	q = xskq_create(entries, umem_queue);
	if (!q)
		return -ENOMEM;

	/* Make sure queue is ready before it can be seen by others */
	smp_wmb();
	*queue = q;
	return 0;
}

static int xsk_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net *net;

	if (!sk)
		return 0;

	net = sock_net(sk);

	local_bh_disable();
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	local_bh_enable();

	if (xs->dev) {
		/* Wait for driver to stop using the xdp socket. */
		synchronize_net();
		dev_put(xs->dev);
		xs->dev = NULL;
	}

	sock_orphan(sk);
	sock->sk = NULL;

	sk_refcnt_debug_release(sk);
	sock_put(sk);

	return 0;
}

static struct socket *xsk_lookup_xsk_from_fd(int fd)
{
	struct socket *sock;
	int err;

	sock = sockfd_lookup(fd, &err);
	if (!sock)
		return ERR_PTR(-ENOTSOCK);

	if (sock->sk->sk_family != PF_XDP) {
		sockfd_put(sock);
		return ERR_PTR(-ENOPROTOOPT);
	}

	return sock;
}

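/* Bind the socket to a <netdev, queue_id> pair. With XDP_SHARED_UMEM
 * the umem (and its fill/completion rings) is borrowed from another
 * AF_XDP socket bound to the same device and queue; otherwise the
 * socket's own umem is wired up and, if requested and supported,
 * handed to the driver for zero-copy operation.
 */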
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net_device *dev;
	u32 flags, qid;
	int err = 0;

	if (addr_len < sizeof(struct sockaddr_xdp))
		return -EINVAL;
	if (sxdp->sxdp_family != AF_XDP)
		return -EINVAL;

	mutex_lock(&xs->mutex);
	if (xs->dev) {
		err = -EBUSY;
		goto out_release;
	}

	dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
	if (!dev) {
		err = -ENODEV;
		goto out_release;
	}

	if (!xs->rx && !xs->tx) {
		err = -EINVAL;
		goto out_unlock;
	}

	qid = sxdp->sxdp_queue_id;

	if ((xs->rx && qid >= dev->real_num_rx_queues) ||
	    (xs->tx && qid >= dev->real_num_tx_queues)) {
		err = -EINVAL;
		goto out_unlock;
	}

	flags = sxdp->sxdp_flags;

	if (flags & XDP_SHARED_UMEM) {
		struct xdp_sock *umem_xs;
		struct socket *sock;

		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
			/* Cannot specify flags for shared sockets. */
			err = -EINVAL;
			goto out_unlock;
		}

		if (xs->umem) {
			/* We already have our own. */
			err = -EINVAL;
			goto out_unlock;
		}

		sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd);
		if (IS_ERR(sock)) {
			err = PTR_ERR(sock);
			goto out_unlock;
		}

		umem_xs = xdp_sk(sock->sk);
		if (!umem_xs->umem) {
			/* No umem to inherit. */
			err = -EBADF;
			sockfd_put(sock);
			goto out_unlock;
		} else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
			err = -EINVAL;
			sockfd_put(sock);
			goto out_unlock;
		}

		xdp_get_umem(umem_xs->umem);
		xs->umem = umem_xs->umem;
		sockfd_put(sock);
	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
		err = -EINVAL;
		goto out_unlock;
	} else {
		/* This xsk has its own umem. */
		xskq_set_umem(xs->umem->fq, &xs->umem->props);
		xskq_set_umem(xs->umem->cq, &xs->umem->props);

		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
		if (err)
			goto out_unlock;
	}

	xs->dev = dev;
	xs->zc = xs->umem->zc;
	xs->queue_id = qid;
	xskq_set_umem(xs->rx, &xs->umem->props);
	xskq_set_umem(xs->tx, &xs->umem->props);
	xdp_add_sk_umem(xs->umem, xs);

out_unlock:
	if (err)
		dev_put(dev);
out_release:
	mutex_unlock(&xs->mutex);
	return err;
}

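/* Socket options are how user space sizes the four rings (Rx, Tx,
 * fill, completion) and registers the umem before calling bind().
 */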
static int xsk_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int err;

	if (level != SOL_XDP)
		return -ENOPROTOOPT;

	switch (optname) {
	case XDP_RX_RING:
	case XDP_TX_RING:
	{
		struct xsk_queue **q;
		int entries;

		if (optlen < sizeof(entries))
			return -EINVAL;
		if (copy_from_user(&entries, optval, sizeof(entries)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
		err = xsk_init_queue(entries, q, false);
		mutex_unlock(&xs->mutex);
		return err;
	}
	case XDP_UMEM_REG:
	{
		struct xdp_umem_reg mr;
		struct xdp_umem *umem;

		if (copy_from_user(&mr, optval, sizeof(mr)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		if (xs->umem) {
			mutex_unlock(&xs->mutex);
			return -EBUSY;
		}

		umem = xdp_umem_create(&mr);
		if (IS_ERR(umem)) {
			mutex_unlock(&xs->mutex);
			return PTR_ERR(umem);
		}

		/* Make sure umem is ready before it can be seen by others */
		smp_wmb();
		xs->umem = umem;
		mutex_unlock(&xs->mutex);
		return 0;
	}
	case XDP_UMEM_FILL_RING:
	case XDP_UMEM_COMPLETION_RING:
	{
		struct xsk_queue **q;
		int entries;

		if (copy_from_user(&entries, optval, sizeof(entries)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		if (!xs->umem) {
			mutex_unlock(&xs->mutex);
			return -EINVAL;
		}

		q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
			&xs->umem->cq;
		err = xsk_init_queue(entries, q, true);
		mutex_unlock(&xs->mutex);
		return err;
	}
	default:
		break;
	}

	return -ENOPROTOOPT;
}

static int xsk_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int len;

	if (level != SOL_XDP)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case XDP_STATISTICS:
	{
		struct xdp_statistics stats;

		if (len < sizeof(stats))
			return -EINVAL;

		mutex_lock(&xs->mutex);
		stats.rx_dropped = xs->rx_dropped;
		stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx);
		stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx);
		mutex_unlock(&xs->mutex);

		if (copy_to_user(optval, &stats, sizeof(stats)))
			return -EFAULT;
		if (put_user(sizeof(stats), optlen))
			return -EFAULT;

		return 0;
	}
	case XDP_MMAP_OFFSETS:
	{
		struct xdp_mmap_offsets off;

		if (len < sizeof(off))
			return -EINVAL;

		off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
		off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
		off.rx.desc	= offsetof(struct xdp_rxtx_ring, desc);
		off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
		off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
		off.tx.desc	= offsetof(struct xdp_rxtx_ring, desc);

		off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
		off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
		off.fr.desc	= offsetof(struct xdp_umem_ring, desc);
		off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
		off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
		off.cr.desc	= offsetof(struct xdp_umem_ring, desc);

		len = sizeof(off);
		if (copy_to_user(optval, &off, len))
			return -EFAULT;
		if (put_user(len, optlen))
			return -EFAULT;

		return 0;
	}
	default:
		break;
	}

	return -EOPNOTSUPP;
}

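/* mmap() maps one of the four rings into user space. The page offset
 * selects the ring; the ring must have been created via setsockopt()
 * first, and the requested size may not exceed the ring allocation.
 */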
static int xsk_mmap(struct file *file, struct socket *sock,
		    struct vm_area_struct *vma)
{
	loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
	unsigned long size = vma->vm_end - vma->vm_start;
	struct xdp_sock *xs = xdp_sk(sock->sk);
	struct xsk_queue *q = NULL;
	struct xdp_umem *umem;
	unsigned long pfn;
	struct page *qpg;

	if (offset == XDP_PGOFF_RX_RING) {
		q = READ_ONCE(xs->rx);
	} else if (offset == XDP_PGOFF_TX_RING) {
		q = READ_ONCE(xs->tx);
	} else {
		umem = READ_ONCE(xs->umem);
		if (!umem)
			return -EINVAL;

		if (offset == XDP_UMEM_PGOFF_FILL_RING)
			q = READ_ONCE(umem->fq);
		else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
			q = READ_ONCE(umem->cq);
	}

	if (!q)
		return -EINVAL;

	qpg = virt_to_head_page(q->ring);
	if (size > (PAGE_SIZE << compound_order(qpg)))
		return -EINVAL;

	pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
	return remap_pfn_range(vma, vma->vm_start, pfn,
			       size, vma->vm_page_prot);
}

static struct proto xsk_proto = {
	.name =		"XDP",
	.owner =	THIS_MODULE,
	.obj_size =	sizeof(struct xdp_sock),
};

static const struct proto_ops xsk_proto_ops = {
	.family		= PF_XDP,
	.owner		= THIS_MODULE,
	.release	= xsk_release,
	.bind		= xsk_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.poll		= xsk_poll,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= xsk_setsockopt,
	.getsockopt	= xsk_getsockopt,
	.sendmsg	= xsk_sendmsg,
	.recvmsg	= sock_no_recvmsg,
	.mmap		= xsk_mmap,
	.sendpage	= sock_no_sendpage,
};

static void xsk_destruct(struct sock *sk)
{
	struct xdp_sock *xs = xdp_sk(sk);

	if (!sock_flag(sk, SOCK_DEAD))
		return;

	xskq_destroy(xs->rx);
	xskq_destroy(xs->tx);
	xdp_del_sk_umem(xs->umem, xs);
	xdp_put_umem(xs->umem);

	sk_refcnt_debug_dec(sk);
}

static int xsk_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct sock *sk;
	struct xdp_sock *xs;

	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_RAW)
		return -ESOCKTNOSUPPORT;

	if (protocol)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
	if (!sk)
		return -ENOBUFS;

	sock->ops = &xsk_proto_ops;

	sock_init_data(sock, sk);

	sk->sk_family = PF_XDP;

	sk->sk_destruct = xsk_destruct;
	sk_refcnt_debug_inc(sk);

	xs = xdp_sk(sk);
	mutex_init(&xs->mutex);
	spin_lock_init(&xs->tx_completion_lock);

	local_bh_disable();
	sock_prot_inuse_add(net, &xsk_proto, 1);
	local_bh_enable();

	return 0;
}

static const struct net_proto_family xsk_family_ops = {
	.family = PF_XDP,
	.create = xsk_create,
	.owner	= THIS_MODULE,
};

static int __init xsk_init(void)
{
	int err;

	err = proto_register(&xsk_proto, 0 /* no slab */);
	if (err)
		goto out;

	err = sock_register(&xsk_family_ops);
	if (err)
		goto out_proto;

	return 0;

out_proto:
	proto_unregister(&xsk_proto);
out:
	return err;
}

fs_initcall(xsk_init);