// SPDX-License-Identifier: GPL-2.0
/* XDP sockets
 *
 * AF_XDP sockets allow a channel between XDP programs and userspace
 * applications.
 * Copyright(c) 2018 Intel Corporation.
 *
 * Author(s): Björn Töpel <bjorn.topel@intel.com>
 *	      Magnus Karlsson <magnus.karlsson@intel.com>
 */

#define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__

#include <linux/if_xdp.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <net/xdp_sock.h>
#include <net/xdp.h>

#include "xsk_queue.h"
#include "xdp_umem.h"

#define TX_BATCH_SIZE 16

static struct xdp_sock *xdp_sk(struct sock *sk)
{
	return (struct xdp_sock *)sk;
}

bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
	return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
		READ_ONCE(xs->umem->fq);
}

u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
	return xskq_peek_addr(umem->fq, addr);
}
EXPORT_SYMBOL(xsk_umem_peek_addr);

void xsk_umem_discard_addr(struct xdp_umem *umem)
{
	xskq_discard_addr(umem->fq);
}
EXPORT_SYMBOL(xsk_umem_discard_addr);

static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	void *buffer;
	u64 addr;
	int err;

	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
	    len > xs->umem->chunk_size_nohr) {
		xs->rx_dropped++;
		return -ENOSPC;
	}

	addr += xs->umem->headroom;

	buffer = xdp_umem_get_data(xs->umem, addr);
	memcpy(buffer, xdp->data, len);
	err = xskq_produce_batch_desc(xs->rx, addr, len);
	if (!err) {
		xskq_discard_addr(xs->umem->fq);
		xdp_return_buff(xdp);
		return 0;
	}

	xs->rx_dropped++;
	return err;
}

static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len);

	if (err) {
		xdp_return_buff(xdp);
		xs->rx_dropped++;
	}

	return err;
}

int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	u32 len;

	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
		return -EINVAL;

	len = xdp->data_end - xdp->data;

	return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
}

void xsk_flush(struct xdp_sock *xs)
{
	xskq_produce_flush_desc(xs->rx);
	xs->sk.sk_data_ready(&xs->sk);
}

int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	u32 len = xdp->data_end - xdp->data;
	void *buffer;
	u64 addr;
	int err;

	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
		return -EINVAL;

	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
	    len > xs->umem->chunk_size_nohr) {
		xs->rx_dropped++;
		return -ENOSPC;
	}

	addr += xs->umem->headroom;

	buffer = xdp_umem_get_data(xs->umem, addr);
	memcpy(buffer, xdp->data, len);
	err = xskq_produce_batch_desc(xs->rx, addr, len);
	if (!err) {
		xskq_discard_addr(xs->umem->fq);
		xsk_flush(xs);
		return 0;
	}

	xs->rx_dropped++;
	return err;
}
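
/* Illustrative userspace sketch, not part of this file: once the fill and
 * RX rings have been mmapped (see xsk_mmap() and XDP_MMAP_OFFSETS below),
 * the receive path amounts to handing the kernel frame addresses on the
 * fill ring and harvesting completed descriptors from the RX ring:
 *
 *	fq_addrs[(*fq_prod)++ & (fq_size - 1)] = frame_addr;
 *	...
 *	struct xdp_desc *d = &rx_descs[(*rx_cons)++ & (rx_size - 1)];
 *	process(umem_area + d->addr, d->len);
 *
 * The fq_addrs/fq_prod/rx_descs/rx_cons names, the ring sizes and process()
 * are assumptions for illustration; a real application also issues the
 * appropriate memory barriers around the producer/consumer updates.
 */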

void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
{
	xskq_produce_flush_addr_n(umem->cq, nb_entries);
}
EXPORT_SYMBOL(xsk_umem_complete_tx);

void xsk_umem_consume_tx_done(struct xdp_umem *umem)
{
	struct xdp_sock *xs;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
		xs->sk.sk_write_space(&xs->sk);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(xsk_umem_consume_tx_done);

bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len)
{
	struct xdp_desc desc;
	struct xdp_sock *xs;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
		if (!xskq_peek_desc(xs->tx, &desc))
			continue;

		if (xskq_produce_addr_lazy(umem->cq, desc.addr))
			goto out;

		*dma = xdp_umem_get_dma(umem, desc.addr);
		*len = desc.len;

		xskq_discard_desc(xs->tx);
		rcu_read_unlock();
		return true;
	}

out:
	rcu_read_unlock();
	return false;
}
EXPORT_SYMBOL(xsk_umem_consume_tx);
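
/* Illustrative driver-side sketch, not part of this file: a zero-copy
 * capable driver's TX path (for example its ndo_xsk_async_xmit handler)
 * would drain the socket's TX ring roughly as below. hw_ring_has_room()
 * and hw_ring_post() are hypothetical stand-ins for the driver's real
 * descriptor-ring code.
 *
 *	dma_addr_t dma;
 *	u32 len, sent = 0;
 *
 *	while (hw_ring_has_room(ring) &&
 *	       xsk_umem_consume_tx(umem, &dma, &len)) {
 *		hw_ring_post(ring, dma, len);
 *		sent++;
 *	}
 *	if (sent)
 *		xsk_umem_consume_tx_done(umem);
 *
 * and, from the TX completion path once the hardware has sent the frames:
 *
 *	xsk_umem_complete_tx(umem, completed);
 */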

static int xsk_zc_xmit(struct sock *sk)
{
	struct xdp_sock *xs = xdp_sk(sk);
	struct net_device *dev = xs->dev;

	return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
}

static void xsk_destruct_skb(struct sk_buff *skb)
{
	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
	struct xdp_sock *xs = xdp_sk(skb->sk);

	WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));

	sock_wfree(skb);
}

static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
			    size_t total_len)
{
	u32 max_batch = TX_BATCH_SIZE;
	struct xdp_sock *xs = xdp_sk(sk);
	bool sent_frame = false;
	struct xdp_desc desc;
	struct sk_buff *skb;
	int err = 0;

	if (unlikely(!xs->tx))
		return -ENOBUFS;

	mutex_lock(&xs->mutex);

	while (xskq_peek_desc(xs->tx, &desc)) {
		char *buffer;
		u64 addr;
		u32 len;

		if (max_batch-- == 0) {
			err = -EAGAIN;
			goto out;
		}

		if (xskq_reserve_addr(xs->umem->cq)) {
			err = -EAGAIN;
			goto out;
		}

		len = desc.len;
		if (unlikely(len > xs->dev->mtu)) {
			err = -EMSGSIZE;
			goto out;
		}

		if (xs->queue_id >= xs->dev->real_num_tx_queues) {
			err = -ENXIO;
			goto out;
		}

		skb = sock_alloc_send_skb(sk, len, 1, &err);
		if (unlikely(!skb)) {
			err = -EAGAIN;
			goto out;
		}

		skb_put(skb, len);
		addr = desc.addr;
		buffer = xdp_umem_get_data(xs->umem, addr);
		err = skb_store_bits(skb, 0, buffer, len);
		if (unlikely(err)) {
			kfree_skb(skb);
			goto out;
		}

		skb->dev = xs->dev;
		skb->priority = sk->sk_priority;
		skb->mark = sk->sk_mark;
		skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
		skb->destructor = xsk_destruct_skb;

		err = dev_direct_xmit(skb, xs->queue_id);
		/* Ignore NET_XMIT_CN as packet might have been sent */
		if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
			err = -EAGAIN;
			/* SKB consumed by dev_direct_xmit() */
			goto out;
		}

		sent_frame = true;
		xskq_discard_desc(xs->tx);
	}

out:
	if (sent_frame)
		sk->sk_write_space(sk);

	mutex_unlock(&xs->mutex);
	return err;
}

static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);

	if (unlikely(!xs->dev))
		return -ENXIO;
	if (unlikely(!(xs->dev->flags & IFF_UP)))
		return -ENETDOWN;
	if (need_wait)
		return -EOPNOTSUPP;

	return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
}
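
/* Illustrative userspace sketch, not part of this file: after descriptors
 * have been placed on the TX ring, the socket is kicked with an empty
 * send; MSG_DONTWAIT is required, since need_wait is rejected above.
 *
 *	sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
 */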

static __poll_t xsk_poll_mask(struct socket *sock, __poll_t events)
{
	__poll_t mask = datagram_poll_mask(sock, events);
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);

	if (xs->rx && !xskq_empty_desc(xs->rx))
		mask |= POLLIN | POLLRDNORM;
	if (xs->tx && !xskq_full_desc(xs->tx))
		mask |= POLLOUT | POLLWRNORM;

	return mask;
}
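
/* Illustrative userspace sketch, not part of this file: an AF_XDP socket
 * can be multiplexed with ordinary poll(); POLLIN means the RX ring has
 * descriptors to consume, POLLOUT means the TX ring has room. The
 * drain_rx_ring() helper is a hypothetical placeholder.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, timeout_ms) > 0 && (pfd.revents & POLLIN))
 *		drain_rx_ring();
 */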

static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
			  bool umem_queue)
{
	struct xsk_queue *q;

	if (entries == 0 || *queue || !is_power_of_2(entries))
		return -EINVAL;

	q = xskq_create(entries, umem_queue);
	if (!q)
		return -ENOMEM;

	/* Make sure queue is ready before it can be seen by others */
	smp_wmb();
	*queue = q;
	return 0;
}

static int xsk_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net *net;

	if (!sk)
		return 0;

	net = sock_net(sk);

	local_bh_disable();
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	local_bh_enable();

	if (xs->dev) {
		/* Wait for driver to stop using the xdp socket. */
		synchronize_net();
		dev_put(xs->dev);
		xs->dev = NULL;
	}

	sock_orphan(sk);
	sock->sk = NULL;

	sk_refcnt_debug_release(sk);
	sock_put(sk);

	return 0;
}

static struct socket *xsk_lookup_xsk_from_fd(int fd)
{
	struct socket *sock;
	int err;

	sock = sockfd_lookup(fd, &err);
	if (!sock)
		return ERR_PTR(-ENOTSOCK);

	if (sock->sk->sk_family != PF_XDP) {
		sockfd_put(sock);
		return ERR_PTR(-ENOPROTOOPT);
	}

	return sock;
}

static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net_device *dev;
	u32 flags, qid;
	int err = 0;

	if (addr_len < sizeof(struct sockaddr_xdp))
		return -EINVAL;
	if (sxdp->sxdp_family != AF_XDP)
		return -EINVAL;

	mutex_lock(&xs->mutex);
	if (xs->dev) {
		err = -EBUSY;
		goto out_release;
	}

	dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
	if (!dev) {
		err = -ENODEV;
		goto out_release;
	}

	if (!xs->rx && !xs->tx) {
		err = -EINVAL;
		goto out_unlock;
	}

	qid = sxdp->sxdp_queue_id;

	if ((xs->rx && qid >= dev->real_num_rx_queues) ||
	    (xs->tx && qid >= dev->real_num_tx_queues)) {
		err = -EINVAL;
		goto out_unlock;
	}

	flags = sxdp->sxdp_flags;

	if (flags & XDP_SHARED_UMEM) {
		struct xdp_sock *umem_xs;
		struct socket *sock;

		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
			/* Cannot specify flags for shared sockets. */
			err = -EINVAL;
			goto out_unlock;
		}

		if (xs->umem) {
			/* We already have our own. */
			err = -EINVAL;
			goto out_unlock;
		}

		sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd);
		if (IS_ERR(sock)) {
			err = PTR_ERR(sock);
			goto out_unlock;
		}

		umem_xs = xdp_sk(sock->sk);
		if (!umem_xs->umem) {
			/* No umem to inherit. */
			err = -EBADF;
			sockfd_put(sock);
			goto out_unlock;
		} else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
			err = -EINVAL;
			sockfd_put(sock);
			goto out_unlock;
		}

		xdp_get_umem(umem_xs->umem);
		xs->umem = umem_xs->umem;
		sockfd_put(sock);
	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
		err = -EINVAL;
		goto out_unlock;
	} else {
		/* This xsk has its own umem. */
		xskq_set_umem(xs->umem->fq, &xs->umem->props);
		xskq_set_umem(xs->umem->cq, &xs->umem->props);

		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
		if (err)
			goto out_unlock;
	}

	xs->dev = dev;
	xs->zc = xs->umem->zc;
	xs->queue_id = qid;
	xskq_set_umem(xs->rx, &xs->umem->props);
	xskq_set_umem(xs->tx, &xs->umem->props);
	xdp_add_sk_umem(xs->umem, xs);

out_unlock:
	if (err)
		dev_put(dev);
out_release:
	mutex_unlock(&xs->mutex);
	return err;
}
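
/* Illustrative userspace sketch, not part of this file: binding the socket
 * to one queue of a netdev after the UMEM and rings have been configured
 * via setsockopt(). The interface name and queue id are assumptions.
 *
 *	struct sockaddr_xdp sxdp = {
 *		.sxdp_family   = AF_XDP,
 *		.sxdp_ifindex  = if_nametoindex("eth0"),
 *		.sxdp_queue_id = 0,
 *		.sxdp_flags    = 0,
 *	};
 *
 *	bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
 *
 * sxdp_flags may instead carry XDP_COPY or XDP_ZEROCOPY, or XDP_SHARED_UMEM
 * together with sxdp_shared_umem_fd to reuse another socket's UMEM.
 */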

static int xsk_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int err;

	if (level != SOL_XDP)
		return -ENOPROTOOPT;

	switch (optname) {
	case XDP_RX_RING:
	case XDP_TX_RING:
	{
		struct xsk_queue **q;
		int entries;

		if (optlen < sizeof(entries))
			return -EINVAL;
		if (copy_from_user(&entries, optval, sizeof(entries)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
		err = xsk_init_queue(entries, q, false);
		mutex_unlock(&xs->mutex);
		return err;
	}
	case XDP_UMEM_REG:
	{
		struct xdp_umem_reg mr;
		struct xdp_umem *umem;

		if (copy_from_user(&mr, optval, sizeof(mr)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		if (xs->umem) {
			mutex_unlock(&xs->mutex);
			return -EBUSY;
		}

		umem = xdp_umem_create(&mr);
		if (IS_ERR(umem)) {
			mutex_unlock(&xs->mutex);
			return PTR_ERR(umem);
		}

		/* Make sure umem is ready before it can be seen by others */
		smp_wmb();
		xs->umem = umem;
		mutex_unlock(&xs->mutex);
		return 0;
	}
	case XDP_UMEM_FILL_RING:
	case XDP_UMEM_COMPLETION_RING:
	{
		struct xsk_queue **q;
		int entries;

		if (copy_from_user(&entries, optval, sizeof(entries)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		if (!xs->umem) {
			mutex_unlock(&xs->mutex);
			return -EINVAL;
		}

		q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
			&xs->umem->cq;
		err = xsk_init_queue(entries, q, true);
		mutex_unlock(&xs->mutex);
		return err;
	}
	default:
		break;
	}

	return -ENOPROTOOPT;
}
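
/* Illustrative userspace sketch, not part of this file: the configuration
 * order this function serves: register the UMEM, then size the four rings.
 * umem_area, NUM_FRAMES and FRAME_SIZE are assumptions for illustration;
 * ring sizes must be powers of two (see xsk_init_queue()).
 *
 *	int fd = socket(AF_XDP, SOCK_RAW, 0);
 *	int ring_sz = 2048;
 *	struct xdp_umem_reg mr = {
 *		.addr       = (__u64)(uintptr_t)umem_area,
 *		.len        = NUM_FRAMES * FRAME_SIZE,
 *		.chunk_size = FRAME_SIZE,
 *		.headroom   = 0,
 *	};
 *
 *	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 *	setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &ring_sz, sizeof(ring_sz));
 *	setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &ring_sz,
 *		   sizeof(ring_sz));
 *	setsockopt(fd, SOL_XDP, XDP_RX_RING, &ring_sz, sizeof(ring_sz));
 *	setsockopt(fd, SOL_XDP, XDP_TX_RING, &ring_sz, sizeof(ring_sz));
 */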

static int xsk_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int len;

	if (level != SOL_XDP)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case XDP_STATISTICS:
	{
		struct xdp_statistics stats;

		if (len < sizeof(stats))
			return -EINVAL;

		mutex_lock(&xs->mutex);
		stats.rx_dropped = xs->rx_dropped;
		stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx);
		stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx);
		mutex_unlock(&xs->mutex);

		if (copy_to_user(optval, &stats, sizeof(stats)))
			return -EFAULT;
		if (put_user(sizeof(stats), optlen))
			return -EFAULT;

		return 0;
	}
	case XDP_MMAP_OFFSETS:
	{
		struct xdp_mmap_offsets off;

		if (len < sizeof(off))
			return -EINVAL;

		off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
		off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
		off.rx.desc	= offsetof(struct xdp_rxtx_ring, desc);
		off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
		off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
		off.tx.desc	= offsetof(struct xdp_rxtx_ring, desc);

		off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
		off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
		off.fr.desc	= offsetof(struct xdp_umem_ring, desc);
		off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
		off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
		off.cr.desc	= offsetof(struct xdp_umem_ring, desc);

		len = sizeof(off);
		if (copy_to_user(optval, &off, len))
			return -EFAULT;
		if (put_user(len, optlen))
			return -EFAULT;

		return 0;
	}
	default:
		break;
	}

	return -EOPNOTSUPP;
}
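
/* Illustrative userspace sketch, not part of this file: querying the ring
 * layout before mmapping. The offsets returned here are the ones used in
 * the mmap example after xsk_mmap() below.
 *
 *	struct xdp_mmap_offsets off;
 *	socklen_t optlen = sizeof(off);
 *
 *	getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 */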

static int xsk_mmap(struct file *file, struct socket *sock,
		    struct vm_area_struct *vma)
{
	loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
	unsigned long size = vma->vm_end - vma->vm_start;
	struct xdp_sock *xs = xdp_sk(sock->sk);
	struct xsk_queue *q = NULL;
	struct xdp_umem *umem;
	unsigned long pfn;
	struct page *qpg;

	if (offset == XDP_PGOFF_RX_RING) {
		q = READ_ONCE(xs->rx);
	} else if (offset == XDP_PGOFF_TX_RING) {
		q = READ_ONCE(xs->tx);
	} else {
		umem = READ_ONCE(xs->umem);
		if (!umem)
			return -EINVAL;

		if (offset == XDP_UMEM_PGOFF_FILL_RING)
			q = READ_ONCE(umem->fq);
		else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
			q = READ_ONCE(umem->cq);
	}

	if (!q)
		return -EINVAL;

	qpg = virt_to_head_page(q->ring);
	if (size > (PAGE_SIZE << compound_order(qpg)))
		return -EINVAL;

	pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
	return remap_pfn_range(vma, vma->vm_start, pfn,
			       size, vma->vm_page_prot);
}
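
/* Illustrative userspace sketch, not part of this file: mapping the RX ring
 * using the offsets obtained via XDP_MMAP_OFFSETS above. rx_ring_sz is an
 * assumption; the TX, fill and completion rings are mapped the same way
 * with XDP_PGOFF_TX_RING, XDP_UMEM_PGOFF_FILL_RING and
 * XDP_UMEM_PGOFF_COMPLETION_RING as the mmap offset (the latter two carry
 * u64 frame addresses rather than struct xdp_desc entries).
 *
 *	void *rx_map = mmap(NULL,
 *			    off.rx.desc + rx_ring_sz * sizeof(struct xdp_desc),
 *			    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
 *			    fd, XDP_PGOFF_RX_RING);
 *	__u32 *rx_producer = rx_map + off.rx.producer;
 *	__u32 *rx_consumer = rx_map + off.rx.consumer;
 *	struct xdp_desc *rx_descs = rx_map + off.rx.desc;
 */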

static struct proto xsk_proto = {
	.name =		"XDP",
	.owner =	THIS_MODULE,
	.obj_size =	sizeof(struct xdp_sock),
};

static const struct proto_ops xsk_proto_ops = {
	.family		= PF_XDP,
	.owner		= THIS_MODULE,
	.release	= xsk_release,
	.bind		= xsk_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.poll_mask	= xsk_poll_mask,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= xsk_setsockopt,
	.getsockopt	= xsk_getsockopt,
	.sendmsg	= xsk_sendmsg,
	.recvmsg	= sock_no_recvmsg,
	.mmap		= xsk_mmap,
	.sendpage	= sock_no_sendpage,
};

static void xsk_destruct(struct sock *sk)
{
	struct xdp_sock *xs = xdp_sk(sk);

	if (!sock_flag(sk, SOCK_DEAD))
		return;

	xskq_destroy(xs->rx);
	xskq_destroy(xs->tx);
	xdp_del_sk_umem(xs->umem, xs);
	xdp_put_umem(xs->umem);

	sk_refcnt_debug_dec(sk);
}

static int xsk_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct sock *sk;
	struct xdp_sock *xs;

	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_RAW)
		return -ESOCKTNOSUPPORT;

	if (protocol)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
	if (!sk)
		return -ENOBUFS;

	sock->ops = &xsk_proto_ops;

	sock_init_data(sock, sk);

	sk->sk_family = PF_XDP;

	sk->sk_destruct = xsk_destruct;
	sk_refcnt_debug_inc(sk);

	xs = xdp_sk(sk);
	mutex_init(&xs->mutex);

	local_bh_disable();
	sock_prot_inuse_add(net, &xsk_proto, 1);
	local_bh_enable();

	return 0;
}

static const struct net_proto_family xsk_family_ops = {
	.family = PF_XDP,
	.create = xsk_create,
	.owner	= THIS_MODULE,
};

static int __init xsk_init(void)
{
	int err;

	err = proto_register(&xsk_proto, 0 /* no slab */);
	if (err)
		goto out;

	err = sock_register(&xsk_family_ops);
	if (err)
		goto out_proto;

	return 0;

out_proto:
	proto_unregister(&xsk_proto);
out:
	return err;
}

fs_initcall(xsk_init);