// SPDX-License-Identifier: GPL-2.0-only
/*
 * vhost transport for vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
#include <linux/miscdevice.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <net/sock.h>
#include <linux/virtio_vsock.h>
#include <linux/vhost.h>
#include <linux/hashtable.h>

#include <net/af_vsock.h>
#include "vhost.h"

#define VHOST_VSOCK_DEFAULT_HOST_CID	2
/* Max number of bytes transferred before requeueing the job.
 * Using this limit prevents one virtqueue from starving others. */
#define VHOST_VSOCK_WEIGHT 0x80000
/* Max number of packets transferred before requeueing the job.
 * Using this limit prevents one virtqueue from starving others with
 * small pkts.
 */
#define VHOST_VSOCK_PKT_WEIGHT 256

enum {
	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
			       (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
};

enum {
	VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
};

/* Used to track all the vhost_vsock instances on the system. */
static DEFINE_MUTEX(vhost_vsock_mutex);
static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);

struct vhost_vsock {
	struct vhost_dev dev;
	struct vhost_virtqueue vqs[2];

	/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
	struct hlist_node hash;

	struct vhost_work send_pkt_work;
	struct sk_buff_head send_pkt_queue; /* host->guest pending packets */

	atomic_t queued_replies;

	u32 guest_cid;
	bool seqpacket_allow;
};

static u32 vhost_transport_get_local_cid(void)
{
	return VHOST_VSOCK_DEFAULT_HOST_CID;
}

/* Callers that dereference the return value must hold vhost_vsock_mutex or the
 * RCU read lock.
 */
static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
{
	struct vhost_vsock *vsock;

	hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
		u32 other_cid = vsock->guest_cid;

		/* Skip instances that have no CID yet */
		if (other_cid == 0)
			continue;

		if (other_cid == guest_cid)
			return vsock;

	}

	return NULL;
}

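/* Copy packets queued on send_pkt_queue into the guest's RX virtqueue.
 * Packets larger than the available buffer are split across multiple
 * buffers.  The loop stops after VHOST_VSOCK_PKT_WEIGHT packets or
 * VHOST_VSOCK_WEIGHT bytes so one virtqueue cannot starve the others.
 */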
static void
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
			    struct vhost_virtqueue *vq)
{
	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
	int pkts = 0, total_len = 0;
	bool added = false;
	bool restart_tx = false;

	mutex_lock(&vq->mutex);

	if (!vhost_vq_get_backend(vq))
		goto out;

	if (!vq_meta_prefetch(vq))
		goto out;

	/* Avoid further vmexits, we're already processing the virtqueue */
	vhost_disable_notify(&vsock->dev, vq);

	do {
		struct virtio_vsock_hdr *hdr;
		size_t iov_len, payload_len;
		struct iov_iter iov_iter;
		u32 flags_to_restore = 0;
		struct sk_buff *skb;
		unsigned out, in;
		size_t nbytes;
		int head;

		skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);

		if (!skb) {
			vhost_enable_notify(&vsock->dev, vq);
			break;
		}

		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		if (head < 0) {
			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
			break;
		}

		if (head == vq->num) {
			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
			/* We cannot finish yet if more buffers snuck in while
			 * re-enabling notify.
			 */
			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
				vhost_disable_notify(&vsock->dev, vq);
				continue;
			}
			break;
		}

		if (out) {
			kfree_skb(skb);
			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
			break;
		}

		iov_len = iov_length(&vq->iov[out], in);
		if (iov_len < sizeof(*hdr)) {
			kfree_skb(skb);
			vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
			break;
		}

		iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len);
		payload_len = skb->len;
		hdr = virtio_vsock_hdr(skb);

		/* If the packet is greater than the space available in the
		 * buffer, we split it using multiple buffers.
		 */
		if (payload_len > iov_len - sizeof(*hdr)) {
			payload_len = iov_len - sizeof(*hdr);

			/* As we are copying pieces of a large packet's buffer
			 * into small rx buffers, the headers of the packets in
			 * the rx queue are created dynamically and initialized
			 * from the header of the current packet (except for
			 * the length). But for SOCK_SEQPACKET we must also
			 * clear the message delimiter bit (VIRTIO_VSOCK_SEQ_EOM)
			 * and the MSG_EOR bit (VIRTIO_VSOCK_SEQ_EOR) if set;
			 * otherwise we would emit a sequence of packets with
			 * these bits set. Once the initialized header has been
			 * copied to the rx buffer, the required bits are
			 * restored.
			 */
			if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
				hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
				flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;

				if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) {
					hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
					flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR;
				}
			}
		}

		/* Set the correct length in the header */
		hdr->len = cpu_to_le32(payload_len);

		nbytes = copy_to_iter(hdr, sizeof(*hdr), &iov_iter);
		if (nbytes != sizeof(*hdr)) {
			kfree_skb(skb);
			vq_err(vq, "Faulted on copying pkt hdr\n");
			break;
		}

		nbytes = copy_to_iter(skb->data, payload_len, &iov_iter);
		if (nbytes != payload_len) {
			kfree_skb(skb);
			vq_err(vq, "Faulted on copying pkt buf\n");
			break;
		}

		/* Deliver to monitoring devices all packets that we
		 * will transmit.
		 */
		virtio_transport_deliver_tap_pkt(skb);

		vhost_add_used(vq, head, sizeof(*hdr) + payload_len);
		added = true;

		skb_pull(skb, payload_len);
		total_len += payload_len;

		/* If we didn't send all the payload we can requeue the packet
		 * to send it with the next available buffer.
		 */
		if (skb->len > 0) {
			hdr->flags |= cpu_to_le32(flags_to_restore);

			/* We are queueing the same skb to handle
			 * the remaining bytes, and we want to deliver it
			 * to monitoring devices in the next iteration.
			 */
			virtio_vsock_skb_clear_tap_delivered(skb);
			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
		} else {
			if (virtio_vsock_skb_reply(skb)) {
				int val;

				val = atomic_dec_return(&vsock->queued_replies);

				/* Do we have resources to resume tx
				 * processing?
				 */
				if (val + 1 == tx_vq->num)
					restart_tx = true;
			}

			consume_skb(skb);
		}
	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
	if (added)
		vhost_signal(&vsock->dev, vq);

out:
	mutex_unlock(&vq->mutex);

	if (restart_tx)
		vhost_poll_queue(&tx_vq->poll);
}

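/* vhost work function: flush pending host->guest packets into the RX
 * virtqueue.  Scheduled by vhost_transport_send_pkt() and on device start.
 */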
static void vhost_transport_send_pkt_work(struct vhost_work *work)
{
	struct vhost_virtqueue *vq;
	struct vhost_vsock *vsock;

	vsock = container_of(work, struct vhost_vsock, send_pkt_work);
	vq = &vsock->vqs[VSOCK_VQ_RX];

	vhost_transport_do_send_pkt(vsock, vq);
}

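/* Queue a packet for the guest identified by the destination CID in the
 * header and kick the send worker.  Returns the queued length, or -ENODEV if
 * no vhost_vsock instance with that CID exists.
 */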
static int
vhost_transport_send_pkt(struct sk_buff *skb)
{
	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
	struct vhost_vsock *vsock;
	int len = skb->len;

	rcu_read_lock();

	/* Find the vhost_vsock according to guest context id  */
	vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid));
	if (!vsock) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -ENODEV;
	}

	if (virtio_vsock_skb_reply(skb))
		atomic_inc(&vsock->queued_replies);

	virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
	vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);

	rcu_read_unlock();
	return len;
}

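/* Drop packets queued for @vsk's connection that have not yet been handed to
 * the guest.  If replies were purged, resume TX processing in case the TX
 * virtqueue was throttled waiting for reply space.
 */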
static int
vhost_transport_cancel_pkt(struct vsock_sock *vsk)
{
	struct vhost_vsock *vsock;
	int cnt = 0;
	int ret = -ENODEV;

	rcu_read_lock();

	/* Find the vhost_vsock according to guest context id  */
	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
	if (!vsock)
		goto out;

	cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue);

	if (cnt) {
		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
		int new_cnt;

		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
			vhost_poll_queue(&tx_vq->poll);
	}

	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}

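/* Build an skb from the guest buffers described by vq->iov: copy the packet
 * header, validate the payload length, then copy the payload.  Returns NULL
 * if the descriptor layout or the header is malformed.
 */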
static struct sk_buff *
vhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
		      unsigned int out, unsigned int in)
{
	struct virtio_vsock_hdr *hdr;
	struct iov_iter iov_iter;
	struct sk_buff *skb;
	size_t payload_len;
	size_t nbytes;
	size_t len;

	if (in != 0) {
		vq_err(vq, "Expected 0 input buffers, got %u\n", in);
		return NULL;
	}

	len = iov_length(vq->iov, out);

	/* len contains both payload and hdr */
	skb = virtio_vsock_alloc_skb(len, GFP_KERNEL);
	if (!skb)
		return NULL;

	iov_iter_init(&iov_iter, ITER_SOURCE, vq->iov, out, len);

	hdr = virtio_vsock_hdr(skb);
	nbytes = copy_from_iter(hdr, sizeof(*hdr), &iov_iter);
	if (nbytes != sizeof(*hdr)) {
		vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
		       sizeof(*hdr), nbytes);
		kfree_skb(skb);
		return NULL;
	}

	payload_len = le32_to_cpu(hdr->len);

	/* No payload */
	if (!payload_len)
		return skb;

	/* The pkt is too big or the length in the header is invalid */
	if (payload_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE ||
	    payload_len + sizeof(*hdr) > len) {
		kfree_skb(skb);
		return NULL;
	}

	virtio_vsock_skb_rx_put(skb);

	nbytes = copy_from_iter(skb->data, payload_len, &iov_iter);
	if (nbytes != payload_len) {
		vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
		       payload_len, nbytes);
		kfree_skb(skb);
		return NULL;
	}

	return skb;
}

/* Is there space left for replies to rx packets? */
static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
{
	struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
	int val;

	smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
	val = atomic_read(&vsock->queued_replies);

	return val < vq->num;
}

static bool vhost_transport_seqpacket_allow(u32 remote_cid);

static struct virtio_transport vhost_transport = {
	.transport = {
		.module                   = THIS_MODULE,

		.get_local_cid            = vhost_transport_get_local_cid,

		.init                     = virtio_transport_do_socket_init,
		.destruct                 = virtio_transport_destruct,
		.release                  = virtio_transport_release,
		.connect                  = virtio_transport_connect,
		.shutdown                 = virtio_transport_shutdown,
		.cancel_pkt               = vhost_transport_cancel_pkt,

		.dgram_enqueue            = virtio_transport_dgram_enqueue,
		.dgram_dequeue            = virtio_transport_dgram_dequeue,
		.dgram_bind               = virtio_transport_dgram_bind,
		.dgram_allow              = virtio_transport_dgram_allow,

		.stream_enqueue           = virtio_transport_stream_enqueue,
		.stream_dequeue           = virtio_transport_stream_dequeue,
		.stream_has_data          = virtio_transport_stream_has_data,
		.stream_has_space         = virtio_transport_stream_has_space,
		.stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
		.stream_is_active         = virtio_transport_stream_is_active,
		.stream_allow             = virtio_transport_stream_allow,

		.seqpacket_dequeue        = virtio_transport_seqpacket_dequeue,
		.seqpacket_enqueue        = virtio_transport_seqpacket_enqueue,
		.seqpacket_allow          = vhost_transport_seqpacket_allow,
		.seqpacket_has_data       = virtio_transport_seqpacket_has_data,

		.notify_poll_in           = virtio_transport_notify_poll_in,
		.notify_poll_out          = virtio_transport_notify_poll_out,
		.notify_recv_init         = virtio_transport_notify_recv_init,
		.notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
		.notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
		.notify_send_init         = virtio_transport_notify_send_init,
		.notify_send_pre_block    = virtio_transport_notify_send_pre_block,
		.notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
		.notify_buffer_size       = virtio_transport_notify_buffer_size,
		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,

		.read_skb = virtio_transport_read_skb,
	},

	.send_pkt = vhost_transport_send_pkt,
};

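/* True if the guest owning @remote_cid negotiated VIRTIO_VSOCK_F_SEQPACKET. */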
static bool vhost_transport_seqpacket_allow(u32 remote_cid)
{
	struct vhost_vsock *vsock;
	bool seqpacket_allow = false;

	rcu_read_lock();
	vsock = vhost_vsock_get(remote_cid);

	if (vsock)
		seqpacket_allow = vsock->seqpacket_allow;

	rcu_read_unlock();

	return seqpacket_allow;
}

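/* TX virtqueue kick handler: receive packets from the guest, hand correctly
 * addressed ones to the core virtio transport, and stop early when too many
 * replies are pending or the weight limits are hit.
 */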
static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
						 dev);
	int head, pkts = 0, total_len = 0;
	unsigned int out, in;
	struct sk_buff *skb;
	bool added = false;

	mutex_lock(&vq->mutex);

	if (!vhost_vq_get_backend(vq))
		goto out;

	if (!vq_meta_prefetch(vq))
		goto out;

	vhost_disable_notify(&vsock->dev, vq);
	do {
		struct virtio_vsock_hdr *hdr;

		if (!vhost_vsock_more_replies(vsock)) {
			/* Stop tx until the device processes already
			 * pending replies.  Leave tx virtqueue
			 * callbacks disabled.
			 */
			goto no_more_replies;
		}

		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		if (head < 0)
			break;

		if (head == vq->num) {
			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
				vhost_disable_notify(&vsock->dev, vq);
				continue;
			}
			break;
		}

		skb = vhost_vsock_alloc_skb(vq, out, in);
		if (!skb) {
			vq_err(vq, "Faulted on pkt\n");
			continue;
		}

		total_len += sizeof(*hdr) + skb->len;

		/* Deliver to monitoring devices all received packets */
		virtio_transport_deliver_tap_pkt(skb);

		hdr = virtio_vsock_hdr(skb);

		/* Only accept correctly addressed packets */
		if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid &&
		    le64_to_cpu(hdr->dst_cid) ==
		    vhost_transport_get_local_cid())
			virtio_transport_recv_pkt(&vhost_transport, skb);
		else
			kfree_skb(skb);

		vhost_add_used(vq, head, 0);
		added = true;
	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));

no_more_replies:
	if (added)
		vhost_signal(&vsock->dev, vq);

out:
	mutex_unlock(&vq->mutex);
}

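/* RX virtqueue kick handler: the guest made new receive buffers available,
 * so retry sending any pending host->guest packets.
 */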
static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						poll.work);
	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
						 dev);

	vhost_transport_do_send_pkt(vsock, vq);
}

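/* VHOST_VSOCK_SET_RUNNING(1): attach this device as the backend of both
 * virtqueues and kick the send worker in case packets were queued before the
 * device was started.
 */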
static int vhost_vsock_start(struct vhost_vsock *vsock)
{
	struct vhost_virtqueue *vq;
	size_t i;
	int ret;

	mutex_lock(&vsock->dev.mutex);

	ret = vhost_dev_check_owner(&vsock->dev);
	if (ret)
		goto err;

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];

		mutex_lock(&vq->mutex);

		if (!vhost_vq_access_ok(vq)) {
			ret = -EFAULT;
			goto err_vq;
		}

		if (!vhost_vq_get_backend(vq)) {
			vhost_vq_set_backend(vq, vsock);
			ret = vhost_vq_init_access(vq);
			if (ret)
				goto err_vq;
		}

		mutex_unlock(&vq->mutex);
	}

	/* Some packets may have been queued before the device was started,
	 * let's kick the send worker to send them.
	 */
	vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);

	mutex_unlock(&vsock->dev.mutex);
	return 0;

err_vq:
	vhost_vq_set_backend(vq, NULL);
	mutex_unlock(&vq->mutex);

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];

		mutex_lock(&vq->mutex);
		vhost_vq_set_backend(vq, NULL);
		mutex_unlock(&vq->mutex);
	}
err:
	mutex_unlock(&vsock->dev.mutex);
	return ret;
}

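/* Detach the backend from both virtqueues.  @check_owner is false on the
 * release path, where stopping must not fail.
 */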
static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
{
	size_t i;
	int ret = 0;

	mutex_lock(&vsock->dev.mutex);

	if (check_owner) {
		ret = vhost_dev_check_owner(&vsock->dev);
		if (ret)
			goto err;
	}

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		struct vhost_virtqueue *vq = &vsock->vqs[i];

		mutex_lock(&vq->mutex);
		vhost_vq_set_backend(vq, NULL);
		mutex_unlock(&vq->mutex);
	}

err:
	mutex_unlock(&vsock->dev.mutex);
	return ret;
}

static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}

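/* Open of /dev/vhost-vsock: allocate a vhost_vsock instance and initialize
 * its TX/RX virtqueues and send worker.  No guest CID is assigned yet.
 */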
static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
{
	struct vhost_virtqueue **vqs;
	struct vhost_vsock *vsock;
	int ret;

	/* This struct is large and allocation could fail, fall back to vmalloc
	 * if there is no other way.
	 */
	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!vsock)
		return -ENOMEM;

	vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		ret = -ENOMEM;
		goto out;
	}

	vsock->guest_cid = 0; /* no CID assigned yet */
	vsock->seqpacket_allow = false;

	atomic_set(&vsock->queued_replies, 0);

	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;

	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
		       UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
		       VHOST_VSOCK_WEIGHT, true, NULL);

	file->private_data = vsock;
	skb_queue_head_init(&vsock->send_pkt_queue);
	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
	return 0;

out:
	vhost_vsock_free(vsock);
	return ret;
}

static void vhost_vsock_flush(struct vhost_vsock *vsock)
{
	vhost_dev_flush(&vsock->dev);
}

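/* Called for each connected socket when a vhost_vsock device goes away: reset
 * connections whose peer CID no longer has a backing instance.
 */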
static void vhost_vsock_reset_orphans(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	/* vmci_transport.c doesn't take sk_lock here either.  At least we're
	 * under vsock_table_lock so the sock cannot disappear while we're
	 * executing.
	 */

	/* If the peer is still valid, no need to reset connection */
	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
		return;

	/* If the close timeout is pending, let it expire.  This avoids races
	 * with the timeout callback.
	 */
	if (vsk->close_work_scheduled)
		return;

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	sk->sk_state = SS_UNCONNECTED;
	sk->sk_err = ECONNRESET;
	sk_error_report(sk);
}

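/* Release of /dev/vhost-vsock: unpublish the instance, wait for RCU readers,
 * reset orphaned connections, stop the device and free all resources.
 */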
static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
{
	struct vhost_vsock *vsock = file->private_data;

	mutex_lock(&vhost_vsock_mutex);
	if (vsock->guest_cid)
		hash_del_rcu(&vsock->hash);
	mutex_unlock(&vhost_vsock_mutex);

	/* Wait for other CPUs to finish using vsock */
	synchronize_rcu();

	/* Iterating over all connections for all CIDs to find orphans is
	 * inefficient.  Room for improvement here. */
	vsock_for_each_connected_socket(&vhost_transport.transport,
					vhost_vsock_reset_orphans);

	/* Don't check the owner, because we are in the release path, so we
	 * need to stop the vsock device in any case.
	 * vhost_vsock_stop() can not fail in this case, so we don't need to
	 * check the return code.
	 */
	vhost_vsock_stop(vsock, false);
	vhost_vsock_flush(vsock);
	vhost_dev_stop(&vsock->dev);

	virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);

	vhost_dev_cleanup(&vsock->dev);
	kfree(vsock->dev.vqs);
	vhost_vsock_free(vsock);
	return 0;
}

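/* VHOST_VSOCK_SET_GUEST_CID: validate the requested CID, refuse reserved or
 * already used values, then (re)insert the instance into vhost_vsock_hash.
 */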
static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
{
	struct vhost_vsock *other;

	/* Refuse reserved CIDs */
	if (guest_cid <= VMADDR_CID_HOST ||
	    guest_cid == U32_MAX)
		return -EINVAL;

	/* 64-bit CIDs are not yet supported */
	if (guest_cid > U32_MAX)
		return -EINVAL;

	/* Refuse if CID is assigned to the guest->host transport (i.e. nested
	 * VM), to make the loopback work.
	 */
	if (vsock_find_cid(guest_cid))
		return -EADDRINUSE;

	/* Refuse if CID is already in use */
	mutex_lock(&vhost_vsock_mutex);
	other = vhost_vsock_get(guest_cid);
	if (other && other != vsock) {
		mutex_unlock(&vhost_vsock_mutex);
		return -EADDRINUSE;
	}

	if (vsock->guest_cid)
		hash_del_rcu(&vsock->hash);

	vsock->guest_cid = guest_cid;
	hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
	mutex_unlock(&vhost_vsock_mutex);

	return 0;
}

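/* VHOST_SET_FEATURES: record the features acked by userspace, enable the
 * device IOTLB if VIRTIO_F_ACCESS_PLATFORM was negotiated, and update
 * seqpacket_allow accordingly.
 */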
static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
{
	struct vhost_virtqueue *vq;
	int i;

	if (features & ~VHOST_VSOCK_FEATURES)
		return -EOPNOTSUPP;

	mutex_lock(&vsock->dev.mutex);
	if ((features & (1 << VHOST_F_LOG_ALL)) &&
	    !vhost_log_access_ok(&vsock->dev)) {
		goto err;
	}

	if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
		if (vhost_init_device_iotlb(&vsock->dev))
			goto err;
	}

	vsock->seqpacket_allow = features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET);

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];
		mutex_lock(&vq->mutex);
		vq->acked_features = features;
		mutex_unlock(&vq->mutex);
	}
	mutex_unlock(&vsock->dev.mutex);
	return 0;

err:
	mutex_unlock(&vsock->dev.mutex);
	return -EFAULT;
}

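/* Handle vhost-vsock specific ioctls (guest CID, running state, features) and
 * fall back to the generic vhost device/vring ioctls for everything else.
 */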
static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
				  unsigned long arg)
{
	struct vhost_vsock *vsock = f->private_data;
	void __user *argp = (void __user *)arg;
	u64 guest_cid;
	u64 features;
	int start;
	int r;

	switch (ioctl) {
	case VHOST_VSOCK_SET_GUEST_CID:
		if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
			return -EFAULT;
		return vhost_vsock_set_cid(vsock, guest_cid);
	case VHOST_VSOCK_SET_RUNNING:
		if (copy_from_user(&start, argp, sizeof(start)))
			return -EFAULT;
		if (start)
			return vhost_vsock_start(vsock);
		else
			return vhost_vsock_stop(vsock, true);
	case VHOST_GET_FEATURES:
		features = VHOST_VSOCK_FEATURES;
		if (copy_to_user(argp, &features, sizeof(features)))
			return -EFAULT;
		return 0;
	case VHOST_SET_FEATURES:
		if (copy_from_user(&features, argp, sizeof(features)))
			return -EFAULT;
		return vhost_vsock_set_features(vsock, features);
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VSOCK_BACKEND_FEATURES;
		if (copy_to_user(argp, &features, sizeof(features)))
			return -EFAULT;
		return 0;
	case VHOST_SET_BACKEND_FEATURES:
		if (copy_from_user(&features, argp, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VSOCK_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&vsock->dev, features);
		return 0;
	default:
		mutex_lock(&vsock->dev.mutex);
		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
		else
			vhost_vsock_flush(vsock);
		mutex_unlock(&vsock->dev.mutex);
		return r;
	}
}

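/* read/write/poll on the chardev forward to the generic vhost chardev
 * helpers, which carry the IOTLB messages used when the device IOTLB
 * (VIRTIO_F_ACCESS_PLATFORM) is enabled.
 */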
static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vsock *vsock = file->private_data;
	struct vhost_dev *dev = &vsock->dev;
	int noblock = file->f_flags & O_NONBLOCK;

	return vhost_chr_read_iter(dev, to, noblock);
}

static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb,
					struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vsock *vsock = file->private_data;
	struct vhost_dev *dev = &vsock->dev;

	return vhost_chr_write_iter(dev, from);
}

static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait)
{
	struct vhost_vsock *vsock = file->private_data;
	struct vhost_dev *dev = &vsock->dev;

	return vhost_chr_poll(file, dev, wait);
}

static const struct file_operations vhost_vsock_fops = {
	.owner          = THIS_MODULE,
	.open           = vhost_vsock_dev_open,
	.release        = vhost_vsock_dev_release,
	.llseek		= noop_llseek,
	.unlocked_ioctl = vhost_vsock_dev_ioctl,
	.compat_ioctl   = compat_ptr_ioctl,
	.read_iter      = vhost_vsock_chr_read_iter,
	.write_iter     = vhost_vsock_chr_write_iter,
	.poll           = vhost_vsock_chr_poll,
};

static struct miscdevice vhost_vsock_misc = {
	.minor = VHOST_VSOCK_MINOR,
	.name = "vhost-vsock",
	.fops = &vhost_vsock_fops,
};

static int __init vhost_vsock_init(void)
{
	int ret;

	ret = vsock_core_register(&vhost_transport.transport,
				  VSOCK_TRANSPORT_F_H2G);
	if (ret < 0)
		return ret;

	ret = misc_register(&vhost_vsock_misc);
	if (ret) {
		vsock_core_unregister(&vhost_transport.transport);
		return ret;
	}

	return 0;
};

static void __exit vhost_vsock_exit(void)
{
	misc_deregister(&vhost_vsock_misc);
	vsock_core_unregister(&vhost_transport.transport);
};

module_init(vhost_vsock_init);
module_exit(vhost_vsock_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("vhost transport for vsock ");
MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
MODULE_ALIAS("devname:vhost-vsock");