xref: /openbmc/linux/drivers/vhost/vsock.c (revision 9e09d0ec)
17a338472SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2433fc58eSAsias He /*
3433fc58eSAsias He  * vhost transport for vsock
4433fc58eSAsias He  *
5433fc58eSAsias He  * Copyright (C) 2013-2015 Red Hat, Inc.
6433fc58eSAsias He  * Author: Asias He <asias@redhat.com>
7433fc58eSAsias He  *         Stefan Hajnoczi <stefanha@redhat.com>
8433fc58eSAsias He  */
9433fc58eSAsias He #include <linux/miscdevice.h>
10433fc58eSAsias He #include <linux/atomic.h>
11433fc58eSAsias He #include <linux/module.h>
12433fc58eSAsias He #include <linux/mutex.h>
13433fc58eSAsias He #include <linux/vmalloc.h>
14433fc58eSAsias He #include <net/sock.h>
15433fc58eSAsias He #include <linux/virtio_vsock.h>
16433fc58eSAsias He #include <linux/vhost.h>
17834e772cSStefan Hajnoczi #include <linux/hashtable.h>
18433fc58eSAsias He 
19433fc58eSAsias He #include <net/af_vsock.h>
20433fc58eSAsias He #include "vhost.h"
21433fc58eSAsias He 
#define VHOST_VSOCK_DEFAULT_HOST_CID	2
/* Byte budget of one work invocation: after this many bytes have been
 * transferred the job is requeued, so that a single virtqueue cannot
 * starve the others.
 */
#define VHOST_VSOCK_WEIGHT 0x80000
/* Packet budget of one work invocation: after this many packets have been
 * transferred the job is requeued, so that a virtqueue carrying many small
 * pkts cannot starve the others either.
 */
#define VHOST_VSOCK_PKT_WEIGHT 256
31433fc58eSAsias He 
32433fc58eSAsias He enum {
33e13a6915SStefano Garzarella 	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
34ced7b713SArseny Krasnov 			       (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
35ced7b713SArseny Krasnov 			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
36e13a6915SStefano Garzarella };
37e13a6915SStefano Garzarella 
38e13a6915SStefano Garzarella enum {
39e13a6915SStefano Garzarella 	VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
40433fc58eSAsias He };
41433fc58eSAsias He 
42433fc58eSAsias He /* Used to track all the vhost_vsock instances on the system. */
436db3d8dcSStefan Hajnoczi static DEFINE_MUTEX(vhost_vsock_mutex);
44834e772cSStefan Hajnoczi static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
45433fc58eSAsias He 
46433fc58eSAsias He struct vhost_vsock {
47433fc58eSAsias He 	struct vhost_dev dev;
48433fc58eSAsias He 	struct vhost_virtqueue vqs[2];
49433fc58eSAsias He 
506db3d8dcSStefan Hajnoczi 	/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
51834e772cSStefan Hajnoczi 	struct hlist_node hash;
52433fc58eSAsias He 
53433fc58eSAsias He 	struct vhost_work send_pkt_work;
5471dc9ec9SBobby Eshleman 	struct sk_buff_head send_pkt_queue; /* host->guest pending packets */
55433fc58eSAsias He 
56433fc58eSAsias He 	atomic_t queued_replies;
57433fc58eSAsias He 
58433fc58eSAsias He 	u32 guest_cid;
59ced7b713SArseny Krasnov 	bool seqpacket_allow;
60433fc58eSAsias He };
61433fc58eSAsias He 
62433fc58eSAsias He static u32 vhost_transport_get_local_cid(void)
63433fc58eSAsias He {
64433fc58eSAsias He 	return VHOST_VSOCK_DEFAULT_HOST_CID;
65433fc58eSAsias He }
66433fc58eSAsias He 
676db3d8dcSStefan Hajnoczi /* Callers that dereference the return value must hold vhost_vsock_mutex or the
68834e772cSStefan Hajnoczi  * RCU read lock.
69834e772cSStefan Hajnoczi  */
70834e772cSStefan Hajnoczi static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
71433fc58eSAsias He {
72433fc58eSAsias He 	struct vhost_vsock *vsock;
73433fc58eSAsias He 
74834e772cSStefan Hajnoczi 	hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
75433fc58eSAsias He 		u32 other_cid = vsock->guest_cid;
76433fc58eSAsias He 
77433fc58eSAsias He 		/* Skip instances that have no CID yet */
78433fc58eSAsias He 		if (other_cid == 0)
79433fc58eSAsias He 			continue;
80433fc58eSAsias He 
81ff3c1b1aSVaibhav Murkute 		if (other_cid == guest_cid)
82433fc58eSAsias He 			return vsock;
83ff3c1b1aSVaibhav Murkute 
84433fc58eSAsias He 	}
85433fc58eSAsias He 
86433fc58eSAsias He 	return NULL;
87433fc58eSAsias He }
88433fc58eSAsias He 
89433fc58eSAsias He static void
90433fc58eSAsias He vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
91433fc58eSAsias He 			    struct vhost_virtqueue *vq)
92433fc58eSAsias He {
93433fc58eSAsias He 	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
94e79b431fSJason Wang 	int pkts = 0, total_len = 0;
95433fc58eSAsias He 	bool added = false;
96433fc58eSAsias He 	bool restart_tx = false;
97433fc58eSAsias He 
98433fc58eSAsias He 	mutex_lock(&vq->mutex);
99433fc58eSAsias He 
100247643f8SEugenio Pérez 	if (!vhost_vq_get_backend(vq))
101433fc58eSAsias He 		goto out;
102433fc58eSAsias He 
103e13a6915SStefano Garzarella 	if (!vq_meta_prefetch(vq))
104e13a6915SStefano Garzarella 		goto out;
105e13a6915SStefano Garzarella 
106433fc58eSAsias He 	/* Avoid further vmexits, we're already processing the virtqueue */
107433fc58eSAsias He 	vhost_disable_notify(&vsock->dev, vq);
108433fc58eSAsias He 
109e79b431fSJason Wang 	do {
11071dc9ec9SBobby Eshleman 		struct virtio_vsock_hdr *hdr;
11171dc9ec9SBobby Eshleman 		size_t iov_len, payload_len;
112433fc58eSAsias He 		struct iov_iter iov_iter;
11371dc9ec9SBobby Eshleman 		u32 flags_to_restore = 0;
11471dc9ec9SBobby Eshleman 		struct sk_buff *skb;
115433fc58eSAsias He 		unsigned out, in;
116433fc58eSAsias He 		size_t nbytes;
117433fc58eSAsias He 		int head;
118433fc58eSAsias He 
11971dc9ec9SBobby Eshleman 		skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
12071dc9ec9SBobby Eshleman 
12171dc9ec9SBobby Eshleman 		if (!skb) {
122433fc58eSAsias He 			vhost_enable_notify(&vsock->dev, vq);
123433fc58eSAsias He 			break;
124433fc58eSAsias He 		}
125433fc58eSAsias He 
126433fc58eSAsias He 		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
127433fc58eSAsias He 					 &out, &in, NULL, NULL);
128433fc58eSAsias He 		if (head < 0) {
12971dc9ec9SBobby Eshleman 			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
130433fc58eSAsias He 			break;
131433fc58eSAsias He 		}
132433fc58eSAsias He 
133433fc58eSAsias He 		if (head == vq->num) {
13471dc9ec9SBobby Eshleman 			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
135433fc58eSAsias He 			/* We cannot finish yet if more buffers snuck in while
136433fc58eSAsias He 			 * re-enabling notify.
137433fc58eSAsias He 			 */
138433fc58eSAsias He 			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
139433fc58eSAsias He 				vhost_disable_notify(&vsock->dev, vq);
140433fc58eSAsias He 				continue;
141433fc58eSAsias He 			}
142433fc58eSAsias He 			break;
143433fc58eSAsias He 		}
144433fc58eSAsias He 
145433fc58eSAsias He 		if (out) {
14671dc9ec9SBobby Eshleman 			kfree_skb(skb);
147433fc58eSAsias He 			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
148433fc58eSAsias He 			break;
149433fc58eSAsias He 		}
150433fc58eSAsias He 
1516dbd3e66SStefano Garzarella 		iov_len = iov_length(&vq->iov[out], in);
15271dc9ec9SBobby Eshleman 		if (iov_len < sizeof(*hdr)) {
15371dc9ec9SBobby Eshleman 			kfree_skb(skb);
1546dbd3e66SStefano Garzarella 			vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
1556dbd3e66SStefano Garzarella 			break;
1566dbd3e66SStefano Garzarella 		}
1576dbd3e66SStefano Garzarella 
158de4eda9dSAl Viro 		iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len);
15971dc9ec9SBobby Eshleman 		payload_len = skb->len;
16071dc9ec9SBobby Eshleman 		hdr = virtio_vsock_hdr(skb);
1616dbd3e66SStefano Garzarella 
1626dbd3e66SStefano Garzarella 		/* If the packet is greater than the space available in the
1636dbd3e66SStefano Garzarella 		 * buffer, we split it using multiple buffers.
1646dbd3e66SStefano Garzarella 		 */
16571dc9ec9SBobby Eshleman 		if (payload_len > iov_len - sizeof(*hdr)) {
16671dc9ec9SBobby Eshleman 			payload_len = iov_len - sizeof(*hdr);
1676dbd3e66SStefano Garzarella 
168ced7b713SArseny Krasnov 			/* As we are copying pieces of large packet's buffer to
169ced7b713SArseny Krasnov 			 * small rx buffers, headers of packets in rx queue are
170ced7b713SArseny Krasnov 			 * created dynamically and are initialized with header
171ced7b713SArseny Krasnov 			 * of current packet(except length). But in case of
1729af8f106SArseny Krasnov 			 * SOCK_SEQPACKET, we also must clear message delimeter
1731af7e555SArseny Krasnov 			 * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit
1741af7e555SArseny Krasnov 			 * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise,
1751af7e555SArseny Krasnov 			 * there will be sequence of packets with these
1761af7e555SArseny Krasnov 			 * bits set. After initialized header will be copied to
1771af7e555SArseny Krasnov 			 * rx buffer, these required bits will be restored.
178ced7b713SArseny Krasnov 			 */
17971dc9ec9SBobby Eshleman 			if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
18071dc9ec9SBobby Eshleman 				hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
1811af7e555SArseny Krasnov 				flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;
1821af7e555SArseny Krasnov 
18371dc9ec9SBobby Eshleman 				if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) {
18471dc9ec9SBobby Eshleman 					hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
1851af7e555SArseny Krasnov 					flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR;
1861af7e555SArseny Krasnov 				}
187ced7b713SArseny Krasnov 			}
188ced7b713SArseny Krasnov 		}
189ced7b713SArseny Krasnov 
1906dbd3e66SStefano Garzarella 		/* Set the correct length in the header */
19171dc9ec9SBobby Eshleman 		hdr->len = cpu_to_le32(payload_len);
192433fc58eSAsias He 
19371dc9ec9SBobby Eshleman 		nbytes = copy_to_iter(hdr, sizeof(*hdr), &iov_iter);
19471dc9ec9SBobby Eshleman 		if (nbytes != sizeof(*hdr)) {
19571dc9ec9SBobby Eshleman 			kfree_skb(skb);
196433fc58eSAsias He 			vq_err(vq, "Faulted on copying pkt hdr\n");
197433fc58eSAsias He 			break;
198433fc58eSAsias He 		}
199433fc58eSAsias He 
20071dc9ec9SBobby Eshleman 		nbytes = copy_to_iter(skb->data, payload_len, &iov_iter);
2016dbd3e66SStefano Garzarella 		if (nbytes != payload_len) {
20271dc9ec9SBobby Eshleman 			kfree_skb(skb);
203433fc58eSAsias He 			vq_err(vq, "Faulted on copying pkt buf\n");
204433fc58eSAsias He 			break;
205433fc58eSAsias He 		}
206433fc58eSAsias He 
207107bc076SStefano Garzarella 		/* Deliver to monitoring devices all packets that we
208107bc076SStefano Garzarella 		 * will transmit.
20982dfb540SGerard Garcia 		 */
21071dc9ec9SBobby Eshleman 		virtio_transport_deliver_tap_pkt(skb);
21182dfb540SGerard Garcia 
21271dc9ec9SBobby Eshleman 		vhost_add_used(vq, head, sizeof(*hdr) + payload_len);
213107bc076SStefano Garzarella 		added = true;
214107bc076SStefano Garzarella 
21571dc9ec9SBobby Eshleman 		skb_pull(skb, payload_len);
2166dbd3e66SStefano Garzarella 		total_len += payload_len;
2176dbd3e66SStefano Garzarella 
2186dbd3e66SStefano Garzarella 		/* If we didn't send all the payload we can requeue the packet
2196dbd3e66SStefano Garzarella 		 * to send it with the next available buffer.
2206dbd3e66SStefano Garzarella 		 */
22171dc9ec9SBobby Eshleman 		if (skb->len > 0) {
22271dc9ec9SBobby Eshleman 			hdr->flags |= cpu_to_le32(flags_to_restore);
223ced7b713SArseny Krasnov 
22471dc9ec9SBobby Eshleman 			/* We are queueing the same skb to handle
225a78d1639SStefano Garzarella 			 * the remaining bytes, and we want to deliver it
226a78d1639SStefano Garzarella 			 * to monitoring devices in the next iteration.
227a78d1639SStefano Garzarella 			 */
22871dc9ec9SBobby Eshleman 			virtio_vsock_skb_clear_tap_delivered(skb);
22971dc9ec9SBobby Eshleman 			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
2306dbd3e66SStefano Garzarella 		} else {
23171dc9ec9SBobby Eshleman 			if (virtio_vsock_skb_reply(skb)) {
2326dbd3e66SStefano Garzarella 				int val;
2336dbd3e66SStefano Garzarella 
2346dbd3e66SStefano Garzarella 				val = atomic_dec_return(&vsock->queued_replies);
2356dbd3e66SStefano Garzarella 
2366dbd3e66SStefano Garzarella 				/* Do we have resources to resume tx
2376dbd3e66SStefano Garzarella 				 * processing?
2386dbd3e66SStefano Garzarella 				 */
2396dbd3e66SStefano Garzarella 				if (val + 1 == tx_vq->num)
2406dbd3e66SStefano Garzarella 					restart_tx = true;
2416dbd3e66SStefano Garzarella 			}
2426dbd3e66SStefano Garzarella 
24371dc9ec9SBobby Eshleman 			consume_skb(skb);
2446dbd3e66SStefano Garzarella 		}
245e79b431fSJason Wang 	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
246433fc58eSAsias He 	if (added)
247433fc58eSAsias He 		vhost_signal(&vsock->dev, vq);
248433fc58eSAsias He 
249433fc58eSAsias He out:
250433fc58eSAsias He 	mutex_unlock(&vq->mutex);
251433fc58eSAsias He 
252433fc58eSAsias He 	if (restart_tx)
253433fc58eSAsias He 		vhost_poll_queue(&tx_vq->poll);
254433fc58eSAsias He }
255433fc58eSAsias He 
256433fc58eSAsias He static void vhost_transport_send_pkt_work(struct vhost_work *work)
257433fc58eSAsias He {
258433fc58eSAsias He 	struct vhost_virtqueue *vq;
259433fc58eSAsias He 	struct vhost_vsock *vsock;
260433fc58eSAsias He 
261433fc58eSAsias He 	vsock = container_of(work, struct vhost_vsock, send_pkt_work);
262433fc58eSAsias He 	vq = &vsock->vqs[VSOCK_VQ_RX];
263433fc58eSAsias He 
264433fc58eSAsias He 	vhost_transport_do_send_pkt(vsock, vq);
265433fc58eSAsias He }
266433fc58eSAsias He 
267433fc58eSAsias He static int
26871dc9ec9SBobby Eshleman vhost_transport_send_pkt(struct sk_buff *skb)
269433fc58eSAsias He {
27071dc9ec9SBobby Eshleman 	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
271433fc58eSAsias He 	struct vhost_vsock *vsock;
27271dc9ec9SBobby Eshleman 	int len = skb->len;
273433fc58eSAsias He 
274834e772cSStefan Hajnoczi 	rcu_read_lock();
275834e772cSStefan Hajnoczi 
276433fc58eSAsias He 	/* Find the vhost_vsock according to guest context id  */
27771dc9ec9SBobby Eshleman 	vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid));
278433fc58eSAsias He 	if (!vsock) {
279834e772cSStefan Hajnoczi 		rcu_read_unlock();
28071dc9ec9SBobby Eshleman 		kfree_skb(skb);
281433fc58eSAsias He 		return -ENODEV;
282433fc58eSAsias He 	}
283433fc58eSAsias He 
28471dc9ec9SBobby Eshleman 	if (virtio_vsock_skb_reply(skb))
285433fc58eSAsias He 		atomic_inc(&vsock->queued_replies);
286433fc58eSAsias He 
28771dc9ec9SBobby Eshleman 	virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
288*9e09d0ecSMike Christie 	vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
289834e772cSStefan Hajnoczi 
290834e772cSStefan Hajnoczi 	rcu_read_unlock();
291433fc58eSAsias He 	return len;
292433fc58eSAsias He }
293433fc58eSAsias He 
29416320f36SPeng Tao static int
29516320f36SPeng Tao vhost_transport_cancel_pkt(struct vsock_sock *vsk)
29616320f36SPeng Tao {
29716320f36SPeng Tao 	struct vhost_vsock *vsock;
29816320f36SPeng Tao 	int cnt = 0;
299834e772cSStefan Hajnoczi 	int ret = -ENODEV;
30016320f36SPeng Tao 
301834e772cSStefan Hajnoczi 	rcu_read_lock();
302834e772cSStefan Hajnoczi 
30316320f36SPeng Tao 	/* Find the vhost_vsock according to guest context id  */
30416320f36SPeng Tao 	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
30516320f36SPeng Tao 	if (!vsock)
306834e772cSStefan Hajnoczi 		goto out;
30716320f36SPeng Tao 
30871dc9ec9SBobby Eshleman 	cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue);
30916320f36SPeng Tao 
31016320f36SPeng Tao 	if (cnt) {
31116320f36SPeng Tao 		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
31216320f36SPeng Tao 		int new_cnt;
31316320f36SPeng Tao 
31416320f36SPeng Tao 		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
31516320f36SPeng Tao 		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
31616320f36SPeng Tao 			vhost_poll_queue(&tx_vq->poll);
31716320f36SPeng Tao 	}
31816320f36SPeng Tao 
319834e772cSStefan Hajnoczi 	ret = 0;
320834e772cSStefan Hajnoczi out:
321834e772cSStefan Hajnoczi 	rcu_read_unlock();
322834e772cSStefan Hajnoczi 	return ret;
32316320f36SPeng Tao }
32416320f36SPeng Tao 
32571dc9ec9SBobby Eshleman static struct sk_buff *
32671dc9ec9SBobby Eshleman vhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
327433fc58eSAsias He 		      unsigned int out, unsigned int in)
328433fc58eSAsias He {
32971dc9ec9SBobby Eshleman 	struct virtio_vsock_hdr *hdr;
330433fc58eSAsias He 	struct iov_iter iov_iter;
33171dc9ec9SBobby Eshleman 	struct sk_buff *skb;
33271dc9ec9SBobby Eshleman 	size_t payload_len;
333433fc58eSAsias He 	size_t nbytes;
334433fc58eSAsias He 	size_t len;
335433fc58eSAsias He 
336433fc58eSAsias He 	if (in != 0) {
337433fc58eSAsias He 		vq_err(vq, "Expected 0 input buffers, got %u\n", in);
338433fc58eSAsias He 		return NULL;
339433fc58eSAsias He 	}
340433fc58eSAsias He 
34171dc9ec9SBobby Eshleman 	len = iov_length(vq->iov, out);
34271dc9ec9SBobby Eshleman 
34371dc9ec9SBobby Eshleman 	/* len contains both payload and hdr */
34471dc9ec9SBobby Eshleman 	skb = virtio_vsock_alloc_skb(len, GFP_KERNEL);
34571dc9ec9SBobby Eshleman 	if (!skb)
346433fc58eSAsias He 		return NULL;
347433fc58eSAsias He 
348de4eda9dSAl Viro 	iov_iter_init(&iov_iter, ITER_SOURCE, vq->iov, out, len);
349433fc58eSAsias He 
35071dc9ec9SBobby Eshleman 	hdr = virtio_vsock_hdr(skb);
35171dc9ec9SBobby Eshleman 	nbytes = copy_from_iter(hdr, sizeof(*hdr), &iov_iter);
35271dc9ec9SBobby Eshleman 	if (nbytes != sizeof(*hdr)) {
353433fc58eSAsias He 		vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
35471dc9ec9SBobby Eshleman 		       sizeof(*hdr), nbytes);
35571dc9ec9SBobby Eshleman 		kfree_skb(skb);
356433fc58eSAsias He 		return NULL;
357433fc58eSAsias He 	}
358433fc58eSAsias He 
35971dc9ec9SBobby Eshleman 	payload_len = le32_to_cpu(hdr->len);
360433fc58eSAsias He 
361433fc58eSAsias He 	/* No payload */
36271dc9ec9SBobby Eshleman 	if (!payload_len)
36371dc9ec9SBobby Eshleman 		return skb;
364433fc58eSAsias He 
36571dc9ec9SBobby Eshleman 	/* The pkt is too big or the length in the header is invalid */
36671dc9ec9SBobby Eshleman 	if (payload_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE ||
36771dc9ec9SBobby Eshleman 	    payload_len + sizeof(*hdr) > len) {
36871dc9ec9SBobby Eshleman 		kfree_skb(skb);
369433fc58eSAsias He 		return NULL;
370433fc58eSAsias He 	}
371433fc58eSAsias He 
37271dc9ec9SBobby Eshleman 	virtio_vsock_skb_rx_put(skb);
37371dc9ec9SBobby Eshleman 
37471dc9ec9SBobby Eshleman 	nbytes = copy_from_iter(skb->data, payload_len, &iov_iter);
37571dc9ec9SBobby Eshleman 	if (nbytes != payload_len) {
37671dc9ec9SBobby Eshleman 		vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
37771dc9ec9SBobby Eshleman 		       payload_len, nbytes);
37871dc9ec9SBobby Eshleman 		kfree_skb(skb);
379433fc58eSAsias He 		return NULL;
380433fc58eSAsias He 	}
381433fc58eSAsias He 
38271dc9ec9SBobby Eshleman 	return skb;
383433fc58eSAsias He }
384433fc58eSAsias He 
385433fc58eSAsias He /* Is there space left for replies to rx packets? */
386433fc58eSAsias He static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
387433fc58eSAsias He {
388433fc58eSAsias He 	struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
389433fc58eSAsias He 	int val;
390433fc58eSAsias He 
391433fc58eSAsias He 	smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
392433fc58eSAsias He 	val = atomic_read(&vsock->queued_replies);
393433fc58eSAsias He 
394433fc58eSAsias He 	return val < vq->num;
395433fc58eSAsias He }
396433fc58eSAsias He 
397ced7b713SArseny Krasnov static bool vhost_transport_seqpacket_allow(u32 remote_cid);
398ced7b713SArseny Krasnov 
3994c7246dcSStefano Garzarella static struct virtio_transport vhost_transport = {
4004c7246dcSStefano Garzarella 	.transport = {
4016a2c0962SStefano Garzarella 		.module                   = THIS_MODULE,
4026a2c0962SStefano Garzarella 
4034c7246dcSStefano Garzarella 		.get_local_cid            = vhost_transport_get_local_cid,
4044c7246dcSStefano Garzarella 
4054c7246dcSStefano Garzarella 		.init                     = virtio_transport_do_socket_init,
4064c7246dcSStefano Garzarella 		.destruct                 = virtio_transport_destruct,
4074c7246dcSStefano Garzarella 		.release                  = virtio_transport_release,
4084c7246dcSStefano Garzarella 		.connect                  = virtio_transport_connect,
4094c7246dcSStefano Garzarella 		.shutdown                 = virtio_transport_shutdown,
4104c7246dcSStefano Garzarella 		.cancel_pkt               = vhost_transport_cancel_pkt,
4114c7246dcSStefano Garzarella 
4124c7246dcSStefano Garzarella 		.dgram_enqueue            = virtio_transport_dgram_enqueue,
4134c7246dcSStefano Garzarella 		.dgram_dequeue            = virtio_transport_dgram_dequeue,
4144c7246dcSStefano Garzarella 		.dgram_bind               = virtio_transport_dgram_bind,
4154c7246dcSStefano Garzarella 		.dgram_allow              = virtio_transport_dgram_allow,
4164c7246dcSStefano Garzarella 
4174c7246dcSStefano Garzarella 		.stream_enqueue           = virtio_transport_stream_enqueue,
4184c7246dcSStefano Garzarella 		.stream_dequeue           = virtio_transport_stream_dequeue,
4194c7246dcSStefano Garzarella 		.stream_has_data          = virtio_transport_stream_has_data,
4204c7246dcSStefano Garzarella 		.stream_has_space         = virtio_transport_stream_has_space,
4214c7246dcSStefano Garzarella 		.stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
4224c7246dcSStefano Garzarella 		.stream_is_active         = virtio_transport_stream_is_active,
4234c7246dcSStefano Garzarella 		.stream_allow             = virtio_transport_stream_allow,
4244c7246dcSStefano Garzarella 
425ced7b713SArseny Krasnov 		.seqpacket_dequeue        = virtio_transport_seqpacket_dequeue,
426ced7b713SArseny Krasnov 		.seqpacket_enqueue        = virtio_transport_seqpacket_enqueue,
427ced7b713SArseny Krasnov 		.seqpacket_allow          = vhost_transport_seqpacket_allow,
428ced7b713SArseny Krasnov 		.seqpacket_has_data       = virtio_transport_seqpacket_has_data,
429ced7b713SArseny Krasnov 
4304c7246dcSStefano Garzarella 		.notify_poll_in           = virtio_transport_notify_poll_in,
4314c7246dcSStefano Garzarella 		.notify_poll_out          = virtio_transport_notify_poll_out,
4324c7246dcSStefano Garzarella 		.notify_recv_init         = virtio_transport_notify_recv_init,
4334c7246dcSStefano Garzarella 		.notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
4344c7246dcSStefano Garzarella 		.notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
4354c7246dcSStefano Garzarella 		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
4364c7246dcSStefano Garzarella 		.notify_send_init         = virtio_transport_notify_send_init,
4374c7246dcSStefano Garzarella 		.notify_send_pre_block    = virtio_transport_notify_send_pre_block,
4384c7246dcSStefano Garzarella 		.notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
4394c7246dcSStefano Garzarella 		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
440b9f2b0ffSStefano Garzarella 		.notify_buffer_size       = virtio_transport_notify_buffer_size,
4414c7246dcSStefano Garzarella 
442634f1a71SBobby Eshleman 		.read_skb = virtio_transport_read_skb,
4434c7246dcSStefano Garzarella 	},
4444c7246dcSStefano Garzarella 
4454c7246dcSStefano Garzarella 	.send_pkt = vhost_transport_send_pkt,
4464c7246dcSStefano Garzarella };
4474c7246dcSStefano Garzarella 
448ced7b713SArseny Krasnov static bool vhost_transport_seqpacket_allow(u32 remote_cid)
449ced7b713SArseny Krasnov {
450ced7b713SArseny Krasnov 	struct vhost_vsock *vsock;
451ced7b713SArseny Krasnov 	bool seqpacket_allow = false;
452ced7b713SArseny Krasnov 
453ced7b713SArseny Krasnov 	rcu_read_lock();
454ced7b713SArseny Krasnov 	vsock = vhost_vsock_get(remote_cid);
455ced7b713SArseny Krasnov 
456ced7b713SArseny Krasnov 	if (vsock)
457ced7b713SArseny Krasnov 		seqpacket_allow = vsock->seqpacket_allow;
458ced7b713SArseny Krasnov 
459ced7b713SArseny Krasnov 	rcu_read_unlock();
460ced7b713SArseny Krasnov 
461ced7b713SArseny Krasnov 	return seqpacket_allow;
462ced7b713SArseny Krasnov }
463ced7b713SArseny Krasnov 
464433fc58eSAsias He static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
465433fc58eSAsias He {
466433fc58eSAsias He 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
467433fc58eSAsias He 						  poll.work);
468433fc58eSAsias He 	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
469433fc58eSAsias He 						 dev);
470e79b431fSJason Wang 	int head, pkts = 0, total_len = 0;
471433fc58eSAsias He 	unsigned int out, in;
47271dc9ec9SBobby Eshleman 	struct sk_buff *skb;
473433fc58eSAsias He 	bool added = false;
474433fc58eSAsias He 
475433fc58eSAsias He 	mutex_lock(&vq->mutex);
476433fc58eSAsias He 
477247643f8SEugenio Pérez 	if (!vhost_vq_get_backend(vq))
478433fc58eSAsias He 		goto out;
479433fc58eSAsias He 
480e13a6915SStefano Garzarella 	if (!vq_meta_prefetch(vq))
481e13a6915SStefano Garzarella 		goto out;
482e13a6915SStefano Garzarella 
483433fc58eSAsias He 	vhost_disable_notify(&vsock->dev, vq);
484e79b431fSJason Wang 	do {
48571dc9ec9SBobby Eshleman 		struct virtio_vsock_hdr *hdr;
48671dc9ec9SBobby Eshleman 
487433fc58eSAsias He 		if (!vhost_vsock_more_replies(vsock)) {
488433fc58eSAsias He 			/* Stop tx until the device processes already
489433fc58eSAsias He 			 * pending replies.  Leave tx virtqueue
490433fc58eSAsias He 			 * callbacks disabled.
491433fc58eSAsias He 			 */
492433fc58eSAsias He 			goto no_more_replies;
493433fc58eSAsias He 		}
494433fc58eSAsias He 
495433fc58eSAsias He 		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
496433fc58eSAsias He 					 &out, &in, NULL, NULL);
497433fc58eSAsias He 		if (head < 0)
498433fc58eSAsias He 			break;
499433fc58eSAsias He 
500433fc58eSAsias He 		if (head == vq->num) {
501433fc58eSAsias He 			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
502433fc58eSAsias He 				vhost_disable_notify(&vsock->dev, vq);
503433fc58eSAsias He 				continue;
504433fc58eSAsias He 			}
505433fc58eSAsias He 			break;
506433fc58eSAsias He 		}
507433fc58eSAsias He 
50871dc9ec9SBobby Eshleman 		skb = vhost_vsock_alloc_skb(vq, out, in);
50971dc9ec9SBobby Eshleman 		if (!skb) {
510433fc58eSAsias He 			vq_err(vq, "Faulted on pkt\n");
511433fc58eSAsias He 			continue;
512433fc58eSAsias He 		}
513433fc58eSAsias He 
51471dc9ec9SBobby Eshleman 		total_len += sizeof(*hdr) + skb->len;
5153fda5d6eSStefan Hajnoczi 
51682dfb540SGerard Garcia 		/* Deliver to monitoring devices all received packets */
51771dc9ec9SBobby Eshleman 		virtio_transport_deliver_tap_pkt(skb);
51871dc9ec9SBobby Eshleman 
51971dc9ec9SBobby Eshleman 		hdr = virtio_vsock_hdr(skb);
52082dfb540SGerard Garcia 
521433fc58eSAsias He 		/* Only accept correctly addressed packets */
52271dc9ec9SBobby Eshleman 		if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid &&
52371dc9ec9SBobby Eshleman 		    le64_to_cpu(hdr->dst_cid) ==
5248a3cc29cSStefano Garzarella 		    vhost_transport_get_local_cid())
52571dc9ec9SBobby Eshleman 			virtio_transport_recv_pkt(&vhost_transport, skb);
526433fc58eSAsias He 		else
52771dc9ec9SBobby Eshleman 			kfree_skb(skb);
528433fc58eSAsias He 
52949d8c5ffSStefano Garzarella 		vhost_add_used(vq, head, 0);
530433fc58eSAsias He 		added = true;
531e79b431fSJason Wang 	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
532433fc58eSAsias He 
533433fc58eSAsias He no_more_replies:
534433fc58eSAsias He 	if (added)
535433fc58eSAsias He 		vhost_signal(&vsock->dev, vq);
536433fc58eSAsias He 
537433fc58eSAsias He out:
538433fc58eSAsias He 	mutex_unlock(&vq->mutex);
539433fc58eSAsias He }
540433fc58eSAsias He 
541433fc58eSAsias He static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
542433fc58eSAsias He {
543433fc58eSAsias He 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
544433fc58eSAsias He 						poll.work);
545433fc58eSAsias He 	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
546433fc58eSAsias He 						 dev);
547433fc58eSAsias He 
548433fc58eSAsias He 	vhost_transport_do_send_pkt(vsock, vq);
549433fc58eSAsias He }
550433fc58eSAsias He 
551433fc58eSAsias He static int vhost_vsock_start(struct vhost_vsock *vsock)
552433fc58eSAsias He {
5530516ffd8SStefan Hajnoczi 	struct vhost_virtqueue *vq;
554433fc58eSAsias He 	size_t i;
555433fc58eSAsias He 	int ret;
556433fc58eSAsias He 
557433fc58eSAsias He 	mutex_lock(&vsock->dev.mutex);
558433fc58eSAsias He 
559433fc58eSAsias He 	ret = vhost_dev_check_owner(&vsock->dev);
560433fc58eSAsias He 	if (ret)
561433fc58eSAsias He 		goto err;
562433fc58eSAsias He 
563433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
5640516ffd8SStefan Hajnoczi 		vq = &vsock->vqs[i];
565433fc58eSAsias He 
566433fc58eSAsias He 		mutex_lock(&vq->mutex);
567433fc58eSAsias He 
568433fc58eSAsias He 		if (!vhost_vq_access_ok(vq)) {
569433fc58eSAsias He 			ret = -EFAULT;
570433fc58eSAsias He 			goto err_vq;
571433fc58eSAsias He 		}
572433fc58eSAsias He 
573247643f8SEugenio Pérez 		if (!vhost_vq_get_backend(vq)) {
574247643f8SEugenio Pérez 			vhost_vq_set_backend(vq, vsock);
5750516ffd8SStefan Hajnoczi 			ret = vhost_vq_init_access(vq);
5760516ffd8SStefan Hajnoczi 			if (ret)
5770516ffd8SStefan Hajnoczi 				goto err_vq;
578433fc58eSAsias He 		}
579433fc58eSAsias He 
580433fc58eSAsias He 		mutex_unlock(&vq->mutex);
581433fc58eSAsias He 	}
582433fc58eSAsias He 
5830b841030SJia He 	/* Some packets may have been queued before the device was started,
5840b841030SJia He 	 * let's kick the send worker to send them.
5850b841030SJia He 	 */
586*9e09d0ecSMike Christie 	vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
5870b841030SJia He 
588433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
589433fc58eSAsias He 	return 0;
590433fc58eSAsias He 
591433fc58eSAsias He err_vq:
592247643f8SEugenio Pérez 	vhost_vq_set_backend(vq, NULL);
5930516ffd8SStefan Hajnoczi 	mutex_unlock(&vq->mutex);
5940516ffd8SStefan Hajnoczi 
595433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
5960516ffd8SStefan Hajnoczi 		vq = &vsock->vqs[i];
597433fc58eSAsias He 
598433fc58eSAsias He 		mutex_lock(&vq->mutex);
599247643f8SEugenio Pérez 		vhost_vq_set_backend(vq, NULL);
600433fc58eSAsias He 		mutex_unlock(&vq->mutex);
601433fc58eSAsias He 	}
602433fc58eSAsias He err:
603433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
604433fc58eSAsias He 	return ret;
605433fc58eSAsias He }
606433fc58eSAsias He 
607a58da53fSStefano Garzarella static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
608433fc58eSAsias He {
609433fc58eSAsias He 	size_t i;
610a58da53fSStefano Garzarella 	int ret = 0;
611433fc58eSAsias He 
612433fc58eSAsias He 	mutex_lock(&vsock->dev.mutex);
613433fc58eSAsias He 
614a58da53fSStefano Garzarella 	if (check_owner) {
615433fc58eSAsias He 		ret = vhost_dev_check_owner(&vsock->dev);
616433fc58eSAsias He 		if (ret)
617433fc58eSAsias He 			goto err;
618a58da53fSStefano Garzarella 	}
619433fc58eSAsias He 
620433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
621433fc58eSAsias He 		struct vhost_virtqueue *vq = &vsock->vqs[i];
622433fc58eSAsias He 
623433fc58eSAsias He 		mutex_lock(&vq->mutex);
624247643f8SEugenio Pérez 		vhost_vq_set_backend(vq, NULL);
625433fc58eSAsias He 		mutex_unlock(&vq->mutex);
626433fc58eSAsias He 	}
627433fc58eSAsias He 
628433fc58eSAsias He err:
629433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
630433fc58eSAsias He 	return ret;
631433fc58eSAsias He }
632433fc58eSAsias He 
/* Release a vhost_vsock; kvfree() matches the kvmalloc() done at open. */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}
637433fc58eSAsias He 
638433fc58eSAsias He static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
639433fc58eSAsias He {
640433fc58eSAsias He 	struct vhost_virtqueue **vqs;
641433fc58eSAsias He 	struct vhost_vsock *vsock;
642433fc58eSAsias He 	int ret;
643433fc58eSAsias He 
644433fc58eSAsias He 	/* This struct is large and allocation could fail, fall back to vmalloc
645433fc58eSAsias He 	 * if there is no other way.
646433fc58eSAsias He 	 */
647dcda9b04SMichal Hocko 	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
648433fc58eSAsias He 	if (!vsock)
649433fc58eSAsias He 		return -ENOMEM;
650433fc58eSAsias He 
651433fc58eSAsias He 	vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
652433fc58eSAsias He 	if (!vqs) {
653433fc58eSAsias He 		ret = -ENOMEM;
654433fc58eSAsias He 		goto out;
655433fc58eSAsias He 	}
656433fc58eSAsias He 
657a72b69dcSStefan Hajnoczi 	vsock->guest_cid = 0; /* no CID assigned yet */
658a72b69dcSStefan Hajnoczi 
659433fc58eSAsias He 	atomic_set(&vsock->queued_replies, 0);
660433fc58eSAsias He 
661433fc58eSAsias He 	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
662433fc58eSAsias He 	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
663433fc58eSAsias He 	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
664433fc58eSAsias He 	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
665433fc58eSAsias He 
666e82b9b07SJason Wang 	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
667e82b9b07SJason Wang 		       UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
66801fcb1cbSJason Wang 		       VHOST_VSOCK_WEIGHT, true, NULL);
669433fc58eSAsias He 
670433fc58eSAsias He 	file->private_data = vsock;
67171dc9ec9SBobby Eshleman 	skb_queue_head_init(&vsock->send_pkt_queue);
672433fc58eSAsias He 	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
673433fc58eSAsias He 	return 0;
674433fc58eSAsias He 
675433fc58eSAsias He out:
676433fc58eSAsias He 	vhost_vsock_free(vsock);
677433fc58eSAsias He 	return ret;
678433fc58eSAsias He }
679433fc58eSAsias He 
680433fc58eSAsias He static void vhost_vsock_flush(struct vhost_vsock *vsock)
681433fc58eSAsias He {
682b2ffa407SMike Christie 	vhost_dev_flush(&vsock->dev);
683433fc58eSAsias He }
684433fc58eSAsias He 
685433fc58eSAsias He static void vhost_vsock_reset_orphans(struct sock *sk)
686433fc58eSAsias He {
687433fc58eSAsias He 	struct vsock_sock *vsk = vsock_sk(sk);
688433fc58eSAsias He 
689433fc58eSAsias He 	/* vmci_transport.c doesn't take sk_lock here either.  At least we're
690433fc58eSAsias He 	 * under vsock_table_lock so the sock cannot disappear while we're
691433fc58eSAsias He 	 * executing.
692433fc58eSAsias He 	 */
693433fc58eSAsias He 
694c38f57daSStefan Hajnoczi 	/* If the peer is still valid, no need to reset connection */
695c38f57daSStefan Hajnoczi 	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
696c38f57daSStefan Hajnoczi 		return;
697c38f57daSStefan Hajnoczi 
698c38f57daSStefan Hajnoczi 	/* If the close timeout is pending, let it expire.  This avoids races
699c38f57daSStefan Hajnoczi 	 * with the timeout callback.
700c38f57daSStefan Hajnoczi 	 */
701c38f57daSStefan Hajnoczi 	if (vsk->close_work_scheduled)
702c38f57daSStefan Hajnoczi 		return;
703c38f57daSStefan Hajnoczi 
704433fc58eSAsias He 	sock_set_flag(sk, SOCK_DONE);
705433fc58eSAsias He 	vsk->peer_shutdown = SHUTDOWN_MASK;
706433fc58eSAsias He 	sk->sk_state = SS_UNCONNECTED;
707433fc58eSAsias He 	sk->sk_err = ECONNRESET;
708e3ae2365SAlexander Aring 	sk_error_report(sk);
709433fc58eSAsias He }
710433fc58eSAsias He 
711433fc58eSAsias He static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
712433fc58eSAsias He {
713433fc58eSAsias He 	struct vhost_vsock *vsock = file->private_data;
714433fc58eSAsias He 
7156db3d8dcSStefan Hajnoczi 	mutex_lock(&vhost_vsock_mutex);
716834e772cSStefan Hajnoczi 	if (vsock->guest_cid)
717834e772cSStefan Hajnoczi 		hash_del_rcu(&vsock->hash);
7186db3d8dcSStefan Hajnoczi 	mutex_unlock(&vhost_vsock_mutex);
719433fc58eSAsias He 
720834e772cSStefan Hajnoczi 	/* Wait for other CPUs to finish using vsock */
721834e772cSStefan Hajnoczi 	synchronize_rcu();
722834e772cSStefan Hajnoczi 
723433fc58eSAsias He 	/* Iterating over all connections for all CIDs to find orphans is
724433fc58eSAsias He 	 * inefficient.  Room for improvement here. */
7258e6ed963SJiyong Park 	vsock_for_each_connected_socket(&vhost_transport.transport,
7268e6ed963SJiyong Park 					vhost_vsock_reset_orphans);
727433fc58eSAsias He 
728a58da53fSStefano Garzarella 	/* Don't check the owner, because we are in the release path, so we
729a58da53fSStefano Garzarella 	 * need to stop the vsock device in any case.
730a58da53fSStefano Garzarella 	 * vhost_vsock_stop() can not fail in this case, so we don't need to
731a58da53fSStefano Garzarella 	 * check the return code.
732a58da53fSStefano Garzarella 	 */
733a58da53fSStefano Garzarella 	vhost_vsock_stop(vsock, false);
734433fc58eSAsias He 	vhost_vsock_flush(vsock);
735433fc58eSAsias He 	vhost_dev_stop(&vsock->dev);
736433fc58eSAsias He 
73771dc9ec9SBobby Eshleman 	virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);
738433fc58eSAsias He 
739f6f93f75S夷则(Caspar) 	vhost_dev_cleanup(&vsock->dev);
740433fc58eSAsias He 	kfree(vsock->dev.vqs);
741433fc58eSAsias He 	vhost_vsock_free(vsock);
742433fc58eSAsias He 	return 0;
743433fc58eSAsias He }
744433fc58eSAsias He 
745433fc58eSAsias He static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
746433fc58eSAsias He {
747433fc58eSAsias He 	struct vhost_vsock *other;
748433fc58eSAsias He 
749433fc58eSAsias He 	/* Refuse reserved CIDs */
750433fc58eSAsias He 	if (guest_cid <= VMADDR_CID_HOST ||
751433fc58eSAsias He 	    guest_cid == U32_MAX)
752433fc58eSAsias He 		return -EINVAL;
753433fc58eSAsias He 
754433fc58eSAsias He 	/* 64-bit CIDs are not yet supported */
755433fc58eSAsias He 	if (guest_cid > U32_MAX)
756433fc58eSAsias He 		return -EINVAL;
757433fc58eSAsias He 
758ed8640a9SStefano Garzarella 	/* Refuse if CID is assigned to the guest->host transport (i.e. nested
759ed8640a9SStefano Garzarella 	 * VM), to make the loopback work.
760ed8640a9SStefano Garzarella 	 */
761ed8640a9SStefano Garzarella 	if (vsock_find_cid(guest_cid))
762ed8640a9SStefano Garzarella 		return -EADDRINUSE;
763ed8640a9SStefano Garzarella 
764433fc58eSAsias He 	/* Refuse if CID is already in use */
7656db3d8dcSStefan Hajnoczi 	mutex_lock(&vhost_vsock_mutex);
766834e772cSStefan Hajnoczi 	other = vhost_vsock_get(guest_cid);
7676c083c2bSGao feng 	if (other && other != vsock) {
7686db3d8dcSStefan Hajnoczi 		mutex_unlock(&vhost_vsock_mutex);
7696c083c2bSGao feng 		return -EADDRINUSE;
7706c083c2bSGao feng 	}
771834e772cSStefan Hajnoczi 
772834e772cSStefan Hajnoczi 	if (vsock->guest_cid)
773834e772cSStefan Hajnoczi 		hash_del_rcu(&vsock->hash);
774834e772cSStefan Hajnoczi 
775433fc58eSAsias He 	vsock->guest_cid = guest_cid;
7767fbe078cSZha Bin 	hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
7776db3d8dcSStefan Hajnoczi 	mutex_unlock(&vhost_vsock_mutex);
778433fc58eSAsias He 
779433fc58eSAsias He 	return 0;
780433fc58eSAsias He }
781433fc58eSAsias He 
782433fc58eSAsias He static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
783433fc58eSAsias He {
784433fc58eSAsias He 	struct vhost_virtqueue *vq;
785433fc58eSAsias He 	int i;
786433fc58eSAsias He 
787433fc58eSAsias He 	if (features & ~VHOST_VSOCK_FEATURES)
788433fc58eSAsias He 		return -EOPNOTSUPP;
789433fc58eSAsias He 
790433fc58eSAsias He 	mutex_lock(&vsock->dev.mutex);
791433fc58eSAsias He 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
792433fc58eSAsias He 	    !vhost_log_access_ok(&vsock->dev)) {
793e13a6915SStefano Garzarella 		goto err;
794e13a6915SStefano Garzarella 	}
795e13a6915SStefano Garzarella 
796e13a6915SStefano Garzarella 	if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
797759aba1eSLiming Wu 		if (vhost_init_device_iotlb(&vsock->dev))
798e13a6915SStefano Garzarella 			goto err;
799433fc58eSAsias He 	}
800433fc58eSAsias He 
801ced7b713SArseny Krasnov 	if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
802ced7b713SArseny Krasnov 		vsock->seqpacket_allow = true;
803ced7b713SArseny Krasnov 
804433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
805433fc58eSAsias He 		vq = &vsock->vqs[i];
806433fc58eSAsias He 		mutex_lock(&vq->mutex);
807433fc58eSAsias He 		vq->acked_features = features;
808433fc58eSAsias He 		mutex_unlock(&vq->mutex);
809433fc58eSAsias He 	}
810433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
811433fc58eSAsias He 	return 0;
812e13a6915SStefano Garzarella 
813e13a6915SStefano Garzarella err:
814e13a6915SStefano Garzarella 	mutex_unlock(&vsock->dev.mutex);
815e13a6915SStefano Garzarella 	return -EFAULT;
816433fc58eSAsias He }
817433fc58eSAsias He 
818433fc58eSAsias He static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
819433fc58eSAsias He 				  unsigned long arg)
820433fc58eSAsias He {
821433fc58eSAsias He 	struct vhost_vsock *vsock = f->private_data;
822433fc58eSAsias He 	void __user *argp = (void __user *)arg;
823433fc58eSAsias He 	u64 guest_cid;
824433fc58eSAsias He 	u64 features;
825433fc58eSAsias He 	int start;
826433fc58eSAsias He 	int r;
827433fc58eSAsias He 
828433fc58eSAsias He 	switch (ioctl) {
829433fc58eSAsias He 	case VHOST_VSOCK_SET_GUEST_CID:
830433fc58eSAsias He 		if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
831433fc58eSAsias He 			return -EFAULT;
832433fc58eSAsias He 		return vhost_vsock_set_cid(vsock, guest_cid);
833433fc58eSAsias He 	case VHOST_VSOCK_SET_RUNNING:
834433fc58eSAsias He 		if (copy_from_user(&start, argp, sizeof(start)))
835433fc58eSAsias He 			return -EFAULT;
836433fc58eSAsias He 		if (start)
837433fc58eSAsias He 			return vhost_vsock_start(vsock);
838433fc58eSAsias He 		else
839a58da53fSStefano Garzarella 			return vhost_vsock_stop(vsock, true);
840433fc58eSAsias He 	case VHOST_GET_FEATURES:
841433fc58eSAsias He 		features = VHOST_VSOCK_FEATURES;
842433fc58eSAsias He 		if (copy_to_user(argp, &features, sizeof(features)))
843433fc58eSAsias He 			return -EFAULT;
844433fc58eSAsias He 		return 0;
845433fc58eSAsias He 	case VHOST_SET_FEATURES:
846433fc58eSAsias He 		if (copy_from_user(&features, argp, sizeof(features)))
847433fc58eSAsias He 			return -EFAULT;
848433fc58eSAsias He 		return vhost_vsock_set_features(vsock, features);
849e13a6915SStefano Garzarella 	case VHOST_GET_BACKEND_FEATURES:
850e13a6915SStefano Garzarella 		features = VHOST_VSOCK_BACKEND_FEATURES;
851e13a6915SStefano Garzarella 		if (copy_to_user(argp, &features, sizeof(features)))
852e13a6915SStefano Garzarella 			return -EFAULT;
853e13a6915SStefano Garzarella 		return 0;
854e13a6915SStefano Garzarella 	case VHOST_SET_BACKEND_FEATURES:
855e13a6915SStefano Garzarella 		if (copy_from_user(&features, argp, sizeof(features)))
856e13a6915SStefano Garzarella 			return -EFAULT;
857e13a6915SStefano Garzarella 		if (features & ~VHOST_VSOCK_BACKEND_FEATURES)
858e13a6915SStefano Garzarella 			return -EOPNOTSUPP;
859e13a6915SStefano Garzarella 		vhost_set_backend_features(&vsock->dev, features);
860e13a6915SStefano Garzarella 		return 0;
861433fc58eSAsias He 	default:
862433fc58eSAsias He 		mutex_lock(&vsock->dev.mutex);
863433fc58eSAsias He 		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
864433fc58eSAsias He 		if (r == -ENOIOCTLCMD)
865433fc58eSAsias He 			r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
866433fc58eSAsias He 		else
867433fc58eSAsias He 			vhost_vsock_flush(vsock);
868433fc58eSAsias He 		mutex_unlock(&vsock->dev.mutex);
869433fc58eSAsias He 		return r;
870433fc58eSAsias He 	}
871433fc58eSAsias He }
872433fc58eSAsias He 
873e13a6915SStefano Garzarella static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
874e13a6915SStefano Garzarella {
875e13a6915SStefano Garzarella 	struct file *file = iocb->ki_filp;
876e13a6915SStefano Garzarella 	struct vhost_vsock *vsock = file->private_data;
877e13a6915SStefano Garzarella 	struct vhost_dev *dev = &vsock->dev;
878e13a6915SStefano Garzarella 	int noblock = file->f_flags & O_NONBLOCK;
879e13a6915SStefano Garzarella 
880e13a6915SStefano Garzarella 	return vhost_chr_read_iter(dev, to, noblock);
881e13a6915SStefano Garzarella }
882e13a6915SStefano Garzarella 
883e13a6915SStefano Garzarella static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb,
884e13a6915SStefano Garzarella 					struct iov_iter *from)
885e13a6915SStefano Garzarella {
886e13a6915SStefano Garzarella 	struct file *file = iocb->ki_filp;
887e13a6915SStefano Garzarella 	struct vhost_vsock *vsock = file->private_data;
888e13a6915SStefano Garzarella 	struct vhost_dev *dev = &vsock->dev;
889e13a6915SStefano Garzarella 
890e13a6915SStefano Garzarella 	return vhost_chr_write_iter(dev, from);
891e13a6915SStefano Garzarella }
892e13a6915SStefano Garzarella 
893e13a6915SStefano Garzarella static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait)
894e13a6915SStefano Garzarella {
895e13a6915SStefano Garzarella 	struct vhost_vsock *vsock = file->private_data;
896e13a6915SStefano Garzarella 	struct vhost_dev *dev = &vsock->dev;
897e13a6915SStefano Garzarella 
898e13a6915SStefano Garzarella 	return vhost_chr_poll(file, dev, wait);
899e13a6915SStefano Garzarella }
900e13a6915SStefano Garzarella 
901433fc58eSAsias He static const struct file_operations vhost_vsock_fops = {
902433fc58eSAsias He 	.owner          = THIS_MODULE,
903433fc58eSAsias He 	.open           = vhost_vsock_dev_open,
904433fc58eSAsias He 	.release        = vhost_vsock_dev_release,
905433fc58eSAsias He 	.llseek		= noop_llseek,
906433fc58eSAsias He 	.unlocked_ioctl = vhost_vsock_dev_ioctl,
907407e9ef7SArnd Bergmann 	.compat_ioctl   = compat_ptr_ioctl,
908e13a6915SStefano Garzarella 	.read_iter      = vhost_vsock_chr_read_iter,
909e13a6915SStefano Garzarella 	.write_iter     = vhost_vsock_chr_write_iter,
910e13a6915SStefano Garzarella 	.poll           = vhost_vsock_chr_poll,
911433fc58eSAsias He };
912433fc58eSAsias He 
913433fc58eSAsias He static struct miscdevice vhost_vsock_misc = {
914f4660cc9SStefan Hajnoczi 	.minor = VHOST_VSOCK_MINOR,
915433fc58eSAsias He 	.name = "vhost-vsock",
916433fc58eSAsias He 	.fops = &vhost_vsock_fops,
917433fc58eSAsias He };
918433fc58eSAsias He 
919433fc58eSAsias He static int __init vhost_vsock_init(void)
920433fc58eSAsias He {
921433fc58eSAsias He 	int ret;
922433fc58eSAsias He 
923c0cfa2d8SStefano Garzarella 	ret = vsock_core_register(&vhost_transport.transport,
924c0cfa2d8SStefano Garzarella 				  VSOCK_TRANSPORT_F_H2G);
925433fc58eSAsias He 	if (ret < 0)
926433fc58eSAsias He 		return ret;
9277a4efe18SYuan Can 
9287a4efe18SYuan Can 	ret = misc_register(&vhost_vsock_misc);
9297a4efe18SYuan Can 	if (ret) {
9307a4efe18SYuan Can 		vsock_core_unregister(&vhost_transport.transport);
9317a4efe18SYuan Can 		return ret;
9327a4efe18SYuan Can 	}
9337a4efe18SYuan Can 
9347a4efe18SYuan Can 	return 0;
935433fc58eSAsias He };
936433fc58eSAsias He 
937433fc58eSAsias He static void __exit vhost_vsock_exit(void)
938433fc58eSAsias He {
939433fc58eSAsias He 	misc_deregister(&vhost_vsock_misc);
940c0cfa2d8SStefano Garzarella 	vsock_core_unregister(&vhost_transport.transport);
941433fc58eSAsias He };
942433fc58eSAsias He 
943433fc58eSAsias He module_init(vhost_vsock_init);
944433fc58eSAsias He module_exit(vhost_vsock_exit);
945433fc58eSAsias He MODULE_LICENSE("GPL v2");
946433fc58eSAsias He MODULE_AUTHOR("Asias He");
947433fc58eSAsias He MODULE_DESCRIPTION("vhost transport for vsock ");
948f4660cc9SStefan Hajnoczi MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
949f4660cc9SStefan Hajnoczi MODULE_ALIAS("devname:vhost-vsock");
950