xref: /openbmc/linux/drivers/vhost/vsock.c (revision 4c7246dc)
17a338472SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2433fc58eSAsias He /*
3433fc58eSAsias He  * vhost transport for vsock
4433fc58eSAsias He  *
5433fc58eSAsias He  * Copyright (C) 2013-2015 Red Hat, Inc.
6433fc58eSAsias He  * Author: Asias He <asias@redhat.com>
7433fc58eSAsias He  *         Stefan Hajnoczi <stefanha@redhat.com>
8433fc58eSAsias He  */
9433fc58eSAsias He #include <linux/miscdevice.h>
10433fc58eSAsias He #include <linux/atomic.h>
11433fc58eSAsias He #include <linux/module.h>
12433fc58eSAsias He #include <linux/mutex.h>
13433fc58eSAsias He #include <linux/vmalloc.h>
14433fc58eSAsias He #include <net/sock.h>
15433fc58eSAsias He #include <linux/virtio_vsock.h>
16433fc58eSAsias He #include <linux/vhost.h>
17834e772cSStefan Hajnoczi #include <linux/hashtable.h>
18433fc58eSAsias He 
19433fc58eSAsias He #include <net/af_vsock.h>
20433fc58eSAsias He #include "vhost.h"
21433fc58eSAsias He 
/* CID reported for the host side by vhost_transport_get_local_cid(). */
#define VHOST_VSOCK_DEFAULT_HOST_CID	2

/* Byte budget: max number of bytes transferred before the job is requeued,
 * so that a single virtqueue cannot starve the others.
 */
#define VHOST_VSOCK_WEIGHT 0x80000

/* Packet budget: max number of packets transferred before the job is
 * requeued, so that a single virtqueue cannot starve the others with a
 * stream of small packets.
 */
#define VHOST_VSOCK_PKT_WEIGHT 256
31433fc58eSAsias He 
/* Feature set of this device: exactly the generic VHOST_FEATURES value. */
enum {
	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
};
35433fc58eSAsias He 
/* Used to track all the vhost_vsock instances on the system.
 * vhost_vsock_mutex serializes writers of vhost_vsock_hash; lookups go
 * through vhost_vsock_get(), which walks the table with the RCU iterator.
 */
static DEFINE_MUTEX(vhost_vsock_mutex);
static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
39433fc58eSAsias He 
/* Per-open-file state of one vhost-vsock device. */
struct vhost_vsock {
	struct vhost_dev dev;
	/* Indexed by VSOCK_VQ_TX and VSOCK_VQ_RX */
	struct vhost_virtqueue vqs[2];

	/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
	struct hlist_node hash;

	/* Work item that runs vhost_transport_send_pkt_work() */
	struct vhost_work send_pkt_work;
	/* Protects send_pkt_list */
	spinlock_t send_pkt_list_lock;
	struct list_head send_pkt_list;	/* host->guest pending packets */

	/* Incremented when a reply packet is queued on send_pkt_list and
	 * decremented when it has been fully sent or cancelled.
	 */
	atomic_t queued_replies;

	/* 0 means that no CID has been assigned yet */
	u32 guest_cid;
};
55433fc58eSAsias He 
56433fc58eSAsias He static u32 vhost_transport_get_local_cid(void)
57433fc58eSAsias He {
58433fc58eSAsias He 	return VHOST_VSOCK_DEFAULT_HOST_CID;
59433fc58eSAsias He }
60433fc58eSAsias He 
616db3d8dcSStefan Hajnoczi /* Callers that dereference the return value must hold vhost_vsock_mutex or the
62834e772cSStefan Hajnoczi  * RCU read lock.
63834e772cSStefan Hajnoczi  */
64834e772cSStefan Hajnoczi static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
65433fc58eSAsias He {
66433fc58eSAsias He 	struct vhost_vsock *vsock;
67433fc58eSAsias He 
68834e772cSStefan Hajnoczi 	hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
69433fc58eSAsias He 		u32 other_cid = vsock->guest_cid;
70433fc58eSAsias He 
71433fc58eSAsias He 		/* Skip instances that have no CID yet */
72433fc58eSAsias He 		if (other_cid == 0)
73433fc58eSAsias He 			continue;
74433fc58eSAsias He 
75ff3c1b1aSVaibhav Murkute 		if (other_cid == guest_cid)
76433fc58eSAsias He 			return vsock;
77ff3c1b1aSVaibhav Murkute 
78433fc58eSAsias He 	}
79433fc58eSAsias He 
80433fc58eSAsias He 	return NULL;
81433fc58eSAsias He }
82433fc58eSAsias He 
83433fc58eSAsias He static void
84433fc58eSAsias He vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
85433fc58eSAsias He 			    struct vhost_virtqueue *vq)
86433fc58eSAsias He {
87433fc58eSAsias He 	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
88e79b431fSJason Wang 	int pkts = 0, total_len = 0;
89433fc58eSAsias He 	bool added = false;
90433fc58eSAsias He 	bool restart_tx = false;
91433fc58eSAsias He 
92433fc58eSAsias He 	mutex_lock(&vq->mutex);
93433fc58eSAsias He 
94433fc58eSAsias He 	if (!vq->private_data)
95433fc58eSAsias He 		goto out;
96433fc58eSAsias He 
97433fc58eSAsias He 	/* Avoid further vmexits, we're already processing the virtqueue */
98433fc58eSAsias He 	vhost_disable_notify(&vsock->dev, vq);
99433fc58eSAsias He 
100e79b431fSJason Wang 	do {
101433fc58eSAsias He 		struct virtio_vsock_pkt *pkt;
102433fc58eSAsias He 		struct iov_iter iov_iter;
103433fc58eSAsias He 		unsigned out, in;
104433fc58eSAsias He 		size_t nbytes;
1056dbd3e66SStefano Garzarella 		size_t iov_len, payload_len;
106433fc58eSAsias He 		int head;
107433fc58eSAsias He 
108433fc58eSAsias He 		spin_lock_bh(&vsock->send_pkt_list_lock);
109433fc58eSAsias He 		if (list_empty(&vsock->send_pkt_list)) {
110433fc58eSAsias He 			spin_unlock_bh(&vsock->send_pkt_list_lock);
111433fc58eSAsias He 			vhost_enable_notify(&vsock->dev, vq);
112433fc58eSAsias He 			break;
113433fc58eSAsias He 		}
114433fc58eSAsias He 
115433fc58eSAsias He 		pkt = list_first_entry(&vsock->send_pkt_list,
116433fc58eSAsias He 				       struct virtio_vsock_pkt, list);
117433fc58eSAsias He 		list_del_init(&pkt->list);
118433fc58eSAsias He 		spin_unlock_bh(&vsock->send_pkt_list_lock);
119433fc58eSAsias He 
120433fc58eSAsias He 		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
121433fc58eSAsias He 					 &out, &in, NULL, NULL);
122433fc58eSAsias He 		if (head < 0) {
123433fc58eSAsias He 			spin_lock_bh(&vsock->send_pkt_list_lock);
124433fc58eSAsias He 			list_add(&pkt->list, &vsock->send_pkt_list);
125433fc58eSAsias He 			spin_unlock_bh(&vsock->send_pkt_list_lock);
126433fc58eSAsias He 			break;
127433fc58eSAsias He 		}
128433fc58eSAsias He 
129433fc58eSAsias He 		if (head == vq->num) {
130433fc58eSAsias He 			spin_lock_bh(&vsock->send_pkt_list_lock);
131433fc58eSAsias He 			list_add(&pkt->list, &vsock->send_pkt_list);
132433fc58eSAsias He 			spin_unlock_bh(&vsock->send_pkt_list_lock);
133433fc58eSAsias He 
134433fc58eSAsias He 			/* We cannot finish yet if more buffers snuck in while
135433fc58eSAsias He 			 * re-enabling notify.
136433fc58eSAsias He 			 */
137433fc58eSAsias He 			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
138433fc58eSAsias He 				vhost_disable_notify(&vsock->dev, vq);
139433fc58eSAsias He 				continue;
140433fc58eSAsias He 			}
141433fc58eSAsias He 			break;
142433fc58eSAsias He 		}
143433fc58eSAsias He 
144433fc58eSAsias He 		if (out) {
145433fc58eSAsias He 			virtio_transport_free_pkt(pkt);
146433fc58eSAsias He 			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
147433fc58eSAsias He 			break;
148433fc58eSAsias He 		}
149433fc58eSAsias He 
1506dbd3e66SStefano Garzarella 		iov_len = iov_length(&vq->iov[out], in);
1516dbd3e66SStefano Garzarella 		if (iov_len < sizeof(pkt->hdr)) {
1526dbd3e66SStefano Garzarella 			virtio_transport_free_pkt(pkt);
1536dbd3e66SStefano Garzarella 			vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
1546dbd3e66SStefano Garzarella 			break;
1556dbd3e66SStefano Garzarella 		}
1566dbd3e66SStefano Garzarella 
1576dbd3e66SStefano Garzarella 		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len);
1586dbd3e66SStefano Garzarella 		payload_len = pkt->len - pkt->off;
1596dbd3e66SStefano Garzarella 
1606dbd3e66SStefano Garzarella 		/* If the packet is greater than the space available in the
1616dbd3e66SStefano Garzarella 		 * buffer, we split it using multiple buffers.
1626dbd3e66SStefano Garzarella 		 */
1636dbd3e66SStefano Garzarella 		if (payload_len > iov_len - sizeof(pkt->hdr))
1646dbd3e66SStefano Garzarella 			payload_len = iov_len - sizeof(pkt->hdr);
1656dbd3e66SStefano Garzarella 
1666dbd3e66SStefano Garzarella 		/* Set the correct length in the header */
1676dbd3e66SStefano Garzarella 		pkt->hdr.len = cpu_to_le32(payload_len);
168433fc58eSAsias He 
169433fc58eSAsias He 		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
170433fc58eSAsias He 		if (nbytes != sizeof(pkt->hdr)) {
171433fc58eSAsias He 			virtio_transport_free_pkt(pkt);
172433fc58eSAsias He 			vq_err(vq, "Faulted on copying pkt hdr\n");
173433fc58eSAsias He 			break;
174433fc58eSAsias He 		}
175433fc58eSAsias He 
1766dbd3e66SStefano Garzarella 		nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len,
1776dbd3e66SStefano Garzarella 				      &iov_iter);
1786dbd3e66SStefano Garzarella 		if (nbytes != payload_len) {
179433fc58eSAsias He 			virtio_transport_free_pkt(pkt);
180433fc58eSAsias He 			vq_err(vq, "Faulted on copying pkt buf\n");
181433fc58eSAsias He 			break;
182433fc58eSAsias He 		}
183433fc58eSAsias He 
1846dbd3e66SStefano Garzarella 		vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
185433fc58eSAsias He 		added = true;
186433fc58eSAsias He 
18782dfb540SGerard Garcia 		/* Deliver to monitoring devices all correctly transmitted
18882dfb540SGerard Garcia 		 * packets.
18982dfb540SGerard Garcia 		 */
19082dfb540SGerard Garcia 		virtio_transport_deliver_tap_pkt(pkt);
19182dfb540SGerard Garcia 
1926dbd3e66SStefano Garzarella 		pkt->off += payload_len;
1936dbd3e66SStefano Garzarella 		total_len += payload_len;
1946dbd3e66SStefano Garzarella 
1956dbd3e66SStefano Garzarella 		/* If we didn't send all the payload we can requeue the packet
1966dbd3e66SStefano Garzarella 		 * to send it with the next available buffer.
1976dbd3e66SStefano Garzarella 		 */
1986dbd3e66SStefano Garzarella 		if (pkt->off < pkt->len) {
1996dbd3e66SStefano Garzarella 			spin_lock_bh(&vsock->send_pkt_list_lock);
2006dbd3e66SStefano Garzarella 			list_add(&pkt->list, &vsock->send_pkt_list);
2016dbd3e66SStefano Garzarella 			spin_unlock_bh(&vsock->send_pkt_list_lock);
2026dbd3e66SStefano Garzarella 		} else {
2036dbd3e66SStefano Garzarella 			if (pkt->reply) {
2046dbd3e66SStefano Garzarella 				int val;
2056dbd3e66SStefano Garzarella 
2066dbd3e66SStefano Garzarella 				val = atomic_dec_return(&vsock->queued_replies);
2076dbd3e66SStefano Garzarella 
2086dbd3e66SStefano Garzarella 				/* Do we have resources to resume tx
2096dbd3e66SStefano Garzarella 				 * processing?
2106dbd3e66SStefano Garzarella 				 */
2116dbd3e66SStefano Garzarella 				if (val + 1 == tx_vq->num)
2126dbd3e66SStefano Garzarella 					restart_tx = true;
2136dbd3e66SStefano Garzarella 			}
2146dbd3e66SStefano Garzarella 
215433fc58eSAsias He 			virtio_transport_free_pkt(pkt);
2166dbd3e66SStefano Garzarella 		}
217e79b431fSJason Wang 	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
218433fc58eSAsias He 	if (added)
219433fc58eSAsias He 		vhost_signal(&vsock->dev, vq);
220433fc58eSAsias He 
221433fc58eSAsias He out:
222433fc58eSAsias He 	mutex_unlock(&vq->mutex);
223433fc58eSAsias He 
224433fc58eSAsias He 	if (restart_tx)
225433fc58eSAsias He 		vhost_poll_queue(&tx_vq->poll);
226433fc58eSAsias He }
227433fc58eSAsias He 
228433fc58eSAsias He static void vhost_transport_send_pkt_work(struct vhost_work *work)
229433fc58eSAsias He {
230433fc58eSAsias He 	struct vhost_virtqueue *vq;
231433fc58eSAsias He 	struct vhost_vsock *vsock;
232433fc58eSAsias He 
233433fc58eSAsias He 	vsock = container_of(work, struct vhost_vsock, send_pkt_work);
234433fc58eSAsias He 	vq = &vsock->vqs[VSOCK_VQ_RX];
235433fc58eSAsias He 
236433fc58eSAsias He 	vhost_transport_do_send_pkt(vsock, vq);
237433fc58eSAsias He }
238433fc58eSAsias He 
239433fc58eSAsias He static int
240433fc58eSAsias He vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
241433fc58eSAsias He {
242433fc58eSAsias He 	struct vhost_vsock *vsock;
243433fc58eSAsias He 	int len = pkt->len;
244433fc58eSAsias He 
245834e772cSStefan Hajnoczi 	rcu_read_lock();
246834e772cSStefan Hajnoczi 
247433fc58eSAsias He 	/* Find the vhost_vsock according to guest context id  */
248433fc58eSAsias He 	vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
249433fc58eSAsias He 	if (!vsock) {
250834e772cSStefan Hajnoczi 		rcu_read_unlock();
251433fc58eSAsias He 		virtio_transport_free_pkt(pkt);
252433fc58eSAsias He 		return -ENODEV;
253433fc58eSAsias He 	}
254433fc58eSAsias He 
255433fc58eSAsias He 	if (pkt->reply)
256433fc58eSAsias He 		atomic_inc(&vsock->queued_replies);
257433fc58eSAsias He 
258433fc58eSAsias He 	spin_lock_bh(&vsock->send_pkt_list_lock);
259433fc58eSAsias He 	list_add_tail(&pkt->list, &vsock->send_pkt_list);
260433fc58eSAsias He 	spin_unlock_bh(&vsock->send_pkt_list_lock);
261433fc58eSAsias He 
262433fc58eSAsias He 	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
263834e772cSStefan Hajnoczi 
264834e772cSStefan Hajnoczi 	rcu_read_unlock();
265433fc58eSAsias He 	return len;
266433fc58eSAsias He }
267433fc58eSAsias He 
26816320f36SPeng Tao static int
26916320f36SPeng Tao vhost_transport_cancel_pkt(struct vsock_sock *vsk)
27016320f36SPeng Tao {
27116320f36SPeng Tao 	struct vhost_vsock *vsock;
27216320f36SPeng Tao 	struct virtio_vsock_pkt *pkt, *n;
27316320f36SPeng Tao 	int cnt = 0;
274834e772cSStefan Hajnoczi 	int ret = -ENODEV;
27516320f36SPeng Tao 	LIST_HEAD(freeme);
27616320f36SPeng Tao 
277834e772cSStefan Hajnoczi 	rcu_read_lock();
278834e772cSStefan Hajnoczi 
27916320f36SPeng Tao 	/* Find the vhost_vsock according to guest context id  */
28016320f36SPeng Tao 	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
28116320f36SPeng Tao 	if (!vsock)
282834e772cSStefan Hajnoczi 		goto out;
28316320f36SPeng Tao 
28416320f36SPeng Tao 	spin_lock_bh(&vsock->send_pkt_list_lock);
28516320f36SPeng Tao 	list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
28616320f36SPeng Tao 		if (pkt->vsk != vsk)
28716320f36SPeng Tao 			continue;
28816320f36SPeng Tao 		list_move(&pkt->list, &freeme);
28916320f36SPeng Tao 	}
29016320f36SPeng Tao 	spin_unlock_bh(&vsock->send_pkt_list_lock);
29116320f36SPeng Tao 
29216320f36SPeng Tao 	list_for_each_entry_safe(pkt, n, &freeme, list) {
29316320f36SPeng Tao 		if (pkt->reply)
29416320f36SPeng Tao 			cnt++;
29516320f36SPeng Tao 		list_del(&pkt->list);
29616320f36SPeng Tao 		virtio_transport_free_pkt(pkt);
29716320f36SPeng Tao 	}
29816320f36SPeng Tao 
29916320f36SPeng Tao 	if (cnt) {
30016320f36SPeng Tao 		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
30116320f36SPeng Tao 		int new_cnt;
30216320f36SPeng Tao 
30316320f36SPeng Tao 		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
30416320f36SPeng Tao 		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
30516320f36SPeng Tao 			vhost_poll_queue(&tx_vq->poll);
30616320f36SPeng Tao 	}
30716320f36SPeng Tao 
308834e772cSStefan Hajnoczi 	ret = 0;
309834e772cSStefan Hajnoczi out:
310834e772cSStefan Hajnoczi 	rcu_read_unlock();
311834e772cSStefan Hajnoczi 	return ret;
31216320f36SPeng Tao }
31316320f36SPeng Tao 
314433fc58eSAsias He static struct virtio_vsock_pkt *
315433fc58eSAsias He vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
316433fc58eSAsias He 		      unsigned int out, unsigned int in)
317433fc58eSAsias He {
318433fc58eSAsias He 	struct virtio_vsock_pkt *pkt;
319433fc58eSAsias He 	struct iov_iter iov_iter;
320433fc58eSAsias He 	size_t nbytes;
321433fc58eSAsias He 	size_t len;
322433fc58eSAsias He 
323433fc58eSAsias He 	if (in != 0) {
324433fc58eSAsias He 		vq_err(vq, "Expected 0 input buffers, got %u\n", in);
325433fc58eSAsias He 		return NULL;
326433fc58eSAsias He 	}
327433fc58eSAsias He 
328433fc58eSAsias He 	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
329433fc58eSAsias He 	if (!pkt)
330433fc58eSAsias He 		return NULL;
331433fc58eSAsias He 
332433fc58eSAsias He 	len = iov_length(vq->iov, out);
333433fc58eSAsias He 	iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
334433fc58eSAsias He 
335433fc58eSAsias He 	nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
336433fc58eSAsias He 	if (nbytes != sizeof(pkt->hdr)) {
337433fc58eSAsias He 		vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
338433fc58eSAsias He 		       sizeof(pkt->hdr), nbytes);
339433fc58eSAsias He 		kfree(pkt);
340433fc58eSAsias He 		return NULL;
341433fc58eSAsias He 	}
342433fc58eSAsias He 
343433fc58eSAsias He 	if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
344433fc58eSAsias He 		pkt->len = le32_to_cpu(pkt->hdr.len);
345433fc58eSAsias He 
346433fc58eSAsias He 	/* No payload */
347433fc58eSAsias He 	if (!pkt->len)
348433fc58eSAsias He 		return pkt;
349433fc58eSAsias He 
350433fc58eSAsias He 	/* The pkt is too big */
351433fc58eSAsias He 	if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
352433fc58eSAsias He 		kfree(pkt);
353433fc58eSAsias He 		return NULL;
354433fc58eSAsias He 	}
355433fc58eSAsias He 
356433fc58eSAsias He 	pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
357433fc58eSAsias He 	if (!pkt->buf) {
358433fc58eSAsias He 		kfree(pkt);
359433fc58eSAsias He 		return NULL;
360433fc58eSAsias He 	}
361433fc58eSAsias He 
362473c7391SStefano Garzarella 	pkt->buf_len = pkt->len;
363473c7391SStefano Garzarella 
364433fc58eSAsias He 	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
365433fc58eSAsias He 	if (nbytes != pkt->len) {
366433fc58eSAsias He 		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
367433fc58eSAsias He 		       pkt->len, nbytes);
368433fc58eSAsias He 		virtio_transport_free_pkt(pkt);
369433fc58eSAsias He 		return NULL;
370433fc58eSAsias He 	}
371433fc58eSAsias He 
372433fc58eSAsias He 	return pkt;
373433fc58eSAsias He }
374433fc58eSAsias He 
375433fc58eSAsias He /* Is there space left for replies to rx packets? */
376433fc58eSAsias He static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
377433fc58eSAsias He {
378433fc58eSAsias He 	struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
379433fc58eSAsias He 	int val;
380433fc58eSAsias He 
381433fc58eSAsias He 	smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
382433fc58eSAsias He 	val = atomic_read(&vsock->queued_replies);
383433fc58eSAsias He 
384433fc58eSAsias He 	return val < vq->num;
385433fc58eSAsias He }
386433fc58eSAsias He 
/* Operations table of this transport.  Only get_local_cid, cancel_pkt and
 * send_pkt point at functions defined in this file; every other callback is
 * a virtio_transport_* function defined elsewhere.
 */
static struct virtio_transport vhost_transport = {
	.transport = {
		.get_local_cid            = vhost_transport_get_local_cid,

		/* Socket lifetime and connection handling */
		.init                     = virtio_transport_do_socket_init,
		.destruct                 = virtio_transport_destruct,
		.release                  = virtio_transport_release,
		.connect                  = virtio_transport_connect,
		.shutdown                 = virtio_transport_shutdown,
		.cancel_pkt               = vhost_transport_cancel_pkt,

		/* Datagram callbacks */
		.dgram_enqueue            = virtio_transport_dgram_enqueue,
		.dgram_dequeue            = virtio_transport_dgram_dequeue,
		.dgram_bind               = virtio_transport_dgram_bind,
		.dgram_allow              = virtio_transport_dgram_allow,

		/* Stream callbacks */
		.stream_enqueue           = virtio_transport_stream_enqueue,
		.stream_dequeue           = virtio_transport_stream_dequeue,
		.stream_has_data          = virtio_transport_stream_has_data,
		.stream_has_space         = virtio_transport_stream_has_space,
		.stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
		.stream_is_active         = virtio_transport_stream_is_active,
		.stream_allow             = virtio_transport_stream_allow,

		/* Notification callbacks */
		.notify_poll_in           = virtio_transport_notify_poll_in,
		.notify_poll_out          = virtio_transport_notify_poll_out,
		.notify_recv_init         = virtio_transport_notify_recv_init,
		.notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
		.notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
		.notify_send_init         = virtio_transport_notify_send_init,
		.notify_send_pre_block    = virtio_transport_notify_send_pre_block,
		.notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,

		/* Buffer size accessors */
		.set_buffer_size          = virtio_transport_set_buffer_size,
		.set_min_buffer_size      = virtio_transport_set_min_buffer_size,
		.set_max_buffer_size      = virtio_transport_set_max_buffer_size,
		.get_buffer_size          = virtio_transport_get_buffer_size,
		.get_min_buffer_size      = virtio_transport_get_min_buffer_size,
		.get_max_buffer_size      = virtio_transport_get_max_buffer_size,
	},

	.send_pkt = vhost_transport_send_pkt,
};
432*4c7246dcSStefano Garzarella 
433433fc58eSAsias He static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
434433fc58eSAsias He {
435433fc58eSAsias He 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
436433fc58eSAsias He 						  poll.work);
437433fc58eSAsias He 	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
438433fc58eSAsias He 						 dev);
439433fc58eSAsias He 	struct virtio_vsock_pkt *pkt;
440e79b431fSJason Wang 	int head, pkts = 0, total_len = 0;
441433fc58eSAsias He 	unsigned int out, in;
442433fc58eSAsias He 	bool added = false;
443433fc58eSAsias He 
444433fc58eSAsias He 	mutex_lock(&vq->mutex);
445433fc58eSAsias He 
446433fc58eSAsias He 	if (!vq->private_data)
447433fc58eSAsias He 		goto out;
448433fc58eSAsias He 
449433fc58eSAsias He 	vhost_disable_notify(&vsock->dev, vq);
450e79b431fSJason Wang 	do {
4513fda5d6eSStefan Hajnoczi 		u32 len;
4523fda5d6eSStefan Hajnoczi 
453433fc58eSAsias He 		if (!vhost_vsock_more_replies(vsock)) {
454433fc58eSAsias He 			/* Stop tx until the device processes already
455433fc58eSAsias He 			 * pending replies.  Leave tx virtqueue
456433fc58eSAsias He 			 * callbacks disabled.
457433fc58eSAsias He 			 */
458433fc58eSAsias He 			goto no_more_replies;
459433fc58eSAsias He 		}
460433fc58eSAsias He 
461433fc58eSAsias He 		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
462433fc58eSAsias He 					 &out, &in, NULL, NULL);
463433fc58eSAsias He 		if (head < 0)
464433fc58eSAsias He 			break;
465433fc58eSAsias He 
466433fc58eSAsias He 		if (head == vq->num) {
467433fc58eSAsias He 			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
468433fc58eSAsias He 				vhost_disable_notify(&vsock->dev, vq);
469433fc58eSAsias He 				continue;
470433fc58eSAsias He 			}
471433fc58eSAsias He 			break;
472433fc58eSAsias He 		}
473433fc58eSAsias He 
474433fc58eSAsias He 		pkt = vhost_vsock_alloc_pkt(vq, out, in);
475433fc58eSAsias He 		if (!pkt) {
476433fc58eSAsias He 			vq_err(vq, "Faulted on pkt\n");
477433fc58eSAsias He 			continue;
478433fc58eSAsias He 		}
479433fc58eSAsias He 
4803fda5d6eSStefan Hajnoczi 		len = pkt->len;
4813fda5d6eSStefan Hajnoczi 
48282dfb540SGerard Garcia 		/* Deliver to monitoring devices all received packets */
48382dfb540SGerard Garcia 		virtio_transport_deliver_tap_pkt(pkt);
48482dfb540SGerard Garcia 
485433fc58eSAsias He 		/* Only accept correctly addressed packets */
486433fc58eSAsias He 		if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
487*4c7246dcSStefano Garzarella 			virtio_transport_recv_pkt(&vhost_transport, pkt);
488433fc58eSAsias He 		else
489433fc58eSAsias He 			virtio_transport_free_pkt(pkt);
490433fc58eSAsias He 
491e79b431fSJason Wang 		len += sizeof(pkt->hdr);
492e79b431fSJason Wang 		vhost_add_used(vq, head, len);
493e79b431fSJason Wang 		total_len += len;
494433fc58eSAsias He 		added = true;
495e79b431fSJason Wang 	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
496433fc58eSAsias He 
497433fc58eSAsias He no_more_replies:
498433fc58eSAsias He 	if (added)
499433fc58eSAsias He 		vhost_signal(&vsock->dev, vq);
500433fc58eSAsias He 
501433fc58eSAsias He out:
502433fc58eSAsias He 	mutex_unlock(&vq->mutex);
503433fc58eSAsias He }
504433fc58eSAsias He 
505433fc58eSAsias He static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
506433fc58eSAsias He {
507433fc58eSAsias He 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
508433fc58eSAsias He 						poll.work);
509433fc58eSAsias He 	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
510433fc58eSAsias He 						 dev);
511433fc58eSAsias He 
512433fc58eSAsias He 	vhost_transport_do_send_pkt(vsock, vq);
513433fc58eSAsias He }
514433fc58eSAsias He 
515433fc58eSAsias He static int vhost_vsock_start(struct vhost_vsock *vsock)
516433fc58eSAsias He {
5170516ffd8SStefan Hajnoczi 	struct vhost_virtqueue *vq;
518433fc58eSAsias He 	size_t i;
519433fc58eSAsias He 	int ret;
520433fc58eSAsias He 
521433fc58eSAsias He 	mutex_lock(&vsock->dev.mutex);
522433fc58eSAsias He 
523433fc58eSAsias He 	ret = vhost_dev_check_owner(&vsock->dev);
524433fc58eSAsias He 	if (ret)
525433fc58eSAsias He 		goto err;
526433fc58eSAsias He 
527433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
5280516ffd8SStefan Hajnoczi 		vq = &vsock->vqs[i];
529433fc58eSAsias He 
530433fc58eSAsias He 		mutex_lock(&vq->mutex);
531433fc58eSAsias He 
532433fc58eSAsias He 		if (!vhost_vq_access_ok(vq)) {
533433fc58eSAsias He 			ret = -EFAULT;
534433fc58eSAsias He 			goto err_vq;
535433fc58eSAsias He 		}
536433fc58eSAsias He 
537433fc58eSAsias He 		if (!vq->private_data) {
538433fc58eSAsias He 			vq->private_data = vsock;
5390516ffd8SStefan Hajnoczi 			ret = vhost_vq_init_access(vq);
5400516ffd8SStefan Hajnoczi 			if (ret)
5410516ffd8SStefan Hajnoczi 				goto err_vq;
542433fc58eSAsias He 		}
543433fc58eSAsias He 
544433fc58eSAsias He 		mutex_unlock(&vq->mutex);
545433fc58eSAsias He 	}
546433fc58eSAsias He 
547433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
548433fc58eSAsias He 	return 0;
549433fc58eSAsias He 
550433fc58eSAsias He err_vq:
5510516ffd8SStefan Hajnoczi 	vq->private_data = NULL;
5520516ffd8SStefan Hajnoczi 	mutex_unlock(&vq->mutex);
5530516ffd8SStefan Hajnoczi 
554433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
5550516ffd8SStefan Hajnoczi 		vq = &vsock->vqs[i];
556433fc58eSAsias He 
557433fc58eSAsias He 		mutex_lock(&vq->mutex);
558433fc58eSAsias He 		vq->private_data = NULL;
559433fc58eSAsias He 		mutex_unlock(&vq->mutex);
560433fc58eSAsias He 	}
561433fc58eSAsias He err:
562433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
563433fc58eSAsias He 	return ret;
564433fc58eSAsias He }
565433fc58eSAsias He 
566433fc58eSAsias He static int vhost_vsock_stop(struct vhost_vsock *vsock)
567433fc58eSAsias He {
568433fc58eSAsias He 	size_t i;
569433fc58eSAsias He 	int ret;
570433fc58eSAsias He 
571433fc58eSAsias He 	mutex_lock(&vsock->dev.mutex);
572433fc58eSAsias He 
573433fc58eSAsias He 	ret = vhost_dev_check_owner(&vsock->dev);
574433fc58eSAsias He 	if (ret)
575433fc58eSAsias He 		goto err;
576433fc58eSAsias He 
577433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
578433fc58eSAsias He 		struct vhost_virtqueue *vq = &vsock->vqs[i];
579433fc58eSAsias He 
580433fc58eSAsias He 		mutex_lock(&vq->mutex);
581433fc58eSAsias He 		vq->private_data = NULL;
582433fc58eSAsias He 		mutex_unlock(&vq->mutex);
583433fc58eSAsias He 	}
584433fc58eSAsias He 
585433fc58eSAsias He err:
586433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
587433fc58eSAsias He 	return ret;
588433fc58eSAsias He }
589433fc58eSAsias He 
/* Release the memory of @vsock.  kvfree() matches the kvmalloc() used in
 * vhost_vsock_dev_open().
 */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}
594433fc58eSAsias He 
595433fc58eSAsias He static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
596433fc58eSAsias He {
597433fc58eSAsias He 	struct vhost_virtqueue **vqs;
598433fc58eSAsias He 	struct vhost_vsock *vsock;
599433fc58eSAsias He 	int ret;
600433fc58eSAsias He 
601433fc58eSAsias He 	/* This struct is large and allocation could fail, fall back to vmalloc
602433fc58eSAsias He 	 * if there is no other way.
603433fc58eSAsias He 	 */
604dcda9b04SMichal Hocko 	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
605433fc58eSAsias He 	if (!vsock)
606433fc58eSAsias He 		return -ENOMEM;
607433fc58eSAsias He 
608433fc58eSAsias He 	vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
609433fc58eSAsias He 	if (!vqs) {
610433fc58eSAsias He 		ret = -ENOMEM;
611433fc58eSAsias He 		goto out;
612433fc58eSAsias He 	}
613433fc58eSAsias He 
614a72b69dcSStefan Hajnoczi 	vsock->guest_cid = 0; /* no CID assigned yet */
615a72b69dcSStefan Hajnoczi 
616433fc58eSAsias He 	atomic_set(&vsock->queued_replies, 0);
617433fc58eSAsias He 
618433fc58eSAsias He 	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
619433fc58eSAsias He 	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
620433fc58eSAsias He 	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
621433fc58eSAsias He 	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
622433fc58eSAsias He 
623e82b9b07SJason Wang 	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
624e82b9b07SJason Wang 		       UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
625e82b9b07SJason Wang 		       VHOST_VSOCK_WEIGHT);
626433fc58eSAsias He 
627433fc58eSAsias He 	file->private_data = vsock;
628433fc58eSAsias He 	spin_lock_init(&vsock->send_pkt_list_lock);
629433fc58eSAsias He 	INIT_LIST_HEAD(&vsock->send_pkt_list);
630433fc58eSAsias He 	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
631433fc58eSAsias He 	return 0;
632433fc58eSAsias He 
633433fc58eSAsias He out:
634433fc58eSAsias He 	vhost_vsock_free(vsock);
635433fc58eSAsias He 	return ret;
636433fc58eSAsias He }
637433fc58eSAsias He 
638433fc58eSAsias He static void vhost_vsock_flush(struct vhost_vsock *vsock)
639433fc58eSAsias He {
640433fc58eSAsias He 	int i;
641433fc58eSAsias He 
642433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
643433fc58eSAsias He 		if (vsock->vqs[i].handle_kick)
644433fc58eSAsias He 			vhost_poll_flush(&vsock->vqs[i].poll);
645433fc58eSAsias He 	vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
646433fc58eSAsias He }
647433fc58eSAsias He 
648433fc58eSAsias He static void vhost_vsock_reset_orphans(struct sock *sk)
649433fc58eSAsias He {
650433fc58eSAsias He 	struct vsock_sock *vsk = vsock_sk(sk);
651433fc58eSAsias He 
652433fc58eSAsias He 	/* vmci_transport.c doesn't take sk_lock here either.  At least we're
653433fc58eSAsias He 	 * under vsock_table_lock so the sock cannot disappear while we're
654433fc58eSAsias He 	 * executing.
655433fc58eSAsias He 	 */
656433fc58eSAsias He 
657c38f57daSStefan Hajnoczi 	/* If the peer is still valid, no need to reset connection */
658c38f57daSStefan Hajnoczi 	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
659c38f57daSStefan Hajnoczi 		return;
660c38f57daSStefan Hajnoczi 
661c38f57daSStefan Hajnoczi 	/* If the close timeout is pending, let it expire.  This avoids races
662c38f57daSStefan Hajnoczi 	 * with the timeout callback.
663c38f57daSStefan Hajnoczi 	 */
664c38f57daSStefan Hajnoczi 	if (vsk->close_work_scheduled)
665c38f57daSStefan Hajnoczi 		return;
666c38f57daSStefan Hajnoczi 
667433fc58eSAsias He 	sock_set_flag(sk, SOCK_DONE);
668433fc58eSAsias He 	vsk->peer_shutdown = SHUTDOWN_MASK;
669433fc58eSAsias He 	sk->sk_state = SS_UNCONNECTED;
670433fc58eSAsias He 	sk->sk_err = ECONNRESET;
671433fc58eSAsias He 	sk->sk_error_report(sk);
672433fc58eSAsias He }
673433fc58eSAsias He 
674433fc58eSAsias He static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
675433fc58eSAsias He {
676433fc58eSAsias He 	struct vhost_vsock *vsock = file->private_data;
677433fc58eSAsias He 
6786db3d8dcSStefan Hajnoczi 	mutex_lock(&vhost_vsock_mutex);
679834e772cSStefan Hajnoczi 	if (vsock->guest_cid)
680834e772cSStefan Hajnoczi 		hash_del_rcu(&vsock->hash);
6816db3d8dcSStefan Hajnoczi 	mutex_unlock(&vhost_vsock_mutex);
682433fc58eSAsias He 
683834e772cSStefan Hajnoczi 	/* Wait for other CPUs to finish using vsock */
684834e772cSStefan Hajnoczi 	synchronize_rcu();
685834e772cSStefan Hajnoczi 
686433fc58eSAsias He 	/* Iterating over all connections for all CIDs to find orphans is
687433fc58eSAsias He 	 * inefficient.  Room for improvement here. */
688433fc58eSAsias He 	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
689433fc58eSAsias He 
690433fc58eSAsias He 	vhost_vsock_stop(vsock);
691433fc58eSAsias He 	vhost_vsock_flush(vsock);
692433fc58eSAsias He 	vhost_dev_stop(&vsock->dev);
693433fc58eSAsias He 
694433fc58eSAsias He 	spin_lock_bh(&vsock->send_pkt_list_lock);
695433fc58eSAsias He 	while (!list_empty(&vsock->send_pkt_list)) {
696433fc58eSAsias He 		struct virtio_vsock_pkt *pkt;
697433fc58eSAsias He 
698433fc58eSAsias He 		pkt = list_first_entry(&vsock->send_pkt_list,
699433fc58eSAsias He 				struct virtio_vsock_pkt, list);
700433fc58eSAsias He 		list_del_init(&pkt->list);
701433fc58eSAsias He 		virtio_transport_free_pkt(pkt);
702433fc58eSAsias He 	}
703433fc58eSAsias He 	spin_unlock_bh(&vsock->send_pkt_list_lock);
704433fc58eSAsias He 
705f6f93f75S夷则(Caspar) 	vhost_dev_cleanup(&vsock->dev);
706433fc58eSAsias He 	kfree(vsock->dev.vqs);
707433fc58eSAsias He 	vhost_vsock_free(vsock);
708433fc58eSAsias He 	return 0;
709433fc58eSAsias He }
710433fc58eSAsias He 
711433fc58eSAsias He static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
712433fc58eSAsias He {
713433fc58eSAsias He 	struct vhost_vsock *other;
714433fc58eSAsias He 
715433fc58eSAsias He 	/* Refuse reserved CIDs */
716433fc58eSAsias He 	if (guest_cid <= VMADDR_CID_HOST ||
717433fc58eSAsias He 	    guest_cid == U32_MAX)
718433fc58eSAsias He 		return -EINVAL;
719433fc58eSAsias He 
720433fc58eSAsias He 	/* 64-bit CIDs are not yet supported */
721433fc58eSAsias He 	if (guest_cid > U32_MAX)
722433fc58eSAsias He 		return -EINVAL;
723433fc58eSAsias He 
724433fc58eSAsias He 	/* Refuse if CID is already in use */
7256db3d8dcSStefan Hajnoczi 	mutex_lock(&vhost_vsock_mutex);
726834e772cSStefan Hajnoczi 	other = vhost_vsock_get(guest_cid);
7276c083c2bSGao feng 	if (other && other != vsock) {
7286db3d8dcSStefan Hajnoczi 		mutex_unlock(&vhost_vsock_mutex);
7296c083c2bSGao feng 		return -EADDRINUSE;
7306c083c2bSGao feng 	}
731834e772cSStefan Hajnoczi 
732834e772cSStefan Hajnoczi 	if (vsock->guest_cid)
733834e772cSStefan Hajnoczi 		hash_del_rcu(&vsock->hash);
734834e772cSStefan Hajnoczi 
735433fc58eSAsias He 	vsock->guest_cid = guest_cid;
7367fbe078cSZha Bin 	hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
7376db3d8dcSStefan Hajnoczi 	mutex_unlock(&vhost_vsock_mutex);
738433fc58eSAsias He 
739433fc58eSAsias He 	return 0;
740433fc58eSAsias He }
741433fc58eSAsias He 
742433fc58eSAsias He static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
743433fc58eSAsias He {
744433fc58eSAsias He 	struct vhost_virtqueue *vq;
745433fc58eSAsias He 	int i;
746433fc58eSAsias He 
747433fc58eSAsias He 	if (features & ~VHOST_VSOCK_FEATURES)
748433fc58eSAsias He 		return -EOPNOTSUPP;
749433fc58eSAsias He 
750433fc58eSAsias He 	mutex_lock(&vsock->dev.mutex);
751433fc58eSAsias He 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
752433fc58eSAsias He 	    !vhost_log_access_ok(&vsock->dev)) {
753433fc58eSAsias He 		mutex_unlock(&vsock->dev.mutex);
754433fc58eSAsias He 		return -EFAULT;
755433fc58eSAsias He 	}
756433fc58eSAsias He 
757433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
758433fc58eSAsias He 		vq = &vsock->vqs[i];
759433fc58eSAsias He 		mutex_lock(&vq->mutex);
760433fc58eSAsias He 		vq->acked_features = features;
761433fc58eSAsias He 		mutex_unlock(&vq->mutex);
762433fc58eSAsias He 	}
763433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
764433fc58eSAsias He 	return 0;
765433fc58eSAsias He }
766433fc58eSAsias He 
767433fc58eSAsias He static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
768433fc58eSAsias He 				  unsigned long arg)
769433fc58eSAsias He {
770433fc58eSAsias He 	struct vhost_vsock *vsock = f->private_data;
771433fc58eSAsias He 	void __user *argp = (void __user *)arg;
772433fc58eSAsias He 	u64 guest_cid;
773433fc58eSAsias He 	u64 features;
774433fc58eSAsias He 	int start;
775433fc58eSAsias He 	int r;
776433fc58eSAsias He 
777433fc58eSAsias He 	switch (ioctl) {
778433fc58eSAsias He 	case VHOST_VSOCK_SET_GUEST_CID:
779433fc58eSAsias He 		if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
780433fc58eSAsias He 			return -EFAULT;
781433fc58eSAsias He 		return vhost_vsock_set_cid(vsock, guest_cid);
782433fc58eSAsias He 	case VHOST_VSOCK_SET_RUNNING:
783433fc58eSAsias He 		if (copy_from_user(&start, argp, sizeof(start)))
784433fc58eSAsias He 			return -EFAULT;
785433fc58eSAsias He 		if (start)
786433fc58eSAsias He 			return vhost_vsock_start(vsock);
787433fc58eSAsias He 		else
788433fc58eSAsias He 			return vhost_vsock_stop(vsock);
789433fc58eSAsias He 	case VHOST_GET_FEATURES:
790433fc58eSAsias He 		features = VHOST_VSOCK_FEATURES;
791433fc58eSAsias He 		if (copy_to_user(argp, &features, sizeof(features)))
792433fc58eSAsias He 			return -EFAULT;
793433fc58eSAsias He 		return 0;
794433fc58eSAsias He 	case VHOST_SET_FEATURES:
795433fc58eSAsias He 		if (copy_from_user(&features, argp, sizeof(features)))
796433fc58eSAsias He 			return -EFAULT;
797433fc58eSAsias He 		return vhost_vsock_set_features(vsock, features);
798433fc58eSAsias He 	default:
799433fc58eSAsias He 		mutex_lock(&vsock->dev.mutex);
800433fc58eSAsias He 		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
801433fc58eSAsias He 		if (r == -ENOIOCTLCMD)
802433fc58eSAsias He 			r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
803433fc58eSAsias He 		else
804433fc58eSAsias He 			vhost_vsock_flush(vsock);
805433fc58eSAsias He 		mutex_unlock(&vsock->dev.mutex);
806433fc58eSAsias He 		return r;
807433fc58eSAsias He 	}
808433fc58eSAsias He }
809433fc58eSAsias He 
#ifdef CONFIG_COMPAT
/* Compat ioctl entry point: converts @arg with compat_ptr() and forwards the
 * request to vhost_vsock_dev_ioctl().
 */
static long vhost_vsock_dev_compat_ioctl(struct file *f, unsigned int ioctl,
					 unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return vhost_vsock_dev_ioctl(f, ioctl, native_arg);
}
#endif
817dc32bb67SSonny Rao 
818433fc58eSAsias He static const struct file_operations vhost_vsock_fops = {
819433fc58eSAsias He 	.owner          = THIS_MODULE,
820433fc58eSAsias He 	.open           = vhost_vsock_dev_open,
821433fc58eSAsias He 	.release        = vhost_vsock_dev_release,
822433fc58eSAsias He 	.llseek		= noop_llseek,
823433fc58eSAsias He 	.unlocked_ioctl = vhost_vsock_dev_ioctl,
824dc32bb67SSonny Rao #ifdef CONFIG_COMPAT
825dc32bb67SSonny Rao 	.compat_ioctl   = vhost_vsock_dev_compat_ioctl,
826dc32bb67SSonny Rao #endif
827433fc58eSAsias He };
828433fc58eSAsias He 
829433fc58eSAsias He static struct miscdevice vhost_vsock_misc = {
830f4660cc9SStefan Hajnoczi 	.minor = VHOST_VSOCK_MINOR,
831433fc58eSAsias He 	.name = "vhost-vsock",
832433fc58eSAsias He 	.fops = &vhost_vsock_fops,
833433fc58eSAsias He };
834433fc58eSAsias He 
835433fc58eSAsias He static int __init vhost_vsock_init(void)
836433fc58eSAsias He {
837433fc58eSAsias He 	int ret;
838433fc58eSAsias He 
839433fc58eSAsias He 	ret = vsock_core_init(&vhost_transport.transport);
840433fc58eSAsias He 	if (ret < 0)
841433fc58eSAsias He 		return ret;
842433fc58eSAsias He 	return misc_register(&vhost_vsock_misc);
843433fc58eSAsias He };
844433fc58eSAsias He 
845433fc58eSAsias He static void __exit vhost_vsock_exit(void)
846433fc58eSAsias He {
847433fc58eSAsias He 	misc_deregister(&vhost_vsock_misc);
848433fc58eSAsias He 	vsock_core_exit();
849433fc58eSAsias He };
850433fc58eSAsias He 
851433fc58eSAsias He module_init(vhost_vsock_init);
852433fc58eSAsias He module_exit(vhost_vsock_exit);
853433fc58eSAsias He MODULE_LICENSE("GPL v2");
854433fc58eSAsias He MODULE_AUTHOR("Asias He");
855433fc58eSAsias He MODULE_DESCRIPTION("vhost transport for vsock ");
856f4660cc9SStefan Hajnoczi MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
857f4660cc9SStefan Hajnoczi MODULE_ALIAS("devname:vhost-vsock");
858