xref: /openbmc/linux/drivers/vhost/vsock.c (revision 8e6ed963)
17a338472SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2433fc58eSAsias He /*
3433fc58eSAsias He  * vhost transport for vsock
4433fc58eSAsias He  *
5433fc58eSAsias He  * Copyright (C) 2013-2015 Red Hat, Inc.
6433fc58eSAsias He  * Author: Asias He <asias@redhat.com>
7433fc58eSAsias He  *         Stefan Hajnoczi <stefanha@redhat.com>
8433fc58eSAsias He  */
9433fc58eSAsias He #include <linux/miscdevice.h>
10433fc58eSAsias He #include <linux/atomic.h>
11433fc58eSAsias He #include <linux/module.h>
12433fc58eSAsias He #include <linux/mutex.h>
13433fc58eSAsias He #include <linux/vmalloc.h>
14433fc58eSAsias He #include <net/sock.h>
15433fc58eSAsias He #include <linux/virtio_vsock.h>
16433fc58eSAsias He #include <linux/vhost.h>
17834e772cSStefan Hajnoczi #include <linux/hashtable.h>
18433fc58eSAsias He 
19433fc58eSAsias He #include <net/af_vsock.h>
20433fc58eSAsias He #include "vhost.h"
21433fc58eSAsias He 
/* CID returned by vhost_transport_get_local_cid() as the local address. */
#define VHOST_VSOCK_DEFAULT_HOST_CID	2
/* Max number of bytes transferred before requeueing the job.
 * Using this limit prevents one virtqueue from starving others. */
#define VHOST_VSOCK_WEIGHT 0x80000
/* Max number of packets transferred before requeueing the job.
 * Using this limit prevents one virtqueue from starving others with
 * small pkts.
 */
#define VHOST_VSOCK_PKT_WEIGHT 256

/* Virtio feature mask for this device: the generic VHOST_FEATURES plus
 * VIRTIO_F_ACCESS_PLATFORM and VIRTIO_VSOCK_F_SEQPACKET.
 */
enum {
	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
			       (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
};

/* vhost backend feature mask for this device: only
 * VHOST_BACKEND_F_IOTLB_MSG_V2 is set.
 */
enum {
	VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
};
41433fc58eSAsias He 
/* Used to track all the vhost_vsock instances on the system.
 *
 * vhost_vsock_hash (declared with 8 hash bits) is keyed by guest CID and is
 * walked with the RCU iterator in vhost_vsock_get(); writes to it use
 * vhost_vsock_mutex.
 */
static DEFINE_MUTEX(vhost_vsock_mutex);
static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
45433fc58eSAsias He 
/* Per-device state of one vhost-vsock instance. */
struct vhost_vsock {
	struct vhost_dev dev;
	/* Indexed by VSOCK_VQ_RX and VSOCK_VQ_TX */
	struct vhost_virtqueue vqs[2];

	/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
	struct hlist_node hash;

	/* Work item handled by vhost_transport_send_pkt_work() */
	struct vhost_work send_pkt_work;
	/* Taken (with BHs disabled) around every send_pkt_list access */
	spinlock_t send_pkt_list_lock;
	struct list_head send_pkt_list;	/* host->guest pending packets */

	/* Reply packets queued on send_pkt_list and not yet fully sent;
	 * compared against the TX virtqueue size to throttle TX processing.
	 */
	atomic_t queued_replies;

	/* CID of the guest; 0 means no CID has been assigned yet */
	u32 guest_cid;
	/* Value reported by vhost_transport_seqpacket_allow() for this guest */
	bool seqpacket_allow;
};
62433fc58eSAsias He 
/* Return the CID of the local (host) side of this transport, which is always
 * VHOST_VSOCK_DEFAULT_HOST_CID.
 */
static u32 vhost_transport_get_local_cid(void)
{
	return VHOST_VSOCK_DEFAULT_HOST_CID;
}
67433fc58eSAsias He 
686db3d8dcSStefan Hajnoczi /* Callers that dereference the return value must hold vhost_vsock_mutex or the
69834e772cSStefan Hajnoczi  * RCU read lock.
70834e772cSStefan Hajnoczi  */
71834e772cSStefan Hajnoczi static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
72433fc58eSAsias He {
73433fc58eSAsias He 	struct vhost_vsock *vsock;
74433fc58eSAsias He 
75834e772cSStefan Hajnoczi 	hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
76433fc58eSAsias He 		u32 other_cid = vsock->guest_cid;
77433fc58eSAsias He 
78433fc58eSAsias He 		/* Skip instances that have no CID yet */
79433fc58eSAsias He 		if (other_cid == 0)
80433fc58eSAsias He 			continue;
81433fc58eSAsias He 
82ff3c1b1aSVaibhav Murkute 		if (other_cid == guest_cid)
83433fc58eSAsias He 			return vsock;
84ff3c1b1aSVaibhav Murkute 
85433fc58eSAsias He 	}
86433fc58eSAsias He 
87433fc58eSAsias He 	return NULL;
88433fc58eSAsias He }
89433fc58eSAsias He 
/* Move packets from vsock->send_pkt_list into the guest buffers of @vq.
 *
 * Runs with vq->mutex held for the whole pass. Each iteration takes the first
 * pending packet, fetches one descriptor chain from @vq and copies the packet
 * header followed by as much payload as fits. A packet that does not fit is
 * put back at the head of the list with pkt->off advanced, so the remainder
 * goes out with the next buffer. The pass ends when the list is empty, no
 * buffer is available, an error occurs, or vhost_exceeds_weight() reports that
 * the packet/byte budget has been used up.
 *
 * If at least one buffer was marked used, the guest is signalled. If sending
 * a reply packet took queued_replies down from exactly tx_vq->num, the TX
 * virtqueue poll is queued after the mutex has been dropped.
 */
static void
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
			    struct vhost_virtqueue *vq)
{
	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
	int pkts = 0, total_len = 0;
	bool added = false;
	bool restart_tx = false;

	mutex_lock(&vq->mutex);

	/* Nothing to do while the virtqueue has no backend set */
	if (!vhost_vq_get_backend(vq))
		goto out;

	if (!vq_meta_prefetch(vq))
		goto out;

	/* Avoid further vmexits, we're already processing the virtqueue */
	vhost_disable_notify(&vsock->dev, vq);

	do {
		struct virtio_vsock_pkt *pkt;
		struct iov_iter iov_iter;
		unsigned out, in;
		size_t nbytes;
		size_t iov_len, payload_len;
		int head;
		u32 flags_to_restore = 0;

		spin_lock_bh(&vsock->send_pkt_list_lock);
		if (list_empty(&vsock->send_pkt_list)) {
			spin_unlock_bh(&vsock->send_pkt_list_lock);
			vhost_enable_notify(&vsock->dev, vq);
			break;
		}

		/* Detach the first pending packet; the list lock is not held
		 * while the packet is copied to the guest.
		 */
		pkt = list_first_entry(&vsock->send_pkt_list,
				       struct virtio_vsock_pkt, list);
		list_del_init(&pkt->list);
		spin_unlock_bh(&vsock->send_pkt_list_lock);

		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		if (head < 0) {
			/* Fetching a descriptor failed: put the packet back at
			 * the head of the list so that ordering is preserved.
			 */
			spin_lock_bh(&vsock->send_pkt_list_lock);
			list_add(&pkt->list, &vsock->send_pkt_list);
			spin_unlock_bh(&vsock->send_pkt_list_lock);
			break;
		}

		/* head == vq->num: no buffer is currently available */
		if (head == vq->num) {
			spin_lock_bh(&vsock->send_pkt_list_lock);
			list_add(&pkt->list, &vsock->send_pkt_list);
			spin_unlock_bh(&vsock->send_pkt_list_lock);

			/* We cannot finish yet if more buffers snuck in while
			 * re-enabling notify.
			 */
			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
				vhost_disable_notify(&vsock->dev, vq);
				/* continue jumps to the loop condition, so this
				 * retry is also counted by ++pkts.
				 */
				continue;
			}
			break;
		}

		/* Only buffers the guest can receive into ("in") are expected
		 * on this queue; the packet is dropped otherwise.
		 */
		if (out) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
			break;
		}

		/* The buffer must at least be able to hold the header */
		iov_len = iov_length(&vq->iov[out], in);
		if (iov_len < sizeof(pkt->hdr)) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
			break;
		}

		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len);
		/* Payload bytes of this packet that still have to be sent */
		payload_len = pkt->len - pkt->off;

		/* If the packet is greater than the space available in the
		 * buffer, we split it using multiple buffers.
		 */
		if (payload_len > iov_len - sizeof(pkt->hdr)) {
			payload_len = iov_len - sizeof(pkt->hdr);

			/* As we are copying pieces of large packet's buffer to
			 * small rx buffers, headers of packets in rx queue are
			 * created dynamically and are initialized with header
			 * of current packet(except length). But in case of
			 * SOCK_SEQPACKET, we also must clear message delimiter
			 * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit
			 * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise,
			 * there will be sequence of packets with these
			 * bits set. After initialized header will be copied to
			 * rx buffer, these required bits will be restored.
			 */
			if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
				pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
				flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;

				if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
					pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
					flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR;
				}
			}
		}

		/* Set the correct length in the header */
		pkt->hdr.len = cpu_to_le32(payload_len);

		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
		if (nbytes != sizeof(pkt->hdr)) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Faulted on copying pkt hdr\n");
			break;
		}

		/* Copy the next payload fragment, starting at pkt->off */
		nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len,
				      &iov_iter);
		if (nbytes != payload_len) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Faulted on copying pkt buf\n");
			break;
		}

		/* Deliver to monitoring devices all packets that we
		 * will transmit.
		 */
		virtio_transport_deliver_tap_pkt(pkt);

		vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
		added = true;

		pkt->off += payload_len;
		total_len += payload_len;

		/* If we didn't send all the payload we can requeue the packet
		 * to send it with the next available buffer.
		 */
		if (pkt->off < pkt->len) {
			/* Put back the EOM/EOR bits cleared for this fragment */
			pkt->hdr.flags |= cpu_to_le32(flags_to_restore);

			/* We are queueing the same virtio_vsock_pkt to handle
			 * the remaining bytes, and we want to deliver it
			 * to monitoring devices in the next iteration.
			 */
			pkt->tap_delivered = false;

			spin_lock_bh(&vsock->send_pkt_list_lock);
			list_add(&pkt->list, &vsock->send_pkt_list);
			spin_unlock_bh(&vsock->send_pkt_list_lock);
		} else {
			if (pkt->reply) {
				int val;

				val = atomic_dec_return(&vsock->queued_replies);

				/* Do we have resources to resume tx
				 * processing?
				 *
				 * val + 1 is the counter before the decrement:
				 * TX is restarted only when the counter has
				 * just dropped from tx_vq->num.
				 */
				if (val + 1 == tx_vq->num)
					restart_tx = true;
			}

			virtio_transport_free_pkt(pkt);
		}
	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
	if (added)
		vhost_signal(&vsock->dev, vq);

out:
	mutex_unlock(&vq->mutex);

	/* Queue the TX poll only after vq->mutex has been released */
	if (restart_tx)
		vhost_poll_queue(&tx_vq->poll);
}
268433fc58eSAsias He 
269433fc58eSAsias He static void vhost_transport_send_pkt_work(struct vhost_work *work)
270433fc58eSAsias He {
271433fc58eSAsias He 	struct vhost_virtqueue *vq;
272433fc58eSAsias He 	struct vhost_vsock *vsock;
273433fc58eSAsias He 
274433fc58eSAsias He 	vsock = container_of(work, struct vhost_vsock, send_pkt_work);
275433fc58eSAsias He 	vq = &vsock->vqs[VSOCK_VQ_RX];
276433fc58eSAsias He 
277433fc58eSAsias He 	vhost_transport_do_send_pkt(vsock, vq);
278433fc58eSAsias He }
279433fc58eSAsias He 
280433fc58eSAsias He static int
281433fc58eSAsias He vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
282433fc58eSAsias He {
283433fc58eSAsias He 	struct vhost_vsock *vsock;
284433fc58eSAsias He 	int len = pkt->len;
285433fc58eSAsias He 
286834e772cSStefan Hajnoczi 	rcu_read_lock();
287834e772cSStefan Hajnoczi 
288433fc58eSAsias He 	/* Find the vhost_vsock according to guest context id  */
289433fc58eSAsias He 	vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
290433fc58eSAsias He 	if (!vsock) {
291834e772cSStefan Hajnoczi 		rcu_read_unlock();
292433fc58eSAsias He 		virtio_transport_free_pkt(pkt);
293433fc58eSAsias He 		return -ENODEV;
294433fc58eSAsias He 	}
295433fc58eSAsias He 
296433fc58eSAsias He 	if (pkt->reply)
297433fc58eSAsias He 		atomic_inc(&vsock->queued_replies);
298433fc58eSAsias He 
299433fc58eSAsias He 	spin_lock_bh(&vsock->send_pkt_list_lock);
300433fc58eSAsias He 	list_add_tail(&pkt->list, &vsock->send_pkt_list);
301433fc58eSAsias He 	spin_unlock_bh(&vsock->send_pkt_list_lock);
302433fc58eSAsias He 
303433fc58eSAsias He 	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
304834e772cSStefan Hajnoczi 
305834e772cSStefan Hajnoczi 	rcu_read_unlock();
306433fc58eSAsias He 	return len;
307433fc58eSAsias He }
308433fc58eSAsias He 
30916320f36SPeng Tao static int
31016320f36SPeng Tao vhost_transport_cancel_pkt(struct vsock_sock *vsk)
31116320f36SPeng Tao {
31216320f36SPeng Tao 	struct vhost_vsock *vsock;
31316320f36SPeng Tao 	struct virtio_vsock_pkt *pkt, *n;
31416320f36SPeng Tao 	int cnt = 0;
315834e772cSStefan Hajnoczi 	int ret = -ENODEV;
31616320f36SPeng Tao 	LIST_HEAD(freeme);
31716320f36SPeng Tao 
318834e772cSStefan Hajnoczi 	rcu_read_lock();
319834e772cSStefan Hajnoczi 
32016320f36SPeng Tao 	/* Find the vhost_vsock according to guest context id  */
32116320f36SPeng Tao 	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
32216320f36SPeng Tao 	if (!vsock)
323834e772cSStefan Hajnoczi 		goto out;
32416320f36SPeng Tao 
32516320f36SPeng Tao 	spin_lock_bh(&vsock->send_pkt_list_lock);
32616320f36SPeng Tao 	list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
32716320f36SPeng Tao 		if (pkt->vsk != vsk)
32816320f36SPeng Tao 			continue;
32916320f36SPeng Tao 		list_move(&pkt->list, &freeme);
33016320f36SPeng Tao 	}
33116320f36SPeng Tao 	spin_unlock_bh(&vsock->send_pkt_list_lock);
33216320f36SPeng Tao 
33316320f36SPeng Tao 	list_for_each_entry_safe(pkt, n, &freeme, list) {
33416320f36SPeng Tao 		if (pkt->reply)
33516320f36SPeng Tao 			cnt++;
33616320f36SPeng Tao 		list_del(&pkt->list);
33716320f36SPeng Tao 		virtio_transport_free_pkt(pkt);
33816320f36SPeng Tao 	}
33916320f36SPeng Tao 
34016320f36SPeng Tao 	if (cnt) {
34116320f36SPeng Tao 		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
34216320f36SPeng Tao 		int new_cnt;
34316320f36SPeng Tao 
34416320f36SPeng Tao 		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
34516320f36SPeng Tao 		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
34616320f36SPeng Tao 			vhost_poll_queue(&tx_vq->poll);
34716320f36SPeng Tao 	}
34816320f36SPeng Tao 
349834e772cSStefan Hajnoczi 	ret = 0;
350834e772cSStefan Hajnoczi out:
351834e772cSStefan Hajnoczi 	rcu_read_unlock();
352834e772cSStefan Hajnoczi 	return ret;
35316320f36SPeng Tao }
35416320f36SPeng Tao 
355433fc58eSAsias He static struct virtio_vsock_pkt *
356433fc58eSAsias He vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
357433fc58eSAsias He 		      unsigned int out, unsigned int in)
358433fc58eSAsias He {
359433fc58eSAsias He 	struct virtio_vsock_pkt *pkt;
360433fc58eSAsias He 	struct iov_iter iov_iter;
361433fc58eSAsias He 	size_t nbytes;
362433fc58eSAsias He 	size_t len;
363433fc58eSAsias He 
364433fc58eSAsias He 	if (in != 0) {
365433fc58eSAsias He 		vq_err(vq, "Expected 0 input buffers, got %u\n", in);
366433fc58eSAsias He 		return NULL;
367433fc58eSAsias He 	}
368433fc58eSAsias He 
369433fc58eSAsias He 	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
370433fc58eSAsias He 	if (!pkt)
371433fc58eSAsias He 		return NULL;
372433fc58eSAsias He 
373433fc58eSAsias He 	len = iov_length(vq->iov, out);
374433fc58eSAsias He 	iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
375433fc58eSAsias He 
376433fc58eSAsias He 	nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
377433fc58eSAsias He 	if (nbytes != sizeof(pkt->hdr)) {
378433fc58eSAsias He 		vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
379433fc58eSAsias He 		       sizeof(pkt->hdr), nbytes);
380433fc58eSAsias He 		kfree(pkt);
381433fc58eSAsias He 		return NULL;
382433fc58eSAsias He 	}
383433fc58eSAsias He 
384433fc58eSAsias He 	pkt->len = le32_to_cpu(pkt->hdr.len);
385433fc58eSAsias He 
386433fc58eSAsias He 	/* No payload */
387433fc58eSAsias He 	if (!pkt->len)
388433fc58eSAsias He 		return pkt;
389433fc58eSAsias He 
390433fc58eSAsias He 	/* The pkt is too big */
391433fc58eSAsias He 	if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
392433fc58eSAsias He 		kfree(pkt);
393433fc58eSAsias He 		return NULL;
394433fc58eSAsias He 	}
395433fc58eSAsias He 
396433fc58eSAsias He 	pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
397433fc58eSAsias He 	if (!pkt->buf) {
398433fc58eSAsias He 		kfree(pkt);
399433fc58eSAsias He 		return NULL;
400433fc58eSAsias He 	}
401433fc58eSAsias He 
402473c7391SStefano Garzarella 	pkt->buf_len = pkt->len;
403473c7391SStefano Garzarella 
404433fc58eSAsias He 	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
405433fc58eSAsias He 	if (nbytes != pkt->len) {
406433fc58eSAsias He 		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
407433fc58eSAsias He 		       pkt->len, nbytes);
408433fc58eSAsias He 		virtio_transport_free_pkt(pkt);
409433fc58eSAsias He 		return NULL;
410433fc58eSAsias He 	}
411433fc58eSAsias He 
412433fc58eSAsias He 	return pkt;
413433fc58eSAsias He }
414433fc58eSAsias He 
415433fc58eSAsias He /* Is there space left for replies to rx packets? */
416433fc58eSAsias He static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
417433fc58eSAsias He {
418433fc58eSAsias He 	struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
419433fc58eSAsias He 	int val;
420433fc58eSAsias He 
421433fc58eSAsias He 	smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
422433fc58eSAsias He 	val = atomic_read(&vsock->queued_replies);
423433fc58eSAsias He 
424433fc58eSAsias He 	return val < vq->num;
425433fc58eSAsias He }
426433fc58eSAsias He 
/* Defined after the ops table below, which needs its address */
static bool vhost_transport_seqpacket_allow(u32 remote_cid);

/* Transport operations of the vhost (host side) vsock transport.
 *
 * Most callbacks are the common virtio_transport_* helpers. The entries
 * implemented in this file are get_local_cid, cancel_pkt, seqpacket_allow
 * and send_pkt.
 */
static struct virtio_transport vhost_transport = {
	.transport = {
		.module                   = THIS_MODULE,

		.get_local_cid            = vhost_transport_get_local_cid,

		.init                     = virtio_transport_do_socket_init,
		.destruct                 = virtio_transport_destruct,
		.release                  = virtio_transport_release,
		.connect                  = virtio_transport_connect,
		.shutdown                 = virtio_transport_shutdown,
		.cancel_pkt               = vhost_transport_cancel_pkt,

		.dgram_enqueue            = virtio_transport_dgram_enqueue,
		.dgram_dequeue            = virtio_transport_dgram_dequeue,
		.dgram_bind               = virtio_transport_dgram_bind,
		.dgram_allow              = virtio_transport_dgram_allow,

		.stream_enqueue           = virtio_transport_stream_enqueue,
		.stream_dequeue           = virtio_transport_stream_dequeue,
		.stream_has_data          = virtio_transport_stream_has_data,
		.stream_has_space         = virtio_transport_stream_has_space,
		.stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
		.stream_is_active         = virtio_transport_stream_is_active,
		.stream_allow             = virtio_transport_stream_allow,

		.seqpacket_dequeue        = virtio_transport_seqpacket_dequeue,
		.seqpacket_enqueue        = virtio_transport_seqpacket_enqueue,
		.seqpacket_allow          = vhost_transport_seqpacket_allow,
		.seqpacket_has_data       = virtio_transport_seqpacket_has_data,

		.notify_poll_in           = virtio_transport_notify_poll_in,
		.notify_poll_out          = virtio_transport_notify_poll_out,
		.notify_recv_init         = virtio_transport_notify_recv_init,
		.notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
		.notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
		.notify_send_init         = virtio_transport_notify_send_init,
		.notify_send_pre_block    = virtio_transport_notify_send_pre_block,
		.notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
		.notify_buffer_size       = virtio_transport_notify_buffer_size,

	},

	/* Queues a packet on the destination guest's send_pkt_list */
	.send_pkt = vhost_transport_send_pkt,
};
4764c7246dcSStefano Garzarella 
477ced7b713SArseny Krasnov static bool vhost_transport_seqpacket_allow(u32 remote_cid)
478ced7b713SArseny Krasnov {
479ced7b713SArseny Krasnov 	struct vhost_vsock *vsock;
480ced7b713SArseny Krasnov 	bool seqpacket_allow = false;
481ced7b713SArseny Krasnov 
482ced7b713SArseny Krasnov 	rcu_read_lock();
483ced7b713SArseny Krasnov 	vsock = vhost_vsock_get(remote_cid);
484ced7b713SArseny Krasnov 
485ced7b713SArseny Krasnov 	if (vsock)
486ced7b713SArseny Krasnov 		seqpacket_allow = vsock->seqpacket_allow;
487ced7b713SArseny Krasnov 
488ced7b713SArseny Krasnov 	rcu_read_unlock();
489ced7b713SArseny Krasnov 
490ced7b713SArseny Krasnov 	return seqpacket_allow;
491ced7b713SArseny Krasnov }
492ced7b713SArseny Krasnov 
/* Kick handler of the TX virtqueue (installed as
 * vqs[VSOCK_VQ_TX].handle_kick): receives packets sent by the guest.
 *
 * With vq->mutex held, descriptor chains are fetched one at a time, turned
 * into a virtio_vsock_pkt by vhost_vsock_alloc_pkt() and, if correctly
 * addressed, handed to virtio_transport_recv_pkt(). Processing stops when no
 * more buffers are available, when too many replies are already queued
 * (vhost_vsock_more_replies() is false), on a descriptor error, or when
 * vhost_exceeds_weight() reports that the budget has been used up.
 */
static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
						 dev);
	struct virtio_vsock_pkt *pkt;
	int head, pkts = 0, total_len = 0;
	unsigned int out, in;
	bool added = false;

	mutex_lock(&vq->mutex);

	/* Nothing to do while the virtqueue has no backend set */
	if (!vhost_vq_get_backend(vq))
		goto out;

	if (!vq_meta_prefetch(vq))
		goto out;

	vhost_disable_notify(&vsock->dev, vq);
	do {
		if (!vhost_vsock_more_replies(vsock)) {
			/* Stop tx until the device processes already
			 * pending replies.  Leave tx virtqueue
			 * callbacks disabled.
			 */
			goto no_more_replies;
		}

		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		if (head < 0)
			break;

		/* head == vq->num: no buffer is currently available. Re-enable
		 * notifications and retry only if a buffer arrived meanwhile.
		 */
		if (head == vq->num) {
			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
				vhost_disable_notify(&vsock->dev, vq);
				continue;
			}
			break;
		}

		pkt = vhost_vsock_alloc_pkt(vq, out, in);
		if (!pkt) {
			vq_err(vq, "Faulted on pkt\n");
			/* The descriptor is not marked used on this path;
			 * continue jumps to the loop condition.
			 */
			continue;
		}

		/* Account the length before the packet is handed over below;
		 * pkt is not accessed after that point.
		 */
		total_len += sizeof(pkt->hdr) + pkt->len;

		/* Deliver to monitoring devices all received packets */
		virtio_transport_deliver_tap_pkt(pkt);

		/* Only accept correctly addressed packets */
		if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
		    le64_to_cpu(pkt->hdr.dst_cid) ==
		    vhost_transport_get_local_cid())
			virtio_transport_recv_pkt(&vhost_transport, pkt);
		else
			virtio_transport_free_pkt(pkt);

		/* Return the buffer to the guest; nothing was written to it */
		vhost_add_used(vq, head, 0);
		added = true;
	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));

no_more_replies:
	if (added)
		vhost_signal(&vsock->dev, vq);

out:
	mutex_unlock(&vq->mutex);
}
565433fc58eSAsias He 
566433fc58eSAsias He static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
567433fc58eSAsias He {
568433fc58eSAsias He 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
569433fc58eSAsias He 						poll.work);
570433fc58eSAsias He 	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
571433fc58eSAsias He 						 dev);
572433fc58eSAsias He 
573433fc58eSAsias He 	vhost_transport_do_send_pkt(vsock, vq);
574433fc58eSAsias He }
575433fc58eSAsias He 
576433fc58eSAsias He static int vhost_vsock_start(struct vhost_vsock *vsock)
577433fc58eSAsias He {
5780516ffd8SStefan Hajnoczi 	struct vhost_virtqueue *vq;
579433fc58eSAsias He 	size_t i;
580433fc58eSAsias He 	int ret;
581433fc58eSAsias He 
582433fc58eSAsias He 	mutex_lock(&vsock->dev.mutex);
583433fc58eSAsias He 
584433fc58eSAsias He 	ret = vhost_dev_check_owner(&vsock->dev);
585433fc58eSAsias He 	if (ret)
586433fc58eSAsias He 		goto err;
587433fc58eSAsias He 
588433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
5890516ffd8SStefan Hajnoczi 		vq = &vsock->vqs[i];
590433fc58eSAsias He 
591433fc58eSAsias He 		mutex_lock(&vq->mutex);
592433fc58eSAsias He 
593433fc58eSAsias He 		if (!vhost_vq_access_ok(vq)) {
594433fc58eSAsias He 			ret = -EFAULT;
595433fc58eSAsias He 			goto err_vq;
596433fc58eSAsias He 		}
597433fc58eSAsias He 
598247643f8SEugenio Pérez 		if (!vhost_vq_get_backend(vq)) {
599247643f8SEugenio Pérez 			vhost_vq_set_backend(vq, vsock);
6000516ffd8SStefan Hajnoczi 			ret = vhost_vq_init_access(vq);
6010516ffd8SStefan Hajnoczi 			if (ret)
6020516ffd8SStefan Hajnoczi 				goto err_vq;
603433fc58eSAsias He 		}
604433fc58eSAsias He 
605433fc58eSAsias He 		mutex_unlock(&vq->mutex);
606433fc58eSAsias He 	}
607433fc58eSAsias He 
6080b841030SJia He 	/* Some packets may have been queued before the device was started,
6090b841030SJia He 	 * let's kick the send worker to send them.
6100b841030SJia He 	 */
6110b841030SJia He 	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
6120b841030SJia He 
613433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
614433fc58eSAsias He 	return 0;
615433fc58eSAsias He 
616433fc58eSAsias He err_vq:
617247643f8SEugenio Pérez 	vhost_vq_set_backend(vq, NULL);
6180516ffd8SStefan Hajnoczi 	mutex_unlock(&vq->mutex);
6190516ffd8SStefan Hajnoczi 
620433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
6210516ffd8SStefan Hajnoczi 		vq = &vsock->vqs[i];
622433fc58eSAsias He 
623433fc58eSAsias He 		mutex_lock(&vq->mutex);
624247643f8SEugenio Pérez 		vhost_vq_set_backend(vq, NULL);
625433fc58eSAsias He 		mutex_unlock(&vq->mutex);
626433fc58eSAsias He 	}
627433fc58eSAsias He err:
628433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
629433fc58eSAsias He 	return ret;
630433fc58eSAsias He }
631433fc58eSAsias He 
632a58da53fSStefano Garzarella static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
633433fc58eSAsias He {
634433fc58eSAsias He 	size_t i;
635a58da53fSStefano Garzarella 	int ret = 0;
636433fc58eSAsias He 
637433fc58eSAsias He 	mutex_lock(&vsock->dev.mutex);
638433fc58eSAsias He 
639a58da53fSStefano Garzarella 	if (check_owner) {
640433fc58eSAsias He 		ret = vhost_dev_check_owner(&vsock->dev);
641433fc58eSAsias He 		if (ret)
642433fc58eSAsias He 			goto err;
643a58da53fSStefano Garzarella 	}
644433fc58eSAsias He 
645433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
646433fc58eSAsias He 		struct vhost_virtqueue *vq = &vsock->vqs[i];
647433fc58eSAsias He 
648433fc58eSAsias He 		mutex_lock(&vq->mutex);
649247643f8SEugenio Pérez 		vhost_vq_set_backend(vq, NULL);
650433fc58eSAsias He 		mutex_unlock(&vq->mutex);
651433fc58eSAsias He 	}
652433fc58eSAsias He 
653433fc58eSAsias He err:
654433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
655433fc58eSAsias He 	return ret;
656433fc58eSAsias He }
657433fc58eSAsias He 
/* Release the memory of @vsock.  kvfree() is the counterpart of the
 * kvmalloc() call that allocates the structure in vhost_vsock_dev_open().
 */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}
662433fc58eSAsias He 
663433fc58eSAsias He static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
664433fc58eSAsias He {
665433fc58eSAsias He 	struct vhost_virtqueue **vqs;
666433fc58eSAsias He 	struct vhost_vsock *vsock;
667433fc58eSAsias He 	int ret;
668433fc58eSAsias He 
669433fc58eSAsias He 	/* This struct is large and allocation could fail, fall back to vmalloc
670433fc58eSAsias He 	 * if there is no other way.
671433fc58eSAsias He 	 */
672dcda9b04SMichal Hocko 	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
673433fc58eSAsias He 	if (!vsock)
674433fc58eSAsias He 		return -ENOMEM;
675433fc58eSAsias He 
676433fc58eSAsias He 	vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
677433fc58eSAsias He 	if (!vqs) {
678433fc58eSAsias He 		ret = -ENOMEM;
679433fc58eSAsias He 		goto out;
680433fc58eSAsias He 	}
681433fc58eSAsias He 
682a72b69dcSStefan Hajnoczi 	vsock->guest_cid = 0; /* no CID assigned yet */
683a72b69dcSStefan Hajnoczi 
684433fc58eSAsias He 	atomic_set(&vsock->queued_replies, 0);
685433fc58eSAsias He 
686433fc58eSAsias He 	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
687433fc58eSAsias He 	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
688433fc58eSAsias He 	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
689433fc58eSAsias He 	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
690433fc58eSAsias He 
691e82b9b07SJason Wang 	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
692e82b9b07SJason Wang 		       UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
69301fcb1cbSJason Wang 		       VHOST_VSOCK_WEIGHT, true, NULL);
694433fc58eSAsias He 
695433fc58eSAsias He 	file->private_data = vsock;
696433fc58eSAsias He 	spin_lock_init(&vsock->send_pkt_list_lock);
697433fc58eSAsias He 	INIT_LIST_HEAD(&vsock->send_pkt_list);
698433fc58eSAsias He 	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
699433fc58eSAsias He 	return 0;
700433fc58eSAsias He 
701433fc58eSAsias He out:
702433fc58eSAsias He 	vhost_vsock_free(vsock);
703433fc58eSAsias He 	return ret;
704433fc58eSAsias He }
705433fc58eSAsias He 
706433fc58eSAsias He static void vhost_vsock_flush(struct vhost_vsock *vsock)
707433fc58eSAsias He {
708433fc58eSAsias He 	int i;
709433fc58eSAsias He 
710433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
711433fc58eSAsias He 		if (vsock->vqs[i].handle_kick)
712433fc58eSAsias He 			vhost_poll_flush(&vsock->vqs[i].poll);
7131465cb61SMike Christie 	vhost_work_dev_flush(&vsock->dev);
714433fc58eSAsias He }
715433fc58eSAsias He 
716433fc58eSAsias He static void vhost_vsock_reset_orphans(struct sock *sk)
717433fc58eSAsias He {
718433fc58eSAsias He 	struct vsock_sock *vsk = vsock_sk(sk);
719433fc58eSAsias He 
720433fc58eSAsias He 	/* vmci_transport.c doesn't take sk_lock here either.  At least we're
721433fc58eSAsias He 	 * under vsock_table_lock so the sock cannot disappear while we're
722433fc58eSAsias He 	 * executing.
723433fc58eSAsias He 	 */
724433fc58eSAsias He 
725c38f57daSStefan Hajnoczi 	/* If the peer is still valid, no need to reset connection */
726c38f57daSStefan Hajnoczi 	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
727c38f57daSStefan Hajnoczi 		return;
728c38f57daSStefan Hajnoczi 
729c38f57daSStefan Hajnoczi 	/* If the close timeout is pending, let it expire.  This avoids races
730c38f57daSStefan Hajnoczi 	 * with the timeout callback.
731c38f57daSStefan Hajnoczi 	 */
732c38f57daSStefan Hajnoczi 	if (vsk->close_work_scheduled)
733c38f57daSStefan Hajnoczi 		return;
734c38f57daSStefan Hajnoczi 
735433fc58eSAsias He 	sock_set_flag(sk, SOCK_DONE);
736433fc58eSAsias He 	vsk->peer_shutdown = SHUTDOWN_MASK;
737433fc58eSAsias He 	sk->sk_state = SS_UNCONNECTED;
738433fc58eSAsias He 	sk->sk_err = ECONNRESET;
739e3ae2365SAlexander Aring 	sk_error_report(sk);
740433fc58eSAsias He }
741433fc58eSAsias He 
742433fc58eSAsias He static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
743433fc58eSAsias He {
744433fc58eSAsias He 	struct vhost_vsock *vsock = file->private_data;
745433fc58eSAsias He 
7466db3d8dcSStefan Hajnoczi 	mutex_lock(&vhost_vsock_mutex);
747834e772cSStefan Hajnoczi 	if (vsock->guest_cid)
748834e772cSStefan Hajnoczi 		hash_del_rcu(&vsock->hash);
7496db3d8dcSStefan Hajnoczi 	mutex_unlock(&vhost_vsock_mutex);
750433fc58eSAsias He 
751834e772cSStefan Hajnoczi 	/* Wait for other CPUs to finish using vsock */
752834e772cSStefan Hajnoczi 	synchronize_rcu();
753834e772cSStefan Hajnoczi 
754433fc58eSAsias He 	/* Iterating over all connections for all CIDs to find orphans is
755433fc58eSAsias He 	 * inefficient.  Room for improvement here. */
756*8e6ed963SJiyong Park 	vsock_for_each_connected_socket(&vhost_transport.transport,
757*8e6ed963SJiyong Park 					vhost_vsock_reset_orphans);
758433fc58eSAsias He 
759a58da53fSStefano Garzarella 	/* Don't check the owner, because we are in the release path, so we
760a58da53fSStefano Garzarella 	 * need to stop the vsock device in any case.
761a58da53fSStefano Garzarella 	 * vhost_vsock_stop() can not fail in this case, so we don't need to
762a58da53fSStefano Garzarella 	 * check the return code.
763a58da53fSStefano Garzarella 	 */
764a58da53fSStefano Garzarella 	vhost_vsock_stop(vsock, false);
765433fc58eSAsias He 	vhost_vsock_flush(vsock);
766433fc58eSAsias He 	vhost_dev_stop(&vsock->dev);
767433fc58eSAsias He 
768433fc58eSAsias He 	spin_lock_bh(&vsock->send_pkt_list_lock);
769433fc58eSAsias He 	while (!list_empty(&vsock->send_pkt_list)) {
770433fc58eSAsias He 		struct virtio_vsock_pkt *pkt;
771433fc58eSAsias He 
772433fc58eSAsias He 		pkt = list_first_entry(&vsock->send_pkt_list,
773433fc58eSAsias He 				struct virtio_vsock_pkt, list);
774433fc58eSAsias He 		list_del_init(&pkt->list);
775433fc58eSAsias He 		virtio_transport_free_pkt(pkt);
776433fc58eSAsias He 	}
777433fc58eSAsias He 	spin_unlock_bh(&vsock->send_pkt_list_lock);
778433fc58eSAsias He 
779f6f93f75S夷则(Caspar) 	vhost_dev_cleanup(&vsock->dev);
780433fc58eSAsias He 	kfree(vsock->dev.vqs);
781433fc58eSAsias He 	vhost_vsock_free(vsock);
782433fc58eSAsias He 	return 0;
783433fc58eSAsias He }
784433fc58eSAsias He 
785433fc58eSAsias He static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
786433fc58eSAsias He {
787433fc58eSAsias He 	struct vhost_vsock *other;
788433fc58eSAsias He 
789433fc58eSAsias He 	/* Refuse reserved CIDs */
790433fc58eSAsias He 	if (guest_cid <= VMADDR_CID_HOST ||
791433fc58eSAsias He 	    guest_cid == U32_MAX)
792433fc58eSAsias He 		return -EINVAL;
793433fc58eSAsias He 
794433fc58eSAsias He 	/* 64-bit CIDs are not yet supported */
795433fc58eSAsias He 	if (guest_cid > U32_MAX)
796433fc58eSAsias He 		return -EINVAL;
797433fc58eSAsias He 
798ed8640a9SStefano Garzarella 	/* Refuse if CID is assigned to the guest->host transport (i.e. nested
799ed8640a9SStefano Garzarella 	 * VM), to make the loopback work.
800ed8640a9SStefano Garzarella 	 */
801ed8640a9SStefano Garzarella 	if (vsock_find_cid(guest_cid))
802ed8640a9SStefano Garzarella 		return -EADDRINUSE;
803ed8640a9SStefano Garzarella 
804433fc58eSAsias He 	/* Refuse if CID is already in use */
8056db3d8dcSStefan Hajnoczi 	mutex_lock(&vhost_vsock_mutex);
806834e772cSStefan Hajnoczi 	other = vhost_vsock_get(guest_cid);
8076c083c2bSGao feng 	if (other && other != vsock) {
8086db3d8dcSStefan Hajnoczi 		mutex_unlock(&vhost_vsock_mutex);
8096c083c2bSGao feng 		return -EADDRINUSE;
8106c083c2bSGao feng 	}
811834e772cSStefan Hajnoczi 
812834e772cSStefan Hajnoczi 	if (vsock->guest_cid)
813834e772cSStefan Hajnoczi 		hash_del_rcu(&vsock->hash);
814834e772cSStefan Hajnoczi 
815433fc58eSAsias He 	vsock->guest_cid = guest_cid;
8167fbe078cSZha Bin 	hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
8176db3d8dcSStefan Hajnoczi 	mutex_unlock(&vhost_vsock_mutex);
818433fc58eSAsias He 
819433fc58eSAsias He 	return 0;
820433fc58eSAsias He }
821433fc58eSAsias He 
822433fc58eSAsias He static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
823433fc58eSAsias He {
824433fc58eSAsias He 	struct vhost_virtqueue *vq;
825433fc58eSAsias He 	int i;
826433fc58eSAsias He 
827433fc58eSAsias He 	if (features & ~VHOST_VSOCK_FEATURES)
828433fc58eSAsias He 		return -EOPNOTSUPP;
829433fc58eSAsias He 
830433fc58eSAsias He 	mutex_lock(&vsock->dev.mutex);
831433fc58eSAsias He 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
832433fc58eSAsias He 	    !vhost_log_access_ok(&vsock->dev)) {
833e13a6915SStefano Garzarella 		goto err;
834e13a6915SStefano Garzarella 	}
835e13a6915SStefano Garzarella 
836e13a6915SStefano Garzarella 	if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
837e13a6915SStefano Garzarella 		if (vhost_init_device_iotlb(&vsock->dev, true))
838e13a6915SStefano Garzarella 			goto err;
839433fc58eSAsias He 	}
840433fc58eSAsias He 
841ced7b713SArseny Krasnov 	if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
842ced7b713SArseny Krasnov 		vsock->seqpacket_allow = true;
843ced7b713SArseny Krasnov 
844433fc58eSAsias He 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
845433fc58eSAsias He 		vq = &vsock->vqs[i];
846433fc58eSAsias He 		mutex_lock(&vq->mutex);
847433fc58eSAsias He 		vq->acked_features = features;
848433fc58eSAsias He 		mutex_unlock(&vq->mutex);
849433fc58eSAsias He 	}
850433fc58eSAsias He 	mutex_unlock(&vsock->dev.mutex);
851433fc58eSAsias He 	return 0;
852e13a6915SStefano Garzarella 
853e13a6915SStefano Garzarella err:
854e13a6915SStefano Garzarella 	mutex_unlock(&vsock->dev.mutex);
855e13a6915SStefano Garzarella 	return -EFAULT;
856433fc58eSAsias He }
857433fc58eSAsias He 
858433fc58eSAsias He static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
859433fc58eSAsias He 				  unsigned long arg)
860433fc58eSAsias He {
861433fc58eSAsias He 	struct vhost_vsock *vsock = f->private_data;
862433fc58eSAsias He 	void __user *argp = (void __user *)arg;
863433fc58eSAsias He 	u64 guest_cid;
864433fc58eSAsias He 	u64 features;
865433fc58eSAsias He 	int start;
866433fc58eSAsias He 	int r;
867433fc58eSAsias He 
868433fc58eSAsias He 	switch (ioctl) {
869433fc58eSAsias He 	case VHOST_VSOCK_SET_GUEST_CID:
870433fc58eSAsias He 		if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
871433fc58eSAsias He 			return -EFAULT;
872433fc58eSAsias He 		return vhost_vsock_set_cid(vsock, guest_cid);
873433fc58eSAsias He 	case VHOST_VSOCK_SET_RUNNING:
874433fc58eSAsias He 		if (copy_from_user(&start, argp, sizeof(start)))
875433fc58eSAsias He 			return -EFAULT;
876433fc58eSAsias He 		if (start)
877433fc58eSAsias He 			return vhost_vsock_start(vsock);
878433fc58eSAsias He 		else
879a58da53fSStefano Garzarella 			return vhost_vsock_stop(vsock, true);
880433fc58eSAsias He 	case VHOST_GET_FEATURES:
881433fc58eSAsias He 		features = VHOST_VSOCK_FEATURES;
882433fc58eSAsias He 		if (copy_to_user(argp, &features, sizeof(features)))
883433fc58eSAsias He 			return -EFAULT;
884433fc58eSAsias He 		return 0;
885433fc58eSAsias He 	case VHOST_SET_FEATURES:
886433fc58eSAsias He 		if (copy_from_user(&features, argp, sizeof(features)))
887433fc58eSAsias He 			return -EFAULT;
888433fc58eSAsias He 		return vhost_vsock_set_features(vsock, features);
889e13a6915SStefano Garzarella 	case VHOST_GET_BACKEND_FEATURES:
890e13a6915SStefano Garzarella 		features = VHOST_VSOCK_BACKEND_FEATURES;
891e13a6915SStefano Garzarella 		if (copy_to_user(argp, &features, sizeof(features)))
892e13a6915SStefano Garzarella 			return -EFAULT;
893e13a6915SStefano Garzarella 		return 0;
894e13a6915SStefano Garzarella 	case VHOST_SET_BACKEND_FEATURES:
895e13a6915SStefano Garzarella 		if (copy_from_user(&features, argp, sizeof(features)))
896e13a6915SStefano Garzarella 			return -EFAULT;
897e13a6915SStefano Garzarella 		if (features & ~VHOST_VSOCK_BACKEND_FEATURES)
898e13a6915SStefano Garzarella 			return -EOPNOTSUPP;
899e13a6915SStefano Garzarella 		vhost_set_backend_features(&vsock->dev, features);
900e13a6915SStefano Garzarella 		return 0;
901433fc58eSAsias He 	default:
902433fc58eSAsias He 		mutex_lock(&vsock->dev.mutex);
903433fc58eSAsias He 		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
904433fc58eSAsias He 		if (r == -ENOIOCTLCMD)
905433fc58eSAsias He 			r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
906433fc58eSAsias He 		else
907433fc58eSAsias He 			vhost_vsock_flush(vsock);
908433fc58eSAsias He 		mutex_unlock(&vsock->dev.mutex);
909433fc58eSAsias He 		return r;
910433fc58eSAsias He 	}
911433fc58eSAsias He }
912433fc58eSAsias He 
913e13a6915SStefano Garzarella static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
914e13a6915SStefano Garzarella {
915e13a6915SStefano Garzarella 	struct file *file = iocb->ki_filp;
916e13a6915SStefano Garzarella 	struct vhost_vsock *vsock = file->private_data;
917e13a6915SStefano Garzarella 	struct vhost_dev *dev = &vsock->dev;
918e13a6915SStefano Garzarella 	int noblock = file->f_flags & O_NONBLOCK;
919e13a6915SStefano Garzarella 
920e13a6915SStefano Garzarella 	return vhost_chr_read_iter(dev, to, noblock);
921e13a6915SStefano Garzarella }
922e13a6915SStefano Garzarella 
923e13a6915SStefano Garzarella static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb,
924e13a6915SStefano Garzarella 					struct iov_iter *from)
925e13a6915SStefano Garzarella {
926e13a6915SStefano Garzarella 	struct file *file = iocb->ki_filp;
927e13a6915SStefano Garzarella 	struct vhost_vsock *vsock = file->private_data;
928e13a6915SStefano Garzarella 	struct vhost_dev *dev = &vsock->dev;
929e13a6915SStefano Garzarella 
930e13a6915SStefano Garzarella 	return vhost_chr_write_iter(dev, from);
931e13a6915SStefano Garzarella }
932e13a6915SStefano Garzarella 
933e13a6915SStefano Garzarella static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait)
934e13a6915SStefano Garzarella {
935e13a6915SStefano Garzarella 	struct vhost_vsock *vsock = file->private_data;
936e13a6915SStefano Garzarella 	struct vhost_dev *dev = &vsock->dev;
937e13a6915SStefano Garzarella 
938e13a6915SStefano Garzarella 	return vhost_chr_poll(file, dev, wait);
939e13a6915SStefano Garzarella }
940e13a6915SStefano Garzarella 
941433fc58eSAsias He static const struct file_operations vhost_vsock_fops = {
942433fc58eSAsias He 	.owner          = THIS_MODULE,
943433fc58eSAsias He 	.open           = vhost_vsock_dev_open,
944433fc58eSAsias He 	.release        = vhost_vsock_dev_release,
945433fc58eSAsias He 	.llseek		= noop_llseek,
946433fc58eSAsias He 	.unlocked_ioctl = vhost_vsock_dev_ioctl,
947407e9ef7SArnd Bergmann 	.compat_ioctl   = compat_ptr_ioctl,
948e13a6915SStefano Garzarella 	.read_iter      = vhost_vsock_chr_read_iter,
949e13a6915SStefano Garzarella 	.write_iter     = vhost_vsock_chr_write_iter,
950e13a6915SStefano Garzarella 	.poll           = vhost_vsock_chr_poll,
951433fc58eSAsias He };
952433fc58eSAsias He 
953433fc58eSAsias He static struct miscdevice vhost_vsock_misc = {
954f4660cc9SStefan Hajnoczi 	.minor = VHOST_VSOCK_MINOR,
955433fc58eSAsias He 	.name = "vhost-vsock",
956433fc58eSAsias He 	.fops = &vhost_vsock_fops,
957433fc58eSAsias He };
958433fc58eSAsias He 
959433fc58eSAsias He static int __init vhost_vsock_init(void)
960433fc58eSAsias He {
961433fc58eSAsias He 	int ret;
962433fc58eSAsias He 
963c0cfa2d8SStefano Garzarella 	ret = vsock_core_register(&vhost_transport.transport,
964c0cfa2d8SStefano Garzarella 				  VSOCK_TRANSPORT_F_H2G);
965433fc58eSAsias He 	if (ret < 0)
966433fc58eSAsias He 		return ret;
967433fc58eSAsias He 	return misc_register(&vhost_vsock_misc);
968433fc58eSAsias He };
969433fc58eSAsias He 
970433fc58eSAsias He static void __exit vhost_vsock_exit(void)
971433fc58eSAsias He {
972433fc58eSAsias He 	misc_deregister(&vhost_vsock_misc);
973c0cfa2d8SStefano Garzarella 	vsock_core_unregister(&vhost_transport.transport);
974433fc58eSAsias He };
975433fc58eSAsias He 
976433fc58eSAsias He module_init(vhost_vsock_init);
977433fc58eSAsias He module_exit(vhost_vsock_exit);
978433fc58eSAsias He MODULE_LICENSE("GPL v2");
979433fc58eSAsias He MODULE_AUTHOR("Asias He");
980433fc58eSAsias He MODULE_DESCRIPTION("vhost transport for vsock ");
981f4660cc9SStefan Hajnoczi MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
982f4660cc9SStefan Hajnoczi MODULE_ALIAS("devname:vhost-vsock");
983