17a338472SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2433fc58eSAsias He /*
3433fc58eSAsias He * vhost transport for vsock
4433fc58eSAsias He *
5433fc58eSAsias He * Copyright (C) 2013-2015 Red Hat, Inc.
6433fc58eSAsias He * Author: Asias He <asias@redhat.com>
7433fc58eSAsias He * Stefan Hajnoczi <stefanha@redhat.com>
8433fc58eSAsias He */
9433fc58eSAsias He #include <linux/miscdevice.h>
10433fc58eSAsias He #include <linux/atomic.h>
11433fc58eSAsias He #include <linux/module.h>
12433fc58eSAsias He #include <linux/mutex.h>
13433fc58eSAsias He #include <linux/vmalloc.h>
14433fc58eSAsias He #include <net/sock.h>
15433fc58eSAsias He #include <linux/virtio_vsock.h>
16433fc58eSAsias He #include <linux/vhost.h>
17834e772cSStefan Hajnoczi #include <linux/hashtable.h>
18433fc58eSAsias He
19433fc58eSAsias He #include <net/af_vsock.h>
20433fc58eSAsias He #include "vhost.h"
21433fc58eSAsias He
22433fc58eSAsias He #define VHOST_VSOCK_DEFAULT_HOST_CID 2
23e82b9b07SJason Wang /* Max number of bytes transferred before requeueing the job.
24e82b9b07SJason Wang * Using this limit prevents one virtqueue from starving others. */
25e82b9b07SJason Wang #define VHOST_VSOCK_WEIGHT 0x80000
26e82b9b07SJason Wang /* Max number of packets transferred before requeueing the job.
27e82b9b07SJason Wang * Using this limit prevents one virtqueue from starving others with
28e82b9b07SJason Wang * small pkts.
29e82b9b07SJason Wang */
30e82b9b07SJason Wang #define VHOST_VSOCK_PKT_WEIGHT 256
31433fc58eSAsias He
/* Virtio feature bits this device offers to the guest: the common vhost
 * feature set plus platform-access (IOTLB) and SOCK_SEQPACKET support.
 */
enum {
        VHOST_VSOCK_FEATURES = VHOST_FEATURES |
                               (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
                               (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
};

/* Backend (vhost-internal) features negotiated with userspace. */
enum {
        VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
};

/* Used to track all the vhost_vsock instances on the system. */
static DEFINE_MUTEX(vhost_vsock_mutex);
/* Hash table of vhost_vsock instances, keyed by guest CID (8 hash bits).
 * Lookups run under RCU (see vhost_vsock_get()); writers take
 * vhost_vsock_mutex.
 */
static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
45433fc58eSAsias He
/* Per-device state, one instance per opened /dev/vhost-vsock file. */
struct vhost_vsock {
        struct vhost_dev dev;
        struct vhost_virtqueue vqs[2];  /* indexed by VSOCK_VQ_TX / VSOCK_VQ_RX */

        /* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
        struct hlist_node hash;

        /* Worker that drains send_pkt_queue into the guest's RX virtqueue */
        struct vhost_work send_pkt_work;
        struct sk_buff_head send_pkt_queue; /* host->guest pending packets */

        /* Count of queued reply packets awaiting transmission; used to
         * throttle TX processing when it reaches the TX virtqueue size
         * (see vhost_vsock_more_replies()).
         */
        atomic_t queued_replies;

        u32 guest_cid;          /* 0 until a CID is assigned (skipped by lookups) */
        bool seqpacket_allow;   /* guest negotiated VIRTIO_VSOCK_F_SEQPACKET */
};
61433fc58eSAsias He
vhost_transport_get_local_cid(void)62433fc58eSAsias He static u32 vhost_transport_get_local_cid(void)
63433fc58eSAsias He {
64433fc58eSAsias He return VHOST_VSOCK_DEFAULT_HOST_CID;
65433fc58eSAsias He }
66433fc58eSAsias He
/* Look up the vhost_vsock instance serving @guest_cid, or NULL if none.
 *
 * Callers that dereference the return value must hold vhost_vsock_mutex or the
 * RCU read lock.
 */
static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
{
        struct vhost_vsock *vsock;

        /* RCU-safe walk of the bucket that @guest_cid hashes into */
        hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
                u32 other_cid = vsock->guest_cid;

                /* Skip instances that have no CID yet */
                if (other_cid == 0)
                        continue;

                if (other_cid == guest_cid)
                        return vsock;

        }

        return NULL;
}
88433fc58eSAsias He
/* Move packets from vsock->send_pkt_queue (host->guest) into the guest's
 * RX virtqueue @vq, copying each header + payload into guest-provided
 * buffers.  Packets larger than the available buffer are split across
 * multiple buffers by requeueing the remainder.  Runs with vq->mutex held
 * for the duration; stops early when the weight limit is exceeded so one
 * virtqueue cannot starve the others.
 */
static void
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
                            struct vhost_virtqueue *vq)
{
        struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
        int pkts = 0, total_len = 0;
        bool added = false;
        bool restart_tx = false;

        mutex_lock(&vq->mutex);

        /* Device not started yet (no backend set) */
        if (!vhost_vq_get_backend(vq))
                goto out;

        if (!vq_meta_prefetch(vq))
                goto out;

        /* Avoid further vmexits, we're already processing the virtqueue */
        vhost_disable_notify(&vsock->dev, vq);

        do {
                struct virtio_vsock_hdr *hdr;
                size_t iov_len, payload_len;
                struct iov_iter iov_iter;
                u32 flags_to_restore = 0;
                struct sk_buff *skb;
                unsigned out, in;
                size_t nbytes;
                int head;

                skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);

                if (!skb) {
                        /* Queue drained: re-arm guest notifications and stop */
                        vhost_enable_notify(&vsock->dev, vq);
                        break;
                }

                head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
                                         &out, &in, NULL, NULL);
                if (head < 0) {
                        /* Error getting a descriptor; put the skb back for later */
                        virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
                        break;
                }

                if (head == vq->num) {
                        /* No buffer available; requeue the skb */
                        virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
                        /* We cannot finish yet if more buffers snuck in while
                         * re-enabling notify.
                         */
                        if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
                                vhost_disable_notify(&vsock->dev, vq);
                                continue;
                        }
                        break;
                }

                /* RX buffers must be device-writable only */
                if (out) {
                        kfree_skb(skb);
                        vq_err(vq, "Expected 0 output buffers, got %u\n", out);
                        break;
                }

                iov_len = iov_length(&vq->iov[out], in);
                if (iov_len < sizeof(*hdr)) {
                        kfree_skb(skb);
                        vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
                        break;
                }

                iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len);
                payload_len = skb->len;
                hdr = virtio_vsock_hdr(skb);

                /* If the packet is greater than the space available in the
                 * buffer, we split it using multiple buffers.
                 */
                if (payload_len > iov_len - sizeof(*hdr)) {
                        payload_len = iov_len - sizeof(*hdr);

                        /* As we are copying pieces of large packet's buffer to
                         * small rx buffers, headers of packets in rx queue are
                         * created dynamically and are initialized with header
                         * of current packet(except length). But in case of
                         * SOCK_SEQPACKET, we also must clear message delimeter
                         * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit
                         * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise,
                         * there will be sequence of packets with these
                         * bits set. After initialized header will be copied to
                         * rx buffer, these required bits will be restored.
                         */
                        if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
                                hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
                                flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;

                                if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) {
                                        hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
                                        flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR;
                                }
                        }
                }

                /* Set the correct length in the header */
                hdr->len = cpu_to_le32(payload_len);

                nbytes = copy_to_iter(hdr, sizeof(*hdr), &iov_iter);
                if (nbytes != sizeof(*hdr)) {
                        kfree_skb(skb);
                        vq_err(vq, "Faulted on copying pkt hdr\n");
                        break;
                }

                nbytes = copy_to_iter(skb->data, payload_len, &iov_iter);
                if (nbytes != payload_len) {
                        kfree_skb(skb);
                        vq_err(vq, "Faulted on copying pkt buf\n");
                        break;
                }

                /* Deliver to monitoring devices all packets that we
                 * will transmit.
                 */
                virtio_transport_deliver_tap_pkt(skb);

                vhost_add_used(vq, head, sizeof(*hdr) + payload_len);
                added = true;

                /* Consume the bytes just transmitted from the skb */
                skb_pull(skb, payload_len);
                total_len += payload_len;

                /* If we didn't send all the payload we can requeue the packet
                 * to send it with the next available buffer.
                 */
                if (skb->len > 0) {
                        hdr->flags |= cpu_to_le32(flags_to_restore);

                        /* We are queueing the same skb to handle
                         * the remaining bytes, and we want to deliver it
                         * to monitoring devices in the next iteration.
                         */
                        virtio_vsock_skb_clear_tap_delivered(skb);
                        virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
                } else {
                        if (virtio_vsock_skb_reply(skb)) {
                                int val;

                                val = atomic_dec_return(&vsock->queued_replies);

                                /* Do we have resources to resume tx
                                 * processing?
                                 */
                                if (val + 1 == tx_vq->num)
                                        restart_tx = true;
                        }

                        consume_skb(skb);
                }
        } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
        if (added)
                vhost_signal(&vsock->dev, vq);

out:
        mutex_unlock(&vq->mutex);

        /* Reply backlog dropped below the TX ring size: resume TX processing */
        if (restart_tx)
                vhost_poll_queue(&tx_vq->poll);
}
255433fc58eSAsias He
vhost_transport_send_pkt_work(struct vhost_work * work)256433fc58eSAsias He static void vhost_transport_send_pkt_work(struct vhost_work *work)
257433fc58eSAsias He {
258433fc58eSAsias He struct vhost_virtqueue *vq;
259433fc58eSAsias He struct vhost_vsock *vsock;
260433fc58eSAsias He
261433fc58eSAsias He vsock = container_of(work, struct vhost_vsock, send_pkt_work);
262433fc58eSAsias He vq = &vsock->vqs[VSOCK_VQ_RX];
263433fc58eSAsias He
264433fc58eSAsias He vhost_transport_do_send_pkt(vsock, vq);
265433fc58eSAsias He }
266433fc58eSAsias He
/* Queue @skb for transmission to the guest addressed by its destination CID
 * and kick the send worker.  Takes ownership of @skb (freed on error).
 *
 * Returns the payload length queued, or -ENODEV if no instance serves the
 * destination CID.
 */
static int
vhost_transport_send_pkt(struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vhost_vsock *vsock;
        int len = skb->len;

        /* RCU protects the vsock pointer until the packet is queued */
        rcu_read_lock();

        /* Find the vhost_vsock according to guest context id */
        vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid));
        if (!vsock) {
                rcu_read_unlock();
                kfree_skb(skb);
                return -ENODEV;
        }

        /* Account replies so TX can be throttled when they pile up */
        if (virtio_vsock_skb_reply(skb))
                atomic_inc(&vsock->queued_replies);

        virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
        vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);

        rcu_read_unlock();
        return len;
}
293433fc58eSAsias He
/* Remove all pending packets belonging to socket @vsk from the send queue
 * of the instance serving its remote CID.  If purged replies bring the
 * backlog back under the TX ring size, kick the TX queue to resume
 * processing.
 *
 * Returns 0 on success, -ENODEV if no instance serves the remote CID.
 */
static int
vhost_transport_cancel_pkt(struct vsock_sock *vsk)
{
        struct vhost_vsock *vsock;
        int cnt = 0;
        int ret = -ENODEV;

        rcu_read_lock();

        /* Find the vhost_vsock according to guest context id */
        vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
        if (!vsock)
                goto out;

        /* cnt = number of purged packets that were replies */
        cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue);

        if (cnt) {
                struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
                int new_cnt;

                new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
                /* Kick only on the transition from "full" to "has room" */
                if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
                        vhost_poll_queue(&tx_vq->poll);
        }

        ret = 0;
out:
        rcu_read_unlock();
        return ret;
}
32416320f36SPeng Tao
/* Build an skb from the guest-filled (device-readable) buffers of one TX
 * descriptor chain: copy in the virtio_vsock header, validate the claimed
 * payload length against both the transport maximum and the actual buffer
 * length, then copy in the payload.
 *
 * Returns the skb, or NULL on malformed input or allocation failure.
 */
static struct sk_buff *
vhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
                      unsigned int out, unsigned int in)
{
        struct virtio_vsock_hdr *hdr;
        struct iov_iter iov_iter;
        struct sk_buff *skb;
        size_t payload_len;
        size_t nbytes;
        size_t len;

        /* TX buffers must be device-readable only */
        if (in != 0) {
                vq_err(vq, "Expected 0 input buffers, got %u\n", in);
                return NULL;
        }

        len = iov_length(vq->iov, out);

        /* len contains both payload and hdr */
        skb = virtio_vsock_alloc_skb(len, GFP_KERNEL);
        if (!skb)
                return NULL;

        iov_iter_init(&iov_iter, ITER_SOURCE, vq->iov, out, len);

        hdr = virtio_vsock_hdr(skb);
        nbytes = copy_from_iter(hdr, sizeof(*hdr), &iov_iter);
        if (nbytes != sizeof(*hdr)) {
                vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
                       sizeof(*hdr), nbytes);
                kfree_skb(skb);
                return NULL;
        }

        /* Payload length as claimed by the guest in the header */
        payload_len = le32_to_cpu(hdr->len);

        /* No payload */
        if (!payload_len)
                return skb;

        /* The pkt is too big or the length in the header is invalid */
        if (payload_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE ||
            payload_len + sizeof(*hdr) > len) {
                kfree_skb(skb);
                return NULL;
        }

        virtio_vsock_skb_rx_put(skb);

        nbytes = copy_from_iter(skb->data, payload_len, &iov_iter);
        if (nbytes != payload_len) {
                vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
                       payload_len, nbytes);
                kfree_skb(skb);
                return NULL;
        }

        return skb;
}
384433fc58eSAsias He
/* Is there space left for replies to rx packets?
 *
 * Returns false once the number of queued replies reaches the TX virtqueue
 * size, at which point TX processing is paused (see
 * vhost_vsock_handle_tx_kick()).
 */
static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
{
        struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
        int val;

        smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
        val = atomic_read(&vsock->queued_replies);

        return val < vq->num;
}
396433fc58eSAsias He
/* Forward declaration: needs vhost_transport, which references it below. */
static bool vhost_transport_seqpacket_allow(u32 remote_cid);

/* vsock transport ops for the vhost backend.  Most operations are the
 * generic virtio_transport_* implementations; only CID lookup, packet
 * cancellation, seqpacket gating and the send path are vhost-specific.
 */
static struct virtio_transport vhost_transport = {
        .transport = {
                .module                   = THIS_MODULE,

                .get_local_cid            = vhost_transport_get_local_cid,

                .init                     = virtio_transport_do_socket_init,
                .destruct                 = virtio_transport_destruct,
                .release                  = virtio_transport_release,
                .connect                  = virtio_transport_connect,
                .shutdown                 = virtio_transport_shutdown,
                .cancel_pkt               = vhost_transport_cancel_pkt,

                .dgram_enqueue            = virtio_transport_dgram_enqueue,
                .dgram_dequeue            = virtio_transport_dgram_dequeue,
                .dgram_bind               = virtio_transport_dgram_bind,
                .dgram_allow              = virtio_transport_dgram_allow,

                .stream_enqueue           = virtio_transport_stream_enqueue,
                .stream_dequeue           = virtio_transport_stream_dequeue,
                .stream_has_data          = virtio_transport_stream_has_data,
                .stream_has_space         = virtio_transport_stream_has_space,
                .stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
                .stream_is_active         = virtio_transport_stream_is_active,
                .stream_allow             = virtio_transport_stream_allow,

                .seqpacket_dequeue        = virtio_transport_seqpacket_dequeue,
                .seqpacket_enqueue        = virtio_transport_seqpacket_enqueue,
                .seqpacket_allow          = vhost_transport_seqpacket_allow,
                .seqpacket_has_data       = virtio_transport_seqpacket_has_data,

                .notify_poll_in           = virtio_transport_notify_poll_in,
                .notify_poll_out          = virtio_transport_notify_poll_out,
                .notify_recv_init         = virtio_transport_notify_recv_init,
                .notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
                .notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
                .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
                .notify_send_init         = virtio_transport_notify_send_init,
                .notify_send_pre_block    = virtio_transport_notify_send_pre_block,
                .notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
                .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
                .notify_buffer_size       = virtio_transport_notify_buffer_size,
                .notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,

                .read_skb                 = virtio_transport_read_skb,
        },

        .send_pkt = vhost_transport_send_pkt,
};
4484c7246dcSStefano Garzarella
vhost_transport_seqpacket_allow(u32 remote_cid)449ced7b713SArseny Krasnov static bool vhost_transport_seqpacket_allow(u32 remote_cid)
450ced7b713SArseny Krasnov {
451ced7b713SArseny Krasnov struct vhost_vsock *vsock;
452ced7b713SArseny Krasnov bool seqpacket_allow = false;
453ced7b713SArseny Krasnov
454ced7b713SArseny Krasnov rcu_read_lock();
455ced7b713SArseny Krasnov vsock = vhost_vsock_get(remote_cid);
456ced7b713SArseny Krasnov
457ced7b713SArseny Krasnov if (vsock)
458ced7b713SArseny Krasnov seqpacket_allow = vsock->seqpacket_allow;
459ced7b713SArseny Krasnov
460ced7b713SArseny Krasnov rcu_read_unlock();
461ced7b713SArseny Krasnov
462ced7b713SArseny Krasnov return seqpacket_allow;
463ced7b713SArseny Krasnov }
464ced7b713SArseny Krasnov
/* TX virtqueue kick handler: drain guest->host packets.  Each descriptor
 * chain is turned into an skb, tapped for monitoring devices, and handed to
 * the core transport if correctly addressed.  Processing pauses (leaving
 * callbacks disabled) when the reply backlog fills the TX ring, and yields
 * when the weight limit is exceeded.
 */
static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
        struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
                                                  poll.work);
        struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
                                                 dev);
        int head, pkts = 0, total_len = 0;
        unsigned int out, in;
        struct sk_buff *skb;
        bool added = false;

        mutex_lock(&vq->mutex);

        /* Device not started yet (no backend set) */
        if (!vhost_vq_get_backend(vq))
                goto out;

        if (!vq_meta_prefetch(vq))
                goto out;

        vhost_disable_notify(&vsock->dev, vq);
        do {
                struct virtio_vsock_hdr *hdr;

                if (!vhost_vsock_more_replies(vsock)) {
                        /* Stop tx until the device processes already
                         * pending replies. Leave tx virtqueue
                         * callbacks disabled.
                         */
                        goto no_more_replies;
                }

                head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
                                         &out, &in, NULL, NULL);
                if (head < 0)
                        break;

                if (head == vq->num) {
                        /* Ring empty; re-check after re-enabling notify */
                        if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
                                vhost_disable_notify(&vsock->dev, vq);
                                continue;
                        }
                        break;
                }

                skb = vhost_vsock_alloc_skb(vq, out, in);
                if (!skb) {
                        vq_err(vq, "Faulted on pkt\n");
                        continue;
                }

                total_len += sizeof(*hdr) + skb->len;

                /* Deliver to monitoring devices all received packets */
                virtio_transport_deliver_tap_pkt(skb);

                hdr = virtio_vsock_hdr(skb);

                /* Only accept correctly addressed packets */
                if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid &&
                    le64_to_cpu(hdr->dst_cid) ==
                    vhost_transport_get_local_cid())
                        virtio_transport_recv_pkt(&vhost_transport, skb);
                else
                        kfree_skb(skb);

                vhost_add_used(vq, head, 0);
                added = true;
        } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));

no_more_replies:
        if (added)
                vhost_signal(&vsock->dev, vq);

out:
        mutex_unlock(&vq->mutex);
}
541433fc58eSAsias He
vhost_vsock_handle_rx_kick(struct vhost_work * work)542433fc58eSAsias He static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
543433fc58eSAsias He {
544433fc58eSAsias He struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
545433fc58eSAsias He poll.work);
546433fc58eSAsias He struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
547433fc58eSAsias He dev);
548433fc58eSAsias He
549433fc58eSAsias He vhost_transport_do_send_pkt(vsock, vq);
550433fc58eSAsias He }
551433fc58eSAsias He
/* Start the device (VHOST_VSOCK_SET_RUNNING path presumably — confirm
 * against the ioctl handler below this chunk): validate access to each
 * virtqueue, install @vsock as the backend, and kick the send worker for
 * packets queued before start.  On failure, all backends set so far are
 * torn down.
 *
 * Returns 0 on success or a negative errno.
 */
static int vhost_vsock_start(struct vhost_vsock *vsock)
{
        struct vhost_virtqueue *vq;
        size_t i;
        int ret;

        mutex_lock(&vsock->dev.mutex);

        ret = vhost_dev_check_owner(&vsock->dev);
        if (ret)
                goto err;

        for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
                vq = &vsock->vqs[i];

                mutex_lock(&vq->mutex);

                if (!vhost_vq_access_ok(vq)) {
                        ret = -EFAULT;
                        goto err_vq;
                }

                /* Only initialize vqs that aren't already running */
                if (!vhost_vq_get_backend(vq)) {
                        vhost_vq_set_backend(vq, vsock);
                        ret = vhost_vq_init_access(vq);
                        if (ret)
                                goto err_vq;
                }

                mutex_unlock(&vq->mutex);
        }

        /* Some packets may have been queued before the device was started,
         * let's kick the send worker to send them.
         */
        vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);

        mutex_unlock(&vsock->dev.mutex);
        return 0;

err_vq:
        /* Clear the vq that failed (its mutex is still held) ... */
        vhost_vq_set_backend(vq, NULL);
        mutex_unlock(&vq->mutex);

        /* ... then clear every vq, including those set in earlier iterations */
        for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
                vq = &vsock->vqs[i];

                mutex_lock(&vq->mutex);
                vhost_vq_set_backend(vq, NULL);
                mutex_unlock(&vq->mutex);
        }
err:
        mutex_unlock(&vsock->dev.mutex);
        return ret;
}
607433fc58eSAsias He
/* Stop the device by clearing the backend pointer of every virtqueue.
 *
 * @check_owner: when true, fail with an error unless the caller owns the
 * vhost device (the release path passes false since the owner may have
 * exited already).
 *
 * Returns 0 on success or the vhost_dev_check_owner() error.
 */
static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
{
        size_t i;
        int ret = 0;

        mutex_lock(&vsock->dev.mutex);

        if (check_owner) {
                ret = vhost_dev_check_owner(&vsock->dev);
                if (ret)
                        goto err;
        }

        for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
                struct vhost_virtqueue *vq = &vsock->vqs[i];

                mutex_lock(&vq->mutex);
                vhost_vq_set_backend(vq, NULL);
                mutex_unlock(&vq->mutex);
        }

err:
        mutex_unlock(&vsock->dev.mutex);
        return ret;
}
633433fc58eSAsias He
/* Free a vhost_vsock allocated with kvmalloc() in vhost_vsock_dev_open(). */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
        kvfree(vsock);
}
638433fc58eSAsias He
/* misc-device open handler: allocate and initialize a vhost_vsock instance
 * and stash it in file->private_data.  The instance has no guest CID yet;
 * it is not added to vhost_vsock_hash here.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
{
        struct vhost_virtqueue **vqs;
        struct vhost_vsock *vsock;
        int ret;

        /* This struct is large and allocation could fail, fall back to vmalloc
         * if there is no other way.
         */
        vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
        if (!vsock)
                return -ENOMEM;

        vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
        if (!vqs) {
                ret = -ENOMEM;
                goto out;
        }

        vsock->guest_cid = 0; /* no CID assigned yet */
        vsock->seqpacket_allow = false;

        atomic_set(&vsock->queued_replies, 0);

        vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
        vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
        vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
        vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;

        vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
                       UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
                       VHOST_VSOCK_WEIGHT, true, NULL);

        file->private_data = vsock;
        skb_queue_head_init(&vsock->send_pkt_queue);
        vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
        return 0;

out:
        vhost_vsock_free(vsock);
        return ret;
}
681433fc58eSAsias He
/* Wait until all currently queued vhost work for this device has run. */
static void vhost_vsock_flush(struct vhost_vsock *vsock)
{
	vhost_dev_flush(&vsock->dev);
}
686433fc58eSAsias He
/* Reset a connected socket whose peer's vhost device has gone away.
 * Invoked for each connected socket via vsock_for_each_connected_socket()
 * from the device release path.
 */
static void vhost_vsock_reset_orphans(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	/* vmci_transport.c doesn't take sk_lock here either. At least we're
	 * under vsock_table_lock so the sock cannot disappear while we're
	 * executing.
	 */

	/* If the peer is still valid, no need to reset connection */
	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
		return;

	/* If the close timeout is pending, let it expire. This avoids races
	 * with the timeout callback.
	 */
	if (vsk->close_work_scheduled)
		return;

	/* Peer is gone: mark the socket dead and wake up anyone waiting. */
	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	sk->sk_state = SS_UNCONNECTED;
	sk->sk_err = ECONNRESET;
	sk_error_report(sk);
}
712433fc58eSAsias He
/* Release /dev/vhost-vsock: unpublish the device, reset orphaned sockets,
 * stop and tear down the vhost device, then free everything.
 * The teardown order matters: readers must be unable to find the device
 * (hash_del_rcu + synchronize_rcu) before it is stopped and freed.
 */
static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
{
	struct vhost_vsock *vsock = file->private_data;

	mutex_lock(&vhost_vsock_mutex);
	if (vsock->guest_cid)
		hash_del_rcu(&vsock->hash);
	mutex_unlock(&vhost_vsock_mutex);

	/* Wait for other CPUs to finish using vsock */
	synchronize_rcu();

	/* Iterating over all connections for all CIDs to find orphans is
	 * inefficient. Room for improvement here. */
	vsock_for_each_connected_socket(&vhost_transport.transport,
					vhost_vsock_reset_orphans);

	/* Don't check the owner, because we are in the release path, so we
	 * need to stop the vsock device in any case.
	 * vhost_vsock_stop() can not fail in this case, so we don't need to
	 * check the return code.
	 */
	vhost_vsock_stop(vsock, false);
	vhost_vsock_flush(vsock);
	vhost_dev_stop(&vsock->dev);

	/* Drop any packets still queued for transmission to the guest. */
	virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);

	vhost_dev_cleanup(&vsock->dev);
	kfree(vsock->dev.vqs);
	vhost_vsock_free(vsock);
	return 0;
}
746433fc58eSAsias He
/* Assign a guest CID to this device (VHOST_VSOCK_SET_GUEST_CID ioctl).
 *
 * Returns 0 on success, -EINVAL for reserved or out-of-range CIDs, or
 * -EADDRINUSE if the CID is already taken by another vhost device or by
 * the guest->host transport.
 */
static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
{
	struct vhost_vsock *other;

	/* Refuse reserved CIDs */
	if (guest_cid <= VMADDR_CID_HOST ||
	    guest_cid == U32_MAX)
		return -EINVAL;

	/* 64-bit CIDs are not yet supported */
	if (guest_cid > U32_MAX)
		return -EINVAL;

	/* Refuse if CID is assigned to the guest->host transport (i.e. nested
	 * VM), to make the loopback work.
	 */
	if (vsock_find_cid(guest_cid))
		return -EADDRINUSE;

	/* Refuse if CID is already in use */
	mutex_lock(&vhost_vsock_mutex);
	other = vhost_vsock_get(guest_cid);
	if (other && other != vsock) {
		mutex_unlock(&vhost_vsock_mutex);
		return -EADDRINUSE;
	}

	/* Re-assigning: remove the old hash entry before publishing the new
	 * CID so RCU readers never see a stale mapping.
	 */
	if (vsock->guest_cid)
		hash_del_rcu(&vsock->hash);

	vsock->guest_cid = guest_cid;
	hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
	mutex_unlock(&vhost_vsock_mutex);

	return 0;
}
783433fc58eSAsias He
/* Accept the feature set negotiated by userspace (VHOST_SET_FEATURES).
 *
 * Returns 0 on success, -EOPNOTSUPP if unknown feature bits are set, or
 * -EFAULT if log access validation or IOTLB setup fails.
 */
static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
{
	struct vhost_virtqueue *vq;
	int i;

	if (features & ~VHOST_VSOCK_FEATURES)
		return -EOPNOTSUPP;

	mutex_lock(&vsock->dev.mutex);
	if ((features & (1 << VHOST_F_LOG_ALL)) &&
	    !vhost_log_access_ok(&vsock->dev)) {
		goto err;
	}

	if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
		if (vhost_init_device_iotlb(&vsock->dev))
			goto err;
	}

	/* Cache SEQPACKET support so it doesn't have to be re-derived from
	 * the acked feature bits.
	 */
	vsock->seqpacket_allow = features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET);

	/* Propagate the acked features to every virtqueue, taking each
	 * vq->mutex under dev.mutex (established lock order).
	 */
	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];
		mutex_lock(&vq->mutex);
		vq->acked_features = features;
		mutex_unlock(&vq->mutex);
	}
	mutex_unlock(&vsock->dev.mutex);
	return 0;

err:
	mutex_unlock(&vsock->dev.mutex);
	return -EFAULT;
}
818433fc58eSAsias He
/* ioctl entry point for /dev/vhost-vsock.
 *
 * Handles the vsock-specific and feature ioctls directly; anything
 * unrecognized is forwarded to the generic vhost device and vring
 * handlers under dev.mutex.
 */
static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
				  unsigned long arg)
{
	struct vhost_vsock *vsock = f->private_data;
	void __user *argp = (void __user *)arg;
	u64 guest_cid;
	u64 features;
	int start;
	int r;

	switch (ioctl) {
	case VHOST_VSOCK_SET_GUEST_CID:
		if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
			return -EFAULT;
		return vhost_vsock_set_cid(vsock, guest_cid);
	case VHOST_VSOCK_SET_RUNNING:
		/* Non-zero starts the device; zero stops it (owner checked). */
		if (copy_from_user(&start, argp, sizeof(start)))
			return -EFAULT;
		if (start)
			return vhost_vsock_start(vsock);
		else
			return vhost_vsock_stop(vsock, true);
	case VHOST_GET_FEATURES:
		features = VHOST_VSOCK_FEATURES;
		if (copy_to_user(argp, &features, sizeof(features)))
			return -EFAULT;
		return 0;
	case VHOST_SET_FEATURES:
		if (copy_from_user(&features, argp, sizeof(features)))
			return -EFAULT;
		return vhost_vsock_set_features(vsock, features);
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VSOCK_BACKEND_FEATURES;
		if (copy_to_user(argp, &features, sizeof(features)))
			return -EFAULT;
		return 0;
	case VHOST_SET_BACKEND_FEATURES:
		if (copy_from_user(&features, argp, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VSOCK_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&vsock->dev, features);
		return 0;
	default:
		/* Generic vhost ioctls; flush outstanding work after a
		 * successful device-level ioctl.
		 */
		mutex_lock(&vsock->dev.mutex);
		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
		else
			vhost_vsock_flush(vsock);
		mutex_unlock(&vsock->dev.mutex);
		return r;
	}
}
873433fc58eSAsias He
vhost_vsock_chr_read_iter(struct kiocb * iocb,struct iov_iter * to)874e13a6915SStefano Garzarella static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
875e13a6915SStefano Garzarella {
876e13a6915SStefano Garzarella struct file *file = iocb->ki_filp;
877e13a6915SStefano Garzarella struct vhost_vsock *vsock = file->private_data;
878e13a6915SStefano Garzarella struct vhost_dev *dev = &vsock->dev;
879e13a6915SStefano Garzarella int noblock = file->f_flags & O_NONBLOCK;
880e13a6915SStefano Garzarella
881e13a6915SStefano Garzarella return vhost_chr_read_iter(dev, to, noblock);
882e13a6915SStefano Garzarella }
883e13a6915SStefano Garzarella
vhost_vsock_chr_write_iter(struct kiocb * iocb,struct iov_iter * from)884e13a6915SStefano Garzarella static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb,
885e13a6915SStefano Garzarella struct iov_iter *from)
886e13a6915SStefano Garzarella {
887e13a6915SStefano Garzarella struct file *file = iocb->ki_filp;
888e13a6915SStefano Garzarella struct vhost_vsock *vsock = file->private_data;
889e13a6915SStefano Garzarella struct vhost_dev *dev = &vsock->dev;
890e13a6915SStefano Garzarella
891e13a6915SStefano Garzarella return vhost_chr_write_iter(dev, from);
892e13a6915SStefano Garzarella }
893e13a6915SStefano Garzarella
vhost_vsock_chr_poll(struct file * file,poll_table * wait)894e13a6915SStefano Garzarella static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait)
895e13a6915SStefano Garzarella {
896e13a6915SStefano Garzarella struct vhost_vsock *vsock = file->private_data;
897e13a6915SStefano Garzarella struct vhost_dev *dev = &vsock->dev;
898e13a6915SStefano Garzarella
899e13a6915SStefano Garzarella return vhost_chr_poll(file, dev, wait);
900e13a6915SStefano Garzarella }
901e13a6915SStefano Garzarella
/* File operations for /dev/vhost-vsock. read/write/poll forward to the
 * generic vhost character-device helpers (vhost_chr_*).
 */
static const struct file_operations vhost_vsock_fops = {
	.owner          = THIS_MODULE,
	.open           = vhost_vsock_dev_open,
	.release        = vhost_vsock_dev_release,
	.llseek		= noop_llseek,
	.unlocked_ioctl = vhost_vsock_dev_ioctl,
	.compat_ioctl   = compat_ptr_ioctl,
	.read_iter      = vhost_vsock_chr_read_iter,
	.write_iter     = vhost_vsock_chr_write_iter,
	.poll           = vhost_vsock_chr_poll,
};
913433fc58eSAsias He
/* Misc device registered at module init; creates /dev/vhost-vsock with a
 * fixed minor number.
 */
static struct miscdevice vhost_vsock_misc = {
	.minor = VHOST_VSOCK_MINOR,
	.name = "vhost-vsock",
	.fops = &vhost_vsock_fops,
};
919433fc58eSAsias He
vhost_vsock_init(void)920433fc58eSAsias He static int __init vhost_vsock_init(void)
921433fc58eSAsias He {
922433fc58eSAsias He int ret;
923433fc58eSAsias He
924c0cfa2d8SStefano Garzarella ret = vsock_core_register(&vhost_transport.transport,
925c0cfa2d8SStefano Garzarella VSOCK_TRANSPORT_F_H2G);
926433fc58eSAsias He if (ret < 0)
927433fc58eSAsias He return ret;
9287a4efe18SYuan Can
9297a4efe18SYuan Can ret = misc_register(&vhost_vsock_misc);
9307a4efe18SYuan Can if (ret) {
9317a4efe18SYuan Can vsock_core_unregister(&vhost_transport.transport);
9327a4efe18SYuan Can return ret;
9337a4efe18SYuan Can }
9347a4efe18SYuan Can
9357a4efe18SYuan Can return 0;
936433fc58eSAsias He };
937433fc58eSAsias He
/* Module exit: tear down in reverse order of vhost_vsock_init(). */
static void __exit vhost_vsock_exit(void)
{
	misc_deregister(&vhost_vsock_misc);
	vsock_core_unregister(&vhost_transport.transport);
};
943433fc58eSAsias He
/* Module entry points and metadata. */
module_init(vhost_vsock_init);
module_exit(vhost_vsock_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("vhost transport for vsock ");
MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
MODULE_ALIAS("devname:vhost-vsock");
951