// SPDX-License-Identifier: GPL-2.0-only
/*
 * vhost transport for vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
#include <linux/miscdevice.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <net/sock.h>
#include <linux/virtio_vsock.h>
#include <linux/vhost.h>
#include <linux/hashtable.h>

#include <net/af_vsock.h>
#include "vhost.h"

#define VHOST_VSOCK_DEFAULT_HOST_CID	2
/* Max number of bytes transferred before requeueing the job.
 * Using this limit prevents one virtqueue from starving others. */
#define VHOST_VSOCK_WEIGHT 0x80000
/* Max number of packets transferred before requeueing the job.
 * Using this limit prevents one virtqueue from starving others with
 * small pkts.
 */
#define VHOST_VSOCK_PKT_WEIGHT 256

enum {
	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
};

/* Used to track all the vhost_vsock instances on the system. */
static DEFINE_MUTEX(vhost_vsock_mutex);
static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);

struct vhost_vsock {
	struct vhost_dev dev;
	struct vhost_virtqueue vqs[2];

	/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
	struct hlist_node hash;

	struct vhost_work send_pkt_work;
	spinlock_t send_pkt_list_lock;
	struct list_head send_pkt_list;	/* host->guest pending packets */

	atomic_t queued_replies;

	u32 guest_cid;
};

static u32 vhost_transport_get_local_cid(void)
{
	return VHOST_VSOCK_DEFAULT_HOST_CID;
}

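/* Example (sketch) of the read-side pattern used with the lookup below:
 *
 *	rcu_read_lock();
 *	vsock = vhost_vsock_get(guest_cid);
 *	if (vsock)
 *		... use vsock ...
 *	rcu_read_unlock();
 *
 * vhost_transport_send_pkt() and vhost_transport_cancel_pkt() follow this
 * pattern; writers serialize on vhost_vsock_mutex instead.
 */
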
/* Callers that dereference the return value must hold vhost_vsock_mutex or the
 * RCU read lock.
 */
static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
{
	struct vhost_vsock *vsock;

	hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
		u32 other_cid = vsock->guest_cid;

		/* Skip instances that have no CID yet */
		if (other_cid == 0)
			continue;

		if (other_cid == guest_cid)
			return vsock;
	}

	return NULL;
}

static void
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
			    struct vhost_virtqueue *vq)
{
	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
	int pkts = 0, total_len = 0;
	bool added = false;
	bool restart_tx = false;

	mutex_lock(&vq->mutex);

	if (!vq->private_data)
		goto out;

	/* Avoid further vmexits, we're already processing the virtqueue */
	vhost_disable_notify(&vsock->dev, vq);

	do {
		struct virtio_vsock_pkt *pkt;
		struct iov_iter iov_iter;
		unsigned out, in;
		size_t nbytes;
		size_t iov_len, payload_len;
		int head;

		spin_lock_bh(&vsock->send_pkt_list_lock);
		if (list_empty(&vsock->send_pkt_list)) {
			spin_unlock_bh(&vsock->send_pkt_list_lock);
			vhost_enable_notify(&vsock->dev, vq);
			break;
		}

		pkt = list_first_entry(&vsock->send_pkt_list,
				       struct virtio_vsock_pkt, list);
		list_del_init(&pkt->list);
		spin_unlock_bh(&vsock->send_pkt_list_lock);

		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		if (head < 0) {
			spin_lock_bh(&vsock->send_pkt_list_lock);
			list_add(&pkt->list, &vsock->send_pkt_list);
			spin_unlock_bh(&vsock->send_pkt_list_lock);
			break;
		}

		if (head == vq->num) {
			spin_lock_bh(&vsock->send_pkt_list_lock);
			list_add(&pkt->list, &vsock->send_pkt_list);
			spin_unlock_bh(&vsock->send_pkt_list_lock);

			/* We cannot finish yet if more buffers snuck in while
			 * re-enabling notify.
			 */
			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
				vhost_disable_notify(&vsock->dev, vq);
				continue;
			}
			break;
		}

		if (out) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
			break;
		}

		iov_len = iov_length(&vq->iov[out], in);
		if (iov_len < sizeof(pkt->hdr)) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
			break;
		}

		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len);
		payload_len = pkt->len - pkt->off;

		/* If the packet is greater than the space available in the
		 * buffer, we split it using multiple buffers.
		 */
		if (payload_len > iov_len - sizeof(pkt->hdr))
			payload_len = iov_len - sizeof(pkt->hdr);

		/* Set the correct length in the header */
		pkt->hdr.len = cpu_to_le32(payload_len);

		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
		if (nbytes != sizeof(pkt->hdr)) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Faulted on copying pkt hdr\n");
			break;
		}

		nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len,
				      &iov_iter);
		if (nbytes != payload_len) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Faulted on copying pkt buf\n");
			break;
		}

		vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
		added = true;

		/* Deliver to monitoring devices all correctly transmitted
		 * packets.
		 */
		virtio_transport_deliver_tap_pkt(pkt);

		pkt->off += payload_len;
		total_len += payload_len;

		/* If we didn't send all the payload we can requeue the packet
		 * to send it with the next available buffer.
		 */
		if (pkt->off < pkt->len) {
			spin_lock_bh(&vsock->send_pkt_list_lock);
			list_add(&pkt->list, &vsock->send_pkt_list);
			spin_unlock_bh(&vsock->send_pkt_list_lock);
		} else {
			if (pkt->reply) {
				int val;

				val = atomic_dec_return(&vsock->queued_replies);

				/* Do we have resources to resume tx
				 * processing?
				 */
				if (val + 1 == tx_vq->num)
					restart_tx = true;
			}

			virtio_transport_free_pkt(pkt);
		}
	} while (likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
	if (added)
		vhost_signal(&vsock->dev, vq);

out:
	mutex_unlock(&vq->mutex);

	if (restart_tx)
		vhost_poll_queue(&tx_vq->poll);
}

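/* Worker that drains send_pkt_list into the guest's RX virtqueue.  Packets
 * larger than a single guest buffer are split by
 * vhost_transport_do_send_pkt() above and the remainder is requeued.
 */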
static void vhost_transport_send_pkt_work(struct vhost_work *work)
{
	struct vhost_virtqueue *vq;
	struct vhost_vsock *vsock;

	vsock = container_of(work, struct vhost_vsock, send_pkt_work);
	vq = &vsock->vqs[VSOCK_VQ_RX];

	vhost_transport_do_send_pkt(vsock, vq);
}

static int
vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
{
	struct vhost_vsock *vsock;
	int len = pkt->len;

	rcu_read_lock();

	/* Find the vhost_vsock according to guest context id */
	vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
	if (!vsock) {
		rcu_read_unlock();
		virtio_transport_free_pkt(pkt);
		return -ENODEV;
	}

	if (pkt->reply)
		atomic_inc(&vsock->queued_replies);

	spin_lock_bh(&vsock->send_pkt_list_lock);
	list_add_tail(&pkt->list, &vsock->send_pkt_list);
	spin_unlock_bh(&vsock->send_pkt_list_lock);

	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);

	rcu_read_unlock();
	return len;
}

static int
vhost_transport_cancel_pkt(struct vsock_sock *vsk)
{
	struct vhost_vsock *vsock;
	struct virtio_vsock_pkt *pkt, *n;
	int cnt = 0;
	int ret = -ENODEV;
	LIST_HEAD(freeme);

	rcu_read_lock();

	/* Find the vhost_vsock according to guest context id */
	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
	if (!vsock)
		goto out;

	spin_lock_bh(&vsock->send_pkt_list_lock);
	list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
		if (pkt->vsk != vsk)
			continue;
		list_move(&pkt->list, &freeme);
	}
	spin_unlock_bh(&vsock->send_pkt_list_lock);

	list_for_each_entry_safe(pkt, n, &freeme, list) {
		if (pkt->reply)
			cnt++;
		list_del(&pkt->list);
		virtio_transport_free_pkt(pkt);
	}

	if (cnt) {
		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
		int new_cnt;

		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
			vhost_poll_queue(&tx_vq->poll);
	}

	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}

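/* Each guest TX descriptor chain is expected to carry a virtio_vsock_hdr
 * followed by an optional payload in driver-readable buffers only; any
 * device-writable buffer is rejected below.
 */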
static struct virtio_vsock_pkt *
vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
		      unsigned int out, unsigned int in)
{
	struct virtio_vsock_pkt *pkt;
	struct iov_iter iov_iter;
	size_t nbytes;
	size_t len;

	if (in != 0) {
		vq_err(vq, "Expected 0 input buffers, got %u\n", in);
		return NULL;
	}

	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt)
		return NULL;

	len = iov_length(vq->iov, out);
	iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);

	nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
	if (nbytes != sizeof(pkt->hdr)) {
		vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
		       sizeof(pkt->hdr), nbytes);
		kfree(pkt);
		return NULL;
	}

	if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
		pkt->len = le32_to_cpu(pkt->hdr.len);

	/* No payload */
	if (!pkt->len)
		return pkt;

	/* The pkt is too big */
	if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
		kfree(pkt);
		return NULL;
	}

	pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
	if (!pkt->buf) {
		kfree(pkt);
		return NULL;
	}

	pkt->buf_len = pkt->len;

	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
	if (nbytes != pkt->len) {
		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
		       pkt->len, nbytes);
		virtio_transport_free_pkt(pkt);
		return NULL;
	}

	return pkt;
}

/* Is there space left for replies to rx packets? */
static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
{
	struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
	int val;

	smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
	val = atomic_read(&vsock->queued_replies);

	return val < vq->num;
}

static struct virtio_transport vhost_transport = {
	.transport = {
		.module = THIS_MODULE,

		.get_local_cid = vhost_transport_get_local_cid,

		.init = virtio_transport_do_socket_init,
		.destruct = virtio_transport_destruct,
		.release = virtio_transport_release,
		.connect = virtio_transport_connect,
		.shutdown = virtio_transport_shutdown,
		.cancel_pkt = vhost_transport_cancel_pkt,

		.dgram_enqueue = virtio_transport_dgram_enqueue,
		.dgram_dequeue = virtio_transport_dgram_dequeue,
		.dgram_bind = virtio_transport_dgram_bind,
		.dgram_allow = virtio_transport_dgram_allow,

		.stream_enqueue = virtio_transport_stream_enqueue,
		.stream_dequeue = virtio_transport_stream_dequeue,
		.stream_has_data = virtio_transport_stream_has_data,
		.stream_has_space = virtio_transport_stream_has_space,
		.stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
		.stream_is_active = virtio_transport_stream_is_active,
		.stream_allow = virtio_transport_stream_allow,

		.notify_poll_in = virtio_transport_notify_poll_in,
		.notify_poll_out = virtio_transport_notify_poll_out,
		.notify_recv_init = virtio_transport_notify_recv_init,
		.notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
		.notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
		.notify_send_init = virtio_transport_notify_send_init,
		.notify_send_pre_block = virtio_transport_notify_send_pre_block,
		.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
		.notify_buffer_size = virtio_transport_notify_buffer_size,

	},

	.send_pkt = vhost_transport_send_pkt,
};

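/* Virtqueue names follow the guest's point of view: a TX kick means the guest
 * queued guest->host packets for us to receive here, while the RX kick handler
 * below pushes host->guest packets into the guest's RX ring.
 */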
static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
						 dev);
	struct virtio_vsock_pkt *pkt;
	int head, pkts = 0, total_len = 0;
	unsigned int out, in;
	bool added = false;

	mutex_lock(&vq->mutex);

	if (!vq->private_data)
		goto out;

	vhost_disable_notify(&vsock->dev, vq);
	do {
		u32 len;

		if (!vhost_vsock_more_replies(vsock)) {
			/* Stop tx until the device processes already
			 * pending replies.  Leave tx virtqueue
			 * callbacks disabled.
			 */
			goto no_more_replies;
		}

		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		if (head < 0)
			break;

		if (head == vq->num) {
			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
				vhost_disable_notify(&vsock->dev, vq);
				continue;
			}
			break;
		}

		pkt = vhost_vsock_alloc_pkt(vq, out, in);
		if (!pkt) {
			vq_err(vq, "Faulted on pkt\n");
			continue;
		}

		len = pkt->len;

		/* Deliver to monitoring devices all received packets */
		virtio_transport_deliver_tap_pkt(pkt);

		/* Only accept correctly addressed packets */
		if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
		    le64_to_cpu(pkt->hdr.dst_cid) ==
		    vhost_transport_get_local_cid())
			virtio_transport_recv_pkt(&vhost_transport, pkt);
		else
			virtio_transport_free_pkt(pkt);

		len += sizeof(pkt->hdr);
		vhost_add_used(vq, head, len);
		total_len += len;
		added = true;
	} while (likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));

no_more_replies:
	if (added)
		vhost_signal(&vsock->dev, vq);

out:
	mutex_unlock(&vq->mutex);
}

static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
						 dev);

	vhost_transport_do_send_pkt(vsock, vq);
}

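/* A non-NULL vq->private_data marks the device as running; the kick handlers
 * above bail out early while it is NULL, and vhost_vsock_stop() clears it
 * again under vq->mutex.
 */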
static int vhost_vsock_start(struct vhost_vsock *vsock)
{
	struct vhost_virtqueue *vq;
	size_t i;
	int ret;

	mutex_lock(&vsock->dev.mutex);

	ret = vhost_dev_check_owner(&vsock->dev);
	if (ret)
		goto err;

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];

		mutex_lock(&vq->mutex);

		if (!vhost_vq_access_ok(vq)) {
			ret = -EFAULT;
			goto err_vq;
		}

		if (!vq->private_data) {
			vq->private_data = vsock;
			ret = vhost_vq_init_access(vq);
			if (ret)
				goto err_vq;
		}

		mutex_unlock(&vq->mutex);
	}

	mutex_unlock(&vsock->dev.mutex);
	return 0;

err_vq:
	vq->private_data = NULL;
	mutex_unlock(&vq->mutex);

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];

		mutex_lock(&vq->mutex);
		vq->private_data = NULL;
		mutex_unlock(&vq->mutex);
	}
err:
	mutex_unlock(&vsock->dev.mutex);
	return ret;
}

static int vhost_vsock_stop(struct vhost_vsock *vsock)
{
	size_t i;
	int ret;

	mutex_lock(&vsock->dev.mutex);

	ret = vhost_dev_check_owner(&vsock->dev);
	if (ret)
		goto err;

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		struct vhost_virtqueue *vq = &vsock->vqs[i];

		mutex_lock(&vq->mutex);
		vq->private_data = NULL;
		mutex_unlock(&vq->mutex);
	}

err:
	mutex_unlock(&vsock->dev.mutex);
	return ret;
}

static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}

static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
{
	struct vhost_virtqueue **vqs;
	struct vhost_vsock *vsock;
	int ret;

	/* This struct is large and allocation could fail, fall back to vmalloc
	 * if there is no other way.
	 */
	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!vsock)
		return -ENOMEM;

	vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		ret = -ENOMEM;
		goto out;
	}

	vsock->guest_cid = 0; /* no CID assigned yet */

	atomic_set(&vsock->queued_replies, 0);

	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;

	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
		       UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
		       VHOST_VSOCK_WEIGHT, NULL);

	file->private_data = vsock;
	spin_lock_init(&vsock->send_pkt_list_lock);
	INIT_LIST_HEAD(&vsock->send_pkt_list);
	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
	return 0;

out:
	vhost_vsock_free(vsock);
	return ret;
}

static void vhost_vsock_flush(struct vhost_vsock *vsock)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
		if (vsock->vqs[i].handle_kick)
			vhost_poll_flush(&vsock->vqs[i].poll);
	vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
}

static void vhost_vsock_reset_orphans(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	/* vmci_transport.c doesn't take sk_lock here either.  At least we're
	 * under vsock_table_lock so the sock cannot disappear while we're
	 * executing.
	 */

	/* If the peer is still valid, no need to reset connection */
	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
		return;

	/* If the close timeout is pending, let it expire.  This avoids races
	 * with the timeout callback.
	 */
	if (vsk->close_work_scheduled)
		return;

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	sk->sk_state = SS_UNCONNECTED;
	sk->sk_err = ECONNRESET;
	sk->sk_error_report(sk);
}

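/* Teardown order matters: unhash the instance and wait for RCU readers to
 * finish before stopping the virtqueues and freeing any packets still queued
 * for the guest.
 */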
static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
{
	struct vhost_vsock *vsock = file->private_data;

	mutex_lock(&vhost_vsock_mutex);
	if (vsock->guest_cid)
		hash_del_rcu(&vsock->hash);
	mutex_unlock(&vhost_vsock_mutex);

	/* Wait for other CPUs to finish using vsock */
	synchronize_rcu();

	/* Iterating over all connections for all CIDs to find orphans is
	 * inefficient.  Room for improvement here. */
	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);

	vhost_vsock_stop(vsock);
	vhost_vsock_flush(vsock);
	vhost_dev_stop(&vsock->dev);

	spin_lock_bh(&vsock->send_pkt_list_lock);
	while (!list_empty(&vsock->send_pkt_list)) {
		struct virtio_vsock_pkt *pkt;

		pkt = list_first_entry(&vsock->send_pkt_list,
				       struct virtio_vsock_pkt, list);
		list_del_init(&pkt->list);
		virtio_transport_free_pkt(pkt);
	}
	spin_unlock_bh(&vsock->send_pkt_list_lock);

	vhost_dev_cleanup(&vsock->dev);
	kfree(vsock->dev.vqs);
	vhost_vsock_free(vsock);
	return 0;
}

static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
{
	struct vhost_vsock *other;

	/* Refuse reserved CIDs */
	if (guest_cid <= VMADDR_CID_HOST ||
	    guest_cid == U32_MAX)
		return -EINVAL;

	/* 64-bit CIDs are not yet supported */
	if (guest_cid > U32_MAX)
		return -EINVAL;

	/* Refuse if CID is assigned to the guest->host transport (i.e. nested
	 * VM), to make the loopback work.
	 */
	if (vsock_find_cid(guest_cid))
		return -EADDRINUSE;

	/* Refuse if CID is already in use */
	mutex_lock(&vhost_vsock_mutex);
	other = vhost_vsock_get(guest_cid);
	if (other && other != vsock) {
		mutex_unlock(&vhost_vsock_mutex);
		return -EADDRINUSE;
	}

	if (vsock->guest_cid)
		hash_del_rcu(&vsock->hash);

	vsock->guest_cid = guest_cid;
	hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
	mutex_unlock(&vhost_vsock_mutex);

	return 0;
}

static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
{
	struct vhost_virtqueue *vq;
	int i;

	if (features & ~VHOST_VSOCK_FEATURES)
		return -EOPNOTSUPP;

	mutex_lock(&vsock->dev.mutex);
	if ((features & (1 << VHOST_F_LOG_ALL)) &&
	    !vhost_log_access_ok(&vsock->dev)) {
		mutex_unlock(&vsock->dev.mutex);
		return -EFAULT;
	}

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];
		mutex_lock(&vq->mutex);
		vq->acked_features = features;
		mutex_unlock(&vq->mutex);
	}
	mutex_unlock(&vsock->dev.mutex);
	return 0;
}

static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
				  unsigned long arg)
{
	struct vhost_vsock *vsock = f->private_data;
	void __user *argp = (void __user *)arg;
	u64 guest_cid;
	u64 features;
	int start;
	int r;

	switch (ioctl) {
	case VHOST_VSOCK_SET_GUEST_CID:
		if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
			return -EFAULT;
		return vhost_vsock_set_cid(vsock, guest_cid);
	case VHOST_VSOCK_SET_RUNNING:
		if (copy_from_user(&start, argp, sizeof(start)))
			return -EFAULT;
		if (start)
			return vhost_vsock_start(vsock);
		else
			return vhost_vsock_stop(vsock);
	case VHOST_GET_FEATURES:
		features = VHOST_VSOCK_FEATURES;
		if (copy_to_user(argp, &features, sizeof(features)))
			return -EFAULT;
		return 0;
	case VHOST_SET_FEATURES:
		if (copy_from_user(&features, argp, sizeof(features)))
			return -EFAULT;
		return vhost_vsock_set_features(vsock, features);
	default:
		mutex_lock(&vsock->dev.mutex);
		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
		else
			vhost_vsock_flush(vsock);
		mutex_unlock(&vsock->dev.mutex);
		return r;
	}
}

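/* Rough userspace setup sketch (illustrative only; vring configuration and
 * error handling omitted):
 *
 *	int fd = open("/dev/vhost-vsock", O_RDWR);
 *	u64 cid = 3;		// any non-reserved 32-bit CID
 *	int running = 1;
 *
 *	ioctl(fd, VHOST_SET_OWNER);
 *	ioctl(fd, VHOST_VSOCK_SET_GUEST_CID, &cid);
 *	... VHOST_SET_VRING_NUM/ADDR/KICK/CALL for both virtqueues ...
 *	ioctl(fd, VHOST_VSOCK_SET_RUNNING, &running);
 */
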
static const struct file_operations vhost_vsock_fops = {
	.owner = THIS_MODULE,
	.open = vhost_vsock_dev_open,
	.release = vhost_vsock_dev_release,
	.llseek = noop_llseek,
	.unlocked_ioctl = vhost_vsock_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
};

static struct miscdevice vhost_vsock_misc = {
	.minor = VHOST_VSOCK_MINOR,
	.name = "vhost-vsock",
	.fops = &vhost_vsock_fops,
};

static int __init vhost_vsock_init(void)
{
	int ret;

	ret = vsock_core_register(&vhost_transport.transport,
				  VSOCK_TRANSPORT_F_H2G);
	if (ret < 0)
		return ret;
	return misc_register(&vhost_vsock_misc);
}

static void __exit vhost_vsock_exit(void)
{
	misc_deregister(&vhost_vsock_misc);
	vsock_core_unregister(&vhost_transport.transport);
}

module_init(vhost_vsock_init);
module_exit(vhost_vsock_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("vhost transport for vsock");
MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
MODULE_ALIAS("devname:vhost-vsock");