// SPDX-License-Identifier: GPL-2.0-only
/*
 * common code for virtio vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/virtio_vsock.h>
#include <uapi/linux/vsockmon.h>

#include <net/sock.h>
#include <net/af_vsock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/vsock_virtio_transport_common.h>

/* How long to wait for graceful shutdown of a connection */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)

/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN  128

static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
{
	const struct vsock_transport *t = vsock_core_get_transport(vsk);

	if (WARN_ON(!t))
		return NULL;

	return container_of(t, struct virtio_transport, transport);
}

static struct virtio_vsock_pkt *
virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
			   size_t len,
			   u32 src_cid,
			   u32 src_port,
			   u32 dst_cid,
			   u32 dst_port)
{
	struct virtio_vsock_pkt *pkt;
	int err;

	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt)
		return NULL;

	pkt->hdr.type = cpu_to_le16(info->type);
	pkt->hdr.op = cpu_to_le16(info->op);
	pkt->hdr.src_cid = cpu_to_le64(src_cid);
	pkt->hdr.dst_cid = cpu_to_le64(dst_cid);
	pkt->hdr.src_port = cpu_to_le32(src_port);
	pkt->hdr.dst_port = cpu_to_le32(dst_port);
	pkt->hdr.flags = cpu_to_le32(info->flags);
	pkt->len = len;
	pkt->hdr.len = cpu_to_le32(len);
	pkt->reply = info->reply;
	pkt->vsk = info->vsk;

	if (info->msg && len > 0) {
		pkt->buf = kmalloc(len, GFP_KERNEL);
		if (!pkt->buf)
			goto out_pkt;

		pkt->buf_len = len;

		err = memcpy_from_msg(pkt->buf, info->msg, len);
		if (err)
			goto out;

		if (msg_data_left(info->msg) == 0 &&
		    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET)
			pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
	}

	trace_virtio_transport_alloc_pkt(src_cid, src_port,
					 dst_cid, dst_port,
					 len,
					 info->type,
					 info->op,
					 info->flags);

	return pkt;

out:
	kfree(pkt->buf);
out_pkt:
	kfree(pkt);
	return NULL;
}
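/* Note on byte order: every multi-byte field of the wire header built above
 * is stored little-endian (hence the cpu_to_le*() conversions), while
 * pkt->len keeps a host-byte-order copy of hdr.len for local bookkeeping.
 * An illustrative sketch of how a caller might build a 4-byte OP_RW packet
 * from CID 3 port 1234 to CID 2 port 5678 (the values and the 'info' setup
 * here are hypothetical, not taken from any caller in this file):
 *
 *	struct virtio_vsock_pkt_info info = {
 *		.op      = VIRTIO_VSOCK_OP_RW,
 *		.msg     = msg,		// iterator with 4 bytes left
 *		.pkt_len = 4,
 *	};
 *	pkt = virtio_transport_alloc_pkt(&info, 4, 3, 1234, 2, 5678);
 *	// on success: pkt->hdr.len == cpu_to_le32(4), pkt->len == 4
 */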
/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
	struct virtio_vsock_pkt *pkt = opaque;
	struct af_vsockmon_hdr *hdr;
	struct sk_buff *skb;
	size_t payload_len;
	void *payload_buf;

	/* A packet could be split to fit the RX buffer, so we can retrieve
	 * the payload length from the header and the buffer pointer taking
	 * care of the offset in the original packet.
	 */
	payload_len = le32_to_cpu(pkt->hdr.len);
	payload_buf = pkt->buf + pkt->off;

	skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len,
			GFP_ATOMIC);
	if (!skb)
		return NULL;

	hdr = skb_put(skb, sizeof(*hdr));

	/* pkt->hdr is little-endian so no need to byteswap here */
	hdr->src_cid = pkt->hdr.src_cid;
	hdr->src_port = pkt->hdr.src_port;
	hdr->dst_cid = pkt->hdr.dst_cid;
	hdr->dst_port = pkt->hdr.dst_port;

	hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
	hdr->len = cpu_to_le16(sizeof(pkt->hdr));
	memset(hdr->reserved, 0, sizeof(hdr->reserved));

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_REQUEST:
	case VIRTIO_VSOCK_OP_RESPONSE:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
		break;
	case VIRTIO_VSOCK_OP_RST:
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
		break;
	case VIRTIO_VSOCK_OP_RW:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
		break;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
		break;
	default:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
		break;
	}

	skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));

	if (payload_len) {
		skb_put_data(skb, payload_buf, payload_len);
	}

	return skb;
}

void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
{
	if (pkt->tap_delivered)
		return;

	vsock_deliver_tap(virtio_transport_build_skb, pkt);
	pkt->tap_delivered = true;
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
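/* The skb handed to taps above is laid out as three back-to-back regions:
 * an af_vsockmon_hdr, the raw little-endian virtio_vsock_hdr, and then the
 * payload (if any).  A sketch of what a capture consumer such as vsockmon
 * would parse, assuming it receives the skb data verbatim:
 *
 *	+------------------+--------------------+--------------------+
 *	| af_vsockmon_hdr  | virtio_vsock_hdr   | payload            |
 *	| (CONNECT/PAYLOAD | (wire header, LE)  | (hdr.len bytes)    |
 *	|  /DISCONNECT/..) |                    |                    |
 *	+------------------+--------------------+--------------------+
 */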
static u16 virtio_transport_get_type(struct sock *sk)
{
	if (sk->sk_type == SOCK_STREAM)
		return VIRTIO_VSOCK_TYPE_STREAM;
	else
		return VIRTIO_VSOCK_TYPE_SEQPACKET;
}

/* This function can only be used on connecting/connected sockets,
 * since a socket assigned to a transport is required.
 *
 * Do not use on listener sockets!
 */
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
					  struct virtio_vsock_pkt_info *info)
{
	u32 src_cid, src_port, dst_cid, dst_port;
	const struct virtio_transport *t_ops;
	struct virtio_vsock_sock *vvs;
	struct virtio_vsock_pkt *pkt;
	u32 pkt_len = info->pkt_len;

	info->type = virtio_transport_get_type(sk_vsock(vsk));

	t_ops = virtio_transport_get_ops(vsk);
	if (unlikely(!t_ops))
		return -EFAULT;

	src_cid = t_ops->transport.get_local_cid();
	src_port = vsk->local_addr.svm_port;
	if (!info->remote_cid) {
		dst_cid = vsk->remote_addr.svm_cid;
		dst_port = vsk->remote_addr.svm_port;
	} else {
		dst_cid = info->remote_cid;
		dst_port = info->remote_port;
	}

	vvs = vsk->trans;

	/* we can send less than pkt_len bytes */
	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;

	/* virtio_transport_get_credit might return less than pkt_len credit */
	pkt_len = virtio_transport_get_credit(vvs, pkt_len);

	/* Do not send zero length OP_RW pkt */
	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
		return pkt_len;

	pkt = virtio_transport_alloc_pkt(info, pkt_len,
					 src_cid, src_port,
					 dst_cid, dst_port);
	if (!pkt) {
		virtio_transport_put_credit(vvs, pkt_len);
		return -ENOMEM;
	}

	virtio_transport_inc_tx_pkt(vvs, pkt);

	return t_ops->send_pkt(pkt);
}

static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
					struct virtio_vsock_pkt *pkt)
{
	if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
		return false;

	vvs->rx_bytes += pkt->len;
	return true;
}

static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
					struct virtio_vsock_pkt *pkt)
{
	vvs->rx_bytes -= pkt->len;
	vvs->fwd_cnt += pkt->len;
}

void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
{
	spin_lock_bh(&vvs->rx_lock);
	vvs->last_fwd_cnt = vvs->fwd_cnt;
	pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
	pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
	spin_unlock_bh(&vvs->rx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);

u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
	u32 ret;

	spin_lock_bh(&vvs->tx_lock);
	ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (ret > credit)
		ret = credit;
	vvs->tx_cnt += ret;
	spin_unlock_bh(&vvs->tx_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_credit);

void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
	spin_lock_bh(&vvs->tx_lock);
	vvs->tx_cnt -= credit;
	spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
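/* Credit bookkeeping in a nutshell: the peer advertises buf_alloc (its
 * receive buffer size) and fwd_cnt (bytes it has consumed so far), and we
 * track tx_cnt (bytes we have sent).  The credit still available is
 *
 *	peer_buf_alloc - (tx_cnt - peer_fwd_cnt)
 *
 * Worked example (hypothetical numbers): with peer_buf_alloc = 64KiB,
 * tx_cnt = 100KiB and peer_fwd_cnt = 40KiB, the peer still holds
 * 100 - 40 = 60KiB unread, so virtio_transport_get_credit() grants at most
 * 64 - 60 = 4KiB; virtio_transport_put_credit() hands credit back when a
 * caller ends up sending less than it reserved.
 */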
static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}

static ssize_t
virtio_transport_stream_do_peek(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt;
	size_t bytes, total = 0, off;
	int err = -EFAULT;

	spin_lock_bh(&vvs->rx_lock);

	list_for_each_entry(pkt, &vvs->rx_queue, list) {
		off = pkt->off;

		if (total == len)
			break;

		while (total < len && off < pkt->len) {
			bytes = len - total;
			if (bytes > pkt->len - off)
				bytes = pkt->len - off;

			/* sk_lock is held by caller so no one else can dequeue.
			 * Unlock rx_lock since memcpy_to_msg() may sleep.
			 */
			spin_unlock_bh(&vvs->rx_lock);

			err = memcpy_to_msg(msg, pkt->buf + off, bytes);
			if (err)
				goto out;

			spin_lock_bh(&vvs->rx_lock);

			total += bytes;
			off += bytes;
		}
	}

	spin_unlock_bh(&vvs->rx_lock);

	return total;

out:
	if (total)
		err = total;
	return err;
}

static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt;
	size_t bytes, total = 0;
	u32 free_space;
	int err = -EFAULT;

	spin_lock_bh(&vvs->rx_lock);
	while (total < len && !list_empty(&vvs->rx_queue)) {
		pkt = list_first_entry(&vvs->rx_queue,
				       struct virtio_vsock_pkt, list);

		bytes = len - total;
		if (bytes > pkt->len - pkt->off)
			bytes = pkt->len - pkt->off;

		/* sk_lock is held by caller so no one else can dequeue.
		 * Unlock rx_lock since memcpy_to_msg() may sleep.
		 */
		spin_unlock_bh(&vvs->rx_lock);

		err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
		if (err)
			goto out;

		spin_lock_bh(&vvs->rx_lock);

		total += bytes;
		pkt->off += bytes;
		if (pkt->off == pkt->len) {
			virtio_transport_dec_rx_pkt(vvs, pkt);
			list_del(&pkt->list);
			virtio_transport_free_pkt(pkt);
		}
	}

	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);

	spin_unlock_bh(&vvs->rx_lock);

	/* To reduce the number of credit update messages,
	 * don't update credits as long as lots of space is available.
	 * Note: the limit chosen here is arbitrary. Setting the limit
	 * too high causes extra messages. Too low causes transmitter
	 * stalls. As stalls are in theory more expensive than extra
	 * messages, we set the limit to a high value. TODO: experiment
	 * with different values.
	 */
	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
		virtio_transport_send_credit_update(vsk);

	return total;

out:
	if (total)
		err = total;
	return err;
}
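/* Example of the credit-update heuristic above (numbers are illustrative,
 * assuming the usual 64KiB VIRTIO_VSOCK_MAX_PKT_BUF_SIZE): with
 * buf_alloc = 256KiB, a reader whose total consumption (fwd_cnt) is 300KiB
 * while the last value snapshotted into an outgoing header (last_fwd_cnt)
 * was 280KiB has
 *
 *	free_space = 256KiB - (300KiB - 280KiB) = 236KiB
 *
 * which is above the threshold, so no CREDIT_UPDATE is sent; only when the
 * unadvertised gap grows enough to push free_space below one maximum packet
 * does the reader proactively notify the transmitter.
 */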
static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
						 struct msghdr *msg,
						 int flags)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt;
	int dequeued_len = 0;
	size_t user_buf_len = msg_data_left(msg);
	bool msg_ready = false;

	spin_lock_bh(&vvs->rx_lock);

	if (vvs->msg_count == 0) {
		spin_unlock_bh(&vvs->rx_lock);
		return 0;
	}

	while (!msg_ready) {
		pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list);

		if (dequeued_len >= 0) {
			size_t pkt_len;
			size_t bytes_to_copy;

			pkt_len = (size_t)le32_to_cpu(pkt->hdr.len);
			bytes_to_copy = min(user_buf_len, pkt_len);

			if (bytes_to_copy) {
				int err;

				/* sk_lock is held by caller so no one else can dequeue.
				 * Unlock rx_lock since memcpy_to_msg() may sleep.
				 */
				spin_unlock_bh(&vvs->rx_lock);

				err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy);
				if (err) {
					/* Copy of message failed. Rest of
					 * fragments will be freed without copy.
					 */
					dequeued_len = err;
				} else {
					user_buf_len -= bytes_to_copy;
				}

				spin_lock_bh(&vvs->rx_lock);
			}

			if (dequeued_len >= 0)
				dequeued_len += pkt_len;
		}

		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
			msg_ready = true;
			vvs->msg_count--;
		}

		virtio_transport_dec_rx_pkt(vvs, pkt);
		list_del(&pkt->list);
		virtio_transport_free_pkt(pkt);
	}

	spin_unlock_bh(&vvs->rx_lock);

	virtio_transport_send_credit_update(vsk);

	return dequeued_len;
}

ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len, int flags)
{
	if (flags & MSG_PEEK)
		return virtio_transport_stream_do_peek(vsk, msg, len);
	else
		return virtio_transport_stream_do_dequeue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);

ssize_t
virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   int flags)
{
	if (flags & MSG_PEEK)
		return -EOPNOTSUPP;

	return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
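/* SEQPACKET record framing: a message larger than one packet goes out as a
 * run of OP_RW packets where only the last one carries
 * VIRTIO_VSOCK_SEQ_EOR (set in virtio_transport_alloc_pkt() once
 * msg_data_left() reaches zero).  The dequeue path above keeps draining
 * rx_queue until it sees that flag, counting whole messages via
 * vvs->msg_count.  If the user buffer is smaller than the message, the
 * uncopied tail is freed without being copied, yet the return value still
 * reports the full message length, which lets the socket layer flag
 * truncation to the caller (MSG_TRUNC-style semantics).
 */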
int
virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	spin_lock_bh(&vvs->tx_lock);

	if (len > vvs->peer_buf_alloc) {
		spin_unlock_bh(&vvs->tx_lock);
		return -EMSGSIZE;
	}

	spin_unlock_bh(&vvs->tx_lock);

	return virtio_transport_stream_enqueue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);

int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
			       struct msghdr *msg,
			       size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);

s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	spin_lock_bh(&vvs->rx_lock);
	bytes = vvs->rx_bytes;
	spin_unlock_bh(&vvs->rx_lock);

	return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);

u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	u32 msg_count;

	spin_lock_bh(&vvs->rx_lock);
	msg_count = vvs->msg_count;
	spin_unlock_bh(&vvs->rx_lock);

	return msg_count;
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);

static s64 virtio_transport_has_space(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (bytes < 0)
		bytes = 0;

	return bytes;
}

s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	spin_lock_bh(&vvs->tx_lock);
	bytes = virtio_transport_has_space(vsk);
	spin_unlock_bh(&vvs->tx_lock);

	return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);

int virtio_transport_do_socket_init(struct vsock_sock *vsk,
				    struct vsock_sock *psk)
{
	struct virtio_vsock_sock *vvs;

	vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
	if (!vvs)
		return -ENOMEM;

	vsk->trans = vvs;
	vvs->vsk = vsk;
	if (psk && psk->trans) {
		struct virtio_vsock_sock *ptrans = psk->trans;

		vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
	}

	if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
		vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;

	vvs->buf_alloc = vsk->buffer_size;

	spin_lock_init(&vvs->rx_lock);
	spin_lock_init(&vvs->tx_lock);
	INIT_LIST_HEAD(&vvs->rx_queue);

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);

/* sk_lock held by the caller */
void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
		*val = VIRTIO_VSOCK_MAX_BUF_SIZE;

	vvs->buf_alloc = *val;

	virtio_transport_send_credit_update(vsk);
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);
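/* buf_alloc is the receive-buffer budget advertised to the peer in every
 * outgoing header (see virtio_transport_inc_tx_pkt()).  Shrinking it here
 * does not reclaim memory from packets already queued; it only lowers the
 * ceiling enforced by virtio_transport_inc_rx_pkt() and used by the peer
 * for its credit arithmetic, which is why a CREDIT_UPDATE is pushed
 * immediately so the peer learns the new value without waiting for normal
 * traffic to carry it.
 */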
int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
				size_t target,
				bool *data_ready_now)
{
	if (vsock_stream_has_data(vsk))
		*data_ready_now = true;
	else
		*data_ready_now = false;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);

int
virtio_transport_notify_poll_out(struct vsock_sock *vsk,
				 size_t target,
				 bool *space_avail_now)
{
	s64 free_space;

	free_space = vsock_stream_has_space(vsk);
	if (free_space > 0)
		*space_avail_now = true;
	else if (free_space == 0)
		*space_avail_now = false;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);

int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);

int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);

int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);

int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
	size_t target, ssize_t copied, bool data_read,
	struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);

int virtio_transport_notify_send_init(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);

int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);

int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);

int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
	ssize_t written, struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);

u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
	return vsk->buffer_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);

bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);

bool virtio_transport_stream_allow(u32 cid, u32 port)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);

int virtio_transport_dgram_bind(struct vsock_sock *vsk,
				struct sockaddr_vm *addr)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);

bool virtio_transport_dgram_allow(u32 cid, u32 port)
{
	return false;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);

int virtio_transport_connect(struct vsock_sock *vsk)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_REQUEST,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_connect);

int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_SHUTDOWN,
		.flags = (mode & RCV_SHUTDOWN ?
			  VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
			 (mode & SEND_SHUTDOWN ?
			  VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_shutdown);

int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
			       struct sockaddr_vm *remote_addr,
			       struct msghdr *msg,
			       size_t dgram_len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);

ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RW,
		.msg = msg,
		.pkt_len = len,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);

void virtio_transport_destruct(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	kfree(vvs);
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);

static int virtio_transport_reset(struct vsock_sock *vsk,
				  struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.reply = !!pkt,
		.vsk = vsk,
	};

	/* Send RST only if the original pkt is not a RST pkt */
	if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	return virtio_transport_send_pkt_info(vsk, &info);
}
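/* Reset rules implemented above and below, in brief: an RST is sent in
 * reply to anything unexpected, except to another RST, which avoids two
 * endpoints ping-ponging resets forever.  virtio_transport_reset() is used
 * when a local socket exists and takes its addressing from vsk;
 * virtio_transport_reset_no_sock() below answers packets that matched no
 * socket at all, swapping src/dst taken from the offending packet's header.
 */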
/* Normally packets are associated with a socket. There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 */
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
					  struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt *reply;
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.type = le16_to_cpu(pkt->hdr.type),
		.reply = true,
	};

	/* Send RST only if the original pkt is not a RST pkt */
	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	reply = virtio_transport_alloc_pkt(&info, 0,
					   le64_to_cpu(pkt->hdr.dst_cid),
					   le32_to_cpu(pkt->hdr.dst_port),
					   le64_to_cpu(pkt->hdr.src_cid),
					   le32_to_cpu(pkt->hdr.src_port));
	if (!reply)
		return -ENOMEM;

	if (!t) {
		virtio_transport_free_pkt(reply);
		return -ENOTCONN;
	}

	return t->send_pkt(reply);
}

/* This function should be called with sk_lock held and SOCK_DONE set */
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt, *tmp;

	/* We don't need to take rx_lock, as the socket is closing and we are
	 * removing it.
	 */
	list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
		list_del(&pkt->list);
		virtio_transport_free_pkt(pkt);
	}

	vsock_remove_sock(vsk);
}

static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
	if (timeout) {
		DEFINE_WAIT_FUNC(wait, woken_wake_function);

		add_wait_queue(sk_sleep(sk), &wait);

		do {
			if (sk_wait_event(sk, &timeout,
					  sock_flag(sk, SOCK_DONE), &wait))
				break;
		} while (!signal_pending(current) && timeout);

		remove_wait_queue(sk_sleep(sk), &wait);
	}
}

static void virtio_transport_do_close(struct vsock_sock *vsk,
				      bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	if (vsock_stream_has_data(vsk) <= 0)
		sk->sk_state = TCP_CLOSING;
	sk->sk_state_change(sk);

	if (vsk->close_work_scheduled &&
	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
		vsk->close_work_scheduled = false;

		virtio_transport_remove_sock(vsk);

		/* Release refcnt obtained when we scheduled the timeout */
		sock_put(sk);
	}
}

static void virtio_transport_close_timeout(struct work_struct *work)
{
	struct vsock_sock *vsk =
		container_of(work, struct vsock_sock, close_work.work);
	struct sock *sk = sk_vsock(vsk);

	sock_hold(sk);
	lock_sock(sk);

	if (!sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset(vsk, NULL);

		virtio_transport_do_close(vsk, false);
	}

	vsk->close_work_scheduled = false;

	release_sock(sk);
	sock_put(sk);
}
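/* Graceful shutdown timeline: virtio_transport_close() below first sends
 * SHUTDOWN for both directions, optionally lingers waiting for SOCK_DONE,
 * and, if the peer's RST has not arrived yet, arms close_work.  Either the
 * peer's RST completes the close through virtio_transport_do_close(),
 * cancelling the timer, or the VSOCK_CLOSE_TIMEOUT (8 seconds) timer fires
 * and virtio_transport_close_timeout() forces an RST and tears the socket
 * down anyway.
 */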
/* User context, vsk->sk is locked */
static bool virtio_transport_close(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;

	if (!(sk->sk_state == TCP_ESTABLISHED ||
	      sk->sk_state == TCP_CLOSING))
		return true;

	/* Already received SHUTDOWN from peer, reply with RST */
	if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
		(void)virtio_transport_reset(vsk, NULL);
		return true;
	}

	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
		(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);

	if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
		virtio_transport_wait_close(sk, sk->sk_lingertime);

	if (sock_flag(sk, SOCK_DONE)) {
		return true;
	}

	sock_hold(sk);
	INIT_DELAYED_WORK(&vsk->close_work,
			  virtio_transport_close_timeout);
	vsk->close_work_scheduled = true;
	schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
	return false;
}

void virtio_transport_release(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;
	bool remove_sock = true;

	if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
		remove_sock = virtio_transport_close(vsk);

	if (remove_sock) {
		sock_set_flag(sk, SOCK_DONE);
		virtio_transport_remove_sock(vsk);
	}
}
EXPORT_SYMBOL_GPL(virtio_transport_release);

static int
virtio_transport_recv_connecting(struct sock *sk,
				 struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	int err;
	int skerr;

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_RESPONSE:
		sk->sk_state = TCP_ESTABLISHED;
		sk->sk_socket->state = SS_CONNECTED;
		vsock_insert_connected(vsk);
		sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_INVALID:
		break;
	case VIRTIO_VSOCK_OP_RST:
		skerr = ECONNRESET;
		err = 0;
		goto destroy;
	default:
		skerr = EPROTO;
		err = -EINVAL;
		goto destroy;
	}
	return 0;

destroy:
	virtio_transport_reset(vsk, pkt);
	sk->sk_state = TCP_CLOSE;
	sk->sk_err = skerr;
	sk_error_report(sk);
	return err;
}
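/* The receive path below coalesces small packets: an incoming packet of at
 * most GOOD_COPY_LEN (128) bytes is memcpy()'d into the spare tail room of
 * the last queued packet's buffer instead of being queued on its own, so a
 * stream of tiny writes does not pin one full rx buffer per packet.  For
 * example (hypothetical sizes), a 4-byte packet arriving behind a queued
 * packet with 60 bytes used out of a 4KiB buffer is appended at offset 60
 * and the new packet is freed immediately.
 */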
static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
			      struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool can_enqueue, free_pkt = false;

	pkt->len = le32_to_cpu(pkt->hdr.len);
	pkt->off = 0;

	spin_lock_bh(&vvs->rx_lock);

	can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
	if (!can_enqueue) {
		free_pkt = true;
		goto out;
	}

	if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
		vvs->msg_count++;

	/* Try to copy small packets into the buffer of last packet queued,
	 * to avoid wasting memory queueing the entire buffer with a small
	 * payload.
	 */
	if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
		struct virtio_vsock_pkt *last_pkt;

		last_pkt = list_last_entry(&vvs->rx_queue,
					   struct virtio_vsock_pkt, list);

		/* If there is space in the last packet queued, we copy the
		 * new packet in its buffer. We avoid this if the last packet
		 * queued has VIRTIO_VSOCK_SEQ_EOR set, because that is the
		 * delimiter of a SEQPACKET record, so 'pkt' is the first
		 * packet of a new record.
		 */
		if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
		    !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)) {
			memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
			       pkt->len);
			last_pkt->len += pkt->len;
			free_pkt = true;
			last_pkt->hdr.flags |= pkt->hdr.flags;
			goto out;
		}
	}

	list_add_tail(&pkt->list, &vvs->rx_queue);

out:
	spin_unlock_bh(&vvs->rx_lock);
	if (free_pkt)
		virtio_transport_free_pkt(pkt);
}

static int
virtio_transport_recv_connected(struct sock *sk,
				struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	int err = 0;

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_RW:
		virtio_transport_recv_enqueue(vsk, pkt);
		sk->sk_data_ready(sk);
		return err;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
		sk->sk_write_space(sk);
		break;
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
			vsk->peer_shutdown |= RCV_SHUTDOWN;
		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
			vsk->peer_shutdown |= SEND_SHUTDOWN;
		if (vsk->peer_shutdown == SHUTDOWN_MASK &&
		    vsock_stream_has_data(vsk) <= 0 &&
		    !sock_flag(sk, SOCK_DONE)) {
			(void)virtio_transport_reset(vsk, NULL);

			virtio_transport_do_close(vsk, true);
		}
		if (le32_to_cpu(pkt->hdr.flags))
			sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_RST:
		virtio_transport_do_close(vsk, true);
		break;
	default:
		err = -EINVAL;
		break;
	}

	virtio_transport_free_pkt(pkt);
	return err;
}

static void
virtio_transport_recv_disconnecting(struct sock *sk,
				    struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
		virtio_transport_do_close(vsk, true);
}

static int
virtio_transport_send_response(struct vsock_sock *vsk,
			       struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RESPONSE,
		.remote_cid = le64_to_cpu(pkt->hdr.src_cid),
		.remote_port = le32_to_cpu(pkt->hdr.src_port),
		.reply = true,
		.vsk = vsk,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}

static bool virtio_transport_space_update(struct sock *sk,
					  struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool space_available;

	/* Listener sockets are not associated with any transport, so we are
	 * not able to take the state to see if there is space available in
	 * the remote peer, but since they are only used to receive requests,
	 * we can assume that there is always space available in the other
	 * peer.
	 */
	if (!vvs)
		return true;

	/* buf_alloc and fwd_cnt are always included in the hdr */
	spin_lock_bh(&vvs->tx_lock);
	vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
	vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
	space_available = virtio_transport_has_space(vsk);
	spin_unlock_bh(&vvs->tx_lock);
	return space_available;
}
/* Handle server socket */
static int
virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
			     struct virtio_transport *t)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	struct vsock_sock *vchild;
	struct sock *child;
	int ret;

	if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
		virtio_transport_reset_no_sock(t, pkt);
		return -EINVAL;
	}

	if (sk_acceptq_is_full(sk)) {
		virtio_transport_reset_no_sock(t, pkt);
		return -ENOMEM;
	}

	child = vsock_create_connected(sk);
	if (!child) {
		virtio_transport_reset_no_sock(t, pkt);
		return -ENOMEM;
	}

	sk_acceptq_added(sk);

	lock_sock_nested(child, SINGLE_DEPTH_NESTING);

	child->sk_state = TCP_ESTABLISHED;

	vchild = vsock_sk(child);
	vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
			le32_to_cpu(pkt->hdr.dst_port));
	vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
			le32_to_cpu(pkt->hdr.src_port));

	ret = vsock_assign_transport(vchild, vsk);
	/* The transport assigned (based on remote_addr) must be the same
	 * one on which we received the request.
	 */
	if (ret || vchild->transport != &t->transport) {
		release_sock(child);
		virtio_transport_reset_no_sock(t, pkt);
		sock_put(child);
		return ret;
	}

	if (virtio_transport_space_update(child, pkt))
		child->sk_write_space(child);

	vsock_insert_connected(vchild);
	vsock_enqueue_accept(sk, child);
	virtio_transport_send_response(vchild, pkt);

	release_sock(child);

	sk->sk_data_ready(sk);
	return 0;
}

static bool virtio_transport_valid_type(u16 type)
{
	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
}
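/* Dispatch summary for virtio_transport_recv_pkt() below, keyed on the
 * owning socket's state:
 *
 *	TCP_LISTEN      -> virtio_transport_recv_listen()        (new child)
 *	TCP_SYN_SENT    -> virtio_transport_recv_connecting()    (handshake)
 *	TCP_ESTABLISHED -> virtio_transport_recv_connected()     (data/ctrl)
 *	TCP_CLOSING     -> virtio_transport_recv_disconnecting() (RST only)
 *	anything else   -> reset the sender
 *
 * Only the TCP_ESTABLISHED case may keep the packet (OP_RW payloads are
 * enqueued on the rx_queue); every other branch frees it before returning.
 */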
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 */
void virtio_transport_recv_pkt(struct virtio_transport *t,
			       struct virtio_vsock_pkt *pkt)
{
	struct sockaddr_vm src, dst;
	struct vsock_sock *vsk;
	struct sock *sk;
	bool space_available;

	vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
			le32_to_cpu(pkt->hdr.src_port));
	vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
			le32_to_cpu(pkt->hdr.dst_port));

	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
					dst.svm_cid, dst.svm_port,
					le32_to_cpu(pkt->hdr.len),
					le16_to_cpu(pkt->hdr.type),
					le16_to_cpu(pkt->hdr.op),
					le32_to_cpu(pkt->hdr.flags),
					le32_to_cpu(pkt->hdr.buf_alloc),
					le32_to_cpu(pkt->hdr.fwd_cnt));

	if (!virtio_transport_valid_type(le16_to_cpu(pkt->hdr.type))) {
		(void)virtio_transport_reset_no_sock(t, pkt);
		goto free_pkt;
	}

	/* The socket must be in the connected or bound table,
	 * otherwise send a reset back.
	 */
	sk = vsock_find_connected_socket(&src, &dst);
	if (!sk) {
		sk = vsock_find_bound_socket(&dst);
		if (!sk) {
			(void)virtio_transport_reset_no_sock(t, pkt);
			goto free_pkt;
		}
	}

	if (virtio_transport_get_type(sk) != le16_to_cpu(pkt->hdr.type)) {
		(void)virtio_transport_reset_no_sock(t, pkt);
		sock_put(sk);
		goto free_pkt;
	}

	vsk = vsock_sk(sk);

	lock_sock(sk);

	/* Check if sk has been closed before lock_sock */
	if (sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset_no_sock(t, pkt);
		release_sock(sk);
		sock_put(sk);
		goto free_pkt;
	}

	space_available = virtio_transport_space_update(sk, pkt);

	/* Update CID in case it has changed after a transport reset event */
	vsk->local_addr.svm_cid = dst.svm_cid;

	if (space_available)
		sk->sk_write_space(sk);

	switch (sk->sk_state) {
	case TCP_LISTEN:
		virtio_transport_recv_listen(sk, pkt, t);
		virtio_transport_free_pkt(pkt);
		break;
	case TCP_SYN_SENT:
		virtio_transport_recv_connecting(sk, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	case TCP_ESTABLISHED:
		virtio_transport_recv_connected(sk, pkt);
		break;
	case TCP_CLOSING:
		virtio_transport_recv_disconnecting(sk, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	default:
		(void)virtio_transport_reset_no_sock(t, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	}

	release_sock(sk);

	/* Release refcnt obtained when we fetched this socket out of the
	 * bound or connected list.
	 */
	sock_put(sk);
	return;

free_pkt:
	virtio_transport_free_pkt(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);

void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
{
	kfree(pkt->buf);
	kfree(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");