1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * common code for virtio vsock 4 * 5 * Copyright (C) 2013-2015 Red Hat, Inc. 6 * Author: Asias He <asias@redhat.com> 7 * Stefan Hajnoczi <stefanha@redhat.com> 8 */ 9 #include <linux/spinlock.h> 10 #include <linux/module.h> 11 #include <linux/sched/signal.h> 12 #include <linux/ctype.h> 13 #include <linux/list.h> 14 #include <linux/virtio_vsock.h> 15 #include <uapi/linux/vsockmon.h> 16 17 #include <net/sock.h> 18 #include <net/af_vsock.h> 19 20 #define CREATE_TRACE_POINTS 21 #include <trace/events/vsock_virtio_transport_common.h> 22 23 /* How long to wait for graceful shutdown of a connection */ 24 #define VSOCK_CLOSE_TIMEOUT (8 * HZ) 25 26 /* Threshold for detecting small packets to copy */ 27 #define GOOD_COPY_LEN 128 28 29 static const struct virtio_transport * 30 virtio_transport_get_ops(struct vsock_sock *vsk) 31 { 32 const struct vsock_transport *t = vsock_core_get_transport(vsk); 33 34 if (WARN_ON(!t)) 35 return NULL; 36 37 return container_of(t, struct virtio_transport, transport); 38 } 39 40 static struct virtio_vsock_pkt * 41 virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, 42 size_t len, 43 u32 src_cid, 44 u32 src_port, 45 u32 dst_cid, 46 u32 dst_port) 47 { 48 struct virtio_vsock_pkt *pkt; 49 int err; 50 51 pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); 52 if (!pkt) 53 return NULL; 54 55 pkt->hdr.type = cpu_to_le16(info->type); 56 pkt->hdr.op = cpu_to_le16(info->op); 57 pkt->hdr.src_cid = cpu_to_le64(src_cid); 58 pkt->hdr.dst_cid = cpu_to_le64(dst_cid); 59 pkt->hdr.src_port = cpu_to_le32(src_port); 60 pkt->hdr.dst_port = cpu_to_le32(dst_port); 61 pkt->hdr.flags = cpu_to_le32(info->flags); 62 pkt->len = len; 63 pkt->hdr.len = cpu_to_le32(len); 64 pkt->reply = info->reply; 65 pkt->vsk = info->vsk; 66 67 if (info->msg && len > 0) { 68 pkt->buf = kmalloc(len, GFP_KERNEL); 69 if (!pkt->buf) 70 goto out_pkt; 71 72 pkt->buf_len = len; 73 74 err = memcpy_from_msg(pkt->buf, info->msg, len); 75 if (err) 76 goto out; 77 } 78 79 trace_virtio_transport_alloc_pkt(src_cid, src_port, 80 dst_cid, dst_port, 81 len, 82 info->type, 83 info->op, 84 info->flags); 85 86 return pkt; 87 88 out: 89 kfree(pkt->buf); 90 out_pkt: 91 kfree(pkt); 92 return NULL; 93 } 94 95 /* Packet capture */ 96 static struct sk_buff *virtio_transport_build_skb(void *opaque) 97 { 98 struct virtio_vsock_pkt *pkt = opaque; 99 struct af_vsockmon_hdr *hdr; 100 struct sk_buff *skb; 101 size_t payload_len; 102 void *payload_buf; 103 104 /* A packet could be split to fit the RX buffer, so we can retrieve 105 * the payload length from the header and the buffer pointer taking 106 * care of the offset in the original packet. 107 */ 108 payload_len = le32_to_cpu(pkt->hdr.len); 109 payload_buf = pkt->buf + pkt->off; 110 111 skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len, 112 GFP_ATOMIC); 113 if (!skb) 114 return NULL; 115 116 hdr = skb_put(skb, sizeof(*hdr)); 117 118 /* pkt->hdr is little-endian so no need to byteswap here */ 119 hdr->src_cid = pkt->hdr.src_cid; 120 hdr->src_port = pkt->hdr.src_port; 121 hdr->dst_cid = pkt->hdr.dst_cid; 122 hdr->dst_port = pkt->hdr.dst_port; 123 124 hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO); 125 hdr->len = cpu_to_le16(sizeof(pkt->hdr)); 126 memset(hdr->reserved, 0, sizeof(hdr->reserved)); 127 128 switch (le16_to_cpu(pkt->hdr.op)) { 129 case VIRTIO_VSOCK_OP_REQUEST: 130 case VIRTIO_VSOCK_OP_RESPONSE: 131 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT); 132 break; 133 case VIRTIO_VSOCK_OP_RST: 134 case VIRTIO_VSOCK_OP_SHUTDOWN: 135 hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT); 136 break; 137 case VIRTIO_VSOCK_OP_RW: 138 hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD); 139 break; 140 case VIRTIO_VSOCK_OP_CREDIT_UPDATE: 141 case VIRTIO_VSOCK_OP_CREDIT_REQUEST: 142 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL); 143 break; 144 default: 145 hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN); 146 break; 147 } 148 149 skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); 150 151 if (payload_len) { 152 skb_put_data(skb, payload_buf, payload_len); 153 } 154 155 return skb; 156 } 157 158 void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt) 159 { 160 vsock_deliver_tap(virtio_transport_build_skb, pkt); 161 } 162 EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); 163 164 /* This function can only be used on connecting/connected sockets, 165 * since a socket assigned to a transport is required. 166 * 167 * Do not use on listener sockets! 168 */ 169 static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, 170 struct virtio_vsock_pkt_info *info) 171 { 172 u32 src_cid, src_port, dst_cid, dst_port; 173 const struct virtio_transport *t_ops; 174 struct virtio_vsock_sock *vvs; 175 struct virtio_vsock_pkt *pkt; 176 u32 pkt_len = info->pkt_len; 177 178 t_ops = virtio_transport_get_ops(vsk); 179 if (unlikely(!t_ops)) 180 return -EFAULT; 181 182 src_cid = t_ops->transport.get_local_cid(); 183 src_port = vsk->local_addr.svm_port; 184 if (!info->remote_cid) { 185 dst_cid = vsk->remote_addr.svm_cid; 186 dst_port = vsk->remote_addr.svm_port; 187 } else { 188 dst_cid = info->remote_cid; 189 dst_port = info->remote_port; 190 } 191 192 vvs = vsk->trans; 193 194 /* we can send less than pkt_len bytes */ 195 if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) 196 pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; 197 198 /* virtio_transport_get_credit might return less than pkt_len credit */ 199 pkt_len = virtio_transport_get_credit(vvs, pkt_len); 200 201 /* Do not send zero length OP_RW pkt */ 202 if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) 203 return pkt_len; 204 205 pkt = virtio_transport_alloc_pkt(info, pkt_len, 206 src_cid, src_port, 207 dst_cid, dst_port); 208 if (!pkt) { 209 virtio_transport_put_credit(vvs, pkt_len); 210 return -ENOMEM; 211 } 212 213 virtio_transport_inc_tx_pkt(vvs, pkt); 214 215 return t_ops->send_pkt(pkt); 216 } 217 218 static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, 219 struct virtio_vsock_pkt *pkt) 220 { 221 if (vvs->rx_bytes + pkt->len > vvs->buf_alloc) 222 return false; 223 224 vvs->rx_bytes += pkt->len; 225 return true; 226 } 227 228 static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, 229 struct virtio_vsock_pkt *pkt) 230 { 231 vvs->rx_bytes -= pkt->len; 232 vvs->fwd_cnt += pkt->len; 233 } 234 235 void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) 236 { 237 spin_lock_bh(&vvs->rx_lock); 238 vvs->last_fwd_cnt = vvs->fwd_cnt; 239 pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); 240 pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); 241 spin_unlock_bh(&vvs->rx_lock); 242 } 243 EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); 244 245 u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) 246 { 247 u32 ret; 248 249 spin_lock_bh(&vvs->tx_lock); 250 ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); 251 if (ret > credit) 252 ret = credit; 253 vvs->tx_cnt += ret; 254 spin_unlock_bh(&vvs->tx_lock); 255 256 return ret; 257 } 258 EXPORT_SYMBOL_GPL(virtio_transport_get_credit); 259 260 void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) 261 { 262 spin_lock_bh(&vvs->tx_lock); 263 vvs->tx_cnt -= credit; 264 spin_unlock_bh(&vvs->tx_lock); 265 } 266 EXPORT_SYMBOL_GPL(virtio_transport_put_credit); 267 268 static int virtio_transport_send_credit_update(struct vsock_sock *vsk, 269 int type, 270 struct virtio_vsock_hdr *hdr) 271 { 272 struct virtio_vsock_pkt_info info = { 273 .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, 274 .type = type, 275 .vsk = vsk, 276 }; 277 278 return virtio_transport_send_pkt_info(vsk, &info); 279 } 280 281 static ssize_t 282 virtio_transport_stream_do_peek(struct vsock_sock *vsk, 283 struct msghdr *msg, 284 size_t len) 285 { 286 struct virtio_vsock_sock *vvs = vsk->trans; 287 struct virtio_vsock_pkt *pkt; 288 size_t bytes, total = 0, off; 289 int err = -EFAULT; 290 291 spin_lock_bh(&vvs->rx_lock); 292 293 list_for_each_entry(pkt, &vvs->rx_queue, list) { 294 off = pkt->off; 295 296 if (total == len) 297 break; 298 299 while (total < len && off < pkt->len) { 300 bytes = len - total; 301 if (bytes > pkt->len - off) 302 bytes = pkt->len - off; 303 304 /* sk_lock is held by caller so no one else can dequeue. 305 * Unlock rx_lock since memcpy_to_msg() may sleep. 306 */ 307 spin_unlock_bh(&vvs->rx_lock); 308 309 err = memcpy_to_msg(msg, pkt->buf + off, bytes); 310 if (err) 311 goto out; 312 313 spin_lock_bh(&vvs->rx_lock); 314 315 total += bytes; 316 off += bytes; 317 } 318 } 319 320 spin_unlock_bh(&vvs->rx_lock); 321 322 return total; 323 324 out: 325 if (total) 326 err = total; 327 return err; 328 } 329 330 static ssize_t 331 virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, 332 struct msghdr *msg, 333 size_t len) 334 { 335 struct virtio_vsock_sock *vvs = vsk->trans; 336 struct virtio_vsock_pkt *pkt; 337 size_t bytes, total = 0; 338 u32 free_space; 339 int err = -EFAULT; 340 341 spin_lock_bh(&vvs->rx_lock); 342 while (total < len && !list_empty(&vvs->rx_queue)) { 343 pkt = list_first_entry(&vvs->rx_queue, 344 struct virtio_vsock_pkt, list); 345 346 bytes = len - total; 347 if (bytes > pkt->len - pkt->off) 348 bytes = pkt->len - pkt->off; 349 350 /* sk_lock is held by caller so no one else can dequeue. 351 * Unlock rx_lock since memcpy_to_msg() may sleep. 352 */ 353 spin_unlock_bh(&vvs->rx_lock); 354 355 err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); 356 if (err) 357 goto out; 358 359 spin_lock_bh(&vvs->rx_lock); 360 361 total += bytes; 362 pkt->off += bytes; 363 if (pkt->off == pkt->len) { 364 virtio_transport_dec_rx_pkt(vvs, pkt); 365 list_del(&pkt->list); 366 virtio_transport_free_pkt(pkt); 367 } 368 } 369 370 free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); 371 372 spin_unlock_bh(&vvs->rx_lock); 373 374 /* To reduce the number of credit update messages, 375 * don't update credits as long as lots of space is available. 376 * Note: the limit chosen here is arbitrary. Setting the limit 377 * too high causes extra messages. Too low causes transmitter 378 * stalls. As stalls are in theory more expensive than extra 379 * messages, we set the limit to a high value. TODO: experiment 380 * with different values. 381 */ 382 if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { 383 virtio_transport_send_credit_update(vsk, 384 VIRTIO_VSOCK_TYPE_STREAM, 385 NULL); 386 } 387 388 return total; 389 390 out: 391 if (total) 392 err = total; 393 return err; 394 } 395 396 ssize_t 397 virtio_transport_stream_dequeue(struct vsock_sock *vsk, 398 struct msghdr *msg, 399 size_t len, int flags) 400 { 401 if (flags & MSG_PEEK) 402 return virtio_transport_stream_do_peek(vsk, msg, len); 403 else 404 return virtio_transport_stream_do_dequeue(vsk, msg, len); 405 } 406 EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); 407 408 int 409 virtio_transport_dgram_dequeue(struct vsock_sock *vsk, 410 struct msghdr *msg, 411 size_t len, int flags) 412 { 413 return -EOPNOTSUPP; 414 } 415 EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); 416 417 s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) 418 { 419 struct virtio_vsock_sock *vvs = vsk->trans; 420 s64 bytes; 421 422 spin_lock_bh(&vvs->rx_lock); 423 bytes = vvs->rx_bytes; 424 spin_unlock_bh(&vvs->rx_lock); 425 426 return bytes; 427 } 428 EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); 429 430 static s64 virtio_transport_has_space(struct vsock_sock *vsk) 431 { 432 struct virtio_vsock_sock *vvs = vsk->trans; 433 s64 bytes; 434 435 bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); 436 if (bytes < 0) 437 bytes = 0; 438 439 return bytes; 440 } 441 442 s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) 443 { 444 struct virtio_vsock_sock *vvs = vsk->trans; 445 s64 bytes; 446 447 spin_lock_bh(&vvs->tx_lock); 448 bytes = virtio_transport_has_space(vsk); 449 spin_unlock_bh(&vvs->tx_lock); 450 451 return bytes; 452 } 453 EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); 454 455 int virtio_transport_do_socket_init(struct vsock_sock *vsk, 456 struct vsock_sock *psk) 457 { 458 struct virtio_vsock_sock *vvs; 459 460 vvs = kzalloc(sizeof(*vvs), GFP_KERNEL); 461 if (!vvs) 462 return -ENOMEM; 463 464 vsk->trans = vvs; 465 vvs->vsk = vsk; 466 if (psk && psk->trans) { 467 struct virtio_vsock_sock *ptrans = psk->trans; 468 469 vvs->peer_buf_alloc = ptrans->peer_buf_alloc; 470 } 471 472 if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE) 473 vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE; 474 475 vvs->buf_alloc = vsk->buffer_size; 476 477 spin_lock_init(&vvs->rx_lock); 478 spin_lock_init(&vvs->tx_lock); 479 INIT_LIST_HEAD(&vvs->rx_queue); 480 481 return 0; 482 } 483 EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); 484 485 /* sk_lock held by the caller */ 486 void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val) 487 { 488 struct virtio_vsock_sock *vvs = vsk->trans; 489 490 if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE) 491 *val = VIRTIO_VSOCK_MAX_BUF_SIZE; 492 493 vvs->buf_alloc = *val; 494 495 virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, 496 NULL); 497 } 498 EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size); 499 500 int 501 virtio_transport_notify_poll_in(struct vsock_sock *vsk, 502 size_t target, 503 bool *data_ready_now) 504 { 505 if (vsock_stream_has_data(vsk)) 506 *data_ready_now = true; 507 else 508 *data_ready_now = false; 509 510 return 0; 511 } 512 EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); 513 514 int 515 virtio_transport_notify_poll_out(struct vsock_sock *vsk, 516 size_t target, 517 bool *space_avail_now) 518 { 519 s64 free_space; 520 521 free_space = vsock_stream_has_space(vsk); 522 if (free_space > 0) 523 *space_avail_now = true; 524 else if (free_space == 0) 525 *space_avail_now = false; 526 527 return 0; 528 } 529 EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); 530 531 int virtio_transport_notify_recv_init(struct vsock_sock *vsk, 532 size_t target, struct vsock_transport_recv_notify_data *data) 533 { 534 return 0; 535 } 536 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); 537 538 int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, 539 size_t target, struct vsock_transport_recv_notify_data *data) 540 { 541 return 0; 542 } 543 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); 544 545 int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, 546 size_t target, struct vsock_transport_recv_notify_data *data) 547 { 548 return 0; 549 } 550 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); 551 552 int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, 553 size_t target, ssize_t copied, bool data_read, 554 struct vsock_transport_recv_notify_data *data) 555 { 556 return 0; 557 } 558 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); 559 560 int virtio_transport_notify_send_init(struct vsock_sock *vsk, 561 struct vsock_transport_send_notify_data *data) 562 { 563 return 0; 564 } 565 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); 566 567 int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, 568 struct vsock_transport_send_notify_data *data) 569 { 570 return 0; 571 } 572 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); 573 574 int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, 575 struct vsock_transport_send_notify_data *data) 576 { 577 return 0; 578 } 579 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); 580 581 int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, 582 ssize_t written, struct vsock_transport_send_notify_data *data) 583 { 584 return 0; 585 } 586 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); 587 588 u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) 589 { 590 return vsk->buffer_size; 591 } 592 EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); 593 594 bool virtio_transport_stream_is_active(struct vsock_sock *vsk) 595 { 596 return true; 597 } 598 EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); 599 600 bool virtio_transport_stream_allow(u32 cid, u32 port) 601 { 602 return true; 603 } 604 EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); 605 606 int virtio_transport_dgram_bind(struct vsock_sock *vsk, 607 struct sockaddr_vm *addr) 608 { 609 return -EOPNOTSUPP; 610 } 611 EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); 612 613 bool virtio_transport_dgram_allow(u32 cid, u32 port) 614 { 615 return false; 616 } 617 EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); 618 619 int virtio_transport_connect(struct vsock_sock *vsk) 620 { 621 struct virtio_vsock_pkt_info info = { 622 .op = VIRTIO_VSOCK_OP_REQUEST, 623 .type = VIRTIO_VSOCK_TYPE_STREAM, 624 .vsk = vsk, 625 }; 626 627 return virtio_transport_send_pkt_info(vsk, &info); 628 } 629 EXPORT_SYMBOL_GPL(virtio_transport_connect); 630 631 int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) 632 { 633 struct virtio_vsock_pkt_info info = { 634 .op = VIRTIO_VSOCK_OP_SHUTDOWN, 635 .type = VIRTIO_VSOCK_TYPE_STREAM, 636 .flags = (mode & RCV_SHUTDOWN ? 637 VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | 638 (mode & SEND_SHUTDOWN ? 639 VIRTIO_VSOCK_SHUTDOWN_SEND : 0), 640 .vsk = vsk, 641 }; 642 643 return virtio_transport_send_pkt_info(vsk, &info); 644 } 645 EXPORT_SYMBOL_GPL(virtio_transport_shutdown); 646 647 int 648 virtio_transport_dgram_enqueue(struct vsock_sock *vsk, 649 struct sockaddr_vm *remote_addr, 650 struct msghdr *msg, 651 size_t dgram_len) 652 { 653 return -EOPNOTSUPP; 654 } 655 EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); 656 657 ssize_t 658 virtio_transport_stream_enqueue(struct vsock_sock *vsk, 659 struct msghdr *msg, 660 size_t len) 661 { 662 struct virtio_vsock_pkt_info info = { 663 .op = VIRTIO_VSOCK_OP_RW, 664 .type = VIRTIO_VSOCK_TYPE_STREAM, 665 .msg = msg, 666 .pkt_len = len, 667 .vsk = vsk, 668 }; 669 670 return virtio_transport_send_pkt_info(vsk, &info); 671 } 672 EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); 673 674 void virtio_transport_destruct(struct vsock_sock *vsk) 675 { 676 struct virtio_vsock_sock *vvs = vsk->trans; 677 678 kfree(vvs); 679 } 680 EXPORT_SYMBOL_GPL(virtio_transport_destruct); 681 682 static int virtio_transport_reset(struct vsock_sock *vsk, 683 struct virtio_vsock_pkt *pkt) 684 { 685 struct virtio_vsock_pkt_info info = { 686 .op = VIRTIO_VSOCK_OP_RST, 687 .type = VIRTIO_VSOCK_TYPE_STREAM, 688 .reply = !!pkt, 689 .vsk = vsk, 690 }; 691 692 /* Send RST only if the original pkt is not a RST pkt */ 693 if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) 694 return 0; 695 696 return virtio_transport_send_pkt_info(vsk, &info); 697 } 698 699 /* Normally packets are associated with a socket. There may be no socket if an 700 * attempt was made to connect to a socket that does not exist. 701 */ 702 static int virtio_transport_reset_no_sock(const struct virtio_transport *t, 703 struct virtio_vsock_pkt *pkt) 704 { 705 struct virtio_vsock_pkt *reply; 706 struct virtio_vsock_pkt_info info = { 707 .op = VIRTIO_VSOCK_OP_RST, 708 .type = le16_to_cpu(pkt->hdr.type), 709 .reply = true, 710 }; 711 712 /* Send RST only if the original pkt is not a RST pkt */ 713 if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) 714 return 0; 715 716 reply = virtio_transport_alloc_pkt(&info, 0, 717 le64_to_cpu(pkt->hdr.dst_cid), 718 le32_to_cpu(pkt->hdr.dst_port), 719 le64_to_cpu(pkt->hdr.src_cid), 720 le32_to_cpu(pkt->hdr.src_port)); 721 if (!reply) 722 return -ENOMEM; 723 724 if (!t) { 725 virtio_transport_free_pkt(reply); 726 return -ENOTCONN; 727 } 728 729 return t->send_pkt(reply); 730 } 731 732 static void virtio_transport_wait_close(struct sock *sk, long timeout) 733 { 734 if (timeout) { 735 DEFINE_WAIT_FUNC(wait, woken_wake_function); 736 737 add_wait_queue(sk_sleep(sk), &wait); 738 739 do { 740 if (sk_wait_event(sk, &timeout, 741 sock_flag(sk, SOCK_DONE), &wait)) 742 break; 743 } while (!signal_pending(current) && timeout); 744 745 remove_wait_queue(sk_sleep(sk), &wait); 746 } 747 } 748 749 static void virtio_transport_do_close(struct vsock_sock *vsk, 750 bool cancel_timeout) 751 { 752 struct sock *sk = sk_vsock(vsk); 753 754 sock_set_flag(sk, SOCK_DONE); 755 vsk->peer_shutdown = SHUTDOWN_MASK; 756 if (vsock_stream_has_data(vsk) <= 0) 757 sk->sk_state = TCP_CLOSING; 758 sk->sk_state_change(sk); 759 760 if (vsk->close_work_scheduled && 761 (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { 762 vsk->close_work_scheduled = false; 763 764 vsock_remove_sock(vsk); 765 766 /* Release refcnt obtained when we scheduled the timeout */ 767 sock_put(sk); 768 } 769 } 770 771 static void virtio_transport_close_timeout(struct work_struct *work) 772 { 773 struct vsock_sock *vsk = 774 container_of(work, struct vsock_sock, close_work.work); 775 struct sock *sk = sk_vsock(vsk); 776 777 sock_hold(sk); 778 lock_sock(sk); 779 780 if (!sock_flag(sk, SOCK_DONE)) { 781 (void)virtio_transport_reset(vsk, NULL); 782 783 virtio_transport_do_close(vsk, false); 784 } 785 786 vsk->close_work_scheduled = false; 787 788 release_sock(sk); 789 sock_put(sk); 790 } 791 792 /* User context, vsk->sk is locked */ 793 static bool virtio_transport_close(struct vsock_sock *vsk) 794 { 795 struct sock *sk = &vsk->sk; 796 797 if (!(sk->sk_state == TCP_ESTABLISHED || 798 sk->sk_state == TCP_CLOSING)) 799 return true; 800 801 /* Already received SHUTDOWN from peer, reply with RST */ 802 if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) { 803 (void)virtio_transport_reset(vsk, NULL); 804 return true; 805 } 806 807 if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) 808 (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK); 809 810 if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING)) 811 virtio_transport_wait_close(sk, sk->sk_lingertime); 812 813 if (sock_flag(sk, SOCK_DONE)) { 814 return true; 815 } 816 817 sock_hold(sk); 818 INIT_DELAYED_WORK(&vsk->close_work, 819 virtio_transport_close_timeout); 820 vsk->close_work_scheduled = true; 821 schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT); 822 return false; 823 } 824 825 void virtio_transport_release(struct vsock_sock *vsk) 826 { 827 struct virtio_vsock_sock *vvs = vsk->trans; 828 struct virtio_vsock_pkt *pkt, *tmp; 829 struct sock *sk = &vsk->sk; 830 bool remove_sock = true; 831 832 if (sk->sk_type == SOCK_STREAM) 833 remove_sock = virtio_transport_close(vsk); 834 835 list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) { 836 list_del(&pkt->list); 837 virtio_transport_free_pkt(pkt); 838 } 839 840 if (remove_sock) 841 vsock_remove_sock(vsk); 842 } 843 EXPORT_SYMBOL_GPL(virtio_transport_release); 844 845 static int 846 virtio_transport_recv_connecting(struct sock *sk, 847 struct virtio_vsock_pkt *pkt) 848 { 849 struct vsock_sock *vsk = vsock_sk(sk); 850 int err; 851 int skerr; 852 853 switch (le16_to_cpu(pkt->hdr.op)) { 854 case VIRTIO_VSOCK_OP_RESPONSE: 855 sk->sk_state = TCP_ESTABLISHED; 856 sk->sk_socket->state = SS_CONNECTED; 857 vsock_insert_connected(vsk); 858 sk->sk_state_change(sk); 859 break; 860 case VIRTIO_VSOCK_OP_INVALID: 861 break; 862 case VIRTIO_VSOCK_OP_RST: 863 skerr = ECONNRESET; 864 err = 0; 865 goto destroy; 866 default: 867 skerr = EPROTO; 868 err = -EINVAL; 869 goto destroy; 870 } 871 return 0; 872 873 destroy: 874 virtio_transport_reset(vsk, pkt); 875 sk->sk_state = TCP_CLOSE; 876 sk->sk_err = skerr; 877 sk->sk_error_report(sk); 878 return err; 879 } 880 881 static void 882 virtio_transport_recv_enqueue(struct vsock_sock *vsk, 883 struct virtio_vsock_pkt *pkt) 884 { 885 struct virtio_vsock_sock *vvs = vsk->trans; 886 bool can_enqueue, free_pkt = false; 887 888 pkt->len = le32_to_cpu(pkt->hdr.len); 889 pkt->off = 0; 890 891 spin_lock_bh(&vvs->rx_lock); 892 893 can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt); 894 if (!can_enqueue) { 895 free_pkt = true; 896 goto out; 897 } 898 899 /* Try to copy small packets into the buffer of last packet queued, 900 * to avoid wasting memory queueing the entire buffer with a small 901 * payload. 902 */ 903 if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) { 904 struct virtio_vsock_pkt *last_pkt; 905 906 last_pkt = list_last_entry(&vvs->rx_queue, 907 struct virtio_vsock_pkt, list); 908 909 /* If there is space in the last packet queued, we copy the 910 * new packet in its buffer. 911 */ 912 if (pkt->len <= last_pkt->buf_len - last_pkt->len) { 913 memcpy(last_pkt->buf + last_pkt->len, pkt->buf, 914 pkt->len); 915 last_pkt->len += pkt->len; 916 free_pkt = true; 917 goto out; 918 } 919 } 920 921 list_add_tail(&pkt->list, &vvs->rx_queue); 922 923 out: 924 spin_unlock_bh(&vvs->rx_lock); 925 if (free_pkt) 926 virtio_transport_free_pkt(pkt); 927 } 928 929 static int 930 virtio_transport_recv_connected(struct sock *sk, 931 struct virtio_vsock_pkt *pkt) 932 { 933 struct vsock_sock *vsk = vsock_sk(sk); 934 int err = 0; 935 936 switch (le16_to_cpu(pkt->hdr.op)) { 937 case VIRTIO_VSOCK_OP_RW: 938 virtio_transport_recv_enqueue(vsk, pkt); 939 sk->sk_data_ready(sk); 940 return err; 941 case VIRTIO_VSOCK_OP_CREDIT_UPDATE: 942 sk->sk_write_space(sk); 943 break; 944 case VIRTIO_VSOCK_OP_SHUTDOWN: 945 if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) 946 vsk->peer_shutdown |= RCV_SHUTDOWN; 947 if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) 948 vsk->peer_shutdown |= SEND_SHUTDOWN; 949 if (vsk->peer_shutdown == SHUTDOWN_MASK && 950 vsock_stream_has_data(vsk) <= 0 && 951 !sock_flag(sk, SOCK_DONE)) { 952 (void)virtio_transport_reset(vsk, NULL); 953 954 virtio_transport_do_close(vsk, true); 955 } 956 if (le32_to_cpu(pkt->hdr.flags)) 957 sk->sk_state_change(sk); 958 break; 959 case VIRTIO_VSOCK_OP_RST: 960 virtio_transport_do_close(vsk, true); 961 break; 962 default: 963 err = -EINVAL; 964 break; 965 } 966 967 virtio_transport_free_pkt(pkt); 968 return err; 969 } 970 971 static void 972 virtio_transport_recv_disconnecting(struct sock *sk, 973 struct virtio_vsock_pkt *pkt) 974 { 975 struct vsock_sock *vsk = vsock_sk(sk); 976 977 if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) 978 virtio_transport_do_close(vsk, true); 979 } 980 981 static int 982 virtio_transport_send_response(struct vsock_sock *vsk, 983 struct virtio_vsock_pkt *pkt) 984 { 985 struct virtio_vsock_pkt_info info = { 986 .op = VIRTIO_VSOCK_OP_RESPONSE, 987 .type = VIRTIO_VSOCK_TYPE_STREAM, 988 .remote_cid = le64_to_cpu(pkt->hdr.src_cid), 989 .remote_port = le32_to_cpu(pkt->hdr.src_port), 990 .reply = true, 991 .vsk = vsk, 992 }; 993 994 return virtio_transport_send_pkt_info(vsk, &info); 995 } 996 997 static bool virtio_transport_space_update(struct sock *sk, 998 struct virtio_vsock_pkt *pkt) 999 { 1000 struct vsock_sock *vsk = vsock_sk(sk); 1001 struct virtio_vsock_sock *vvs = vsk->trans; 1002 bool space_available; 1003 1004 /* Listener sockets are not associated with any transport, so we are 1005 * not able to take the state to see if there is space available in the 1006 * remote peer, but since they are only used to receive requests, we 1007 * can assume that there is always space available in the other peer. 1008 */ 1009 if (!vvs) 1010 return true; 1011 1012 /* buf_alloc and fwd_cnt is always included in the hdr */ 1013 spin_lock_bh(&vvs->tx_lock); 1014 vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); 1015 vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); 1016 space_available = virtio_transport_has_space(vsk); 1017 spin_unlock_bh(&vvs->tx_lock); 1018 return space_available; 1019 } 1020 1021 /* Handle server socket */ 1022 static int 1023 virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt, 1024 struct virtio_transport *t) 1025 { 1026 struct vsock_sock *vsk = vsock_sk(sk); 1027 struct vsock_sock *vchild; 1028 struct sock *child; 1029 int ret; 1030 1031 if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) { 1032 virtio_transport_reset_no_sock(t, pkt); 1033 return -EINVAL; 1034 } 1035 1036 if (sk_acceptq_is_full(sk)) { 1037 virtio_transport_reset_no_sock(t, pkt); 1038 return -ENOMEM; 1039 } 1040 1041 child = vsock_create_connected(sk); 1042 if (!child) { 1043 virtio_transport_reset_no_sock(t, pkt); 1044 return -ENOMEM; 1045 } 1046 1047 sk_acceptq_added(sk); 1048 1049 lock_sock_nested(child, SINGLE_DEPTH_NESTING); 1050 1051 child->sk_state = TCP_ESTABLISHED; 1052 1053 vchild = vsock_sk(child); 1054 vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid), 1055 le32_to_cpu(pkt->hdr.dst_port)); 1056 vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid), 1057 le32_to_cpu(pkt->hdr.src_port)); 1058 1059 ret = vsock_assign_transport(vchild, vsk); 1060 /* Transport assigned (looking at remote_addr) must be the same 1061 * where we received the request. 1062 */ 1063 if (ret || vchild->transport != &t->transport) { 1064 release_sock(child); 1065 virtio_transport_reset_no_sock(t, pkt); 1066 sock_put(child); 1067 return ret; 1068 } 1069 1070 if (virtio_transport_space_update(child, pkt)) 1071 child->sk_write_space(child); 1072 1073 vsock_insert_connected(vchild); 1074 vsock_enqueue_accept(sk, child); 1075 virtio_transport_send_response(vchild, pkt); 1076 1077 release_sock(child); 1078 1079 sk->sk_data_ready(sk); 1080 return 0; 1081 } 1082 1083 /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex 1084 * lock. 1085 */ 1086 void virtio_transport_recv_pkt(struct virtio_transport *t, 1087 struct virtio_vsock_pkt *pkt) 1088 { 1089 struct sockaddr_vm src, dst; 1090 struct vsock_sock *vsk; 1091 struct sock *sk; 1092 bool space_available; 1093 1094 vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid), 1095 le32_to_cpu(pkt->hdr.src_port)); 1096 vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid), 1097 le32_to_cpu(pkt->hdr.dst_port)); 1098 1099 trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, 1100 dst.svm_cid, dst.svm_port, 1101 le32_to_cpu(pkt->hdr.len), 1102 le16_to_cpu(pkt->hdr.type), 1103 le16_to_cpu(pkt->hdr.op), 1104 le32_to_cpu(pkt->hdr.flags), 1105 le32_to_cpu(pkt->hdr.buf_alloc), 1106 le32_to_cpu(pkt->hdr.fwd_cnt)); 1107 1108 if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) { 1109 (void)virtio_transport_reset_no_sock(t, pkt); 1110 goto free_pkt; 1111 } 1112 1113 /* The socket must be in connected or bound table 1114 * otherwise send reset back 1115 */ 1116 sk = vsock_find_connected_socket(&src, &dst); 1117 if (!sk) { 1118 sk = vsock_find_bound_socket(&dst); 1119 if (!sk) { 1120 (void)virtio_transport_reset_no_sock(t, pkt); 1121 goto free_pkt; 1122 } 1123 } 1124 1125 vsk = vsock_sk(sk); 1126 1127 space_available = virtio_transport_space_update(sk, pkt); 1128 1129 lock_sock(sk); 1130 1131 /* Update CID in case it has changed after a transport reset event */ 1132 vsk->local_addr.svm_cid = dst.svm_cid; 1133 1134 if (space_available) 1135 sk->sk_write_space(sk); 1136 1137 switch (sk->sk_state) { 1138 case TCP_LISTEN: 1139 virtio_transport_recv_listen(sk, pkt, t); 1140 virtio_transport_free_pkt(pkt); 1141 break; 1142 case TCP_SYN_SENT: 1143 virtio_transport_recv_connecting(sk, pkt); 1144 virtio_transport_free_pkt(pkt); 1145 break; 1146 case TCP_ESTABLISHED: 1147 virtio_transport_recv_connected(sk, pkt); 1148 break; 1149 case TCP_CLOSING: 1150 virtio_transport_recv_disconnecting(sk, pkt); 1151 virtio_transport_free_pkt(pkt); 1152 break; 1153 default: 1154 virtio_transport_free_pkt(pkt); 1155 break; 1156 } 1157 1158 release_sock(sk); 1159 1160 /* Release refcnt obtained when we fetched this socket out of the 1161 * bound or connected list. 1162 */ 1163 sock_put(sk); 1164 return; 1165 1166 free_pkt: 1167 virtio_transport_free_pkt(pkt); 1168 } 1169 EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); 1170 1171 void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) 1172 { 1173 kfree(pkt->buf); 1174 kfree(pkt); 1175 } 1176 EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); 1177 1178 MODULE_LICENSE("GPL v2"); 1179 MODULE_AUTHOR("Asias He"); 1180 MODULE_DESCRIPTION("common code for virtio vsock"); 1181