1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Virtio vhost-user driver 4 * 5 * Copyright(c) 2019 Intel Corporation 6 * 7 * This driver allows virtio devices to be used over a vhost-user socket. 8 * 9 * Guest devices can be instantiated by kernel module or command line 10 * parameters. One device will be created for each parameter. Syntax: 11 * 12 * virtio_uml.device=<socket>:<virtio_id>[:<platform_id>] 13 * where: 14 * <socket> := vhost-user socket path to connect 15 * <virtio_id> := virtio device id (as in virtio_ids.h) 16 * <platform_id> := (optional) platform device id 17 * 18 * example: 19 * virtio_uml.device=/var/uml.socket:1 20 * 21 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd. 22 */ 23 #include <linux/module.h> 24 #include <linux/platform_device.h> 25 #include <linux/slab.h> 26 #include <linux/virtio.h> 27 #include <linux/virtio_config.h> 28 #include <linux/virtio_ring.h> 29 #include <linux/time-internal.h> 30 #include <shared/as-layout.h> 31 #include <irq_kern.h> 32 #include <init.h> 33 #include <os.h> 34 #include "vhost_user.h" 35 36 #define MAX_SUPPORTED_QUEUE_SIZE 256 37 38 #define to_virtio_uml_device(_vdev) \ 39 container_of(_vdev, struct virtio_uml_device, vdev) 40 41 struct virtio_uml_platform_data { 42 u32 virtio_device_id; 43 const char *socket_path; 44 struct work_struct conn_broken_wk; 45 struct platform_device *pdev; 46 }; 47 48 struct virtio_uml_device { 49 struct virtio_device vdev; 50 struct platform_device *pdev; 51 52 spinlock_t sock_lock; 53 int sock, req_fd, irq; 54 u64 features; 55 u64 protocol_features; 56 u8 status; 57 u8 registered:1; 58 u8 suspended:1; 59 u8 no_vq_suspend:1; 60 61 u8 config_changed_irq:1; 62 uint64_t vq_irq_vq_map; 63 }; 64 65 struct virtio_uml_vq_info { 66 int kick_fd, call_fd; 67 char name[32]; 68 bool suspended; 69 }; 70 71 extern unsigned long long physmem_size, highmem; 72 73 #define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__) 74 75 /* Vhost-user protocol */ 76 77 static int full_sendmsg_fds(int fd, const void *buf, unsigned int len, 78 const int *fds, unsigned int fds_num) 79 { 80 int rc; 81 82 do { 83 rc = os_sendmsg_fds(fd, buf, len, fds, fds_num); 84 if (rc > 0) { 85 buf += rc; 86 len -= rc; 87 fds = NULL; 88 fds_num = 0; 89 } 90 } while (len && (rc >= 0 || rc == -EINTR)); 91 92 if (rc < 0) 93 return rc; 94 return 0; 95 } 96 97 static int full_read(int fd, void *buf, int len, bool abortable) 98 { 99 int rc; 100 101 if (!len) 102 return 0; 103 104 do { 105 rc = os_read_file(fd, buf, len); 106 if (rc > 0) { 107 buf += rc; 108 len -= rc; 109 } 110 } while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN))); 111 112 if (rc < 0) 113 return rc; 114 if (rc == 0) 115 return -ECONNRESET; 116 return 0; 117 } 118 119 static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg) 120 { 121 return full_read(fd, msg, sizeof(msg->header), true); 122 } 123 124 static int vhost_user_recv(struct virtio_uml_device *vu_dev, 125 int fd, struct vhost_user_msg *msg, 126 size_t max_payload_size, bool wait) 127 { 128 size_t size; 129 int rc; 130 131 /* 132 * In virtio time-travel mode, we're handling all the vhost-user 133 * FDs by polling them whenever appropriate. However, we may get 134 * into a situation where we're sending out an interrupt message 135 * to a device (e.g. a net device) and need to handle a simulation 136 * time message while doing so, e.g. one that tells us to update 137 * our idea of how long we can run without scheduling. 138 * 139 * Thus, we need to not just read() from the given fd, but need 140 * to also handle messages for the simulation time - this function 141 * does that for us while waiting for the given fd to be readable. 142 */ 143 if (wait) 144 time_travel_wait_readable(fd); 145 146 rc = vhost_user_recv_header(fd, msg); 147 148 if (rc == -ECONNRESET && vu_dev->registered) { 149 struct virtio_uml_platform_data *pdata; 150 151 pdata = vu_dev->pdev->dev.platform_data; 152 153 virtio_break_device(&vu_dev->vdev); 154 schedule_work(&pdata->conn_broken_wk); 155 } 156 if (rc) 157 return rc; 158 size = msg->header.size; 159 if (size > max_payload_size) 160 return -EPROTO; 161 return full_read(fd, &msg->payload, size, false); 162 } 163 164 static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, 165 struct vhost_user_msg *msg, 166 size_t max_payload_size) 167 { 168 int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, 169 max_payload_size, true); 170 171 if (rc) 172 return rc; 173 174 if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION)) 175 return -EPROTO; 176 177 return 0; 178 } 179 180 static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev, 181 u64 *value) 182 { 183 struct vhost_user_msg msg; 184 int rc = vhost_user_recv_resp(vu_dev, &msg, 185 sizeof(msg.payload.integer)); 186 187 if (rc) 188 return rc; 189 if (msg.header.size != sizeof(msg.payload.integer)) 190 return -EPROTO; 191 *value = msg.payload.integer; 192 return 0; 193 } 194 195 static int vhost_user_recv_req(struct virtio_uml_device *vu_dev, 196 struct vhost_user_msg *msg, 197 size_t max_payload_size) 198 { 199 int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, 200 max_payload_size, false); 201 202 if (rc) 203 return rc; 204 205 if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) != 206 VHOST_USER_VERSION) 207 return -EPROTO; 208 209 return 0; 210 } 211 212 static int vhost_user_send(struct virtio_uml_device *vu_dev, 213 bool need_response, struct vhost_user_msg *msg, 214 int *fds, size_t num_fds) 215 { 216 size_t size = sizeof(msg->header) + msg->header.size; 217 unsigned long flags; 218 bool request_ack; 219 int rc; 220 221 msg->header.flags |= VHOST_USER_VERSION; 222 223 /* 224 * The need_response flag indicates that we already need a response, 225 * e.g. to read the features. In these cases, don't request an ACK as 226 * it is meaningless. Also request an ACK only if supported. 227 */ 228 request_ack = !need_response; 229 if (!(vu_dev->protocol_features & 230 BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK))) 231 request_ack = false; 232 233 if (request_ack) 234 msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY; 235 236 spin_lock_irqsave(&vu_dev->sock_lock, flags); 237 rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds); 238 if (rc < 0) 239 goto out; 240 241 if (request_ack) { 242 uint64_t status; 243 244 rc = vhost_user_recv_u64(vu_dev, &status); 245 if (rc) 246 goto out; 247 248 if (status) { 249 vu_err(vu_dev, "slave reports error: %llu\n", status); 250 rc = -EIO; 251 goto out; 252 } 253 } 254 255 out: 256 spin_unlock_irqrestore(&vu_dev->sock_lock, flags); 257 return rc; 258 } 259 260 static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev, 261 bool need_response, u32 request) 262 { 263 struct vhost_user_msg msg = { 264 .header.request = request, 265 }; 266 267 return vhost_user_send(vu_dev, need_response, &msg, NULL, 0); 268 } 269 270 static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev, 271 u32 request, int fd) 272 { 273 struct vhost_user_msg msg = { 274 .header.request = request, 275 }; 276 277 return vhost_user_send(vu_dev, false, &msg, &fd, 1); 278 } 279 280 static int vhost_user_send_u64(struct virtio_uml_device *vu_dev, 281 u32 request, u64 value) 282 { 283 struct vhost_user_msg msg = { 284 .header.request = request, 285 .header.size = sizeof(msg.payload.integer), 286 .payload.integer = value, 287 }; 288 289 return vhost_user_send(vu_dev, false, &msg, NULL, 0); 290 } 291 292 static int vhost_user_set_owner(struct virtio_uml_device *vu_dev) 293 { 294 return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER); 295 } 296 297 static int vhost_user_get_features(struct virtio_uml_device *vu_dev, 298 u64 *features) 299 { 300 int rc = vhost_user_send_no_payload(vu_dev, true, 301 VHOST_USER_GET_FEATURES); 302 303 if (rc) 304 return rc; 305 return vhost_user_recv_u64(vu_dev, features); 306 } 307 308 static int vhost_user_set_features(struct virtio_uml_device *vu_dev, 309 u64 features) 310 { 311 return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features); 312 } 313 314 static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev, 315 u64 *protocol_features) 316 { 317 int rc = vhost_user_send_no_payload(vu_dev, true, 318 VHOST_USER_GET_PROTOCOL_FEATURES); 319 320 if (rc) 321 return rc; 322 return vhost_user_recv_u64(vu_dev, protocol_features); 323 } 324 325 static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev, 326 u64 protocol_features) 327 { 328 return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES, 329 protocol_features); 330 } 331 332 static void vhost_user_reply(struct virtio_uml_device *vu_dev, 333 struct vhost_user_msg *msg, int response) 334 { 335 struct vhost_user_msg reply = { 336 .payload.integer = response, 337 }; 338 size_t size = sizeof(reply.header) + sizeof(reply.payload.integer); 339 int rc; 340 341 reply.header = msg->header; 342 reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY; 343 reply.header.flags |= VHOST_USER_FLAG_REPLY; 344 reply.header.size = sizeof(reply.payload.integer); 345 346 rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0); 347 348 if (rc) 349 vu_err(vu_dev, 350 "sending reply to slave request failed: %d (size %zu)\n", 351 rc, size); 352 } 353 354 static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, 355 struct time_travel_event *ev) 356 { 357 struct virtqueue *vq; 358 int response = 1; 359 struct { 360 struct vhost_user_msg msg; 361 u8 extra_payload[512]; 362 } msg; 363 int rc; 364 365 rc = vhost_user_recv_req(vu_dev, &msg.msg, 366 sizeof(msg.msg.payload) + 367 sizeof(msg.extra_payload)); 368 369 if (rc) 370 return IRQ_NONE; 371 372 switch (msg.msg.header.request) { 373 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG: 374 vu_dev->config_changed_irq = true; 375 response = 0; 376 break; 377 case VHOST_USER_SLAVE_VRING_CALL: 378 virtio_device_for_each_vq((&vu_dev->vdev), vq) { 379 if (vq->index == msg.msg.payload.vring_state.index) { 380 response = 0; 381 vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index); 382 break; 383 } 384 } 385 break; 386 case VHOST_USER_SLAVE_IOTLB_MSG: 387 /* not supported - VIRTIO_F_ACCESS_PLATFORM */ 388 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 389 /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */ 390 default: 391 vu_err(vu_dev, "unexpected slave request %d\n", 392 msg.msg.header.request); 393 } 394 395 if (ev && !vu_dev->suspended) 396 time_travel_add_irq_event(ev); 397 398 if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY) 399 vhost_user_reply(vu_dev, &msg.msg, response); 400 401 return IRQ_HANDLED; 402 } 403 404 static irqreturn_t vu_req_interrupt(int irq, void *data) 405 { 406 struct virtio_uml_device *vu_dev = data; 407 irqreturn_t ret = IRQ_HANDLED; 408 409 if (!um_irq_timetravel_handler_used()) 410 ret = vu_req_read_message(vu_dev, NULL); 411 412 if (vu_dev->vq_irq_vq_map) { 413 struct virtqueue *vq; 414 415 virtio_device_for_each_vq((&vu_dev->vdev), vq) { 416 if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index)) 417 vring_interrupt(0 /* ignored */, vq); 418 } 419 vu_dev->vq_irq_vq_map = 0; 420 } else if (vu_dev->config_changed_irq) { 421 virtio_config_changed(&vu_dev->vdev); 422 vu_dev->config_changed_irq = false; 423 } 424 425 return ret; 426 } 427 428 static void vu_req_interrupt_comm_handler(int irq, int fd, void *data, 429 struct time_travel_event *ev) 430 { 431 vu_req_read_message(data, ev); 432 } 433 434 static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev) 435 { 436 int rc, req_fds[2]; 437 438 /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */ 439 rc = os_pipe(req_fds, true, true); 440 if (rc < 0) 441 return rc; 442 vu_dev->req_fd = req_fds[0]; 443 444 rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ, 445 vu_req_interrupt, IRQF_SHARED, 446 vu_dev->pdev->name, vu_dev, 447 vu_req_interrupt_comm_handler); 448 if (rc < 0) 449 goto err_close; 450 451 vu_dev->irq = rc; 452 453 rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD, 454 req_fds[1]); 455 if (rc) 456 goto err_free_irq; 457 458 goto out; 459 460 err_free_irq: 461 um_free_irq(vu_dev->irq, vu_dev); 462 err_close: 463 os_close_file(req_fds[0]); 464 out: 465 /* Close unused write end of request fds */ 466 os_close_file(req_fds[1]); 467 return rc; 468 } 469 470 static int vhost_user_init(struct virtio_uml_device *vu_dev) 471 { 472 int rc = vhost_user_set_owner(vu_dev); 473 474 if (rc) 475 return rc; 476 rc = vhost_user_get_features(vu_dev, &vu_dev->features); 477 if (rc) 478 return rc; 479 480 if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) { 481 rc = vhost_user_get_protocol_features(vu_dev, 482 &vu_dev->protocol_features); 483 if (rc) 484 return rc; 485 vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F; 486 rc = vhost_user_set_protocol_features(vu_dev, 487 vu_dev->protocol_features); 488 if (rc) 489 return rc; 490 } 491 492 if (vu_dev->protocol_features & 493 BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 494 rc = vhost_user_init_slave_req(vu_dev); 495 if (rc) 496 return rc; 497 } 498 499 return 0; 500 } 501 502 static void vhost_user_get_config(struct virtio_uml_device *vu_dev, 503 u32 offset, void *buf, u32 len) 504 { 505 u32 cfg_size = offset + len; 506 struct vhost_user_msg *msg; 507 size_t payload_size = sizeof(msg->payload.config) + cfg_size; 508 size_t msg_size = sizeof(msg->header) + payload_size; 509 int rc; 510 511 if (!(vu_dev->protocol_features & 512 BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))) 513 return; 514 515 msg = kzalloc(msg_size, GFP_KERNEL); 516 if (!msg) 517 return; 518 msg->header.request = VHOST_USER_GET_CONFIG; 519 msg->header.size = payload_size; 520 msg->payload.config.offset = 0; 521 msg->payload.config.size = cfg_size; 522 523 rc = vhost_user_send(vu_dev, true, msg, NULL, 0); 524 if (rc) { 525 vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n", 526 rc); 527 goto free; 528 } 529 530 rc = vhost_user_recv_resp(vu_dev, msg, msg_size); 531 if (rc) { 532 vu_err(vu_dev, 533 "receiving VHOST_USER_GET_CONFIG response failed: %d\n", 534 rc); 535 goto free; 536 } 537 538 if (msg->header.size != payload_size || 539 msg->payload.config.size != cfg_size) { 540 rc = -EPROTO; 541 vu_err(vu_dev, 542 "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n", 543 msg->header.size, payload_size, 544 msg->payload.config.size, cfg_size); 545 goto free; 546 } 547 memcpy(buf, msg->payload.config.payload + offset, len); 548 549 free: 550 kfree(msg); 551 } 552 553 static void vhost_user_set_config(struct virtio_uml_device *vu_dev, 554 u32 offset, const void *buf, u32 len) 555 { 556 struct vhost_user_msg *msg; 557 size_t payload_size = sizeof(msg->payload.config) + len; 558 size_t msg_size = sizeof(msg->header) + payload_size; 559 int rc; 560 561 if (!(vu_dev->protocol_features & 562 BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))) 563 return; 564 565 msg = kzalloc(msg_size, GFP_KERNEL); 566 if (!msg) 567 return; 568 msg->header.request = VHOST_USER_SET_CONFIG; 569 msg->header.size = payload_size; 570 msg->payload.config.offset = offset; 571 msg->payload.config.size = len; 572 memcpy(msg->payload.config.payload, buf, len); 573 574 rc = vhost_user_send(vu_dev, false, msg, NULL, 0); 575 if (rc) 576 vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n", 577 rc); 578 579 kfree(msg); 580 } 581 582 static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out, 583 struct vhost_user_mem_region *region_out) 584 { 585 unsigned long long mem_offset; 586 int rc = phys_mapping(addr, &mem_offset); 587 588 if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc)) 589 return -EFAULT; 590 *fd_out = rc; 591 region_out->guest_addr = addr; 592 region_out->user_addr = addr; 593 region_out->size = size; 594 region_out->mmap_offset = mem_offset; 595 596 /* Ensure mapping is valid for the entire region */ 597 rc = phys_mapping(addr + size - 1, &mem_offset); 598 if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n", 599 addr + size - 1, rc, *fd_out)) 600 return -EFAULT; 601 return 0; 602 } 603 604 static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) 605 { 606 struct vhost_user_msg msg = { 607 .header.request = VHOST_USER_SET_MEM_TABLE, 608 .header.size = sizeof(msg.payload.mem_regions), 609 .payload.mem_regions.num = 1, 610 }; 611 unsigned long reserved = uml_reserved - uml_physmem; 612 int fds[2]; 613 int rc; 614 615 /* 616 * This is a bit tricky, see also the comment with setup_physmem(). 617 * 618 * Essentially, setup_physmem() uses a file to mmap() our physmem, 619 * but the code and data we *already* have is omitted. To us, this 620 * is no difference, since they both become part of our address 621 * space and memory consumption. To somebody looking in from the 622 * outside, however, it is different because the part of our memory 623 * consumption that's already part of the binary (code/data) is not 624 * mapped from the file, so it's not visible to another mmap from 625 * the file descriptor. 626 * 627 * Thus, don't advertise this space to the vhost-user slave. This 628 * means that the slave will likely abort or similar when we give 629 * it an address from the hidden range, since it's not marked as 630 * a valid address, but at least that way we detect the issue and 631 * don't just have the slave read an all-zeroes buffer from the 632 * shared memory file, or write something there that we can never 633 * see (depending on the direction of the virtqueue traffic.) 634 * 635 * Since we usually don't want to use .text for virtio buffers, 636 * this effectively means that you cannot use 637 * 1) global variables, which are in the .bss and not in the shm 638 * file-backed memory 639 * 2) the stack in some processes, depending on where they have 640 * their stack (or maybe only no interrupt stack?) 641 * 642 * The stack is already not typically valid for DMA, so this isn't 643 * much of a restriction, but global variables might be encountered. 644 * 645 * It might be possible to fix it by copying around the data that's 646 * between bss_start and where we map the file now, but it's not 647 * something that you typically encounter with virtio drivers, so 648 * it didn't seem worthwhile. 649 */ 650 rc = vhost_user_init_mem_region(reserved, physmem_size - reserved, 651 &fds[0], 652 &msg.payload.mem_regions.regions[0]); 653 654 if (rc < 0) 655 return rc; 656 if (highmem) { 657 msg.payload.mem_regions.num++; 658 rc = vhost_user_init_mem_region(__pa(end_iomem), highmem, 659 &fds[1], &msg.payload.mem_regions.regions[1]); 660 if (rc < 0) 661 return rc; 662 } 663 664 return vhost_user_send(vu_dev, false, &msg, fds, 665 msg.payload.mem_regions.num); 666 } 667 668 static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev, 669 u32 request, u32 index, u32 num) 670 { 671 struct vhost_user_msg msg = { 672 .header.request = request, 673 .header.size = sizeof(msg.payload.vring_state), 674 .payload.vring_state.index = index, 675 .payload.vring_state.num = num, 676 }; 677 678 return vhost_user_send(vu_dev, false, &msg, NULL, 0); 679 } 680 681 static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev, 682 u32 index, u32 num) 683 { 684 return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM, 685 index, num); 686 } 687 688 static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev, 689 u32 index, u32 offset) 690 { 691 return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE, 692 index, offset); 693 } 694 695 static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev, 696 u32 index, u64 desc, u64 used, u64 avail, 697 u64 log) 698 { 699 struct vhost_user_msg msg = { 700 .header.request = VHOST_USER_SET_VRING_ADDR, 701 .header.size = sizeof(msg.payload.vring_addr), 702 .payload.vring_addr.index = index, 703 .payload.vring_addr.desc = desc, 704 .payload.vring_addr.used = used, 705 .payload.vring_addr.avail = avail, 706 .payload.vring_addr.log = log, 707 }; 708 709 return vhost_user_send(vu_dev, false, &msg, NULL, 0); 710 } 711 712 static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev, 713 u32 request, int index, int fd) 714 { 715 struct vhost_user_msg msg = { 716 .header.request = request, 717 .header.size = sizeof(msg.payload.integer), 718 .payload.integer = index, 719 }; 720 721 if (index & ~VHOST_USER_VRING_INDEX_MASK) 722 return -EINVAL; 723 if (fd < 0) { 724 msg.payload.integer |= VHOST_USER_VRING_POLL_MASK; 725 return vhost_user_send(vu_dev, false, &msg, NULL, 0); 726 } 727 return vhost_user_send(vu_dev, false, &msg, &fd, 1); 728 } 729 730 static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev, 731 int index, int fd) 732 { 733 return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL, 734 index, fd); 735 } 736 737 static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev, 738 int index, int fd) 739 { 740 return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK, 741 index, fd); 742 } 743 744 static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev, 745 u32 index, bool enable) 746 { 747 if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES))) 748 return 0; 749 750 return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE, 751 index, enable); 752 } 753 754 755 /* Virtio interface */ 756 757 static bool vu_notify(struct virtqueue *vq) 758 { 759 struct virtio_uml_vq_info *info = vq->priv; 760 const uint64_t n = 1; 761 int rc; 762 763 if (info->suspended) 764 return true; 765 766 time_travel_propagate_time(); 767 768 if (info->kick_fd < 0) { 769 struct virtio_uml_device *vu_dev; 770 771 vu_dev = to_virtio_uml_device(vq->vdev); 772 773 return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK, 774 vq->index, 0) == 0; 775 } 776 777 do { 778 rc = os_write_file(info->kick_fd, &n, sizeof(n)); 779 } while (rc == -EINTR); 780 return !WARN(rc != sizeof(n), "write returned %d\n", rc); 781 } 782 783 static irqreturn_t vu_interrupt(int irq, void *opaque) 784 { 785 struct virtqueue *vq = opaque; 786 struct virtio_uml_vq_info *info = vq->priv; 787 uint64_t n; 788 int rc; 789 irqreturn_t ret = IRQ_NONE; 790 791 do { 792 rc = os_read_file(info->call_fd, &n, sizeof(n)); 793 if (rc == sizeof(n)) 794 ret |= vring_interrupt(irq, vq); 795 } while (rc == sizeof(n) || rc == -EINTR); 796 WARN(rc != -EAGAIN, "read returned %d\n", rc); 797 return ret; 798 } 799 800 801 static void vu_get(struct virtio_device *vdev, unsigned offset, 802 void *buf, unsigned len) 803 { 804 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 805 806 vhost_user_get_config(vu_dev, offset, buf, len); 807 } 808 809 static void vu_set(struct virtio_device *vdev, unsigned offset, 810 const void *buf, unsigned len) 811 { 812 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 813 814 vhost_user_set_config(vu_dev, offset, buf, len); 815 } 816 817 static u8 vu_get_status(struct virtio_device *vdev) 818 { 819 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 820 821 return vu_dev->status; 822 } 823 824 static void vu_set_status(struct virtio_device *vdev, u8 status) 825 { 826 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 827 828 vu_dev->status = status; 829 } 830 831 static void vu_reset(struct virtio_device *vdev) 832 { 833 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 834 835 vu_dev->status = 0; 836 } 837 838 static void vu_del_vq(struct virtqueue *vq) 839 { 840 struct virtio_uml_vq_info *info = vq->priv; 841 842 if (info->call_fd >= 0) { 843 struct virtio_uml_device *vu_dev; 844 845 vu_dev = to_virtio_uml_device(vq->vdev); 846 847 um_free_irq(vu_dev->irq, vq); 848 os_close_file(info->call_fd); 849 } 850 851 if (info->kick_fd >= 0) 852 os_close_file(info->kick_fd); 853 854 vring_del_virtqueue(vq); 855 kfree(info); 856 } 857 858 static void vu_del_vqs(struct virtio_device *vdev) 859 { 860 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 861 struct virtqueue *vq, *n; 862 u64 features; 863 864 /* Note: reverse order as a workaround to a decoding bug in snabb */ 865 list_for_each_entry_reverse(vq, &vdev->vqs, list) 866 WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false)); 867 868 /* Ensure previous messages have been processed */ 869 WARN_ON(vhost_user_get_features(vu_dev, &features)); 870 871 list_for_each_entry_safe(vq, n, &vdev->vqs, list) 872 vu_del_vq(vq); 873 } 874 875 static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, 876 struct virtqueue *vq) 877 { 878 struct virtio_uml_vq_info *info = vq->priv; 879 int call_fds[2]; 880 int rc; 881 882 /* no call FD needed/desired in this case */ 883 if (vu_dev->protocol_features & 884 BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && 885 vu_dev->protocol_features & 886 BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 887 info->call_fd = -1; 888 return 0; 889 } 890 891 /* Use a pipe for call fd, since SIGIO is not supported for eventfd */ 892 rc = os_pipe(call_fds, true, true); 893 if (rc < 0) 894 return rc; 895 896 info->call_fd = call_fds[0]; 897 rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, 898 vu_interrupt, IRQF_SHARED, info->name, vq); 899 if (rc < 0) 900 goto close_both; 901 902 rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]); 903 if (rc) 904 goto release_irq; 905 906 goto out; 907 908 release_irq: 909 um_free_irq(vu_dev->irq, vq); 910 close_both: 911 os_close_file(call_fds[0]); 912 out: 913 /* Close (unused) write end of call fds */ 914 os_close_file(call_fds[1]); 915 916 return rc; 917 } 918 919 static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, 920 unsigned index, vq_callback_t *callback, 921 const char *name, bool ctx) 922 { 923 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 924 struct platform_device *pdev = vu_dev->pdev; 925 struct virtio_uml_vq_info *info; 926 struct virtqueue *vq; 927 int num = MAX_SUPPORTED_QUEUE_SIZE; 928 int rc; 929 930 info = kzalloc(sizeof(*info), GFP_KERNEL); 931 if (!info) { 932 rc = -ENOMEM; 933 goto error_kzalloc; 934 } 935 snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name, 936 pdev->id, name); 937 938 vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true, 939 ctx, vu_notify, callback, info->name); 940 if (!vq) { 941 rc = -ENOMEM; 942 goto error_create; 943 } 944 vq->priv = info; 945 num = virtqueue_get_vring_size(vq); 946 947 if (vu_dev->protocol_features & 948 BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) { 949 info->kick_fd = -1; 950 } else { 951 rc = os_eventfd(0, 0); 952 if (rc < 0) 953 goto error_kick; 954 info->kick_fd = rc; 955 } 956 957 rc = vu_setup_vq_call_fd(vu_dev, vq); 958 if (rc) 959 goto error_call; 960 961 rc = vhost_user_set_vring_num(vu_dev, index, num); 962 if (rc) 963 goto error_setup; 964 965 rc = vhost_user_set_vring_base(vu_dev, index, 0); 966 if (rc) 967 goto error_setup; 968 969 rc = vhost_user_set_vring_addr(vu_dev, index, 970 virtqueue_get_desc_addr(vq), 971 virtqueue_get_used_addr(vq), 972 virtqueue_get_avail_addr(vq), 973 (u64) -1); 974 if (rc) 975 goto error_setup; 976 977 return vq; 978 979 error_setup: 980 if (info->call_fd >= 0) { 981 um_free_irq(vu_dev->irq, vq); 982 os_close_file(info->call_fd); 983 } 984 error_call: 985 if (info->kick_fd >= 0) 986 os_close_file(info->kick_fd); 987 error_kick: 988 vring_del_virtqueue(vq); 989 error_create: 990 kfree(info); 991 error_kzalloc: 992 return ERR_PTR(rc); 993 } 994 995 static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, 996 struct virtqueue *vqs[], vq_callback_t *callbacks[], 997 const char * const names[], const bool *ctx, 998 struct irq_affinity *desc) 999 { 1000 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 1001 int i, queue_idx = 0, rc; 1002 struct virtqueue *vq; 1003 1004 /* not supported for now */ 1005 if (WARN_ON(nvqs > 64)) 1006 return -EINVAL; 1007 1008 rc = vhost_user_set_mem_table(vu_dev); 1009 if (rc) 1010 return rc; 1011 1012 for (i = 0; i < nvqs; ++i) { 1013 if (!names[i]) { 1014 vqs[i] = NULL; 1015 continue; 1016 } 1017 1018 vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i], 1019 ctx ? ctx[i] : false); 1020 if (IS_ERR(vqs[i])) { 1021 rc = PTR_ERR(vqs[i]); 1022 goto error_setup; 1023 } 1024 } 1025 1026 list_for_each_entry(vq, &vdev->vqs, list) { 1027 struct virtio_uml_vq_info *info = vq->priv; 1028 1029 if (info->kick_fd >= 0) { 1030 rc = vhost_user_set_vring_kick(vu_dev, vq->index, 1031 info->kick_fd); 1032 if (rc) 1033 goto error_setup; 1034 } 1035 1036 rc = vhost_user_set_vring_enable(vu_dev, vq->index, true); 1037 if (rc) 1038 goto error_setup; 1039 } 1040 1041 return 0; 1042 1043 error_setup: 1044 vu_del_vqs(vdev); 1045 return rc; 1046 } 1047 1048 static u64 vu_get_features(struct virtio_device *vdev) 1049 { 1050 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 1051 1052 return vu_dev->features; 1053 } 1054 1055 static int vu_finalize_features(struct virtio_device *vdev) 1056 { 1057 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 1058 u64 supported = vdev->features & VHOST_USER_SUPPORTED_F; 1059 1060 vring_transport_features(vdev); 1061 vu_dev->features = vdev->features | supported; 1062 1063 return vhost_user_set_features(vu_dev, vu_dev->features); 1064 } 1065 1066 static const char *vu_bus_name(struct virtio_device *vdev) 1067 { 1068 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 1069 1070 return vu_dev->pdev->name; 1071 } 1072 1073 static const struct virtio_config_ops virtio_uml_config_ops = { 1074 .get = vu_get, 1075 .set = vu_set, 1076 .get_status = vu_get_status, 1077 .set_status = vu_set_status, 1078 .reset = vu_reset, 1079 .find_vqs = vu_find_vqs, 1080 .del_vqs = vu_del_vqs, 1081 .get_features = vu_get_features, 1082 .finalize_features = vu_finalize_features, 1083 .bus_name = vu_bus_name, 1084 }; 1085 1086 static void virtio_uml_release_dev(struct device *d) 1087 { 1088 struct virtio_device *vdev = 1089 container_of(d, struct virtio_device, dev); 1090 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 1091 1092 /* might not have been opened due to not negotiating the feature */ 1093 if (vu_dev->req_fd >= 0) { 1094 um_free_irq(vu_dev->irq, vu_dev); 1095 os_close_file(vu_dev->req_fd); 1096 } 1097 1098 os_close_file(vu_dev->sock); 1099 kfree(vu_dev); 1100 } 1101 1102 void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, 1103 bool no_vq_suspend) 1104 { 1105 struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); 1106 1107 if (WARN_ON(vdev->config != &virtio_uml_config_ops)) 1108 return; 1109 1110 vu_dev->no_vq_suspend = no_vq_suspend; 1111 dev_info(&vdev->dev, "%sabled VQ suspend\n", 1112 no_vq_suspend ? "dis" : "en"); 1113 } 1114 1115 /* Platform device */ 1116 1117 static int virtio_uml_probe(struct platform_device *pdev) 1118 { 1119 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; 1120 struct virtio_uml_device *vu_dev; 1121 int rc; 1122 1123 if (!pdata) 1124 return -EINVAL; 1125 1126 vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL); 1127 if (!vu_dev) 1128 return -ENOMEM; 1129 1130 vu_dev->vdev.dev.parent = &pdev->dev; 1131 vu_dev->vdev.dev.release = virtio_uml_release_dev; 1132 vu_dev->vdev.config = &virtio_uml_config_ops; 1133 vu_dev->vdev.id.device = pdata->virtio_device_id; 1134 vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID; 1135 vu_dev->pdev = pdev; 1136 vu_dev->req_fd = -1; 1137 1138 do { 1139 rc = os_connect_socket(pdata->socket_path); 1140 } while (rc == -EINTR); 1141 if (rc < 0) 1142 return rc; 1143 vu_dev->sock = rc; 1144 1145 spin_lock_init(&vu_dev->sock_lock); 1146 1147 rc = vhost_user_init(vu_dev); 1148 if (rc) 1149 goto error_init; 1150 1151 platform_set_drvdata(pdev, vu_dev); 1152 1153 device_set_wakeup_capable(&vu_dev->vdev.dev, true); 1154 1155 rc = register_virtio_device(&vu_dev->vdev); 1156 if (rc) 1157 put_device(&vu_dev->vdev.dev); 1158 vu_dev->registered = 1; 1159 return rc; 1160 1161 error_init: 1162 os_close_file(vu_dev->sock); 1163 return rc; 1164 } 1165 1166 static int virtio_uml_remove(struct platform_device *pdev) 1167 { 1168 struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); 1169 1170 unregister_virtio_device(&vu_dev->vdev); 1171 return 0; 1172 } 1173 1174 /* Command line device list */ 1175 1176 static void vu_cmdline_release_dev(struct device *d) 1177 { 1178 } 1179 1180 static struct device vu_cmdline_parent = { 1181 .init_name = "virtio-uml-cmdline", 1182 .release = vu_cmdline_release_dev, 1183 }; 1184 1185 static bool vu_cmdline_parent_registered; 1186 static int vu_cmdline_id; 1187 1188 static int vu_unregister_cmdline_device(struct device *dev, void *data) 1189 { 1190 struct platform_device *pdev = to_platform_device(dev); 1191 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; 1192 1193 kfree(pdata->socket_path); 1194 platform_device_unregister(pdev); 1195 return 0; 1196 } 1197 1198 static void vu_conn_broken(struct work_struct *wk) 1199 { 1200 struct virtio_uml_platform_data *pdata; 1201 1202 pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk); 1203 vu_unregister_cmdline_device(&pdata->pdev->dev, NULL); 1204 } 1205 1206 static int vu_cmdline_set(const char *device, const struct kernel_param *kp) 1207 { 1208 const char *ids = strchr(device, ':'); 1209 unsigned int virtio_device_id; 1210 int processed, consumed, err; 1211 char *socket_path; 1212 struct virtio_uml_platform_data pdata, *ppdata; 1213 struct platform_device *pdev; 1214 1215 if (!ids || ids == device) 1216 return -EINVAL; 1217 1218 processed = sscanf(ids, ":%u%n:%d%n", 1219 &virtio_device_id, &consumed, 1220 &vu_cmdline_id, &consumed); 1221 1222 if (processed < 1 || ids[consumed]) 1223 return -EINVAL; 1224 1225 if (!vu_cmdline_parent_registered) { 1226 err = device_register(&vu_cmdline_parent); 1227 if (err) { 1228 pr_err("Failed to register parent device!\n"); 1229 put_device(&vu_cmdline_parent); 1230 return err; 1231 } 1232 vu_cmdline_parent_registered = true; 1233 } 1234 1235 socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL); 1236 if (!socket_path) 1237 return -ENOMEM; 1238 1239 pdata.virtio_device_id = (u32) virtio_device_id; 1240 pdata.socket_path = socket_path; 1241 1242 pr_info("Registering device virtio-uml.%d id=%d at %s\n", 1243 vu_cmdline_id, virtio_device_id, socket_path); 1244 1245 pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml", 1246 vu_cmdline_id++, &pdata, 1247 sizeof(pdata)); 1248 err = PTR_ERR_OR_ZERO(pdev); 1249 if (err) 1250 goto free; 1251 1252 ppdata = pdev->dev.platform_data; 1253 ppdata->pdev = pdev; 1254 INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken); 1255 1256 return 0; 1257 1258 free: 1259 kfree(socket_path); 1260 return err; 1261 } 1262 1263 static int vu_cmdline_get_device(struct device *dev, void *data) 1264 { 1265 struct platform_device *pdev = to_platform_device(dev); 1266 struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; 1267 char *buffer = data; 1268 unsigned int len = strlen(buffer); 1269 1270 snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n", 1271 pdata->socket_path, pdata->virtio_device_id, pdev->id); 1272 return 0; 1273 } 1274 1275 static int vu_cmdline_get(char *buffer, const struct kernel_param *kp) 1276 { 1277 buffer[0] = '\0'; 1278 if (vu_cmdline_parent_registered) 1279 device_for_each_child(&vu_cmdline_parent, buffer, 1280 vu_cmdline_get_device); 1281 return strlen(buffer) + 1; 1282 } 1283 1284 static const struct kernel_param_ops vu_cmdline_param_ops = { 1285 .set = vu_cmdline_set, 1286 .get = vu_cmdline_get, 1287 }; 1288 1289 device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR); 1290 __uml_help(vu_cmdline_param_ops, 1291 "virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n" 1292 " Configure a virtio device over a vhost-user socket.\n" 1293 " See virtio_ids.h for a list of possible virtio device id values.\n" 1294 " Optionally use a specific platform_device id.\n\n" 1295 ); 1296 1297 1298 static void vu_unregister_cmdline_devices(void) 1299 { 1300 if (vu_cmdline_parent_registered) { 1301 device_for_each_child(&vu_cmdline_parent, NULL, 1302 vu_unregister_cmdline_device); 1303 device_unregister(&vu_cmdline_parent); 1304 vu_cmdline_parent_registered = false; 1305 } 1306 } 1307 1308 /* Platform driver */ 1309 1310 static const struct of_device_id virtio_uml_match[] = { 1311 { .compatible = "virtio,uml", }, 1312 { } 1313 }; 1314 MODULE_DEVICE_TABLE(of, virtio_uml_match); 1315 1316 static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state) 1317 { 1318 struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); 1319 1320 if (!vu_dev->no_vq_suspend) { 1321 struct virtqueue *vq; 1322 1323 virtio_device_for_each_vq((&vu_dev->vdev), vq) { 1324 struct virtio_uml_vq_info *info = vq->priv; 1325 1326 info->suspended = true; 1327 vhost_user_set_vring_enable(vu_dev, vq->index, false); 1328 } 1329 } 1330 1331 if (!device_may_wakeup(&vu_dev->vdev.dev)) { 1332 vu_dev->suspended = true; 1333 return 0; 1334 } 1335 1336 return irq_set_irq_wake(vu_dev->irq, 1); 1337 } 1338 1339 static int virtio_uml_resume(struct platform_device *pdev) 1340 { 1341 struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); 1342 1343 if (!vu_dev->no_vq_suspend) { 1344 struct virtqueue *vq; 1345 1346 virtio_device_for_each_vq((&vu_dev->vdev), vq) { 1347 struct virtio_uml_vq_info *info = vq->priv; 1348 1349 info->suspended = false; 1350 vhost_user_set_vring_enable(vu_dev, vq->index, true); 1351 } 1352 } 1353 1354 vu_dev->suspended = false; 1355 1356 if (!device_may_wakeup(&vu_dev->vdev.dev)) 1357 return 0; 1358 1359 return irq_set_irq_wake(vu_dev->irq, 0); 1360 } 1361 1362 static struct platform_driver virtio_uml_driver = { 1363 .probe = virtio_uml_probe, 1364 .remove = virtio_uml_remove, 1365 .driver = { 1366 .name = "virtio-uml", 1367 .of_match_table = virtio_uml_match, 1368 }, 1369 .suspend = virtio_uml_suspend, 1370 .resume = virtio_uml_resume, 1371 }; 1372 1373 static int __init virtio_uml_init(void) 1374 { 1375 return platform_driver_register(&virtio_uml_driver); 1376 } 1377 1378 static void __exit virtio_uml_exit(void) 1379 { 1380 platform_driver_unregister(&virtio_uml_driver); 1381 vu_unregister_cmdline_devices(); 1382 } 1383 1384 module_init(virtio_uml_init); 1385 module_exit(virtio_uml_exit); 1386 __uml_exitcall(virtio_uml_exit); 1387 1388 MODULE_DESCRIPTION("UML driver for vhost-user virtio devices"); 1389 MODULE_LICENSE("GPL"); 1390