// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtio vhost-user driver
 *
 * Copyright(c) 2019 Intel Corporation
 *
 * This driver allows virtio devices to be used over a vhost-user socket.
 *
 * Guest devices can be instantiated by kernel module or command line
 * parameters. One device will be created for each parameter. Syntax:
 *
 *		virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
 * where:
 *		<socket>	:= vhost-user socket path to connect
 *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
 *		<platform_id>	:= (optional) platform device id
 *
 * example:
 *		virtio_uml.device=/var/uml.socket:1
 *
 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
 */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/time-internal.h>
#include <shared/as-layout.h>
#include <irq_kern.h>
#include <init.h>
#include <os.h>
#include "vhost_user.h"

#define MAX_SUPPORTED_QUEUE_SIZE	256

#define to_virtio_uml_device(_vdev) \
	container_of(_vdev, struct virtio_uml_device, vdev)

struct virtio_uml_platform_data {
	u32 virtio_device_id;
	const char *socket_path;
	struct work_struct conn_broken_wk;
	struct platform_device *pdev;
};

struct virtio_uml_device {
	struct virtio_device vdev;
	struct platform_device *pdev;

	spinlock_t sock_lock;
	int sock, req_fd, irq;
	u64 features;
	u64 protocol_features;
	u8 status;
	u8 registered:1;
	u8 suspended:1;

	u8 config_changed_irq:1;
	uint64_t vq_irq_vq_map;
};

struct virtio_uml_vq_info {
	int kick_fd, call_fd;
	char name[32];
	bool suspended;
};

extern unsigned long long physmem_size, highmem;

#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)

/* Vhost-user protocol */

static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
			    const int *fds, unsigned int fds_num)
{
	int rc;

	do {
		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
		if (rc > 0) {
			buf += rc;
			len -= rc;
			fds = NULL;
			fds_num = 0;
		}
	} while (len && (rc >= 0 || rc == -EINTR));

	if (rc < 0)
		return rc;
	return 0;
}

static int full_read(int fd, void *buf, int len, bool abortable)
{
	int rc;

	if (!len)
		return 0;

	do {
		rc = os_read_file(fd, buf, len);
		if (rc > 0) {
			buf += rc;
			len -= rc;
		}
	} while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));

	if (rc < 0)
		return rc;
	if (rc == 0)
		return -ECONNRESET;
	return 0;
}

static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
{
	return full_read(fd, msg, sizeof(msg->header), true);
}

static int vhost_user_recv(struct virtio_uml_device *vu_dev,
			   int fd, struct vhost_user_msg *msg,
			   size_t max_payload_size, bool wait)
{
	size_t size;
	int rc;

	/*
	 * In virtio time-travel mode, we're handling all the vhost-user
	 * FDs by polling them whenever appropriate. However, we may get
	 * into a situation where we're sending out an interrupt message
	 * to a device (e.g. a net device) and need to handle a simulation
	 * time message while doing so, e.g. one that tells us to update
	 * our idea of how long we can run without scheduling.
	 *
	 * Thus, we need to not just read() from the given fd, but need
	 * to also handle messages for the simulation time - this function
	 * does that for us while waiting for the given fd to be readable.
	 */
	if (wait)
		time_travel_wait_readable(fd);

	rc = vhost_user_recv_header(fd, msg);

	if (rc == -ECONNRESET && vu_dev->registered) {
		struct virtio_uml_platform_data *pdata;

		pdata = vu_dev->pdev->dev.platform_data;

		virtio_break_device(&vu_dev->vdev);
		schedule_work(&pdata->conn_broken_wk);
	}
	if (rc)
		return rc;
	size = msg->header.size;
	if (size > max_payload_size)
		return -EPROTO;
	return full_read(fd, &msg->payload, size, false);
}

static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
				struct vhost_user_msg *msg,
				size_t max_payload_size)
{
	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
				 max_payload_size, true);

	if (rc)
		return rc;

	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
		return -EPROTO;

	return 0;
}

static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
			       u64 *value)
{
	struct vhost_user_msg msg;
	int rc = vhost_user_recv_resp(vu_dev, &msg,
				      sizeof(msg.payload.integer));

	if (rc)
		return rc;
	if (msg.header.size != sizeof(msg.payload.integer))
		return -EPROTO;
	*value = msg.payload.integer;
	return 0;
}

static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
			       struct vhost_user_msg *msg,
			       size_t max_payload_size)
{
	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
				 max_payload_size, false);

	if (rc)
		return rc;

	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
			VHOST_USER_VERSION)
		return -EPROTO;

	return 0;
}

static int vhost_user_send(struct virtio_uml_device *vu_dev,
			   bool need_response, struct vhost_user_msg *msg,
			   int *fds, size_t num_fds)
{
	size_t size = sizeof(msg->header) + msg->header.size;
	unsigned long flags;
	bool request_ack;
	int rc;

	msg->header.flags |= VHOST_USER_VERSION;

	/*
	 * The need_response flag indicates that we already need a response,
	 * e.g. to read the features. In these cases, don't request an ACK as
	 * it is meaningless. Also request an ACK only if supported.
	 */
	request_ack = !need_response;
	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
		request_ack = false;

	if (request_ack)
		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;

	spin_lock_irqsave(&vu_dev->sock_lock, flags);
	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
	if (rc < 0)
		goto out;

	if (request_ack) {
		uint64_t status;

		rc = vhost_user_recv_u64(vu_dev, &status);
		if (rc)
			goto out;

		if (status) {
			vu_err(vu_dev, "slave reports error: %llu\n", status);
			rc = -EIO;
			goto out;
		}
	}

out:
	spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
	return rc;
}

static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
				      bool need_response, u32 request)
{
	struct vhost_user_msg msg = {
		.header.request = request,
	};

	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
}

static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
					 u32 request, int fd)
{
	struct vhost_user_msg msg = {
		.header.request = request,
	};

	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
}

static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
			       u32 request, u64 value)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.integer),
		.payload.integer = value,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}

static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
{
	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
}

static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
				   u64 *features)
{
	int rc = vhost_user_send_no_payload(vu_dev, true,
					    VHOST_USER_GET_FEATURES);

	if (rc)
		return rc;
	return vhost_user_recv_u64(vu_dev, features);
}

static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
				   u64 features)
{
	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
}

static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
					    u64 *protocol_features)
{
	int rc = vhost_user_send_no_payload(vu_dev, true,
					    VHOST_USER_GET_PROTOCOL_FEATURES);

	if (rc)
		return rc;
	return vhost_user_recv_u64(vu_dev, protocol_features);
}

static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
					     u64 protocol_features)
{
	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
				   protocol_features);
}

static void vhost_user_reply(struct virtio_uml_device *vu_dev,
			     struct vhost_user_msg *msg, int response)
{
	struct vhost_user_msg reply = {
		.payload.integer = response,
	};
	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
	int rc;

	reply.header = msg->header;
	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
	reply.header.flags |= VHOST_USER_FLAG_REPLY;
	reply.header.size = sizeof(reply.payload.integer);

	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);

	if (rc)
		vu_err(vu_dev,
		       "sending reply to slave request failed: %d (size %zu)\n",
		       rc, size);
}

static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
				       struct time_travel_event *ev)
{
	struct virtqueue *vq;
	int response = 1;
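	/*
	 * Receive buffer for slave requests: keep room beyond the fixed
	 * payload union so that a request carrying a larger (e.g. config)
	 * payload can still be read in one go.
	 */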
	struct {
		struct vhost_user_msg msg;
		u8 extra_payload[512];
	} msg;
	int rc;

	rc = vhost_user_recv_req(vu_dev, &msg.msg,
				 sizeof(msg.msg.payload) +
				 sizeof(msg.extra_payload));

	if (rc)
		return IRQ_NONE;

	switch (msg.msg.header.request) {
	case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
		vu_dev->config_changed_irq = true;
		response = 0;
		break;
	case VHOST_USER_SLAVE_VRING_CALL:
		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
			if (vq->index == msg.msg.payload.vring_state.index) {
				response = 0;
				vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index);
				break;
			}
		}
		break;
	case VHOST_USER_SLAVE_IOTLB_MSG:
		/* not supported - VIRTIO_F_ACCESS_PLATFORM */
	case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
		/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
	default:
		vu_err(vu_dev, "unexpected slave request %d\n",
		       msg.msg.header.request);
	}

	if (ev && !vu_dev->suspended)
		time_travel_add_irq_event(ev);

	if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
		vhost_user_reply(vu_dev, &msg.msg, response);

	return IRQ_HANDLED;
}

static irqreturn_t vu_req_interrupt(int irq, void *data)
{
	struct virtio_uml_device *vu_dev = data;
	irqreturn_t ret = IRQ_HANDLED;

	if (!um_irq_timetravel_handler_used())
		ret = vu_req_read_message(vu_dev, NULL);

	if (vu_dev->vq_irq_vq_map) {
		struct virtqueue *vq;

		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
			if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index))
				vring_interrupt(0 /* ignored */, vq);
		}
		vu_dev->vq_irq_vq_map = 0;
	} else if (vu_dev->config_changed_irq) {
		virtio_config_changed(&vu_dev->vdev);
		vu_dev->config_changed_irq = false;
	}

	return ret;
}

static void vu_req_interrupt_comm_handler(int irq, int fd, void *data,
					  struct time_travel_event *ev)
{
	vu_req_read_message(data, ev);
}

static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
{
	int rc, req_fds[2];

	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
	rc = os_pipe(req_fds, true, true);
	if (rc < 0)
		return rc;
	vu_dev->req_fd = req_fds[0];

	rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
			       vu_req_interrupt, IRQF_SHARED,
			       vu_dev->pdev->name, vu_dev,
			       vu_req_interrupt_comm_handler);
	if (rc < 0)
		goto err_close;

	vu_dev->irq = rc;

	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
					   req_fds[1]);
	if (rc)
		goto err_free_irq;

	goto out;

err_free_irq:
	um_free_irq(vu_dev->irq, vu_dev);
err_close:
	os_close_file(req_fds[0]);
out:
	/* Close unused write end of request fds */
	os_close_file(req_fds[1]);
	return rc;
}

static int vhost_user_init(struct virtio_uml_device *vu_dev)
{
	int rc = vhost_user_set_owner(vu_dev);

	if (rc)
		return rc;
	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
	if (rc)
		return rc;

	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
		rc = vhost_user_get_protocol_features(vu_dev,
				&vu_dev->protocol_features);
		if (rc)
			return rc;
		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
		rc = vhost_user_set_protocol_features(vu_dev,
				vu_dev->protocol_features);
		if (rc)
			return rc;
	}

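	/*
	 * The slave request channel carries device-initiated messages,
	 * i.e. config change notifications and (with in-band notifications)
	 * vring call events, so only set it up when it was negotiated.
	 */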
	if (vu_dev->protocol_features &
	    BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
		rc = vhost_user_init_slave_req(vu_dev);
		if (rc)
			return rc;
	}

	return 0;
}

static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
				  u32 offset, void *buf, u32 len)
{
	u32 cfg_size = offset + len;
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
	size_t msg_size = sizeof(msg->header) + payload_size;
	int rc;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	msg = kzalloc(msg_size, GFP_KERNEL);
	if (!msg)
		return;
	msg->header.request = VHOST_USER_GET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = 0;
	msg->payload.config.size = cfg_size;

	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
	if (rc) {
		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
		       rc);
		goto free;
	}

	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
	if (rc) {
		vu_err(vu_dev,
		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
		       rc);
		goto free;
	}

	if (msg->header.size != payload_size ||
	    msg->payload.config.size != cfg_size) {
		rc = -EPROTO;
		vu_err(vu_dev,
		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
		       msg->header.size, payload_size,
		       msg->payload.config.size, cfg_size);
		goto free;
	}
	memcpy(buf, msg->payload.config.payload + offset, len);

free:
	kfree(msg);
}

static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
				  u32 offset, const void *buf, u32 len)
{
	struct vhost_user_msg *msg;
	size_t payload_size = sizeof(msg->payload.config) + len;
	size_t msg_size = sizeof(msg->header) + payload_size;
	int rc;

	if (!(vu_dev->protocol_features &
	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
		return;

	msg = kzalloc(msg_size, GFP_KERNEL);
	if (!msg)
		return;
	msg->header.request = VHOST_USER_SET_CONFIG;
	msg->header.size = payload_size;
	msg->payload.config.offset = offset;
	msg->payload.config.size = len;
	memcpy(msg->payload.config.payload, buf, len);

	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
	if (rc)
		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
		       rc);

	kfree(msg);
}

static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
				      struct vhost_user_mem_region *region_out)
{
	unsigned long long mem_offset;
	int rc = phys_mapping(addr, &mem_offset);

	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
		return -EFAULT;
	*fd_out = rc;
	region_out->guest_addr = addr;
	region_out->user_addr = addr;
	region_out->size = size;
	region_out->mmap_offset = mem_offset;

	/* Ensure mapping is valid for the entire region */
	rc = phys_mapping(addr + size - 1, &mem_offset);
	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
		 addr + size - 1, rc, *fd_out))
		return -EFAULT;
	return 0;
}

static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
{
	struct vhost_user_msg msg = {
		.header.request = VHOST_USER_SET_MEM_TABLE,
		.header.size = sizeof(msg.payload.mem_regions),
		.payload.mem_regions.num = 1,
	};
	unsigned long reserved = uml_reserved - uml_physmem;
	int fds[2];
	int rc;

	/*
	 * This is a bit tricky, see also the comment with
	 * setup_physmem().
	 *
	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
	 * but the code and data we *already* have are omitted. To us, this
	 * makes no difference, since they both become part of our address
	 * space and memory consumption. To somebody looking in from the
	 * outside, however, it is different because the part of our memory
	 * consumption that's already part of the binary (code/data) is not
	 * mapped from the file, so it's not visible to another mmap from
	 * the file descriptor.
	 *
	 * Thus, don't advertise this space to the vhost-user slave. This
	 * means that the slave will likely abort or similar when we give
	 * it an address from the hidden range, since it's not marked as
	 * a valid address, but at least that way we detect the issue and
	 * don't just have the slave read an all-zeroes buffer from the
	 * shared memory file, or write something there that we can never
	 * see (depending on the direction of the virtqueue traffic.)
	 *
	 * Since we usually don't want to use .text for virtio buffers,
	 * this effectively means that you cannot use
	 *  1) global variables, which are in the .bss and not in the shm
	 *     file-backed memory
	 *  2) the stack in some processes, depending on where they have
	 *     their stack (or maybe only no interrupt stack?)
	 *
	 * The stack is already not typically valid for DMA, so this isn't
	 * much of a restriction, but global variables might be encountered.
	 *
	 * It might be possible to fix it by copying around the data that's
	 * between bss_start and where we map the file now, but it's not
	 * something that you typically encounter with virtio drivers, so
	 * it didn't seem worthwhile.
	 */
	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
					&fds[0],
					&msg.payload.mem_regions.regions[0]);

	if (rc < 0)
		return rc;
	if (highmem) {
		msg.payload.mem_regions.num++;
		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
				&fds[1], &msg.payload.mem_regions.regions[1]);
		if (rc < 0)
			return rc;
	}

	return vhost_user_send(vu_dev, false, &msg, fds,
			       msg.payload.mem_regions.num);
}

static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
				      u32 request, u32 index, u32 num)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.vring_state),
		.payload.vring_state.index = index,
		.payload.vring_state.num = num,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}

static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
				    u32 index, u32 num)
{
	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
					  index, num);
}

static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
				     u32 index, u32 offset)
{
	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
					  index, offset);
}

static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
				     u32 index, u64 desc, u64 used, u64 avail,
				     u64 log)
{
	struct vhost_user_msg msg = {
		.header.request = VHOST_USER_SET_VRING_ADDR,
		.header.size = sizeof(msg.payload.vring_addr),
		.payload.vring_addr.index = index,
		.payload.vring_addr.desc = desc,
		.payload.vring_addr.used = used,
		.payload.vring_addr.avail = avail,
		.payload.vring_addr.log = log,
	};

	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
}

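/*
 * Hand a kick/call eventfd to the slave; if no FD is available,
 * VHOST_USER_VRING_POLL_MASK tells it to poll the vring instead.
 */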
static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
				   u32 request, int index, int fd)
{
	struct vhost_user_msg msg = {
		.header.request = request,
		.header.size = sizeof(msg.payload.integer),
		.payload.integer = index,
	};

	if (index & ~VHOST_USER_VRING_INDEX_MASK)
		return -EINVAL;
	if (fd < 0) {
		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
	}
	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
}

static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
				     int index, int fd)
{
	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
				       index, fd);
}

static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
				     int index, int fd)
{
	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
				       index, fd);
}

static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
					u32 index, bool enable)
{
	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
		return 0;

	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
					  index, enable);
}


/* Virtio interface */

static bool vu_notify(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	const uint64_t n = 1;
	int rc;

	if (info->suspended)
		return true;

	time_travel_propagate_time();

	if (info->kick_fd < 0) {
		struct virtio_uml_device *vu_dev;

		vu_dev = to_virtio_uml_device(vq->vdev);

		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
						  vq->index, 0) == 0;
	}

	do {
		rc = os_write_file(info->kick_fd, &n, sizeof(n));
	} while (rc == -EINTR);
	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
}

static irqreturn_t vu_interrupt(int irq, void *opaque)
{
	struct virtqueue *vq = opaque;
	struct virtio_uml_vq_info *info = vq->priv;
	uint64_t n;
	int rc;
	irqreturn_t ret = IRQ_NONE;

	do {
		rc = os_read_file(info->call_fd, &n, sizeof(n));
		if (rc == sizeof(n))
			ret |= vring_interrupt(irq, vq);
	} while (rc == sizeof(n) || rc == -EINTR);
	WARN(rc != -EAGAIN, "read returned %d\n", rc);
	return ret;
}


static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vhost_user_get_config(vu_dev, offset, buf, len);
}

static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vhost_user_set_config(vu_dev, offset, buf, len);
}

static u8 vu_get_status(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->status;
}

static void vu_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vu_dev->status = status;
}

static void vu_reset(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	vu_dev->status = 0;
}

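/* Tear down one virtqueue: release its call IRQ/FD and kick FD, then free the ring */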
static void vu_del_vq(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;

	if (info->call_fd >= 0) {
		struct virtio_uml_device *vu_dev;

		vu_dev = to_virtio_uml_device(vq->vdev);

		um_free_irq(vu_dev->irq, vq);
		os_close_file(info->call_fd);
	}

	if (info->kick_fd >= 0)
		os_close_file(info->kick_fd);

	vring_del_virtqueue(vq);
	kfree(info);
}

static void vu_del_vqs(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct virtqueue *vq, *n;
	u64 features;

	/* Note: reverse order as a workaround to a decoding bug in snabb */
	list_for_each_entry_reverse(vq, &vdev->vqs, list)
		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));

	/* Ensure previous messages have been processed */
	WARN_ON(vhost_user_get_features(vu_dev, &features));

	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
		vu_del_vq(vq);
}

static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
			       struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;
	int call_fds[2];
	int rc;

	/* no call FD needed/desired in this case */
	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
	    vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
		info->call_fd = -1;
		return 0;
	}

	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
	rc = os_pipe(call_fds, true, true);
	if (rc < 0)
		return rc;

	info->call_fd = call_fds[0];
	rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
			    vu_interrupt, IRQF_SHARED, info->name, vq);
	if (rc < 0)
		goto close_both;

	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
	if (rc)
		goto release_irq;

	goto out;

release_irq:
	um_free_irq(vu_dev->irq, vq);
close_both:
	os_close_file(call_fds[0]);
out:
	/* Close (unused) write end of call fds */
	os_close_file(call_fds[1]);

	return rc;
}

static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
				     unsigned index, vq_callback_t *callback,
				     const char *name, bool ctx)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	struct platform_device *pdev = vu_dev->pdev;
	struct virtio_uml_vq_info *info;
	struct virtqueue *vq;
	int num = MAX_SUPPORTED_QUEUE_SIZE;
	int rc;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		rc = -ENOMEM;
		goto error_kzalloc;
	}
	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
		 pdev->id, name);

	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
				    ctx, vu_notify, callback, info->name);
	if (!vq) {
		rc = -ENOMEM;
		goto error_create;
	}
	vq->priv = info;
	num = virtqueue_get_vring_size(vq);

	if (vu_dev->protocol_features &
			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
		info->kick_fd = -1;
	} else {
		rc = os_eventfd(0, 0);
		if (rc < 0)
			goto error_kick;
		info->kick_fd = rc;
	}

	rc = vu_setup_vq_call_fd(vu_dev, vq);
	if (rc)
		goto error_call;

	rc = vhost_user_set_vring_num(vu_dev, index, num);
	if (rc)
		goto error_setup;

	rc = vhost_user_set_vring_base(vu_dev, index, 0);
	if (rc)
		goto error_setup;

	rc = vhost_user_set_vring_addr(vu_dev, index,
				       virtqueue_get_desc_addr(vq),
				       virtqueue_get_used_addr(vq),
				       virtqueue_get_avail_addr(vq),
				       (u64) -1);
	if (rc)
		goto error_setup;

	return vq;

error_setup:
	if (info->call_fd >= 0) {
		um_free_irq(vu_dev->irq, vq);
		os_close_file(info->call_fd);
	}
error_call:
	if (info->kick_fd >= 0)
		os_close_file(info->kick_fd);
error_kick:
	vring_del_virtqueue(vq);
error_create:
	kfree(info);
error_kzalloc:
	return ERR_PTR(rc);
}

static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
		       const char * const names[], const bool *ctx,
		       struct irq_affinity *desc)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	int i, queue_idx = 0, rc;
	struct virtqueue *vq;

	/* not supported for now */
	if (WARN_ON(nvqs > 64))
		return -EINVAL;

	rc = vhost_user_set_mem_table(vu_dev);
	if (rc)
		return rc;

	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}

		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
				     ctx ? ctx[i] : false);
		if (IS_ERR(vqs[i])) {
			rc = PTR_ERR(vqs[i]);
			goto error_setup;
		}
	}

	list_for_each_entry(vq, &vdev->vqs, list) {
		struct virtio_uml_vq_info *info = vq->priv;

		if (info->kick_fd >= 0) {
			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
						       info->kick_fd);
			if (rc)
				goto error_setup;
		}

		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
		if (rc)
			goto error_setup;
	}

	return 0;

error_setup:
	vu_del_vqs(vdev);
	return rc;
}

static u64 vu_get_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->features;
}

static int vu_finalize_features(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;

	vring_transport_features(vdev);
	vu_dev->features = vdev->features | supported;

	return vhost_user_set_features(vu_dev, vu_dev->features);
}

static const char *vu_bus_name(struct virtio_device *vdev)
{
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	return vu_dev->pdev->name;
}

static const struct virtio_config_ops virtio_uml_config_ops = {
	.get = vu_get,
	.set = vu_set,
	.get_status = vu_get_status,
	.set_status = vu_set_status,
	.reset = vu_reset,
	.find_vqs = vu_find_vqs,
	.del_vqs = vu_del_vqs,
	.get_features = vu_get_features,
	.finalize_features = vu_finalize_features,
	.bus_name = vu_bus_name,
};

static void virtio_uml_release_dev(struct device *d)
{
	struct virtio_device *vdev =
			container_of(d, struct virtio_device, dev);
	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);

	/* might not have been opened due to not negotiating the feature */
	if (vu_dev->req_fd >= 0) {
		um_free_irq(vu_dev->irq, vu_dev);
		os_close_file(vu_dev->req_fd);
	}

	os_close_file(vu_dev->sock);
	kfree(vu_dev);
}

/* Platform device */

static int virtio_uml_probe(struct platform_device *pdev)
{
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	struct virtio_uml_device *vu_dev;
	int rc;

	if (!pdata)
		return -EINVAL;

	vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
	if (!vu_dev)
		return -ENOMEM;

	vu_dev->vdev.dev.parent = &pdev->dev;
	vu_dev->vdev.dev.release = virtio_uml_release_dev;
	vu_dev->vdev.config = &virtio_uml_config_ops;
	vu_dev->vdev.id.device = pdata->virtio_device_id;
	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
	vu_dev->pdev = pdev;
	vu_dev->req_fd = -1;

	do {
		rc = os_connect_socket(pdata->socket_path);
	} while (rc == -EINTR);
	if (rc < 0) {
		kfree(vu_dev);
		return rc;
	}
	vu_dev->sock = rc;

	spin_lock_init(&vu_dev->sock_lock);

	rc = vhost_user_init(vu_dev);
	if (rc)
		goto error_init;

	platform_set_drvdata(pdev, vu_dev);

	device_set_wakeup_capable(&vu_dev->vdev.dev, true);

	rc = register_virtio_device(&vu_dev->vdev);
	if (rc) {
		/* vu_dev is freed by virtio_uml_release_dev() */
		put_device(&vu_dev->vdev.dev);
		return rc;
	}
	vu_dev->registered = 1;
	return 0;

error_init:
	os_close_file(vu_dev->sock);
	kfree(vu_dev);
	return rc;
}

static int virtio_uml_remove(struct platform_device *pdev)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);

	unregister_virtio_device(&vu_dev->vdev);
	return 0;
}

/* Command line device list */

static void vu_cmdline_release_dev(struct device *d)
{
}

static struct device vu_cmdline_parent = {
	.init_name = "virtio-uml-cmdline",
	.release = vu_cmdline_release_dev,
};

static bool vu_cmdline_parent_registered;
static int vu_cmdline_id;

static int vu_unregister_cmdline_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;

	kfree(pdata->socket_path);
	platform_device_unregister(pdev);
	return 0;
}

static void vu_conn_broken(struct work_struct *wk)
{
	struct virtio_uml_platform_data *pdata;

	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
}

static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
{
	const char *ids = strchr(device, ':');
	unsigned int virtio_device_id;
	int processed, consumed, err;
	char *socket_path;
	struct virtio_uml_platform_data pdata, *ppdata;
	struct platform_device *pdev;

	if (!ids || ids == device)
		return -EINVAL;

	processed = sscanf(ids, ":%u%n:%d%n",
			   &virtio_device_id, &consumed,
			   &vu_cmdline_id, &consumed);

	if (processed < 1 || ids[consumed])
		return -EINVAL;

	if (!vu_cmdline_parent_registered) {
		err = device_register(&vu_cmdline_parent);
		if (err) {
			pr_err("Failed to register parent device!\n");
			put_device(&vu_cmdline_parent);
			return err;
		}
		vu_cmdline_parent_registered = true;
	}

	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
	if (!socket_path)
		return -ENOMEM;

	pdata.virtio_device_id = (u32) virtio_device_id;
	pdata.socket_path = socket_path;

	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
		vu_cmdline_id, virtio_device_id, socket_path);

	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
					     vu_cmdline_id++, &pdata,
					     sizeof(pdata));
	err = PTR_ERR_OR_ZERO(pdev);
	if (err)
		goto free;

	ppdata = pdev->dev.platform_data;
	ppdata->pdev = pdev;
	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);

	return 0;

free:
	kfree(socket_path);
	return err;
}

static int vu_cmdline_get_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
	char *buffer = data;
	unsigned int len = strlen(buffer);

	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
	return 0;
}

static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
{
	buffer[0] = '\0';
	if (vu_cmdline_parent_registered)
		device_for_each_child(&vu_cmdline_parent, buffer,
				      vu_cmdline_get_device);
	return strlen(buffer) + 1;
}

static const struct kernel_param_ops vu_cmdline_param_ops = {
	.set = vu_cmdline_set,
	.get = vu_cmdline_get,
};

device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
__uml_help(vu_cmdline_param_ops,
"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
"    Configure a virtio device over a vhost-user socket.\n"
"    See virtio_ids.h for a list of possible virtio device id values.\n"
"    Optionally use a specific platform_device id.\n\n"
);


static void vu_unregister_cmdline_devices(void)
{
	if (vu_cmdline_parent_registered) {
		device_for_each_child(&vu_cmdline_parent, NULL,
				      vu_unregister_cmdline_device);
		device_unregister(&vu_cmdline_parent);
		vu_cmdline_parent_registered = false;
	}
}

/* Platform driver */

static const struct of_device_id virtio_uml_match[] = {
	{ .compatible = "virtio,uml", },
	{ }
};
MODULE_DEVICE_TABLE(of, virtio_uml_match);

static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
	struct virtqueue *vq;

	virtio_device_for_each_vq((&vu_dev->vdev), vq) {
		struct virtio_uml_vq_info *info = vq->priv;

		info->suspended = true;
		vhost_user_set_vring_enable(vu_dev, vq->index, false);
	}

	if (!device_may_wakeup(&vu_dev->vdev.dev)) {
		vu_dev->suspended = true;
		return 0;
	}

	return irq_set_irq_wake(vu_dev->irq, 1);
}

static int virtio_uml_resume(struct platform_device *pdev)
{
	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
	struct virtqueue *vq;

	virtio_device_for_each_vq((&vu_dev->vdev), vq) {
		struct virtio_uml_vq_info *info = vq->priv;

		info->suspended = false;
		vhost_user_set_vring_enable(vu_dev, vq->index, true);
	}

	vu_dev->suspended = false;

	if (!device_may_wakeup(&vu_dev->vdev.dev))
		return 0;

	return irq_set_irq_wake(vu_dev->irq, 0);
}

static struct platform_driver virtio_uml_driver = {
	.probe = virtio_uml_probe,
	.remove = virtio_uml_remove,
	.driver = {
		.name = "virtio-uml",
		.of_match_table = virtio_uml_match,
	},
	.suspend = virtio_uml_suspend,
	.resume = virtio_uml_resume,
};

static int __init virtio_uml_init(void)
{
	return platform_driver_register(&virtio_uml_driver);
}

static void __exit virtio_uml_exit(void)
{
	platform_driver_unregister(&virtio_uml_driver);
	vu_unregister_cmdline_devices();
}

module_init(virtio_uml_init);
module_exit(virtio_uml_exit);
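/* Also run the exit path on UML shutdown, so command line devices get unregistered */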
__uml_exitcall(virtio_uml_exit);

MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
MODULE_LICENSE("GPL");