1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-backend.h" 15 #include "hw/virtio/virtio-net.h" 16 #include "sysemu/char.h" 17 #include "sysemu/kvm.h" 18 #include "qemu/error-report.h" 19 #include "qemu/sockets.h" 20 #include "migration/migration.h" 21 22 #include <sys/ioctl.h> 23 #include <sys/socket.h> 24 #include <sys/un.h> 25 #include <linux/vhost.h> 26 27 #define VHOST_MEMORY_MAX_NREGIONS 8 28 #define VHOST_USER_F_PROTOCOL_FEATURES 30 29 30 enum VhostUserProtocolFeature { 31 VHOST_USER_PROTOCOL_F_MQ = 0, 32 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 33 VHOST_USER_PROTOCOL_F_RARP = 2, 34 35 VHOST_USER_PROTOCOL_F_MAX 36 }; 37 38 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 39 40 typedef enum VhostUserRequest { 41 VHOST_USER_NONE = 0, 42 VHOST_USER_GET_FEATURES = 1, 43 VHOST_USER_SET_FEATURES = 2, 44 VHOST_USER_SET_OWNER = 3, 45 VHOST_USER_RESET_OWNER = 4, 46 VHOST_USER_SET_MEM_TABLE = 5, 47 VHOST_USER_SET_LOG_BASE = 6, 48 VHOST_USER_SET_LOG_FD = 7, 49 VHOST_USER_SET_VRING_NUM = 8, 50 VHOST_USER_SET_VRING_ADDR = 9, 51 VHOST_USER_SET_VRING_BASE = 10, 52 VHOST_USER_GET_VRING_BASE = 11, 53 VHOST_USER_SET_VRING_KICK = 12, 54 VHOST_USER_SET_VRING_CALL = 13, 55 VHOST_USER_SET_VRING_ERR = 14, 56 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 57 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 58 VHOST_USER_GET_QUEUE_NUM = 17, 59 VHOST_USER_SET_VRING_ENABLE = 18, 60 VHOST_USER_SEND_RARP = 19, 61 VHOST_USER_MAX 62 } VhostUserRequest; 63 64 typedef struct VhostUserMemoryRegion { 65 uint64_t guest_phys_addr; 66 uint64_t memory_size; 67 uint64_t userspace_addr; 68 uint64_t mmap_offset; 69 } VhostUserMemoryRegion; 70 71 typedef struct VhostUserMemory { 72 uint32_t nregions; 73 uint32_t padding; 74 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 75 } VhostUserMemory; 76 77 typedef struct VhostUserLog { 78 uint64_t mmap_size; 79 uint64_t mmap_offset; 80 } VhostUserLog; 81 82 typedef struct VhostUserMsg { 83 VhostUserRequest request; 84 85 #define VHOST_USER_VERSION_MASK (0x3) 86 #define VHOST_USER_REPLY_MASK (0x1<<2) 87 uint32_t flags; 88 uint32_t size; /* the following payload size */ 89 union { 90 #define VHOST_USER_VRING_IDX_MASK (0xff) 91 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 92 uint64_t u64; 93 struct vhost_vring_state state; 94 struct vhost_vring_addr addr; 95 VhostUserMemory memory; 96 VhostUserLog log; 97 } payload; 98 } QEMU_PACKED VhostUserMsg; 99 100 static VhostUserMsg m __attribute__ ((unused)); 101 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ 102 + sizeof(m.flags) \ 103 + sizeof(m.size)) 104 105 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) 106 107 /* The version of the protocol we support */ 108 #define VHOST_USER_VERSION (0x1) 109 110 static bool ioeventfd_enabled(void) 111 { 112 return kvm_enabled() && kvm_eventfds_enabled(); 113 } 114 115 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 116 { 117 CharDriverState *chr = dev->opaque; 118 uint8_t *p = (uint8_t *) msg; 119 int r, size = VHOST_USER_HDR_SIZE; 120 121 r = qemu_chr_fe_read_all(chr, p, size); 122 if (r != size) { 123 error_report("Failed to read msg header. Read %d instead of %d." 124 " Original request %d.", r, size, msg->request); 125 goto fail; 126 } 127 128 /* validate received flags */ 129 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 130 error_report("Failed to read msg header." 131 " Flags 0x%x instead of 0x%x.", msg->flags, 132 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 133 goto fail; 134 } 135 136 /* validate message size is sane */ 137 if (msg->size > VHOST_USER_PAYLOAD_SIZE) { 138 error_report("Failed to read msg header." 139 " Size %d exceeds the maximum %zu.", msg->size, 140 VHOST_USER_PAYLOAD_SIZE); 141 goto fail; 142 } 143 144 if (msg->size) { 145 p += VHOST_USER_HDR_SIZE; 146 size = msg->size; 147 r = qemu_chr_fe_read_all(chr, p, size); 148 if (r != size) { 149 error_report("Failed to read msg payload." 150 " Read %d instead of %d.", r, msg->size); 151 goto fail; 152 } 153 } 154 155 return 0; 156 157 fail: 158 return -1; 159 } 160 161 static bool vhost_user_one_time_request(VhostUserRequest request) 162 { 163 switch (request) { 164 case VHOST_USER_SET_OWNER: 165 case VHOST_USER_RESET_OWNER: 166 case VHOST_USER_SET_MEM_TABLE: 167 case VHOST_USER_GET_QUEUE_NUM: 168 return true; 169 default: 170 return false; 171 } 172 } 173 174 /* most non-init callers ignore the error */ 175 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 176 int *fds, int fd_num) 177 { 178 CharDriverState *chr = dev->opaque; 179 int ret, size = VHOST_USER_HDR_SIZE + msg->size; 180 181 /* 182 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 183 * we just need send it once in the first time. For later such 184 * request, we just ignore it. 185 */ 186 if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { 187 return 0; 188 } 189 190 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 191 error_report("Failed to set msg fds."); 192 return -1; 193 } 194 195 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 196 if (ret != size) { 197 error_report("Failed to write msg." 198 " Wrote %d instead of %d.", ret, size); 199 return -1; 200 } 201 202 return 0; 203 } 204 205 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 206 struct vhost_log *log) 207 { 208 int fds[VHOST_MEMORY_MAX_NREGIONS]; 209 size_t fd_num = 0; 210 bool shmfd = virtio_has_feature(dev->protocol_features, 211 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 212 VhostUserMsg msg = { 213 .request = VHOST_USER_SET_LOG_BASE, 214 .flags = VHOST_USER_VERSION, 215 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 216 .payload.log.mmap_offset = 0, 217 .size = sizeof(msg.payload.log), 218 }; 219 220 if (shmfd && log->fd != -1) { 221 fds[fd_num++] = log->fd; 222 } 223 224 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 225 return -1; 226 } 227 228 if (shmfd) { 229 msg.size = 0; 230 if (vhost_user_read(dev, &msg) < 0) { 231 return -1; 232 } 233 234 if (msg.request != VHOST_USER_SET_LOG_BASE) { 235 error_report("Received unexpected msg type. " 236 "Expected %d received %d", 237 VHOST_USER_SET_LOG_BASE, msg.request); 238 return -1; 239 } 240 } 241 242 return 0; 243 } 244 245 static int vhost_user_set_mem_table(struct vhost_dev *dev, 246 struct vhost_memory *mem) 247 { 248 int fds[VHOST_MEMORY_MAX_NREGIONS]; 249 int i, fd; 250 size_t fd_num = 0; 251 VhostUserMsg msg = { 252 .request = VHOST_USER_SET_MEM_TABLE, 253 .flags = VHOST_USER_VERSION, 254 }; 255 256 for (i = 0; i < dev->mem->nregions; ++i) { 257 struct vhost_memory_region *reg = dev->mem->regions + i; 258 ram_addr_t offset; 259 MemoryRegion *mr; 260 261 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 262 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 263 &offset); 264 fd = memory_region_get_fd(mr); 265 if (fd > 0) { 266 msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 267 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 268 msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 269 msg.payload.memory.regions[fd_num].mmap_offset = offset; 270 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 271 fds[fd_num++] = fd; 272 } 273 } 274 275 msg.payload.memory.nregions = fd_num; 276 277 if (!fd_num) { 278 error_report("Failed initializing vhost-user memory map, " 279 "consider using -object memory-backend-file share=on"); 280 return -1; 281 } 282 283 msg.size = sizeof(msg.payload.memory.nregions); 284 msg.size += sizeof(msg.payload.memory.padding); 285 msg.size += fd_num * sizeof(VhostUserMemoryRegion); 286 287 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 288 return -1; 289 } 290 291 return 0; 292 } 293 294 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 295 struct vhost_vring_addr *addr) 296 { 297 VhostUserMsg msg = { 298 .request = VHOST_USER_SET_VRING_ADDR, 299 .flags = VHOST_USER_VERSION, 300 .payload.addr = *addr, 301 .size = sizeof(msg.payload.addr), 302 }; 303 304 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 305 return -1; 306 } 307 308 return 0; 309 } 310 311 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 312 struct vhost_vring_state *ring) 313 { 314 error_report("vhost-user trying to send unhandled ioctl"); 315 return -1; 316 } 317 318 static int vhost_set_vring(struct vhost_dev *dev, 319 unsigned long int request, 320 struct vhost_vring_state *ring) 321 { 322 VhostUserMsg msg = { 323 .request = request, 324 .flags = VHOST_USER_VERSION, 325 .payload.state = *ring, 326 .size = sizeof(msg.payload.state), 327 }; 328 329 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 330 return -1; 331 } 332 333 return 0; 334 } 335 336 static int vhost_user_set_vring_num(struct vhost_dev *dev, 337 struct vhost_vring_state *ring) 338 { 339 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 340 } 341 342 static int vhost_user_set_vring_base(struct vhost_dev *dev, 343 struct vhost_vring_state *ring) 344 { 345 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 346 } 347 348 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 349 { 350 int i; 351 352 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 353 return -1; 354 } 355 356 for (i = 0; i < dev->nvqs; ++i) { 357 struct vhost_vring_state state = { 358 .index = dev->vq_index + i, 359 .num = enable, 360 }; 361 362 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 363 } 364 365 return 0; 366 } 367 368 static int vhost_user_get_vring_base(struct vhost_dev *dev, 369 struct vhost_vring_state *ring) 370 { 371 VhostUserMsg msg = { 372 .request = VHOST_USER_GET_VRING_BASE, 373 .flags = VHOST_USER_VERSION, 374 .payload.state = *ring, 375 .size = sizeof(msg.payload.state), 376 }; 377 378 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 379 return -1; 380 } 381 382 if (vhost_user_read(dev, &msg) < 0) { 383 return -1; 384 } 385 386 if (msg.request != VHOST_USER_GET_VRING_BASE) { 387 error_report("Received unexpected msg type. Expected %d received %d", 388 VHOST_USER_GET_VRING_BASE, msg.request); 389 return -1; 390 } 391 392 if (msg.size != sizeof(msg.payload.state)) { 393 error_report("Received bad msg size."); 394 return -1; 395 } 396 397 *ring = msg.payload.state; 398 399 return 0; 400 } 401 402 static int vhost_set_vring_file(struct vhost_dev *dev, 403 VhostUserRequest request, 404 struct vhost_vring_file *file) 405 { 406 int fds[VHOST_MEMORY_MAX_NREGIONS]; 407 size_t fd_num = 0; 408 VhostUserMsg msg = { 409 .request = request, 410 .flags = VHOST_USER_VERSION, 411 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 412 .size = sizeof(msg.payload.u64), 413 }; 414 415 if (ioeventfd_enabled() && file->fd > 0) { 416 fds[fd_num++] = file->fd; 417 } else { 418 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 419 } 420 421 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 422 return -1; 423 } 424 425 return 0; 426 } 427 428 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 429 struct vhost_vring_file *file) 430 { 431 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 432 } 433 434 static int vhost_user_set_vring_call(struct vhost_dev *dev, 435 struct vhost_vring_file *file) 436 { 437 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 438 } 439 440 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 441 { 442 VhostUserMsg msg = { 443 .request = request, 444 .flags = VHOST_USER_VERSION, 445 .payload.u64 = u64, 446 .size = sizeof(msg.payload.u64), 447 }; 448 449 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 450 return -1; 451 } 452 453 return 0; 454 } 455 456 static int vhost_user_set_features(struct vhost_dev *dev, 457 uint64_t features) 458 { 459 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 460 } 461 462 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 463 uint64_t features) 464 { 465 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 466 } 467 468 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 469 { 470 VhostUserMsg msg = { 471 .request = request, 472 .flags = VHOST_USER_VERSION, 473 }; 474 475 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 476 return 0; 477 } 478 479 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 480 return -1; 481 } 482 483 if (vhost_user_read(dev, &msg) < 0) { 484 return -1; 485 } 486 487 if (msg.request != request) { 488 error_report("Received unexpected msg type. Expected %d received %d", 489 request, msg.request); 490 return -1; 491 } 492 493 if (msg.size != sizeof(msg.payload.u64)) { 494 error_report("Received bad msg size."); 495 return -1; 496 } 497 498 *u64 = msg.payload.u64; 499 500 return 0; 501 } 502 503 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 504 { 505 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 506 } 507 508 static int vhost_user_set_owner(struct vhost_dev *dev) 509 { 510 VhostUserMsg msg = { 511 .request = VHOST_USER_SET_OWNER, 512 .flags = VHOST_USER_VERSION, 513 }; 514 515 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 516 return -1; 517 } 518 519 return 0; 520 } 521 522 static int vhost_user_reset_device(struct vhost_dev *dev) 523 { 524 VhostUserMsg msg = { 525 .request = VHOST_USER_RESET_OWNER, 526 .flags = VHOST_USER_VERSION, 527 }; 528 529 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 530 return -1; 531 } 532 533 return 0; 534 } 535 536 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 537 { 538 uint64_t features; 539 int err; 540 541 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 542 543 dev->opaque = opaque; 544 545 err = vhost_user_get_features(dev, &features); 546 if (err < 0) { 547 return err; 548 } 549 550 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 551 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 552 553 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 554 &features); 555 if (err < 0) { 556 return err; 557 } 558 559 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; 560 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 561 if (err < 0) { 562 return err; 563 } 564 565 /* query the max queues we support if backend supports Multiple Queue */ 566 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 567 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 568 &dev->max_queues); 569 if (err < 0) { 570 return err; 571 } 572 } 573 } 574 575 if (dev->migration_blocker == NULL && 576 !virtio_has_feature(dev->protocol_features, 577 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 578 error_setg(&dev->migration_blocker, 579 "Migration disabled: vhost-user backend lacks " 580 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 581 } 582 583 return 0; 584 } 585 586 static int vhost_user_cleanup(struct vhost_dev *dev) 587 { 588 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 589 590 dev->opaque = 0; 591 592 return 0; 593 } 594 595 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 596 { 597 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 598 599 return idx; 600 } 601 602 static int vhost_user_memslots_limit(struct vhost_dev *dev) 603 { 604 return VHOST_MEMORY_MAX_NREGIONS; 605 } 606 607 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 608 { 609 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 610 611 return virtio_has_feature(dev->protocol_features, 612 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 613 } 614 615 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 616 { 617 VhostUserMsg msg = { 0 }; 618 619 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 620 621 /* If guest supports GUEST_ANNOUNCE do nothing */ 622 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 623 return 0; 624 } 625 626 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 627 if (virtio_has_feature(dev->protocol_features, 628 VHOST_USER_PROTOCOL_F_RARP)) { 629 msg.request = VHOST_USER_SEND_RARP; 630 msg.flags = VHOST_USER_VERSION; 631 memcpy((char *)&msg.payload.u64, mac_addr, 6); 632 msg.size = sizeof(msg.payload.u64); 633 634 return vhost_user_write(dev, &msg, NULL, 0); 635 } 636 return -1; 637 } 638 639 static bool vhost_user_can_merge(struct vhost_dev *dev, 640 uint64_t start1, uint64_t size1, 641 uint64_t start2, uint64_t size2) 642 { 643 ram_addr_t offset; 644 int mfd, rfd; 645 MemoryRegion *mr; 646 647 mr = memory_region_from_host((void *)(uintptr_t)start1, &offset); 648 mfd = memory_region_get_fd(mr); 649 650 mr = memory_region_from_host((void *)(uintptr_t)start2, &offset); 651 rfd = memory_region_get_fd(mr); 652 653 return mfd == rfd; 654 } 655 656 const VhostOps user_ops = { 657 .backend_type = VHOST_BACKEND_TYPE_USER, 658 .vhost_backend_init = vhost_user_init, 659 .vhost_backend_cleanup = vhost_user_cleanup, 660 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 661 .vhost_set_log_base = vhost_user_set_log_base, 662 .vhost_set_mem_table = vhost_user_set_mem_table, 663 .vhost_set_vring_addr = vhost_user_set_vring_addr, 664 .vhost_set_vring_endian = vhost_user_set_vring_endian, 665 .vhost_set_vring_num = vhost_user_set_vring_num, 666 .vhost_set_vring_base = vhost_user_set_vring_base, 667 .vhost_get_vring_base = vhost_user_get_vring_base, 668 .vhost_set_vring_kick = vhost_user_set_vring_kick, 669 .vhost_set_vring_call = vhost_user_set_vring_call, 670 .vhost_set_features = vhost_user_set_features, 671 .vhost_get_features = vhost_user_get_features, 672 .vhost_set_owner = vhost_user_set_owner, 673 .vhost_reset_device = vhost_user_reset_device, 674 .vhost_get_vq_index = vhost_user_get_vq_index, 675 .vhost_set_vring_enable = vhost_user_set_vring_enable, 676 .vhost_requires_shm_log = vhost_user_requires_shm_log, 677 .vhost_migration_done = vhost_user_migration_done, 678 .vhost_backend_can_merge = vhost_user_can_merge, 679 }; 680