1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "hw/virtio/vhost.h" 13 #include "hw/virtio/vhost-backend.h" 14 #include "hw/virtio/virtio-net.h" 15 #include "sysemu/char.h" 16 #include "sysemu/kvm.h" 17 #include "qemu/error-report.h" 18 #include "qemu/sockets.h" 19 #include "exec/ram_addr.h" 20 #include "migration/migration.h" 21 22 #include <sys/ioctl.h> 23 #include <sys/socket.h> 24 #include <sys/un.h> 25 #include <linux/vhost.h> 26 27 #define VHOST_MEMORY_MAX_NREGIONS 8 28 #define VHOST_USER_F_PROTOCOL_FEATURES 30 29 30 enum VhostUserProtocolFeature { 31 VHOST_USER_PROTOCOL_F_MQ = 0, 32 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 33 VHOST_USER_PROTOCOL_F_RARP = 2, 34 35 VHOST_USER_PROTOCOL_F_MAX 36 }; 37 38 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 39 40 typedef enum VhostUserRequest { 41 VHOST_USER_NONE = 0, 42 VHOST_USER_GET_FEATURES = 1, 43 VHOST_USER_SET_FEATURES = 2, 44 VHOST_USER_SET_OWNER = 3, 45 VHOST_USER_RESET_OWNER = 4, 46 VHOST_USER_SET_MEM_TABLE = 5, 47 VHOST_USER_SET_LOG_BASE = 6, 48 VHOST_USER_SET_LOG_FD = 7, 49 VHOST_USER_SET_VRING_NUM = 8, 50 VHOST_USER_SET_VRING_ADDR = 9, 51 VHOST_USER_SET_VRING_BASE = 10, 52 VHOST_USER_GET_VRING_BASE = 11, 53 VHOST_USER_SET_VRING_KICK = 12, 54 VHOST_USER_SET_VRING_CALL = 13, 55 VHOST_USER_SET_VRING_ERR = 14, 56 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 57 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 58 VHOST_USER_GET_QUEUE_NUM = 17, 59 VHOST_USER_SET_VRING_ENABLE = 18, 60 VHOST_USER_SEND_RARP = 19, 61 VHOST_USER_MAX 62 } VhostUserRequest; 63 64 typedef struct VhostUserMemoryRegion { 65 uint64_t guest_phys_addr; 66 uint64_t memory_size; 67 uint64_t userspace_addr; 68 uint64_t mmap_offset; 69 } VhostUserMemoryRegion; 70 71 typedef struct VhostUserMemory { 72 uint32_t nregions; 73 uint32_t padding; 74 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 75 } VhostUserMemory; 76 77 typedef struct VhostUserLog { 78 uint64_t mmap_size; 79 uint64_t mmap_offset; 80 } VhostUserLog; 81 82 typedef struct VhostUserMsg { 83 VhostUserRequest request; 84 85 #define VHOST_USER_VERSION_MASK (0x3) 86 #define VHOST_USER_REPLY_MASK (0x1<<2) 87 uint32_t flags; 88 uint32_t size; /* the following payload size */ 89 union { 90 #define VHOST_USER_VRING_IDX_MASK (0xff) 91 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 92 uint64_t u64; 93 struct vhost_vring_state state; 94 struct vhost_vring_addr addr; 95 VhostUserMemory memory; 96 VhostUserLog log; 97 } payload; 98 } QEMU_PACKED VhostUserMsg; 99 100 static VhostUserMsg m __attribute__ ((unused)); 101 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ 102 + sizeof(m.flags) \ 103 + sizeof(m.size)) 104 105 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) 106 107 /* The version of the protocol we support */ 108 #define VHOST_USER_VERSION (0x1) 109 110 static bool ioeventfd_enabled(void) 111 { 112 return kvm_enabled() && kvm_eventfds_enabled(); 113 } 114 115 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 116 { 117 CharDriverState *chr = dev->opaque; 118 uint8_t *p = (uint8_t *) msg; 119 int r, size = VHOST_USER_HDR_SIZE; 120 121 r = qemu_chr_fe_read_all(chr, p, size); 122 if (r != size) { 123 error_report("Failed to read msg header. Read %d instead of %d." 124 " Original request %d.", r, size, msg->request); 125 goto fail; 126 } 127 128 /* validate received flags */ 129 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 130 error_report("Failed to read msg header." 131 " Flags 0x%x instead of 0x%x.", msg->flags, 132 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 133 goto fail; 134 } 135 136 /* validate message size is sane */ 137 if (msg->size > VHOST_USER_PAYLOAD_SIZE) { 138 error_report("Failed to read msg header." 139 " Size %d exceeds the maximum %zu.", msg->size, 140 VHOST_USER_PAYLOAD_SIZE); 141 goto fail; 142 } 143 144 if (msg->size) { 145 p += VHOST_USER_HDR_SIZE; 146 size = msg->size; 147 r = qemu_chr_fe_read_all(chr, p, size); 148 if (r != size) { 149 error_report("Failed to read msg payload." 150 " Read %d instead of %d.", r, msg->size); 151 goto fail; 152 } 153 } 154 155 return 0; 156 157 fail: 158 return -1; 159 } 160 161 static bool vhost_user_one_time_request(VhostUserRequest request) 162 { 163 switch (request) { 164 case VHOST_USER_SET_OWNER: 165 case VHOST_USER_RESET_OWNER: 166 case VHOST_USER_SET_MEM_TABLE: 167 case VHOST_USER_GET_QUEUE_NUM: 168 return true; 169 default: 170 return false; 171 } 172 } 173 174 /* most non-init callers ignore the error */ 175 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 176 int *fds, int fd_num) 177 { 178 CharDriverState *chr = dev->opaque; 179 int size = VHOST_USER_HDR_SIZE + msg->size; 180 181 /* 182 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 183 * we just need send it once in the first time. For later such 184 * request, we just ignore it. 185 */ 186 if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { 187 return 0; 188 } 189 190 if (fd_num) { 191 qemu_chr_fe_set_msgfds(chr, fds, fd_num); 192 } 193 194 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 195 0 : -1; 196 } 197 198 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 199 struct vhost_log *log) 200 { 201 int fds[VHOST_MEMORY_MAX_NREGIONS]; 202 size_t fd_num = 0; 203 bool shmfd = virtio_has_feature(dev->protocol_features, 204 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 205 VhostUserMsg msg = { 206 .request = VHOST_USER_SET_LOG_BASE, 207 .flags = VHOST_USER_VERSION, 208 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 209 .payload.log.mmap_offset = 0, 210 .size = sizeof(msg.payload.log), 211 }; 212 213 if (shmfd && log->fd != -1) { 214 fds[fd_num++] = log->fd; 215 } 216 217 vhost_user_write(dev, &msg, fds, fd_num); 218 219 if (shmfd) { 220 msg.size = 0; 221 if (vhost_user_read(dev, &msg) < 0) { 222 return 0; 223 } 224 225 if (msg.request != VHOST_USER_SET_LOG_BASE) { 226 error_report("Received unexpected msg type. " 227 "Expected %d received %d", 228 VHOST_USER_SET_LOG_BASE, msg.request); 229 return -1; 230 } 231 } 232 233 return 0; 234 } 235 236 static int vhost_user_set_mem_table(struct vhost_dev *dev, 237 struct vhost_memory *mem) 238 { 239 int fds[VHOST_MEMORY_MAX_NREGIONS]; 240 int i, fd; 241 size_t fd_num = 0; 242 VhostUserMsg msg = { 243 .request = VHOST_USER_SET_MEM_TABLE, 244 .flags = VHOST_USER_VERSION, 245 }; 246 247 for (i = 0; i < dev->mem->nregions; ++i) { 248 struct vhost_memory_region *reg = dev->mem->regions + i; 249 ram_addr_t ram_addr; 250 251 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 252 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, 253 &ram_addr); 254 fd = qemu_get_ram_fd(ram_addr); 255 if (fd > 0) { 256 msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 257 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 258 msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 259 msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr - 260 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); 261 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 262 fds[fd_num++] = fd; 263 } 264 } 265 266 msg.payload.memory.nregions = fd_num; 267 268 if (!fd_num) { 269 error_report("Failed initializing vhost-user memory map, " 270 "consider using -object memory-backend-file share=on"); 271 return -1; 272 } 273 274 msg.size = sizeof(msg.payload.memory.nregions); 275 msg.size += sizeof(msg.payload.memory.padding); 276 msg.size += fd_num * sizeof(VhostUserMemoryRegion); 277 278 vhost_user_write(dev, &msg, fds, fd_num); 279 280 return 0; 281 } 282 283 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 284 struct vhost_vring_addr *addr) 285 { 286 VhostUserMsg msg = { 287 .request = VHOST_USER_SET_VRING_ADDR, 288 .flags = VHOST_USER_VERSION, 289 .payload.addr = *addr, 290 .size = sizeof(msg.payload.addr), 291 }; 292 293 vhost_user_write(dev, &msg, NULL, 0); 294 295 return 0; 296 } 297 298 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 299 struct vhost_vring_state *ring) 300 { 301 error_report("vhost-user trying to send unhandled ioctl"); 302 return -1; 303 } 304 305 static int vhost_set_vring(struct vhost_dev *dev, 306 unsigned long int request, 307 struct vhost_vring_state *ring) 308 { 309 VhostUserMsg msg = { 310 .request = request, 311 .flags = VHOST_USER_VERSION, 312 .payload.state = *ring, 313 .size = sizeof(msg.payload.state), 314 }; 315 316 vhost_user_write(dev, &msg, NULL, 0); 317 318 return 0; 319 } 320 321 static int vhost_user_set_vring_num(struct vhost_dev *dev, 322 struct vhost_vring_state *ring) 323 { 324 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 325 } 326 327 static int vhost_user_set_vring_base(struct vhost_dev *dev, 328 struct vhost_vring_state *ring) 329 { 330 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 331 } 332 333 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 334 { 335 int i; 336 337 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 338 return -1; 339 } 340 341 for (i = 0; i < dev->nvqs; ++i) { 342 struct vhost_vring_state state = { 343 .index = dev->vq_index + i, 344 .num = enable, 345 }; 346 347 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 348 } 349 350 return 0; 351 } 352 353 static int vhost_user_get_vring_base(struct vhost_dev *dev, 354 struct vhost_vring_state *ring) 355 { 356 VhostUserMsg msg = { 357 .request = VHOST_USER_GET_VRING_BASE, 358 .flags = VHOST_USER_VERSION, 359 .payload.state = *ring, 360 .size = sizeof(msg.payload.state), 361 }; 362 363 vhost_user_write(dev, &msg, NULL, 0); 364 365 if (vhost_user_read(dev, &msg) < 0) { 366 return 0; 367 } 368 369 if (msg.request != VHOST_USER_GET_VRING_BASE) { 370 error_report("Received unexpected msg type. Expected %d received %d", 371 VHOST_USER_GET_VRING_BASE, msg.request); 372 return -1; 373 } 374 375 if (msg.size != sizeof(msg.payload.state)) { 376 error_report("Received bad msg size."); 377 return -1; 378 } 379 380 *ring = msg.payload.state; 381 382 return 0; 383 } 384 385 static int vhost_set_vring_file(struct vhost_dev *dev, 386 VhostUserRequest request, 387 struct vhost_vring_file *file) 388 { 389 int fds[VHOST_MEMORY_MAX_NREGIONS]; 390 size_t fd_num = 0; 391 VhostUserMsg msg = { 392 .request = request, 393 .flags = VHOST_USER_VERSION, 394 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 395 .size = sizeof(msg.payload.u64), 396 }; 397 398 if (ioeventfd_enabled() && file->fd > 0) { 399 fds[fd_num++] = file->fd; 400 } else { 401 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 402 } 403 404 vhost_user_write(dev, &msg, fds, fd_num); 405 406 return 0; 407 } 408 409 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 410 struct vhost_vring_file *file) 411 { 412 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 413 } 414 415 static int vhost_user_set_vring_call(struct vhost_dev *dev, 416 struct vhost_vring_file *file) 417 { 418 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 419 } 420 421 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 422 { 423 VhostUserMsg msg = { 424 .request = request, 425 .flags = VHOST_USER_VERSION, 426 .payload.u64 = u64, 427 .size = sizeof(msg.payload.u64), 428 }; 429 430 vhost_user_write(dev, &msg, NULL, 0); 431 432 return 0; 433 } 434 435 static int vhost_user_set_features(struct vhost_dev *dev, 436 uint64_t features) 437 { 438 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 439 } 440 441 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 442 uint64_t features) 443 { 444 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 445 } 446 447 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 448 { 449 VhostUserMsg msg = { 450 .request = request, 451 .flags = VHOST_USER_VERSION, 452 }; 453 454 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 455 return 0; 456 } 457 458 vhost_user_write(dev, &msg, NULL, 0); 459 460 if (vhost_user_read(dev, &msg) < 0) { 461 return 0; 462 } 463 464 if (msg.request != request) { 465 error_report("Received unexpected msg type. Expected %d received %d", 466 request, msg.request); 467 return -1; 468 } 469 470 if (msg.size != sizeof(msg.payload.u64)) { 471 error_report("Received bad msg size."); 472 return -1; 473 } 474 475 *u64 = msg.payload.u64; 476 477 return 0; 478 } 479 480 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 481 { 482 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 483 } 484 485 static int vhost_user_set_owner(struct vhost_dev *dev) 486 { 487 VhostUserMsg msg = { 488 .request = VHOST_USER_SET_OWNER, 489 .flags = VHOST_USER_VERSION, 490 }; 491 492 vhost_user_write(dev, &msg, NULL, 0); 493 494 return 0; 495 } 496 497 static int vhost_user_reset_device(struct vhost_dev *dev) 498 { 499 VhostUserMsg msg = { 500 .request = VHOST_USER_RESET_OWNER, 501 .flags = VHOST_USER_VERSION, 502 }; 503 504 vhost_user_write(dev, &msg, NULL, 0); 505 506 return 0; 507 } 508 509 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 510 { 511 uint64_t features; 512 int err; 513 514 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 515 516 dev->opaque = opaque; 517 518 err = vhost_user_get_features(dev, &features); 519 if (err < 0) { 520 return err; 521 } 522 523 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 524 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 525 526 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 527 &features); 528 if (err < 0) { 529 return err; 530 } 531 532 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; 533 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 534 if (err < 0) { 535 return err; 536 } 537 538 /* query the max queues we support if backend supports Multiple Queue */ 539 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 540 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 541 &dev->max_queues); 542 if (err < 0) { 543 return err; 544 } 545 } 546 } 547 548 if (dev->migration_blocker == NULL && 549 !virtio_has_feature(dev->protocol_features, 550 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 551 error_setg(&dev->migration_blocker, 552 "Migration disabled: vhost-user backend lacks " 553 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 554 } 555 556 return 0; 557 } 558 559 static int vhost_user_cleanup(struct vhost_dev *dev) 560 { 561 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 562 563 dev->opaque = 0; 564 565 return 0; 566 } 567 568 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 569 { 570 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 571 572 return idx; 573 } 574 575 static int vhost_user_memslots_limit(struct vhost_dev *dev) 576 { 577 return VHOST_MEMORY_MAX_NREGIONS; 578 } 579 580 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 581 { 582 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 583 584 return virtio_has_feature(dev->protocol_features, 585 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 586 } 587 588 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 589 { 590 VhostUserMsg msg = { 0 }; 591 int err; 592 593 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 594 595 /* If guest supports GUEST_ANNOUNCE do nothing */ 596 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 597 return 0; 598 } 599 600 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 601 if (virtio_has_feature(dev->protocol_features, 602 VHOST_USER_PROTOCOL_F_RARP)) { 603 msg.request = VHOST_USER_SEND_RARP; 604 msg.flags = VHOST_USER_VERSION; 605 memcpy((char *)&msg.payload.u64, mac_addr, 6); 606 msg.size = sizeof(msg.payload.u64); 607 608 err = vhost_user_write(dev, &msg, NULL, 0); 609 return err; 610 } 611 return -1; 612 } 613 614 static bool vhost_user_can_merge(struct vhost_dev *dev, 615 uint64_t start1, uint64_t size1, 616 uint64_t start2, uint64_t size2) 617 { 618 ram_addr_t ram_addr; 619 int mfd, rfd; 620 MemoryRegion *mr; 621 622 mr = qemu_ram_addr_from_host((void *)(uintptr_t)start1, &ram_addr); 623 assert(mr); 624 mfd = qemu_get_ram_fd(ram_addr); 625 626 mr = qemu_ram_addr_from_host((void *)(uintptr_t)start2, &ram_addr); 627 assert(mr); 628 rfd = qemu_get_ram_fd(ram_addr); 629 630 return mfd == rfd; 631 } 632 633 const VhostOps user_ops = { 634 .backend_type = VHOST_BACKEND_TYPE_USER, 635 .vhost_backend_init = vhost_user_init, 636 .vhost_backend_cleanup = vhost_user_cleanup, 637 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 638 .vhost_set_log_base = vhost_user_set_log_base, 639 .vhost_set_mem_table = vhost_user_set_mem_table, 640 .vhost_set_vring_addr = vhost_user_set_vring_addr, 641 .vhost_set_vring_endian = vhost_user_set_vring_endian, 642 .vhost_set_vring_num = vhost_user_set_vring_num, 643 .vhost_set_vring_base = vhost_user_set_vring_base, 644 .vhost_get_vring_base = vhost_user_get_vring_base, 645 .vhost_set_vring_kick = vhost_user_set_vring_kick, 646 .vhost_set_vring_call = vhost_user_set_vring_call, 647 .vhost_set_features = vhost_user_set_features, 648 .vhost_get_features = vhost_user_get_features, 649 .vhost_set_owner = vhost_user_set_owner, 650 .vhost_reset_device = vhost_user_reset_device, 651 .vhost_get_vq_index = vhost_user_get_vq_index, 652 .vhost_set_vring_enable = vhost_user_set_vring_enable, 653 .vhost_requires_shm_log = vhost_user_requires_shm_log, 654 .vhost_migration_done = vhost_user_migration_done, 655 .vhost_backend_can_merge = vhost_user_can_merge, 656 }; 657