1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "hw/virtio/vhost.h" 12 #include "hw/virtio/vhost-backend.h" 13 #include "hw/virtio/virtio-net.h" 14 #include "sysemu/char.h" 15 #include "sysemu/kvm.h" 16 #include "qemu/error-report.h" 17 #include "qemu/sockets.h" 18 #include "exec/ram_addr.h" 19 #include "migration/migration.h" 20 21 #include <fcntl.h> 22 #include <unistd.h> 23 #include <sys/ioctl.h> 24 #include <sys/socket.h> 25 #include <sys/un.h> 26 #include <linux/vhost.h> 27 28 #define VHOST_MEMORY_MAX_NREGIONS 8 29 #define VHOST_USER_F_PROTOCOL_FEATURES 30 30 31 enum VhostUserProtocolFeature { 32 VHOST_USER_PROTOCOL_F_MQ = 0, 33 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 34 VHOST_USER_PROTOCOL_F_RARP = 2, 35 36 VHOST_USER_PROTOCOL_F_MAX 37 }; 38 39 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 40 41 typedef enum VhostUserRequest { 42 VHOST_USER_NONE = 0, 43 VHOST_USER_GET_FEATURES = 1, 44 VHOST_USER_SET_FEATURES = 2, 45 VHOST_USER_SET_OWNER = 3, 46 VHOST_USER_RESET_OWNER = 4, 47 VHOST_USER_SET_MEM_TABLE = 5, 48 VHOST_USER_SET_LOG_BASE = 6, 49 VHOST_USER_SET_LOG_FD = 7, 50 VHOST_USER_SET_VRING_NUM = 8, 51 VHOST_USER_SET_VRING_ADDR = 9, 52 VHOST_USER_SET_VRING_BASE = 10, 53 VHOST_USER_GET_VRING_BASE = 11, 54 VHOST_USER_SET_VRING_KICK = 12, 55 VHOST_USER_SET_VRING_CALL = 13, 56 VHOST_USER_SET_VRING_ERR = 14, 57 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 58 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 59 VHOST_USER_GET_QUEUE_NUM = 17, 60 VHOST_USER_SET_VRING_ENABLE = 18, 61 VHOST_USER_SEND_RARP = 19, 62 VHOST_USER_MAX 63 } VhostUserRequest; 64 65 typedef struct VhostUserMemoryRegion { 66 uint64_t guest_phys_addr; 67 uint64_t memory_size; 68 uint64_t userspace_addr; 69 uint64_t mmap_offset; 70 } VhostUserMemoryRegion; 71 72 typedef struct VhostUserMemory { 73 uint32_t nregions; 74 uint32_t padding; 75 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 76 } VhostUserMemory; 77 78 typedef struct VhostUserLog { 79 uint64_t mmap_size; 80 uint64_t mmap_offset; 81 } VhostUserLog; 82 83 typedef struct VhostUserMsg { 84 VhostUserRequest request; 85 86 #define VHOST_USER_VERSION_MASK (0x3) 87 #define VHOST_USER_REPLY_MASK (0x1<<2) 88 uint32_t flags; 89 uint32_t size; /* the following payload size */ 90 union { 91 #define VHOST_USER_VRING_IDX_MASK (0xff) 92 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 93 uint64_t u64; 94 struct vhost_vring_state state; 95 struct vhost_vring_addr addr; 96 VhostUserMemory memory; 97 VhostUserLog log; 98 } payload; 99 } QEMU_PACKED VhostUserMsg; 100 101 static VhostUserMsg m __attribute__ ((unused)); 102 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ 103 + sizeof(m.flags) \ 104 + sizeof(m.size)) 105 106 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) 107 108 /* The version of the protocol we support */ 109 #define VHOST_USER_VERSION (0x1) 110 111 static bool ioeventfd_enabled(void) 112 { 113 return kvm_enabled() && kvm_eventfds_enabled(); 114 } 115 116 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 117 { 118 CharDriverState *chr = dev->opaque; 119 uint8_t *p = (uint8_t *) msg; 120 int r, size = VHOST_USER_HDR_SIZE; 121 122 r = qemu_chr_fe_read_all(chr, p, size); 123 if (r != size) { 124 error_report("Failed to read msg header. Read %d instead of %d." 125 " Original request %d.", r, size, msg->request); 126 goto fail; 127 } 128 129 /* validate received flags */ 130 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 131 error_report("Failed to read msg header." 132 " Flags 0x%x instead of 0x%x.", msg->flags, 133 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 134 goto fail; 135 } 136 137 /* validate message size is sane */ 138 if (msg->size > VHOST_USER_PAYLOAD_SIZE) { 139 error_report("Failed to read msg header." 140 " Size %d exceeds the maximum %zu.", msg->size, 141 VHOST_USER_PAYLOAD_SIZE); 142 goto fail; 143 } 144 145 if (msg->size) { 146 p += VHOST_USER_HDR_SIZE; 147 size = msg->size; 148 r = qemu_chr_fe_read_all(chr, p, size); 149 if (r != size) { 150 error_report("Failed to read msg payload." 151 " Read %d instead of %d.", r, msg->size); 152 goto fail; 153 } 154 } 155 156 return 0; 157 158 fail: 159 return -1; 160 } 161 162 static bool vhost_user_one_time_request(VhostUserRequest request) 163 { 164 switch (request) { 165 case VHOST_USER_SET_OWNER: 166 case VHOST_USER_RESET_OWNER: 167 case VHOST_USER_SET_MEM_TABLE: 168 case VHOST_USER_GET_QUEUE_NUM: 169 return true; 170 default: 171 return false; 172 } 173 } 174 175 /* most non-init callers ignore the error */ 176 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 177 int *fds, int fd_num) 178 { 179 CharDriverState *chr = dev->opaque; 180 int size = VHOST_USER_HDR_SIZE + msg->size; 181 182 /* 183 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 184 * we just need send it once in the first time. For later such 185 * request, we just ignore it. 186 */ 187 if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { 188 return 0; 189 } 190 191 if (fd_num) { 192 qemu_chr_fe_set_msgfds(chr, fds, fd_num); 193 } 194 195 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 196 0 : -1; 197 } 198 199 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 200 struct vhost_log *log) 201 { 202 int fds[VHOST_MEMORY_MAX_NREGIONS]; 203 size_t fd_num = 0; 204 bool shmfd = virtio_has_feature(dev->protocol_features, 205 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 206 VhostUserMsg msg = { 207 .request = VHOST_USER_SET_LOG_BASE, 208 .flags = VHOST_USER_VERSION, 209 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 210 .payload.log.mmap_offset = 0, 211 .size = sizeof(msg.payload.log), 212 }; 213 214 if (shmfd && log->fd != -1) { 215 fds[fd_num++] = log->fd; 216 } 217 218 vhost_user_write(dev, &msg, fds, fd_num); 219 220 if (shmfd) { 221 msg.size = 0; 222 if (vhost_user_read(dev, &msg) < 0) { 223 return 0; 224 } 225 226 if (msg.request != VHOST_USER_SET_LOG_BASE) { 227 error_report("Received unexpected msg type. " 228 "Expected %d received %d", 229 VHOST_USER_SET_LOG_BASE, msg.request); 230 return -1; 231 } 232 } 233 234 return 0; 235 } 236 237 static int vhost_user_set_mem_table(struct vhost_dev *dev, 238 struct vhost_memory *mem) 239 { 240 int fds[VHOST_MEMORY_MAX_NREGIONS]; 241 int i, fd; 242 size_t fd_num = 0; 243 VhostUserMsg msg = { 244 .request = VHOST_USER_SET_MEM_TABLE, 245 .flags = VHOST_USER_VERSION, 246 }; 247 248 for (i = 0; i < dev->mem->nregions; ++i) { 249 struct vhost_memory_region *reg = dev->mem->regions + i; 250 ram_addr_t ram_addr; 251 252 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 253 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, 254 &ram_addr); 255 fd = qemu_get_ram_fd(ram_addr); 256 if (fd > 0) { 257 msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 258 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 259 msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 260 msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr - 261 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); 262 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 263 fds[fd_num++] = fd; 264 } 265 } 266 267 msg.payload.memory.nregions = fd_num; 268 269 if (!fd_num) { 270 error_report("Failed initializing vhost-user memory map, " 271 "consider using -object memory-backend-file share=on"); 272 return -1; 273 } 274 275 msg.size = sizeof(msg.payload.memory.nregions); 276 msg.size += sizeof(msg.payload.memory.padding); 277 msg.size += fd_num * sizeof(VhostUserMemoryRegion); 278 279 vhost_user_write(dev, &msg, fds, fd_num); 280 281 return 0; 282 } 283 284 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 285 struct vhost_vring_addr *addr) 286 { 287 VhostUserMsg msg = { 288 .request = VHOST_USER_SET_VRING_ADDR, 289 .flags = VHOST_USER_VERSION, 290 .payload.addr = *addr, 291 .size = sizeof(msg.payload.addr), 292 }; 293 294 vhost_user_write(dev, &msg, NULL, 0); 295 296 return 0; 297 } 298 299 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 300 struct vhost_vring_state *ring) 301 { 302 error_report("vhost-user trying to send unhandled ioctl"); 303 return -1; 304 } 305 306 static int vhost_set_vring(struct vhost_dev *dev, 307 unsigned long int request, 308 struct vhost_vring_state *ring) 309 { 310 VhostUserMsg msg = { 311 .request = request, 312 .flags = VHOST_USER_VERSION, 313 .payload.state = *ring, 314 .size = sizeof(msg.payload.state), 315 }; 316 317 vhost_user_write(dev, &msg, NULL, 0); 318 319 return 0; 320 } 321 322 static int vhost_user_set_vring_num(struct vhost_dev *dev, 323 struct vhost_vring_state *ring) 324 { 325 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 326 } 327 328 static int vhost_user_set_vring_base(struct vhost_dev *dev, 329 struct vhost_vring_state *ring) 330 { 331 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 332 } 333 334 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 335 { 336 int i; 337 338 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 339 return -1; 340 } 341 342 for (i = 0; i < dev->nvqs; ++i) { 343 struct vhost_vring_state state = { 344 .index = dev->vq_index + i, 345 .num = enable, 346 }; 347 348 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 349 } 350 351 return 0; 352 } 353 354 static int vhost_user_get_vring_base(struct vhost_dev *dev, 355 struct vhost_vring_state *ring) 356 { 357 VhostUserMsg msg = { 358 .request = VHOST_USER_GET_VRING_BASE, 359 .flags = VHOST_USER_VERSION, 360 .payload.state = *ring, 361 .size = sizeof(msg.payload.state), 362 }; 363 364 vhost_user_write(dev, &msg, NULL, 0); 365 366 if (vhost_user_read(dev, &msg) < 0) { 367 return 0; 368 } 369 370 if (msg.request != VHOST_USER_GET_VRING_BASE) { 371 error_report("Received unexpected msg type. Expected %d received %d", 372 VHOST_USER_GET_VRING_BASE, msg.request); 373 return -1; 374 } 375 376 if (msg.size != sizeof(msg.payload.state)) { 377 error_report("Received bad msg size."); 378 return -1; 379 } 380 381 *ring = msg.payload.state; 382 383 return 0; 384 } 385 386 static int vhost_set_vring_file(struct vhost_dev *dev, 387 VhostUserRequest request, 388 struct vhost_vring_file *file) 389 { 390 int fds[VHOST_MEMORY_MAX_NREGIONS]; 391 size_t fd_num = 0; 392 VhostUserMsg msg = { 393 .request = request, 394 .flags = VHOST_USER_VERSION, 395 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 396 .size = sizeof(msg.payload.u64), 397 }; 398 399 if (ioeventfd_enabled() && file->fd > 0) { 400 fds[fd_num++] = file->fd; 401 } else { 402 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 403 } 404 405 vhost_user_write(dev, &msg, fds, fd_num); 406 407 return 0; 408 } 409 410 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 411 struct vhost_vring_file *file) 412 { 413 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 414 } 415 416 static int vhost_user_set_vring_call(struct vhost_dev *dev, 417 struct vhost_vring_file *file) 418 { 419 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 420 } 421 422 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 423 { 424 VhostUserMsg msg = { 425 .request = request, 426 .flags = VHOST_USER_VERSION, 427 .payload.u64 = u64, 428 .size = sizeof(msg.payload.u64), 429 }; 430 431 vhost_user_write(dev, &msg, NULL, 0); 432 433 return 0; 434 } 435 436 static int vhost_user_set_features(struct vhost_dev *dev, 437 uint64_t features) 438 { 439 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 440 } 441 442 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 443 uint64_t features) 444 { 445 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 446 } 447 448 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 449 { 450 VhostUserMsg msg = { 451 .request = request, 452 .flags = VHOST_USER_VERSION, 453 }; 454 455 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 456 return 0; 457 } 458 459 vhost_user_write(dev, &msg, NULL, 0); 460 461 if (vhost_user_read(dev, &msg) < 0) { 462 return 0; 463 } 464 465 if (msg.request != request) { 466 error_report("Received unexpected msg type. Expected %d received %d", 467 request, msg.request); 468 return -1; 469 } 470 471 if (msg.size != sizeof(msg.payload.u64)) { 472 error_report("Received bad msg size."); 473 return -1; 474 } 475 476 *u64 = msg.payload.u64; 477 478 return 0; 479 } 480 481 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 482 { 483 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 484 } 485 486 static int vhost_user_set_owner(struct vhost_dev *dev) 487 { 488 VhostUserMsg msg = { 489 .request = VHOST_USER_SET_OWNER, 490 .flags = VHOST_USER_VERSION, 491 }; 492 493 vhost_user_write(dev, &msg, NULL, 0); 494 495 return 0; 496 } 497 498 static int vhost_user_reset_device(struct vhost_dev *dev) 499 { 500 VhostUserMsg msg = { 501 .request = VHOST_USER_RESET_OWNER, 502 .flags = VHOST_USER_VERSION, 503 }; 504 505 vhost_user_write(dev, &msg, NULL, 0); 506 507 return 0; 508 } 509 510 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 511 { 512 uint64_t features; 513 int err; 514 515 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 516 517 dev->opaque = opaque; 518 519 err = vhost_user_get_features(dev, &features); 520 if (err < 0) { 521 return err; 522 } 523 524 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 525 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 526 527 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 528 &features); 529 if (err < 0) { 530 return err; 531 } 532 533 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; 534 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 535 if (err < 0) { 536 return err; 537 } 538 539 /* query the max queues we support if backend supports Multiple Queue */ 540 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 541 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 542 &dev->max_queues); 543 if (err < 0) { 544 return err; 545 } 546 } 547 } 548 549 if (dev->migration_blocker == NULL && 550 !virtio_has_feature(dev->protocol_features, 551 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 552 error_setg(&dev->migration_blocker, 553 "Migration disabled: vhost-user backend lacks " 554 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 555 } 556 557 return 0; 558 } 559 560 static int vhost_user_cleanup(struct vhost_dev *dev) 561 { 562 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 563 564 dev->opaque = 0; 565 566 return 0; 567 } 568 569 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 570 { 571 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 572 573 return idx; 574 } 575 576 static int vhost_user_memslots_limit(struct vhost_dev *dev) 577 { 578 return VHOST_MEMORY_MAX_NREGIONS; 579 } 580 581 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 582 { 583 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 584 585 return virtio_has_feature(dev->protocol_features, 586 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 587 } 588 589 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 590 { 591 VhostUserMsg msg = { 0 }; 592 int err; 593 594 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 595 596 /* If guest supports GUEST_ANNOUNCE do nothing */ 597 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 598 return 0; 599 } 600 601 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 602 if (virtio_has_feature(dev->protocol_features, 603 VHOST_USER_PROTOCOL_F_RARP)) { 604 msg.request = VHOST_USER_SEND_RARP; 605 msg.flags = VHOST_USER_VERSION; 606 memcpy((char *)&msg.payload.u64, mac_addr, 6); 607 msg.size = sizeof(msg.payload.u64); 608 609 err = vhost_user_write(dev, &msg, NULL, 0); 610 return err; 611 } 612 return -1; 613 } 614 615 const VhostOps user_ops = { 616 .backend_type = VHOST_BACKEND_TYPE_USER, 617 .vhost_backend_init = vhost_user_init, 618 .vhost_backend_cleanup = vhost_user_cleanup, 619 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 620 .vhost_set_log_base = vhost_user_set_log_base, 621 .vhost_set_mem_table = vhost_user_set_mem_table, 622 .vhost_set_vring_addr = vhost_user_set_vring_addr, 623 .vhost_set_vring_endian = vhost_user_set_vring_endian, 624 .vhost_set_vring_num = vhost_user_set_vring_num, 625 .vhost_set_vring_base = vhost_user_set_vring_base, 626 .vhost_get_vring_base = vhost_user_get_vring_base, 627 .vhost_set_vring_kick = vhost_user_set_vring_kick, 628 .vhost_set_vring_call = vhost_user_set_vring_call, 629 .vhost_set_features = vhost_user_set_features, 630 .vhost_get_features = vhost_user_get_features, 631 .vhost_set_owner = vhost_user_set_owner, 632 .vhost_reset_device = vhost_user_reset_device, 633 .vhost_get_vq_index = vhost_user_get_vq_index, 634 .vhost_set_vring_enable = vhost_user_set_vring_enable, 635 .vhost_requires_shm_log = vhost_user_requires_shm_log, 636 .vhost_migration_done = vhost_user_migration_done, 637 }; 638