1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "hw/virtio/vhost.h" 12 #include "hw/virtio/vhost-backend.h" 13 #include "hw/virtio/virtio-net.h" 14 #include "sysemu/char.h" 15 #include "sysemu/kvm.h" 16 #include "qemu/error-report.h" 17 #include "qemu/sockets.h" 18 #include "exec/ram_addr.h" 19 #include "migration/migration.h" 20 21 #include <fcntl.h> 22 #include <unistd.h> 23 #include <sys/ioctl.h> 24 #include <sys/socket.h> 25 #include <sys/un.h> 26 #include <linux/vhost.h> 27 28 #define VHOST_MEMORY_MAX_NREGIONS 8 29 #define VHOST_USER_F_PROTOCOL_FEATURES 30 30 31 enum VhostUserProtocolFeature { 32 VHOST_USER_PROTOCOL_F_MQ = 0, 33 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 34 VHOST_USER_PROTOCOL_F_RARP = 2, 35 36 VHOST_USER_PROTOCOL_F_MAX 37 }; 38 39 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 40 41 typedef enum VhostUserRequest { 42 VHOST_USER_NONE = 0, 43 VHOST_USER_GET_FEATURES = 1, 44 VHOST_USER_SET_FEATURES = 2, 45 VHOST_USER_SET_OWNER = 3, 46 VHOST_USER_RESET_OWNER = 4, 47 VHOST_USER_SET_MEM_TABLE = 5, 48 VHOST_USER_SET_LOG_BASE = 6, 49 VHOST_USER_SET_LOG_FD = 7, 50 VHOST_USER_SET_VRING_NUM = 8, 51 VHOST_USER_SET_VRING_ADDR = 9, 52 VHOST_USER_SET_VRING_BASE = 10, 53 VHOST_USER_GET_VRING_BASE = 11, 54 VHOST_USER_SET_VRING_KICK = 12, 55 VHOST_USER_SET_VRING_CALL = 13, 56 VHOST_USER_SET_VRING_ERR = 14, 57 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 58 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 59 VHOST_USER_GET_QUEUE_NUM = 17, 60 VHOST_USER_SET_VRING_ENABLE = 18, 61 VHOST_USER_SEND_RARP = 19, 62 VHOST_USER_MAX 63 } VhostUserRequest; 64 65 typedef struct VhostUserMemoryRegion { 66 uint64_t guest_phys_addr; 67 uint64_t memory_size; 68 uint64_t userspace_addr; 69 uint64_t mmap_offset; 70 } VhostUserMemoryRegion; 71 72 typedef struct VhostUserMemory { 73 uint32_t nregions; 74 uint32_t padding; 75 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 76 } VhostUserMemory; 77 78 typedef struct VhostUserLog { 79 uint64_t mmap_size; 80 uint64_t mmap_offset; 81 } VhostUserLog; 82 83 typedef struct VhostUserMsg { 84 VhostUserRequest request; 85 86 #define VHOST_USER_VERSION_MASK (0x3) 87 #define VHOST_USER_REPLY_MASK (0x1<<2) 88 uint32_t flags; 89 uint32_t size; /* the following payload size */ 90 union { 91 #define VHOST_USER_VRING_IDX_MASK (0xff) 92 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 93 uint64_t u64; 94 struct vhost_vring_state state; 95 struct vhost_vring_addr addr; 96 VhostUserMemory memory; 97 VhostUserLog log; 98 } payload; 99 } QEMU_PACKED VhostUserMsg; 100 101 static VhostUserMsg m __attribute__ ((unused)); 102 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ 103 + sizeof(m.flags) \ 104 + sizeof(m.size)) 105 106 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) 107 108 /* The version of the protocol we support */ 109 #define VHOST_USER_VERSION (0x1) 110 111 static bool ioeventfd_enabled(void) 112 { 113 return kvm_enabled() && kvm_eventfds_enabled(); 114 } 115 116 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 117 { 118 CharDriverState *chr = dev->opaque; 119 uint8_t *p = (uint8_t *) msg; 120 int r, size = VHOST_USER_HDR_SIZE; 121 122 r = qemu_chr_fe_read_all(chr, p, size); 123 if (r != size) { 124 error_report("Failed to read msg header. Read %d instead of %d.", r, 125 size); 126 goto fail; 127 } 128 129 /* validate received flags */ 130 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 131 error_report("Failed to read msg header." 132 " Flags 0x%x instead of 0x%x.", msg->flags, 133 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 134 goto fail; 135 } 136 137 /* validate message size is sane */ 138 if (msg->size > VHOST_USER_PAYLOAD_SIZE) { 139 error_report("Failed to read msg header." 140 " Size %d exceeds the maximum %zu.", msg->size, 141 VHOST_USER_PAYLOAD_SIZE); 142 goto fail; 143 } 144 145 if (msg->size) { 146 p += VHOST_USER_HDR_SIZE; 147 size = msg->size; 148 r = qemu_chr_fe_read_all(chr, p, size); 149 if (r != size) { 150 error_report("Failed to read msg payload." 151 " Read %d instead of %d.", r, msg->size); 152 goto fail; 153 } 154 } 155 156 return 0; 157 158 fail: 159 return -1; 160 } 161 162 static bool vhost_user_one_time_request(VhostUserRequest request) 163 { 164 switch (request) { 165 case VHOST_USER_SET_OWNER: 166 case VHOST_USER_RESET_OWNER: 167 case VHOST_USER_SET_MEM_TABLE: 168 case VHOST_USER_GET_QUEUE_NUM: 169 return true; 170 default: 171 return false; 172 } 173 } 174 175 /* most non-init callers ignore the error */ 176 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 177 int *fds, int fd_num) 178 { 179 CharDriverState *chr = dev->opaque; 180 int size = VHOST_USER_HDR_SIZE + msg->size; 181 182 /* 183 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 184 * we just need send it once in the first time. For later such 185 * request, we just ignore it. 186 */ 187 if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { 188 return 0; 189 } 190 191 if (fd_num) { 192 qemu_chr_fe_set_msgfds(chr, fds, fd_num); 193 } 194 195 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 196 0 : -1; 197 } 198 199 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 200 struct vhost_log *log) 201 { 202 int fds[VHOST_MEMORY_MAX_NREGIONS]; 203 size_t fd_num = 0; 204 bool shmfd = virtio_has_feature(dev->protocol_features, 205 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 206 VhostUserMsg msg = { 207 .request = VHOST_USER_SET_LOG_BASE, 208 .flags = VHOST_USER_VERSION, 209 .payload.log.mmap_size = log->size, 210 .payload.log.mmap_offset = 0, 211 .size = sizeof(msg.payload.log), 212 }; 213 214 if (shmfd && log->fd != -1) { 215 fds[fd_num++] = log->fd; 216 } 217 218 vhost_user_write(dev, &msg, fds, fd_num); 219 220 if (shmfd) { 221 msg.size = 0; 222 if (vhost_user_read(dev, &msg) < 0) { 223 return 0; 224 } 225 226 if (msg.request != VHOST_USER_SET_LOG_BASE) { 227 error_report("Received unexpected msg type. " 228 "Expected %d received %d", 229 VHOST_USER_SET_LOG_BASE, msg.request); 230 return -1; 231 } 232 } 233 234 return 0; 235 } 236 237 static int vhost_user_set_mem_table(struct vhost_dev *dev, 238 struct vhost_memory *mem) 239 { 240 int fds[VHOST_MEMORY_MAX_NREGIONS]; 241 int i, fd; 242 size_t fd_num = 0; 243 VhostUserMsg msg = { 244 .request = VHOST_USER_SET_MEM_TABLE, 245 .flags = VHOST_USER_VERSION, 246 }; 247 248 for (i = 0; i < dev->mem->nregions; ++i) { 249 struct vhost_memory_region *reg = dev->mem->regions + i; 250 ram_addr_t ram_addr; 251 252 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 253 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, 254 &ram_addr); 255 fd = qemu_get_ram_fd(ram_addr); 256 if (fd > 0) { 257 msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 258 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 259 msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 260 msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr - 261 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); 262 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 263 fds[fd_num++] = fd; 264 } 265 } 266 267 msg.payload.memory.nregions = fd_num; 268 269 if (!fd_num) { 270 error_report("Failed initializing vhost-user memory map, " 271 "consider using -object memory-backend-file share=on"); 272 return -1; 273 } 274 275 msg.size = sizeof(msg.payload.memory.nregions); 276 msg.size += sizeof(msg.payload.memory.padding); 277 msg.size += fd_num * sizeof(VhostUserMemoryRegion); 278 279 vhost_user_write(dev, &msg, fds, fd_num); 280 281 return 0; 282 } 283 284 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 285 struct vhost_vring_addr *addr) 286 { 287 VhostUserMsg msg = { 288 .request = VHOST_USER_SET_VRING_ADDR, 289 .flags = VHOST_USER_VERSION, 290 .payload.addr = *addr, 291 .size = sizeof(msg.payload.addr), 292 }; 293 294 vhost_user_write(dev, &msg, NULL, 0); 295 296 return 0; 297 } 298 299 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 300 struct vhost_vring_state *ring) 301 { 302 error_report("vhost-user trying to send unhandled ioctl"); 303 return -1; 304 } 305 306 static int vhost_set_vring(struct vhost_dev *dev, 307 unsigned long int request, 308 struct vhost_vring_state *ring) 309 { 310 VhostUserMsg msg = { 311 .request = request, 312 .flags = VHOST_USER_VERSION, 313 .payload.state = *ring, 314 .size = sizeof(msg.payload.state), 315 }; 316 317 vhost_user_write(dev, &msg, NULL, 0); 318 319 return 0; 320 } 321 322 static int vhost_user_set_vring_num(struct vhost_dev *dev, 323 struct vhost_vring_state *ring) 324 { 325 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 326 } 327 328 static int vhost_user_set_vring_base(struct vhost_dev *dev, 329 struct vhost_vring_state *ring) 330 { 331 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 332 } 333 334 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 335 { 336 struct vhost_vring_state state = { 337 .index = dev->vq_index, 338 .num = enable, 339 }; 340 341 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) { 342 return -1; 343 } 344 345 return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 346 } 347 348 349 static int vhost_user_get_vring_base(struct vhost_dev *dev, 350 struct vhost_vring_state *ring) 351 { 352 VhostUserMsg msg = { 353 .request = VHOST_USER_GET_VRING_BASE, 354 .flags = VHOST_USER_VERSION, 355 .payload.state = *ring, 356 .size = sizeof(msg.payload.state), 357 }; 358 359 vhost_user_write(dev, &msg, NULL, 0); 360 361 if (vhost_user_read(dev, &msg) < 0) { 362 return 0; 363 } 364 365 if (msg.request != VHOST_USER_GET_VRING_BASE) { 366 error_report("Received unexpected msg type. Expected %d received %d", 367 VHOST_USER_GET_VRING_BASE, msg.request); 368 return -1; 369 } 370 371 if (msg.size != sizeof(msg.payload.state)) { 372 error_report("Received bad msg size."); 373 return -1; 374 } 375 376 *ring = msg.payload.state; 377 378 return 0; 379 } 380 381 static int vhost_set_vring_file(struct vhost_dev *dev, 382 VhostUserRequest request, 383 struct vhost_vring_file *file) 384 { 385 int fds[VHOST_MEMORY_MAX_NREGIONS]; 386 size_t fd_num = 0; 387 VhostUserMsg msg = { 388 .request = request, 389 .flags = VHOST_USER_VERSION, 390 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 391 .size = sizeof(msg.payload.u64), 392 }; 393 394 if (ioeventfd_enabled() && file->fd > 0) { 395 fds[fd_num++] = file->fd; 396 } else { 397 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 398 } 399 400 vhost_user_write(dev, &msg, fds, fd_num); 401 402 return 0; 403 } 404 405 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 406 struct vhost_vring_file *file) 407 { 408 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 409 } 410 411 static int vhost_user_set_vring_call(struct vhost_dev *dev, 412 struct vhost_vring_file *file) 413 { 414 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 415 } 416 417 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 418 { 419 VhostUserMsg msg = { 420 .request = request, 421 .flags = VHOST_USER_VERSION, 422 .payload.u64 = u64, 423 .size = sizeof(msg.payload.u64), 424 }; 425 426 vhost_user_write(dev, &msg, NULL, 0); 427 428 return 0; 429 } 430 431 static int vhost_user_set_features(struct vhost_dev *dev, 432 uint64_t features) 433 { 434 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 435 } 436 437 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 438 uint64_t features) 439 { 440 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 441 } 442 443 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 444 { 445 VhostUserMsg msg = { 446 .request = request, 447 .flags = VHOST_USER_VERSION, 448 }; 449 450 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 451 return 0; 452 } 453 454 vhost_user_write(dev, &msg, NULL, 0); 455 456 if (vhost_user_read(dev, &msg) < 0) { 457 return 0; 458 } 459 460 if (msg.request != request) { 461 error_report("Received unexpected msg type. Expected %d received %d", 462 request, msg.request); 463 return -1; 464 } 465 466 if (msg.size != sizeof(msg.payload.u64)) { 467 error_report("Received bad msg size."); 468 return -1; 469 } 470 471 *u64 = msg.payload.u64; 472 473 return 0; 474 } 475 476 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 477 { 478 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 479 } 480 481 static int vhost_user_set_owner(struct vhost_dev *dev) 482 { 483 VhostUserMsg msg = { 484 .request = VHOST_USER_SET_OWNER, 485 .flags = VHOST_USER_VERSION, 486 }; 487 488 vhost_user_write(dev, &msg, NULL, 0); 489 490 return 0; 491 } 492 493 static int vhost_user_reset_device(struct vhost_dev *dev) 494 { 495 VhostUserMsg msg = { 496 .request = VHOST_USER_RESET_OWNER, 497 .flags = VHOST_USER_VERSION, 498 }; 499 500 vhost_user_write(dev, &msg, NULL, 0); 501 502 return 0; 503 } 504 505 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 506 { 507 uint64_t features; 508 int err; 509 510 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 511 512 dev->opaque = opaque; 513 514 err = vhost_user_get_features(dev, &features); 515 if (err < 0) { 516 return err; 517 } 518 519 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 520 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 521 522 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 523 &features); 524 if (err < 0) { 525 return err; 526 } 527 528 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; 529 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 530 if (err < 0) { 531 return err; 532 } 533 534 /* query the max queues we support if backend supports Multiple Queue */ 535 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 536 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 537 &dev->max_queues); 538 if (err < 0) { 539 return err; 540 } 541 } 542 } 543 544 if (dev->migration_blocker == NULL && 545 !virtio_has_feature(dev->protocol_features, 546 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 547 error_setg(&dev->migration_blocker, 548 "Migration disabled: vhost-user backend lacks " 549 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 550 } 551 552 return 0; 553 } 554 555 static int vhost_user_cleanup(struct vhost_dev *dev) 556 { 557 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 558 559 dev->opaque = 0; 560 561 return 0; 562 } 563 564 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 565 { 566 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 567 568 return idx; 569 } 570 571 static int vhost_user_memslots_limit(struct vhost_dev *dev) 572 { 573 return VHOST_MEMORY_MAX_NREGIONS; 574 } 575 576 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 577 { 578 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 579 580 return virtio_has_feature(dev->protocol_features, 581 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 582 } 583 584 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 585 { 586 VhostUserMsg msg = { 0 }; 587 int err; 588 589 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 590 591 /* If guest supports GUEST_ANNOUNCE do nothing */ 592 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 593 return 0; 594 } 595 596 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 597 if (virtio_has_feature(dev->protocol_features, 598 VHOST_USER_PROTOCOL_F_RARP)) { 599 msg.request = VHOST_USER_SEND_RARP; 600 msg.flags = VHOST_USER_VERSION; 601 memcpy((char *)&msg.payload.u64, mac_addr, 6); 602 msg.size = sizeof(msg.payload.u64); 603 604 err = vhost_user_write(dev, &msg, NULL, 0); 605 return err; 606 } 607 return -1; 608 } 609 610 const VhostOps user_ops = { 611 .backend_type = VHOST_BACKEND_TYPE_USER, 612 .vhost_backend_init = vhost_user_init, 613 .vhost_backend_cleanup = vhost_user_cleanup, 614 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 615 .vhost_set_log_base = vhost_user_set_log_base, 616 .vhost_set_mem_table = vhost_user_set_mem_table, 617 .vhost_set_vring_addr = vhost_user_set_vring_addr, 618 .vhost_set_vring_endian = vhost_user_set_vring_endian, 619 .vhost_set_vring_num = vhost_user_set_vring_num, 620 .vhost_set_vring_base = vhost_user_set_vring_base, 621 .vhost_get_vring_base = vhost_user_get_vring_base, 622 .vhost_set_vring_kick = vhost_user_set_vring_kick, 623 .vhost_set_vring_call = vhost_user_set_vring_call, 624 .vhost_set_features = vhost_user_set_features, 625 .vhost_get_features = vhost_user_get_features, 626 .vhost_set_owner = vhost_user_set_owner, 627 .vhost_reset_device = vhost_user_reset_device, 628 .vhost_get_vq_index = vhost_user_get_vq_index, 629 .vhost_set_vring_enable = vhost_user_set_vring_enable, 630 .vhost_requires_shm_log = vhost_user_requires_shm_log, 631 .vhost_migration_done = vhost_user_migration_done, 632 }; 633