1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "hw/virtio/vhost.h" 12 #include "hw/virtio/vhost-backend.h" 13 #include "hw/virtio/virtio-net.h" 14 #include "sysemu/char.h" 15 #include "sysemu/kvm.h" 16 #include "qemu/error-report.h" 17 #include "qemu/sockets.h" 18 #include "exec/ram_addr.h" 19 #include "migration/migration.h" 20 21 #include <fcntl.h> 22 #include <unistd.h> 23 #include <sys/ioctl.h> 24 #include <sys/socket.h> 25 #include <sys/un.h> 26 #include <linux/vhost.h> 27 28 #define VHOST_MEMORY_MAX_NREGIONS 8 29 #define VHOST_USER_F_PROTOCOL_FEATURES 30 30 31 enum VhostUserProtocolFeature { 32 VHOST_USER_PROTOCOL_F_MQ = 0, 33 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 34 VHOST_USER_PROTOCOL_F_RARP = 2, 35 36 VHOST_USER_PROTOCOL_F_MAX 37 }; 38 39 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 40 41 typedef enum VhostUserRequest { 42 VHOST_USER_NONE = 0, 43 VHOST_USER_GET_FEATURES = 1, 44 VHOST_USER_SET_FEATURES = 2, 45 VHOST_USER_SET_OWNER = 3, 46 VHOST_USER_RESET_DEVICE = 4, 47 VHOST_USER_SET_MEM_TABLE = 5, 48 VHOST_USER_SET_LOG_BASE = 6, 49 VHOST_USER_SET_LOG_FD = 7, 50 VHOST_USER_SET_VRING_NUM = 8, 51 VHOST_USER_SET_VRING_ADDR = 9, 52 VHOST_USER_SET_VRING_BASE = 10, 53 VHOST_USER_GET_VRING_BASE = 11, 54 VHOST_USER_SET_VRING_KICK = 12, 55 VHOST_USER_SET_VRING_CALL = 13, 56 VHOST_USER_SET_VRING_ERR = 14, 57 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 58 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 59 VHOST_USER_GET_QUEUE_NUM = 17, 60 VHOST_USER_SET_VRING_ENABLE = 18, 61 VHOST_USER_SEND_RARP = 19, 62 VHOST_USER_MAX 63 } VhostUserRequest; 64 65 typedef struct VhostUserMemoryRegion { 66 uint64_t guest_phys_addr; 67 uint64_t memory_size; 68 uint64_t userspace_addr; 69 uint64_t mmap_offset; 70 } VhostUserMemoryRegion; 71 72 typedef struct VhostUserMemory { 73 uint32_t nregions; 74 uint32_t padding; 75 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 76 } VhostUserMemory; 77 78 typedef struct VhostUserMsg { 79 VhostUserRequest request; 80 81 #define VHOST_USER_VERSION_MASK (0x3) 82 #define VHOST_USER_REPLY_MASK (0x1<<2) 83 uint32_t flags; 84 uint32_t size; /* the following payload size */ 85 union { 86 #define VHOST_USER_VRING_IDX_MASK (0xff) 87 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 88 uint64_t u64; 89 struct vhost_vring_state state; 90 struct vhost_vring_addr addr; 91 VhostUserMemory memory; 92 } payload; 93 } QEMU_PACKED VhostUserMsg; 94 95 static VhostUserMsg m __attribute__ ((unused)); 96 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ 97 + sizeof(m.flags) \ 98 + sizeof(m.size)) 99 100 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) 101 102 /* The version of the protocol we support */ 103 #define VHOST_USER_VERSION (0x1) 104 105 static bool ioeventfd_enabled(void) 106 { 107 return kvm_enabled() && kvm_eventfds_enabled(); 108 } 109 110 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 111 { 112 CharDriverState *chr = dev->opaque; 113 uint8_t *p = (uint8_t *) msg; 114 int r, size = VHOST_USER_HDR_SIZE; 115 116 r = qemu_chr_fe_read_all(chr, p, size); 117 if (r != size) { 118 error_report("Failed to read msg header. Read %d instead of %d.", r, 119 size); 120 goto fail; 121 } 122 123 /* validate received flags */ 124 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 125 error_report("Failed to read msg header." 126 " Flags 0x%x instead of 0x%x.", msg->flags, 127 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 128 goto fail; 129 } 130 131 /* validate message size is sane */ 132 if (msg->size > VHOST_USER_PAYLOAD_SIZE) { 133 error_report("Failed to read msg header." 134 " Size %d exceeds the maximum %zu.", msg->size, 135 VHOST_USER_PAYLOAD_SIZE); 136 goto fail; 137 } 138 139 if (msg->size) { 140 p += VHOST_USER_HDR_SIZE; 141 size = msg->size; 142 r = qemu_chr_fe_read_all(chr, p, size); 143 if (r != size) { 144 error_report("Failed to read msg payload." 145 " Read %d instead of %d.", r, msg->size); 146 goto fail; 147 } 148 } 149 150 return 0; 151 152 fail: 153 return -1; 154 } 155 156 static bool vhost_user_one_time_request(VhostUserRequest request) 157 { 158 switch (request) { 159 case VHOST_USER_SET_OWNER: 160 case VHOST_USER_RESET_DEVICE: 161 case VHOST_USER_SET_MEM_TABLE: 162 case VHOST_USER_GET_QUEUE_NUM: 163 return true; 164 default: 165 return false; 166 } 167 } 168 169 /* most non-init callers ignore the error */ 170 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 171 int *fds, int fd_num) 172 { 173 CharDriverState *chr = dev->opaque; 174 int size = VHOST_USER_HDR_SIZE + msg->size; 175 176 /* 177 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 178 * we just need send it once in the first time. For later such 179 * request, we just ignore it. 180 */ 181 if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { 182 return 0; 183 } 184 185 if (fd_num) { 186 qemu_chr_fe_set_msgfds(chr, fds, fd_num); 187 } 188 189 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 190 0 : -1; 191 } 192 193 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 194 struct vhost_log *log) 195 { 196 int fds[VHOST_MEMORY_MAX_NREGIONS]; 197 size_t fd_num = 0; 198 bool shmfd = virtio_has_feature(dev->protocol_features, 199 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 200 VhostUserMsg msg = { 201 .request = VHOST_USER_SET_LOG_BASE, 202 .flags = VHOST_USER_VERSION, 203 .payload.u64 = base, 204 .size = sizeof(msg.payload.u64), 205 }; 206 207 if (shmfd && log->fd != -1) { 208 fds[fd_num++] = log->fd; 209 } 210 211 vhost_user_write(dev, &msg, fds, fd_num); 212 213 if (shmfd) { 214 msg.size = 0; 215 if (vhost_user_read(dev, &msg) < 0) { 216 return 0; 217 } 218 219 if (msg.request != VHOST_USER_SET_LOG_BASE) { 220 error_report("Received unexpected msg type. " 221 "Expected %d received %d", 222 VHOST_USER_SET_LOG_BASE, msg.request); 223 return -1; 224 } 225 } 226 227 return 0; 228 } 229 230 static int vhost_user_set_mem_table(struct vhost_dev *dev, 231 struct vhost_memory *mem) 232 { 233 int fds[VHOST_MEMORY_MAX_NREGIONS]; 234 int i, fd; 235 size_t fd_num = 0; 236 VhostUserMsg msg = { 237 .request = VHOST_USER_SET_MEM_TABLE, 238 .flags = VHOST_USER_VERSION, 239 }; 240 241 for (i = 0; i < dev->mem->nregions; ++i) { 242 struct vhost_memory_region *reg = dev->mem->regions + i; 243 ram_addr_t ram_addr; 244 245 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 246 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, 247 &ram_addr); 248 fd = qemu_get_ram_fd(ram_addr); 249 if (fd > 0) { 250 msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 251 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 252 msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 253 msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr - 254 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); 255 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 256 fds[fd_num++] = fd; 257 } 258 } 259 260 msg.payload.memory.nregions = fd_num; 261 262 if (!fd_num) { 263 error_report("Failed initializing vhost-user memory map, " 264 "consider using -object memory-backend-file share=on"); 265 return -1; 266 } 267 268 msg.size = sizeof(msg.payload.memory.nregions); 269 msg.size += sizeof(msg.payload.memory.padding); 270 msg.size += fd_num * sizeof(VhostUserMemoryRegion); 271 272 vhost_user_write(dev, &msg, fds, fd_num); 273 274 return 0; 275 } 276 277 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 278 struct vhost_vring_addr *addr) 279 { 280 VhostUserMsg msg = { 281 .request = VHOST_USER_SET_VRING_ADDR, 282 .flags = VHOST_USER_VERSION, 283 .payload.addr = *addr, 284 .size = sizeof(msg.payload.addr), 285 }; 286 287 vhost_user_write(dev, &msg, NULL, 0); 288 289 return 0; 290 } 291 292 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 293 struct vhost_vring_state *ring) 294 { 295 error_report("vhost-user trying to send unhandled ioctl"); 296 return -1; 297 } 298 299 static int vhost_set_vring(struct vhost_dev *dev, 300 unsigned long int request, 301 struct vhost_vring_state *ring) 302 { 303 VhostUserMsg msg = { 304 .request = request, 305 .flags = VHOST_USER_VERSION, 306 .payload.state = *ring, 307 .size = sizeof(msg.payload.state), 308 }; 309 310 vhost_user_write(dev, &msg, NULL, 0); 311 312 return 0; 313 } 314 315 static int vhost_user_set_vring_num(struct vhost_dev *dev, 316 struct vhost_vring_state *ring) 317 { 318 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 319 } 320 321 static int vhost_user_set_vring_base(struct vhost_dev *dev, 322 struct vhost_vring_state *ring) 323 { 324 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 325 } 326 327 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 328 { 329 struct vhost_vring_state state = { 330 .index = dev->vq_index, 331 .num = enable, 332 }; 333 334 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) { 335 return -1; 336 } 337 338 return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 339 } 340 341 342 static int vhost_user_get_vring_base(struct vhost_dev *dev, 343 struct vhost_vring_state *ring) 344 { 345 VhostUserMsg msg = { 346 .request = VHOST_USER_GET_VRING_BASE, 347 .flags = VHOST_USER_VERSION, 348 .payload.state = *ring, 349 .size = sizeof(msg.payload.state), 350 }; 351 352 vhost_user_write(dev, &msg, NULL, 0); 353 354 if (vhost_user_read(dev, &msg) < 0) { 355 return 0; 356 } 357 358 if (msg.request != VHOST_USER_GET_VRING_BASE) { 359 error_report("Received unexpected msg type. Expected %d received %d", 360 VHOST_USER_GET_VRING_BASE, msg.request); 361 return -1; 362 } 363 364 if (msg.size != sizeof(msg.payload.state)) { 365 error_report("Received bad msg size."); 366 return -1; 367 } 368 369 *ring = msg.payload.state; 370 371 return 0; 372 } 373 374 static int vhost_set_vring_file(struct vhost_dev *dev, 375 VhostUserRequest request, 376 struct vhost_vring_file *file) 377 { 378 int fds[VHOST_MEMORY_MAX_NREGIONS]; 379 size_t fd_num = 0; 380 VhostUserMsg msg = { 381 .request = request, 382 .flags = VHOST_USER_VERSION, 383 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 384 .size = sizeof(msg.payload.u64), 385 }; 386 387 if (ioeventfd_enabled() && file->fd > 0) { 388 fds[fd_num++] = file->fd; 389 } else { 390 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 391 } 392 393 vhost_user_write(dev, &msg, fds, fd_num); 394 395 return 0; 396 } 397 398 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 399 struct vhost_vring_file *file) 400 { 401 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 402 } 403 404 static int vhost_user_set_vring_call(struct vhost_dev *dev, 405 struct vhost_vring_file *file) 406 { 407 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 408 } 409 410 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 411 { 412 VhostUserMsg msg = { 413 .request = request, 414 .flags = VHOST_USER_VERSION, 415 .payload.u64 = u64, 416 .size = sizeof(msg.payload.u64), 417 }; 418 419 vhost_user_write(dev, &msg, NULL, 0); 420 421 return 0; 422 } 423 424 static int vhost_user_set_features(struct vhost_dev *dev, 425 uint64_t features) 426 { 427 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 428 } 429 430 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 431 uint64_t features) 432 { 433 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 434 } 435 436 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 437 { 438 VhostUserMsg msg = { 439 .request = request, 440 .flags = VHOST_USER_VERSION, 441 }; 442 443 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 444 return 0; 445 } 446 447 vhost_user_write(dev, &msg, NULL, 0); 448 449 if (vhost_user_read(dev, &msg) < 0) { 450 return 0; 451 } 452 453 if (msg.request != request) { 454 error_report("Received unexpected msg type. Expected %d received %d", 455 request, msg.request); 456 return -1; 457 } 458 459 if (msg.size != sizeof(msg.payload.u64)) { 460 error_report("Received bad msg size."); 461 return -1; 462 } 463 464 *u64 = msg.payload.u64; 465 466 return 0; 467 } 468 469 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 470 { 471 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 472 } 473 474 static int vhost_user_set_owner(struct vhost_dev *dev) 475 { 476 VhostUserMsg msg = { 477 .request = VHOST_USER_SET_OWNER, 478 .flags = VHOST_USER_VERSION, 479 }; 480 481 vhost_user_write(dev, &msg, NULL, 0); 482 483 return 0; 484 } 485 486 static int vhost_user_reset_device(struct vhost_dev *dev) 487 { 488 VhostUserMsg msg = { 489 .request = VHOST_USER_RESET_DEVICE, 490 .flags = VHOST_USER_VERSION, 491 }; 492 493 vhost_user_write(dev, &msg, NULL, 0); 494 495 return 0; 496 } 497 498 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 499 { 500 uint64_t features; 501 int err; 502 503 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 504 505 dev->opaque = opaque; 506 507 err = vhost_user_get_features(dev, &features); 508 if (err < 0) { 509 return err; 510 } 511 512 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 513 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 514 515 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 516 &features); 517 if (err < 0) { 518 return err; 519 } 520 521 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; 522 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 523 if (err < 0) { 524 return err; 525 } 526 527 /* query the max queues we support if backend supports Multiple Queue */ 528 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 529 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 530 &dev->max_queues); 531 if (err < 0) { 532 return err; 533 } 534 } 535 } 536 537 if (dev->migration_blocker == NULL && 538 !virtio_has_feature(dev->protocol_features, 539 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 540 error_setg(&dev->migration_blocker, 541 "Migration disabled: vhost-user backend lacks " 542 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 543 } 544 545 return 0; 546 } 547 548 static int vhost_user_cleanup(struct vhost_dev *dev) 549 { 550 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 551 552 dev->opaque = 0; 553 554 return 0; 555 } 556 557 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 558 { 559 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 560 561 return idx; 562 } 563 564 static int vhost_user_memslots_limit(struct vhost_dev *dev) 565 { 566 return VHOST_MEMORY_MAX_NREGIONS; 567 } 568 569 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 570 { 571 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 572 573 return virtio_has_feature(dev->protocol_features, 574 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 575 } 576 577 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 578 { 579 VhostUserMsg msg = { 0 }; 580 int err; 581 582 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 583 584 /* If guest supports GUEST_ANNOUNCE do nothing */ 585 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 586 return 0; 587 } 588 589 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 590 if (virtio_has_feature(dev->protocol_features, 591 VHOST_USER_PROTOCOL_F_RARP)) { 592 msg.request = VHOST_USER_SEND_RARP; 593 msg.flags = VHOST_USER_VERSION; 594 memcpy((char *)&msg.payload.u64, mac_addr, 6); 595 msg.size = sizeof(msg.payload.u64); 596 597 err = vhost_user_write(dev, &msg, NULL, 0); 598 return err; 599 } 600 return -1; 601 } 602 603 const VhostOps user_ops = { 604 .backend_type = VHOST_BACKEND_TYPE_USER, 605 .vhost_backend_init = vhost_user_init, 606 .vhost_backend_cleanup = vhost_user_cleanup, 607 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 608 .vhost_set_log_base = vhost_user_set_log_base, 609 .vhost_set_mem_table = vhost_user_set_mem_table, 610 .vhost_set_vring_addr = vhost_user_set_vring_addr, 611 .vhost_set_vring_endian = vhost_user_set_vring_endian, 612 .vhost_set_vring_num = vhost_user_set_vring_num, 613 .vhost_set_vring_base = vhost_user_set_vring_base, 614 .vhost_get_vring_base = vhost_user_get_vring_base, 615 .vhost_set_vring_kick = vhost_user_set_vring_kick, 616 .vhost_set_vring_call = vhost_user_set_vring_call, 617 .vhost_set_features = vhost_user_set_features, 618 .vhost_get_features = vhost_user_get_features, 619 .vhost_set_owner = vhost_user_set_owner, 620 .vhost_reset_device = vhost_user_reset_device, 621 .vhost_get_vq_index = vhost_user_get_vq_index, 622 .vhost_set_vring_enable = vhost_user_set_vring_enable, 623 .vhost_requires_shm_log = vhost_user_requires_shm_log, 624 .vhost_migration_done = vhost_user_migration_done, 625 }; 626