1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "sysemu/kvm.h" 20 #include "qemu/error-report.h" 21 #include "qemu/main-loop.h" 22 #include "qemu/sockets.h" 23 #include "sysemu/cryptodev.h" 24 #include "migration/migration.h" 25 #include "migration/postcopy-ram.h" 26 #include "trace.h" 27 28 #include <sys/ioctl.h> 29 #include <sys/socket.h> 30 #include <sys/un.h> 31 32 #include "standard-headers/linux/vhost_types.h" 33 34 #ifdef CONFIG_LINUX 35 #include <linux/userfaultfd.h> 36 #endif 37 38 #define VHOST_MEMORY_BASELINE_NREGIONS 8 39 #define VHOST_USER_F_PROTOCOL_FEATURES 30 40 #define VHOST_USER_SLAVE_MAX_FDS 8 41 42 /* 43 * Set maximum number of RAM slots supported to 44 * the maximum number supported by the target 45 * hardware plaform. 46 */ 47 #if defined(TARGET_X86) || defined(TARGET_X86_64) || \ 48 defined(TARGET_ARM) || defined(TARGET_ARM_64) 49 #include "hw/acpi/acpi.h" 50 #define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS 51 52 #elif defined(TARGET_PPC) || defined(TARGET_PPC_64) 53 #include "hw/ppc/spapr.h" 54 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 55 56 #else 57 #define VHOST_USER_MAX_RAM_SLOTS 512 58 #endif 59 60 /* 61 * Maximum size of virtio device config space 62 */ 63 #define VHOST_USER_MAX_CONFIG_SIZE 256 64 65 enum VhostUserProtocolFeature { 66 VHOST_USER_PROTOCOL_F_MQ = 0, 67 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 68 VHOST_USER_PROTOCOL_F_RARP = 2, 69 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 70 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 71 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 72 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 73 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 74 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 75 VHOST_USER_PROTOCOL_F_CONFIG = 9, 76 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 77 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 78 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 79 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, 80 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ 81 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, 82 VHOST_USER_PROTOCOL_F_MAX 83 }; 84 85 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 86 87 typedef enum VhostUserRequest { 88 VHOST_USER_NONE = 0, 89 VHOST_USER_GET_FEATURES = 1, 90 VHOST_USER_SET_FEATURES = 2, 91 VHOST_USER_SET_OWNER = 3, 92 VHOST_USER_RESET_OWNER = 4, 93 VHOST_USER_SET_MEM_TABLE = 5, 94 VHOST_USER_SET_LOG_BASE = 6, 95 VHOST_USER_SET_LOG_FD = 7, 96 VHOST_USER_SET_VRING_NUM = 8, 97 VHOST_USER_SET_VRING_ADDR = 9, 98 VHOST_USER_SET_VRING_BASE = 10, 99 VHOST_USER_GET_VRING_BASE = 11, 100 VHOST_USER_SET_VRING_KICK = 12, 101 VHOST_USER_SET_VRING_CALL = 13, 102 VHOST_USER_SET_VRING_ERR = 14, 103 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 104 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 105 VHOST_USER_GET_QUEUE_NUM = 17, 106 VHOST_USER_SET_VRING_ENABLE = 18, 107 VHOST_USER_SEND_RARP = 19, 108 VHOST_USER_NET_SET_MTU = 20, 109 VHOST_USER_SET_SLAVE_REQ_FD = 21, 110 VHOST_USER_IOTLB_MSG = 22, 111 VHOST_USER_SET_VRING_ENDIAN = 23, 112 VHOST_USER_GET_CONFIG = 24, 113 VHOST_USER_SET_CONFIG = 25, 114 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 115 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 116 VHOST_USER_POSTCOPY_ADVISE = 28, 117 VHOST_USER_POSTCOPY_LISTEN = 29, 118 VHOST_USER_POSTCOPY_END = 30, 119 VHOST_USER_GET_INFLIGHT_FD = 31, 120 VHOST_USER_SET_INFLIGHT_FD = 32, 121 VHOST_USER_GPU_SET_SOCKET = 33, 122 VHOST_USER_RESET_DEVICE = 34, 123 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ 124 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 125 VHOST_USER_ADD_MEM_REG = 37, 126 VHOST_USER_REM_MEM_REG = 38, 127 VHOST_USER_MAX 128 } VhostUserRequest; 129 130 typedef enum VhostUserSlaveRequest { 131 VHOST_USER_SLAVE_NONE = 0, 132 VHOST_USER_SLAVE_IOTLB_MSG = 1, 133 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 134 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 135 VHOST_USER_SLAVE_MAX 136 } VhostUserSlaveRequest; 137 138 typedef struct VhostUserMemoryRegion { 139 uint64_t guest_phys_addr; 140 uint64_t memory_size; 141 uint64_t userspace_addr; 142 uint64_t mmap_offset; 143 } VhostUserMemoryRegion; 144 145 typedef struct VhostUserMemory { 146 uint32_t nregions; 147 uint32_t padding; 148 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 149 } VhostUserMemory; 150 151 typedef struct VhostUserMemRegMsg { 152 uint64_t padding; 153 VhostUserMemoryRegion region; 154 } VhostUserMemRegMsg; 155 156 typedef struct VhostUserLog { 157 uint64_t mmap_size; 158 uint64_t mmap_offset; 159 } VhostUserLog; 160 161 typedef struct VhostUserConfig { 162 uint32_t offset; 163 uint32_t size; 164 uint32_t flags; 165 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 166 } VhostUserConfig; 167 168 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 169 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 170 171 typedef struct VhostUserCryptoSession { 172 /* session id for success, -1 on errors */ 173 int64_t session_id; 174 CryptoDevBackendSymSessionInfo session_setup_data; 175 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 176 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 177 } VhostUserCryptoSession; 178 179 static VhostUserConfig c __attribute__ ((unused)); 180 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 181 + sizeof(c.size) \ 182 + sizeof(c.flags)) 183 184 typedef struct VhostUserVringArea { 185 uint64_t u64; 186 uint64_t size; 187 uint64_t offset; 188 } VhostUserVringArea; 189 190 typedef struct VhostUserInflight { 191 uint64_t mmap_size; 192 uint64_t mmap_offset; 193 uint16_t num_queues; 194 uint16_t queue_size; 195 } VhostUserInflight; 196 197 typedef struct { 198 VhostUserRequest request; 199 200 #define VHOST_USER_VERSION_MASK (0x3) 201 #define VHOST_USER_REPLY_MASK (0x1<<2) 202 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 203 uint32_t flags; 204 uint32_t size; /* the following payload size */ 205 } QEMU_PACKED VhostUserHeader; 206 207 typedef union { 208 #define VHOST_USER_VRING_IDX_MASK (0xff) 209 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 210 uint64_t u64; 211 struct vhost_vring_state state; 212 struct vhost_vring_addr addr; 213 VhostUserMemory memory; 214 VhostUserMemRegMsg mem_reg; 215 VhostUserLog log; 216 struct vhost_iotlb_msg iotlb; 217 VhostUserConfig config; 218 VhostUserCryptoSession session; 219 VhostUserVringArea area; 220 VhostUserInflight inflight; 221 } VhostUserPayload; 222 223 typedef struct VhostUserMsg { 224 VhostUserHeader hdr; 225 VhostUserPayload payload; 226 } QEMU_PACKED VhostUserMsg; 227 228 static VhostUserMsg m __attribute__ ((unused)); 229 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 230 231 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 232 233 /* The version of the protocol we support */ 234 #define VHOST_USER_VERSION (0x1) 235 236 struct vhost_user { 237 struct vhost_dev *dev; 238 /* Shared between vhost devs of the same virtio device */ 239 VhostUserState *user; 240 int slave_fd; 241 NotifierWithReturn postcopy_notifier; 242 struct PostCopyFD postcopy_fd; 243 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 244 /* Length of the region_rb and region_rb_offset arrays */ 245 size_t region_rb_len; 246 /* RAMBlock associated with a given region */ 247 RAMBlock **region_rb; 248 /* The offset from the start of the RAMBlock to the start of the 249 * vhost region. 250 */ 251 ram_addr_t *region_rb_offset; 252 253 /* True once we've entered postcopy_listen */ 254 bool postcopy_listen; 255 256 /* Our current regions */ 257 int num_shadow_regions; 258 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 259 }; 260 261 struct scrub_regions { 262 struct vhost_memory_region *region; 263 int reg_idx; 264 int fd_idx; 265 }; 266 267 static bool ioeventfd_enabled(void) 268 { 269 return !kvm_enabled() || kvm_eventfds_enabled(); 270 } 271 272 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 273 { 274 struct vhost_user *u = dev->opaque; 275 CharBackend *chr = u->user->chr; 276 uint8_t *p = (uint8_t *) msg; 277 int r, size = VHOST_USER_HDR_SIZE; 278 279 r = qemu_chr_fe_read_all(chr, p, size); 280 if (r != size) { 281 error_report("Failed to read msg header. Read %d instead of %d." 282 " Original request %d.", r, size, msg->hdr.request); 283 return -1; 284 } 285 286 /* validate received flags */ 287 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 288 error_report("Failed to read msg header." 289 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 290 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 291 return -1; 292 } 293 294 return 0; 295 } 296 297 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 298 { 299 struct vhost_user *u = dev->opaque; 300 CharBackend *chr = u->user->chr; 301 uint8_t *p = (uint8_t *) msg; 302 int r, size; 303 304 if (vhost_user_read_header(dev, msg) < 0) { 305 return -1; 306 } 307 308 /* validate message size is sane */ 309 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 310 error_report("Failed to read msg header." 311 " Size %d exceeds the maximum %zu.", msg->hdr.size, 312 VHOST_USER_PAYLOAD_SIZE); 313 return -1; 314 } 315 316 if (msg->hdr.size) { 317 p += VHOST_USER_HDR_SIZE; 318 size = msg->hdr.size; 319 r = qemu_chr_fe_read_all(chr, p, size); 320 if (r != size) { 321 error_report("Failed to read msg payload." 322 " Read %d instead of %d.", r, msg->hdr.size); 323 return -1; 324 } 325 } 326 327 return 0; 328 } 329 330 static int process_message_reply(struct vhost_dev *dev, 331 const VhostUserMsg *msg) 332 { 333 VhostUserMsg msg_reply; 334 335 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 336 return 0; 337 } 338 339 if (vhost_user_read(dev, &msg_reply) < 0) { 340 return -1; 341 } 342 343 if (msg_reply.hdr.request != msg->hdr.request) { 344 error_report("Received unexpected msg type." 345 "Expected %d received %d", 346 msg->hdr.request, msg_reply.hdr.request); 347 return -1; 348 } 349 350 return msg_reply.payload.u64 ? -1 : 0; 351 } 352 353 static bool vhost_user_one_time_request(VhostUserRequest request) 354 { 355 switch (request) { 356 case VHOST_USER_SET_OWNER: 357 case VHOST_USER_RESET_OWNER: 358 case VHOST_USER_SET_MEM_TABLE: 359 case VHOST_USER_GET_QUEUE_NUM: 360 case VHOST_USER_NET_SET_MTU: 361 return true; 362 default: 363 return false; 364 } 365 } 366 367 /* most non-init callers ignore the error */ 368 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 369 int *fds, int fd_num) 370 { 371 struct vhost_user *u = dev->opaque; 372 CharBackend *chr = u->user->chr; 373 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 374 375 /* 376 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 377 * we just need send it once in the first time. For later such 378 * request, we just ignore it. 379 */ 380 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 381 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 382 return 0; 383 } 384 385 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 386 error_report("Failed to set msg fds."); 387 return -1; 388 } 389 390 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 391 if (ret != size) { 392 error_report("Failed to write msg." 393 " Wrote %d instead of %d.", ret, size); 394 return -1; 395 } 396 397 return 0; 398 } 399 400 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 401 { 402 VhostUserMsg msg = { 403 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 404 .hdr.flags = VHOST_USER_VERSION, 405 }; 406 407 return vhost_user_write(dev, &msg, &fd, 1); 408 } 409 410 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 411 struct vhost_log *log) 412 { 413 int fds[VHOST_USER_MAX_RAM_SLOTS]; 414 size_t fd_num = 0; 415 bool shmfd = virtio_has_feature(dev->protocol_features, 416 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 417 VhostUserMsg msg = { 418 .hdr.request = VHOST_USER_SET_LOG_BASE, 419 .hdr.flags = VHOST_USER_VERSION, 420 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 421 .payload.log.mmap_offset = 0, 422 .hdr.size = sizeof(msg.payload.log), 423 }; 424 425 if (shmfd && log->fd != -1) { 426 fds[fd_num++] = log->fd; 427 } 428 429 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 430 return -1; 431 } 432 433 if (shmfd) { 434 msg.hdr.size = 0; 435 if (vhost_user_read(dev, &msg) < 0) { 436 return -1; 437 } 438 439 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 440 error_report("Received unexpected msg type. " 441 "Expected %d received %d", 442 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 443 return -1; 444 } 445 } 446 447 return 0; 448 } 449 450 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 451 int *fd) 452 { 453 MemoryRegion *mr; 454 455 assert((uintptr_t)addr == addr); 456 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 457 *fd = memory_region_get_fd(mr); 458 459 return mr; 460 } 461 462 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 463 struct vhost_memory_region *src, 464 uint64_t mmap_offset) 465 { 466 assert(src != NULL && dst != NULL); 467 dst->userspace_addr = src->userspace_addr; 468 dst->memory_size = src->memory_size; 469 dst->guest_phys_addr = src->guest_phys_addr; 470 dst->mmap_offset = mmap_offset; 471 } 472 473 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 474 struct vhost_dev *dev, 475 VhostUserMsg *msg, 476 int *fds, size_t *fd_num, 477 bool track_ramblocks) 478 { 479 int i, fd; 480 ram_addr_t offset; 481 MemoryRegion *mr; 482 struct vhost_memory_region *reg; 483 VhostUserMemoryRegion region_buffer; 484 485 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 486 487 for (i = 0; i < dev->mem->nregions; ++i) { 488 reg = dev->mem->regions + i; 489 490 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 491 if (fd > 0) { 492 if (track_ramblocks) { 493 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 494 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 495 reg->memory_size, 496 reg->guest_phys_addr, 497 reg->userspace_addr, 498 offset); 499 u->region_rb_offset[i] = offset; 500 u->region_rb[i] = mr->ram_block; 501 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 502 error_report("Failed preparing vhost-user memory table msg"); 503 return -1; 504 } 505 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 506 msg->payload.memory.regions[*fd_num] = region_buffer; 507 fds[(*fd_num)++] = fd; 508 } else if (track_ramblocks) { 509 u->region_rb_offset[i] = 0; 510 u->region_rb[i] = NULL; 511 } 512 } 513 514 msg->payload.memory.nregions = *fd_num; 515 516 if (!*fd_num) { 517 error_report("Failed initializing vhost-user memory map, " 518 "consider using -object memory-backend-file share=on"); 519 return -1; 520 } 521 522 msg->hdr.size = sizeof(msg->payload.memory.nregions); 523 msg->hdr.size += sizeof(msg->payload.memory.padding); 524 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 525 526 return 1; 527 } 528 529 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 530 struct vhost_memory_region *vdev_reg) 531 { 532 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 533 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 534 shadow_reg->memory_size == vdev_reg->memory_size; 535 } 536 537 static void scrub_shadow_regions(struct vhost_dev *dev, 538 struct scrub_regions *add_reg, 539 int *nr_add_reg, 540 struct scrub_regions *rem_reg, 541 int *nr_rem_reg, uint64_t *shadow_pcb, 542 bool track_ramblocks) 543 { 544 struct vhost_user *u = dev->opaque; 545 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 546 struct vhost_memory_region *reg, *shadow_reg; 547 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 548 ram_addr_t offset; 549 MemoryRegion *mr; 550 bool matching; 551 552 /* 553 * Find memory regions present in our shadow state which are not in 554 * the device's current memory state. 555 * 556 * Mark regions in both the shadow and device state as "found". 557 */ 558 for (i = 0; i < u->num_shadow_regions; i++) { 559 shadow_reg = &u->shadow_regions[i]; 560 matching = false; 561 562 for (j = 0; j < dev->mem->nregions; j++) { 563 reg = &dev->mem->regions[j]; 564 565 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 566 567 if (reg_equal(shadow_reg, reg)) { 568 matching = true; 569 found[j] = true; 570 if (track_ramblocks) { 571 /* 572 * Reset postcopy client bases, region_rb, and 573 * region_rb_offset in case regions are removed. 574 */ 575 if (fd > 0) { 576 u->region_rb_offset[j] = offset; 577 u->region_rb[j] = mr->ram_block; 578 shadow_pcb[j] = u->postcopy_client_bases[i]; 579 } else { 580 u->region_rb_offset[j] = 0; 581 u->region_rb[j] = NULL; 582 } 583 } 584 break; 585 } 586 } 587 588 /* 589 * If the region was not found in the current device memory state 590 * create an entry for it in the removed list. 591 */ 592 if (!matching) { 593 rem_reg[rm_idx].region = shadow_reg; 594 rem_reg[rm_idx++].reg_idx = i; 595 } 596 } 597 598 /* 599 * For regions not marked "found", create entries in the added list. 600 * 601 * Note their indexes in the device memory state and the indexes of their 602 * file descriptors. 603 */ 604 for (i = 0; i < dev->mem->nregions; i++) { 605 reg = &dev->mem->regions[i]; 606 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 607 if (fd > 0) { 608 ++fd_num; 609 } 610 611 /* 612 * If the region was in both the shadow and device state we don't 613 * need to send a VHOST_USER_ADD_MEM_REG message for it. 614 */ 615 if (found[i]) { 616 continue; 617 } 618 619 add_reg[add_idx].region = reg; 620 add_reg[add_idx].reg_idx = i; 621 add_reg[add_idx++].fd_idx = fd_num; 622 } 623 *nr_rem_reg = rm_idx; 624 *nr_add_reg = add_idx; 625 626 return; 627 } 628 629 static int send_remove_regions(struct vhost_dev *dev, 630 struct scrub_regions *remove_reg, 631 int nr_rem_reg, VhostUserMsg *msg, 632 bool reply_supported) 633 { 634 struct vhost_user *u = dev->opaque; 635 struct vhost_memory_region *shadow_reg; 636 int i, fd, shadow_reg_idx, ret; 637 ram_addr_t offset; 638 VhostUserMemoryRegion region_buffer; 639 640 /* 641 * The regions in remove_reg appear in the same order they do in the 642 * shadow table. Therefore we can minimize memory copies by iterating 643 * through remove_reg backwards. 644 */ 645 for (i = nr_rem_reg - 1; i >= 0; i--) { 646 shadow_reg = remove_reg[i].region; 647 shadow_reg_idx = remove_reg[i].reg_idx; 648 649 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 650 651 if (fd > 0) { 652 msg->hdr.request = VHOST_USER_REM_MEM_REG; 653 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 654 msg->payload.mem_reg.region = region_buffer; 655 656 if (vhost_user_write(dev, msg, &fd, 1) < 0) { 657 return -1; 658 } 659 660 if (reply_supported) { 661 ret = process_message_reply(dev, msg); 662 if (ret) { 663 return ret; 664 } 665 } 666 } 667 668 /* 669 * At this point we know the backend has unmapped the region. It is now 670 * safe to remove it from the shadow table. 671 */ 672 memmove(&u->shadow_regions[shadow_reg_idx], 673 &u->shadow_regions[shadow_reg_idx + 1], 674 sizeof(struct vhost_memory_region) * 675 (u->num_shadow_regions - shadow_reg_idx - 1)); 676 u->num_shadow_regions--; 677 } 678 679 return 0; 680 } 681 682 static int send_add_regions(struct vhost_dev *dev, 683 struct scrub_regions *add_reg, int nr_add_reg, 684 VhostUserMsg *msg, uint64_t *shadow_pcb, 685 bool reply_supported, bool track_ramblocks) 686 { 687 struct vhost_user *u = dev->opaque; 688 int i, fd, ret, reg_idx, reg_fd_idx; 689 struct vhost_memory_region *reg; 690 MemoryRegion *mr; 691 ram_addr_t offset; 692 VhostUserMsg msg_reply; 693 VhostUserMemoryRegion region_buffer; 694 695 for (i = 0; i < nr_add_reg; i++) { 696 reg = add_reg[i].region; 697 reg_idx = add_reg[i].reg_idx; 698 reg_fd_idx = add_reg[i].fd_idx; 699 700 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 701 702 if (fd > 0) { 703 if (track_ramblocks) { 704 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 705 reg->memory_size, 706 reg->guest_phys_addr, 707 reg->userspace_addr, 708 offset); 709 u->region_rb_offset[reg_idx] = offset; 710 u->region_rb[reg_idx] = mr->ram_block; 711 } 712 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 713 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 714 msg->payload.mem_reg.region = region_buffer; 715 716 if (vhost_user_write(dev, msg, &fd, 1) < 0) { 717 return -1; 718 } 719 720 if (track_ramblocks) { 721 uint64_t reply_gpa; 722 723 if (vhost_user_read(dev, &msg_reply) < 0) { 724 return -1; 725 } 726 727 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 728 729 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 730 error_report("%s: Received unexpected msg type." 731 "Expected %d received %d", __func__, 732 VHOST_USER_ADD_MEM_REG, 733 msg_reply.hdr.request); 734 return -1; 735 } 736 737 /* 738 * We're using the same structure, just reusing one of the 739 * fields, so it should be the same size. 740 */ 741 if (msg_reply.hdr.size != msg->hdr.size) { 742 error_report("%s: Unexpected size for postcopy reply " 743 "%d vs %d", __func__, msg_reply.hdr.size, 744 msg->hdr.size); 745 return -1; 746 } 747 748 /* Get the postcopy client base from the backend's reply. */ 749 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 750 shadow_pcb[reg_idx] = 751 msg_reply.payload.mem_reg.region.userspace_addr; 752 trace_vhost_user_set_mem_table_postcopy( 753 msg_reply.payload.mem_reg.region.userspace_addr, 754 msg->payload.mem_reg.region.userspace_addr, 755 reg_fd_idx, reg_idx); 756 } else { 757 error_report("%s: invalid postcopy reply for region. " 758 "Got guest physical address %" PRIX64 ", expected " 759 "%" PRIX64, __func__, reply_gpa, 760 dev->mem->regions[reg_idx].guest_phys_addr); 761 return -1; 762 } 763 } else if (reply_supported) { 764 ret = process_message_reply(dev, msg); 765 if (ret) { 766 return ret; 767 } 768 } 769 } else if (track_ramblocks) { 770 u->region_rb_offset[reg_idx] = 0; 771 u->region_rb[reg_idx] = NULL; 772 } 773 774 /* 775 * At this point, we know the backend has mapped in the new 776 * region, if the region has a valid file descriptor. 777 * 778 * The region should now be added to the shadow table. 779 */ 780 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 781 reg->guest_phys_addr; 782 u->shadow_regions[u->num_shadow_regions].userspace_addr = 783 reg->userspace_addr; 784 u->shadow_regions[u->num_shadow_regions].memory_size = 785 reg->memory_size; 786 u->num_shadow_regions++; 787 } 788 789 return 0; 790 } 791 792 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 793 VhostUserMsg *msg, 794 bool reply_supported, 795 bool track_ramblocks) 796 { 797 struct vhost_user *u = dev->opaque; 798 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 799 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 800 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 801 int nr_add_reg, nr_rem_reg; 802 803 msg->hdr.size = sizeof(msg->payload.mem_reg); 804 805 /* Find the regions which need to be removed or added. */ 806 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 807 shadow_pcb, track_ramblocks); 808 809 if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 810 reply_supported) < 0) 811 { 812 goto err; 813 } 814 815 if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg, 816 shadow_pcb, reply_supported, track_ramblocks) < 0) 817 { 818 goto err; 819 } 820 821 if (track_ramblocks) { 822 memcpy(u->postcopy_client_bases, shadow_pcb, 823 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 824 /* 825 * Now we've registered this with the postcopy code, we ack to the 826 * client, because now we're in the position to be able to deal with 827 * any faults it generates. 828 */ 829 /* TODO: Use this for failure cases as well with a bad value. */ 830 msg->hdr.size = sizeof(msg->payload.u64); 831 msg->payload.u64 = 0; /* OK */ 832 833 if (vhost_user_write(dev, msg, NULL, 0) < 0) { 834 return -1; 835 } 836 } 837 838 return 0; 839 840 err: 841 if (track_ramblocks) { 842 memcpy(u->postcopy_client_bases, shadow_pcb, 843 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 844 } 845 846 return -1; 847 } 848 849 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 850 struct vhost_memory *mem, 851 bool reply_supported, 852 bool config_mem_slots) 853 { 854 struct vhost_user *u = dev->opaque; 855 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 856 size_t fd_num = 0; 857 VhostUserMsg msg_reply; 858 int region_i, msg_i; 859 860 VhostUserMsg msg = { 861 .hdr.flags = VHOST_USER_VERSION, 862 }; 863 864 if (u->region_rb_len < dev->mem->nregions) { 865 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 866 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 867 dev->mem->nregions); 868 memset(&(u->region_rb[u->region_rb_len]), '\0', 869 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 870 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 871 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 872 u->region_rb_len = dev->mem->nregions; 873 } 874 875 if (config_mem_slots) { 876 if (vhost_user_add_remove_regions(dev, &msg, reply_supported, 877 true) < 0) { 878 return -1; 879 } 880 } else { 881 if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 882 true) < 0) { 883 return -1; 884 } 885 886 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 887 return -1; 888 } 889 890 if (vhost_user_read(dev, &msg_reply) < 0) { 891 return -1; 892 } 893 894 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 895 error_report("%s: Received unexpected msg type." 896 "Expected %d received %d", __func__, 897 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 898 return -1; 899 } 900 901 /* 902 * We're using the same structure, just reusing one of the 903 * fields, so it should be the same size. 904 */ 905 if (msg_reply.hdr.size != msg.hdr.size) { 906 error_report("%s: Unexpected size for postcopy reply " 907 "%d vs %d", __func__, msg_reply.hdr.size, 908 msg.hdr.size); 909 return -1; 910 } 911 912 memset(u->postcopy_client_bases, 0, 913 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 914 915 /* 916 * They're in the same order as the regions that were sent 917 * but some of the regions were skipped (above) if they 918 * didn't have fd's 919 */ 920 for (msg_i = 0, region_i = 0; 921 region_i < dev->mem->nregions; 922 region_i++) { 923 if (msg_i < fd_num && 924 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 925 dev->mem->regions[region_i].guest_phys_addr) { 926 u->postcopy_client_bases[region_i] = 927 msg_reply.payload.memory.regions[msg_i].userspace_addr; 928 trace_vhost_user_set_mem_table_postcopy( 929 msg_reply.payload.memory.regions[msg_i].userspace_addr, 930 msg.payload.memory.regions[msg_i].userspace_addr, 931 msg_i, region_i); 932 msg_i++; 933 } 934 } 935 if (msg_i != fd_num) { 936 error_report("%s: postcopy reply not fully consumed " 937 "%d vs %zd", 938 __func__, msg_i, fd_num); 939 return -1; 940 } 941 942 /* 943 * Now we've registered this with the postcopy code, we ack to the 944 * client, because now we're in the position to be able to deal 945 * with any faults it generates. 946 */ 947 /* TODO: Use this for failure cases as well with a bad value. */ 948 msg.hdr.size = sizeof(msg.payload.u64); 949 msg.payload.u64 = 0; /* OK */ 950 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 951 return -1; 952 } 953 } 954 955 return 0; 956 } 957 958 static int vhost_user_set_mem_table(struct vhost_dev *dev, 959 struct vhost_memory *mem) 960 { 961 struct vhost_user *u = dev->opaque; 962 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 963 size_t fd_num = 0; 964 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 965 bool reply_supported = virtio_has_feature(dev->protocol_features, 966 VHOST_USER_PROTOCOL_F_REPLY_ACK); 967 bool config_mem_slots = 968 virtio_has_feature(dev->protocol_features, 969 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 970 971 if (do_postcopy) { 972 /* 973 * Postcopy has enough differences that it's best done in it's own 974 * version 975 */ 976 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 977 config_mem_slots); 978 } 979 980 VhostUserMsg msg = { 981 .hdr.flags = VHOST_USER_VERSION, 982 }; 983 984 if (reply_supported) { 985 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 986 } 987 988 if (config_mem_slots) { 989 if (vhost_user_add_remove_regions(dev, &msg, reply_supported, 990 false) < 0) { 991 return -1; 992 } 993 } else { 994 if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 995 false) < 0) { 996 return -1; 997 } 998 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 999 return -1; 1000 } 1001 1002 if (reply_supported) { 1003 return process_message_reply(dev, &msg); 1004 } 1005 } 1006 1007 return 0; 1008 } 1009 1010 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1011 struct vhost_vring_addr *addr) 1012 { 1013 VhostUserMsg msg = { 1014 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1015 .hdr.flags = VHOST_USER_VERSION, 1016 .payload.addr = *addr, 1017 .hdr.size = sizeof(msg.payload.addr), 1018 }; 1019 1020 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1021 return -1; 1022 } 1023 1024 return 0; 1025 } 1026 1027 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1028 struct vhost_vring_state *ring) 1029 { 1030 bool cross_endian = virtio_has_feature(dev->protocol_features, 1031 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1032 VhostUserMsg msg = { 1033 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1034 .hdr.flags = VHOST_USER_VERSION, 1035 .payload.state = *ring, 1036 .hdr.size = sizeof(msg.payload.state), 1037 }; 1038 1039 if (!cross_endian) { 1040 error_report("vhost-user trying to send unhandled ioctl"); 1041 return -1; 1042 } 1043 1044 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1045 return -1; 1046 } 1047 1048 return 0; 1049 } 1050 1051 static int vhost_set_vring(struct vhost_dev *dev, 1052 unsigned long int request, 1053 struct vhost_vring_state *ring) 1054 { 1055 VhostUserMsg msg = { 1056 .hdr.request = request, 1057 .hdr.flags = VHOST_USER_VERSION, 1058 .payload.state = *ring, 1059 .hdr.size = sizeof(msg.payload.state), 1060 }; 1061 1062 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1063 return -1; 1064 } 1065 1066 return 0; 1067 } 1068 1069 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1070 struct vhost_vring_state *ring) 1071 { 1072 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 1073 } 1074 1075 static void vhost_user_host_notifier_restore(struct vhost_dev *dev, 1076 int queue_idx) 1077 { 1078 struct vhost_user *u = dev->opaque; 1079 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 1080 VirtIODevice *vdev = dev->vdev; 1081 1082 if (n->addr && !n->set) { 1083 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true); 1084 n->set = true; 1085 } 1086 } 1087 1088 static void vhost_user_host_notifier_remove(struct vhost_dev *dev, 1089 int queue_idx) 1090 { 1091 struct vhost_user *u = dev->opaque; 1092 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 1093 VirtIODevice *vdev = dev->vdev; 1094 1095 if (n->addr && n->set) { 1096 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 1097 n->set = false; 1098 } 1099 } 1100 1101 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1102 struct vhost_vring_state *ring) 1103 { 1104 vhost_user_host_notifier_restore(dev, ring->index); 1105 1106 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 1107 } 1108 1109 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1110 { 1111 int i; 1112 1113 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1114 return -1; 1115 } 1116 1117 for (i = 0; i < dev->nvqs; ++i) { 1118 struct vhost_vring_state state = { 1119 .index = dev->vq_index + i, 1120 .num = enable, 1121 }; 1122 1123 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 1124 } 1125 1126 return 0; 1127 } 1128 1129 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1130 struct vhost_vring_state *ring) 1131 { 1132 VhostUserMsg msg = { 1133 .hdr.request = VHOST_USER_GET_VRING_BASE, 1134 .hdr.flags = VHOST_USER_VERSION, 1135 .payload.state = *ring, 1136 .hdr.size = sizeof(msg.payload.state), 1137 }; 1138 1139 vhost_user_host_notifier_remove(dev, ring->index); 1140 1141 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1142 return -1; 1143 } 1144 1145 if (vhost_user_read(dev, &msg) < 0) { 1146 return -1; 1147 } 1148 1149 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1150 error_report("Received unexpected msg type. Expected %d received %d", 1151 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1152 return -1; 1153 } 1154 1155 if (msg.hdr.size != sizeof(msg.payload.state)) { 1156 error_report("Received bad msg size."); 1157 return -1; 1158 } 1159 1160 *ring = msg.payload.state; 1161 1162 return 0; 1163 } 1164 1165 static int vhost_set_vring_file(struct vhost_dev *dev, 1166 VhostUserRequest request, 1167 struct vhost_vring_file *file) 1168 { 1169 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1170 size_t fd_num = 0; 1171 VhostUserMsg msg = { 1172 .hdr.request = request, 1173 .hdr.flags = VHOST_USER_VERSION, 1174 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1175 .hdr.size = sizeof(msg.payload.u64), 1176 }; 1177 1178 if (ioeventfd_enabled() && file->fd > 0) { 1179 fds[fd_num++] = file->fd; 1180 } else { 1181 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1182 } 1183 1184 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 1185 return -1; 1186 } 1187 1188 return 0; 1189 } 1190 1191 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1192 struct vhost_vring_file *file) 1193 { 1194 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1195 } 1196 1197 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1198 struct vhost_vring_file *file) 1199 { 1200 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1201 } 1202 1203 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 1204 { 1205 VhostUserMsg msg = { 1206 .hdr.request = request, 1207 .hdr.flags = VHOST_USER_VERSION, 1208 .payload.u64 = u64, 1209 .hdr.size = sizeof(msg.payload.u64), 1210 }; 1211 1212 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1213 return -1; 1214 } 1215 1216 return 0; 1217 } 1218 1219 static int vhost_user_set_features(struct vhost_dev *dev, 1220 uint64_t features) 1221 { 1222 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 1223 } 1224 1225 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1226 uint64_t features) 1227 { 1228 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 1229 } 1230 1231 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1232 { 1233 VhostUserMsg msg = { 1234 .hdr.request = request, 1235 .hdr.flags = VHOST_USER_VERSION, 1236 }; 1237 1238 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 1239 return 0; 1240 } 1241 1242 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1243 return -1; 1244 } 1245 1246 if (vhost_user_read(dev, &msg) < 0) { 1247 return -1; 1248 } 1249 1250 if (msg.hdr.request != request) { 1251 error_report("Received unexpected msg type. Expected %d received %d", 1252 request, msg.hdr.request); 1253 return -1; 1254 } 1255 1256 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1257 error_report("Received bad msg size."); 1258 return -1; 1259 } 1260 1261 *u64 = msg.payload.u64; 1262 1263 return 0; 1264 } 1265 1266 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1267 { 1268 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 1269 } 1270 1271 static int vhost_user_set_owner(struct vhost_dev *dev) 1272 { 1273 VhostUserMsg msg = { 1274 .hdr.request = VHOST_USER_SET_OWNER, 1275 .hdr.flags = VHOST_USER_VERSION, 1276 }; 1277 1278 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1279 return -1; 1280 } 1281 1282 return 0; 1283 } 1284 1285 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1286 uint64_t *max_memslots) 1287 { 1288 uint64_t backend_max_memslots; 1289 int err; 1290 1291 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1292 &backend_max_memslots); 1293 if (err < 0) { 1294 return err; 1295 } 1296 1297 *max_memslots = backend_max_memslots; 1298 1299 return 0; 1300 } 1301 1302 static int vhost_user_reset_device(struct vhost_dev *dev) 1303 { 1304 VhostUserMsg msg = { 1305 .hdr.flags = VHOST_USER_VERSION, 1306 }; 1307 1308 msg.hdr.request = virtio_has_feature(dev->protocol_features, 1309 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 1310 ? VHOST_USER_RESET_DEVICE 1311 : VHOST_USER_RESET_OWNER; 1312 1313 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1314 return -1; 1315 } 1316 1317 return 0; 1318 } 1319 1320 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 1321 { 1322 int ret = -1; 1323 1324 if (!dev->config_ops) { 1325 return -1; 1326 } 1327 1328 if (dev->config_ops->vhost_dev_config_notifier) { 1329 ret = dev->config_ops->vhost_dev_config_notifier(dev); 1330 } 1331 1332 return ret; 1333 } 1334 1335 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 1336 VhostUserVringArea *area, 1337 int fd) 1338 { 1339 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1340 size_t page_size = qemu_real_host_page_size; 1341 struct vhost_user *u = dev->opaque; 1342 VhostUserState *user = u->user; 1343 VirtIODevice *vdev = dev->vdev; 1344 VhostUserHostNotifier *n; 1345 void *addr; 1346 char *name; 1347 1348 if (!virtio_has_feature(dev->protocol_features, 1349 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1350 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1351 return -1; 1352 } 1353 1354 n = &user->notifier[queue_idx]; 1355 1356 if (n->addr) { 1357 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 1358 object_unparent(OBJECT(&n->mr)); 1359 munmap(n->addr, page_size); 1360 n->addr = NULL; 1361 } 1362 1363 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1364 return 0; 1365 } 1366 1367 /* Sanity check. */ 1368 if (area->size != page_size) { 1369 return -1; 1370 } 1371 1372 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1373 fd, area->offset); 1374 if (addr == MAP_FAILED) { 1375 return -1; 1376 } 1377 1378 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1379 user, queue_idx); 1380 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1381 page_size, addr); 1382 g_free(name); 1383 1384 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1385 munmap(addr, page_size); 1386 return -1; 1387 } 1388 1389 n->addr = addr; 1390 n->set = true; 1391 1392 return 0; 1393 } 1394 1395 static void slave_read(void *opaque) 1396 { 1397 struct vhost_dev *dev = opaque; 1398 struct vhost_user *u = dev->opaque; 1399 VhostUserHeader hdr = { 0, }; 1400 VhostUserPayload payload = { 0, }; 1401 int size, ret = 0; 1402 struct iovec iov; 1403 struct msghdr msgh; 1404 int fd[VHOST_USER_SLAVE_MAX_FDS]; 1405 char control[CMSG_SPACE(sizeof(fd))]; 1406 struct cmsghdr *cmsg; 1407 int i, fdsize = 0; 1408 1409 memset(&msgh, 0, sizeof(msgh)); 1410 msgh.msg_iov = &iov; 1411 msgh.msg_iovlen = 1; 1412 msgh.msg_control = control; 1413 msgh.msg_controllen = sizeof(control); 1414 1415 memset(fd, -1, sizeof(fd)); 1416 1417 /* Read header */ 1418 iov.iov_base = &hdr; 1419 iov.iov_len = VHOST_USER_HDR_SIZE; 1420 1421 do { 1422 size = recvmsg(u->slave_fd, &msgh, 0); 1423 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 1424 1425 if (size != VHOST_USER_HDR_SIZE) { 1426 error_report("Failed to read from slave."); 1427 goto err; 1428 } 1429 1430 if (msgh.msg_flags & MSG_CTRUNC) { 1431 error_report("Truncated message."); 1432 goto err; 1433 } 1434 1435 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 1436 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 1437 if (cmsg->cmsg_level == SOL_SOCKET && 1438 cmsg->cmsg_type == SCM_RIGHTS) { 1439 fdsize = cmsg->cmsg_len - CMSG_LEN(0); 1440 memcpy(fd, CMSG_DATA(cmsg), fdsize); 1441 break; 1442 } 1443 } 1444 1445 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1446 error_report("Failed to read msg header." 1447 " Size %d exceeds the maximum %zu.", hdr.size, 1448 VHOST_USER_PAYLOAD_SIZE); 1449 goto err; 1450 } 1451 1452 /* Read payload */ 1453 do { 1454 size = read(u->slave_fd, &payload, hdr.size); 1455 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 1456 1457 if (size != hdr.size) { 1458 error_report("Failed to read payload from slave."); 1459 goto err; 1460 } 1461 1462 switch (hdr.request) { 1463 case VHOST_USER_SLAVE_IOTLB_MSG: 1464 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1465 break; 1466 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1467 ret = vhost_user_slave_handle_config_change(dev); 1468 break; 1469 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1470 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1471 fd[0]); 1472 break; 1473 default: 1474 error_report("Received unexpected msg type: %d.", hdr.request); 1475 ret = -EINVAL; 1476 } 1477 1478 /* Close the remaining file descriptors. */ 1479 for (i = 0; i < fdsize; i++) { 1480 if (fd[i] != -1) { 1481 close(fd[i]); 1482 } 1483 } 1484 1485 /* 1486 * REPLY_ACK feature handling. Other reply types has to be managed 1487 * directly in their request handlers. 1488 */ 1489 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1490 struct iovec iovec[2]; 1491 1492 1493 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1494 hdr.flags |= VHOST_USER_REPLY_MASK; 1495 1496 payload.u64 = !!ret; 1497 hdr.size = sizeof(payload.u64); 1498 1499 iovec[0].iov_base = &hdr; 1500 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1501 iovec[1].iov_base = &payload; 1502 iovec[1].iov_len = hdr.size; 1503 1504 do { 1505 size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); 1506 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 1507 1508 if (size != VHOST_USER_HDR_SIZE + hdr.size) { 1509 error_report("Failed to send msg reply to slave."); 1510 goto err; 1511 } 1512 } 1513 1514 return; 1515 1516 err: 1517 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1518 close(u->slave_fd); 1519 u->slave_fd = -1; 1520 for (i = 0; i < fdsize; i++) { 1521 if (fd[i] != -1) { 1522 close(fd[i]); 1523 } 1524 } 1525 return; 1526 } 1527 1528 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1529 { 1530 VhostUserMsg msg = { 1531 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, 1532 .hdr.flags = VHOST_USER_VERSION, 1533 }; 1534 struct vhost_user *u = dev->opaque; 1535 int sv[2], ret = 0; 1536 bool reply_supported = virtio_has_feature(dev->protocol_features, 1537 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1538 1539 if (!virtio_has_feature(dev->protocol_features, 1540 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 1541 return 0; 1542 } 1543 1544 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1545 error_report("socketpair() failed"); 1546 return -1; 1547 } 1548 1549 u->slave_fd = sv[0]; 1550 qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev); 1551 1552 if (reply_supported) { 1553 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1554 } 1555 1556 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1557 if (ret) { 1558 goto out; 1559 } 1560 1561 if (reply_supported) { 1562 ret = process_message_reply(dev, &msg); 1563 } 1564 1565 out: 1566 close(sv[1]); 1567 if (ret) { 1568 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1569 close(u->slave_fd); 1570 u->slave_fd = -1; 1571 } 1572 1573 return ret; 1574 } 1575 1576 #ifdef CONFIG_LINUX 1577 /* 1578 * Called back from the postcopy fault thread when a fault is received on our 1579 * ufd. 1580 * TODO: This is Linux specific 1581 */ 1582 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1583 void *ufd) 1584 { 1585 struct vhost_dev *dev = pcfd->data; 1586 struct vhost_user *u = dev->opaque; 1587 struct uffd_msg *msg = ufd; 1588 uint64_t faultaddr = msg->arg.pagefault.address; 1589 RAMBlock *rb = NULL; 1590 uint64_t rb_offset; 1591 int i; 1592 1593 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1594 dev->mem->nregions); 1595 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1596 trace_vhost_user_postcopy_fault_handler_loop(i, 1597 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1598 if (faultaddr >= u->postcopy_client_bases[i]) { 1599 /* Ofset of the fault address in the vhost region */ 1600 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1601 if (region_offset < dev->mem->regions[i].memory_size) { 1602 rb_offset = region_offset + u->region_rb_offset[i]; 1603 trace_vhost_user_postcopy_fault_handler_found(i, 1604 region_offset, rb_offset); 1605 rb = u->region_rb[i]; 1606 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1607 rb_offset); 1608 } 1609 } 1610 } 1611 error_report("%s: Failed to find region for fault %" PRIx64, 1612 __func__, faultaddr); 1613 return -1; 1614 } 1615 1616 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1617 uint64_t offset) 1618 { 1619 struct vhost_dev *dev = pcfd->data; 1620 struct vhost_user *u = dev->opaque; 1621 int i; 1622 1623 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1624 1625 if (!u) { 1626 return 0; 1627 } 1628 /* Translate the offset into an address in the clients address space */ 1629 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1630 if (u->region_rb[i] == rb && 1631 offset >= u->region_rb_offset[i] && 1632 offset < (u->region_rb_offset[i] + 1633 dev->mem->regions[i].memory_size)) { 1634 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1635 u->postcopy_client_bases[i]; 1636 trace_vhost_user_postcopy_waker_found(client_addr); 1637 return postcopy_wake_shared(pcfd, client_addr, rb); 1638 } 1639 } 1640 1641 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1642 return 0; 1643 } 1644 #endif 1645 1646 /* 1647 * Called at the start of an inbound postcopy on reception of the 1648 * 'advise' command. 1649 */ 1650 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1651 { 1652 #ifdef CONFIG_LINUX 1653 struct vhost_user *u = dev->opaque; 1654 CharBackend *chr = u->user->chr; 1655 int ufd; 1656 VhostUserMsg msg = { 1657 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1658 .hdr.flags = VHOST_USER_VERSION, 1659 }; 1660 1661 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1662 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1663 return -1; 1664 } 1665 1666 if (vhost_user_read(dev, &msg) < 0) { 1667 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1668 return -1; 1669 } 1670 1671 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1672 error_setg(errp, "Unexpected msg type. Expected %d received %d", 1673 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1674 return -1; 1675 } 1676 1677 if (msg.hdr.size) { 1678 error_setg(errp, "Received bad msg size."); 1679 return -1; 1680 } 1681 ufd = qemu_chr_fe_get_msgfd(chr); 1682 if (ufd < 0) { 1683 error_setg(errp, "%s: Failed to get ufd", __func__); 1684 return -1; 1685 } 1686 qemu_set_nonblock(ufd); 1687 1688 /* register ufd with userfault thread */ 1689 u->postcopy_fd.fd = ufd; 1690 u->postcopy_fd.data = dev; 1691 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1692 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1693 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1694 postcopy_register_shared_ufd(&u->postcopy_fd); 1695 return 0; 1696 #else 1697 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1698 return -1; 1699 #endif 1700 } 1701 1702 /* 1703 * Called at the switch to postcopy on reception of the 'listen' command. 1704 */ 1705 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1706 { 1707 struct vhost_user *u = dev->opaque; 1708 int ret; 1709 VhostUserMsg msg = { 1710 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1711 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1712 }; 1713 u->postcopy_listen = true; 1714 trace_vhost_user_postcopy_listen(); 1715 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1716 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1717 return -1; 1718 } 1719 1720 ret = process_message_reply(dev, &msg); 1721 if (ret) { 1722 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1723 return ret; 1724 } 1725 1726 return 0; 1727 } 1728 1729 /* 1730 * Called at the end of postcopy 1731 */ 1732 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1733 { 1734 VhostUserMsg msg = { 1735 .hdr.request = VHOST_USER_POSTCOPY_END, 1736 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1737 }; 1738 int ret; 1739 struct vhost_user *u = dev->opaque; 1740 1741 trace_vhost_user_postcopy_end_entry(); 1742 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1743 error_setg(errp, "Failed to send postcopy_end to vhost"); 1744 return -1; 1745 } 1746 1747 ret = process_message_reply(dev, &msg); 1748 if (ret) { 1749 error_setg(errp, "Failed to receive reply to postcopy_end"); 1750 return ret; 1751 } 1752 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1753 close(u->postcopy_fd.fd); 1754 u->postcopy_fd.handler = NULL; 1755 1756 trace_vhost_user_postcopy_end_exit(); 1757 1758 return 0; 1759 } 1760 1761 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1762 void *opaque) 1763 { 1764 struct PostcopyNotifyData *pnd = opaque; 1765 struct vhost_user *u = container_of(notifier, struct vhost_user, 1766 postcopy_notifier); 1767 struct vhost_dev *dev = u->dev; 1768 1769 switch (pnd->reason) { 1770 case POSTCOPY_NOTIFY_PROBE: 1771 if (!virtio_has_feature(dev->protocol_features, 1772 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1773 /* TODO: Get the device name into this error somehow */ 1774 error_setg(pnd->errp, 1775 "vhost-user backend not capable of postcopy"); 1776 return -ENOENT; 1777 } 1778 break; 1779 1780 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1781 return vhost_user_postcopy_advise(dev, pnd->errp); 1782 1783 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1784 return vhost_user_postcopy_listen(dev, pnd->errp); 1785 1786 case POSTCOPY_NOTIFY_INBOUND_END: 1787 return vhost_user_postcopy_end(dev, pnd->errp); 1788 1789 default: 1790 /* We ignore notifications we don't know */ 1791 break; 1792 } 1793 1794 return 0; 1795 } 1796 1797 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) 1798 { 1799 uint64_t features, protocol_features, ram_slots; 1800 struct vhost_user *u; 1801 int err; 1802 1803 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1804 1805 u = g_new0(struct vhost_user, 1); 1806 u->user = opaque; 1807 u->slave_fd = -1; 1808 u->dev = dev; 1809 dev->opaque = u; 1810 1811 err = vhost_user_get_features(dev, &features); 1812 if (err < 0) { 1813 return err; 1814 } 1815 1816 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1817 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1818 1819 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 1820 &protocol_features); 1821 if (err < 0) { 1822 return err; 1823 } 1824 1825 dev->protocol_features = 1826 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; 1827 1828 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1829 /* Don't acknowledge CONFIG feature if device doesn't support it */ 1830 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 1831 } else if (!(protocol_features & 1832 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { 1833 error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG " 1834 "but backend does not support it."); 1835 return -1; 1836 } 1837 1838 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 1839 if (err < 0) { 1840 return err; 1841 } 1842 1843 /* query the max queues we support if backend supports Multiple Queue */ 1844 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 1845 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 1846 &dev->max_queues); 1847 if (err < 0) { 1848 return err; 1849 } 1850 } 1851 1852 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 1853 !(virtio_has_feature(dev->protocol_features, 1854 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 1855 virtio_has_feature(dev->protocol_features, 1856 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 1857 error_report("IOMMU support requires reply-ack and " 1858 "slave-req protocol features."); 1859 return -1; 1860 } 1861 1862 /* get max memory regions if backend supports configurable RAM slots */ 1863 if (!virtio_has_feature(dev->protocol_features, 1864 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 1865 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 1866 } else { 1867 err = vhost_user_get_max_memslots(dev, &ram_slots); 1868 if (err < 0) { 1869 return err; 1870 } 1871 1872 if (ram_slots < u->user->memory_slots) { 1873 error_report("The backend specified a max ram slots limit " 1874 "of %" PRIu64", when the prior validated limit was %d. " 1875 "This limit should never decrease.", ram_slots, 1876 u->user->memory_slots); 1877 return -1; 1878 } 1879 1880 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 1881 } 1882 } 1883 1884 if (dev->migration_blocker == NULL && 1885 !virtio_has_feature(dev->protocol_features, 1886 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 1887 error_setg(&dev->migration_blocker, 1888 "Migration disabled: vhost-user backend lacks " 1889 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 1890 } 1891 1892 if (dev->vq_index == 0) { 1893 err = vhost_setup_slave_channel(dev); 1894 if (err < 0) { 1895 return err; 1896 } 1897 } 1898 1899 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 1900 postcopy_add_notifier(&u->postcopy_notifier); 1901 1902 return 0; 1903 } 1904 1905 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 1906 { 1907 struct vhost_user *u; 1908 1909 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1910 1911 u = dev->opaque; 1912 if (u->postcopy_notifier.notify) { 1913 postcopy_remove_notifier(&u->postcopy_notifier); 1914 u->postcopy_notifier.notify = NULL; 1915 } 1916 u->postcopy_listen = false; 1917 if (u->postcopy_fd.handler) { 1918 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1919 close(u->postcopy_fd.fd); 1920 u->postcopy_fd.handler = NULL; 1921 } 1922 if (u->slave_fd >= 0) { 1923 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1924 close(u->slave_fd); 1925 u->slave_fd = -1; 1926 } 1927 g_free(u->region_rb); 1928 u->region_rb = NULL; 1929 g_free(u->region_rb_offset); 1930 u->region_rb_offset = NULL; 1931 u->region_rb_len = 0; 1932 g_free(u); 1933 dev->opaque = 0; 1934 1935 return 0; 1936 } 1937 1938 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 1939 { 1940 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 1941 1942 return idx; 1943 } 1944 1945 static int vhost_user_memslots_limit(struct vhost_dev *dev) 1946 { 1947 struct vhost_user *u = dev->opaque; 1948 1949 return u->user->memory_slots; 1950 } 1951 1952 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 1953 { 1954 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1955 1956 return virtio_has_feature(dev->protocol_features, 1957 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 1958 } 1959 1960 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 1961 { 1962 VhostUserMsg msg = { }; 1963 1964 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1965 1966 /* If guest supports GUEST_ANNOUNCE do nothing */ 1967 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 1968 return 0; 1969 } 1970 1971 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 1972 if (virtio_has_feature(dev->protocol_features, 1973 VHOST_USER_PROTOCOL_F_RARP)) { 1974 msg.hdr.request = VHOST_USER_SEND_RARP; 1975 msg.hdr.flags = VHOST_USER_VERSION; 1976 memcpy((char *)&msg.payload.u64, mac_addr, 6); 1977 msg.hdr.size = sizeof(msg.payload.u64); 1978 1979 return vhost_user_write(dev, &msg, NULL, 0); 1980 } 1981 return -1; 1982 } 1983 1984 static bool vhost_user_can_merge(struct vhost_dev *dev, 1985 uint64_t start1, uint64_t size1, 1986 uint64_t start2, uint64_t size2) 1987 { 1988 ram_addr_t offset; 1989 int mfd, rfd; 1990 1991 (void)vhost_user_get_mr_data(start1, &offset, &mfd); 1992 (void)vhost_user_get_mr_data(start2, &offset, &rfd); 1993 1994 return mfd == rfd; 1995 } 1996 1997 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 1998 { 1999 VhostUserMsg msg; 2000 bool reply_supported = virtio_has_feature(dev->protocol_features, 2001 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2002 2003 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2004 return 0; 2005 } 2006 2007 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2008 msg.payload.u64 = mtu; 2009 msg.hdr.size = sizeof(msg.payload.u64); 2010 msg.hdr.flags = VHOST_USER_VERSION; 2011 if (reply_supported) { 2012 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2013 } 2014 2015 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2016 return -1; 2017 } 2018 2019 /* If reply_ack supported, slave has to ack specified MTU is valid */ 2020 if (reply_supported) { 2021 return process_message_reply(dev, &msg); 2022 } 2023 2024 return 0; 2025 } 2026 2027 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2028 struct vhost_iotlb_msg *imsg) 2029 { 2030 VhostUserMsg msg = { 2031 .hdr.request = VHOST_USER_IOTLB_MSG, 2032 .hdr.size = sizeof(msg.payload.iotlb), 2033 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2034 .payload.iotlb = *imsg, 2035 }; 2036 2037 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2038 return -EFAULT; 2039 } 2040 2041 return process_message_reply(dev, &msg); 2042 } 2043 2044 2045 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2046 { 2047 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2048 } 2049 2050 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2051 uint32_t config_len) 2052 { 2053 VhostUserMsg msg = { 2054 .hdr.request = VHOST_USER_GET_CONFIG, 2055 .hdr.flags = VHOST_USER_VERSION, 2056 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2057 }; 2058 2059 if (!virtio_has_feature(dev->protocol_features, 2060 VHOST_USER_PROTOCOL_F_CONFIG)) { 2061 return -1; 2062 } 2063 2064 if (config_len > VHOST_USER_MAX_CONFIG_SIZE) { 2065 return -1; 2066 } 2067 2068 msg.payload.config.offset = 0; 2069 msg.payload.config.size = config_len; 2070 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2071 return -1; 2072 } 2073 2074 if (vhost_user_read(dev, &msg) < 0) { 2075 return -1; 2076 } 2077 2078 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2079 error_report("Received unexpected msg type. Expected %d received %d", 2080 VHOST_USER_GET_CONFIG, msg.hdr.request); 2081 return -1; 2082 } 2083 2084 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2085 error_report("Received bad msg size."); 2086 return -1; 2087 } 2088 2089 memcpy(config, msg.payload.config.region, config_len); 2090 2091 return 0; 2092 } 2093 2094 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2095 uint32_t offset, uint32_t size, uint32_t flags) 2096 { 2097 uint8_t *p; 2098 bool reply_supported = virtio_has_feature(dev->protocol_features, 2099 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2100 2101 VhostUserMsg msg = { 2102 .hdr.request = VHOST_USER_SET_CONFIG, 2103 .hdr.flags = VHOST_USER_VERSION, 2104 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2105 }; 2106 2107 if (!virtio_has_feature(dev->protocol_features, 2108 VHOST_USER_PROTOCOL_F_CONFIG)) { 2109 return -1; 2110 } 2111 2112 if (reply_supported) { 2113 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2114 } 2115 2116 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2117 return -1; 2118 } 2119 2120 msg.payload.config.offset = offset, 2121 msg.payload.config.size = size, 2122 msg.payload.config.flags = flags, 2123 p = msg.payload.config.region; 2124 memcpy(p, data, size); 2125 2126 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2127 return -1; 2128 } 2129 2130 if (reply_supported) { 2131 return process_message_reply(dev, &msg); 2132 } 2133 2134 return 0; 2135 } 2136 2137 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2138 void *session_info, 2139 uint64_t *session_id) 2140 { 2141 bool crypto_session = virtio_has_feature(dev->protocol_features, 2142 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2143 CryptoDevBackendSymSessionInfo *sess_info = session_info; 2144 VhostUserMsg msg = { 2145 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2146 .hdr.flags = VHOST_USER_VERSION, 2147 .hdr.size = sizeof(msg.payload.session), 2148 }; 2149 2150 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2151 2152 if (!crypto_session) { 2153 error_report("vhost-user trying to send unhandled ioctl"); 2154 return -1; 2155 } 2156 2157 memcpy(&msg.payload.session.session_setup_data, sess_info, 2158 sizeof(CryptoDevBackendSymSessionInfo)); 2159 if (sess_info->key_len) { 2160 memcpy(&msg.payload.session.key, sess_info->cipher_key, 2161 sess_info->key_len); 2162 } 2163 if (sess_info->auth_key_len > 0) { 2164 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 2165 sess_info->auth_key_len); 2166 } 2167 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2168 error_report("vhost_user_write() return -1, create session failed"); 2169 return -1; 2170 } 2171 2172 if (vhost_user_read(dev, &msg) < 0) { 2173 error_report("vhost_user_read() return -1, create session failed"); 2174 return -1; 2175 } 2176 2177 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2178 error_report("Received unexpected msg type. Expected %d received %d", 2179 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2180 return -1; 2181 } 2182 2183 if (msg.hdr.size != sizeof(msg.payload.session)) { 2184 error_report("Received bad msg size."); 2185 return -1; 2186 } 2187 2188 if (msg.payload.session.session_id < 0) { 2189 error_report("Bad session id: %" PRId64 "", 2190 msg.payload.session.session_id); 2191 return -1; 2192 } 2193 *session_id = msg.payload.session.session_id; 2194 2195 return 0; 2196 } 2197 2198 static int 2199 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2200 { 2201 bool crypto_session = virtio_has_feature(dev->protocol_features, 2202 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2203 VhostUserMsg msg = { 2204 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2205 .hdr.flags = VHOST_USER_VERSION, 2206 .hdr.size = sizeof(msg.payload.u64), 2207 }; 2208 msg.payload.u64 = session_id; 2209 2210 if (!crypto_session) { 2211 error_report("vhost-user trying to send unhandled ioctl"); 2212 return -1; 2213 } 2214 2215 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2216 error_report("vhost_user_write() return -1, close session failed"); 2217 return -1; 2218 } 2219 2220 return 0; 2221 } 2222 2223 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 2224 MemoryRegionSection *section) 2225 { 2226 bool result; 2227 2228 result = memory_region_get_fd(section->mr) >= 0; 2229 2230 return result; 2231 } 2232 2233 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2234 uint16_t queue_size, 2235 struct vhost_inflight *inflight) 2236 { 2237 void *addr; 2238 int fd; 2239 struct vhost_user *u = dev->opaque; 2240 CharBackend *chr = u->user->chr; 2241 VhostUserMsg msg = { 2242 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2243 .hdr.flags = VHOST_USER_VERSION, 2244 .payload.inflight.num_queues = dev->nvqs, 2245 .payload.inflight.queue_size = queue_size, 2246 .hdr.size = sizeof(msg.payload.inflight), 2247 }; 2248 2249 if (!virtio_has_feature(dev->protocol_features, 2250 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2251 return 0; 2252 } 2253 2254 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 2255 return -1; 2256 } 2257 2258 if (vhost_user_read(dev, &msg) < 0) { 2259 return -1; 2260 } 2261 2262 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2263 error_report("Received unexpected msg type. " 2264 "Expected %d received %d", 2265 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2266 return -1; 2267 } 2268 2269 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2270 error_report("Received bad msg size."); 2271 return -1; 2272 } 2273 2274 if (!msg.payload.inflight.mmap_size) { 2275 return 0; 2276 } 2277 2278 fd = qemu_chr_fe_get_msgfd(chr); 2279 if (fd < 0) { 2280 error_report("Failed to get mem fd"); 2281 return -1; 2282 } 2283 2284 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2285 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2286 2287 if (addr == MAP_FAILED) { 2288 error_report("Failed to mmap mem fd"); 2289 close(fd); 2290 return -1; 2291 } 2292 2293 inflight->addr = addr; 2294 inflight->fd = fd; 2295 inflight->size = msg.payload.inflight.mmap_size; 2296 inflight->offset = msg.payload.inflight.mmap_offset; 2297 inflight->queue_size = queue_size; 2298 2299 return 0; 2300 } 2301 2302 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2303 struct vhost_inflight *inflight) 2304 { 2305 VhostUserMsg msg = { 2306 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2307 .hdr.flags = VHOST_USER_VERSION, 2308 .payload.inflight.mmap_size = inflight->size, 2309 .payload.inflight.mmap_offset = inflight->offset, 2310 .payload.inflight.num_queues = dev->nvqs, 2311 .payload.inflight.queue_size = inflight->queue_size, 2312 .hdr.size = sizeof(msg.payload.inflight), 2313 }; 2314 2315 if (!virtio_has_feature(dev->protocol_features, 2316 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2317 return 0; 2318 } 2319 2320 if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) { 2321 return -1; 2322 } 2323 2324 return 0; 2325 } 2326 2327 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2328 { 2329 if (user->chr) { 2330 error_setg(errp, "Cannot initialize vhost-user state"); 2331 return false; 2332 } 2333 user->chr = chr; 2334 user->memory_slots = 0; 2335 return true; 2336 } 2337 2338 void vhost_user_cleanup(VhostUserState *user) 2339 { 2340 int i; 2341 2342 if (!user->chr) { 2343 return; 2344 } 2345 2346 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 2347 if (user->notifier[i].addr) { 2348 object_unparent(OBJECT(&user->notifier[i].mr)); 2349 munmap(user->notifier[i].addr, qemu_real_host_page_size); 2350 user->notifier[i].addr = NULL; 2351 } 2352 } 2353 user->chr = NULL; 2354 } 2355 2356 const VhostOps user_ops = { 2357 .backend_type = VHOST_BACKEND_TYPE_USER, 2358 .vhost_backend_init = vhost_user_backend_init, 2359 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2360 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2361 .vhost_set_log_base = vhost_user_set_log_base, 2362 .vhost_set_mem_table = vhost_user_set_mem_table, 2363 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2364 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2365 .vhost_set_vring_num = vhost_user_set_vring_num, 2366 .vhost_set_vring_base = vhost_user_set_vring_base, 2367 .vhost_get_vring_base = vhost_user_get_vring_base, 2368 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2369 .vhost_set_vring_call = vhost_user_set_vring_call, 2370 .vhost_set_features = vhost_user_set_features, 2371 .vhost_get_features = vhost_user_get_features, 2372 .vhost_set_owner = vhost_user_set_owner, 2373 .vhost_reset_device = vhost_user_reset_device, 2374 .vhost_get_vq_index = vhost_user_get_vq_index, 2375 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2376 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2377 .vhost_migration_done = vhost_user_migration_done, 2378 .vhost_backend_can_merge = vhost_user_can_merge, 2379 .vhost_net_set_mtu = vhost_user_net_set_mtu, 2380 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 2381 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 2382 .vhost_get_config = vhost_user_get_config, 2383 .vhost_set_config = vhost_user_set_config, 2384 .vhost_crypto_create_session = vhost_user_crypto_create_session, 2385 .vhost_crypto_close_session = vhost_user_crypto_close_session, 2386 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 2387 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 2388 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 2389 }; 2390