1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/virtio-dmabuf.h" 14 #include "hw/virtio/vhost.h" 15 #include "hw/virtio/virtio-crypto.h" 16 #include "hw/virtio/vhost-user.h" 17 #include "hw/virtio/vhost-backend.h" 18 #include "hw/virtio/virtio.h" 19 #include "hw/virtio/virtio-net.h" 20 #include "chardev/char-fe.h" 21 #include "io/channel-socket.h" 22 #include "sysemu/kvm.h" 23 #include "qemu/error-report.h" 24 #include "qemu/main-loop.h" 25 #include "qemu/uuid.h" 26 #include "qemu/sockets.h" 27 #include "sysemu/runstate.h" 28 #include "sysemu/cryptodev.h" 29 #include "migration/postcopy-ram.h" 30 #include "trace.h" 31 #include "exec/ramblock.h" 32 33 #include <sys/ioctl.h> 34 #include <sys/socket.h> 35 #include <sys/un.h> 36 37 #include "standard-headers/linux/vhost_types.h" 38 39 #ifdef CONFIG_LINUX 40 #include <linux/userfaultfd.h> 41 #endif 42 43 #define VHOST_MEMORY_BASELINE_NREGIONS 8 44 #define VHOST_USER_F_PROTOCOL_FEATURES 30 45 #define VHOST_USER_BACKEND_MAX_FDS 8 46 47 #if defined(TARGET_PPC) || defined(TARGET_PPC64) 48 #include "hw/ppc/spapr.h" 49 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 50 51 #else 52 #define VHOST_USER_MAX_RAM_SLOTS 512 53 #endif 54 55 /* 56 * Maximum size of virtio device config space 57 */ 58 #define VHOST_USER_MAX_CONFIG_SIZE 256 59 60 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 61 62 typedef enum VhostUserRequest { 63 VHOST_USER_NONE = 0, 64 VHOST_USER_GET_FEATURES = 1, 65 VHOST_USER_SET_FEATURES = 2, 66 VHOST_USER_SET_OWNER = 3, 67 VHOST_USER_RESET_OWNER = 4, 68 VHOST_USER_SET_MEM_TABLE = 5, 69 VHOST_USER_SET_LOG_BASE = 6, 70 VHOST_USER_SET_LOG_FD = 7, 71 VHOST_USER_SET_VRING_NUM = 8, 72 VHOST_USER_SET_VRING_ADDR = 9, 73 VHOST_USER_SET_VRING_BASE = 10, 74 VHOST_USER_GET_VRING_BASE = 11, 75 VHOST_USER_SET_VRING_KICK = 12, 76 VHOST_USER_SET_VRING_CALL = 13, 77 VHOST_USER_SET_VRING_ERR = 14, 78 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 79 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 80 VHOST_USER_GET_QUEUE_NUM = 17, 81 VHOST_USER_SET_VRING_ENABLE = 18, 82 VHOST_USER_SEND_RARP = 19, 83 VHOST_USER_NET_SET_MTU = 20, 84 VHOST_USER_SET_BACKEND_REQ_FD = 21, 85 VHOST_USER_IOTLB_MSG = 22, 86 VHOST_USER_SET_VRING_ENDIAN = 23, 87 VHOST_USER_GET_CONFIG = 24, 88 VHOST_USER_SET_CONFIG = 25, 89 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 90 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 91 VHOST_USER_POSTCOPY_ADVISE = 28, 92 VHOST_USER_POSTCOPY_LISTEN = 29, 93 VHOST_USER_POSTCOPY_END = 30, 94 VHOST_USER_GET_INFLIGHT_FD = 31, 95 VHOST_USER_SET_INFLIGHT_FD = 32, 96 VHOST_USER_GPU_SET_SOCKET = 33, 97 VHOST_USER_RESET_DEVICE = 34, 98 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ 99 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 100 VHOST_USER_ADD_MEM_REG = 37, 101 VHOST_USER_REM_MEM_REG = 38, 102 VHOST_USER_SET_STATUS = 39, 103 VHOST_USER_GET_STATUS = 40, 104 VHOST_USER_GET_SHARED_OBJECT = 41, 105 VHOST_USER_SET_DEVICE_STATE_FD = 42, 106 VHOST_USER_CHECK_DEVICE_STATE = 43, 107 VHOST_USER_MAX 108 } VhostUserRequest; 109 110 typedef enum VhostUserBackendRequest { 111 VHOST_USER_BACKEND_NONE = 0, 112 VHOST_USER_BACKEND_IOTLB_MSG = 1, 113 VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, 114 VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, 115 VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6, 116 VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7, 117 VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8, 118 VHOST_USER_BACKEND_MAX 119 } VhostUserBackendRequest; 120 121 typedef struct VhostUserMemoryRegion { 122 uint64_t guest_phys_addr; 123 uint64_t memory_size; 124 uint64_t userspace_addr; 125 uint64_t mmap_offset; 126 } VhostUserMemoryRegion; 127 128 typedef struct VhostUserMemory { 129 uint32_t nregions; 130 uint32_t padding; 131 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 132 } VhostUserMemory; 133 134 typedef struct VhostUserMemRegMsg { 135 uint64_t padding; 136 VhostUserMemoryRegion region; 137 } VhostUserMemRegMsg; 138 139 typedef struct VhostUserLog { 140 uint64_t mmap_size; 141 uint64_t mmap_offset; 142 } VhostUserLog; 143 144 typedef struct VhostUserConfig { 145 uint32_t offset; 146 uint32_t size; 147 uint32_t flags; 148 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 149 } VhostUserConfig; 150 151 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 152 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 153 #define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024 154 155 typedef struct VhostUserCryptoSession { 156 uint64_t op_code; 157 union { 158 struct { 159 CryptoDevBackendSymSessionInfo session_setup_data; 160 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 161 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 162 } sym; 163 struct { 164 CryptoDevBackendAsymSessionInfo session_setup_data; 165 uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN]; 166 } asym; 167 } u; 168 169 /* session id for success, -1 on errors */ 170 int64_t session_id; 171 } VhostUserCryptoSession; 172 173 static VhostUserConfig c __attribute__ ((unused)); 174 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 175 + sizeof(c.size) \ 176 + sizeof(c.flags)) 177 178 typedef struct VhostUserVringArea { 179 uint64_t u64; 180 uint64_t size; 181 uint64_t offset; 182 } VhostUserVringArea; 183 184 typedef struct VhostUserInflight { 185 uint64_t mmap_size; 186 uint64_t mmap_offset; 187 uint16_t num_queues; 188 uint16_t queue_size; 189 } VhostUserInflight; 190 191 typedef struct VhostUserShared { 192 unsigned char uuid[16]; 193 } VhostUserShared; 194 195 typedef struct { 196 VhostUserRequest request; 197 198 #define VHOST_USER_VERSION_MASK (0x3) 199 #define VHOST_USER_REPLY_MASK (0x1 << 2) 200 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 201 uint32_t flags; 202 uint32_t size; /* the following payload size */ 203 } QEMU_PACKED VhostUserHeader; 204 205 /* Request payload of VHOST_USER_SET_DEVICE_STATE_FD */ 206 typedef struct VhostUserTransferDeviceState { 207 uint32_t direction; 208 uint32_t phase; 209 } VhostUserTransferDeviceState; 210 211 typedef union { 212 #define VHOST_USER_VRING_IDX_MASK (0xff) 213 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) 214 uint64_t u64; 215 struct vhost_vring_state state; 216 struct vhost_vring_addr addr; 217 VhostUserMemory memory; 218 VhostUserMemRegMsg mem_reg; 219 VhostUserLog log; 220 struct vhost_iotlb_msg iotlb; 221 VhostUserConfig config; 222 VhostUserCryptoSession session; 223 VhostUserVringArea area; 224 VhostUserInflight inflight; 225 VhostUserShared object; 226 VhostUserTransferDeviceState transfer_state; 227 } VhostUserPayload; 228 229 typedef struct VhostUserMsg { 230 VhostUserHeader hdr; 231 VhostUserPayload payload; 232 } QEMU_PACKED VhostUserMsg; 233 234 static VhostUserMsg m __attribute__ ((unused)); 235 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 236 237 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 238 239 /* The version of the protocol we support */ 240 #define VHOST_USER_VERSION (0x1) 241 242 struct vhost_user { 243 struct vhost_dev *dev; 244 /* Shared between vhost devs of the same virtio device */ 245 VhostUserState *user; 246 QIOChannel *backend_ioc; 247 GSource *backend_src; 248 NotifierWithReturn postcopy_notifier; 249 struct PostCopyFD postcopy_fd; 250 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 251 /* Length of the region_rb and region_rb_offset arrays */ 252 size_t region_rb_len; 253 /* RAMBlock associated with a given region */ 254 RAMBlock **region_rb; 255 /* 256 * The offset from the start of the RAMBlock to the start of the 257 * vhost region. 258 */ 259 ram_addr_t *region_rb_offset; 260 261 /* True once we've entered postcopy_listen */ 262 bool postcopy_listen; 263 264 /* Our current regions */ 265 int num_shadow_regions; 266 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 267 }; 268 269 struct scrub_regions { 270 struct vhost_memory_region *region; 271 int reg_idx; 272 int fd_idx; 273 }; 274 275 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 276 { 277 struct vhost_user *u = dev->opaque; 278 CharBackend *chr = u->user->chr; 279 uint8_t *p = (uint8_t *) msg; 280 int r, size = VHOST_USER_HDR_SIZE; 281 282 r = qemu_chr_fe_read_all(chr, p, size); 283 if (r != size) { 284 int saved_errno = errno; 285 error_report("Failed to read msg header. Read %d instead of %d." 286 " Original request %d.", r, size, msg->hdr.request); 287 return r < 0 ? -saved_errno : -EIO; 288 } 289 290 /* validate received flags */ 291 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 292 error_report("Failed to read msg header." 293 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 294 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 295 return -EPROTO; 296 } 297 298 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags); 299 300 return 0; 301 } 302 303 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 304 { 305 struct vhost_user *u = dev->opaque; 306 CharBackend *chr = u->user->chr; 307 uint8_t *p = (uint8_t *) msg; 308 int r, size; 309 310 r = vhost_user_read_header(dev, msg); 311 if (r < 0) { 312 return r; 313 } 314 315 /* validate message size is sane */ 316 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 317 error_report("Failed to read msg header." 318 " Size %d exceeds the maximum %zu.", msg->hdr.size, 319 VHOST_USER_PAYLOAD_SIZE); 320 return -EPROTO; 321 } 322 323 if (msg->hdr.size) { 324 p += VHOST_USER_HDR_SIZE; 325 size = msg->hdr.size; 326 r = qemu_chr_fe_read_all(chr, p, size); 327 if (r != size) { 328 int saved_errno = errno; 329 error_report("Failed to read msg payload." 330 " Read %d instead of %d.", r, msg->hdr.size); 331 return r < 0 ? -saved_errno : -EIO; 332 } 333 } 334 335 return 0; 336 } 337 338 static int process_message_reply(struct vhost_dev *dev, 339 const VhostUserMsg *msg) 340 { 341 int ret; 342 VhostUserMsg msg_reply; 343 344 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 345 return 0; 346 } 347 348 ret = vhost_user_read(dev, &msg_reply); 349 if (ret < 0) { 350 return ret; 351 } 352 353 if (msg_reply.hdr.request != msg->hdr.request) { 354 error_report("Received unexpected msg type. " 355 "Expected %d received %d", 356 msg->hdr.request, msg_reply.hdr.request); 357 return -EPROTO; 358 } 359 360 return msg_reply.payload.u64 ? -EIO : 0; 361 } 362 363 static bool vhost_user_per_device_request(VhostUserRequest request) 364 { 365 switch (request) { 366 case VHOST_USER_SET_OWNER: 367 case VHOST_USER_RESET_OWNER: 368 case VHOST_USER_SET_MEM_TABLE: 369 case VHOST_USER_GET_QUEUE_NUM: 370 case VHOST_USER_NET_SET_MTU: 371 case VHOST_USER_RESET_DEVICE: 372 case VHOST_USER_ADD_MEM_REG: 373 case VHOST_USER_REM_MEM_REG: 374 case VHOST_USER_SET_LOG_BASE: 375 return true; 376 default: 377 return false; 378 } 379 } 380 381 /* most non-init callers ignore the error */ 382 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 383 int *fds, int fd_num) 384 { 385 struct vhost_user *u = dev->opaque; 386 CharBackend *chr = u->user->chr; 387 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 388 389 /* 390 * Some devices, like virtio-scsi, are implemented as a single vhost_dev, 391 * while others, like virtio-net, contain multiple vhost_devs. For 392 * operations such as configuring device memory mappings or issuing device 393 * resets, which affect the whole device instead of individual VQs, 394 * vhost-user messages should only be sent once. 395 * 396 * Devices with multiple vhost_devs are given an associated dev->vq_index 397 * so per_device requests are only sent if vq_index is 0. 398 */ 399 if (vhost_user_per_device_request(msg->hdr.request) 400 && dev->vq_index != 0) { 401 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 402 return 0; 403 } 404 405 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 406 error_report("Failed to set msg fds."); 407 return -EINVAL; 408 } 409 410 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 411 if (ret != size) { 412 int saved_errno = errno; 413 error_report("Failed to write msg." 414 " Wrote %d instead of %d.", ret, size); 415 return ret < 0 ? -saved_errno : -EIO; 416 } 417 418 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); 419 420 return 0; 421 } 422 423 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 424 { 425 VhostUserMsg msg = { 426 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 427 .hdr.flags = VHOST_USER_VERSION, 428 }; 429 430 return vhost_user_write(dev, &msg, &fd, 1); 431 } 432 433 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 434 struct vhost_log *log) 435 { 436 int fds[VHOST_USER_MAX_RAM_SLOTS]; 437 size_t fd_num = 0; 438 bool shmfd = virtio_has_feature(dev->protocol_features, 439 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 440 int ret; 441 VhostUserMsg msg = { 442 .hdr.request = VHOST_USER_SET_LOG_BASE, 443 .hdr.flags = VHOST_USER_VERSION, 444 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 445 .payload.log.mmap_offset = 0, 446 .hdr.size = sizeof(msg.payload.log), 447 }; 448 449 /* Send only once with first queue pair */ 450 if (dev->vq_index != 0) { 451 return 0; 452 } 453 454 if (shmfd && log->fd != -1) { 455 fds[fd_num++] = log->fd; 456 } 457 458 ret = vhost_user_write(dev, &msg, fds, fd_num); 459 if (ret < 0) { 460 return ret; 461 } 462 463 if (shmfd) { 464 msg.hdr.size = 0; 465 ret = vhost_user_read(dev, &msg); 466 if (ret < 0) { 467 return ret; 468 } 469 470 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 471 error_report("Received unexpected msg type. " 472 "Expected %d received %d", 473 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 474 return -EPROTO; 475 } 476 } 477 478 return 0; 479 } 480 481 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 482 int *fd) 483 { 484 MemoryRegion *mr; 485 486 assert((uintptr_t)addr == addr); 487 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 488 *fd = memory_region_get_fd(mr); 489 *offset += mr->ram_block->fd_offset; 490 491 return mr; 492 } 493 494 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 495 struct vhost_memory_region *src, 496 uint64_t mmap_offset) 497 { 498 assert(src != NULL && dst != NULL); 499 dst->userspace_addr = src->userspace_addr; 500 dst->memory_size = src->memory_size; 501 dst->guest_phys_addr = src->guest_phys_addr; 502 dst->mmap_offset = mmap_offset; 503 } 504 505 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 506 struct vhost_dev *dev, 507 VhostUserMsg *msg, 508 int *fds, size_t *fd_num, 509 bool track_ramblocks) 510 { 511 int i, fd; 512 ram_addr_t offset; 513 MemoryRegion *mr; 514 struct vhost_memory_region *reg; 515 VhostUserMemoryRegion region_buffer; 516 517 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 518 519 for (i = 0; i < dev->mem->nregions; ++i) { 520 reg = dev->mem->regions + i; 521 522 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 523 if (fd > 0) { 524 if (track_ramblocks) { 525 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 526 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 527 reg->memory_size, 528 reg->guest_phys_addr, 529 reg->userspace_addr, 530 offset); 531 u->region_rb_offset[i] = offset; 532 u->region_rb[i] = mr->ram_block; 533 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 534 error_report("Failed preparing vhost-user memory table msg"); 535 return -ENOBUFS; 536 } 537 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 538 msg->payload.memory.regions[*fd_num] = region_buffer; 539 fds[(*fd_num)++] = fd; 540 } else if (track_ramblocks) { 541 u->region_rb_offset[i] = 0; 542 u->region_rb[i] = NULL; 543 } 544 } 545 546 msg->payload.memory.nregions = *fd_num; 547 548 if (!*fd_num) { 549 error_report("Failed initializing vhost-user memory map, " 550 "consider using -object memory-backend-file share=on"); 551 return -EINVAL; 552 } 553 554 msg->hdr.size = sizeof(msg->payload.memory.nregions); 555 msg->hdr.size += sizeof(msg->payload.memory.padding); 556 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 557 558 return 0; 559 } 560 561 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 562 struct vhost_memory_region *vdev_reg) 563 { 564 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 565 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 566 shadow_reg->memory_size == vdev_reg->memory_size; 567 } 568 569 static void scrub_shadow_regions(struct vhost_dev *dev, 570 struct scrub_regions *add_reg, 571 int *nr_add_reg, 572 struct scrub_regions *rem_reg, 573 int *nr_rem_reg, uint64_t *shadow_pcb, 574 bool track_ramblocks) 575 { 576 struct vhost_user *u = dev->opaque; 577 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 578 struct vhost_memory_region *reg, *shadow_reg; 579 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 580 ram_addr_t offset; 581 MemoryRegion *mr; 582 bool matching; 583 584 /* 585 * Find memory regions present in our shadow state which are not in 586 * the device's current memory state. 587 * 588 * Mark regions in both the shadow and device state as "found". 589 */ 590 for (i = 0; i < u->num_shadow_regions; i++) { 591 shadow_reg = &u->shadow_regions[i]; 592 matching = false; 593 594 for (j = 0; j < dev->mem->nregions; j++) { 595 reg = &dev->mem->regions[j]; 596 597 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 598 599 if (reg_equal(shadow_reg, reg)) { 600 matching = true; 601 found[j] = true; 602 if (track_ramblocks) { 603 /* 604 * Reset postcopy client bases, region_rb, and 605 * region_rb_offset in case regions are removed. 606 */ 607 if (fd > 0) { 608 u->region_rb_offset[j] = offset; 609 u->region_rb[j] = mr->ram_block; 610 shadow_pcb[j] = u->postcopy_client_bases[i]; 611 } else { 612 u->region_rb_offset[j] = 0; 613 u->region_rb[j] = NULL; 614 } 615 } 616 break; 617 } 618 } 619 620 /* 621 * If the region was not found in the current device memory state 622 * create an entry for it in the removed list. 623 */ 624 if (!matching) { 625 rem_reg[rm_idx].region = shadow_reg; 626 rem_reg[rm_idx++].reg_idx = i; 627 } 628 } 629 630 /* 631 * For regions not marked "found", create entries in the added list. 632 * 633 * Note their indexes in the device memory state and the indexes of their 634 * file descriptors. 635 */ 636 for (i = 0; i < dev->mem->nregions; i++) { 637 reg = &dev->mem->regions[i]; 638 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 639 if (fd > 0) { 640 ++fd_num; 641 } 642 643 /* 644 * If the region was in both the shadow and device state we don't 645 * need to send a VHOST_USER_ADD_MEM_REG message for it. 646 */ 647 if (found[i]) { 648 continue; 649 } 650 651 add_reg[add_idx].region = reg; 652 add_reg[add_idx].reg_idx = i; 653 add_reg[add_idx++].fd_idx = fd_num; 654 } 655 *nr_rem_reg = rm_idx; 656 *nr_add_reg = add_idx; 657 658 return; 659 } 660 661 static int send_remove_regions(struct vhost_dev *dev, 662 struct scrub_regions *remove_reg, 663 int nr_rem_reg, VhostUserMsg *msg, 664 bool reply_supported) 665 { 666 struct vhost_user *u = dev->opaque; 667 struct vhost_memory_region *shadow_reg; 668 int i, fd, shadow_reg_idx, ret; 669 ram_addr_t offset; 670 VhostUserMemoryRegion region_buffer; 671 672 /* 673 * The regions in remove_reg appear in the same order they do in the 674 * shadow table. Therefore we can minimize memory copies by iterating 675 * through remove_reg backwards. 676 */ 677 for (i = nr_rem_reg - 1; i >= 0; i--) { 678 shadow_reg = remove_reg[i].region; 679 shadow_reg_idx = remove_reg[i].reg_idx; 680 681 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 682 683 if (fd > 0) { 684 msg->hdr.request = VHOST_USER_REM_MEM_REG; 685 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 686 msg->payload.mem_reg.region = region_buffer; 687 688 ret = vhost_user_write(dev, msg, NULL, 0); 689 if (ret < 0) { 690 return ret; 691 } 692 693 if (reply_supported) { 694 ret = process_message_reply(dev, msg); 695 if (ret) { 696 return ret; 697 } 698 } 699 } 700 701 /* 702 * At this point we know the backend has unmapped the region. It is now 703 * safe to remove it from the shadow table. 704 */ 705 memmove(&u->shadow_regions[shadow_reg_idx], 706 &u->shadow_regions[shadow_reg_idx + 1], 707 sizeof(struct vhost_memory_region) * 708 (u->num_shadow_regions - shadow_reg_idx - 1)); 709 u->num_shadow_regions--; 710 } 711 712 return 0; 713 } 714 715 static int send_add_regions(struct vhost_dev *dev, 716 struct scrub_regions *add_reg, int nr_add_reg, 717 VhostUserMsg *msg, uint64_t *shadow_pcb, 718 bool reply_supported, bool track_ramblocks) 719 { 720 struct vhost_user *u = dev->opaque; 721 int i, fd, ret, reg_idx, reg_fd_idx; 722 struct vhost_memory_region *reg; 723 MemoryRegion *mr; 724 ram_addr_t offset; 725 VhostUserMsg msg_reply; 726 VhostUserMemoryRegion region_buffer; 727 728 for (i = 0; i < nr_add_reg; i++) { 729 reg = add_reg[i].region; 730 reg_idx = add_reg[i].reg_idx; 731 reg_fd_idx = add_reg[i].fd_idx; 732 733 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 734 735 if (fd > 0) { 736 if (track_ramblocks) { 737 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 738 reg->memory_size, 739 reg->guest_phys_addr, 740 reg->userspace_addr, 741 offset); 742 u->region_rb_offset[reg_idx] = offset; 743 u->region_rb[reg_idx] = mr->ram_block; 744 } 745 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 746 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 747 msg->payload.mem_reg.region = region_buffer; 748 749 ret = vhost_user_write(dev, msg, &fd, 1); 750 if (ret < 0) { 751 return ret; 752 } 753 754 if (track_ramblocks) { 755 uint64_t reply_gpa; 756 757 ret = vhost_user_read(dev, &msg_reply); 758 if (ret < 0) { 759 return ret; 760 } 761 762 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 763 764 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 765 error_report("%s: Received unexpected msg type." 766 "Expected %d received %d", __func__, 767 VHOST_USER_ADD_MEM_REG, 768 msg_reply.hdr.request); 769 return -EPROTO; 770 } 771 772 /* 773 * We're using the same structure, just reusing one of the 774 * fields, so it should be the same size. 775 */ 776 if (msg_reply.hdr.size != msg->hdr.size) { 777 error_report("%s: Unexpected size for postcopy reply " 778 "%d vs %d", __func__, msg_reply.hdr.size, 779 msg->hdr.size); 780 return -EPROTO; 781 } 782 783 /* Get the postcopy client base from the backend's reply. */ 784 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 785 shadow_pcb[reg_idx] = 786 msg_reply.payload.mem_reg.region.userspace_addr; 787 trace_vhost_user_set_mem_table_postcopy( 788 msg_reply.payload.mem_reg.region.userspace_addr, 789 msg->payload.mem_reg.region.userspace_addr, 790 reg_fd_idx, reg_idx); 791 } else { 792 error_report("%s: invalid postcopy reply for region. " 793 "Got guest physical address %" PRIX64 ", expected " 794 "%" PRIX64, __func__, reply_gpa, 795 dev->mem->regions[reg_idx].guest_phys_addr); 796 return -EPROTO; 797 } 798 } else if (reply_supported) { 799 ret = process_message_reply(dev, msg); 800 if (ret) { 801 return ret; 802 } 803 } 804 } else if (track_ramblocks) { 805 u->region_rb_offset[reg_idx] = 0; 806 u->region_rb[reg_idx] = NULL; 807 } 808 809 /* 810 * At this point, we know the backend has mapped in the new 811 * region, if the region has a valid file descriptor. 812 * 813 * The region should now be added to the shadow table. 814 */ 815 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 816 reg->guest_phys_addr; 817 u->shadow_regions[u->num_shadow_regions].userspace_addr = 818 reg->userspace_addr; 819 u->shadow_regions[u->num_shadow_regions].memory_size = 820 reg->memory_size; 821 u->num_shadow_regions++; 822 } 823 824 return 0; 825 } 826 827 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 828 VhostUserMsg *msg, 829 bool reply_supported, 830 bool track_ramblocks) 831 { 832 struct vhost_user *u = dev->opaque; 833 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 834 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 835 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 836 int nr_add_reg, nr_rem_reg; 837 int ret; 838 839 msg->hdr.size = sizeof(msg->payload.mem_reg); 840 841 /* Find the regions which need to be removed or added. */ 842 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 843 shadow_pcb, track_ramblocks); 844 845 if (nr_rem_reg) { 846 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 847 reply_supported); 848 if (ret < 0) { 849 goto err; 850 } 851 } 852 853 if (nr_add_reg) { 854 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, 855 reply_supported, track_ramblocks); 856 if (ret < 0) { 857 goto err; 858 } 859 } 860 861 if (track_ramblocks) { 862 memcpy(u->postcopy_client_bases, shadow_pcb, 863 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 864 /* 865 * Now we've registered this with the postcopy code, we ack to the 866 * client, because now we're in the position to be able to deal with 867 * any faults it generates. 868 */ 869 /* TODO: Use this for failure cases as well with a bad value. */ 870 msg->hdr.size = sizeof(msg->payload.u64); 871 msg->payload.u64 = 0; /* OK */ 872 873 ret = vhost_user_write(dev, msg, NULL, 0); 874 if (ret < 0) { 875 return ret; 876 } 877 } 878 879 return 0; 880 881 err: 882 if (track_ramblocks) { 883 memcpy(u->postcopy_client_bases, shadow_pcb, 884 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 885 } 886 887 return ret; 888 } 889 890 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 891 struct vhost_memory *mem, 892 bool reply_supported, 893 bool config_mem_slots) 894 { 895 struct vhost_user *u = dev->opaque; 896 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 897 size_t fd_num = 0; 898 VhostUserMsg msg_reply; 899 int region_i, msg_i; 900 int ret; 901 902 VhostUserMsg msg = { 903 .hdr.flags = VHOST_USER_VERSION, 904 }; 905 906 if (u->region_rb_len < dev->mem->nregions) { 907 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 908 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 909 dev->mem->nregions); 910 memset(&(u->region_rb[u->region_rb_len]), '\0', 911 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 912 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 913 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 914 u->region_rb_len = dev->mem->nregions; 915 } 916 917 if (config_mem_slots) { 918 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); 919 if (ret < 0) { 920 return ret; 921 } 922 } else { 923 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 924 true); 925 if (ret < 0) { 926 return ret; 927 } 928 929 ret = vhost_user_write(dev, &msg, fds, fd_num); 930 if (ret < 0) { 931 return ret; 932 } 933 934 ret = vhost_user_read(dev, &msg_reply); 935 if (ret < 0) { 936 return ret; 937 } 938 939 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 940 error_report("%s: Received unexpected msg type." 941 "Expected %d received %d", __func__, 942 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 943 return -EPROTO; 944 } 945 946 /* 947 * We're using the same structure, just reusing one of the 948 * fields, so it should be the same size. 949 */ 950 if (msg_reply.hdr.size != msg.hdr.size) { 951 error_report("%s: Unexpected size for postcopy reply " 952 "%d vs %d", __func__, msg_reply.hdr.size, 953 msg.hdr.size); 954 return -EPROTO; 955 } 956 957 memset(u->postcopy_client_bases, 0, 958 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 959 960 /* 961 * They're in the same order as the regions that were sent 962 * but some of the regions were skipped (above) if they 963 * didn't have fd's 964 */ 965 for (msg_i = 0, region_i = 0; 966 region_i < dev->mem->nregions; 967 region_i++) { 968 if (msg_i < fd_num && 969 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 970 dev->mem->regions[region_i].guest_phys_addr) { 971 u->postcopy_client_bases[region_i] = 972 msg_reply.payload.memory.regions[msg_i].userspace_addr; 973 trace_vhost_user_set_mem_table_postcopy( 974 msg_reply.payload.memory.regions[msg_i].userspace_addr, 975 msg.payload.memory.regions[msg_i].userspace_addr, 976 msg_i, region_i); 977 msg_i++; 978 } 979 } 980 if (msg_i != fd_num) { 981 error_report("%s: postcopy reply not fully consumed " 982 "%d vs %zd", 983 __func__, msg_i, fd_num); 984 return -EIO; 985 } 986 987 /* 988 * Now we've registered this with the postcopy code, we ack to the 989 * client, because now we're in the position to be able to deal 990 * with any faults it generates. 991 */ 992 /* TODO: Use this for failure cases as well with a bad value. */ 993 msg.hdr.size = sizeof(msg.payload.u64); 994 msg.payload.u64 = 0; /* OK */ 995 ret = vhost_user_write(dev, &msg, NULL, 0); 996 if (ret < 0) { 997 return ret; 998 } 999 } 1000 1001 return 0; 1002 } 1003 1004 static int vhost_user_set_mem_table(struct vhost_dev *dev, 1005 struct vhost_memory *mem) 1006 { 1007 struct vhost_user *u = dev->opaque; 1008 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 1009 size_t fd_num = 0; 1010 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 1011 bool reply_supported = virtio_has_feature(dev->protocol_features, 1012 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1013 bool config_mem_slots = 1014 virtio_has_feature(dev->protocol_features, 1015 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1016 int ret; 1017 1018 if (do_postcopy) { 1019 /* 1020 * Postcopy has enough differences that it's best done in it's own 1021 * version 1022 */ 1023 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1024 config_mem_slots); 1025 } 1026 1027 VhostUserMsg msg = { 1028 .hdr.flags = VHOST_USER_VERSION, 1029 }; 1030 1031 if (reply_supported) { 1032 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1033 } 1034 1035 if (config_mem_slots) { 1036 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); 1037 if (ret < 0) { 1038 return ret; 1039 } 1040 } else { 1041 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1042 false); 1043 if (ret < 0) { 1044 return ret; 1045 } 1046 1047 ret = vhost_user_write(dev, &msg, fds, fd_num); 1048 if (ret < 0) { 1049 return ret; 1050 } 1051 1052 if (reply_supported) { 1053 return process_message_reply(dev, &msg); 1054 } 1055 } 1056 1057 return 0; 1058 } 1059 1060 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1061 struct vhost_vring_state *ring) 1062 { 1063 bool cross_endian = virtio_has_feature(dev->protocol_features, 1064 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1065 VhostUserMsg msg = { 1066 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1067 .hdr.flags = VHOST_USER_VERSION, 1068 .payload.state = *ring, 1069 .hdr.size = sizeof(msg.payload.state), 1070 }; 1071 1072 if (!cross_endian) { 1073 error_report("vhost-user trying to send unhandled ioctl"); 1074 return -ENOTSUP; 1075 } 1076 1077 return vhost_user_write(dev, &msg, NULL, 0); 1078 } 1079 1080 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1081 { 1082 int ret; 1083 VhostUserMsg msg = { 1084 .hdr.request = request, 1085 .hdr.flags = VHOST_USER_VERSION, 1086 }; 1087 1088 if (vhost_user_per_device_request(request) && dev->vq_index != 0) { 1089 return 0; 1090 } 1091 1092 ret = vhost_user_write(dev, &msg, NULL, 0); 1093 if (ret < 0) { 1094 return ret; 1095 } 1096 1097 ret = vhost_user_read(dev, &msg); 1098 if (ret < 0) { 1099 return ret; 1100 } 1101 1102 if (msg.hdr.request != request) { 1103 error_report("Received unexpected msg type. Expected %d received %d", 1104 request, msg.hdr.request); 1105 return -EPROTO; 1106 } 1107 1108 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1109 error_report("Received bad msg size."); 1110 return -EPROTO; 1111 } 1112 1113 *u64 = msg.payload.u64; 1114 1115 return 0; 1116 } 1117 1118 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1119 { 1120 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1121 return -EPROTO; 1122 } 1123 1124 return 0; 1125 } 1126 1127 /* Note: "msg->hdr.flags" may be modified. */ 1128 static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg, 1129 bool wait_for_reply) 1130 { 1131 int ret; 1132 1133 if (wait_for_reply) { 1134 bool reply_supported = virtio_has_feature(dev->protocol_features, 1135 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1136 if (reply_supported) { 1137 msg->hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1138 } 1139 } 1140 1141 ret = vhost_user_write(dev, msg, NULL, 0); 1142 if (ret < 0) { 1143 return ret; 1144 } 1145 1146 if (wait_for_reply) { 1147 uint64_t dummy; 1148 1149 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1150 return process_message_reply(dev, msg); 1151 } 1152 1153 /* 1154 * We need to wait for a reply but the backend does not 1155 * support replies for the command we just sent. 1156 * Send VHOST_USER_GET_FEATURES which makes all backends 1157 * send a reply. 1158 */ 1159 return vhost_user_get_features(dev, &dummy); 1160 } 1161 1162 return 0; 1163 } 1164 1165 static int vhost_set_vring(struct vhost_dev *dev, 1166 unsigned long int request, 1167 struct vhost_vring_state *ring, 1168 bool wait_for_reply) 1169 { 1170 VhostUserMsg msg = { 1171 .hdr.request = request, 1172 .hdr.flags = VHOST_USER_VERSION, 1173 .payload.state = *ring, 1174 .hdr.size = sizeof(msg.payload.state), 1175 }; 1176 1177 return vhost_user_write_sync(dev, &msg, wait_for_reply); 1178 } 1179 1180 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1181 struct vhost_vring_state *ring) 1182 { 1183 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring, false); 1184 } 1185 1186 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) 1187 { 1188 if (n->unmap_addr) { 1189 munmap(n->unmap_addr, qemu_real_host_page_size()); 1190 n->unmap_addr = NULL; 1191 } 1192 if (n->destroy) { 1193 memory_region_transaction_begin(); 1194 object_unparent(OBJECT(&n->mr)); 1195 memory_region_transaction_commit(); 1196 g_free(n); 1197 } 1198 } 1199 1200 /* 1201 * clean-up function for notifier, will finally free the structure 1202 * under rcu. 1203 */ 1204 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, 1205 VirtIODevice *vdev, bool destroy) 1206 { 1207 /* 1208 * if destroy == false and n->addr == NULL, we have nothing to do. 1209 * so, just return. 1210 */ 1211 if (!n || (!destroy && !n->addr)) { 1212 return; 1213 } 1214 1215 if (n->addr) { 1216 if (vdev) { 1217 memory_region_transaction_begin(); 1218 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); 1219 memory_region_transaction_commit(); 1220 } 1221 assert(!n->unmap_addr); 1222 n->unmap_addr = n->addr; 1223 n->addr = NULL; 1224 } 1225 n->destroy = destroy; 1226 call_rcu(n, vhost_user_host_notifier_free, rcu); 1227 } 1228 1229 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1230 struct vhost_vring_state *ring) 1231 { 1232 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring, false); 1233 } 1234 1235 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1236 { 1237 int i; 1238 1239 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1240 return -EINVAL; 1241 } 1242 1243 for (i = 0; i < dev->nvqs; ++i) { 1244 int ret; 1245 struct vhost_vring_state state = { 1246 .index = dev->vq_index + i, 1247 .num = enable, 1248 }; 1249 1250 /* 1251 * SET_VRING_ENABLE travels from guest to QEMU to vhost-user backend / 1252 * control plane thread via unix domain socket. Virtio requests travel 1253 * from guest to vhost-user backend / data plane thread via eventfd. 1254 * Even if the guest enables the ring first, and pushes its first virtio 1255 * request second (conforming to the virtio spec), the data plane thread 1256 * in the backend may see the virtio request before the control plane 1257 * thread sees the queue enablement. This causes (in fact, requires) the 1258 * data plane thread to discard the virtio request (it arrived on a 1259 * seemingly disabled queue). To prevent this out-of-order delivery, 1260 * don't let the guest proceed to pushing the virtio request until the 1261 * backend control plane acknowledges enabling the queue -- IOW, pass 1262 * wait_for_reply=true below. 1263 */ 1264 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state, true); 1265 if (ret < 0) { 1266 /* 1267 * Restoring the previous state is likely infeasible, as well as 1268 * proceeding regardless the error, so just bail out and hope for 1269 * the device-level recovery. 1270 */ 1271 return ret; 1272 } 1273 } 1274 1275 return 0; 1276 } 1277 1278 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u, 1279 int idx) 1280 { 1281 if (idx >= u->notifiers->len) { 1282 return NULL; 1283 } 1284 return g_ptr_array_index(u->notifiers, idx); 1285 } 1286 1287 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1288 struct vhost_vring_state *ring) 1289 { 1290 int ret; 1291 VhostUserMsg msg = { 1292 .hdr.request = VHOST_USER_GET_VRING_BASE, 1293 .hdr.flags = VHOST_USER_VERSION, 1294 .payload.state = *ring, 1295 .hdr.size = sizeof(msg.payload.state), 1296 }; 1297 struct vhost_user *u = dev->opaque; 1298 1299 VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); 1300 vhost_user_host_notifier_remove(n, dev->vdev, false); 1301 1302 ret = vhost_user_write(dev, &msg, NULL, 0); 1303 if (ret < 0) { 1304 return ret; 1305 } 1306 1307 ret = vhost_user_read(dev, &msg); 1308 if (ret < 0) { 1309 return ret; 1310 } 1311 1312 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1313 error_report("Received unexpected msg type. Expected %d received %d", 1314 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1315 return -EPROTO; 1316 } 1317 1318 if (msg.hdr.size != sizeof(msg.payload.state)) { 1319 error_report("Received bad msg size."); 1320 return -EPROTO; 1321 } 1322 1323 *ring = msg.payload.state; 1324 1325 return 0; 1326 } 1327 1328 static int vhost_set_vring_file(struct vhost_dev *dev, 1329 VhostUserRequest request, 1330 struct vhost_vring_file *file) 1331 { 1332 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1333 size_t fd_num = 0; 1334 VhostUserMsg msg = { 1335 .hdr.request = request, 1336 .hdr.flags = VHOST_USER_VERSION, 1337 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1338 .hdr.size = sizeof(msg.payload.u64), 1339 }; 1340 1341 if (file->fd > 0) { 1342 fds[fd_num++] = file->fd; 1343 } else { 1344 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1345 } 1346 1347 return vhost_user_write(dev, &msg, fds, fd_num); 1348 } 1349 1350 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1351 struct vhost_vring_file *file) 1352 { 1353 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1354 } 1355 1356 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1357 struct vhost_vring_file *file) 1358 { 1359 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1360 } 1361 1362 static int vhost_user_set_vring_err(struct vhost_dev *dev, 1363 struct vhost_vring_file *file) 1364 { 1365 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file); 1366 } 1367 1368 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1369 struct vhost_vring_addr *addr) 1370 { 1371 VhostUserMsg msg = { 1372 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1373 .hdr.flags = VHOST_USER_VERSION, 1374 .payload.addr = *addr, 1375 .hdr.size = sizeof(msg.payload.addr), 1376 }; 1377 1378 /* 1379 * wait for a reply if logging is enabled to make sure 1380 * backend is actually logging changes 1381 */ 1382 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); 1383 1384 return vhost_user_write_sync(dev, &msg, wait_for_reply); 1385 } 1386 1387 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1388 bool wait_for_reply) 1389 { 1390 VhostUserMsg msg = { 1391 .hdr.request = request, 1392 .hdr.flags = VHOST_USER_VERSION, 1393 .payload.u64 = u64, 1394 .hdr.size = sizeof(msg.payload.u64), 1395 }; 1396 1397 return vhost_user_write_sync(dev, &msg, wait_for_reply); 1398 } 1399 1400 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status) 1401 { 1402 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false); 1403 } 1404 1405 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status) 1406 { 1407 uint64_t value; 1408 int ret; 1409 1410 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value); 1411 if (ret < 0) { 1412 return ret; 1413 } 1414 *status = value; 1415 1416 return 0; 1417 } 1418 1419 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status) 1420 { 1421 uint8_t s; 1422 int ret; 1423 1424 ret = vhost_user_get_status(dev, &s); 1425 if (ret < 0) { 1426 return ret; 1427 } 1428 1429 if ((s & status) == status) { 1430 return 0; 1431 } 1432 s |= status; 1433 1434 return vhost_user_set_status(dev, s); 1435 } 1436 1437 static int vhost_user_set_features(struct vhost_dev *dev, 1438 uint64_t features) 1439 { 1440 /* 1441 * wait for a reply if logging is enabled to make sure 1442 * backend is actually logging changes 1443 */ 1444 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1445 int ret; 1446 1447 /* 1448 * We need to include any extra backend only feature bits that 1449 * might be needed by our device. Currently this includes the 1450 * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol 1451 * features. 1452 */ 1453 ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, 1454 features | dev->backend_features, 1455 log_enabled); 1456 1457 if (virtio_has_feature(dev->protocol_features, 1458 VHOST_USER_PROTOCOL_F_STATUS)) { 1459 if (!ret) { 1460 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 1461 } 1462 } 1463 1464 return ret; 1465 } 1466 1467 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1468 uint64_t features) 1469 { 1470 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1471 false); 1472 } 1473 1474 static int vhost_user_set_owner(struct vhost_dev *dev) 1475 { 1476 VhostUserMsg msg = { 1477 .hdr.request = VHOST_USER_SET_OWNER, 1478 .hdr.flags = VHOST_USER_VERSION, 1479 }; 1480 1481 return vhost_user_write(dev, &msg, NULL, 0); 1482 } 1483 1484 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1485 uint64_t *max_memslots) 1486 { 1487 uint64_t backend_max_memslots; 1488 int err; 1489 1490 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1491 &backend_max_memslots); 1492 if (err < 0) { 1493 return err; 1494 } 1495 1496 *max_memslots = backend_max_memslots; 1497 1498 return 0; 1499 } 1500 1501 static int vhost_user_reset_device(struct vhost_dev *dev) 1502 { 1503 VhostUserMsg msg = { 1504 .hdr.flags = VHOST_USER_VERSION, 1505 .hdr.request = VHOST_USER_RESET_DEVICE, 1506 }; 1507 1508 /* 1509 * Historically, reset was not implemented so only reset devices 1510 * that are expecting it. 1511 */ 1512 if (!virtio_has_feature(dev->protocol_features, 1513 VHOST_USER_PROTOCOL_F_RESET_DEVICE)) { 1514 return -ENOSYS; 1515 } 1516 1517 return vhost_user_write(dev, &msg, NULL, 0); 1518 } 1519 1520 static int vhost_user_backend_handle_config_change(struct vhost_dev *dev) 1521 { 1522 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1523 return -ENOSYS; 1524 } 1525 1526 return dev->config_ops->vhost_dev_config_notifier(dev); 1527 } 1528 1529 /* 1530 * Fetch or create the notifier for a given idx. Newly created 1531 * notifiers are added to the pointer array that tracks them. 1532 */ 1533 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, 1534 int idx) 1535 { 1536 VhostUserHostNotifier *n = NULL; 1537 if (idx >= u->notifiers->len) { 1538 g_ptr_array_set_size(u->notifiers, idx + 1); 1539 } 1540 1541 n = g_ptr_array_index(u->notifiers, idx); 1542 if (!n) { 1543 /* 1544 * In case notification arrive out-of-order, 1545 * make room for current index. 1546 */ 1547 g_ptr_array_remove_index(u->notifiers, idx); 1548 n = g_new0(VhostUserHostNotifier, 1); 1549 n->idx = idx; 1550 g_ptr_array_insert(u->notifiers, idx, n); 1551 trace_vhost_user_create_notifier(idx, n); 1552 } 1553 1554 return n; 1555 } 1556 1557 static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, 1558 VhostUserVringArea *area, 1559 int fd) 1560 { 1561 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1562 size_t page_size = qemu_real_host_page_size(); 1563 struct vhost_user *u = dev->opaque; 1564 VhostUserState *user = u->user; 1565 VirtIODevice *vdev = dev->vdev; 1566 VhostUserHostNotifier *n; 1567 void *addr; 1568 char *name; 1569 1570 if (!virtio_has_feature(dev->protocol_features, 1571 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1572 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1573 return -EINVAL; 1574 } 1575 1576 /* 1577 * Fetch notifier and invalidate any old data before setting up 1578 * new mapped address. 1579 */ 1580 n = fetch_or_create_notifier(user, queue_idx); 1581 vhost_user_host_notifier_remove(n, vdev, false); 1582 1583 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1584 return 0; 1585 } 1586 1587 /* Sanity check. */ 1588 if (area->size != page_size) { 1589 return -EINVAL; 1590 } 1591 1592 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1593 fd, area->offset); 1594 if (addr == MAP_FAILED) { 1595 return -EFAULT; 1596 } 1597 1598 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1599 user, queue_idx); 1600 if (!n->mr.ram) { /* Don't init again after suspend. */ 1601 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1602 page_size, addr); 1603 } else { 1604 n->mr.ram_block->host = addr; 1605 } 1606 g_free(name); 1607 1608 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1609 object_unparent(OBJECT(&n->mr)); 1610 munmap(addr, page_size); 1611 return -ENXIO; 1612 } 1613 1614 n->addr = addr; 1615 1616 return 0; 1617 } 1618 1619 static int 1620 vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev, 1621 VhostUserShared *object) 1622 { 1623 QemuUUID uuid; 1624 1625 memcpy(uuid.data, object->uuid, sizeof(object->uuid)); 1626 return !virtio_add_vhost_device(&uuid, dev); 1627 } 1628 1629 /* 1630 * Handle VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE backend requests. 1631 * 1632 * Return: 0 on success, 1 on error. 1633 */ 1634 static int 1635 vhost_user_backend_handle_shared_object_remove(struct vhost_dev *dev, 1636 VhostUserShared *object) 1637 { 1638 QemuUUID uuid; 1639 1640 memcpy(uuid.data, object->uuid, sizeof(object->uuid)); 1641 switch (virtio_object_type(&uuid)) { 1642 case TYPE_VHOST_DEV: 1643 { 1644 struct vhost_dev *owner = virtio_lookup_vhost_device(&uuid); 1645 if (dev != owner) { 1646 /* Not allowed to remove non-owned entries */ 1647 return 1; 1648 } 1649 break; 1650 } 1651 default: 1652 /* Not allowed to remove non-owned entries */ 1653 return 1; 1654 } 1655 1656 return !virtio_remove_resource(&uuid); 1657 } 1658 1659 static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr, 1660 VhostUserPayload *payload, Error **errp) 1661 { 1662 struct iovec iov[] = { 1663 { .iov_base = hdr, .iov_len = VHOST_USER_HDR_SIZE }, 1664 { .iov_base = payload, .iov_len = hdr->size }, 1665 }; 1666 1667 hdr->flags &= ~VHOST_USER_NEED_REPLY_MASK; 1668 hdr->flags |= VHOST_USER_REPLY_MASK; 1669 1670 return !qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), errp); 1671 } 1672 1673 static bool 1674 vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr, 1675 VhostUserPayload *payload, Error **errp) 1676 { 1677 hdr->size = sizeof(payload->u64); 1678 return vhost_user_send_resp(ioc, hdr, payload, errp); 1679 } 1680 1681 int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid, 1682 int *dmabuf_fd) 1683 { 1684 struct vhost_user *u = dev->opaque; 1685 CharBackend *chr = u->user->chr; 1686 int ret; 1687 VhostUserMsg msg = { 1688 .hdr.request = VHOST_USER_GET_SHARED_OBJECT, 1689 .hdr.flags = VHOST_USER_VERSION, 1690 }; 1691 memcpy(msg.payload.object.uuid, uuid, sizeof(msg.payload.object.uuid)); 1692 1693 ret = vhost_user_write(dev, &msg, NULL, 0); 1694 if (ret < 0) { 1695 return ret; 1696 } 1697 1698 ret = vhost_user_read(dev, &msg); 1699 if (ret < 0) { 1700 return ret; 1701 } 1702 1703 if (msg.hdr.request != VHOST_USER_GET_SHARED_OBJECT) { 1704 error_report("Received unexpected msg type. " 1705 "Expected %d received %d", 1706 VHOST_USER_GET_SHARED_OBJECT, msg.hdr.request); 1707 return -EPROTO; 1708 } 1709 1710 *dmabuf_fd = qemu_chr_fe_get_msgfd(chr); 1711 if (*dmabuf_fd < 0) { 1712 error_report("Failed to get dmabuf fd"); 1713 return -EIO; 1714 } 1715 1716 return 0; 1717 } 1718 1719 static int 1720 vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u, 1721 QIOChannel *ioc, 1722 VhostUserHeader *hdr, 1723 VhostUserPayload *payload) 1724 { 1725 QemuUUID uuid; 1726 CharBackend *chr = u->user->chr; 1727 Error *local_err = NULL; 1728 int dmabuf_fd = -1; 1729 int fd_num = 0; 1730 1731 memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid)); 1732 1733 payload->u64 = 0; 1734 switch (virtio_object_type(&uuid)) { 1735 case TYPE_DMABUF: 1736 dmabuf_fd = virtio_lookup_dmabuf(&uuid); 1737 break; 1738 case TYPE_VHOST_DEV: 1739 { 1740 struct vhost_dev *dev = virtio_lookup_vhost_device(&uuid); 1741 if (dev == NULL) { 1742 payload->u64 = -EINVAL; 1743 break; 1744 } 1745 int ret = vhost_user_get_shared_object(dev, uuid.data, &dmabuf_fd); 1746 if (ret < 0) { 1747 payload->u64 = ret; 1748 } 1749 break; 1750 } 1751 case TYPE_INVALID: 1752 payload->u64 = -EINVAL; 1753 break; 1754 } 1755 1756 if (dmabuf_fd != -1) { 1757 fd_num++; 1758 } 1759 1760 if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) { 1761 error_report("Failed to set msg fds."); 1762 payload->u64 = -EINVAL; 1763 } 1764 1765 if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload, &local_err)) { 1766 error_report_err(local_err); 1767 return -EINVAL; 1768 } 1769 1770 return 0; 1771 } 1772 1773 static void close_backend_channel(struct vhost_user *u) 1774 { 1775 g_source_destroy(u->backend_src); 1776 g_source_unref(u->backend_src); 1777 u->backend_src = NULL; 1778 object_unref(OBJECT(u->backend_ioc)); 1779 u->backend_ioc = NULL; 1780 } 1781 1782 static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, 1783 gpointer opaque) 1784 { 1785 struct vhost_dev *dev = opaque; 1786 struct vhost_user *u = dev->opaque; 1787 VhostUserHeader hdr = { 0, }; 1788 VhostUserPayload payload = { 0, }; 1789 Error *local_err = NULL; 1790 gboolean rc = G_SOURCE_CONTINUE; 1791 int ret = 0; 1792 struct iovec iov; 1793 g_autofree int *fd = NULL; 1794 size_t fdsize = 0; 1795 int i; 1796 1797 /* Read header */ 1798 iov.iov_base = &hdr; 1799 iov.iov_len = VHOST_USER_HDR_SIZE; 1800 1801 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1802 error_report_err(local_err); 1803 goto err; 1804 } 1805 1806 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1807 error_report("Failed to read msg header." 1808 " Size %d exceeds the maximum %zu.", hdr.size, 1809 VHOST_USER_PAYLOAD_SIZE); 1810 goto err; 1811 } 1812 1813 /* Read payload */ 1814 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1815 error_report_err(local_err); 1816 goto err; 1817 } 1818 1819 switch (hdr.request) { 1820 case VHOST_USER_BACKEND_IOTLB_MSG: 1821 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1822 break; 1823 case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: 1824 ret = vhost_user_backend_handle_config_change(dev); 1825 break; 1826 case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: 1827 ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, 1828 fd ? fd[0] : -1); 1829 break; 1830 case VHOST_USER_BACKEND_SHARED_OBJECT_ADD: 1831 ret = vhost_user_backend_handle_shared_object_add(dev, &payload.object); 1832 break; 1833 case VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE: 1834 ret = vhost_user_backend_handle_shared_object_remove(dev, 1835 &payload.object); 1836 break; 1837 case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP: 1838 ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc, 1839 &hdr, &payload); 1840 break; 1841 default: 1842 error_report("Received unexpected msg type: %d.", hdr.request); 1843 ret = -EINVAL; 1844 } 1845 1846 /* 1847 * REPLY_ACK feature handling. Other reply types has to be managed 1848 * directly in their request handlers. 1849 */ 1850 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1851 payload.u64 = !!ret; 1852 hdr.size = sizeof(payload.u64); 1853 1854 if (!vhost_user_send_resp(ioc, &hdr, &payload, &local_err)) { 1855 error_report_err(local_err); 1856 goto err; 1857 } 1858 } 1859 1860 goto fdcleanup; 1861 1862 err: 1863 close_backend_channel(u); 1864 rc = G_SOURCE_REMOVE; 1865 1866 fdcleanup: 1867 if (fd) { 1868 for (i = 0; i < fdsize; i++) { 1869 close(fd[i]); 1870 } 1871 } 1872 return rc; 1873 } 1874 1875 static int vhost_setup_backend_channel(struct vhost_dev *dev) 1876 { 1877 VhostUserMsg msg = { 1878 .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, 1879 .hdr.flags = VHOST_USER_VERSION, 1880 }; 1881 struct vhost_user *u = dev->opaque; 1882 int sv[2], ret = 0; 1883 bool reply_supported = virtio_has_feature(dev->protocol_features, 1884 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1885 Error *local_err = NULL; 1886 QIOChannel *ioc; 1887 1888 if (!virtio_has_feature(dev->protocol_features, 1889 VHOST_USER_PROTOCOL_F_BACKEND_REQ)) { 1890 return 0; 1891 } 1892 1893 if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1894 int saved_errno = errno; 1895 error_report("socketpair() failed"); 1896 return -saved_errno; 1897 } 1898 1899 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); 1900 if (!ioc) { 1901 error_report_err(local_err); 1902 return -ECONNREFUSED; 1903 } 1904 u->backend_ioc = ioc; 1905 u->backend_src = qio_channel_add_watch_source(u->backend_ioc, 1906 G_IO_IN | G_IO_HUP, 1907 backend_read, dev, NULL, NULL); 1908 1909 if (reply_supported) { 1910 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1911 } 1912 1913 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1914 if (ret) { 1915 goto out; 1916 } 1917 1918 if (reply_supported) { 1919 ret = process_message_reply(dev, &msg); 1920 } 1921 1922 out: 1923 close(sv[1]); 1924 if (ret) { 1925 close_backend_channel(u); 1926 } 1927 1928 return ret; 1929 } 1930 1931 #ifdef CONFIG_LINUX 1932 /* 1933 * Called back from the postcopy fault thread when a fault is received on our 1934 * ufd. 1935 * TODO: This is Linux specific 1936 */ 1937 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1938 void *ufd) 1939 { 1940 struct vhost_dev *dev = pcfd->data; 1941 struct vhost_user *u = dev->opaque; 1942 struct uffd_msg *msg = ufd; 1943 uint64_t faultaddr = msg->arg.pagefault.address; 1944 RAMBlock *rb = NULL; 1945 uint64_t rb_offset; 1946 int i; 1947 1948 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1949 dev->mem->nregions); 1950 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1951 trace_vhost_user_postcopy_fault_handler_loop(i, 1952 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1953 if (faultaddr >= u->postcopy_client_bases[i]) { 1954 /* Ofset of the fault address in the vhost region */ 1955 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1956 if (region_offset < dev->mem->regions[i].memory_size) { 1957 rb_offset = region_offset + u->region_rb_offset[i]; 1958 trace_vhost_user_postcopy_fault_handler_found(i, 1959 region_offset, rb_offset); 1960 rb = u->region_rb[i]; 1961 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1962 rb_offset); 1963 } 1964 } 1965 } 1966 error_report("%s: Failed to find region for fault %" PRIx64, 1967 __func__, faultaddr); 1968 return -1; 1969 } 1970 1971 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1972 uint64_t offset) 1973 { 1974 struct vhost_dev *dev = pcfd->data; 1975 struct vhost_user *u = dev->opaque; 1976 int i; 1977 1978 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1979 1980 if (!u) { 1981 return 0; 1982 } 1983 /* Translate the offset into an address in the clients address space */ 1984 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1985 if (u->region_rb[i] == rb && 1986 offset >= u->region_rb_offset[i] && 1987 offset < (u->region_rb_offset[i] + 1988 dev->mem->regions[i].memory_size)) { 1989 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1990 u->postcopy_client_bases[i]; 1991 trace_vhost_user_postcopy_waker_found(client_addr); 1992 return postcopy_wake_shared(pcfd, client_addr, rb); 1993 } 1994 } 1995 1996 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1997 return 0; 1998 } 1999 #endif 2000 2001 /* 2002 * Called at the start of an inbound postcopy on reception of the 2003 * 'advise' command. 2004 */ 2005 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 2006 { 2007 #ifdef CONFIG_LINUX 2008 struct vhost_user *u = dev->opaque; 2009 CharBackend *chr = u->user->chr; 2010 int ufd; 2011 int ret; 2012 VhostUserMsg msg = { 2013 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 2014 .hdr.flags = VHOST_USER_VERSION, 2015 }; 2016 2017 ret = vhost_user_write(dev, &msg, NULL, 0); 2018 if (ret < 0) { 2019 error_setg(errp, "Failed to send postcopy_advise to vhost"); 2020 return ret; 2021 } 2022 2023 ret = vhost_user_read(dev, &msg); 2024 if (ret < 0) { 2025 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 2026 return ret; 2027 } 2028 2029 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 2030 error_setg(errp, "Unexpected msg type. Expected %d received %d", 2031 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 2032 return -EPROTO; 2033 } 2034 2035 if (msg.hdr.size) { 2036 error_setg(errp, "Received bad msg size."); 2037 return -EPROTO; 2038 } 2039 ufd = qemu_chr_fe_get_msgfd(chr); 2040 if (ufd < 0) { 2041 error_setg(errp, "%s: Failed to get ufd", __func__); 2042 return -EIO; 2043 } 2044 qemu_socket_set_nonblock(ufd); 2045 2046 /* register ufd with userfault thread */ 2047 u->postcopy_fd.fd = ufd; 2048 u->postcopy_fd.data = dev; 2049 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 2050 u->postcopy_fd.waker = vhost_user_postcopy_waker; 2051 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 2052 postcopy_register_shared_ufd(&u->postcopy_fd); 2053 return 0; 2054 #else 2055 error_setg(errp, "Postcopy not supported on non-Linux systems"); 2056 return -ENOSYS; 2057 #endif 2058 } 2059 2060 /* 2061 * Called at the switch to postcopy on reception of the 'listen' command. 2062 */ 2063 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 2064 { 2065 struct vhost_user *u = dev->opaque; 2066 int ret; 2067 VhostUserMsg msg = { 2068 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 2069 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2070 }; 2071 u->postcopy_listen = true; 2072 2073 trace_vhost_user_postcopy_listen(); 2074 2075 ret = vhost_user_write(dev, &msg, NULL, 0); 2076 if (ret < 0) { 2077 error_setg(errp, "Failed to send postcopy_listen to vhost"); 2078 return ret; 2079 } 2080 2081 ret = process_message_reply(dev, &msg); 2082 if (ret) { 2083 error_setg(errp, "Failed to receive reply to postcopy_listen"); 2084 return ret; 2085 } 2086 2087 return 0; 2088 } 2089 2090 /* 2091 * Called at the end of postcopy 2092 */ 2093 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 2094 { 2095 VhostUserMsg msg = { 2096 .hdr.request = VHOST_USER_POSTCOPY_END, 2097 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2098 }; 2099 int ret; 2100 struct vhost_user *u = dev->opaque; 2101 2102 trace_vhost_user_postcopy_end_entry(); 2103 2104 ret = vhost_user_write(dev, &msg, NULL, 0); 2105 if (ret < 0) { 2106 error_setg(errp, "Failed to send postcopy_end to vhost"); 2107 return ret; 2108 } 2109 2110 ret = process_message_reply(dev, &msg); 2111 if (ret) { 2112 error_setg(errp, "Failed to receive reply to postcopy_end"); 2113 return ret; 2114 } 2115 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2116 close(u->postcopy_fd.fd); 2117 u->postcopy_fd.handler = NULL; 2118 2119 trace_vhost_user_postcopy_end_exit(); 2120 2121 return 0; 2122 } 2123 2124 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 2125 void *opaque, Error **errp) 2126 { 2127 struct PostcopyNotifyData *pnd = opaque; 2128 struct vhost_user *u = container_of(notifier, struct vhost_user, 2129 postcopy_notifier); 2130 struct vhost_dev *dev = u->dev; 2131 2132 switch (pnd->reason) { 2133 case POSTCOPY_NOTIFY_PROBE: 2134 if (!virtio_has_feature(dev->protocol_features, 2135 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 2136 /* TODO: Get the device name into this error somehow */ 2137 error_setg(errp, 2138 "vhost-user backend not capable of postcopy"); 2139 return -ENOENT; 2140 } 2141 break; 2142 2143 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 2144 return vhost_user_postcopy_advise(dev, errp); 2145 2146 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 2147 return vhost_user_postcopy_listen(dev, errp); 2148 2149 case POSTCOPY_NOTIFY_INBOUND_END: 2150 return vhost_user_postcopy_end(dev, errp); 2151 2152 default: 2153 /* We ignore notifications we don't know */ 2154 break; 2155 } 2156 2157 return 0; 2158 } 2159 2160 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 2161 Error **errp) 2162 { 2163 uint64_t features, ram_slots; 2164 struct vhost_user *u; 2165 VhostUserState *vus = (VhostUserState *) opaque; 2166 int err; 2167 2168 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2169 2170 u = g_new0(struct vhost_user, 1); 2171 u->user = vus; 2172 u->dev = dev; 2173 dev->opaque = u; 2174 2175 err = vhost_user_get_features(dev, &features); 2176 if (err < 0) { 2177 error_setg_errno(errp, -err, "vhost_backend_init failed"); 2178 return err; 2179 } 2180 2181 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 2182 bool supports_f_config = vus->supports_config || 2183 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); 2184 uint64_t protocol_features; 2185 2186 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 2187 2188 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 2189 &protocol_features); 2190 if (err < 0) { 2191 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2192 return -EPROTO; 2193 } 2194 2195 /* 2196 * We will use all the protocol features we support - although 2197 * we suppress F_CONFIG if we know QEMUs internal code can not support 2198 * it. 2199 */ 2200 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; 2201 2202 if (supports_f_config) { 2203 if (!virtio_has_feature(protocol_features, 2204 VHOST_USER_PROTOCOL_F_CONFIG)) { 2205 error_setg(errp, "vhost-user device expecting " 2206 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " 2207 "not support it."); 2208 return -EPROTO; 2209 } 2210 } else { 2211 if (virtio_has_feature(protocol_features, 2212 VHOST_USER_PROTOCOL_F_CONFIG)) { 2213 warn_report("vhost-user backend supports " 2214 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); 2215 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 2216 } 2217 } 2218 2219 /* final set of protocol features */ 2220 dev->protocol_features = protocol_features; 2221 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 2222 if (err < 0) { 2223 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2224 return -EPROTO; 2225 } 2226 2227 /* query the max queues we support if backend supports Multiple Queue */ 2228 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 2229 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2230 &dev->max_queues); 2231 if (err < 0) { 2232 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2233 return -EPROTO; 2234 } 2235 } else { 2236 dev->max_queues = 1; 2237 } 2238 2239 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2240 error_setg(errp, "The maximum number of queues supported by the " 2241 "backend is %" PRIu64, dev->max_queues); 2242 return -EINVAL; 2243 } 2244 2245 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2246 !(virtio_has_feature(dev->protocol_features, 2247 VHOST_USER_PROTOCOL_F_BACKEND_REQ) && 2248 virtio_has_feature(dev->protocol_features, 2249 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2250 error_setg(errp, "IOMMU support requires reply-ack and " 2251 "backend-req protocol features."); 2252 return -EINVAL; 2253 } 2254 2255 /* get max memory regions if backend supports configurable RAM slots */ 2256 if (!virtio_has_feature(dev->protocol_features, 2257 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2258 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2259 } else { 2260 err = vhost_user_get_max_memslots(dev, &ram_slots); 2261 if (err < 0) { 2262 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2263 return -EPROTO; 2264 } 2265 2266 if (ram_slots < u->user->memory_slots) { 2267 error_setg(errp, "The backend specified a max ram slots limit " 2268 "of %" PRIu64", when the prior validated limit was " 2269 "%d. This limit should never decrease.", ram_slots, 2270 u->user->memory_slots); 2271 return -EINVAL; 2272 } 2273 2274 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2275 } 2276 } 2277 2278 if (dev->migration_blocker == NULL && 2279 !virtio_has_feature(dev->protocol_features, 2280 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2281 error_setg(&dev->migration_blocker, 2282 "Migration disabled: vhost-user backend lacks " 2283 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2284 } 2285 2286 if (dev->vq_index == 0) { 2287 err = vhost_setup_backend_channel(dev); 2288 if (err < 0) { 2289 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2290 return -EPROTO; 2291 } 2292 } 2293 2294 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2295 postcopy_add_notifier(&u->postcopy_notifier); 2296 2297 return 0; 2298 } 2299 2300 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2301 { 2302 struct vhost_user *u; 2303 2304 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2305 2306 u = dev->opaque; 2307 if (u->postcopy_notifier.notify) { 2308 postcopy_remove_notifier(&u->postcopy_notifier); 2309 u->postcopy_notifier.notify = NULL; 2310 } 2311 u->postcopy_listen = false; 2312 if (u->postcopy_fd.handler) { 2313 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2314 close(u->postcopy_fd.fd); 2315 u->postcopy_fd.handler = NULL; 2316 } 2317 if (u->backend_ioc) { 2318 close_backend_channel(u); 2319 } 2320 g_free(u->region_rb); 2321 u->region_rb = NULL; 2322 g_free(u->region_rb_offset); 2323 u->region_rb_offset = NULL; 2324 u->region_rb_len = 0; 2325 g_free(u); 2326 dev->opaque = 0; 2327 2328 return 0; 2329 } 2330 2331 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2332 { 2333 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2334 2335 return idx; 2336 } 2337 2338 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2339 { 2340 struct vhost_user *u = dev->opaque; 2341 2342 return u->user->memory_slots; 2343 } 2344 2345 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2346 { 2347 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2348 2349 return virtio_has_feature(dev->protocol_features, 2350 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2351 } 2352 2353 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2354 { 2355 VhostUserMsg msg = { }; 2356 2357 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2358 2359 /* If guest supports GUEST_ANNOUNCE do nothing */ 2360 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2361 return 0; 2362 } 2363 2364 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2365 if (virtio_has_feature(dev->protocol_features, 2366 VHOST_USER_PROTOCOL_F_RARP)) { 2367 msg.hdr.request = VHOST_USER_SEND_RARP; 2368 msg.hdr.flags = VHOST_USER_VERSION; 2369 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2370 msg.hdr.size = sizeof(msg.payload.u64); 2371 2372 return vhost_user_write(dev, &msg, NULL, 0); 2373 } 2374 return -ENOTSUP; 2375 } 2376 2377 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2378 { 2379 VhostUserMsg msg; 2380 bool reply_supported = virtio_has_feature(dev->protocol_features, 2381 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2382 int ret; 2383 2384 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2385 return 0; 2386 } 2387 2388 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2389 msg.payload.u64 = mtu; 2390 msg.hdr.size = sizeof(msg.payload.u64); 2391 msg.hdr.flags = VHOST_USER_VERSION; 2392 if (reply_supported) { 2393 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2394 } 2395 2396 ret = vhost_user_write(dev, &msg, NULL, 0); 2397 if (ret < 0) { 2398 return ret; 2399 } 2400 2401 /* If reply_ack supported, backend has to ack specified MTU is valid */ 2402 if (reply_supported) { 2403 return process_message_reply(dev, &msg); 2404 } 2405 2406 return 0; 2407 } 2408 2409 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2410 struct vhost_iotlb_msg *imsg) 2411 { 2412 int ret; 2413 VhostUserMsg msg = { 2414 .hdr.request = VHOST_USER_IOTLB_MSG, 2415 .hdr.size = sizeof(msg.payload.iotlb), 2416 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2417 .payload.iotlb = *imsg, 2418 }; 2419 2420 ret = vhost_user_write(dev, &msg, NULL, 0); 2421 if (ret < 0) { 2422 return ret; 2423 } 2424 2425 return process_message_reply(dev, &msg); 2426 } 2427 2428 2429 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2430 { 2431 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2432 } 2433 2434 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2435 uint32_t config_len, Error **errp) 2436 { 2437 int ret; 2438 VhostUserMsg msg = { 2439 .hdr.request = VHOST_USER_GET_CONFIG, 2440 .hdr.flags = VHOST_USER_VERSION, 2441 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2442 }; 2443 2444 if (!virtio_has_feature(dev->protocol_features, 2445 VHOST_USER_PROTOCOL_F_CONFIG)) { 2446 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2447 return -EINVAL; 2448 } 2449 2450 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2451 2452 msg.payload.config.offset = 0; 2453 msg.payload.config.size = config_len; 2454 ret = vhost_user_write(dev, &msg, NULL, 0); 2455 if (ret < 0) { 2456 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2457 return ret; 2458 } 2459 2460 ret = vhost_user_read(dev, &msg); 2461 if (ret < 0) { 2462 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2463 return ret; 2464 } 2465 2466 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2467 error_setg(errp, 2468 "Received unexpected msg type. Expected %d received %d", 2469 VHOST_USER_GET_CONFIG, msg.hdr.request); 2470 return -EPROTO; 2471 } 2472 2473 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2474 error_setg(errp, "Received bad msg size."); 2475 return -EPROTO; 2476 } 2477 2478 memcpy(config, msg.payload.config.region, config_len); 2479 2480 return 0; 2481 } 2482 2483 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2484 uint32_t offset, uint32_t size, uint32_t flags) 2485 { 2486 int ret; 2487 uint8_t *p; 2488 bool reply_supported = virtio_has_feature(dev->protocol_features, 2489 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2490 2491 VhostUserMsg msg = { 2492 .hdr.request = VHOST_USER_SET_CONFIG, 2493 .hdr.flags = VHOST_USER_VERSION, 2494 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2495 }; 2496 2497 if (!virtio_has_feature(dev->protocol_features, 2498 VHOST_USER_PROTOCOL_F_CONFIG)) { 2499 return -ENOTSUP; 2500 } 2501 2502 if (reply_supported) { 2503 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2504 } 2505 2506 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2507 return -EINVAL; 2508 } 2509 2510 msg.payload.config.offset = offset, 2511 msg.payload.config.size = size, 2512 msg.payload.config.flags = flags, 2513 p = msg.payload.config.region; 2514 memcpy(p, data, size); 2515 2516 ret = vhost_user_write(dev, &msg, NULL, 0); 2517 if (ret < 0) { 2518 return ret; 2519 } 2520 2521 if (reply_supported) { 2522 return process_message_reply(dev, &msg); 2523 } 2524 2525 return 0; 2526 } 2527 2528 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2529 void *session_info, 2530 uint64_t *session_id) 2531 { 2532 int ret; 2533 bool crypto_session = virtio_has_feature(dev->protocol_features, 2534 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2535 CryptoDevBackendSessionInfo *backend_info = session_info; 2536 VhostUserMsg msg = { 2537 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2538 .hdr.flags = VHOST_USER_VERSION, 2539 .hdr.size = sizeof(msg.payload.session), 2540 }; 2541 2542 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2543 2544 if (!crypto_session) { 2545 error_report("vhost-user trying to send unhandled ioctl"); 2546 return -ENOTSUP; 2547 } 2548 2549 if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) { 2550 CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info; 2551 size_t keylen; 2552 2553 memcpy(&msg.payload.session.u.asym.session_setup_data, sess, 2554 sizeof(CryptoDevBackendAsymSessionInfo)); 2555 if (sess->keylen) { 2556 keylen = sizeof(msg.payload.session.u.asym.key); 2557 if (sess->keylen > keylen) { 2558 error_report("Unsupported asymmetric key size"); 2559 return -ENOTSUP; 2560 } 2561 2562 memcpy(&msg.payload.session.u.asym.key, sess->key, 2563 sess->keylen); 2564 } 2565 } else { 2566 CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info; 2567 size_t keylen; 2568 2569 memcpy(&msg.payload.session.u.sym.session_setup_data, sess, 2570 sizeof(CryptoDevBackendSymSessionInfo)); 2571 if (sess->key_len) { 2572 keylen = sizeof(msg.payload.session.u.sym.key); 2573 if (sess->key_len > keylen) { 2574 error_report("Unsupported cipher key size"); 2575 return -ENOTSUP; 2576 } 2577 2578 memcpy(&msg.payload.session.u.sym.key, sess->cipher_key, 2579 sess->key_len); 2580 } 2581 2582 if (sess->auth_key_len > 0) { 2583 keylen = sizeof(msg.payload.session.u.sym.auth_key); 2584 if (sess->auth_key_len > keylen) { 2585 error_report("Unsupported auth key size"); 2586 return -ENOTSUP; 2587 } 2588 2589 memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key, 2590 sess->auth_key_len); 2591 } 2592 } 2593 2594 msg.payload.session.op_code = backend_info->op_code; 2595 msg.payload.session.session_id = backend_info->session_id; 2596 ret = vhost_user_write(dev, &msg, NULL, 0); 2597 if (ret < 0) { 2598 error_report("vhost_user_write() return %d, create session failed", 2599 ret); 2600 return ret; 2601 } 2602 2603 ret = vhost_user_read(dev, &msg); 2604 if (ret < 0) { 2605 error_report("vhost_user_read() return %d, create session failed", 2606 ret); 2607 return ret; 2608 } 2609 2610 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2611 error_report("Received unexpected msg type. Expected %d received %d", 2612 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2613 return -EPROTO; 2614 } 2615 2616 if (msg.hdr.size != sizeof(msg.payload.session)) { 2617 error_report("Received bad msg size."); 2618 return -EPROTO; 2619 } 2620 2621 if (msg.payload.session.session_id < 0) { 2622 error_report("Bad session id: %" PRId64 "", 2623 msg.payload.session.session_id); 2624 return -EINVAL; 2625 } 2626 *session_id = msg.payload.session.session_id; 2627 2628 return 0; 2629 } 2630 2631 static int 2632 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2633 { 2634 int ret; 2635 bool crypto_session = virtio_has_feature(dev->protocol_features, 2636 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2637 VhostUserMsg msg = { 2638 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2639 .hdr.flags = VHOST_USER_VERSION, 2640 .hdr.size = sizeof(msg.payload.u64), 2641 }; 2642 msg.payload.u64 = session_id; 2643 2644 if (!crypto_session) { 2645 error_report("vhost-user trying to send unhandled ioctl"); 2646 return -ENOTSUP; 2647 } 2648 2649 ret = vhost_user_write(dev, &msg, NULL, 0); 2650 if (ret < 0) { 2651 error_report("vhost_user_write() return %d, close session failed", 2652 ret); 2653 return ret; 2654 } 2655 2656 return 0; 2657 } 2658 2659 static bool vhost_user_no_private_memslots(struct vhost_dev *dev) 2660 { 2661 return true; 2662 } 2663 2664 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2665 uint16_t queue_size, 2666 struct vhost_inflight *inflight) 2667 { 2668 void *addr; 2669 int fd; 2670 int ret; 2671 struct vhost_user *u = dev->opaque; 2672 CharBackend *chr = u->user->chr; 2673 VhostUserMsg msg = { 2674 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2675 .hdr.flags = VHOST_USER_VERSION, 2676 .payload.inflight.num_queues = dev->nvqs, 2677 .payload.inflight.queue_size = queue_size, 2678 .hdr.size = sizeof(msg.payload.inflight), 2679 }; 2680 2681 if (!virtio_has_feature(dev->protocol_features, 2682 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2683 return 0; 2684 } 2685 2686 ret = vhost_user_write(dev, &msg, NULL, 0); 2687 if (ret < 0) { 2688 return ret; 2689 } 2690 2691 ret = vhost_user_read(dev, &msg); 2692 if (ret < 0) { 2693 return ret; 2694 } 2695 2696 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2697 error_report("Received unexpected msg type. " 2698 "Expected %d received %d", 2699 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2700 return -EPROTO; 2701 } 2702 2703 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2704 error_report("Received bad msg size."); 2705 return -EPROTO; 2706 } 2707 2708 if (!msg.payload.inflight.mmap_size) { 2709 return 0; 2710 } 2711 2712 fd = qemu_chr_fe_get_msgfd(chr); 2713 if (fd < 0) { 2714 error_report("Failed to get mem fd"); 2715 return -EIO; 2716 } 2717 2718 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2719 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2720 2721 if (addr == MAP_FAILED) { 2722 error_report("Failed to mmap mem fd"); 2723 close(fd); 2724 return -EFAULT; 2725 } 2726 2727 inflight->addr = addr; 2728 inflight->fd = fd; 2729 inflight->size = msg.payload.inflight.mmap_size; 2730 inflight->offset = msg.payload.inflight.mmap_offset; 2731 inflight->queue_size = queue_size; 2732 2733 return 0; 2734 } 2735 2736 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2737 struct vhost_inflight *inflight) 2738 { 2739 VhostUserMsg msg = { 2740 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2741 .hdr.flags = VHOST_USER_VERSION, 2742 .payload.inflight.mmap_size = inflight->size, 2743 .payload.inflight.mmap_offset = inflight->offset, 2744 .payload.inflight.num_queues = dev->nvqs, 2745 .payload.inflight.queue_size = inflight->queue_size, 2746 .hdr.size = sizeof(msg.payload.inflight), 2747 }; 2748 2749 if (!virtio_has_feature(dev->protocol_features, 2750 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2751 return 0; 2752 } 2753 2754 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2755 } 2756 2757 static void vhost_user_state_destroy(gpointer data) 2758 { 2759 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; 2760 vhost_user_host_notifier_remove(n, NULL, true); 2761 } 2762 2763 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2764 { 2765 if (user->chr) { 2766 error_setg(errp, "Cannot initialize vhost-user state"); 2767 return false; 2768 } 2769 user->chr = chr; 2770 user->memory_slots = 0; 2771 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, 2772 &vhost_user_state_destroy); 2773 return true; 2774 } 2775 2776 void vhost_user_cleanup(VhostUserState *user) 2777 { 2778 if (!user->chr) { 2779 return; 2780 } 2781 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); 2782 user->chr = NULL; 2783 } 2784 2785 2786 typedef struct { 2787 vu_async_close_fn cb; 2788 DeviceState *dev; 2789 CharBackend *cd; 2790 struct vhost_dev *vhost; 2791 } VhostAsyncCallback; 2792 2793 static void vhost_user_async_close_bh(void *opaque) 2794 { 2795 VhostAsyncCallback *data = opaque; 2796 2797 data->cb(data->dev); 2798 2799 g_free(data); 2800 } 2801 2802 /* 2803 * We only schedule the work if the machine is running. If suspended 2804 * we want to keep all the in-flight data as is for migration 2805 * purposes. 2806 */ 2807 void vhost_user_async_close(DeviceState *d, 2808 CharBackend *chardev, struct vhost_dev *vhost, 2809 vu_async_close_fn cb) 2810 { 2811 if (!runstate_check(RUN_STATE_SHUTDOWN)) { 2812 /* 2813 * A close event may happen during a read/write, but vhost 2814 * code assumes the vhost_dev remains setup, so delay the 2815 * stop & clear. 2816 */ 2817 AioContext *ctx = qemu_get_current_aio_context(); 2818 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1); 2819 2820 /* Save data for the callback */ 2821 data->cb = cb; 2822 data->dev = d; 2823 data->cd = chardev; 2824 data->vhost = vhost; 2825 2826 /* Disable any further notifications on the chardev */ 2827 qemu_chr_fe_set_handlers(chardev, 2828 NULL, NULL, NULL, NULL, NULL, NULL, 2829 false); 2830 2831 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data); 2832 2833 /* 2834 * Move vhost device to the stopped state. The vhost-user device 2835 * will be clean up and disconnected in BH. This can be useful in 2836 * the vhost migration code. If disconnect was caught there is an 2837 * option for the general vhost code to get the dev state without 2838 * knowing its type (in this case vhost-user). 2839 * 2840 * Note if the vhost device is fully cleared by the time we 2841 * execute the bottom half we won't continue with the cleanup. 2842 */ 2843 vhost->started = false; 2844 } 2845 } 2846 2847 static int vhost_user_dev_start(struct vhost_dev *dev, bool started) 2848 { 2849 if (!virtio_has_feature(dev->protocol_features, 2850 VHOST_USER_PROTOCOL_F_STATUS)) { 2851 return 0; 2852 } 2853 2854 /* Set device status only for last queue pair */ 2855 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2856 return 0; 2857 } 2858 2859 if (started) { 2860 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 2861 VIRTIO_CONFIG_S_DRIVER | 2862 VIRTIO_CONFIG_S_DRIVER_OK); 2863 } else { 2864 return 0; 2865 } 2866 } 2867 2868 static void vhost_user_reset_status(struct vhost_dev *dev) 2869 { 2870 /* Set device status only for last queue pair */ 2871 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2872 return; 2873 } 2874 2875 if (virtio_has_feature(dev->protocol_features, 2876 VHOST_USER_PROTOCOL_F_STATUS)) { 2877 vhost_user_set_status(dev, 0); 2878 } 2879 } 2880 2881 static bool vhost_user_supports_device_state(struct vhost_dev *dev) 2882 { 2883 return virtio_has_feature(dev->protocol_features, 2884 VHOST_USER_PROTOCOL_F_DEVICE_STATE); 2885 } 2886 2887 static int vhost_user_set_device_state_fd(struct vhost_dev *dev, 2888 VhostDeviceStateDirection direction, 2889 VhostDeviceStatePhase phase, 2890 int fd, 2891 int *reply_fd, 2892 Error **errp) 2893 { 2894 int ret; 2895 struct vhost_user *vu = dev->opaque; 2896 VhostUserMsg msg = { 2897 .hdr = { 2898 .request = VHOST_USER_SET_DEVICE_STATE_FD, 2899 .flags = VHOST_USER_VERSION, 2900 .size = sizeof(msg.payload.transfer_state), 2901 }, 2902 .payload.transfer_state = { 2903 .direction = direction, 2904 .phase = phase, 2905 }, 2906 }; 2907 2908 *reply_fd = -1; 2909 2910 if (!vhost_user_supports_device_state(dev)) { 2911 close(fd); 2912 error_setg(errp, "Back-end does not support migration state transfer"); 2913 return -ENOTSUP; 2914 } 2915 2916 ret = vhost_user_write(dev, &msg, &fd, 1); 2917 close(fd); 2918 if (ret < 0) { 2919 error_setg_errno(errp, -ret, 2920 "Failed to send SET_DEVICE_STATE_FD message"); 2921 return ret; 2922 } 2923 2924 ret = vhost_user_read(dev, &msg); 2925 if (ret < 0) { 2926 error_setg_errno(errp, -ret, 2927 "Failed to receive SET_DEVICE_STATE_FD reply"); 2928 return ret; 2929 } 2930 2931 if (msg.hdr.request != VHOST_USER_SET_DEVICE_STATE_FD) { 2932 error_setg(errp, 2933 "Received unexpected message type, expected %d, received %d", 2934 VHOST_USER_SET_DEVICE_STATE_FD, msg.hdr.request); 2935 return -EPROTO; 2936 } 2937 2938 if (msg.hdr.size != sizeof(msg.payload.u64)) { 2939 error_setg(errp, 2940 "Received bad message size, expected %zu, received %" PRIu32, 2941 sizeof(msg.payload.u64), msg.hdr.size); 2942 return -EPROTO; 2943 } 2944 2945 if ((msg.payload.u64 & 0xff) != 0) { 2946 error_setg(errp, "Back-end did not accept migration state transfer"); 2947 return -EIO; 2948 } 2949 2950 if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) { 2951 *reply_fd = qemu_chr_fe_get_msgfd(vu->user->chr); 2952 if (*reply_fd < 0) { 2953 error_setg(errp, 2954 "Failed to get back-end-provided transfer pipe FD"); 2955 *reply_fd = -1; 2956 return -EIO; 2957 } 2958 } 2959 2960 return 0; 2961 } 2962 2963 static int vhost_user_check_device_state(struct vhost_dev *dev, Error **errp) 2964 { 2965 int ret; 2966 VhostUserMsg msg = { 2967 .hdr = { 2968 .request = VHOST_USER_CHECK_DEVICE_STATE, 2969 .flags = VHOST_USER_VERSION, 2970 .size = 0, 2971 }, 2972 }; 2973 2974 if (!vhost_user_supports_device_state(dev)) { 2975 error_setg(errp, "Back-end does not support migration state transfer"); 2976 return -ENOTSUP; 2977 } 2978 2979 ret = vhost_user_write(dev, &msg, NULL, 0); 2980 if (ret < 0) { 2981 error_setg_errno(errp, -ret, 2982 "Failed to send CHECK_DEVICE_STATE message"); 2983 return ret; 2984 } 2985 2986 ret = vhost_user_read(dev, &msg); 2987 if (ret < 0) { 2988 error_setg_errno(errp, -ret, 2989 "Failed to receive CHECK_DEVICE_STATE reply"); 2990 return ret; 2991 } 2992 2993 if (msg.hdr.request != VHOST_USER_CHECK_DEVICE_STATE) { 2994 error_setg(errp, 2995 "Received unexpected message type, expected %d, received %d", 2996 VHOST_USER_CHECK_DEVICE_STATE, msg.hdr.request); 2997 return -EPROTO; 2998 } 2999 3000 if (msg.hdr.size != sizeof(msg.payload.u64)) { 3001 error_setg(errp, 3002 "Received bad message size, expected %zu, received %" PRIu32, 3003 sizeof(msg.payload.u64), msg.hdr.size); 3004 return -EPROTO; 3005 } 3006 3007 if (msg.payload.u64 != 0) { 3008 error_setg(errp, "Back-end failed to process its internal state"); 3009 return -EIO; 3010 } 3011 3012 return 0; 3013 } 3014 3015 const VhostOps user_ops = { 3016 .backend_type = VHOST_BACKEND_TYPE_USER, 3017 .vhost_backend_init = vhost_user_backend_init, 3018 .vhost_backend_cleanup = vhost_user_backend_cleanup, 3019 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 3020 .vhost_backend_no_private_memslots = vhost_user_no_private_memslots, 3021 .vhost_set_log_base = vhost_user_set_log_base, 3022 .vhost_set_mem_table = vhost_user_set_mem_table, 3023 .vhost_set_vring_addr = vhost_user_set_vring_addr, 3024 .vhost_set_vring_endian = vhost_user_set_vring_endian, 3025 .vhost_set_vring_num = vhost_user_set_vring_num, 3026 .vhost_set_vring_base = vhost_user_set_vring_base, 3027 .vhost_get_vring_base = vhost_user_get_vring_base, 3028 .vhost_set_vring_kick = vhost_user_set_vring_kick, 3029 .vhost_set_vring_call = vhost_user_set_vring_call, 3030 .vhost_set_vring_err = vhost_user_set_vring_err, 3031 .vhost_set_features = vhost_user_set_features, 3032 .vhost_get_features = vhost_user_get_features, 3033 .vhost_set_owner = vhost_user_set_owner, 3034 .vhost_reset_device = vhost_user_reset_device, 3035 .vhost_get_vq_index = vhost_user_get_vq_index, 3036 .vhost_set_vring_enable = vhost_user_set_vring_enable, 3037 .vhost_requires_shm_log = vhost_user_requires_shm_log, 3038 .vhost_migration_done = vhost_user_migration_done, 3039 .vhost_net_set_mtu = vhost_user_net_set_mtu, 3040 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 3041 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 3042 .vhost_get_config = vhost_user_get_config, 3043 .vhost_set_config = vhost_user_set_config, 3044 .vhost_crypto_create_session = vhost_user_crypto_create_session, 3045 .vhost_crypto_close_session = vhost_user_crypto_close_session, 3046 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 3047 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 3048 .vhost_dev_start = vhost_user_dev_start, 3049 .vhost_reset_status = vhost_user_reset_status, 3050 .vhost_supports_device_state = vhost_user_supports_device_state, 3051 .vhost_set_device_state_fd = vhost_user_set_device_state_fd, 3052 .vhost_check_device_state = vhost_user_check_device_state, 3053 }; 3054