1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/virtio-crypto.h" 15 #include "hw/virtio/vhost-user.h" 16 #include "hw/virtio/vhost-backend.h" 17 #include "hw/virtio/virtio.h" 18 #include "hw/virtio/virtio-net.h" 19 #include "chardev/char-fe.h" 20 #include "io/channel-socket.h" 21 #include "sysemu/kvm.h" 22 #include "qemu/error-report.h" 23 #include "qemu/main-loop.h" 24 #include "qemu/sockets.h" 25 #include "sysemu/runstate.h" 26 #include "sysemu/cryptodev.h" 27 #include "migration/migration.h" 28 #include "migration/postcopy-ram.h" 29 #include "trace.h" 30 #include "exec/ramblock.h" 31 32 #include <sys/ioctl.h> 33 #include <sys/socket.h> 34 #include <sys/un.h> 35 36 #include "standard-headers/linux/vhost_types.h" 37 38 #ifdef CONFIG_LINUX 39 #include <linux/userfaultfd.h> 40 #endif 41 42 #define VHOST_MEMORY_BASELINE_NREGIONS 8 43 #define VHOST_USER_F_PROTOCOL_FEATURES 30 44 #define VHOST_USER_BACKEND_MAX_FDS 8 45 46 #if defined(TARGET_PPC) || defined(TARGET_PPC64) 47 #include "hw/ppc/spapr.h" 48 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 49 50 #else 51 #define VHOST_USER_MAX_RAM_SLOTS 512 52 #endif 53 54 /* 55 * Maximum size of virtio device config space 56 */ 57 #define VHOST_USER_MAX_CONFIG_SIZE 256 58 59 enum VhostUserProtocolFeature { 60 VHOST_USER_PROTOCOL_F_MQ = 0, 61 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 62 VHOST_USER_PROTOCOL_F_RARP = 2, 63 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 64 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 65 VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, 66 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 67 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 68 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 69 VHOST_USER_PROTOCOL_F_CONFIG = 9, 70 VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, 71 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 72 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 73 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, 74 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. 
*/ 75 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, 76 VHOST_USER_PROTOCOL_F_STATUS = 16, 77 VHOST_USER_PROTOCOL_F_MAX 78 }; 79 80 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 81 82 typedef enum VhostUserRequest { 83 VHOST_USER_NONE = 0, 84 VHOST_USER_GET_FEATURES = 1, 85 VHOST_USER_SET_FEATURES = 2, 86 VHOST_USER_SET_OWNER = 3, 87 VHOST_USER_RESET_OWNER = 4, 88 VHOST_USER_SET_MEM_TABLE = 5, 89 VHOST_USER_SET_LOG_BASE = 6, 90 VHOST_USER_SET_LOG_FD = 7, 91 VHOST_USER_SET_VRING_NUM = 8, 92 VHOST_USER_SET_VRING_ADDR = 9, 93 VHOST_USER_SET_VRING_BASE = 10, 94 VHOST_USER_GET_VRING_BASE = 11, 95 VHOST_USER_SET_VRING_KICK = 12, 96 VHOST_USER_SET_VRING_CALL = 13, 97 VHOST_USER_SET_VRING_ERR = 14, 98 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 99 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 100 VHOST_USER_GET_QUEUE_NUM = 17, 101 VHOST_USER_SET_VRING_ENABLE = 18, 102 VHOST_USER_SEND_RARP = 19, 103 VHOST_USER_NET_SET_MTU = 20, 104 VHOST_USER_SET_BACKEND_REQ_FD = 21, 105 VHOST_USER_IOTLB_MSG = 22, 106 VHOST_USER_SET_VRING_ENDIAN = 23, 107 VHOST_USER_GET_CONFIG = 24, 108 VHOST_USER_SET_CONFIG = 25, 109 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 110 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 111 VHOST_USER_POSTCOPY_ADVISE = 28, 112 VHOST_USER_POSTCOPY_LISTEN = 29, 113 VHOST_USER_POSTCOPY_END = 30, 114 VHOST_USER_GET_INFLIGHT_FD = 31, 115 VHOST_USER_SET_INFLIGHT_FD = 32, 116 VHOST_USER_GPU_SET_SOCKET = 33, 117 VHOST_USER_RESET_DEVICE = 34, 118 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ 119 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 120 VHOST_USER_ADD_MEM_REG = 37, 121 VHOST_USER_REM_MEM_REG = 38, 122 VHOST_USER_SET_STATUS = 39, 123 VHOST_USER_GET_STATUS = 40, 124 VHOST_USER_MAX 125 } VhostUserRequest; 126 127 typedef enum VhostUserBackendRequest { 128 VHOST_USER_BACKEND_NONE = 0, 129 VHOST_USER_BACKEND_IOTLB_MSG = 1, 130 VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, 131 VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, 132 VHOST_USER_BACKEND_MAX 133 } VhostUserBackendRequest; 134 135 typedef struct VhostUserMemoryRegion { 136 uint64_t guest_phys_addr; 137 uint64_t memory_size; 138 uint64_t userspace_addr; 139 uint64_t mmap_offset; 140 } VhostUserMemoryRegion; 141 142 typedef struct VhostUserMemory { 143 uint32_t nregions; 144 uint32_t padding; 145 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 146 } VhostUserMemory; 147 148 typedef struct VhostUserMemRegMsg { 149 uint64_t padding; 150 VhostUserMemoryRegion region; 151 } VhostUserMemRegMsg; 152 153 typedef struct VhostUserLog { 154 uint64_t mmap_size; 155 uint64_t mmap_offset; 156 } VhostUserLog; 157 158 typedef struct VhostUserConfig { 159 uint32_t offset; 160 uint32_t size; 161 uint32_t flags; 162 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 163 } VhostUserConfig; 164 165 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 166 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 167 #define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024 168 169 typedef struct VhostUserCryptoSession { 170 uint64_t op_code; 171 union { 172 struct { 173 CryptoDevBackendSymSessionInfo session_setup_data; 174 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 175 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 176 } sym; 177 struct { 178 CryptoDevBackendAsymSessionInfo session_setup_data; 179 uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN]; 180 } asym; 181 } u; 182 183 /* session id for success, -1 on errors */ 184 int64_t session_id; 185 } VhostUserCryptoSession; 186 187 static VhostUserConfig c __attribute__ ((unused)); 188 #define VHOST_USER_CONFIG_HDR_SIZE 
(sizeof(c.offset) \ 189 + sizeof(c.size) \ 190 + sizeof(c.flags)) 191 192 typedef struct VhostUserVringArea { 193 uint64_t u64; 194 uint64_t size; 195 uint64_t offset; 196 } VhostUserVringArea; 197 198 typedef struct VhostUserInflight { 199 uint64_t mmap_size; 200 uint64_t mmap_offset; 201 uint16_t num_queues; 202 uint16_t queue_size; 203 } VhostUserInflight; 204 205 typedef struct { 206 VhostUserRequest request; 207 208 #define VHOST_USER_VERSION_MASK (0x3) 209 #define VHOST_USER_REPLY_MASK (0x1 << 2) 210 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 211 uint32_t flags; 212 uint32_t size; /* the following payload size */ 213 } QEMU_PACKED VhostUserHeader; 214 215 typedef union { 216 #define VHOST_USER_VRING_IDX_MASK (0xff) 217 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) 218 uint64_t u64; 219 struct vhost_vring_state state; 220 struct vhost_vring_addr addr; 221 VhostUserMemory memory; 222 VhostUserMemRegMsg mem_reg; 223 VhostUserLog log; 224 struct vhost_iotlb_msg iotlb; 225 VhostUserConfig config; 226 VhostUserCryptoSession session; 227 VhostUserVringArea area; 228 VhostUserInflight inflight; 229 } VhostUserPayload; 230 231 typedef struct VhostUserMsg { 232 VhostUserHeader hdr; 233 VhostUserPayload payload; 234 } QEMU_PACKED VhostUserMsg; 235 236 static VhostUserMsg m __attribute__ ((unused)); 237 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 238 239 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 240 241 /* The version of the protocol we support */ 242 #define VHOST_USER_VERSION (0x1) 243 244 struct vhost_user { 245 struct vhost_dev *dev; 246 /* Shared between vhost devs of the same virtio device */ 247 VhostUserState *user; 248 QIOChannel *backend_ioc; 249 GSource *backend_src; 250 NotifierWithReturn postcopy_notifier; 251 struct PostCopyFD postcopy_fd; 252 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 253 /* Length of the region_rb and region_rb_offset arrays */ 254 size_t region_rb_len; 255 /* RAMBlock associated with a given region */ 256 RAMBlock **region_rb; 257 /* 258 * The offset from the start of the RAMBlock to the start of the 259 * vhost region. 260 */ 261 ram_addr_t *region_rb_offset; 262 263 /* True once we've entered postcopy_listen */ 264 bool postcopy_listen; 265 266 /* Our current regions */ 267 int num_shadow_regions; 268 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 269 }; 270 271 struct scrub_regions { 272 struct vhost_memory_region *region; 273 int reg_idx; 274 int fd_idx; 275 }; 276 277 static bool ioeventfd_enabled(void) 278 { 279 return !kvm_enabled() || kvm_eventfds_enabled(); 280 } 281 282 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 283 { 284 struct vhost_user *u = dev->opaque; 285 CharBackend *chr = u->user->chr; 286 uint8_t *p = (uint8_t *) msg; 287 int r, size = VHOST_USER_HDR_SIZE; 288 289 r = qemu_chr_fe_read_all(chr, p, size); 290 if (r != size) { 291 int saved_errno = errno; 292 error_report("Failed to read msg header. Read %d instead of %d." 293 " Original request %d.", r, size, msg->hdr.request); 294 return r < 0 ? -saved_errno : -EIO; 295 } 296 297 /* validate received flags */ 298 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 299 error_report("Failed to read msg header." 
300 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 301 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 302 return -EPROTO; 303 } 304 305 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags); 306 307 return 0; 308 } 309 310 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 311 { 312 struct vhost_user *u = dev->opaque; 313 CharBackend *chr = u->user->chr; 314 uint8_t *p = (uint8_t *) msg; 315 int r, size; 316 317 r = vhost_user_read_header(dev, msg); 318 if (r < 0) { 319 return r; 320 } 321 322 /* validate message size is sane */ 323 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 324 error_report("Failed to read msg header." 325 " Size %d exceeds the maximum %zu.", msg->hdr.size, 326 VHOST_USER_PAYLOAD_SIZE); 327 return -EPROTO; 328 } 329 330 if (msg->hdr.size) { 331 p += VHOST_USER_HDR_SIZE; 332 size = msg->hdr.size; 333 r = qemu_chr_fe_read_all(chr, p, size); 334 if (r != size) { 335 int saved_errno = errno; 336 error_report("Failed to read msg payload." 337 " Read %d instead of %d.", r, msg->hdr.size); 338 return r < 0 ? -saved_errno : -EIO; 339 } 340 } 341 342 return 0; 343 } 344 345 static int process_message_reply(struct vhost_dev *dev, 346 const VhostUserMsg *msg) 347 { 348 int ret; 349 VhostUserMsg msg_reply; 350 351 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 352 return 0; 353 } 354 355 ret = vhost_user_read(dev, &msg_reply); 356 if (ret < 0) { 357 return ret; 358 } 359 360 if (msg_reply.hdr.request != msg->hdr.request) { 361 error_report("Received unexpected msg type. " 362 "Expected %d received %d", 363 msg->hdr.request, msg_reply.hdr.request); 364 return -EPROTO; 365 } 366 367 return msg_reply.payload.u64 ? -EIO : 0; 368 } 369 370 static bool vhost_user_per_device_request(VhostUserRequest request) 371 { 372 switch (request) { 373 case VHOST_USER_SET_OWNER: 374 case VHOST_USER_RESET_OWNER: 375 case VHOST_USER_SET_MEM_TABLE: 376 case VHOST_USER_GET_QUEUE_NUM: 377 case VHOST_USER_NET_SET_MTU: 378 case VHOST_USER_RESET_DEVICE: 379 case VHOST_USER_ADD_MEM_REG: 380 case VHOST_USER_REM_MEM_REG: 381 return true; 382 default: 383 return false; 384 } 385 } 386 387 /* most non-init callers ignore the error */ 388 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 389 int *fds, int fd_num) 390 { 391 struct vhost_user *u = dev->opaque; 392 CharBackend *chr = u->user->chr; 393 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 394 395 /* 396 * Some devices, like virtio-scsi, are implemented as a single vhost_dev, 397 * while others, like virtio-net, contain multiple vhost_devs. For 398 * operations such as configuring device memory mappings or issuing device 399 * resets, which affect the whole device instead of individual VQs, 400 * vhost-user messages should only be sent once. 401 * 402 * Devices with multiple vhost_devs are given an associated dev->vq_index 403 * so per_device requests are only sent if vq_index is 0. 404 */ 405 if (vhost_user_per_device_request(msg->hdr.request) 406 && dev->vq_index != 0) { 407 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 408 return 0; 409 } 410 411 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 412 error_report("Failed to set msg fds."); 413 return -EINVAL; 414 } 415 416 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 417 if (ret != size) { 418 int saved_errno = errno; 419 error_report("Failed to write msg." 420 " Wrote %d instead of %d.", ret, size); 421 return ret < 0 ? 
-saved_errno : -EIO; 422 } 423 424 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); 425 426 return 0; 427 } 428 429 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 430 { 431 VhostUserMsg msg = { 432 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 433 .hdr.flags = VHOST_USER_VERSION, 434 }; 435 436 return vhost_user_write(dev, &msg, &fd, 1); 437 } 438 439 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 440 struct vhost_log *log) 441 { 442 int fds[VHOST_USER_MAX_RAM_SLOTS]; 443 size_t fd_num = 0; 444 bool shmfd = virtio_has_feature(dev->protocol_features, 445 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 446 int ret; 447 VhostUserMsg msg = { 448 .hdr.request = VHOST_USER_SET_LOG_BASE, 449 .hdr.flags = VHOST_USER_VERSION, 450 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 451 .payload.log.mmap_offset = 0, 452 .hdr.size = sizeof(msg.payload.log), 453 }; 454 455 /* Send only once with first queue pair */ 456 if (dev->vq_index != 0) { 457 return 0; 458 } 459 460 if (shmfd && log->fd != -1) { 461 fds[fd_num++] = log->fd; 462 } 463 464 ret = vhost_user_write(dev, &msg, fds, fd_num); 465 if (ret < 0) { 466 return ret; 467 } 468 469 if (shmfd) { 470 msg.hdr.size = 0; 471 ret = vhost_user_read(dev, &msg); 472 if (ret < 0) { 473 return ret; 474 } 475 476 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 477 error_report("Received unexpected msg type. " 478 "Expected %d received %d", 479 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 480 return -EPROTO; 481 } 482 } 483 484 return 0; 485 } 486 487 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 488 int *fd) 489 { 490 MemoryRegion *mr; 491 492 assert((uintptr_t)addr == addr); 493 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 494 *fd = memory_region_get_fd(mr); 495 *offset += mr->ram_block->fd_offset; 496 497 return mr; 498 } 499 500 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 501 struct vhost_memory_region *src, 502 uint64_t mmap_offset) 503 { 504 assert(src != NULL && dst != NULL); 505 dst->userspace_addr = src->userspace_addr; 506 dst->memory_size = src->memory_size; 507 dst->guest_phys_addr = src->guest_phys_addr; 508 dst->mmap_offset = mmap_offset; 509 } 510 511 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 512 struct vhost_dev *dev, 513 VhostUserMsg *msg, 514 int *fds, size_t *fd_num, 515 bool track_ramblocks) 516 { 517 int i, fd; 518 ram_addr_t offset; 519 MemoryRegion *mr; 520 struct vhost_memory_region *reg; 521 VhostUserMemoryRegion region_buffer; 522 523 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 524 525 for (i = 0; i < dev->mem->nregions; ++i) { 526 reg = dev->mem->regions + i; 527 528 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 529 if (fd > 0) { 530 if (track_ramblocks) { 531 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 532 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 533 reg->memory_size, 534 reg->guest_phys_addr, 535 reg->userspace_addr, 536 offset); 537 u->region_rb_offset[i] = offset; 538 u->region_rb[i] = mr->ram_block; 539 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 540 error_report("Failed preparing vhost-user memory table msg"); 541 return -ENOBUFS; 542 } 543 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 544 msg->payload.memory.regions[*fd_num] = region_buffer; 545 fds[(*fd_num)++] = fd; 546 } else if (track_ramblocks) { 547 u->region_rb_offset[i] = 0; 548 u->region_rb[i] = NULL; 549 } 550 } 551 552 msg->payload.memory.nregions = *fd_num; 553 554 if 
(!*fd_num) { 555 error_report("Failed initializing vhost-user memory map, " 556 "consider using -object memory-backend-file share=on"); 557 return -EINVAL; 558 } 559 560 msg->hdr.size = sizeof(msg->payload.memory.nregions); 561 msg->hdr.size += sizeof(msg->payload.memory.padding); 562 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 563 564 return 0; 565 } 566 567 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 568 struct vhost_memory_region *vdev_reg) 569 { 570 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 571 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 572 shadow_reg->memory_size == vdev_reg->memory_size; 573 } 574 575 static void scrub_shadow_regions(struct vhost_dev *dev, 576 struct scrub_regions *add_reg, 577 int *nr_add_reg, 578 struct scrub_regions *rem_reg, 579 int *nr_rem_reg, uint64_t *shadow_pcb, 580 bool track_ramblocks) 581 { 582 struct vhost_user *u = dev->opaque; 583 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 584 struct vhost_memory_region *reg, *shadow_reg; 585 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 586 ram_addr_t offset; 587 MemoryRegion *mr; 588 bool matching; 589 590 /* 591 * Find memory regions present in our shadow state which are not in 592 * the device's current memory state. 593 * 594 * Mark regions in both the shadow and device state as "found". 595 */ 596 for (i = 0; i < u->num_shadow_regions; i++) { 597 shadow_reg = &u->shadow_regions[i]; 598 matching = false; 599 600 for (j = 0; j < dev->mem->nregions; j++) { 601 reg = &dev->mem->regions[j]; 602 603 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 604 605 if (reg_equal(shadow_reg, reg)) { 606 matching = true; 607 found[j] = true; 608 if (track_ramblocks) { 609 /* 610 * Reset postcopy client bases, region_rb, and 611 * region_rb_offset in case regions are removed. 612 */ 613 if (fd > 0) { 614 u->region_rb_offset[j] = offset; 615 u->region_rb[j] = mr->ram_block; 616 shadow_pcb[j] = u->postcopy_client_bases[i]; 617 } else { 618 u->region_rb_offset[j] = 0; 619 u->region_rb[j] = NULL; 620 } 621 } 622 break; 623 } 624 } 625 626 /* 627 * If the region was not found in the current device memory state 628 * create an entry for it in the removed list. 629 */ 630 if (!matching) { 631 rem_reg[rm_idx].region = shadow_reg; 632 rem_reg[rm_idx++].reg_idx = i; 633 } 634 } 635 636 /* 637 * For regions not marked "found", create entries in the added list. 638 * 639 * Note their indexes in the device memory state and the indexes of their 640 * file descriptors. 641 */ 642 for (i = 0; i < dev->mem->nregions; i++) { 643 reg = &dev->mem->regions[i]; 644 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 645 if (fd > 0) { 646 ++fd_num; 647 } 648 649 /* 650 * If the region was in both the shadow and device state we don't 651 * need to send a VHOST_USER_ADD_MEM_REG message for it. 
652 */ 653 if (found[i]) { 654 continue; 655 } 656 657 add_reg[add_idx].region = reg; 658 add_reg[add_idx].reg_idx = i; 659 add_reg[add_idx++].fd_idx = fd_num; 660 } 661 *nr_rem_reg = rm_idx; 662 *nr_add_reg = add_idx; 663 664 return; 665 } 666 667 static int send_remove_regions(struct vhost_dev *dev, 668 struct scrub_regions *remove_reg, 669 int nr_rem_reg, VhostUserMsg *msg, 670 bool reply_supported) 671 { 672 struct vhost_user *u = dev->opaque; 673 struct vhost_memory_region *shadow_reg; 674 int i, fd, shadow_reg_idx, ret; 675 ram_addr_t offset; 676 VhostUserMemoryRegion region_buffer; 677 678 /* 679 * The regions in remove_reg appear in the same order they do in the 680 * shadow table. Therefore we can minimize memory copies by iterating 681 * through remove_reg backwards. 682 */ 683 for (i = nr_rem_reg - 1; i >= 0; i--) { 684 shadow_reg = remove_reg[i].region; 685 shadow_reg_idx = remove_reg[i].reg_idx; 686 687 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 688 689 if (fd > 0) { 690 msg->hdr.request = VHOST_USER_REM_MEM_REG; 691 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 692 msg->payload.mem_reg.region = region_buffer; 693 694 ret = vhost_user_write(dev, msg, NULL, 0); 695 if (ret < 0) { 696 return ret; 697 } 698 699 if (reply_supported) { 700 ret = process_message_reply(dev, msg); 701 if (ret) { 702 return ret; 703 } 704 } 705 } 706 707 /* 708 * At this point we know the backend has unmapped the region. It is now 709 * safe to remove it from the shadow table. 710 */ 711 memmove(&u->shadow_regions[shadow_reg_idx], 712 &u->shadow_regions[shadow_reg_idx + 1], 713 sizeof(struct vhost_memory_region) * 714 (u->num_shadow_regions - shadow_reg_idx - 1)); 715 u->num_shadow_regions--; 716 } 717 718 return 0; 719 } 720 721 static int send_add_regions(struct vhost_dev *dev, 722 struct scrub_regions *add_reg, int nr_add_reg, 723 VhostUserMsg *msg, uint64_t *shadow_pcb, 724 bool reply_supported, bool track_ramblocks) 725 { 726 struct vhost_user *u = dev->opaque; 727 int i, fd, ret, reg_idx, reg_fd_idx; 728 struct vhost_memory_region *reg; 729 MemoryRegion *mr; 730 ram_addr_t offset; 731 VhostUserMsg msg_reply; 732 VhostUserMemoryRegion region_buffer; 733 734 for (i = 0; i < nr_add_reg; i++) { 735 reg = add_reg[i].region; 736 reg_idx = add_reg[i].reg_idx; 737 reg_fd_idx = add_reg[i].fd_idx; 738 739 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 740 741 if (fd > 0) { 742 if (track_ramblocks) { 743 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 744 reg->memory_size, 745 reg->guest_phys_addr, 746 reg->userspace_addr, 747 offset); 748 u->region_rb_offset[reg_idx] = offset; 749 u->region_rb[reg_idx] = mr->ram_block; 750 } 751 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 752 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 753 msg->payload.mem_reg.region = region_buffer; 754 755 ret = vhost_user_write(dev, msg, &fd, 1); 756 if (ret < 0) { 757 return ret; 758 } 759 760 if (track_ramblocks) { 761 uint64_t reply_gpa; 762 763 ret = vhost_user_read(dev, &msg_reply); 764 if (ret < 0) { 765 return ret; 766 } 767 768 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 769 770 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 771 error_report("%s: Received unexpected msg type." 772 "Expected %d received %d", __func__, 773 VHOST_USER_ADD_MEM_REG, 774 msg_reply.hdr.request); 775 return -EPROTO; 776 } 777 778 /* 779 * We're using the same structure, just reusing one of the 780 * fields, so it should be the same size. 
781 */ 782 if (msg_reply.hdr.size != msg->hdr.size) { 783 error_report("%s: Unexpected size for postcopy reply " 784 "%d vs %d", __func__, msg_reply.hdr.size, 785 msg->hdr.size); 786 return -EPROTO; 787 } 788 789 /* Get the postcopy client base from the backend's reply. */ 790 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 791 shadow_pcb[reg_idx] = 792 msg_reply.payload.mem_reg.region.userspace_addr; 793 trace_vhost_user_set_mem_table_postcopy( 794 msg_reply.payload.mem_reg.region.userspace_addr, 795 msg->payload.mem_reg.region.userspace_addr, 796 reg_fd_idx, reg_idx); 797 } else { 798 error_report("%s: invalid postcopy reply for region. " 799 "Got guest physical address %" PRIX64 ", expected " 800 "%" PRIX64, __func__, reply_gpa, 801 dev->mem->regions[reg_idx].guest_phys_addr); 802 return -EPROTO; 803 } 804 } else if (reply_supported) { 805 ret = process_message_reply(dev, msg); 806 if (ret) { 807 return ret; 808 } 809 } 810 } else if (track_ramblocks) { 811 u->region_rb_offset[reg_idx] = 0; 812 u->region_rb[reg_idx] = NULL; 813 } 814 815 /* 816 * At this point, we know the backend has mapped in the new 817 * region, if the region has a valid file descriptor. 818 * 819 * The region should now be added to the shadow table. 820 */ 821 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 822 reg->guest_phys_addr; 823 u->shadow_regions[u->num_shadow_regions].userspace_addr = 824 reg->userspace_addr; 825 u->shadow_regions[u->num_shadow_regions].memory_size = 826 reg->memory_size; 827 u->num_shadow_regions++; 828 } 829 830 return 0; 831 } 832 833 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 834 VhostUserMsg *msg, 835 bool reply_supported, 836 bool track_ramblocks) 837 { 838 struct vhost_user *u = dev->opaque; 839 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 840 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 841 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 842 int nr_add_reg, nr_rem_reg; 843 int ret; 844 845 msg->hdr.size = sizeof(msg->payload.mem_reg); 846 847 /* Find the regions which need to be removed or added. */ 848 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 849 shadow_pcb, track_ramblocks); 850 851 if (nr_rem_reg) { 852 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 853 reply_supported); 854 if (ret < 0) { 855 goto err; 856 } 857 } 858 859 if (nr_add_reg) { 860 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, 861 reply_supported, track_ramblocks); 862 if (ret < 0) { 863 goto err; 864 } 865 } 866 867 if (track_ramblocks) { 868 memcpy(u->postcopy_client_bases, shadow_pcb, 869 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 870 /* 871 * Now we've registered this with the postcopy code, we ack to the 872 * client, because now we're in the position to be able to deal with 873 * any faults it generates. 874 */ 875 /* TODO: Use this for failure cases as well with a bad value. 
*/ 876 msg->hdr.size = sizeof(msg->payload.u64); 877 msg->payload.u64 = 0; /* OK */ 878 879 ret = vhost_user_write(dev, msg, NULL, 0); 880 if (ret < 0) { 881 return ret; 882 } 883 } 884 885 return 0; 886 887 err: 888 if (track_ramblocks) { 889 memcpy(u->postcopy_client_bases, shadow_pcb, 890 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 891 } 892 893 return ret; 894 } 895 896 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 897 struct vhost_memory *mem, 898 bool reply_supported, 899 bool config_mem_slots) 900 { 901 struct vhost_user *u = dev->opaque; 902 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 903 size_t fd_num = 0; 904 VhostUserMsg msg_reply; 905 int region_i, msg_i; 906 int ret; 907 908 VhostUserMsg msg = { 909 .hdr.flags = VHOST_USER_VERSION, 910 }; 911 912 if (u->region_rb_len < dev->mem->nregions) { 913 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 914 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 915 dev->mem->nregions); 916 memset(&(u->region_rb[u->region_rb_len]), '\0', 917 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 918 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 919 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 920 u->region_rb_len = dev->mem->nregions; 921 } 922 923 if (config_mem_slots) { 924 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); 925 if (ret < 0) { 926 return ret; 927 } 928 } else { 929 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 930 true); 931 if (ret < 0) { 932 return ret; 933 } 934 935 ret = vhost_user_write(dev, &msg, fds, fd_num); 936 if (ret < 0) { 937 return ret; 938 } 939 940 ret = vhost_user_read(dev, &msg_reply); 941 if (ret < 0) { 942 return ret; 943 } 944 945 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 946 error_report("%s: Received unexpected msg type." 947 "Expected %d received %d", __func__, 948 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 949 return -EPROTO; 950 } 951 952 /* 953 * We're using the same structure, just reusing one of the 954 * fields, so it should be the same size. 955 */ 956 if (msg_reply.hdr.size != msg.hdr.size) { 957 error_report("%s: Unexpected size for postcopy reply " 958 "%d vs %d", __func__, msg_reply.hdr.size, 959 msg.hdr.size); 960 return -EPROTO; 961 } 962 963 memset(u->postcopy_client_bases, 0, 964 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 965 966 /* 967 * They're in the same order as the regions that were sent 968 * but some of the regions were skipped (above) if they 969 * didn't have fd's 970 */ 971 for (msg_i = 0, region_i = 0; 972 region_i < dev->mem->nregions; 973 region_i++) { 974 if (msg_i < fd_num && 975 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 976 dev->mem->regions[region_i].guest_phys_addr) { 977 u->postcopy_client_bases[region_i] = 978 msg_reply.payload.memory.regions[msg_i].userspace_addr; 979 trace_vhost_user_set_mem_table_postcopy( 980 msg_reply.payload.memory.regions[msg_i].userspace_addr, 981 msg.payload.memory.regions[msg_i].userspace_addr, 982 msg_i, region_i); 983 msg_i++; 984 } 985 } 986 if (msg_i != fd_num) { 987 error_report("%s: postcopy reply not fully consumed " 988 "%d vs %zd", 989 __func__, msg_i, fd_num); 990 return -EIO; 991 } 992 993 /* 994 * Now we've registered this with the postcopy code, we ack to the 995 * client, because now we're in the position to be able to deal 996 * with any faults it generates. 997 */ 998 /* TODO: Use this for failure cases as well with a bad value. 
*/ 999 msg.hdr.size = sizeof(msg.payload.u64); 1000 msg.payload.u64 = 0; /* OK */ 1001 ret = vhost_user_write(dev, &msg, NULL, 0); 1002 if (ret < 0) { 1003 return ret; 1004 } 1005 } 1006 1007 return 0; 1008 } 1009 1010 static int vhost_user_set_mem_table(struct vhost_dev *dev, 1011 struct vhost_memory *mem) 1012 { 1013 struct vhost_user *u = dev->opaque; 1014 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 1015 size_t fd_num = 0; 1016 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 1017 bool reply_supported = virtio_has_feature(dev->protocol_features, 1018 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1019 bool config_mem_slots = 1020 virtio_has_feature(dev->protocol_features, 1021 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1022 int ret; 1023 1024 if (do_postcopy) { 1025 /* 1026 * Postcopy has enough differences that it's best done in it's own 1027 * version 1028 */ 1029 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1030 config_mem_slots); 1031 } 1032 1033 VhostUserMsg msg = { 1034 .hdr.flags = VHOST_USER_VERSION, 1035 }; 1036 1037 if (reply_supported) { 1038 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1039 } 1040 1041 if (config_mem_slots) { 1042 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); 1043 if (ret < 0) { 1044 return ret; 1045 } 1046 } else { 1047 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1048 false); 1049 if (ret < 0) { 1050 return ret; 1051 } 1052 1053 ret = vhost_user_write(dev, &msg, fds, fd_num); 1054 if (ret < 0) { 1055 return ret; 1056 } 1057 1058 if (reply_supported) { 1059 return process_message_reply(dev, &msg); 1060 } 1061 } 1062 1063 return 0; 1064 } 1065 1066 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1067 struct vhost_vring_state *ring) 1068 { 1069 bool cross_endian = virtio_has_feature(dev->protocol_features, 1070 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1071 VhostUserMsg msg = { 1072 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1073 .hdr.flags = VHOST_USER_VERSION, 1074 .payload.state = *ring, 1075 .hdr.size = sizeof(msg.payload.state), 1076 }; 1077 1078 if (!cross_endian) { 1079 error_report("vhost-user trying to send unhandled ioctl"); 1080 return -ENOTSUP; 1081 } 1082 1083 return vhost_user_write(dev, &msg, NULL, 0); 1084 } 1085 1086 static int vhost_set_vring(struct vhost_dev *dev, 1087 unsigned long int request, 1088 struct vhost_vring_state *ring) 1089 { 1090 VhostUserMsg msg = { 1091 .hdr.request = request, 1092 .hdr.flags = VHOST_USER_VERSION, 1093 .payload.state = *ring, 1094 .hdr.size = sizeof(msg.payload.state), 1095 }; 1096 1097 return vhost_user_write(dev, &msg, NULL, 0); 1098 } 1099 1100 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1101 struct vhost_vring_state *ring) 1102 { 1103 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 1104 } 1105 1106 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) 1107 { 1108 assert(n && n->unmap_addr); 1109 munmap(n->unmap_addr, qemu_real_host_page_size()); 1110 n->unmap_addr = NULL; 1111 } 1112 1113 /* 1114 * clean-up function for notifier, will finally free the structure 1115 * under rcu. 
1116 */ 1117 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, 1118 VirtIODevice *vdev) 1119 { 1120 if (n->addr) { 1121 if (vdev) { 1122 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); 1123 } 1124 assert(!n->unmap_addr); 1125 n->unmap_addr = n->addr; 1126 n->addr = NULL; 1127 call_rcu(n, vhost_user_host_notifier_free, rcu); 1128 } 1129 } 1130 1131 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1132 struct vhost_vring_state *ring) 1133 { 1134 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 1135 } 1136 1137 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1138 { 1139 int i; 1140 1141 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1142 return -EINVAL; 1143 } 1144 1145 for (i = 0; i < dev->nvqs; ++i) { 1146 int ret; 1147 struct vhost_vring_state state = { 1148 .index = dev->vq_index + i, 1149 .num = enable, 1150 }; 1151 1152 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 1153 if (ret < 0) { 1154 /* 1155 * Restoring the previous state is likely infeasible, as well as 1156 * proceeding regardless the error, so just bail out and hope for 1157 * the device-level recovery. 1158 */ 1159 return ret; 1160 } 1161 } 1162 1163 return 0; 1164 } 1165 1166 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u, 1167 int idx) 1168 { 1169 if (idx >= u->notifiers->len) { 1170 return NULL; 1171 } 1172 return g_ptr_array_index(u->notifiers, idx); 1173 } 1174 1175 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1176 struct vhost_vring_state *ring) 1177 { 1178 int ret; 1179 VhostUserMsg msg = { 1180 .hdr.request = VHOST_USER_GET_VRING_BASE, 1181 .hdr.flags = VHOST_USER_VERSION, 1182 .payload.state = *ring, 1183 .hdr.size = sizeof(msg.payload.state), 1184 }; 1185 struct vhost_user *u = dev->opaque; 1186 1187 VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); 1188 if (n) { 1189 vhost_user_host_notifier_remove(n, dev->vdev); 1190 } 1191 1192 ret = vhost_user_write(dev, &msg, NULL, 0); 1193 if (ret < 0) { 1194 return ret; 1195 } 1196 1197 ret = vhost_user_read(dev, &msg); 1198 if (ret < 0) { 1199 return ret; 1200 } 1201 1202 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1203 error_report("Received unexpected msg type. 
Expected %d received %d", 1204 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1205 return -EPROTO; 1206 } 1207 1208 if (msg.hdr.size != sizeof(msg.payload.state)) { 1209 error_report("Received bad msg size."); 1210 return -EPROTO; 1211 } 1212 1213 *ring = msg.payload.state; 1214 1215 return 0; 1216 } 1217 1218 static int vhost_set_vring_file(struct vhost_dev *dev, 1219 VhostUserRequest request, 1220 struct vhost_vring_file *file) 1221 { 1222 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1223 size_t fd_num = 0; 1224 VhostUserMsg msg = { 1225 .hdr.request = request, 1226 .hdr.flags = VHOST_USER_VERSION, 1227 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1228 .hdr.size = sizeof(msg.payload.u64), 1229 }; 1230 1231 if (ioeventfd_enabled() && file->fd > 0) { 1232 fds[fd_num++] = file->fd; 1233 } else { 1234 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1235 } 1236 1237 return vhost_user_write(dev, &msg, fds, fd_num); 1238 } 1239 1240 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1241 struct vhost_vring_file *file) 1242 { 1243 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1244 } 1245 1246 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1247 struct vhost_vring_file *file) 1248 { 1249 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1250 } 1251 1252 static int vhost_user_set_vring_err(struct vhost_dev *dev, 1253 struct vhost_vring_file *file) 1254 { 1255 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file); 1256 } 1257 1258 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1259 { 1260 int ret; 1261 VhostUserMsg msg = { 1262 .hdr.request = request, 1263 .hdr.flags = VHOST_USER_VERSION, 1264 }; 1265 1266 if (vhost_user_per_device_request(request) && dev->vq_index != 0) { 1267 return 0; 1268 } 1269 1270 ret = vhost_user_write(dev, &msg, NULL, 0); 1271 if (ret < 0) { 1272 return ret; 1273 } 1274 1275 ret = vhost_user_read(dev, &msg); 1276 if (ret < 0) { 1277 return ret; 1278 } 1279 1280 if (msg.hdr.request != request) { 1281 error_report("Received unexpected msg type. Expected %d received %d", 1282 request, msg.hdr.request); 1283 return -EPROTO; 1284 } 1285 1286 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1287 error_report("Received bad msg size."); 1288 return -EPROTO; 1289 } 1290 1291 *u64 = msg.payload.u64; 1292 1293 return 0; 1294 } 1295 1296 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1297 { 1298 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1299 return -EPROTO; 1300 } 1301 1302 return 0; 1303 } 1304 1305 static int enforce_reply(struct vhost_dev *dev, 1306 const VhostUserMsg *msg) 1307 { 1308 uint64_t dummy; 1309 1310 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1311 return process_message_reply(dev, msg); 1312 } 1313 1314 /* 1315 * We need to wait for a reply but the backend does not 1316 * support replies for the command we just sent. 1317 * Send VHOST_USER_GET_FEATURES which makes all backends 1318 * send a reply. 
1319 */ 1320 return vhost_user_get_features(dev, &dummy); 1321 } 1322 1323 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1324 struct vhost_vring_addr *addr) 1325 { 1326 int ret; 1327 VhostUserMsg msg = { 1328 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1329 .hdr.flags = VHOST_USER_VERSION, 1330 .payload.addr = *addr, 1331 .hdr.size = sizeof(msg.payload.addr), 1332 }; 1333 1334 bool reply_supported = virtio_has_feature(dev->protocol_features, 1335 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1336 1337 /* 1338 * wait for a reply if logging is enabled to make sure 1339 * backend is actually logging changes 1340 */ 1341 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); 1342 1343 if (reply_supported && wait_for_reply) { 1344 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1345 } 1346 1347 ret = vhost_user_write(dev, &msg, NULL, 0); 1348 if (ret < 0) { 1349 return ret; 1350 } 1351 1352 if (wait_for_reply) { 1353 return enforce_reply(dev, &msg); 1354 } 1355 1356 return 0; 1357 } 1358 1359 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1360 bool wait_for_reply) 1361 { 1362 VhostUserMsg msg = { 1363 .hdr.request = request, 1364 .hdr.flags = VHOST_USER_VERSION, 1365 .payload.u64 = u64, 1366 .hdr.size = sizeof(msg.payload.u64), 1367 }; 1368 int ret; 1369 1370 if (wait_for_reply) { 1371 bool reply_supported = virtio_has_feature(dev->protocol_features, 1372 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1373 if (reply_supported) { 1374 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1375 } 1376 } 1377 1378 ret = vhost_user_write(dev, &msg, NULL, 0); 1379 if (ret < 0) { 1380 return ret; 1381 } 1382 1383 if (wait_for_reply) { 1384 return enforce_reply(dev, &msg); 1385 } 1386 1387 return 0; 1388 } 1389 1390 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status) 1391 { 1392 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false); 1393 } 1394 1395 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status) 1396 { 1397 uint64_t value; 1398 int ret; 1399 1400 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value); 1401 if (ret < 0) { 1402 return ret; 1403 } 1404 *status = value; 1405 1406 return 0; 1407 } 1408 1409 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status) 1410 { 1411 uint8_t s; 1412 int ret; 1413 1414 ret = vhost_user_get_status(dev, &s); 1415 if (ret < 0) { 1416 return ret; 1417 } 1418 1419 if ((s & status) == status) { 1420 return 0; 1421 } 1422 s |= status; 1423 1424 return vhost_user_set_status(dev, s); 1425 } 1426 1427 static int vhost_user_set_features(struct vhost_dev *dev, 1428 uint64_t features) 1429 { 1430 /* 1431 * wait for a reply if logging is enabled to make sure 1432 * backend is actually logging changes 1433 */ 1434 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1435 int ret; 1436 1437 /* 1438 * We need to include any extra backend only feature bits that 1439 * might be needed by our device. Currently this includes the 1440 * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol 1441 * features. 
1442 */ 1443 ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, 1444 features | dev->backend_features, 1445 log_enabled); 1446 1447 if (virtio_has_feature(dev->protocol_features, 1448 VHOST_USER_PROTOCOL_F_STATUS)) { 1449 if (!ret) { 1450 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 1451 } 1452 } 1453 1454 return ret; 1455 } 1456 1457 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1458 uint64_t features) 1459 { 1460 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1461 false); 1462 } 1463 1464 static int vhost_user_set_owner(struct vhost_dev *dev) 1465 { 1466 VhostUserMsg msg = { 1467 .hdr.request = VHOST_USER_SET_OWNER, 1468 .hdr.flags = VHOST_USER_VERSION, 1469 }; 1470 1471 return vhost_user_write(dev, &msg, NULL, 0); 1472 } 1473 1474 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1475 uint64_t *max_memslots) 1476 { 1477 uint64_t backend_max_memslots; 1478 int err; 1479 1480 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1481 &backend_max_memslots); 1482 if (err < 0) { 1483 return err; 1484 } 1485 1486 *max_memslots = backend_max_memslots; 1487 1488 return 0; 1489 } 1490 1491 static int vhost_user_reset_device(struct vhost_dev *dev) 1492 { 1493 VhostUserMsg msg = { 1494 .hdr.flags = VHOST_USER_VERSION, 1495 }; 1496 1497 msg.hdr.request = virtio_has_feature(dev->protocol_features, 1498 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 1499 ? VHOST_USER_RESET_DEVICE 1500 : VHOST_USER_RESET_OWNER; 1501 1502 return vhost_user_write(dev, &msg, NULL, 0); 1503 } 1504 1505 static int vhost_user_backend_handle_config_change(struct vhost_dev *dev) 1506 { 1507 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1508 return -ENOSYS; 1509 } 1510 1511 return dev->config_ops->vhost_dev_config_notifier(dev); 1512 } 1513 1514 /* 1515 * Fetch or create the notifier for a given idx. Newly created 1516 * notifiers are added to the pointer array that tracks them. 1517 */ 1518 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, 1519 int idx) 1520 { 1521 VhostUserHostNotifier *n = NULL; 1522 if (idx >= u->notifiers->len) { 1523 g_ptr_array_set_size(u->notifiers, idx + 1); 1524 } 1525 1526 n = g_ptr_array_index(u->notifiers, idx); 1527 if (!n) { 1528 /* 1529 * In case notification arrive out-of-order, 1530 * make room for current index. 1531 */ 1532 g_ptr_array_remove_index(u->notifiers, idx); 1533 n = g_new0(VhostUserHostNotifier, 1); 1534 n->idx = idx; 1535 g_ptr_array_insert(u->notifiers, idx, n); 1536 trace_vhost_user_create_notifier(idx, n); 1537 } 1538 1539 return n; 1540 } 1541 1542 static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, 1543 VhostUserVringArea *area, 1544 int fd) 1545 { 1546 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1547 size_t page_size = qemu_real_host_page_size(); 1548 struct vhost_user *u = dev->opaque; 1549 VhostUserState *user = u->user; 1550 VirtIODevice *vdev = dev->vdev; 1551 VhostUserHostNotifier *n; 1552 void *addr; 1553 char *name; 1554 1555 if (!virtio_has_feature(dev->protocol_features, 1556 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1557 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1558 return -EINVAL; 1559 } 1560 1561 /* 1562 * Fetch notifier and invalidate any old data before setting up 1563 * new mapped address. 
1564 */ 1565 n = fetch_or_create_notifier(user, queue_idx); 1566 vhost_user_host_notifier_remove(n, vdev); 1567 1568 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1569 return 0; 1570 } 1571 1572 /* Sanity check. */ 1573 if (area->size != page_size) { 1574 return -EINVAL; 1575 } 1576 1577 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1578 fd, area->offset); 1579 if (addr == MAP_FAILED) { 1580 return -EFAULT; 1581 } 1582 1583 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1584 user, queue_idx); 1585 if (!n->mr.ram) { /* Don't init again after suspend. */ 1586 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1587 page_size, addr); 1588 } else { 1589 n->mr.ram_block->host = addr; 1590 } 1591 g_free(name); 1592 1593 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1594 object_unparent(OBJECT(&n->mr)); 1595 munmap(addr, page_size); 1596 return -ENXIO; 1597 } 1598 1599 n->addr = addr; 1600 1601 return 0; 1602 } 1603 1604 static void close_backend_channel(struct vhost_user *u) 1605 { 1606 g_source_destroy(u->backend_src); 1607 g_source_unref(u->backend_src); 1608 u->backend_src = NULL; 1609 object_unref(OBJECT(u->backend_ioc)); 1610 u->backend_ioc = NULL; 1611 } 1612 1613 static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, 1614 gpointer opaque) 1615 { 1616 struct vhost_dev *dev = opaque; 1617 struct vhost_user *u = dev->opaque; 1618 VhostUserHeader hdr = { 0, }; 1619 VhostUserPayload payload = { 0, }; 1620 Error *local_err = NULL; 1621 gboolean rc = G_SOURCE_CONTINUE; 1622 int ret = 0; 1623 struct iovec iov; 1624 g_autofree int *fd = NULL; 1625 size_t fdsize = 0; 1626 int i; 1627 1628 /* Read header */ 1629 iov.iov_base = &hdr; 1630 iov.iov_len = VHOST_USER_HDR_SIZE; 1631 1632 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1633 error_report_err(local_err); 1634 goto err; 1635 } 1636 1637 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1638 error_report("Failed to read msg header." 1639 " Size %d exceeds the maximum %zu.", hdr.size, 1640 VHOST_USER_PAYLOAD_SIZE); 1641 goto err; 1642 } 1643 1644 /* Read payload */ 1645 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1646 error_report_err(local_err); 1647 goto err; 1648 } 1649 1650 switch (hdr.request) { 1651 case VHOST_USER_BACKEND_IOTLB_MSG: 1652 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1653 break; 1654 case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: 1655 ret = vhost_user_backend_handle_config_change(dev); 1656 break; 1657 case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: 1658 ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, 1659 fd ? fd[0] : -1); 1660 break; 1661 default: 1662 error_report("Received unexpected msg type: %d.", hdr.request); 1663 ret = -EINVAL; 1664 } 1665 1666 /* 1667 * REPLY_ACK feature handling. Other reply types has to be managed 1668 * directly in their request handlers. 
1669 */ 1670 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1671 struct iovec iovec[2]; 1672 1673 1674 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1675 hdr.flags |= VHOST_USER_REPLY_MASK; 1676 1677 payload.u64 = !!ret; 1678 hdr.size = sizeof(payload.u64); 1679 1680 iovec[0].iov_base = &hdr; 1681 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1682 iovec[1].iov_base = &payload; 1683 iovec[1].iov_len = hdr.size; 1684 1685 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { 1686 error_report_err(local_err); 1687 goto err; 1688 } 1689 } 1690 1691 goto fdcleanup; 1692 1693 err: 1694 close_backend_channel(u); 1695 rc = G_SOURCE_REMOVE; 1696 1697 fdcleanup: 1698 if (fd) { 1699 for (i = 0; i < fdsize; i++) { 1700 close(fd[i]); 1701 } 1702 } 1703 return rc; 1704 } 1705 1706 static int vhost_setup_backend_channel(struct vhost_dev *dev) 1707 { 1708 VhostUserMsg msg = { 1709 .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, 1710 .hdr.flags = VHOST_USER_VERSION, 1711 }; 1712 struct vhost_user *u = dev->opaque; 1713 int sv[2], ret = 0; 1714 bool reply_supported = virtio_has_feature(dev->protocol_features, 1715 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1716 Error *local_err = NULL; 1717 QIOChannel *ioc; 1718 1719 if (!virtio_has_feature(dev->protocol_features, 1720 VHOST_USER_PROTOCOL_F_BACKEND_REQ)) { 1721 return 0; 1722 } 1723 1724 if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1725 int saved_errno = errno; 1726 error_report("socketpair() failed"); 1727 return -saved_errno; 1728 } 1729 1730 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); 1731 if (!ioc) { 1732 error_report_err(local_err); 1733 return -ECONNREFUSED; 1734 } 1735 u->backend_ioc = ioc; 1736 u->backend_src = qio_channel_add_watch_source(u->backend_ioc, 1737 G_IO_IN | G_IO_HUP, 1738 backend_read, dev, NULL, NULL); 1739 1740 if (reply_supported) { 1741 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1742 } 1743 1744 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1745 if (ret) { 1746 goto out; 1747 } 1748 1749 if (reply_supported) { 1750 ret = process_message_reply(dev, &msg); 1751 } 1752 1753 out: 1754 close(sv[1]); 1755 if (ret) { 1756 close_backend_channel(u); 1757 } 1758 1759 return ret; 1760 } 1761 1762 #ifdef CONFIG_LINUX 1763 /* 1764 * Called back from the postcopy fault thread when a fault is received on our 1765 * ufd. 
1766 * TODO: This is Linux specific 1767 */ 1768 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1769 void *ufd) 1770 { 1771 struct vhost_dev *dev = pcfd->data; 1772 struct vhost_user *u = dev->opaque; 1773 struct uffd_msg *msg = ufd; 1774 uint64_t faultaddr = msg->arg.pagefault.address; 1775 RAMBlock *rb = NULL; 1776 uint64_t rb_offset; 1777 int i; 1778 1779 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1780 dev->mem->nregions); 1781 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1782 trace_vhost_user_postcopy_fault_handler_loop(i, 1783 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1784 if (faultaddr >= u->postcopy_client_bases[i]) { 1785 /* Ofset of the fault address in the vhost region */ 1786 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1787 if (region_offset < dev->mem->regions[i].memory_size) { 1788 rb_offset = region_offset + u->region_rb_offset[i]; 1789 trace_vhost_user_postcopy_fault_handler_found(i, 1790 region_offset, rb_offset); 1791 rb = u->region_rb[i]; 1792 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1793 rb_offset); 1794 } 1795 } 1796 } 1797 error_report("%s: Failed to find region for fault %" PRIx64, 1798 __func__, faultaddr); 1799 return -1; 1800 } 1801 1802 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1803 uint64_t offset) 1804 { 1805 struct vhost_dev *dev = pcfd->data; 1806 struct vhost_user *u = dev->opaque; 1807 int i; 1808 1809 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1810 1811 if (!u) { 1812 return 0; 1813 } 1814 /* Translate the offset into an address in the clients address space */ 1815 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1816 if (u->region_rb[i] == rb && 1817 offset >= u->region_rb_offset[i] && 1818 offset < (u->region_rb_offset[i] + 1819 dev->mem->regions[i].memory_size)) { 1820 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1821 u->postcopy_client_bases[i]; 1822 trace_vhost_user_postcopy_waker_found(client_addr); 1823 return postcopy_wake_shared(pcfd, client_addr, rb); 1824 } 1825 } 1826 1827 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1828 return 0; 1829 } 1830 #endif 1831 1832 /* 1833 * Called at the start of an inbound postcopy on reception of the 1834 * 'advise' command. 1835 */ 1836 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1837 { 1838 #ifdef CONFIG_LINUX 1839 struct vhost_user *u = dev->opaque; 1840 CharBackend *chr = u->user->chr; 1841 int ufd; 1842 int ret; 1843 VhostUserMsg msg = { 1844 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1845 .hdr.flags = VHOST_USER_VERSION, 1846 }; 1847 1848 ret = vhost_user_write(dev, &msg, NULL, 0); 1849 if (ret < 0) { 1850 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1851 return ret; 1852 } 1853 1854 ret = vhost_user_read(dev, &msg); 1855 if (ret < 0) { 1856 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1857 return ret; 1858 } 1859 1860 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1861 error_setg(errp, "Unexpected msg type. 
Expected %d received %d", 1862 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1863 return -EPROTO; 1864 } 1865 1866 if (msg.hdr.size) { 1867 error_setg(errp, "Received bad msg size."); 1868 return -EPROTO; 1869 } 1870 ufd = qemu_chr_fe_get_msgfd(chr); 1871 if (ufd < 0) { 1872 error_setg(errp, "%s: Failed to get ufd", __func__); 1873 return -EIO; 1874 } 1875 qemu_socket_set_nonblock(ufd); 1876 1877 /* register ufd with userfault thread */ 1878 u->postcopy_fd.fd = ufd; 1879 u->postcopy_fd.data = dev; 1880 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1881 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1882 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1883 postcopy_register_shared_ufd(&u->postcopy_fd); 1884 return 0; 1885 #else 1886 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1887 return -ENOSYS; 1888 #endif 1889 } 1890 1891 /* 1892 * Called at the switch to postcopy on reception of the 'listen' command. 1893 */ 1894 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1895 { 1896 struct vhost_user *u = dev->opaque; 1897 int ret; 1898 VhostUserMsg msg = { 1899 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1900 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1901 }; 1902 u->postcopy_listen = true; 1903 1904 trace_vhost_user_postcopy_listen(); 1905 1906 ret = vhost_user_write(dev, &msg, NULL, 0); 1907 if (ret < 0) { 1908 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1909 return ret; 1910 } 1911 1912 ret = process_message_reply(dev, &msg); 1913 if (ret) { 1914 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1915 return ret; 1916 } 1917 1918 return 0; 1919 } 1920 1921 /* 1922 * Called at the end of postcopy 1923 */ 1924 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1925 { 1926 VhostUserMsg msg = { 1927 .hdr.request = VHOST_USER_POSTCOPY_END, 1928 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1929 }; 1930 int ret; 1931 struct vhost_user *u = dev->opaque; 1932 1933 trace_vhost_user_postcopy_end_entry(); 1934 1935 ret = vhost_user_write(dev, &msg, NULL, 0); 1936 if (ret < 0) { 1937 error_setg(errp, "Failed to send postcopy_end to vhost"); 1938 return ret; 1939 } 1940 1941 ret = process_message_reply(dev, &msg); 1942 if (ret) { 1943 error_setg(errp, "Failed to receive reply to postcopy_end"); 1944 return ret; 1945 } 1946 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1947 close(u->postcopy_fd.fd); 1948 u->postcopy_fd.handler = NULL; 1949 1950 trace_vhost_user_postcopy_end_exit(); 1951 1952 return 0; 1953 } 1954 1955 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1956 void *opaque) 1957 { 1958 struct PostcopyNotifyData *pnd = opaque; 1959 struct vhost_user *u = container_of(notifier, struct vhost_user, 1960 postcopy_notifier); 1961 struct vhost_dev *dev = u->dev; 1962 1963 switch (pnd->reason) { 1964 case POSTCOPY_NOTIFY_PROBE: 1965 if (!virtio_has_feature(dev->protocol_features, 1966 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1967 /* TODO: Get the device name into this error somehow */ 1968 error_setg(pnd->errp, 1969 "vhost-user backend not capable of postcopy"); 1970 return -ENOENT; 1971 } 1972 break; 1973 1974 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1975 return vhost_user_postcopy_advise(dev, pnd->errp); 1976 1977 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1978 return vhost_user_postcopy_listen(dev, pnd->errp); 1979 1980 case POSTCOPY_NOTIFY_INBOUND_END: 1981 return vhost_user_postcopy_end(dev, pnd->errp); 1982 1983 default: 
1984 /* We ignore notifications we don't know */ 1985 break; 1986 } 1987 1988 return 0; 1989 } 1990 1991 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 1992 Error **errp) 1993 { 1994 uint64_t features, ram_slots; 1995 struct vhost_user *u; 1996 VhostUserState *vus = (VhostUserState *) opaque; 1997 int err; 1998 1999 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2000 2001 u = g_new0(struct vhost_user, 1); 2002 u->user = vus; 2003 u->dev = dev; 2004 dev->opaque = u; 2005 2006 err = vhost_user_get_features(dev, &features); 2007 if (err < 0) { 2008 error_setg_errno(errp, -err, "vhost_backend_init failed"); 2009 return err; 2010 } 2011 2012 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 2013 bool supports_f_config = vus->supports_config || 2014 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); 2015 uint64_t protocol_features; 2016 2017 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 2018 2019 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 2020 &protocol_features); 2021 if (err < 0) { 2022 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2023 return -EPROTO; 2024 } 2025 2026 /* 2027 * We will use all the protocol features we support - although 2028 * we suppress F_CONFIG if we know QEMUs internal code can not support 2029 * it. 2030 */ 2031 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; 2032 2033 if (supports_f_config) { 2034 if (!virtio_has_feature(protocol_features, 2035 VHOST_USER_PROTOCOL_F_CONFIG)) { 2036 error_setg(errp, "vhost-user device expecting " 2037 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " 2038 "not support it."); 2039 return -EPROTO; 2040 } 2041 } else { 2042 if (virtio_has_feature(protocol_features, 2043 VHOST_USER_PROTOCOL_F_CONFIG)) { 2044 warn_report("vhost-user backend supports " 2045 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); 2046 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 2047 } 2048 } 2049 2050 /* final set of protocol features */ 2051 dev->protocol_features = protocol_features; 2052 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 2053 if (err < 0) { 2054 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2055 return -EPROTO; 2056 } 2057 2058 /* query the max queues we support if backend supports Multiple Queue */ 2059 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 2060 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2061 &dev->max_queues); 2062 if (err < 0) { 2063 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2064 return -EPROTO; 2065 } 2066 } else { 2067 dev->max_queues = 1; 2068 } 2069 2070 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2071 error_setg(errp, "The maximum number of queues supported by the " 2072 "backend is %" PRIu64, dev->max_queues); 2073 return -EINVAL; 2074 } 2075 2076 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2077 !(virtio_has_feature(dev->protocol_features, 2078 VHOST_USER_PROTOCOL_F_BACKEND_REQ) && 2079 virtio_has_feature(dev->protocol_features, 2080 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2081 error_setg(errp, "IOMMU support requires reply-ack and " 2082 "backend-req protocol features."); 2083 return -EINVAL; 2084 } 2085 2086 /* get max memory regions if backend supports configurable RAM slots */ 2087 if (!virtio_has_feature(dev->protocol_features, 2088 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2089 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2090 } 
static int vhost_user_backend_cleanup(struct vhost_dev *dev)
{
    struct vhost_user *u;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = dev->opaque;
    if (u->postcopy_notifier.notify) {
        postcopy_remove_notifier(&u->postcopy_notifier);
        u->postcopy_notifier.notify = NULL;
    }
    u->postcopy_listen = false;
    if (u->postcopy_fd.handler) {
        postcopy_unregister_shared_ufd(&u->postcopy_fd);
        close(u->postcopy_fd.fd);
        u->postcopy_fd.handler = NULL;
    }
    if (u->backend_ioc) {
        close_backend_channel(u);
    }
    g_free(u->region_rb);
    u->region_rb = NULL;
    g_free(u->region_rb_offset);
    u->region_rb_offset = NULL;
    u->region_rb_len = 0;
    g_free(u);
    dev->opaque = 0;

    return 0;
}

static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx;
}

static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    struct vhost_user *u = dev->opaque;

    return u->user->memory_slots;
}

static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}

static int vhost_user_migration_done(struct vhost_dev *dev, char *mac_addr)
{
    VhostUserMsg msg = { };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If guest supports GUEST_ANNOUNCE do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.hdr.request = VHOST_USER_SEND_RARP;
        msg.hdr.flags = VHOST_USER_VERSION;
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
        msg.hdr.size = sizeof(msg.payload.u64);

        return vhost_user_write(dev, &msg, NULL, 0);
    }
    return -ENOTSUP;
}
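
/*
 * Two adjacent memory sections can only be merged into a single
 * vhost-user region if they are backed by the same file descriptor.
 */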
static bool vhost_user_can_merge(struct vhost_dev *dev,
                                 uint64_t start1, uint64_t size1,
                                 uint64_t start2, uint64_t size2)
{
    ram_addr_t offset;
    int mfd, rfd;

    (void)vhost_user_get_mr_data(start1, &offset, &mfd);
    (void)vhost_user_get_mr_data(start2, &offset, &rfd);

    return mfd == rfd;
}

static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
{
    VhostUserMsg msg;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    int ret;

    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
        return 0;
    }

    msg.hdr.request = VHOST_USER_NET_SET_MTU;
    msg.payload.u64 = mtu;
    msg.hdr.size = sizeof(msg.payload.u64);
    msg.hdr.flags = VHOST_USER_VERSION;
    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    /* If reply_ack supported, backend has to ack specified MTU is valid */
    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
                                            struct vhost_iotlb_msg *imsg)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_IOTLB_MSG,
        .hdr.size = sizeof(msg.payload.iotlb),
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
        .payload.iotlb = *imsg,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    return process_message_reply(dev, &msg);
}

static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}
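
/*
 * Read the device config space from the backend via
 * VHOST_USER_GET_CONFIG. Requires VHOST_USER_PROTOCOL_F_CONFIG; the
 * reply must carry the config header followed by config_len bytes.
 */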
static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
        return -EINVAL;
    }

    assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);

    msg.payload.config.offset = 0;
    msg.payload.config.size = config_len;
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
        error_setg(errp,
                   "Received unexpected msg type. Expected %d received %d",
                   VHOST_USER_GET_CONFIG, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }

    memcpy(config, msg.payload.config.region, config_len);

    return 0;
}

static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size, uint32_t flags)
{
    int ret;
    uint8_t *p;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -ENOTSUP;
    }

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
        return -EINVAL;
    }

    msg.payload.config.offset = offset;
    msg.payload.config.size = size;
    msg.payload.config.flags = flags;
    p = msg.payload.config.region;
    memcpy(p, data, size);

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}
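
/*
 * Forward a cryptodev session create request to the backend
 * (VHOST_USER_CREATE_CRYPTO_SESSION). The session parameters and key
 * material are copied into the message payload; on success the backend
 * returns the new session id in its reply.
 */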
static int vhost_user_crypto_create_session(struct vhost_dev *dev,
                                            void *session_info,
                                            uint64_t *session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    CryptoDevBackendSessionInfo *backend_info = session_info;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.session),
    };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) {
        CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info;
        size_t keylen;

        memcpy(&msg.payload.session.u.asym.session_setup_data, sess,
               sizeof(CryptoDevBackendAsymSessionInfo));
        if (sess->keylen) {
            keylen = sizeof(msg.payload.session.u.asym.key);
            if (sess->keylen > keylen) {
                error_report("Unsupported asymmetric key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.asym.key, sess->key,
                   sess->keylen);
        }
    } else {
        CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info;
        size_t keylen;

        memcpy(&msg.payload.session.u.sym.session_setup_data, sess,
               sizeof(CryptoDevBackendSymSessionInfo));
        if (sess->key_len) {
            keylen = sizeof(msg.payload.session.u.sym.key);
            if (sess->key_len > keylen) {
                error_report("Unsupported cipher key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.sym.key, sess->cipher_key,
                   sess->key_len);
        }

        if (sess->auth_key_len > 0) {
            keylen = sizeof(msg.payload.session.u.sym.auth_key);
            if (sess->auth_key_len > keylen) {
                error_report("Unsupported auth key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key,
                   sess->auth_key_len);
        }
    }

    msg.payload.session.op_code = backend_info->op_code;
    msg.payload.session.session_id = backend_info->session_id;
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() returned %d, create session failed",
                     ret);
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_report("vhost_user_read() returned %d, create session failed",
                     ret);
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.session)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    if (msg.payload.session.session_id < 0) {
        error_report("Bad session id: %" PRId64,
                     msg.payload.session.session_id);
        return -EINVAL;
    }
    *session_id = msg.payload.session.session_id;

    return 0;
}

static int
vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.u64),
    };
    msg.payload.u64 = session_id;

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() returned %d, close session failed",
                     ret);
        return ret;
    }

    return 0;
}

static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
                                          MemoryRegionSection *section)
{
    return memory_region_get_fd(section->mr) >= 0;
}

static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
                                      uint16_t queue_size,
                                      struct vhost_inflight *inflight)
{
    void *addr;
    int fd;
    int ret;
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    if (!msg.payload.inflight.mmap_size) {
        return 0;
    }

    fd = qemu_chr_fe_get_msgfd(chr);
    if (fd < 0) {
        error_report("Failed to get mem fd");
        return -EIO;
    }

    addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
                MAP_SHARED, fd, msg.payload.inflight.mmap_offset);

    if (addr == MAP_FAILED) {
        error_report("Failed to mmap mem fd");
        close(fd);
        return -EFAULT;
    }

    inflight->addr = addr;
    inflight->fd = fd;
    inflight->size = msg.payload.inflight.mmap_size;
    inflight->offset = msg.payload.inflight.mmap_offset;
    inflight->queue_size = queue_size;

    return 0;
}

static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
                                      struct vhost_inflight *inflight)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.mmap_size = inflight->size,
        .payload.inflight.mmap_offset = inflight->offset,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = inflight->queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    return vhost_user_write(dev, &msg, &inflight->fd, 1);
}

static void vhost_user_state_destroy(gpointer data)
{
    VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
    if (n) {
        vhost_user_host_notifier_remove(n, NULL);
        object_unparent(OBJECT(&n->mr));
        /*
         * We can't free until vhost_user_host_notifier_remove has
         * done its thing so schedule the free with RCU.
         */
        g_free_rcu(n, rcu);
    }
}

bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
    if (user->chr) {
        error_setg(errp, "Cannot initialize vhost-user state");
        return false;
    }
    user->chr = chr;
    user->memory_slots = 0;
    user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
                                           &vhost_user_state_destroy);
    return true;
}

void vhost_user_cleanup(VhostUserState *user)
{
    if (!user->chr) {
        return;
    }
    memory_region_transaction_begin();
    user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
    memory_region_transaction_commit();
    user->chr = NULL;
}

/*
 * Helpers for tearing down a vhost-user device from a chardev close
 * event: the actual cleanup runs in a bottom half so it never executes
 * inside a chardev read/write handler.
 */
typedef struct {
    vu_async_close_fn cb;
    DeviceState *dev;
    CharBackend *cd;
    struct vhost_dev *vhost;
} VhostAsyncCallback;

static void vhost_user_async_close_bh(void *opaque)
{
    VhostAsyncCallback *data = opaque;
    struct vhost_dev *vhost = data->vhost;

    /*
     * If the vhost_dev has been cleared in the meantime there is
     * nothing left to do as some other path has completed the
     * cleanup.
     */
    if (vhost->vdev) {
        data->cb(data->dev);
    }

    g_free(data);
}

/*
 * We only schedule the work if the machine is running. If suspended
 * we want to keep all the in-flight data as is for migration
 * purposes.
 */
void vhost_user_async_close(DeviceState *d,
                            CharBackend *chardev, struct vhost_dev *vhost,
                            vu_async_close_fn cb)
{
    if (!runstate_check(RUN_STATE_SHUTDOWN)) {
        /*
         * A close event may happen during a read/write, but vhost
         * code assumes the vhost_dev remains setup, so delay the
         * stop & clear.
         */
        AioContext *ctx = qemu_get_current_aio_context();
        VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);

        /* Save data for the callback */
        data->cb = cb;
        data->dev = d;
        data->cd = chardev;
        data->vhost = vhost;

        /* Disable any further notifications on the chardev */
        qemu_chr_fe_set_handlers(chardev,
                                 NULL, NULL, NULL, NULL, NULL, NULL,
                                 false);

        aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);

        /*
         * Move the vhost device to the stopped state. The vhost-user
         * device will be cleaned up and disconnected in the BH. This is
         * useful for the vhost migration code: if a disconnect is caught,
         * the generic vhost code can still query the device state without
         * knowing its type (vhost-user in this case).
         *
         * Note if the vhost device is fully cleared by the time we
         * execute the bottom half we won't continue with the cleanup.
         */
        vhost->started = false;
    }
}

static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
{
    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_STATUS)) {
        return 0;
    }

    /* Set device status only for last queue pair */
    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                          VIRTIO_CONFIG_S_DRIVER |
                                          VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        return 0;
    }
}

static void vhost_user_reset_status(struct vhost_dev *dev)
{
    /* Set device status only for last queue pair */
    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return;
    }

    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        vhost_user_set_status(dev, 0);
    }
}
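
/* Backend ops exposed to the generic vhost layer for vhost-user devices. */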
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        .vhost_backend_init = vhost_user_backend_init,
        .vhost_backend_cleanup = vhost_user_backend_cleanup,
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_vring_err = vhost_user_set_vring_err,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
        .vhost_migration_done = vhost_user_migration_done,
        .vhost_backend_can_merge = vhost_user_can_merge,
        .vhost_net_set_mtu = vhost_user_net_set_mtu,
        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
        .vhost_get_config = vhost_user_get_config,
        .vhost_set_config = vhost_user_set_config,
        .vhost_crypto_create_session = vhost_user_crypto_create_session,
        .vhost_crypto_close_session = vhost_user_crypto_close_session,
        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
        .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
        .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
        .vhost_dev_start = vhost_user_dev_start,
        .vhost_reset_status = vhost_user_reset_status,
};