/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-net.h"
#include "chardev/char-fe.h"
#include "io/channel-socket.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/sockets.h"
#include "sysemu/runstate.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
#include "exec/ramblock.h"

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "standard-headers/linux/vhost_types.h"

#ifdef CONFIG_LINUX
#include <linux/userfaultfd.h>
#endif

#define VHOST_MEMORY_BASELINE_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_BACKEND_MAX_FDS     8

#if defined(TARGET_PPC) || defined(TARGET_PPC64)
#include "hw/ppc/spapr.h"
#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS

#else
#define VHOST_USER_MAX_RAM_SLOTS 512
#endif

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
    /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
    VHOST_USER_PROTOCOL_F_STATUS = 16,
    VHOST_USER_PROTOCOL_F_MAX
};

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_BACKEND_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_SET_STATUS = 39,
    VHOST_USER_GET_STATUS = 40,
    VHOST_USER_MAX
} VhostUserRequest;

typedef enum VhostUserSlaveRequest {
    VHOST_USER_BACKEND_NONE = 0,
    VHOST_USER_BACKEND_IOTLB_MSG = 1,
    VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_BACKEND_MAX
} VhostUserSlaveRequest;

typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserMemRegMsg {
    uint64_t padding;
    VhostUserMemoryRegion region;
} VhostUserMemRegMsg;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN   512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64

typedef struct VhostUserCryptoSession {
    /* session id for success, -1 on errors */
    int64_t session_id;
    CryptoDevBackendSymSessionInfo session_setup_data;
    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))

typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;

typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
    uint64_t u64;
    struct vhost_vring_state state;
    struct vhost_vring_addr addr;
    VhostUserMemory memory;
    VhostUserMemRegMsg mem_reg;
    VhostUserLog log;
    struct vhost_iotlb_msg iotlb;
    VhostUserConfig config;
    VhostUserCryptoSession session;
    VhostUserVringArea area;
    VhostUserInflight inflight;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)

struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    QIOChannel *slave_ioc;
    GSource *slave_src;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD postcopy_fd;
    uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock **region_rb;
    /*
     * The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool postcopy_listen;

    /* Our current regions */
    int num_shadow_regions;
    struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
};

struct scrub_regions {
    struct vhost_memory_region *region;
    int reg_idx;
    int fd_idx;
};

static bool ioeventfd_enabled(void)
{
    return !kvm_enabled() || kvm_eventfds_enabled();
}

static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        int saved_errno = errno;
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        return r < 0 ? -saved_errno : -EIO;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
288 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 289 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 290 return -EPROTO; 291 } 292 293 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags); 294 295 return 0; 296 } 297 298 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 299 { 300 struct vhost_user *u = dev->opaque; 301 CharBackend *chr = u->user->chr; 302 uint8_t *p = (uint8_t *) msg; 303 int r, size; 304 305 r = vhost_user_read_header(dev, msg); 306 if (r < 0) { 307 return r; 308 } 309 310 /* validate message size is sane */ 311 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 312 error_report("Failed to read msg header." 313 " Size %d exceeds the maximum %zu.", msg->hdr.size, 314 VHOST_USER_PAYLOAD_SIZE); 315 return -EPROTO; 316 } 317 318 if (msg->hdr.size) { 319 p += VHOST_USER_HDR_SIZE; 320 size = msg->hdr.size; 321 r = qemu_chr_fe_read_all(chr, p, size); 322 if (r != size) { 323 int saved_errno = errno; 324 error_report("Failed to read msg payload." 325 " Read %d instead of %d.", r, msg->hdr.size); 326 return r < 0 ? -saved_errno : -EIO; 327 } 328 } 329 330 return 0; 331 } 332 333 static int process_message_reply(struct vhost_dev *dev, 334 const VhostUserMsg *msg) 335 { 336 int ret; 337 VhostUserMsg msg_reply; 338 339 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 340 return 0; 341 } 342 343 ret = vhost_user_read(dev, &msg_reply); 344 if (ret < 0) { 345 return ret; 346 } 347 348 if (msg_reply.hdr.request != msg->hdr.request) { 349 error_report("Received unexpected msg type. " 350 "Expected %d received %d", 351 msg->hdr.request, msg_reply.hdr.request); 352 return -EPROTO; 353 } 354 355 return msg_reply.payload.u64 ? -EIO : 0; 356 } 357 358 static bool vhost_user_one_time_request(VhostUserRequest request) 359 { 360 switch (request) { 361 case VHOST_USER_SET_OWNER: 362 case VHOST_USER_RESET_OWNER: 363 case VHOST_USER_SET_MEM_TABLE: 364 case VHOST_USER_GET_QUEUE_NUM: 365 case VHOST_USER_NET_SET_MTU: 366 case VHOST_USER_ADD_MEM_REG: 367 case VHOST_USER_REM_MEM_REG: 368 return true; 369 default: 370 return false; 371 } 372 } 373 374 /* most non-init callers ignore the error */ 375 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 376 int *fds, int fd_num) 377 { 378 struct vhost_user *u = dev->opaque; 379 CharBackend *chr = u->user->chr; 380 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 381 382 /* 383 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 384 * we just need send it once in the first time. For later such 385 * request, we just ignore it. 386 */ 387 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 388 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 389 return 0; 390 } 391 392 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 393 error_report("Failed to set msg fds."); 394 return -EINVAL; 395 } 396 397 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 398 if (ret != size) { 399 int saved_errno = errno; 400 error_report("Failed to write msg." 401 " Wrote %d instead of %d.", ret, size); 402 return ret < 0 ? 
    }

    trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);

    return 0;
}

int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, &fd, 1);
}

static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    /* Send only once with first queue pair */
    if (dev->vq_index != 0) {
        return 0;
    }

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    ret = vhost_user_write(dev, &msg, fds, fd_num);
    if (ret < 0) {
        return ret;
    }

    if (shmfd) {
        msg.hdr.size = 0;
        ret = vhost_user_read(dev, &msg);
        if (ret < 0) {
            return ret;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
            return -EPROTO;
        }
    }

    return 0;
}

static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
                                            int *fd)
{
    MemoryRegion *mr;

    assert((uintptr_t)addr == addr);
    mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
    *fd = memory_region_get_fd(mr);
    *offset += mr->ram_block->fd_offset;

    return mr;
}

static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
                                       struct vhost_memory_region *src,
                                       uint64_t mmap_offset)
{
    assert(src != NULL && dst != NULL);
    dst->userspace_addr = src->userspace_addr;
    dst->memory_size = src->memory_size;
    dst->guest_phys_addr = src->guest_phys_addr;
    dst->mmap_offset = mmap_offset;
}

static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
                                             struct vhost_dev *dev,
                                             VhostUserMsg *msg,
                                             int *fds, size_t *fd_num,
                                             bool track_ramblocks)
{
    int i, fd;
    ram_addr_t offset;
    MemoryRegion *mr;
    struct vhost_memory_region *reg;
    VhostUserMemoryRegion region_buffer;

    msg->hdr.request = VHOST_USER_SET_MEM_TABLE;

    for (i = 0; i < dev->mem->nregions; ++i) {
        reg = dev->mem->regions + i;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            if (track_ramblocks) {
                assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[i] = offset;
                u->region_rb[i] = mr->ram_block;
            } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
                error_report("Failed preparing vhost-user memory table msg");
                return -ENOBUFS;
            }
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.memory.regions[*fd_num] = region_buffer;
            fds[(*fd_num)++] = fd;
        } else if (track_ramblocks) {
            u->region_rb_offset[i] = 0;
            u->region_rb[i] = NULL;
        }
    }

    msg->payload.memory.nregions = *fd_num;

    if (!*fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -EINVAL;
    }

    msg->hdr.size = sizeof(msg->payload.memory.nregions);
    msg->hdr.size += sizeof(msg->payload.memory.padding);
    msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);

    return 0;
}

static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
                             struct vhost_memory_region *vdev_reg)
{
    return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
        shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
        shadow_reg->memory_size == vdev_reg->memory_size;
}

static void scrub_shadow_regions(struct vhost_dev *dev,
                                 struct scrub_regions *add_reg,
                                 int *nr_add_reg,
                                 struct scrub_regions *rem_reg,
                                 int *nr_rem_reg, uint64_t *shadow_pcb,
                                 bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
    struct vhost_memory_region *reg, *shadow_reg;
    int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
    ram_addr_t offset;
    MemoryRegion *mr;
    bool matching;

    /*
     * Find memory regions present in our shadow state which are not in
     * the device's current memory state.
     *
     * Mark regions in both the shadow and device state as "found".
     */
    for (i = 0; i < u->num_shadow_regions; i++) {
        shadow_reg = &u->shadow_regions[i];
        matching = false;

        for (j = 0; j < dev->mem->nregions; j++) {
            reg = &dev->mem->regions[j];

            mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

            if (reg_equal(shadow_reg, reg)) {
                matching = true;
                found[j] = true;
                if (track_ramblocks) {
                    /*
                     * Reset postcopy client bases, region_rb, and
                     * region_rb_offset in case regions are removed.
                     */
                    if (fd > 0) {
                        u->region_rb_offset[j] = offset;
                        u->region_rb[j] = mr->ram_block;
                        shadow_pcb[j] = u->postcopy_client_bases[i];
                    } else {
                        u->region_rb_offset[j] = 0;
                        u->region_rb[j] = NULL;
                    }
                }
                break;
            }
        }

        /*
         * If the region was not found in the current device memory state
         * create an entry for it in the removed list.
         */
        if (!matching) {
            rem_reg[rm_idx].region = shadow_reg;
            rem_reg[rm_idx++].reg_idx = i;
        }
    }

    /*
     * For regions not marked "found", create entries in the added list.
     *
     * Note their indexes in the device memory state and the indexes of their
     * file descriptors.
     */
    for (i = 0; i < dev->mem->nregions; i++) {
        reg = &dev->mem->regions[i];
        vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            ++fd_num;
        }

        /*
         * If the region was in both the shadow and device state we don't
         * need to send a VHOST_USER_ADD_MEM_REG message for it.
         */
        if (found[i]) {
            continue;
        }

        add_reg[add_idx].region = reg;
        add_reg[add_idx].reg_idx = i;
        add_reg[add_idx++].fd_idx = fd_num;
    }
    *nr_rem_reg = rm_idx;
    *nr_add_reg = add_idx;

    return;
}

static int send_remove_regions(struct vhost_dev *dev,
                               struct scrub_regions *remove_reg,
                               int nr_rem_reg, VhostUserMsg *msg,
                               bool reply_supported)
{
    struct vhost_user *u = dev->opaque;
    struct vhost_memory_region *shadow_reg;
    int i, fd, shadow_reg_idx, ret;
    ram_addr_t offset;
    VhostUserMemoryRegion region_buffer;

    /*
     * The regions in remove_reg appear in the same order they do in the
     * shadow table. Therefore we can minimize memory copies by iterating
     * through remove_reg backwards.
     */
    for (i = nr_rem_reg - 1; i >= 0; i--) {
        shadow_reg = remove_reg[i].region;
        shadow_reg_idx = remove_reg[i].reg_idx;

        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            msg->hdr.request = VHOST_USER_REM_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, NULL, 0);
            if (ret < 0) {
                return ret;
            }

            if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        }

        /*
         * At this point we know the backend has unmapped the region. It is now
         * safe to remove it from the shadow table.
         */
        memmove(&u->shadow_regions[shadow_reg_idx],
                &u->shadow_regions[shadow_reg_idx + 1],
                sizeof(struct vhost_memory_region) *
                (u->num_shadow_regions - shadow_reg_idx - 1));
        u->num_shadow_regions--;
    }

    return 0;
}

static int send_add_regions(struct vhost_dev *dev,
                            struct scrub_regions *add_reg, int nr_add_reg,
                            VhostUserMsg *msg, uint64_t *shadow_pcb,
                            bool reply_supported, bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    int i, fd, ret, reg_idx, reg_fd_idx;
    struct vhost_memory_region *reg;
    MemoryRegion *mr;
    ram_addr_t offset;
    VhostUserMsg msg_reply;
    VhostUserMemoryRegion region_buffer;

    for (i = 0; i < nr_add_reg; i++) {
        reg = add_reg[i].region;
        reg_idx = add_reg[i].reg_idx;
        reg_fd_idx = add_reg[i].fd_idx;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            if (track_ramblocks) {
                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[reg_idx] = offset;
                u->region_rb[reg_idx] = mr->ram_block;
            }
            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, &fd, 1);
            if (ret < 0) {
                return ret;
            }

            if (track_ramblocks) {
                uint64_t reply_gpa;

                ret = vhost_user_read(dev, &msg_reply);
                if (ret < 0) {
                    return ret;
                }

                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;

                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
                    error_report("%s: Received unexpected msg type."
                                 "Expected %d received %d", __func__,
                                 VHOST_USER_ADD_MEM_REG,
                                 msg_reply.hdr.request);
                    return -EPROTO;
                }

                /*
                 * We're using the same structure, just reusing one of the
                 * fields, so it should be the same size.
                 */
                if (msg_reply.hdr.size != msg->hdr.size) {
                    error_report("%s: Unexpected size for postcopy reply "
                                 "%d vs %d", __func__, msg_reply.hdr.size,
                                 msg->hdr.size);
                    return -EPROTO;
                }

                /* Get the postcopy client base from the backend's reply. */
                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
                    shadow_pcb[reg_idx] =
                        msg_reply.payload.mem_reg.region.userspace_addr;
                    trace_vhost_user_set_mem_table_postcopy(
                        msg_reply.payload.mem_reg.region.userspace_addr,
                        msg->payload.mem_reg.region.userspace_addr,
                        reg_fd_idx, reg_idx);
                } else {
                    error_report("%s: invalid postcopy reply for region. "
                                 "Got guest physical address %" PRIX64 ", expected "
                                 "%" PRIX64, __func__, reply_gpa,
                                 dev->mem->regions[reg_idx].guest_phys_addr);
                    return -EPROTO;
                }
            } else if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        } else if (track_ramblocks) {
            u->region_rb_offset[reg_idx] = 0;
            u->region_rb[reg_idx] = NULL;
        }

        /*
         * At this point, we know the backend has mapped in the new
         * region, if the region has a valid file descriptor.
         *
         * The region should now be added to the shadow table.
         */
        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
            reg->guest_phys_addr;
        u->shadow_regions[u->num_shadow_regions].userspace_addr =
            reg->userspace_addr;
        u->shadow_regions[u->num_shadow_regions].memory_size =
            reg->memory_size;
        u->num_shadow_regions++;
    }

    return 0;
}

static int vhost_user_add_remove_regions(struct vhost_dev *dev,
                                         VhostUserMsg *msg,
                                         bool reply_supported,
                                         bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
    struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
    uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
    int nr_add_reg, nr_rem_reg;
    int ret;

    msg->hdr.size = sizeof(msg->payload.mem_reg);

    /* Find the regions which need to be removed or added. */
    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
                         shadow_pcb, track_ramblocks);

    if (nr_rem_reg) {
        ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
                                  reply_supported);
        if (ret < 0) {
            goto err;
        }
    }

    if (nr_add_reg) {
        ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
                               reply_supported, track_ramblocks);
        if (ret < 0) {
            goto err;
        }
    }

    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal with
         * any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg->hdr.size = sizeof(msg->payload.u64);
        msg->payload.u64 = 0; /* OK */

        ret = vhost_user_write(dev, msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;

err:
    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
    }

    return ret;
}

static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
                                             struct vhost_memory *mem,
                                             bool reply_supported,
                                             bool config_mem_slots)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg_reply;
    int region_i, msg_i;
    int ret;

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (u->region_rb_len < dev->mem->nregions) {
        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
                                      dev->mem->nregions);
        memset(&(u->region_rb[u->region_rb_len]), '\0',
               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
        u->region_rb_len = dev->mem->nregions;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                true);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_read(dev, &msg_reply);
        if (ret < 0) {
            return ret;
        }

        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
            error_report("%s: Received unexpected msg type."
                         "Expected %d received %d", __func__,
                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
            return -EPROTO;
        }

        /*
         * We're using the same structure, just reusing one of the
         * fields, so it should be the same size.
         */
        if (msg_reply.hdr.size != msg.hdr.size) {
            error_report("%s: Unexpected size for postcopy reply "
                         "%d vs %d", __func__, msg_reply.hdr.size,
                         msg.hdr.size);
            return -EPROTO;
        }

        memset(u->postcopy_client_bases, 0,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);

        /*
         * They're in the same order as the regions that were sent
         * but some of the regions were skipped (above) if they
         * didn't have fd's
         */
        for (msg_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            if (msg_i < fd_num &&
                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
                dev->mem->regions[region_i].guest_phys_addr) {
                u->postcopy_client_bases[region_i] =
                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
                trace_vhost_user_set_mem_table_postcopy(
                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
                    msg.payload.memory.regions[msg_i].userspace_addr,
                    msg_i, region_i);
                msg_i++;
            }
        }
        if (msg_i != fd_num) {
            error_report("%s: postcopy reply not fully consumed "
                         "%d vs %zd",
                         __func__, msg_i, fd_num);
            return -EIO;
        }

        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal
         * with any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg.hdr.size = sizeof(msg.payload.u64);
        msg.payload.u64 = 0; /* OK */
        ret = vhost_user_write(dev, &msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    bool config_mem_slots =
        virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
    int ret;

    if (do_postcopy) {
        /*
         * Postcopy has enough differences that it's best done in its own
         * version
         */
        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
                                                 config_mem_slots);
    }

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                false);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        if (reply_supported) {
            return process_message_reply(dev, &msg);
        }
    }

    return 0;
}

static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    bool cross_endian = virtio_has_feature(dev->protocol_features,
                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (!cross_endian) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
}

static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
{
    assert(n && n->unmap_addr);
    munmap(n->unmap_addr, qemu_real_host_page_size());
    n->unmap_addr = NULL;
}

/*
 * clean-up function for notifier, will finally free the structure
 * under rcu.
 */
static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
                                            VirtIODevice *vdev)
{
    if (n->addr) {
        if (vdev) {
            virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
        }
        assert(!n->unmap_addr);
        n->unmap_addr = n->addr;
        n->addr = NULL;
        call_rcu(n, vhost_user_host_notifier_free, rcu);
    }
}

static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
    int i;

    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        return -EINVAL;
    }

    for (i = 0; i < dev->nvqs; ++i) {
        int ret;
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num   = enable,
        };

        ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
        if (ret < 0) {
            /*
             * Restoring the previous state is likely infeasible, as is
             * proceeding regardless of the error, so just bail out and
             * hope for device-level recovery.
             */
            return ret;
        }
    }

    return 0;
}

static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
                                             int idx)
{
    if (idx >= u->notifiers->len) {
        return NULL;
    }
    return g_ptr_array_index(u->notifiers, idx);
}

static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };
    struct vhost_user *u = dev->opaque;

    VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
    if (n) {
        vhost_user_host_notifier_remove(n, dev->vdev);
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *ring = msg.payload.state;

    return 0;
}

static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .hdr.size = sizeof(msg.payload.u64),
    };

    if (ioeventfd_enabled() && file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    return vhost_user_write(dev, &msg, fds, fd_num);
}

static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
}

static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}

static int vhost_user_set_vring_err(struct vhost_dev *dev,
                                    struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
}

static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *u64 = msg.payload.u64;

    return 0;
}

static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
        return -EPROTO;
    }

    return 0;
}

static int enforce_reply(struct vhost_dev *dev,
                         const VhostUserMsg *msg)
{
    uint64_t dummy;

    if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        return process_message_reply(dev, msg);
    }

    /*
     * We need to wait for a reply but the backend does not
     * support replies for the command we just sent.
     * Send VHOST_USER_GET_FEATURES which makes all backends
     * send a reply.
     */
    return vhost_user_get_features(dev, &dummy);
}

static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ADDR,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .hdr.size = sizeof(msg.payload.addr),
    };

    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);

    if (reply_supported && wait_for_reply) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (wait_for_reply) {
        return enforce_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
                              bool wait_for_reply)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .hdr.size = sizeof(msg.payload.u64),
    };
    int ret;

    if (wait_for_reply) {
        bool reply_supported = virtio_has_feature(dev->protocol_features,
                                          VHOST_USER_PROTOCOL_F_REPLY_ACK);
        if (reply_supported) {
            msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
        }
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (wait_for_reply) {
        return enforce_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
}

static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
{
    uint64_t value;
    int ret;

    ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
    if (ret < 0) {
        return ret;
    }
    *status = value;

    return 0;
}

static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    ret = vhost_user_get_status(dev, &s);
    if (ret < 0) {
        return ret;
    }

    if ((s & status) == status) {
        return 0;
    }
    s |= status;

    return vhost_user_set_status(dev, s);
}

static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
    int ret;

    /*
     * We need to include any extra backend only feature bits that
     * might be needed by our device. Currently this includes the
     * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
     * features.
     */
    ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
                             features | dev->backend_features,
                             log_enabled);

    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        if (!ret) {
            return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
        }
    }

    return ret;
}

static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
                              false);
}

static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_OWNER,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_get_max_memslots(struct vhost_dev *dev,
                                       uint64_t *max_memslots)
{
    uint64_t backend_max_memslots;
    int err;

    err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
                             &backend_max_memslots);
    if (err < 0) {
        return err;
    }

    *max_memslots = backend_max_memslots;

    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    msg.hdr.request = virtio_has_feature(dev->protocol_features,
                                         VHOST_USER_PROTOCOL_F_RESET_DEVICE)
        ? VHOST_USER_RESET_DEVICE
        : VHOST_USER_RESET_OWNER;

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
{
    if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
        return -ENOSYS;
    }

    return dev->config_ops->vhost_dev_config_notifier(dev);
}

/*
 * Fetch or create the notifier for a given idx. Newly created
 * notifiers are added to the pointer array that tracks them.
 */
static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
                                                       int idx)
{
    VhostUserHostNotifier *n = NULL;
    if (idx >= u->notifiers->len) {
        g_ptr_array_set_size(u->notifiers, idx + 1);
    }

    n = g_ptr_array_index(u->notifiers, idx);
    if (!n) {
        /*
         * In case notifications arrive out-of-order,
         * make room for the current index.
         */
        g_ptr_array_remove_index(u->notifiers, idx);
        n = g_new0(VhostUserHostNotifier, 1);
        n->idx = idx;
        g_ptr_array_insert(u->notifiers, idx, n);
        trace_vhost_user_create_notifier(idx, n);
    }

    return n;
}

static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
                                                       VhostUserVringArea *area,
                                                       int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size();
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -EINVAL;
    }

    /*
     * Fetch notifier and invalidate any old data before setting up
     * new mapped address.
     */
    n = fetch_or_create_notifier(user, queue_idx);
    vhost_user_host_notifier_remove(n, vdev);

    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -EINVAL;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -EFAULT;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    if (!n->mr.ram) { /* Don't init again after suspend. */
        memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                          page_size, addr);
    } else {
        n->mr.ram_block->host = addr;
    }
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        return -ENXIO;
    }

    n->addr = addr;

    return 0;
}

static void close_slave_channel(struct vhost_user *u)
{
    g_source_destroy(u->slave_src);
    g_source_unref(u->slave_src);
    u->slave_src = NULL;
    object_unref(OBJECT(u->slave_ioc));
    u->slave_ioc = NULL;
}

static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
                           gpointer opaque)
{
    struct vhost_dev *dev = opaque;
    struct vhost_user *u = dev->opaque;
    VhostUserHeader hdr = { 0, };
    VhostUserPayload payload = { 0, };
    Error *local_err = NULL;
    gboolean rc = G_SOURCE_CONTINUE;
    int ret = 0;
    struct iovec iov;
    g_autofree int *fd = NULL;
    size_t fdsize = 0;
    int i;

    /* Read header */
    iov.iov_base = &hdr;
    iov.iov_len = VHOST_USER_HDR_SIZE;

    if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        goto err;
    }

    /* Read payload */
    if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    switch (hdr.request) {
    case VHOST_USER_BACKEND_IOTLB_MSG:
        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
        break;
    case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
        ret = vhost_user_slave_handle_config_change(dev);
        break;
    case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
        ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
                                                          fd ? fd[0] : -1);
        break;
    default:
        error_report("Received unexpected msg type: %d.", hdr.request);
        ret = -EINVAL;
    }

    /*
     * REPLY_ACK feature handling. Other reply types have to be managed
     * directly in their request handlers.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        struct iovec iovec[2];

        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        hdr.flags |= VHOST_USER_REPLY_MASK;

        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        iovec[0].iov_base = &hdr;
        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
        iovec[1].iov_base = &payload;
        iovec[1].iov_len = hdr.size;

        if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
            error_report_err(local_err);
            goto err;
        }
    }

    goto fdcleanup;

err:
    close_slave_channel(u);
    rc = G_SOURCE_REMOVE;

fdcleanup:
    if (fd) {
        for (i = 0; i < fdsize; i++) {
            close(fd[i]);
        }
    }
    return rc;
}

static int vhost_setup_slave_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    Error *local_err = NULL;
    QIOChannel *ioc;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
        return 0;
    }

    if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        int saved_errno = errno;
        error_report("socketpair() failed");
        return -saved_errno;
    }

    ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
    if (!ioc) {
        error_report_err(local_err);
        return -ECONNREFUSED;
    }
    u->slave_ioc = ioc;
    u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
                                                G_IO_IN | G_IO_HUP,
                                                slave_read, dev, NULL, NULL);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    close(sv[1]);
    if (ret) {
        close_slave_channel(u);
    }

    return ret;
}

#ifdef CONFIG_LINUX
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}

static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the client's address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif

/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type. Expected %d received %d",
                   VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }
    ufd = qemu_chr_fe_get_msgfd(chr);
    if (ufd < 0) {
        error_setg(errp, "%s: Failed to get ufd", __func__);
        return -EIO;
    }
    qemu_socket_set_nonblock(ufd);

    /* register ufd with userfault thread */
    u->postcopy_fd.fd = ufd;
    u->postcopy_fd.data = dev;
    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
    u->postcopy_fd.waker = vhost_user_postcopy_waker;
    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
    postcopy_register_shared_ufd(&u->postcopy_fd);
    return 0;
#else
    error_setg(errp, "Postcopy not supported on non-Linux systems");
    return -ENOSYS;
#endif
}

/*
 * Called at the switch to postcopy on reception of the 'listen' command.
 */
static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
{
    struct vhost_user *u = dev->opaque;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    u->postcopy_listen = true;

    trace_vhost_user_postcopy_listen();

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_listen to vhost");
        return ret;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_listen");
        return ret;
    }

    return 0;
}

/*
 * Called at the end of postcopy
 */
static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_END,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    int ret;
    struct vhost_user *u = dev->opaque;

    trace_vhost_user_postcopy_end_entry();

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_end to vhost");
        return ret;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_end");
        return ret;
    }
    postcopy_unregister_shared_ufd(&u->postcopy_fd);
    close(u->postcopy_fd.fd);
    u->postcopy_fd.handler = NULL;

    trace_vhost_user_postcopy_end_exit();

    return 0;
}

static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
                                        void *opaque)
{
    struct PostcopyNotifyData *pnd = opaque;
    struct vhost_user *u = container_of(notifier, struct vhost_user,
                                        postcopy_notifier);
    struct vhost_dev *dev = u->dev;

    switch (pnd->reason) {
    case POSTCOPY_NOTIFY_PROBE:
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
            /* TODO: Get the device name into this error somehow */
            error_setg(pnd->errp,
                       "vhost-user backend not capable of postcopy");
            return -ENOENT;
        }
        break;

    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
        return vhost_user_postcopy_advise(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
        return vhost_user_postcopy_listen(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_END:
        return vhost_user_postcopy_end(dev, pnd->errp);

    default:
        /* We ignore notifications we don't know */
        break;
    }

    return 0;
}

static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
                                   Error **errp)
{
    uint64_t features, ram_slots;
    struct vhost_user *u;
    VhostUserState *vus = (VhostUserState *) opaque;
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = g_new0(struct vhost_user, 1);
    u->user = vus;
    u->dev = dev;
    dev->opaque = u;

    err = vhost_user_get_features(dev, &features);
    if (err < 0) {
        error_setg_errno(errp, -err, "vhost_backend_init failed");
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        bool supports_f_config = vus->supports_config ||
            (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
        uint64_t protocol_features;

        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &protocol_features);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }

        /*
         * We will use all the protocol features we support - although
         * we suppress F_CONFIG if we know QEMU's internal code cannot
         * support it.
         */
        protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;

        if (supports_f_config) {
            if (!virtio_has_feature(protocol_features,
                                    VHOST_USER_PROTOCOL_F_CONFIG)) {
                error_setg(errp, "vhost-user device expecting "
                           "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
                           "not support it.");
                return -EPROTO;
            }
        } else {
            if (virtio_has_feature(protocol_features,
                                   VHOST_USER_PROTOCOL_F_CONFIG)) {
                warn_report("vhost-user backend supports "
                            "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
                protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
            }
        }

        /* final set of protocol features */
        dev->protocol_features = protocol_features;
        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
            if (err < 0) {
                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
                return -EPROTO;
            }
        } else {
            dev->max_queues = 1;
        }

        if (dev->num_queues && dev->max_queues < dev->num_queues) {
            error_setg(errp, "The maximum number of queues supported by the "
                       "backend is %" PRIu64, dev->max_queues);
            return -EINVAL;
        }

        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
            !(virtio_has_feature(dev->protocol_features,
                                 VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
              virtio_has_feature(dev->protocol_features,
                                 VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
            error_setg(errp, "IOMMU support requires reply-ack and "
                       "slave-req protocol features.");
            return -EINVAL;
        }

        /* get max memory regions if backend supports configurable RAM slots */
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
            u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
        } else {
else { 2072 err = vhost_user_get_max_memslots(dev, &ram_slots); 2073 if (err < 0) { 2074 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2075 return -EPROTO; 2076 } 2077 2078 if (ram_slots < u->user->memory_slots) { 2079 error_setg(errp, "The backend specified a max ram slots limit " 2080 "of %" PRIu64", when the prior validated limit was " 2081 "%d. This limit should never decrease.", ram_slots, 2082 u->user->memory_slots); 2083 return -EINVAL; 2084 } 2085 2086 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2087 } 2088 } 2089 2090 if (dev->migration_blocker == NULL && 2091 !virtio_has_feature(dev->protocol_features, 2092 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2093 error_setg(&dev->migration_blocker, 2094 "Migration disabled: vhost-user backend lacks " 2095 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2096 } 2097 2098 if (dev->vq_index == 0) { 2099 err = vhost_setup_slave_channel(dev); 2100 if (err < 0) { 2101 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2102 return -EPROTO; 2103 } 2104 } 2105 2106 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2107 postcopy_add_notifier(&u->postcopy_notifier); 2108 2109 return 0; 2110 } 2111 2112 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2113 { 2114 struct vhost_user *u; 2115 2116 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2117 2118 u = dev->opaque; 2119 if (u->postcopy_notifier.notify) { 2120 postcopy_remove_notifier(&u->postcopy_notifier); 2121 u->postcopy_notifier.notify = NULL; 2122 } 2123 u->postcopy_listen = false; 2124 if (u->postcopy_fd.handler) { 2125 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2126 close(u->postcopy_fd.fd); 2127 u->postcopy_fd.handler = NULL; 2128 } 2129 if (u->slave_ioc) { 2130 close_slave_channel(u); 2131 } 2132 g_free(u->region_rb); 2133 u->region_rb = NULL; 2134 g_free(u->region_rb_offset); 2135 u->region_rb_offset = NULL; 2136 u->region_rb_len = 0; 2137 g_free(u); 2138 dev->opaque = 0; 2139 2140 return 0; 2141 } 2142 2143 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2144 { 2145 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2146 2147 return idx; 2148 } 2149 2150 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2151 { 2152 struct vhost_user *u = dev->opaque; 2153 2154 return u->user->memory_slots; 2155 } 2156 2157 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2158 { 2159 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2160 2161 return virtio_has_feature(dev->protocol_features, 2162 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2163 } 2164 2165 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2166 { 2167 VhostUserMsg msg = { }; 2168 2169 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2170 2171 /* If guest supports GUEST_ANNOUNCE do nothing */ 2172 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2173 return 0; 2174 } 2175 2176 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2177 if (virtio_has_feature(dev->protocol_features, 2178 VHOST_USER_PROTOCOL_F_RARP)) { 2179 msg.hdr.request = VHOST_USER_SEND_RARP; 2180 msg.hdr.flags = VHOST_USER_VERSION; 2181 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2182 msg.hdr.size = sizeof(msg.payload.u64); 2183 2184 return vhost_user_write(dev, &msg, NULL, 0); 2185 } 2186 return -ENOTSUP; 2187 } 2188 2189 static bool vhost_user_can_merge(struct vhost_dev *dev, 2190 uint64_t start1, uint64_t size1, 2191 uint64_t start2, 
uint64_t size2) 2192 { 2193 ram_addr_t offset; 2194 int mfd, rfd; 2195 2196 (void)vhost_user_get_mr_data(start1, &offset, &mfd); 2197 (void)vhost_user_get_mr_data(start2, &offset, &rfd); 2198 2199 return mfd == rfd; 2200 } 2201 2202 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2203 { 2204 VhostUserMsg msg; 2205 bool reply_supported = virtio_has_feature(dev->protocol_features, 2206 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2207 int ret; 2208 2209 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2210 return 0; 2211 } 2212 2213 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2214 msg.payload.u64 = mtu; 2215 msg.hdr.size = sizeof(msg.payload.u64); 2216 msg.hdr.flags = VHOST_USER_VERSION; 2217 if (reply_supported) { 2218 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2219 } 2220 2221 ret = vhost_user_write(dev, &msg, NULL, 0); 2222 if (ret < 0) { 2223 return ret; 2224 } 2225 2226 /* If reply_ack supported, slave has to ack specified MTU is valid */ 2227 if (reply_supported) { 2228 return process_message_reply(dev, &msg); 2229 } 2230 2231 return 0; 2232 } 2233 2234 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2235 struct vhost_iotlb_msg *imsg) 2236 { 2237 int ret; 2238 VhostUserMsg msg = { 2239 .hdr.request = VHOST_USER_IOTLB_MSG, 2240 .hdr.size = sizeof(msg.payload.iotlb), 2241 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2242 .payload.iotlb = *imsg, 2243 }; 2244 2245 ret = vhost_user_write(dev, &msg, NULL, 0); 2246 if (ret < 0) { 2247 return ret; 2248 } 2249 2250 return process_message_reply(dev, &msg); 2251 } 2252 2253 2254 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2255 { 2256 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2257 } 2258 2259 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2260 uint32_t config_len, Error **errp) 2261 { 2262 int ret; 2263 VhostUserMsg msg = { 2264 .hdr.request = VHOST_USER_GET_CONFIG, 2265 .hdr.flags = VHOST_USER_VERSION, 2266 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2267 }; 2268 2269 if (!virtio_has_feature(dev->protocol_features, 2270 VHOST_USER_PROTOCOL_F_CONFIG)) { 2271 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2272 return -EINVAL; 2273 } 2274 2275 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2276 2277 msg.payload.config.offset = 0; 2278 msg.payload.config.size = config_len; 2279 ret = vhost_user_write(dev, &msg, NULL, 0); 2280 if (ret < 0) { 2281 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2282 return ret; 2283 } 2284 2285 ret = vhost_user_read(dev, &msg); 2286 if (ret < 0) { 2287 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2288 return ret; 2289 } 2290 2291 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2292 error_setg(errp, 2293 "Received unexpected msg type. 
Expected %d received %d", 2294 VHOST_USER_GET_CONFIG, msg.hdr.request); 2295 return -EPROTO; 2296 } 2297 2298 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2299 error_setg(errp, "Received bad msg size."); 2300 return -EPROTO; 2301 } 2302 2303 memcpy(config, msg.payload.config.region, config_len); 2304 2305 return 0; 2306 } 2307 2308 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2309 uint32_t offset, uint32_t size, uint32_t flags) 2310 { 2311 int ret; 2312 uint8_t *p; 2313 bool reply_supported = virtio_has_feature(dev->protocol_features, 2314 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2315 2316 VhostUserMsg msg = { 2317 .hdr.request = VHOST_USER_SET_CONFIG, 2318 .hdr.flags = VHOST_USER_VERSION, 2319 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2320 }; 2321 2322 if (!virtio_has_feature(dev->protocol_features, 2323 VHOST_USER_PROTOCOL_F_CONFIG)) { 2324 return -ENOTSUP; 2325 } 2326 2327 if (reply_supported) { 2328 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2329 } 2330 2331 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2332 return -EINVAL; 2333 } 2334 2335 msg.payload.config.offset = offset, 2336 msg.payload.config.size = size, 2337 msg.payload.config.flags = flags, 2338 p = msg.payload.config.region; 2339 memcpy(p, data, size); 2340 2341 ret = vhost_user_write(dev, &msg, NULL, 0); 2342 if (ret < 0) { 2343 return ret; 2344 } 2345 2346 if (reply_supported) { 2347 return process_message_reply(dev, &msg); 2348 } 2349 2350 return 0; 2351 } 2352 2353 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2354 void *session_info, 2355 uint64_t *session_id) 2356 { 2357 int ret; 2358 bool crypto_session = virtio_has_feature(dev->protocol_features, 2359 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2360 CryptoDevBackendSymSessionInfo *sess_info = session_info; 2361 VhostUserMsg msg = { 2362 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2363 .hdr.flags = VHOST_USER_VERSION, 2364 .hdr.size = sizeof(msg.payload.session), 2365 }; 2366 2367 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2368 2369 if (!crypto_session) { 2370 error_report("vhost-user trying to send unhandled ioctl"); 2371 return -ENOTSUP; 2372 } 2373 2374 memcpy(&msg.payload.session.session_setup_data, sess_info, 2375 sizeof(CryptoDevBackendSymSessionInfo)); 2376 if (sess_info->key_len) { 2377 memcpy(&msg.payload.session.key, sess_info->cipher_key, 2378 sess_info->key_len); 2379 } 2380 if (sess_info->auth_key_len > 0) { 2381 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 2382 sess_info->auth_key_len); 2383 } 2384 ret = vhost_user_write(dev, &msg, NULL, 0); 2385 if (ret < 0) { 2386 error_report("vhost_user_write() return %d, create session failed", 2387 ret); 2388 return ret; 2389 } 2390 2391 ret = vhost_user_read(dev, &msg); 2392 if (ret < 0) { 2393 error_report("vhost_user_read() return %d, create session failed", 2394 ret); 2395 return ret; 2396 } 2397 2398 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2399 error_report("Received unexpected msg type. 
Expected %d received %d", 2400 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2401 return -EPROTO; 2402 } 2403 2404 if (msg.hdr.size != sizeof(msg.payload.session)) { 2405 error_report("Received bad msg size."); 2406 return -EPROTO; 2407 } 2408 2409 if (msg.payload.session.session_id < 0) { 2410 error_report("Bad session id: %" PRId64 "", 2411 msg.payload.session.session_id); 2412 return -EINVAL; 2413 } 2414 *session_id = msg.payload.session.session_id; 2415 2416 return 0; 2417 } 2418 2419 static int 2420 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2421 { 2422 int ret; 2423 bool crypto_session = virtio_has_feature(dev->protocol_features, 2424 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2425 VhostUserMsg msg = { 2426 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2427 .hdr.flags = VHOST_USER_VERSION, 2428 .hdr.size = sizeof(msg.payload.u64), 2429 }; 2430 msg.payload.u64 = session_id; 2431 2432 if (!crypto_session) { 2433 error_report("vhost-user trying to send unhandled ioctl"); 2434 return -ENOTSUP; 2435 } 2436 2437 ret = vhost_user_write(dev, &msg, NULL, 0); 2438 if (ret < 0) { 2439 error_report("vhost_user_write() return %d, close session failed", 2440 ret); 2441 return ret; 2442 } 2443 2444 return 0; 2445 } 2446 2447 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 2448 MemoryRegionSection *section) 2449 { 2450 return memory_region_get_fd(section->mr) >= 0; 2451 } 2452 2453 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2454 uint16_t queue_size, 2455 struct vhost_inflight *inflight) 2456 { 2457 void *addr; 2458 int fd; 2459 int ret; 2460 struct vhost_user *u = dev->opaque; 2461 CharBackend *chr = u->user->chr; 2462 VhostUserMsg msg = { 2463 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2464 .hdr.flags = VHOST_USER_VERSION, 2465 .payload.inflight.num_queues = dev->nvqs, 2466 .payload.inflight.queue_size = queue_size, 2467 .hdr.size = sizeof(msg.payload.inflight), 2468 }; 2469 2470 if (!virtio_has_feature(dev->protocol_features, 2471 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2472 return 0; 2473 } 2474 2475 ret = vhost_user_write(dev, &msg, NULL, 0); 2476 if (ret < 0) { 2477 return ret; 2478 } 2479 2480 ret = vhost_user_read(dev, &msg); 2481 if (ret < 0) { 2482 return ret; 2483 } 2484 2485 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2486 error_report("Received unexpected msg type. 
" 2487 "Expected %d received %d", 2488 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2489 return -EPROTO; 2490 } 2491 2492 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2493 error_report("Received bad msg size."); 2494 return -EPROTO; 2495 } 2496 2497 if (!msg.payload.inflight.mmap_size) { 2498 return 0; 2499 } 2500 2501 fd = qemu_chr_fe_get_msgfd(chr); 2502 if (fd < 0) { 2503 error_report("Failed to get mem fd"); 2504 return -EIO; 2505 } 2506 2507 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2508 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2509 2510 if (addr == MAP_FAILED) { 2511 error_report("Failed to mmap mem fd"); 2512 close(fd); 2513 return -EFAULT; 2514 } 2515 2516 inflight->addr = addr; 2517 inflight->fd = fd; 2518 inflight->size = msg.payload.inflight.mmap_size; 2519 inflight->offset = msg.payload.inflight.mmap_offset; 2520 inflight->queue_size = queue_size; 2521 2522 return 0; 2523 } 2524 2525 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2526 struct vhost_inflight *inflight) 2527 { 2528 VhostUserMsg msg = { 2529 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2530 .hdr.flags = VHOST_USER_VERSION, 2531 .payload.inflight.mmap_size = inflight->size, 2532 .payload.inflight.mmap_offset = inflight->offset, 2533 .payload.inflight.num_queues = dev->nvqs, 2534 .payload.inflight.queue_size = inflight->queue_size, 2535 .hdr.size = sizeof(msg.payload.inflight), 2536 }; 2537 2538 if (!virtio_has_feature(dev->protocol_features, 2539 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2540 return 0; 2541 } 2542 2543 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2544 } 2545 2546 static void vhost_user_state_destroy(gpointer data) 2547 { 2548 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; 2549 if (n) { 2550 vhost_user_host_notifier_remove(n, NULL); 2551 object_unparent(OBJECT(&n->mr)); 2552 /* 2553 * We can't free until vhost_user_host_notifier_remove has 2554 * done it's thing so schedule the free with RCU. 2555 */ 2556 g_free_rcu(n, rcu); 2557 } 2558 } 2559 2560 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2561 { 2562 if (user->chr) { 2563 error_setg(errp, "Cannot initialize vhost-user state"); 2564 return false; 2565 } 2566 user->chr = chr; 2567 user->memory_slots = 0; 2568 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, 2569 &vhost_user_state_destroy); 2570 return true; 2571 } 2572 2573 void vhost_user_cleanup(VhostUserState *user) 2574 { 2575 if (!user->chr) { 2576 return; 2577 } 2578 memory_region_transaction_begin(); 2579 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); 2580 memory_region_transaction_commit(); 2581 user->chr = NULL; 2582 } 2583 2584 2585 typedef struct { 2586 vu_async_close_fn cb; 2587 DeviceState *dev; 2588 CharBackend *cd; 2589 struct vhost_dev *vhost; 2590 } VhostAsyncCallback; 2591 2592 static void vhost_user_async_close_bh(void *opaque) 2593 { 2594 VhostAsyncCallback *data = opaque; 2595 struct vhost_dev *vhost = data->vhost; 2596 2597 /* 2598 * If the vhost_dev has been cleared in the meantime there is 2599 * nothing left to do as some other path has completed the 2600 * cleanup. 2601 */ 2602 if (vhost->vdev) { 2603 data->cb(data->dev); 2604 } 2605 2606 g_free(data); 2607 } 2608 2609 /* 2610 * We only schedule the work if the machine is running. If suspended 2611 * we want to keep all the in-flight data as is for migration 2612 * purposes. 
2613 */ 2614 void vhost_user_async_close(DeviceState *d, 2615 CharBackend *chardev, struct vhost_dev *vhost, 2616 vu_async_close_fn cb) 2617 { 2618 if (!runstate_check(RUN_STATE_SHUTDOWN)) { 2619 /* 2620 * A close event may happen during a read/write, but vhost 2621 * code assumes the vhost_dev remains setup, so delay the 2622 * stop & clear. 2623 */ 2624 AioContext *ctx = qemu_get_current_aio_context(); 2625 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1); 2626 2627 /* Save data for the callback */ 2628 data->cb = cb; 2629 data->dev = d; 2630 data->cd = chardev; 2631 data->vhost = vhost; 2632 2633 /* Disable any further notifications on the chardev */ 2634 qemu_chr_fe_set_handlers(chardev, 2635 NULL, NULL, NULL, NULL, NULL, NULL, 2636 false); 2637 2638 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data); 2639 2640 /* 2641 * Move the vhost device to the stopped state. The vhost-user device 2642 * will be cleaned up and disconnected in the BH. This is useful for 2643 * the vhost migration code: if a disconnect was caught, the generic 2644 * vhost code can observe the device state without 2645 * knowing its type (in this case vhost-user). 2646 * 2647 * Note that if the vhost device is fully cleared by the time we 2648 * execute the bottom half, we won't continue with the cleanup. 2649 */ 2650 vhost->started = false; 2651 } 2652 } 2653 2654 static int vhost_user_dev_start(struct vhost_dev *dev, bool started) 2655 { 2656 if (!virtio_has_feature(dev->protocol_features, 2657 VHOST_USER_PROTOCOL_F_STATUS)) { 2658 return 0; 2659 } 2660 2661 /* Set device status only for last queue pair */ 2662 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2663 return 0; 2664 } 2665 2666 if (started) { 2667 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 2668 VIRTIO_CONFIG_S_DRIVER | 2669 VIRTIO_CONFIG_S_DRIVER_OK); 2670 } else { 2671 return 0; 2672 } 2673 } 2674 2675 static void vhost_user_reset_status(struct vhost_dev *dev) 2676 { 2677 /* Set device status only for last queue pair */ 2678 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2679 return; 2680 } 2681 2682 if (virtio_has_feature(dev->protocol_features, 2683 VHOST_USER_PROTOCOL_F_STATUS)) { 2684 vhost_user_set_status(dev, 0); 2685 } 2686 } 2687 2688 const VhostOps user_ops = { 2689 .backend_type = VHOST_BACKEND_TYPE_USER, 2690 .vhost_backend_init = vhost_user_backend_init, 2691 .vhost_backend_cleanup = vhost_user_backend_cleanup, 2692 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 2693 .vhost_set_log_base = vhost_user_set_log_base, 2694 .vhost_set_mem_table = vhost_user_set_mem_table, 2695 .vhost_set_vring_addr = vhost_user_set_vring_addr, 2696 .vhost_set_vring_endian = vhost_user_set_vring_endian, 2697 .vhost_set_vring_num = vhost_user_set_vring_num, 2698 .vhost_set_vring_base = vhost_user_set_vring_base, 2699 .vhost_get_vring_base = vhost_user_get_vring_base, 2700 .vhost_set_vring_kick = vhost_user_set_vring_kick, 2701 .vhost_set_vring_call = vhost_user_set_vring_call, 2702 .vhost_set_vring_err = vhost_user_set_vring_err, 2703 .vhost_set_features = vhost_user_set_features, 2704 .vhost_get_features = vhost_user_get_features, 2705 .vhost_set_owner = vhost_user_set_owner, 2706 .vhost_reset_device = vhost_user_reset_device, 2707 .vhost_get_vq_index = vhost_user_get_vq_index, 2708 .vhost_set_vring_enable = vhost_user_set_vring_enable, 2709 .vhost_requires_shm_log = vhost_user_requires_shm_log, 2710 .vhost_migration_done = vhost_user_migration_done, 2711 .vhost_backend_can_merge =
vhost_user_can_merge, 2712 .vhost_net_set_mtu = vhost_user_net_set_mtu, 2713 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 2714 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 2715 .vhost_get_config = vhost_user_get_config, 2716 .vhost_set_config = vhost_user_set_config, 2717 .vhost_crypto_create_session = vhost_user_crypto_create_session, 2718 .vhost_crypto_close_session = vhost_user_crypto_close_session, 2719 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 2720 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 2721 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 2722 .vhost_dev_start = vhost_user_dev_start, 2723 .vhost_reset_status = vhost_user_reset_status, 2724 }; 2725
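/*
 * Editor's sketch (not part of the original file, and excluded from
 * compilation): a minimal example of how a device frontend might wire this
 * backend up.  It only uses entry points visible above --
 * vhost_user_init(), vhost_user_cleanup() and the VHOST_BACKEND_TYPE_USER
 * ops table -- plus vhost_dev_init()/vhost_dev_cleanup() from
 * hw/virtio/vhost.h.  The struct and field names (MyVhostUserDevice,
 * s->chardev, ...) are hypothetical, and the vhost_dev_init() signature
 * should be checked against the QEMU tree in use.
 */
#if 0
typedef struct MyVhostUserDevice {
    CharBackend chardev;              /* chardev connected to the backend */
    VhostUserState vhost_user;        /* passed as 'opaque' to vhost_dev_init() */
    struct vhost_virtqueue *vhost_vqs;
    struct vhost_dev vhost_dev;
} MyVhostUserDevice;

static int my_vhost_user_connect(MyVhostUserDevice *s, int nvqs, Error **errp)
{
    /* Bind the VhostUserState to the chardev; fails if already initialized. */
    if (!vhost_user_init(&s->vhost_user, &s->chardev, errp)) {
        return -1;
    }

    s->vhost_vqs = g_new0(struct vhost_virtqueue, nvqs);
    s->vhost_dev.nvqs = nvqs;
    s->vhost_dev.vqs = s->vhost_vqs;
    s->vhost_dev.vq_index = 0;

    /* Negotiates features and protocol features using the messages above. */
    if (vhost_dev_init(&s->vhost_dev, &s->vhost_user,
                       VHOST_BACKEND_TYPE_USER, 0, errp) < 0) {
        g_free(s->vhost_vqs);
        s->vhost_vqs = NULL;
        vhost_user_cleanup(&s->vhost_user);
        return -1;
    }
    return 0;
}

static void my_vhost_user_disconnect(MyVhostUserDevice *s)
{
    vhost_dev_cleanup(&s->vhost_dev);
    g_free(s->vhost_vqs);
    s->vhost_vqs = NULL;
    vhost_user_cleanup(&s->vhost_user);
}
#endif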