1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "io/channel-socket.h" 20 #include "sysemu/kvm.h" 21 #include "qemu/error-report.h" 22 #include "qemu/main-loop.h" 23 #include "qemu/sockets.h" 24 #include "sysemu/runstate.h" 25 #include "sysemu/cryptodev.h" 26 #include "migration/migration.h" 27 #include "migration/postcopy-ram.h" 28 #include "trace.h" 29 #include "exec/ramblock.h" 30 31 #include <sys/ioctl.h> 32 #include <sys/socket.h> 33 #include <sys/un.h> 34 35 #include "standard-headers/linux/vhost_types.h" 36 37 #ifdef CONFIG_LINUX 38 #include <linux/userfaultfd.h> 39 #endif 40 41 #define VHOST_MEMORY_BASELINE_NREGIONS 8 42 #define VHOST_USER_F_PROTOCOL_FEATURES 30 43 #define VHOST_USER_BACKEND_MAX_FDS 8 44 45 #if defined(TARGET_PPC) || defined(TARGET_PPC64) 46 #include "hw/ppc/spapr.h" 47 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS 48 49 #else 50 #define VHOST_USER_MAX_RAM_SLOTS 512 51 #endif 52 53 /* 54 * Maximum size of virtio device config space 55 */ 56 #define VHOST_USER_MAX_CONFIG_SIZE 256 57 58 enum VhostUserProtocolFeature { 59 VHOST_USER_PROTOCOL_F_MQ = 0, 60 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 61 VHOST_USER_PROTOCOL_F_RARP = 2, 62 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 63 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 64 VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, 65 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 66 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 67 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 68 VHOST_USER_PROTOCOL_F_CONFIG = 9, 69 VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, 70 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 71 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 72 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, 73 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. 
*/ 74 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, 75 VHOST_USER_PROTOCOL_F_STATUS = 16, 76 VHOST_USER_PROTOCOL_F_MAX 77 }; 78 79 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 80 81 typedef enum VhostUserRequest { 82 VHOST_USER_NONE = 0, 83 VHOST_USER_GET_FEATURES = 1, 84 VHOST_USER_SET_FEATURES = 2, 85 VHOST_USER_SET_OWNER = 3, 86 VHOST_USER_RESET_OWNER = 4, 87 VHOST_USER_SET_MEM_TABLE = 5, 88 VHOST_USER_SET_LOG_BASE = 6, 89 VHOST_USER_SET_LOG_FD = 7, 90 VHOST_USER_SET_VRING_NUM = 8, 91 VHOST_USER_SET_VRING_ADDR = 9, 92 VHOST_USER_SET_VRING_BASE = 10, 93 VHOST_USER_GET_VRING_BASE = 11, 94 VHOST_USER_SET_VRING_KICK = 12, 95 VHOST_USER_SET_VRING_CALL = 13, 96 VHOST_USER_SET_VRING_ERR = 14, 97 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 98 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 99 VHOST_USER_GET_QUEUE_NUM = 17, 100 VHOST_USER_SET_VRING_ENABLE = 18, 101 VHOST_USER_SEND_RARP = 19, 102 VHOST_USER_NET_SET_MTU = 20, 103 VHOST_USER_SET_BACKEND_REQ_FD = 21, 104 VHOST_USER_IOTLB_MSG = 22, 105 VHOST_USER_SET_VRING_ENDIAN = 23, 106 VHOST_USER_GET_CONFIG = 24, 107 VHOST_USER_SET_CONFIG = 25, 108 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 109 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 110 VHOST_USER_POSTCOPY_ADVISE = 28, 111 VHOST_USER_POSTCOPY_LISTEN = 29, 112 VHOST_USER_POSTCOPY_END = 30, 113 VHOST_USER_GET_INFLIGHT_FD = 31, 114 VHOST_USER_SET_INFLIGHT_FD = 32, 115 VHOST_USER_GPU_SET_SOCKET = 33, 116 VHOST_USER_RESET_DEVICE = 34, 117 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */ 118 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 119 VHOST_USER_ADD_MEM_REG = 37, 120 VHOST_USER_REM_MEM_REG = 38, 121 VHOST_USER_SET_STATUS = 39, 122 VHOST_USER_GET_STATUS = 40, 123 VHOST_USER_MAX 124 } VhostUserRequest; 125 126 typedef enum VhostUserSlaveRequest { 127 VHOST_USER_BACKEND_NONE = 0, 128 VHOST_USER_BACKEND_IOTLB_MSG = 1, 129 VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, 130 VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, 131 VHOST_USER_BACKEND_MAX 132 } VhostUserSlaveRequest; 133 134 typedef struct VhostUserMemoryRegion { 135 uint64_t guest_phys_addr; 136 uint64_t memory_size; 137 uint64_t userspace_addr; 138 uint64_t mmap_offset; 139 } VhostUserMemoryRegion; 140 141 typedef struct VhostUserMemory { 142 uint32_t nregions; 143 uint32_t padding; 144 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 145 } VhostUserMemory; 146 147 typedef struct VhostUserMemRegMsg { 148 uint64_t padding; 149 VhostUserMemoryRegion region; 150 } VhostUserMemRegMsg; 151 152 typedef struct VhostUserLog { 153 uint64_t mmap_size; 154 uint64_t mmap_offset; 155 } VhostUserLog; 156 157 typedef struct VhostUserConfig { 158 uint32_t offset; 159 uint32_t size; 160 uint32_t flags; 161 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 162 } VhostUserConfig; 163 164 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 165 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 166 167 typedef struct VhostUserCryptoSession { 168 /* session id for success, -1 on errors */ 169 int64_t session_id; 170 CryptoDevBackendSymSessionInfo session_setup_data; 171 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 172 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 173 } VhostUserCryptoSession; 174 175 static VhostUserConfig c __attribute__ ((unused)); 176 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 177 + sizeof(c.size) \ 178 + sizeof(c.flags)) 179 180 typedef struct VhostUserVringArea { 181 uint64_t u64; 182 uint64_t size; 183 uint64_t offset; 184 } VhostUserVringArea; 185 186 typedef struct VhostUserInflight { 187 uint64_t 
mmap_size; 188 uint64_t mmap_offset; 189 uint16_t num_queues; 190 uint16_t queue_size; 191 } VhostUserInflight; 192 193 typedef struct { 194 VhostUserRequest request; 195 196 #define VHOST_USER_VERSION_MASK (0x3) 197 #define VHOST_USER_REPLY_MASK (0x1 << 2) 198 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 199 uint32_t flags; 200 uint32_t size; /* the following payload size */ 201 } QEMU_PACKED VhostUserHeader; 202 203 typedef union { 204 #define VHOST_USER_VRING_IDX_MASK (0xff) 205 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) 206 uint64_t u64; 207 struct vhost_vring_state state; 208 struct vhost_vring_addr addr; 209 VhostUserMemory memory; 210 VhostUserMemRegMsg mem_reg; 211 VhostUserLog log; 212 struct vhost_iotlb_msg iotlb; 213 VhostUserConfig config; 214 VhostUserCryptoSession session; 215 VhostUserVringArea area; 216 VhostUserInflight inflight; 217 } VhostUserPayload; 218 219 typedef struct VhostUserMsg { 220 VhostUserHeader hdr; 221 VhostUserPayload payload; 222 } QEMU_PACKED VhostUserMsg; 223 224 static VhostUserMsg m __attribute__ ((unused)); 225 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 226 227 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 228 229 /* The version of the protocol we support */ 230 #define VHOST_USER_VERSION (0x1) 231 232 struct vhost_user { 233 struct vhost_dev *dev; 234 /* Shared between vhost devs of the same virtio device */ 235 VhostUserState *user; 236 QIOChannel *slave_ioc; 237 GSource *slave_src; 238 NotifierWithReturn postcopy_notifier; 239 struct PostCopyFD postcopy_fd; 240 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS]; 241 /* Length of the region_rb and region_rb_offset arrays */ 242 size_t region_rb_len; 243 /* RAMBlock associated with a given region */ 244 RAMBlock **region_rb; 245 /* 246 * The offset from the start of the RAMBlock to the start of the 247 * vhost region. 248 */ 249 ram_addr_t *region_rb_offset; 250 251 /* True once we've entered postcopy_listen */ 252 bool postcopy_listen; 253 254 /* Our current regions */ 255 int num_shadow_regions; 256 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS]; 257 }; 258 259 struct scrub_regions { 260 struct vhost_memory_region *region; 261 int reg_idx; 262 int fd_idx; 263 }; 264 265 static bool ioeventfd_enabled(void) 266 { 267 return !kvm_enabled() || kvm_eventfds_enabled(); 268 } 269 270 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 271 { 272 struct vhost_user *u = dev->opaque; 273 CharBackend *chr = u->user->chr; 274 uint8_t *p = (uint8_t *) msg; 275 int r, size = VHOST_USER_HDR_SIZE; 276 277 r = qemu_chr_fe_read_all(chr, p, size); 278 if (r != size) { 279 int saved_errno = errno; 280 error_report("Failed to read msg header. Read %d instead of %d." 281 " Original request %d.", r, size, msg->hdr.request); 282 return r < 0 ? -saved_errno : -EIO; 283 } 284 285 /* validate received flags */ 286 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 287 error_report("Failed to read msg header." 
288 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 289 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 290 return -EPROTO; 291 } 292 293 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags); 294 295 return 0; 296 } 297 298 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 299 { 300 struct vhost_user *u = dev->opaque; 301 CharBackend *chr = u->user->chr; 302 uint8_t *p = (uint8_t *) msg; 303 int r, size; 304 305 r = vhost_user_read_header(dev, msg); 306 if (r < 0) { 307 return r; 308 } 309 310 /* validate message size is sane */ 311 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 312 error_report("Failed to read msg header." 313 " Size %d exceeds the maximum %zu.", msg->hdr.size, 314 VHOST_USER_PAYLOAD_SIZE); 315 return -EPROTO; 316 } 317 318 if (msg->hdr.size) { 319 p += VHOST_USER_HDR_SIZE; 320 size = msg->hdr.size; 321 r = qemu_chr_fe_read_all(chr, p, size); 322 if (r != size) { 323 int saved_errno = errno; 324 error_report("Failed to read msg payload." 325 " Read %d instead of %d.", r, msg->hdr.size); 326 return r < 0 ? -saved_errno : -EIO; 327 } 328 } 329 330 return 0; 331 } 332 333 static int process_message_reply(struct vhost_dev *dev, 334 const VhostUserMsg *msg) 335 { 336 int ret; 337 VhostUserMsg msg_reply; 338 339 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 340 return 0; 341 } 342 343 ret = vhost_user_read(dev, &msg_reply); 344 if (ret < 0) { 345 return ret; 346 } 347 348 if (msg_reply.hdr.request != msg->hdr.request) { 349 error_report("Received unexpected msg type. " 350 "Expected %d received %d", 351 msg->hdr.request, msg_reply.hdr.request); 352 return -EPROTO; 353 } 354 355 return msg_reply.payload.u64 ? -EIO : 0; 356 } 357 358 static bool vhost_user_one_time_request(VhostUserRequest request) 359 { 360 switch (request) { 361 case VHOST_USER_SET_OWNER: 362 case VHOST_USER_RESET_OWNER: 363 case VHOST_USER_SET_MEM_TABLE: 364 case VHOST_USER_GET_QUEUE_NUM: 365 case VHOST_USER_NET_SET_MTU: 366 case VHOST_USER_ADD_MEM_REG: 367 case VHOST_USER_REM_MEM_REG: 368 return true; 369 default: 370 return false; 371 } 372 } 373 374 /* most non-init callers ignore the error */ 375 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 376 int *fds, int fd_num) 377 { 378 struct vhost_user *u = dev->opaque; 379 CharBackend *chr = u->user->chr; 380 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 381 382 /* 383 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 384 * we just need send it once in the first time. For later such 385 * request, we just ignore it. 386 */ 387 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 388 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 389 return 0; 390 } 391 392 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 393 error_report("Failed to set msg fds."); 394 return -EINVAL; 395 } 396 397 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 398 if (ret != size) { 399 int saved_errno = errno; 400 error_report("Failed to write msg." 401 " Wrote %d instead of %d.", ret, size); 402 return ret < 0 ? 
-saved_errno : -EIO; 403 } 404 405 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); 406 407 return 0; 408 } 409 410 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 411 { 412 VhostUserMsg msg = { 413 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 414 .hdr.flags = VHOST_USER_VERSION, 415 }; 416 417 return vhost_user_write(dev, &msg, &fd, 1); 418 } 419 420 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 421 struct vhost_log *log) 422 { 423 int fds[VHOST_USER_MAX_RAM_SLOTS]; 424 size_t fd_num = 0; 425 bool shmfd = virtio_has_feature(dev->protocol_features, 426 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 427 int ret; 428 VhostUserMsg msg = { 429 .hdr.request = VHOST_USER_SET_LOG_BASE, 430 .hdr.flags = VHOST_USER_VERSION, 431 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 432 .payload.log.mmap_offset = 0, 433 .hdr.size = sizeof(msg.payload.log), 434 }; 435 436 /* Send only once with first queue pair */ 437 if (dev->vq_index != 0) { 438 return 0; 439 } 440 441 if (shmfd && log->fd != -1) { 442 fds[fd_num++] = log->fd; 443 } 444 445 ret = vhost_user_write(dev, &msg, fds, fd_num); 446 if (ret < 0) { 447 return ret; 448 } 449 450 if (shmfd) { 451 msg.hdr.size = 0; 452 ret = vhost_user_read(dev, &msg); 453 if (ret < 0) { 454 return ret; 455 } 456 457 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 458 error_report("Received unexpected msg type. " 459 "Expected %d received %d", 460 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 461 return -EPROTO; 462 } 463 } 464 465 return 0; 466 } 467 468 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 469 int *fd) 470 { 471 MemoryRegion *mr; 472 473 assert((uintptr_t)addr == addr); 474 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 475 *fd = memory_region_get_fd(mr); 476 477 return mr; 478 } 479 480 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 481 struct vhost_memory_region *src, 482 uint64_t mmap_offset) 483 { 484 assert(src != NULL && dst != NULL); 485 dst->userspace_addr = src->userspace_addr; 486 dst->memory_size = src->memory_size; 487 dst->guest_phys_addr = src->guest_phys_addr; 488 dst->mmap_offset = mmap_offset; 489 } 490 491 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 492 struct vhost_dev *dev, 493 VhostUserMsg *msg, 494 int *fds, size_t *fd_num, 495 bool track_ramblocks) 496 { 497 int i, fd; 498 ram_addr_t offset; 499 MemoryRegion *mr; 500 struct vhost_memory_region *reg; 501 VhostUserMemoryRegion region_buffer; 502 503 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 504 505 for (i = 0; i < dev->mem->nregions; ++i) { 506 reg = dev->mem->regions + i; 507 508 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 509 if (fd > 0) { 510 if (track_ramblocks) { 511 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 512 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 513 reg->memory_size, 514 reg->guest_phys_addr, 515 reg->userspace_addr, 516 offset); 517 u->region_rb_offset[i] = offset; 518 u->region_rb[i] = mr->ram_block; 519 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 520 error_report("Failed preparing vhost-user memory table msg"); 521 return -ENOBUFS; 522 } 523 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 524 msg->payload.memory.regions[*fd_num] = region_buffer; 525 fds[(*fd_num)++] = fd; 526 } else if (track_ramblocks) { 527 u->region_rb_offset[i] = 0; 528 u->region_rb[i] = NULL; 529 } 530 } 531 532 msg->payload.memory.nregions = *fd_num; 533 534 if (!*fd_num) { 535 error_report("Failed 
initializing vhost-user memory map, " 536 "consider using -object memory-backend-file share=on"); 537 return -EINVAL; 538 } 539 540 msg->hdr.size = sizeof(msg->payload.memory.nregions); 541 msg->hdr.size += sizeof(msg->payload.memory.padding); 542 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 543 544 return 0; 545 } 546 547 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 548 struct vhost_memory_region *vdev_reg) 549 { 550 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 551 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 552 shadow_reg->memory_size == vdev_reg->memory_size; 553 } 554 555 static void scrub_shadow_regions(struct vhost_dev *dev, 556 struct scrub_regions *add_reg, 557 int *nr_add_reg, 558 struct scrub_regions *rem_reg, 559 int *nr_rem_reg, uint64_t *shadow_pcb, 560 bool track_ramblocks) 561 { 562 struct vhost_user *u = dev->opaque; 563 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 564 struct vhost_memory_region *reg, *shadow_reg; 565 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 566 ram_addr_t offset; 567 MemoryRegion *mr; 568 bool matching; 569 570 /* 571 * Find memory regions present in our shadow state which are not in 572 * the device's current memory state. 573 * 574 * Mark regions in both the shadow and device state as "found". 575 */ 576 for (i = 0; i < u->num_shadow_regions; i++) { 577 shadow_reg = &u->shadow_regions[i]; 578 matching = false; 579 580 for (j = 0; j < dev->mem->nregions; j++) { 581 reg = &dev->mem->regions[j]; 582 583 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 584 585 if (reg_equal(shadow_reg, reg)) { 586 matching = true; 587 found[j] = true; 588 if (track_ramblocks) { 589 /* 590 * Reset postcopy client bases, region_rb, and 591 * region_rb_offset in case regions are removed. 592 */ 593 if (fd > 0) { 594 u->region_rb_offset[j] = offset; 595 u->region_rb[j] = mr->ram_block; 596 shadow_pcb[j] = u->postcopy_client_bases[i]; 597 } else { 598 u->region_rb_offset[j] = 0; 599 u->region_rb[j] = NULL; 600 } 601 } 602 break; 603 } 604 } 605 606 /* 607 * If the region was not found in the current device memory state 608 * create an entry for it in the removed list. 609 */ 610 if (!matching) { 611 rem_reg[rm_idx].region = shadow_reg; 612 rem_reg[rm_idx++].reg_idx = i; 613 } 614 } 615 616 /* 617 * For regions not marked "found", create entries in the added list. 618 * 619 * Note their indexes in the device memory state and the indexes of their 620 * file descriptors. 621 */ 622 for (i = 0; i < dev->mem->nregions; i++) { 623 reg = &dev->mem->regions[i]; 624 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 625 if (fd > 0) { 626 ++fd_num; 627 } 628 629 /* 630 * If the region was in both the shadow and device state we don't 631 * need to send a VHOST_USER_ADD_MEM_REG message for it. 632 */ 633 if (found[i]) { 634 continue; 635 } 636 637 add_reg[add_idx].region = reg; 638 add_reg[add_idx].reg_idx = i; 639 add_reg[add_idx++].fd_idx = fd_num; 640 } 641 *nr_rem_reg = rm_idx; 642 *nr_add_reg = add_idx; 643 644 return; 645 } 646 647 static int send_remove_regions(struct vhost_dev *dev, 648 struct scrub_regions *remove_reg, 649 int nr_rem_reg, VhostUserMsg *msg, 650 bool reply_supported) 651 { 652 struct vhost_user *u = dev->opaque; 653 struct vhost_memory_region *shadow_reg; 654 int i, fd, shadow_reg_idx, ret; 655 ram_addr_t offset; 656 VhostUserMemoryRegion region_buffer; 657 658 /* 659 * The regions in remove_reg appear in the same order they do in the 660 * shadow table. 
Therefore we can minimize memory copies by iterating 661 * through remove_reg backwards. 662 */ 663 for (i = nr_rem_reg - 1; i >= 0; i--) { 664 shadow_reg = remove_reg[i].region; 665 shadow_reg_idx = remove_reg[i].reg_idx; 666 667 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd); 668 669 if (fd > 0) { 670 msg->hdr.request = VHOST_USER_REM_MEM_REG; 671 vhost_user_fill_msg_region(®ion_buffer, shadow_reg, 0); 672 msg->payload.mem_reg.region = region_buffer; 673 674 ret = vhost_user_write(dev, msg, NULL, 0); 675 if (ret < 0) { 676 return ret; 677 } 678 679 if (reply_supported) { 680 ret = process_message_reply(dev, msg); 681 if (ret) { 682 return ret; 683 } 684 } 685 } 686 687 /* 688 * At this point we know the backend has unmapped the region. It is now 689 * safe to remove it from the shadow table. 690 */ 691 memmove(&u->shadow_regions[shadow_reg_idx], 692 &u->shadow_regions[shadow_reg_idx + 1], 693 sizeof(struct vhost_memory_region) * 694 (u->num_shadow_regions - shadow_reg_idx - 1)); 695 u->num_shadow_regions--; 696 } 697 698 return 0; 699 } 700 701 static int send_add_regions(struct vhost_dev *dev, 702 struct scrub_regions *add_reg, int nr_add_reg, 703 VhostUserMsg *msg, uint64_t *shadow_pcb, 704 bool reply_supported, bool track_ramblocks) 705 { 706 struct vhost_user *u = dev->opaque; 707 int i, fd, ret, reg_idx, reg_fd_idx; 708 struct vhost_memory_region *reg; 709 MemoryRegion *mr; 710 ram_addr_t offset; 711 VhostUserMsg msg_reply; 712 VhostUserMemoryRegion region_buffer; 713 714 for (i = 0; i < nr_add_reg; i++) { 715 reg = add_reg[i].region; 716 reg_idx = add_reg[i].reg_idx; 717 reg_fd_idx = add_reg[i].fd_idx; 718 719 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 720 721 if (fd > 0) { 722 if (track_ramblocks) { 723 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name, 724 reg->memory_size, 725 reg->guest_phys_addr, 726 reg->userspace_addr, 727 offset); 728 u->region_rb_offset[reg_idx] = offset; 729 u->region_rb[reg_idx] = mr->ram_block; 730 } 731 msg->hdr.request = VHOST_USER_ADD_MEM_REG; 732 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 733 msg->payload.mem_reg.region = region_buffer; 734 735 ret = vhost_user_write(dev, msg, &fd, 1); 736 if (ret < 0) { 737 return ret; 738 } 739 740 if (track_ramblocks) { 741 uint64_t reply_gpa; 742 743 ret = vhost_user_read(dev, &msg_reply); 744 if (ret < 0) { 745 return ret; 746 } 747 748 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr; 749 750 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) { 751 error_report("%s: Received unexpected msg type." 752 "Expected %d received %d", __func__, 753 VHOST_USER_ADD_MEM_REG, 754 msg_reply.hdr.request); 755 return -EPROTO; 756 } 757 758 /* 759 * We're using the same structure, just reusing one of the 760 * fields, so it should be the same size. 761 */ 762 if (msg_reply.hdr.size != msg->hdr.size) { 763 error_report("%s: Unexpected size for postcopy reply " 764 "%d vs %d", __func__, msg_reply.hdr.size, 765 msg->hdr.size); 766 return -EPROTO; 767 } 768 769 /* Get the postcopy client base from the backend's reply. */ 770 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) { 771 shadow_pcb[reg_idx] = 772 msg_reply.payload.mem_reg.region.userspace_addr; 773 trace_vhost_user_set_mem_table_postcopy( 774 msg_reply.payload.mem_reg.region.userspace_addr, 775 msg->payload.mem_reg.region.userspace_addr, 776 reg_fd_idx, reg_idx); 777 } else { 778 error_report("%s: invalid postcopy reply for region. 
" 779 "Got guest physical address %" PRIX64 ", expected " 780 "%" PRIX64, __func__, reply_gpa, 781 dev->mem->regions[reg_idx].guest_phys_addr); 782 return -EPROTO; 783 } 784 } else if (reply_supported) { 785 ret = process_message_reply(dev, msg); 786 if (ret) { 787 return ret; 788 } 789 } 790 } else if (track_ramblocks) { 791 u->region_rb_offset[reg_idx] = 0; 792 u->region_rb[reg_idx] = NULL; 793 } 794 795 /* 796 * At this point, we know the backend has mapped in the new 797 * region, if the region has a valid file descriptor. 798 * 799 * The region should now be added to the shadow table. 800 */ 801 u->shadow_regions[u->num_shadow_regions].guest_phys_addr = 802 reg->guest_phys_addr; 803 u->shadow_regions[u->num_shadow_regions].userspace_addr = 804 reg->userspace_addr; 805 u->shadow_regions[u->num_shadow_regions].memory_size = 806 reg->memory_size; 807 u->num_shadow_regions++; 808 } 809 810 return 0; 811 } 812 813 static int vhost_user_add_remove_regions(struct vhost_dev *dev, 814 VhostUserMsg *msg, 815 bool reply_supported, 816 bool track_ramblocks) 817 { 818 struct vhost_user *u = dev->opaque; 819 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS]; 820 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS]; 821 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {}; 822 int nr_add_reg, nr_rem_reg; 823 int ret; 824 825 msg->hdr.size = sizeof(msg->payload.mem_reg); 826 827 /* Find the regions which need to be removed or added. */ 828 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg, 829 shadow_pcb, track_ramblocks); 830 831 if (nr_rem_reg) { 832 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg, 833 reply_supported); 834 if (ret < 0) { 835 goto err; 836 } 837 } 838 839 if (nr_add_reg) { 840 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb, 841 reply_supported, track_ramblocks); 842 if (ret < 0) { 843 goto err; 844 } 845 } 846 847 if (track_ramblocks) { 848 memcpy(u->postcopy_client_bases, shadow_pcb, 849 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 850 /* 851 * Now we've registered this with the postcopy code, we ack to the 852 * client, because now we're in the position to be able to deal with 853 * any faults it generates. 854 */ 855 /* TODO: Use this for failure cases as well with a bad value. 
*/ 856 msg->hdr.size = sizeof(msg->payload.u64); 857 msg->payload.u64 = 0; /* OK */ 858 859 ret = vhost_user_write(dev, msg, NULL, 0); 860 if (ret < 0) { 861 return ret; 862 } 863 } 864 865 return 0; 866 867 err: 868 if (track_ramblocks) { 869 memcpy(u->postcopy_client_bases, shadow_pcb, 870 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 871 } 872 873 return ret; 874 } 875 876 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 877 struct vhost_memory *mem, 878 bool reply_supported, 879 bool config_mem_slots) 880 { 881 struct vhost_user *u = dev->opaque; 882 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 883 size_t fd_num = 0; 884 VhostUserMsg msg_reply; 885 int region_i, msg_i; 886 int ret; 887 888 VhostUserMsg msg = { 889 .hdr.flags = VHOST_USER_VERSION, 890 }; 891 892 if (u->region_rb_len < dev->mem->nregions) { 893 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 894 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 895 dev->mem->nregions); 896 memset(&(u->region_rb[u->region_rb_len]), '\0', 897 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 898 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 899 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 900 u->region_rb_len = dev->mem->nregions; 901 } 902 903 if (config_mem_slots) { 904 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true); 905 if (ret < 0) { 906 return ret; 907 } 908 } else { 909 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 910 true); 911 if (ret < 0) { 912 return ret; 913 } 914 915 ret = vhost_user_write(dev, &msg, fds, fd_num); 916 if (ret < 0) { 917 return ret; 918 } 919 920 ret = vhost_user_read(dev, &msg_reply); 921 if (ret < 0) { 922 return ret; 923 } 924 925 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 926 error_report("%s: Received unexpected msg type." 927 "Expected %d received %d", __func__, 928 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 929 return -EPROTO; 930 } 931 932 /* 933 * We're using the same structure, just reusing one of the 934 * fields, so it should be the same size. 935 */ 936 if (msg_reply.hdr.size != msg.hdr.size) { 937 error_report("%s: Unexpected size for postcopy reply " 938 "%d vs %d", __func__, msg_reply.hdr.size, 939 msg.hdr.size); 940 return -EPROTO; 941 } 942 943 memset(u->postcopy_client_bases, 0, 944 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 945 946 /* 947 * They're in the same order as the regions that were sent 948 * but some of the regions were skipped (above) if they 949 * didn't have fd's 950 */ 951 for (msg_i = 0, region_i = 0; 952 region_i < dev->mem->nregions; 953 region_i++) { 954 if (msg_i < fd_num && 955 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 956 dev->mem->regions[region_i].guest_phys_addr) { 957 u->postcopy_client_bases[region_i] = 958 msg_reply.payload.memory.regions[msg_i].userspace_addr; 959 trace_vhost_user_set_mem_table_postcopy( 960 msg_reply.payload.memory.regions[msg_i].userspace_addr, 961 msg.payload.memory.regions[msg_i].userspace_addr, 962 msg_i, region_i); 963 msg_i++; 964 } 965 } 966 if (msg_i != fd_num) { 967 error_report("%s: postcopy reply not fully consumed " 968 "%d vs %zd", 969 __func__, msg_i, fd_num); 970 return -EIO; 971 } 972 973 /* 974 * Now we've registered this with the postcopy code, we ack to the 975 * client, because now we're in the position to be able to deal 976 * with any faults it generates. 977 */ 978 /* TODO: Use this for failure cases as well with a bad value. 
*/ 979 msg.hdr.size = sizeof(msg.payload.u64); 980 msg.payload.u64 = 0; /* OK */ 981 ret = vhost_user_write(dev, &msg, NULL, 0); 982 if (ret < 0) { 983 return ret; 984 } 985 } 986 987 return 0; 988 } 989 990 static int vhost_user_set_mem_table(struct vhost_dev *dev, 991 struct vhost_memory *mem) 992 { 993 struct vhost_user *u = dev->opaque; 994 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 995 size_t fd_num = 0; 996 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 997 bool reply_supported = virtio_has_feature(dev->protocol_features, 998 VHOST_USER_PROTOCOL_F_REPLY_ACK); 999 bool config_mem_slots = 1000 virtio_has_feature(dev->protocol_features, 1001 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1002 int ret; 1003 1004 if (do_postcopy) { 1005 /* 1006 * Postcopy has enough differences that it's best done in it's own 1007 * version 1008 */ 1009 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1010 config_mem_slots); 1011 } 1012 1013 VhostUserMsg msg = { 1014 .hdr.flags = VHOST_USER_VERSION, 1015 }; 1016 1017 if (reply_supported) { 1018 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1019 } 1020 1021 if (config_mem_slots) { 1022 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); 1023 if (ret < 0) { 1024 return ret; 1025 } 1026 } else { 1027 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1028 false); 1029 if (ret < 0) { 1030 return ret; 1031 } 1032 1033 ret = vhost_user_write(dev, &msg, fds, fd_num); 1034 if (ret < 0) { 1035 return ret; 1036 } 1037 1038 if (reply_supported) { 1039 return process_message_reply(dev, &msg); 1040 } 1041 } 1042 1043 return 0; 1044 } 1045 1046 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1047 struct vhost_vring_state *ring) 1048 { 1049 bool cross_endian = virtio_has_feature(dev->protocol_features, 1050 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1051 VhostUserMsg msg = { 1052 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 1053 .hdr.flags = VHOST_USER_VERSION, 1054 .payload.state = *ring, 1055 .hdr.size = sizeof(msg.payload.state), 1056 }; 1057 1058 if (!cross_endian) { 1059 error_report("vhost-user trying to send unhandled ioctl"); 1060 return -ENOTSUP; 1061 } 1062 1063 return vhost_user_write(dev, &msg, NULL, 0); 1064 } 1065 1066 static int vhost_set_vring(struct vhost_dev *dev, 1067 unsigned long int request, 1068 struct vhost_vring_state *ring) 1069 { 1070 VhostUserMsg msg = { 1071 .hdr.request = request, 1072 .hdr.flags = VHOST_USER_VERSION, 1073 .payload.state = *ring, 1074 .hdr.size = sizeof(msg.payload.state), 1075 }; 1076 1077 return vhost_user_write(dev, &msg, NULL, 0); 1078 } 1079 1080 static int vhost_user_set_vring_num(struct vhost_dev *dev, 1081 struct vhost_vring_state *ring) 1082 { 1083 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 1084 } 1085 1086 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) 1087 { 1088 assert(n && n->unmap_addr); 1089 munmap(n->unmap_addr, qemu_real_host_page_size()); 1090 n->unmap_addr = NULL; 1091 } 1092 1093 /* 1094 * clean-up function for notifier, will finally free the structure 1095 * under rcu. 
1096 */ 1097 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, 1098 VirtIODevice *vdev) 1099 { 1100 if (n->addr) { 1101 if (vdev) { 1102 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); 1103 } 1104 assert(!n->unmap_addr); 1105 n->unmap_addr = n->addr; 1106 n->addr = NULL; 1107 call_rcu(n, vhost_user_host_notifier_free, rcu); 1108 } 1109 } 1110 1111 static int vhost_user_set_vring_base(struct vhost_dev *dev, 1112 struct vhost_vring_state *ring) 1113 { 1114 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 1115 } 1116 1117 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 1118 { 1119 int i; 1120 1121 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1122 return -EINVAL; 1123 } 1124 1125 for (i = 0; i < dev->nvqs; ++i) { 1126 int ret; 1127 struct vhost_vring_state state = { 1128 .index = dev->vq_index + i, 1129 .num = enable, 1130 }; 1131 1132 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 1133 if (ret < 0) { 1134 /* 1135 * Restoring the previous state is likely infeasible, as well as 1136 * proceeding regardless the error, so just bail out and hope for 1137 * the device-level recovery. 1138 */ 1139 return ret; 1140 } 1141 } 1142 1143 return 0; 1144 } 1145 1146 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u, 1147 int idx) 1148 { 1149 if (idx >= u->notifiers->len) { 1150 return NULL; 1151 } 1152 return g_ptr_array_index(u->notifiers, idx); 1153 } 1154 1155 static int vhost_user_get_vring_base(struct vhost_dev *dev, 1156 struct vhost_vring_state *ring) 1157 { 1158 int ret; 1159 VhostUserMsg msg = { 1160 .hdr.request = VHOST_USER_GET_VRING_BASE, 1161 .hdr.flags = VHOST_USER_VERSION, 1162 .payload.state = *ring, 1163 .hdr.size = sizeof(msg.payload.state), 1164 }; 1165 struct vhost_user *u = dev->opaque; 1166 1167 VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); 1168 if (n) { 1169 vhost_user_host_notifier_remove(n, dev->vdev); 1170 } 1171 1172 ret = vhost_user_write(dev, &msg, NULL, 0); 1173 if (ret < 0) { 1174 return ret; 1175 } 1176 1177 ret = vhost_user_read(dev, &msg); 1178 if (ret < 0) { 1179 return ret; 1180 } 1181 1182 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 1183 error_report("Received unexpected msg type. 
Expected %d received %d", 1184 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1185 return -EPROTO; 1186 } 1187 1188 if (msg.hdr.size != sizeof(msg.payload.state)) { 1189 error_report("Received bad msg size."); 1190 return -EPROTO; 1191 } 1192 1193 *ring = msg.payload.state; 1194 1195 return 0; 1196 } 1197 1198 static int vhost_set_vring_file(struct vhost_dev *dev, 1199 VhostUserRequest request, 1200 struct vhost_vring_file *file) 1201 { 1202 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1203 size_t fd_num = 0; 1204 VhostUserMsg msg = { 1205 .hdr.request = request, 1206 .hdr.flags = VHOST_USER_VERSION, 1207 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1208 .hdr.size = sizeof(msg.payload.u64), 1209 }; 1210 1211 if (ioeventfd_enabled() && file->fd > 0) { 1212 fds[fd_num++] = file->fd; 1213 } else { 1214 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1215 } 1216 1217 return vhost_user_write(dev, &msg, fds, fd_num); 1218 } 1219 1220 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1221 struct vhost_vring_file *file) 1222 { 1223 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1224 } 1225 1226 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1227 struct vhost_vring_file *file) 1228 { 1229 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1230 } 1231 1232 static int vhost_user_set_vring_err(struct vhost_dev *dev, 1233 struct vhost_vring_file *file) 1234 { 1235 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file); 1236 } 1237 1238 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1239 { 1240 int ret; 1241 VhostUserMsg msg = { 1242 .hdr.request = request, 1243 .hdr.flags = VHOST_USER_VERSION, 1244 }; 1245 1246 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 1247 return 0; 1248 } 1249 1250 ret = vhost_user_write(dev, &msg, NULL, 0); 1251 if (ret < 0) { 1252 return ret; 1253 } 1254 1255 ret = vhost_user_read(dev, &msg); 1256 if (ret < 0) { 1257 return ret; 1258 } 1259 1260 if (msg.hdr.request != request) { 1261 error_report("Received unexpected msg type. Expected %d received %d", 1262 request, msg.hdr.request); 1263 return -EPROTO; 1264 } 1265 1266 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1267 error_report("Received bad msg size."); 1268 return -EPROTO; 1269 } 1270 1271 *u64 = msg.payload.u64; 1272 1273 return 0; 1274 } 1275 1276 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1277 { 1278 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1279 return -EPROTO; 1280 } 1281 1282 return 0; 1283 } 1284 1285 static int enforce_reply(struct vhost_dev *dev, 1286 const VhostUserMsg *msg) 1287 { 1288 uint64_t dummy; 1289 1290 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1291 return process_message_reply(dev, msg); 1292 } 1293 1294 /* 1295 * We need to wait for a reply but the backend does not 1296 * support replies for the command we just sent. 1297 * Send VHOST_USER_GET_FEATURES which makes all backends 1298 * send a reply. 
1299 */ 1300 return vhost_user_get_features(dev, &dummy); 1301 } 1302 1303 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 1304 struct vhost_vring_addr *addr) 1305 { 1306 int ret; 1307 VhostUserMsg msg = { 1308 .hdr.request = VHOST_USER_SET_VRING_ADDR, 1309 .hdr.flags = VHOST_USER_VERSION, 1310 .payload.addr = *addr, 1311 .hdr.size = sizeof(msg.payload.addr), 1312 }; 1313 1314 bool reply_supported = virtio_has_feature(dev->protocol_features, 1315 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1316 1317 /* 1318 * wait for a reply if logging is enabled to make sure 1319 * backend is actually logging changes 1320 */ 1321 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); 1322 1323 if (reply_supported && wait_for_reply) { 1324 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1325 } 1326 1327 ret = vhost_user_write(dev, &msg, NULL, 0); 1328 if (ret < 0) { 1329 return ret; 1330 } 1331 1332 if (wait_for_reply) { 1333 return enforce_reply(dev, &msg); 1334 } 1335 1336 return 0; 1337 } 1338 1339 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, 1340 bool wait_for_reply) 1341 { 1342 VhostUserMsg msg = { 1343 .hdr.request = request, 1344 .hdr.flags = VHOST_USER_VERSION, 1345 .payload.u64 = u64, 1346 .hdr.size = sizeof(msg.payload.u64), 1347 }; 1348 int ret; 1349 1350 if (wait_for_reply) { 1351 bool reply_supported = virtio_has_feature(dev->protocol_features, 1352 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1353 if (reply_supported) { 1354 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1355 } 1356 } 1357 1358 ret = vhost_user_write(dev, &msg, NULL, 0); 1359 if (ret < 0) { 1360 return ret; 1361 } 1362 1363 if (wait_for_reply) { 1364 return enforce_reply(dev, &msg); 1365 } 1366 1367 return 0; 1368 } 1369 1370 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status) 1371 { 1372 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false); 1373 } 1374 1375 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status) 1376 { 1377 uint64_t value; 1378 int ret; 1379 1380 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value); 1381 if (ret < 0) { 1382 return ret; 1383 } 1384 *status = value; 1385 1386 return 0; 1387 } 1388 1389 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status) 1390 { 1391 uint8_t s; 1392 int ret; 1393 1394 ret = vhost_user_get_status(dev, &s); 1395 if (ret < 0) { 1396 return ret; 1397 } 1398 1399 if ((s & status) == status) { 1400 return 0; 1401 } 1402 s |= status; 1403 1404 return vhost_user_set_status(dev, s); 1405 } 1406 1407 static int vhost_user_set_features(struct vhost_dev *dev, 1408 uint64_t features) 1409 { 1410 /* 1411 * wait for a reply if logging is enabled to make sure 1412 * backend is actually logging changes 1413 */ 1414 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); 1415 int ret; 1416 1417 /* 1418 * We need to include any extra backend only feature bits that 1419 * might be needed by our device. Currently this includes the 1420 * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol 1421 * features. 
1422 */ 1423 ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, 1424 features | dev->backend_features, 1425 log_enabled); 1426 1427 if (virtio_has_feature(dev->protocol_features, 1428 VHOST_USER_PROTOCOL_F_STATUS)) { 1429 if (!ret) { 1430 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 1431 } 1432 } 1433 1434 return ret; 1435 } 1436 1437 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 1438 uint64_t features) 1439 { 1440 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, 1441 false); 1442 } 1443 1444 static int vhost_user_set_owner(struct vhost_dev *dev) 1445 { 1446 VhostUserMsg msg = { 1447 .hdr.request = VHOST_USER_SET_OWNER, 1448 .hdr.flags = VHOST_USER_VERSION, 1449 }; 1450 1451 return vhost_user_write(dev, &msg, NULL, 0); 1452 } 1453 1454 static int vhost_user_get_max_memslots(struct vhost_dev *dev, 1455 uint64_t *max_memslots) 1456 { 1457 uint64_t backend_max_memslots; 1458 int err; 1459 1460 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS, 1461 &backend_max_memslots); 1462 if (err < 0) { 1463 return err; 1464 } 1465 1466 *max_memslots = backend_max_memslots; 1467 1468 return 0; 1469 } 1470 1471 static int vhost_user_reset_device(struct vhost_dev *dev) 1472 { 1473 VhostUserMsg msg = { 1474 .hdr.flags = VHOST_USER_VERSION, 1475 }; 1476 1477 msg.hdr.request = virtio_has_feature(dev->protocol_features, 1478 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 1479 ? VHOST_USER_RESET_DEVICE 1480 : VHOST_USER_RESET_OWNER; 1481 1482 return vhost_user_write(dev, &msg, NULL, 0); 1483 } 1484 1485 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 1486 { 1487 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1488 return -ENOSYS; 1489 } 1490 1491 return dev->config_ops->vhost_dev_config_notifier(dev); 1492 } 1493 1494 /* 1495 * Fetch or create the notifier for a given idx. Newly created 1496 * notifiers are added to the pointer array that tracks them. 1497 */ 1498 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, 1499 int idx) 1500 { 1501 VhostUserHostNotifier *n = NULL; 1502 if (idx >= u->notifiers->len) { 1503 g_ptr_array_set_size(u->notifiers, idx + 1); 1504 } 1505 1506 n = g_ptr_array_index(u->notifiers, idx); 1507 if (!n) { 1508 /* 1509 * In case notification arrive out-of-order, 1510 * make room for current index. 1511 */ 1512 g_ptr_array_remove_index(u->notifiers, idx); 1513 n = g_new0(VhostUserHostNotifier, 1); 1514 n->idx = idx; 1515 g_ptr_array_insert(u->notifiers, idx, n); 1516 trace_vhost_user_create_notifier(idx, n); 1517 } 1518 1519 return n; 1520 } 1521 1522 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 1523 VhostUserVringArea *area, 1524 int fd) 1525 { 1526 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 1527 size_t page_size = qemu_real_host_page_size(); 1528 struct vhost_user *u = dev->opaque; 1529 VhostUserState *user = u->user; 1530 VirtIODevice *vdev = dev->vdev; 1531 VhostUserHostNotifier *n; 1532 void *addr; 1533 char *name; 1534 1535 if (!virtio_has_feature(dev->protocol_features, 1536 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 1537 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 1538 return -EINVAL; 1539 } 1540 1541 /* 1542 * Fetch notifier and invalidate any old data before setting up 1543 * new mapped address. 
1544 */ 1545 n = fetch_or_create_notifier(user, queue_idx); 1546 vhost_user_host_notifier_remove(n, vdev); 1547 1548 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 1549 return 0; 1550 } 1551 1552 /* Sanity check. */ 1553 if (area->size != page_size) { 1554 return -EINVAL; 1555 } 1556 1557 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 1558 fd, area->offset); 1559 if (addr == MAP_FAILED) { 1560 return -EFAULT; 1561 } 1562 1563 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 1564 user, queue_idx); 1565 if (!n->mr.ram) { /* Don't init again after suspend. */ 1566 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 1567 page_size, addr); 1568 } else { 1569 n->mr.ram_block->host = addr; 1570 } 1571 g_free(name); 1572 1573 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 1574 object_unparent(OBJECT(&n->mr)); 1575 munmap(addr, page_size); 1576 return -ENXIO; 1577 } 1578 1579 n->addr = addr; 1580 1581 return 0; 1582 } 1583 1584 static void close_slave_channel(struct vhost_user *u) 1585 { 1586 g_source_destroy(u->slave_src); 1587 g_source_unref(u->slave_src); 1588 u->slave_src = NULL; 1589 object_unref(OBJECT(u->slave_ioc)); 1590 u->slave_ioc = NULL; 1591 } 1592 1593 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, 1594 gpointer opaque) 1595 { 1596 struct vhost_dev *dev = opaque; 1597 struct vhost_user *u = dev->opaque; 1598 VhostUserHeader hdr = { 0, }; 1599 VhostUserPayload payload = { 0, }; 1600 Error *local_err = NULL; 1601 gboolean rc = G_SOURCE_CONTINUE; 1602 int ret = 0; 1603 struct iovec iov; 1604 g_autofree int *fd = NULL; 1605 size_t fdsize = 0; 1606 int i; 1607 1608 /* Read header */ 1609 iov.iov_base = &hdr; 1610 iov.iov_len = VHOST_USER_HDR_SIZE; 1611 1612 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1613 error_report_err(local_err); 1614 goto err; 1615 } 1616 1617 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1618 error_report("Failed to read msg header." 1619 " Size %d exceeds the maximum %zu.", hdr.size, 1620 VHOST_USER_PAYLOAD_SIZE); 1621 goto err; 1622 } 1623 1624 /* Read payload */ 1625 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1626 error_report_err(local_err); 1627 goto err; 1628 } 1629 1630 switch (hdr.request) { 1631 case VHOST_USER_BACKEND_IOTLB_MSG: 1632 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1633 break; 1634 case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: 1635 ret = vhost_user_slave_handle_config_change(dev); 1636 break; 1637 case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: 1638 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1639 fd ? fd[0] : -1); 1640 break; 1641 default: 1642 error_report("Received unexpected msg type: %d.", hdr.request); 1643 ret = -EINVAL; 1644 } 1645 1646 /* 1647 * REPLY_ACK feature handling. Other reply types has to be managed 1648 * directly in their request handlers. 
1649 */ 1650 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1651 struct iovec iovec[2]; 1652 1653 1654 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1655 hdr.flags |= VHOST_USER_REPLY_MASK; 1656 1657 payload.u64 = !!ret; 1658 hdr.size = sizeof(payload.u64); 1659 1660 iovec[0].iov_base = &hdr; 1661 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1662 iovec[1].iov_base = &payload; 1663 iovec[1].iov_len = hdr.size; 1664 1665 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) { 1666 error_report_err(local_err); 1667 goto err; 1668 } 1669 } 1670 1671 goto fdcleanup; 1672 1673 err: 1674 close_slave_channel(u); 1675 rc = G_SOURCE_REMOVE; 1676 1677 fdcleanup: 1678 if (fd) { 1679 for (i = 0; i < fdsize; i++) { 1680 close(fd[i]); 1681 } 1682 } 1683 return rc; 1684 } 1685 1686 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1687 { 1688 VhostUserMsg msg = { 1689 .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, 1690 .hdr.flags = VHOST_USER_VERSION, 1691 }; 1692 struct vhost_user *u = dev->opaque; 1693 int sv[2], ret = 0; 1694 bool reply_supported = virtio_has_feature(dev->protocol_features, 1695 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1696 Error *local_err = NULL; 1697 QIOChannel *ioc; 1698 1699 if (!virtio_has_feature(dev->protocol_features, 1700 VHOST_USER_PROTOCOL_F_BACKEND_REQ)) { 1701 return 0; 1702 } 1703 1704 if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1705 int saved_errno = errno; 1706 error_report("socketpair() failed"); 1707 return -saved_errno; 1708 } 1709 1710 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err)); 1711 if (!ioc) { 1712 error_report_err(local_err); 1713 return -ECONNREFUSED; 1714 } 1715 u->slave_ioc = ioc; 1716 u->slave_src = qio_channel_add_watch_source(u->slave_ioc, 1717 G_IO_IN | G_IO_HUP, 1718 slave_read, dev, NULL, NULL); 1719 1720 if (reply_supported) { 1721 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1722 } 1723 1724 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1725 if (ret) { 1726 goto out; 1727 } 1728 1729 if (reply_supported) { 1730 ret = process_message_reply(dev, &msg); 1731 } 1732 1733 out: 1734 close(sv[1]); 1735 if (ret) { 1736 close_slave_channel(u); 1737 } 1738 1739 return ret; 1740 } 1741 1742 #ifdef CONFIG_LINUX 1743 /* 1744 * Called back from the postcopy fault thread when a fault is received on our 1745 * ufd. 
1746 * TODO: This is Linux specific 1747 */ 1748 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1749 void *ufd) 1750 { 1751 struct vhost_dev *dev = pcfd->data; 1752 struct vhost_user *u = dev->opaque; 1753 struct uffd_msg *msg = ufd; 1754 uint64_t faultaddr = msg->arg.pagefault.address; 1755 RAMBlock *rb = NULL; 1756 uint64_t rb_offset; 1757 int i; 1758 1759 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1760 dev->mem->nregions); 1761 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1762 trace_vhost_user_postcopy_fault_handler_loop(i, 1763 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1764 if (faultaddr >= u->postcopy_client_bases[i]) { 1765 /* Ofset of the fault address in the vhost region */ 1766 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1767 if (region_offset < dev->mem->regions[i].memory_size) { 1768 rb_offset = region_offset + u->region_rb_offset[i]; 1769 trace_vhost_user_postcopy_fault_handler_found(i, 1770 region_offset, rb_offset); 1771 rb = u->region_rb[i]; 1772 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1773 rb_offset); 1774 } 1775 } 1776 } 1777 error_report("%s: Failed to find region for fault %" PRIx64, 1778 __func__, faultaddr); 1779 return -1; 1780 } 1781 1782 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1783 uint64_t offset) 1784 { 1785 struct vhost_dev *dev = pcfd->data; 1786 struct vhost_user *u = dev->opaque; 1787 int i; 1788 1789 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1790 1791 if (!u) { 1792 return 0; 1793 } 1794 /* Translate the offset into an address in the clients address space */ 1795 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1796 if (u->region_rb[i] == rb && 1797 offset >= u->region_rb_offset[i] && 1798 offset < (u->region_rb_offset[i] + 1799 dev->mem->regions[i].memory_size)) { 1800 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1801 u->postcopy_client_bases[i]; 1802 trace_vhost_user_postcopy_waker_found(client_addr); 1803 return postcopy_wake_shared(pcfd, client_addr, rb); 1804 } 1805 } 1806 1807 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1808 return 0; 1809 } 1810 #endif 1811 1812 /* 1813 * Called at the start of an inbound postcopy on reception of the 1814 * 'advise' command. 1815 */ 1816 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1817 { 1818 #ifdef CONFIG_LINUX 1819 struct vhost_user *u = dev->opaque; 1820 CharBackend *chr = u->user->chr; 1821 int ufd; 1822 int ret; 1823 VhostUserMsg msg = { 1824 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1825 .hdr.flags = VHOST_USER_VERSION, 1826 }; 1827 1828 ret = vhost_user_write(dev, &msg, NULL, 0); 1829 if (ret < 0) { 1830 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1831 return ret; 1832 } 1833 1834 ret = vhost_user_read(dev, &msg); 1835 if (ret < 0) { 1836 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1837 return ret; 1838 } 1839 1840 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1841 error_setg(errp, "Unexpected msg type. 
Expected %d received %d", 1842 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1843 return -EPROTO; 1844 } 1845 1846 if (msg.hdr.size) { 1847 error_setg(errp, "Received bad msg size."); 1848 return -EPROTO; 1849 } 1850 ufd = qemu_chr_fe_get_msgfd(chr); 1851 if (ufd < 0) { 1852 error_setg(errp, "%s: Failed to get ufd", __func__); 1853 return -EIO; 1854 } 1855 qemu_socket_set_nonblock(ufd); 1856 1857 /* register ufd with userfault thread */ 1858 u->postcopy_fd.fd = ufd; 1859 u->postcopy_fd.data = dev; 1860 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1861 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1862 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1863 postcopy_register_shared_ufd(&u->postcopy_fd); 1864 return 0; 1865 #else 1866 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1867 return -ENOSYS; 1868 #endif 1869 } 1870 1871 /* 1872 * Called at the switch to postcopy on reception of the 'listen' command. 1873 */ 1874 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1875 { 1876 struct vhost_user *u = dev->opaque; 1877 int ret; 1878 VhostUserMsg msg = { 1879 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1880 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1881 }; 1882 u->postcopy_listen = true; 1883 1884 trace_vhost_user_postcopy_listen(); 1885 1886 ret = vhost_user_write(dev, &msg, NULL, 0); 1887 if (ret < 0) { 1888 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1889 return ret; 1890 } 1891 1892 ret = process_message_reply(dev, &msg); 1893 if (ret) { 1894 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1895 return ret; 1896 } 1897 1898 return 0; 1899 } 1900 1901 /* 1902 * Called at the end of postcopy 1903 */ 1904 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1905 { 1906 VhostUserMsg msg = { 1907 .hdr.request = VHOST_USER_POSTCOPY_END, 1908 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1909 }; 1910 int ret; 1911 struct vhost_user *u = dev->opaque; 1912 1913 trace_vhost_user_postcopy_end_entry(); 1914 1915 ret = vhost_user_write(dev, &msg, NULL, 0); 1916 if (ret < 0) { 1917 error_setg(errp, "Failed to send postcopy_end to vhost"); 1918 return ret; 1919 } 1920 1921 ret = process_message_reply(dev, &msg); 1922 if (ret) { 1923 error_setg(errp, "Failed to receive reply to postcopy_end"); 1924 return ret; 1925 } 1926 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1927 close(u->postcopy_fd.fd); 1928 u->postcopy_fd.handler = NULL; 1929 1930 trace_vhost_user_postcopy_end_exit(); 1931 1932 return 0; 1933 } 1934 1935 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1936 void *opaque) 1937 { 1938 struct PostcopyNotifyData *pnd = opaque; 1939 struct vhost_user *u = container_of(notifier, struct vhost_user, 1940 postcopy_notifier); 1941 struct vhost_dev *dev = u->dev; 1942 1943 switch (pnd->reason) { 1944 case POSTCOPY_NOTIFY_PROBE: 1945 if (!virtio_has_feature(dev->protocol_features, 1946 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1947 /* TODO: Get the device name into this error somehow */ 1948 error_setg(pnd->errp, 1949 "vhost-user backend not capable of postcopy"); 1950 return -ENOENT; 1951 } 1952 break; 1953 1954 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1955 return vhost_user_postcopy_advise(dev, pnd->errp); 1956 1957 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1958 return vhost_user_postcopy_listen(dev, pnd->errp); 1959 1960 case POSTCOPY_NOTIFY_INBOUND_END: 1961 return vhost_user_postcopy_end(dev, pnd->errp); 1962 1963 default: 
1964 /* We ignore notifications we don't know */ 1965 break; 1966 } 1967 1968 return 0; 1969 } 1970 1971 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 1972 Error **errp) 1973 { 1974 uint64_t features, ram_slots; 1975 struct vhost_user *u; 1976 VhostUserState *vus = (VhostUserState *) opaque; 1977 int err; 1978 1979 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1980 1981 u = g_new0(struct vhost_user, 1); 1982 u->user = vus; 1983 u->dev = dev; 1984 dev->opaque = u; 1985 1986 err = vhost_user_get_features(dev, &features); 1987 if (err < 0) { 1988 error_setg_errno(errp, -err, "vhost_backend_init failed"); 1989 return err; 1990 } 1991 1992 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1993 bool supports_f_config = vus->supports_config || 1994 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); 1995 uint64_t protocol_features; 1996 1997 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1998 1999 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 2000 &protocol_features); 2001 if (err < 0) { 2002 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2003 return -EPROTO; 2004 } 2005 2006 /* 2007 * We will use all the protocol features we support - although 2008 * we suppress F_CONFIG if we know QEMUs internal code can not support 2009 * it. 2010 */ 2011 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; 2012 2013 if (supports_f_config) { 2014 if (!virtio_has_feature(protocol_features, 2015 VHOST_USER_PROTOCOL_F_CONFIG)) { 2016 error_setg(errp, "vhost-user device expecting " 2017 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " 2018 "not support it."); 2019 return -EPROTO; 2020 } 2021 } else { 2022 if (virtio_has_feature(protocol_features, 2023 VHOST_USER_PROTOCOL_F_CONFIG)) { 2024 warn_report("vhost-user backend supports " 2025 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); 2026 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 2027 } 2028 } 2029 2030 /* final set of protocol features */ 2031 dev->protocol_features = protocol_features; 2032 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 2033 if (err < 0) { 2034 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2035 return -EPROTO; 2036 } 2037 2038 /* query the max queues we support if backend supports Multiple Queue */ 2039 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 2040 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2041 &dev->max_queues); 2042 if (err < 0) { 2043 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2044 return -EPROTO; 2045 } 2046 } else { 2047 dev->max_queues = 1; 2048 } 2049 2050 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2051 error_setg(errp, "The maximum number of queues supported by the " 2052 "backend is %" PRIu64, dev->max_queues); 2053 return -EINVAL; 2054 } 2055 2056 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2057 !(virtio_has_feature(dev->protocol_features, 2058 VHOST_USER_PROTOCOL_F_BACKEND_REQ) && 2059 virtio_has_feature(dev->protocol_features, 2060 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2061 error_setg(errp, "IOMMU support requires reply-ack and " 2062 "slave-req protocol features."); 2063 return -EINVAL; 2064 } 2065 2066 /* get max memory regions if backend supports configurable RAM slots */ 2067 if (!virtio_has_feature(dev->protocol_features, 2068 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2069 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2070 } 
        /* get max memory regions if backend supports configurable RAM slots */
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
            u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
        } else {
            err = vhost_user_get_max_memslots(dev, &ram_slots);
            if (err < 0) {
                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
                return -EPROTO;
            }

            if (ram_slots < u->user->memory_slots) {
                error_setg(errp, "The backend specified a max ram slots limit "
                           "of %" PRIu64", when the prior validated limit was "
                           "%d. This limit should never decrease.", ram_slots,
                           u->user->memory_slots);
                return -EINVAL;
            }

            u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
        }
    }

    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

    if (dev->vq_index == 0) {
        err = vhost_setup_slave_channel(dev);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }
    }

    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
    postcopy_add_notifier(&u->postcopy_notifier);

    return 0;
}

static int vhost_user_backend_cleanup(struct vhost_dev *dev)
{
    struct vhost_user *u;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = dev->opaque;
    if (u->postcopy_notifier.notify) {
        postcopy_remove_notifier(&u->postcopy_notifier);
        u->postcopy_notifier.notify = NULL;
    }
    u->postcopy_listen = false;
    if (u->postcopy_fd.handler) {
        postcopy_unregister_shared_ufd(&u->postcopy_fd);
        close(u->postcopy_fd.fd);
        u->postcopy_fd.handler = NULL;
    }
    if (u->slave_ioc) {
        close_slave_channel(u);
    }
    g_free(u->region_rb);
    u->region_rb = NULL;
    g_free(u->region_rb_offset);
    u->region_rb_offset = NULL;
    u->region_rb_len = 0;
    g_free(u);
    dev->opaque = 0;

    return 0;
}

static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx;
}

static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    struct vhost_user *u = dev->opaque;

    return u->user->memory_slots;
}

static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}

static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
{
    VhostUserMsg msg = { };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If the guest supports GUEST_ANNOUNCE do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* if the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.hdr.request = VHOST_USER_SEND_RARP;
        msg.hdr.flags = VHOST_USER_VERSION;
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
        msg.hdr.size = sizeof(msg.payload.u64);

        return vhost_user_write(dev, &msg, NULL, 0);
    }
    return -ENOTSUP;
}

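/*
 * Two adjacent memory sections can only be merged into one vhost memory
 * region if they are backed by the same mmap-able fd: each region is
 * passed to the backend as a single fd plus addresses and an mmap offset,
 * so sections backed by different fds must stay separate regions.
 */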
static bool vhost_user_can_merge(struct vhost_dev *dev,
                                 uint64_t start1, uint64_t size1,
                                 uint64_t start2, uint64_t size2)
{
    ram_addr_t offset;
    int mfd, rfd;

    (void)vhost_user_get_mr_data(start1, &offset, &mfd);
    (void)vhost_user_get_mr_data(start2, &offset, &rfd);

    return mfd == rfd;
}

static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
{
    VhostUserMsg msg;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    int ret;

    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
        return 0;
    }

    msg.hdr.request = VHOST_USER_NET_SET_MTU;
    msg.payload.u64 = mtu;
    msg.hdr.size = sizeof(msg.payload.u64);
    msg.hdr.flags = VHOST_USER_VERSION;
    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    /* If reply_ack is supported, the backend has to ack that the MTU is valid */
    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
                                            struct vhost_iotlb_msg *imsg)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_IOTLB_MSG,
        .hdr.size = sizeof(msg.payload.iotlb),
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
        .payload.iotlb = *imsg,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    return process_message_reply(dev, &msg);
}

static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}

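/*
 * Device config space accesses use VHOST_USER_GET_CONFIG /
 * VHOST_USER_SET_CONFIG: the payload is an offset/size/flags header
 * (VHOST_USER_CONFIG_HDR_SIZE bytes) followed by up to
 * VHOST_USER_MAX_CONFIG_SIZE bytes of config data, and both calls
 * require the VHOST_USER_PROTOCOL_F_CONFIG protocol feature.
 */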
static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
        return -EINVAL;
    }

    assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);

    msg.payload.config.offset = 0;
    msg.payload.config.size = config_len;
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
        error_setg(errp,
                   "Received unexpected msg type. Expected %d received %d",
                   VHOST_USER_GET_CONFIG, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }

    memcpy(config, msg.payload.config.region, config_len);

    return 0;
}

static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size, uint32_t flags)
{
    int ret;
    uint8_t *p;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -ENOTSUP;
    }

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
        return -EINVAL;
    }

    msg.payload.config.offset = offset;
    msg.payload.config.size = size;
    msg.payload.config.flags = flags;
    p = msg.payload.config.region;
    memcpy(p, data, size);

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}

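/*
 * Session setup for a vhost-user crypto backend: the payload carries the
 * CryptoDevBackendSymSessionInfo describing the algorithm plus the cipher
 * key and optional authentication key, and the backend answers with the
 * same message type, returning a non-negative session_id on success and a
 * negative value on failure. Requires VHOST_USER_PROTOCOL_F_CRYPTO_SESSION.
 */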
static int vhost_user_crypto_create_session(struct vhost_dev *dev,
                                            void *session_info,
                                            uint64_t *session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    CryptoDevBackendSymSessionInfo *sess_info = session_info;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.session),
    };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    memcpy(&msg.payload.session.session_setup_data, sess_info,
           sizeof(CryptoDevBackendSymSessionInfo));
    if (sess_info->key_len) {
        memcpy(&msg.payload.session.key, sess_info->cipher_key,
               sess_info->key_len);
    }
    if (sess_info->auth_key_len > 0) {
        memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
               sess_info->auth_key_len);
    }
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() returned %d, create session failed",
                     ret);
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_report("vhost_user_read() returned %d, create session failed",
                     ret);
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.session)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    if (msg.payload.session.session_id < 0) {
        error_report("Bad session id: %" PRId64,
                     msg.payload.session.session_id);
        return -EINVAL;
    }
    *session_id = msg.payload.session.session_id;

    return 0;
}

static int
vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.u64),
    };
    msg.payload.u64 = session_id;

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() returned %d, close session failed",
                     ret);
        return ret;
    }

    return 0;
}

static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
                                          MemoryRegionSection *section)
{
    return memory_region_get_fd(section->mr) >= 0;
}

/*
 * With VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD the backend exports a
 * shared-memory region that tracks in-flight requests: QEMU fetches it
 * with VHOST_USER_GET_INFLIGHT_FD, mmaps the returned fd, and later hands
 * the same fd back with VHOST_USER_SET_INFLIGHT_FD (for example after a
 * backend reconnect) so outstanding requests can be resubmitted rather
 * than lost.
 */
static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
                                      uint16_t queue_size,
                                      struct vhost_inflight *inflight)
{
    void *addr;
    int fd;
    int ret;
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    if (!msg.payload.inflight.mmap_size) {
        return 0;
    }

    fd = qemu_chr_fe_get_msgfd(chr);
    if (fd < 0) {
        error_report("Failed to get mem fd");
        return -EIO;
    }

    addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
                MAP_SHARED, fd, msg.payload.inflight.mmap_offset);

    if (addr == MAP_FAILED) {
        error_report("Failed to mmap mem fd");
        close(fd);
        return -EFAULT;
    }

    inflight->addr = addr;
    inflight->fd = fd;
    inflight->size = msg.payload.inflight.mmap_size;
    inflight->offset = msg.payload.inflight.mmap_offset;
    inflight->queue_size = queue_size;

    return 0;
}

static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
                                      struct vhost_inflight *inflight)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.mmap_size = inflight->size,
        .payload.inflight.mmap_offset = inflight->offset,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = inflight->queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    return vhost_user_write(dev, &msg, &inflight->fd, 1);
}

static void vhost_user_state_destroy(gpointer data)
{
    VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
    if (n) {
        vhost_user_host_notifier_remove(n, NULL);
        object_unparent(OBJECT(&n->mr));
        /*
         * We can't free until vhost_user_host_notifier_remove has
         * done its thing, so schedule the free with RCU.
         */
        g_free_rcu(n, rcu);
    }
}

bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
    if (user->chr) {
        error_setg(errp, "Cannot initialize vhost-user state");
        return false;
    }
    user->chr = chr;
    user->memory_slots = 0;
    user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
                                           &vhost_user_state_destroy);
    return true;
}

void vhost_user_cleanup(VhostUserState *user)
{
    if (!user->chr) {
        return;
    }
    memory_region_transaction_begin();
    user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
    memory_region_transaction_commit();
    user->chr = NULL;
}

typedef struct {
    vu_async_close_fn cb;
    DeviceState *dev;
    CharBackend *cd;
    struct vhost_dev *vhost;
} VhostAsyncCallback;

static void vhost_user_async_close_bh(void *opaque)
{
    VhostAsyncCallback *data = opaque;
    struct vhost_dev *vhost = data->vhost;

    /*
     * If the vhost_dev has been cleared in the meantime there is
     * nothing left to do as some other path has completed the
     * cleanup.
     */
    if (vhost->vdev) {
        data->cb(data->dev);
    }

    g_free(data);
}

/*
 * We only schedule the work if the machine is running. If suspended
 * we want to keep all the in-flight data as is for migration
 * purposes.
 */
void vhost_user_async_close(DeviceState *d,
                            CharBackend *chardev, struct vhost_dev *vhost,
                            vu_async_close_fn cb)
{
    if (!runstate_check(RUN_STATE_SHUTDOWN)) {
        /*
         * A close event may happen during a read/write, but vhost
         * code assumes the vhost_dev remains setup, so delay the
         * stop & clear.
         */
        AioContext *ctx = qemu_get_current_aio_context();
        VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);

        /* Save data for the callback */
        data->cb = cb;
        data->dev = d;
        data->cd = chardev;
        data->vhost = vhost;

        /* Disable any further notifications on the chardev */
        qemu_chr_fe_set_handlers(chardev,
                                 NULL, NULL, NULL, NULL, NULL, NULL,
                                 false);

        aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);

        /*
         * Move the vhost device to the stopped state. The vhost-user
         * device will be cleaned up and disconnected in the BH. This is
         * useful for the vhost migration code: if a disconnect is caught
         * there, the generic vhost code can still query the device state
         * without knowing its type (in this case vhost-user).
         *
         * Note that if the vhost device is fully cleared by the time we
         * execute the bottom half, we won't continue with the cleanup.
         */
        vhost->started = false;
    }
}

static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
{
    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_STATUS)) {
        return 0;
    }

    /* Set device status only for last queue pair */
    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                          VIRTIO_CONFIG_S_DRIVER |
                                          VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        return 0;
    }
}

static void vhost_user_reset_status(struct vhost_dev *dev)
{
    /* Set device status only for last queue pair */
    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return;
    }

    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        vhost_user_set_status(dev, 0);
    }
}

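/*
 * Dispatch table registered for VHOST_BACKEND_TYPE_USER; the generic
 * vhost code (hw/virtio/vhost.c) calls into the vhost-user backend
 * through these callbacks.
 */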
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        .vhost_backend_init = vhost_user_backend_init,
        .vhost_backend_cleanup = vhost_user_backend_cleanup,
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_vring_err = vhost_user_set_vring_err,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
        .vhost_migration_done = vhost_user_migration_done,
        .vhost_backend_can_merge = vhost_user_can_merge,
        .vhost_net_set_mtu = vhost_user_net_set_mtu,
        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
        .vhost_get_config = vhost_user_get_config,
        .vhost_set_config = vhost_user_set_config,
        .vhost_crypto_create_session = vhost_user_crypto_create_session,
        .vhost_crypto_close_session = vhost_user_crypto_close_session,
        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
        .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
        .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
        .vhost_dev_start = vhost_user_dev_start,
        .vhost_reset_status = vhost_user_reset_status,
};
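/*
 * Hypothetical front-end setup sketch (illustration only, not part of
 * this file): a device using this backend first binds its chardev to a
 * VhostUserState and then initializes its vhost device with
 * VHOST_BACKEND_TYPE_USER, roughly:
 *
 *     VhostUserState user;
 *     if (!vhost_user_init(&user, &s->chardev, errp)) {
 *         return;
 *     }
 *     vhost_dev_init(&s->vhost_dev, &user, VHOST_BACKEND_TYPE_USER, 0, errp);
 *
 * with vhost_user_cleanup() called when the device is torn down.
 */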