1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "sysemu/kvm.h" 20 #include "qemu/error-report.h" 21 #include "qemu/main-loop.h" 22 #include "qemu/sockets.h" 23 #include "sysemu/cryptodev.h" 24 #include "migration/migration.h" 25 #include "migration/postcopy-ram.h" 26 #include "trace.h" 27 28 #include <sys/ioctl.h> 29 #include <sys/socket.h> 30 #include <sys/un.h> 31 32 #include "standard-headers/linux/vhost_types.h" 33 34 #ifdef CONFIG_LINUX 35 #include <linux/userfaultfd.h> 36 #endif 37 38 #define VHOST_MEMORY_MAX_NREGIONS 8 39 #define VHOST_USER_F_PROTOCOL_FEATURES 30 40 #define VHOST_USER_SLAVE_MAX_FDS 8 41 42 /* 43 * Maximum size of virtio device config space 44 */ 45 #define VHOST_USER_MAX_CONFIG_SIZE 256 46 47 enum VhostUserProtocolFeature { 48 VHOST_USER_PROTOCOL_F_MQ = 0, 49 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 50 VHOST_USER_PROTOCOL_F_RARP = 2, 51 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 52 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 53 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 54 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 55 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 56 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 57 VHOST_USER_PROTOCOL_F_CONFIG = 9, 58 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 59 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 60 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 61 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, 62 VHOST_USER_PROTOCOL_F_MAX 63 }; 64 65 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 66 67 typedef enum VhostUserRequest { 68 VHOST_USER_NONE = 0, 69 VHOST_USER_GET_FEATURES = 1, 70 VHOST_USER_SET_FEATURES = 2, 71 VHOST_USER_SET_OWNER = 3, 72 VHOST_USER_RESET_OWNER = 4, 73 VHOST_USER_SET_MEM_TABLE = 5, 74 VHOST_USER_SET_LOG_BASE = 6, 75 VHOST_USER_SET_LOG_FD = 7, 76 VHOST_USER_SET_VRING_NUM = 8, 77 VHOST_USER_SET_VRING_ADDR = 9, 78 VHOST_USER_SET_VRING_BASE = 10, 79 VHOST_USER_GET_VRING_BASE = 11, 80 VHOST_USER_SET_VRING_KICK = 12, 81 VHOST_USER_SET_VRING_CALL = 13, 82 VHOST_USER_SET_VRING_ERR = 14, 83 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 84 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 85 VHOST_USER_GET_QUEUE_NUM = 17, 86 VHOST_USER_SET_VRING_ENABLE = 18, 87 VHOST_USER_SEND_RARP = 19, 88 VHOST_USER_NET_SET_MTU = 20, 89 VHOST_USER_SET_SLAVE_REQ_FD = 21, 90 VHOST_USER_IOTLB_MSG = 22, 91 VHOST_USER_SET_VRING_ENDIAN = 23, 92 VHOST_USER_GET_CONFIG = 24, 93 VHOST_USER_SET_CONFIG = 25, 94 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 95 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 96 VHOST_USER_POSTCOPY_ADVISE = 28, 97 VHOST_USER_POSTCOPY_LISTEN = 29, 98 VHOST_USER_POSTCOPY_END = 30, 99 VHOST_USER_GET_INFLIGHT_FD = 31, 100 VHOST_USER_SET_INFLIGHT_FD = 32, 101 VHOST_USER_GPU_SET_SOCKET = 33, 102 VHOST_USER_RESET_DEVICE = 34, 103 VHOST_USER_MAX 104 } VhostUserRequest; 105 106 typedef enum VhostUserSlaveRequest { 107 VHOST_USER_SLAVE_NONE = 0, 108 VHOST_USER_SLAVE_IOTLB_MSG = 1, 109 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 110 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 111 VHOST_USER_SLAVE_MAX 112 } VhostUserSlaveRequest; 113 114 typedef struct VhostUserMemoryRegion { 115 uint64_t guest_phys_addr; 116 uint64_t memory_size; 117 uint64_t userspace_addr; 118 uint64_t mmap_offset; 119 } VhostUserMemoryRegion; 120 121 typedef struct VhostUserMemory { 122 uint32_t nregions; 123 uint32_t padding; 124 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 125 } VhostUserMemory; 126 127 typedef struct VhostUserLog { 128 uint64_t mmap_size; 129 uint64_t mmap_offset; 130 } VhostUserLog; 131 132 typedef struct VhostUserConfig { 133 uint32_t offset; 134 uint32_t size; 135 uint32_t flags; 136 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 137 } VhostUserConfig; 138 139 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 140 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 141 142 typedef struct VhostUserCryptoSession { 143 /* session id for success, -1 on errors */ 144 int64_t session_id; 145 CryptoDevBackendSymSessionInfo session_setup_data; 146 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 147 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 148 } VhostUserCryptoSession; 149 150 static VhostUserConfig c __attribute__ ((unused)); 151 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 152 + sizeof(c.size) \ 153 + sizeof(c.flags)) 154 155 typedef struct VhostUserVringArea { 156 uint64_t u64; 157 uint64_t size; 158 uint64_t offset; 159 } VhostUserVringArea; 160 161 typedef struct VhostUserInflight { 162 uint64_t mmap_size; 163 uint64_t mmap_offset; 164 uint16_t num_queues; 165 uint16_t queue_size; 166 } VhostUserInflight; 167 168 typedef struct { 169 VhostUserRequest request; 170 171 #define VHOST_USER_VERSION_MASK (0x3) 172 #define VHOST_USER_REPLY_MASK (0x1<<2) 173 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 174 uint32_t flags; 175 uint32_t size; /* the following payload size */ 176 } QEMU_PACKED VhostUserHeader; 177 178 typedef union { 179 #define VHOST_USER_VRING_IDX_MASK (0xff) 180 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 181 uint64_t u64; 182 struct vhost_vring_state state; 183 struct vhost_vring_addr addr; 184 VhostUserMemory memory; 185 VhostUserLog log; 186 struct vhost_iotlb_msg iotlb; 187 VhostUserConfig config; 188 VhostUserCryptoSession session; 189 VhostUserVringArea area; 190 VhostUserInflight inflight; 191 } VhostUserPayload; 192 193 typedef struct VhostUserMsg { 194 VhostUserHeader hdr; 195 VhostUserPayload payload; 196 } QEMU_PACKED VhostUserMsg; 197 198 static VhostUserMsg m __attribute__ ((unused)); 199 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 200 201 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 202 203 /* The version of the protocol we support */ 204 #define VHOST_USER_VERSION (0x1) 205 206 struct vhost_user { 207 struct vhost_dev *dev; 208 /* Shared between vhost devs of the same virtio device */ 209 VhostUserState *user; 210 int slave_fd; 211 NotifierWithReturn postcopy_notifier; 212 struct PostCopyFD postcopy_fd; 213 uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; 214 /* Length of the region_rb and region_rb_offset arrays */ 215 size_t region_rb_len; 216 /* RAMBlock associated with a given region */ 217 RAMBlock **region_rb; 218 /* The offset from the start of the RAMBlock to the start of the 219 * vhost region. 220 */ 221 ram_addr_t *region_rb_offset; 222 223 /* True once we've entered postcopy_listen */ 224 bool postcopy_listen; 225 }; 226 227 static bool ioeventfd_enabled(void) 228 { 229 return !kvm_enabled() || kvm_eventfds_enabled(); 230 } 231 232 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 233 { 234 struct vhost_user *u = dev->opaque; 235 CharBackend *chr = u->user->chr; 236 uint8_t *p = (uint8_t *) msg; 237 int r, size = VHOST_USER_HDR_SIZE; 238 239 r = qemu_chr_fe_read_all(chr, p, size); 240 if (r != size) { 241 error_report("Failed to read msg header. Read %d instead of %d." 242 " Original request %d.", r, size, msg->hdr.request); 243 return -1; 244 } 245 246 /* validate received flags */ 247 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 248 error_report("Failed to read msg header." 249 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 250 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 251 return -1; 252 } 253 254 return 0; 255 } 256 257 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 258 { 259 struct vhost_user *u = dev->opaque; 260 CharBackend *chr = u->user->chr; 261 uint8_t *p = (uint8_t *) msg; 262 int r, size; 263 264 if (vhost_user_read_header(dev, msg) < 0) { 265 return -1; 266 } 267 268 /* validate message size is sane */ 269 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 270 error_report("Failed to read msg header." 271 " Size %d exceeds the maximum %zu.", msg->hdr.size, 272 VHOST_USER_PAYLOAD_SIZE); 273 return -1; 274 } 275 276 if (msg->hdr.size) { 277 p += VHOST_USER_HDR_SIZE; 278 size = msg->hdr.size; 279 r = qemu_chr_fe_read_all(chr, p, size); 280 if (r != size) { 281 error_report("Failed to read msg payload." 282 " Read %d instead of %d.", r, msg->hdr.size); 283 return -1; 284 } 285 } 286 287 return 0; 288 } 289 290 static int process_message_reply(struct vhost_dev *dev, 291 const VhostUserMsg *msg) 292 { 293 VhostUserMsg msg_reply; 294 295 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 296 return 0; 297 } 298 299 if (vhost_user_read(dev, &msg_reply) < 0) { 300 return -1; 301 } 302 303 if (msg_reply.hdr.request != msg->hdr.request) { 304 error_report("Received unexpected msg type." 305 "Expected %d received %d", 306 msg->hdr.request, msg_reply.hdr.request); 307 return -1; 308 } 309 310 return msg_reply.payload.u64 ? -1 : 0; 311 } 312 313 static bool vhost_user_one_time_request(VhostUserRequest request) 314 { 315 switch (request) { 316 case VHOST_USER_SET_OWNER: 317 case VHOST_USER_RESET_OWNER: 318 case VHOST_USER_SET_MEM_TABLE: 319 case VHOST_USER_GET_QUEUE_NUM: 320 case VHOST_USER_NET_SET_MTU: 321 return true; 322 default: 323 return false; 324 } 325 } 326 327 /* most non-init callers ignore the error */ 328 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 329 int *fds, int fd_num) 330 { 331 struct vhost_user *u = dev->opaque; 332 CharBackend *chr = u->user->chr; 333 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 334 335 /* 336 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 337 * we just need send it once in the first time. For later such 338 * request, we just ignore it. 339 */ 340 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 341 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 342 return 0; 343 } 344 345 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 346 error_report("Failed to set msg fds."); 347 return -1; 348 } 349 350 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 351 if (ret != size) { 352 error_report("Failed to write msg." 353 " Wrote %d instead of %d.", ret, size); 354 return -1; 355 } 356 357 return 0; 358 } 359 360 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 361 { 362 VhostUserMsg msg = { 363 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 364 .hdr.flags = VHOST_USER_VERSION, 365 }; 366 367 return vhost_user_write(dev, &msg, &fd, 1); 368 } 369 370 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 371 struct vhost_log *log) 372 { 373 int fds[VHOST_MEMORY_MAX_NREGIONS]; 374 size_t fd_num = 0; 375 bool shmfd = virtio_has_feature(dev->protocol_features, 376 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 377 VhostUserMsg msg = { 378 .hdr.request = VHOST_USER_SET_LOG_BASE, 379 .hdr.flags = VHOST_USER_VERSION, 380 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 381 .payload.log.mmap_offset = 0, 382 .hdr.size = sizeof(msg.payload.log), 383 }; 384 385 if (shmfd && log->fd != -1) { 386 fds[fd_num++] = log->fd; 387 } 388 389 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 390 return -1; 391 } 392 393 if (shmfd) { 394 msg.hdr.size = 0; 395 if (vhost_user_read(dev, &msg) < 0) { 396 return -1; 397 } 398 399 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 400 error_report("Received unexpected msg type. " 401 "Expected %d received %d", 402 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 403 return -1; 404 } 405 } 406 407 return 0; 408 } 409 410 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 411 struct vhost_dev *dev, 412 VhostUserMsg *msg, 413 int *fds, size_t *fd_num, 414 bool track_ramblocks) 415 { 416 int i, fd; 417 ram_addr_t offset; 418 MemoryRegion *mr; 419 struct vhost_memory_region *reg; 420 421 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 422 423 for (i = 0; i < dev->mem->nregions; ++i) { 424 reg = dev->mem->regions + i; 425 426 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 427 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 428 &offset); 429 fd = memory_region_get_fd(mr); 430 if (fd > 0) { 431 if (track_ramblocks) { 432 assert(*fd_num < VHOST_MEMORY_MAX_NREGIONS); 433 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 434 reg->memory_size, 435 reg->guest_phys_addr, 436 reg->userspace_addr, 437 offset); 438 u->region_rb_offset[i] = offset; 439 u->region_rb[i] = mr->ram_block; 440 } else if (*fd_num == VHOST_MEMORY_MAX_NREGIONS) { 441 error_report("Failed preparing vhost-user memory table msg"); 442 return -1; 443 } 444 msg->payload.memory.regions[*fd_num].userspace_addr = 445 reg->userspace_addr; 446 msg->payload.memory.regions[*fd_num].memory_size = 447 reg->memory_size; 448 msg->payload.memory.regions[*fd_num].guest_phys_addr = 449 reg->guest_phys_addr; 450 msg->payload.memory.regions[*fd_num].mmap_offset = offset; 451 fds[(*fd_num)++] = fd; 452 } else if (track_ramblocks) { 453 u->region_rb_offset[i] = 0; 454 u->region_rb[i] = NULL; 455 } 456 } 457 458 msg->payload.memory.nregions = *fd_num; 459 460 if (!*fd_num) { 461 error_report("Failed initializing vhost-user memory map, " 462 "consider using -object memory-backend-file share=on"); 463 return -1; 464 } 465 466 msg->hdr.size = sizeof(msg->payload.memory.nregions); 467 msg->hdr.size += sizeof(msg->payload.memory.padding); 468 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 469 470 return 1; 471 } 472 473 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 474 struct vhost_memory *mem) 475 { 476 struct vhost_user *u = dev->opaque; 477 int fds[VHOST_MEMORY_MAX_NREGIONS]; 478 size_t fd_num = 0; 479 VhostUserMsg msg_reply; 480 int region_i, msg_i; 481 482 VhostUserMsg msg = { 483 .hdr.flags = VHOST_USER_VERSION, 484 }; 485 486 if (u->region_rb_len < dev->mem->nregions) { 487 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 488 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 489 dev->mem->nregions); 490 memset(&(u->region_rb[u->region_rb_len]), '\0', 491 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 492 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 493 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 494 u->region_rb_len = dev->mem->nregions; 495 } 496 497 if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 498 true) < 0) { 499 return -1; 500 } 501 502 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 503 return -1; 504 } 505 506 if (vhost_user_read(dev, &msg_reply) < 0) { 507 return -1; 508 } 509 510 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 511 error_report("%s: Received unexpected msg type." 512 "Expected %d received %d", __func__, 513 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 514 return -1; 515 } 516 /* We're using the same structure, just reusing one of the 517 * fields, so it should be the same size. 518 */ 519 if (msg_reply.hdr.size != msg.hdr.size) { 520 error_report("%s: Unexpected size for postcopy reply " 521 "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); 522 return -1; 523 } 524 525 memset(u->postcopy_client_bases, 0, 526 sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); 527 528 /* They're in the same order as the regions that were sent 529 * but some of the regions were skipped (above) if they 530 * didn't have fd's 531 */ 532 for (msg_i = 0, region_i = 0; 533 region_i < dev->mem->nregions; 534 region_i++) { 535 if (msg_i < fd_num && 536 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 537 dev->mem->regions[region_i].guest_phys_addr) { 538 u->postcopy_client_bases[region_i] = 539 msg_reply.payload.memory.regions[msg_i].userspace_addr; 540 trace_vhost_user_set_mem_table_postcopy( 541 msg_reply.payload.memory.regions[msg_i].userspace_addr, 542 msg.payload.memory.regions[msg_i].userspace_addr, 543 msg_i, region_i); 544 msg_i++; 545 } 546 } 547 if (msg_i != fd_num) { 548 error_report("%s: postcopy reply not fully consumed " 549 "%d vs %zd", 550 __func__, msg_i, fd_num); 551 return -1; 552 } 553 /* Now we've registered this with the postcopy code, we ack to the client, 554 * because now we're in the position to be able to deal with any faults 555 * it generates. 556 */ 557 /* TODO: Use this for failure cases as well with a bad value */ 558 msg.hdr.size = sizeof(msg.payload.u64); 559 msg.payload.u64 = 0; /* OK */ 560 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 561 return -1; 562 } 563 564 return 0; 565 } 566 567 static int vhost_user_set_mem_table(struct vhost_dev *dev, 568 struct vhost_memory *mem) 569 { 570 struct vhost_user *u = dev->opaque; 571 int fds[VHOST_MEMORY_MAX_NREGIONS]; 572 size_t fd_num = 0; 573 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 574 bool reply_supported = virtio_has_feature(dev->protocol_features, 575 VHOST_USER_PROTOCOL_F_REPLY_ACK); 576 577 if (do_postcopy) { 578 /* Postcopy has enough differences that it's best done in it's own 579 * version 580 */ 581 return vhost_user_set_mem_table_postcopy(dev, mem); 582 } 583 584 VhostUserMsg msg = { 585 .hdr.flags = VHOST_USER_VERSION, 586 }; 587 588 if (reply_supported) { 589 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 590 } 591 592 if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 593 false) < 0) { 594 return -1; 595 } 596 597 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 598 return -1; 599 } 600 601 if (reply_supported) { 602 return process_message_reply(dev, &msg); 603 } 604 605 return 0; 606 } 607 608 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 609 struct vhost_vring_addr *addr) 610 { 611 VhostUserMsg msg = { 612 .hdr.request = VHOST_USER_SET_VRING_ADDR, 613 .hdr.flags = VHOST_USER_VERSION, 614 .payload.addr = *addr, 615 .hdr.size = sizeof(msg.payload.addr), 616 }; 617 618 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 619 return -1; 620 } 621 622 return 0; 623 } 624 625 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 626 struct vhost_vring_state *ring) 627 { 628 bool cross_endian = virtio_has_feature(dev->protocol_features, 629 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 630 VhostUserMsg msg = { 631 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 632 .hdr.flags = VHOST_USER_VERSION, 633 .payload.state = *ring, 634 .hdr.size = sizeof(msg.payload.state), 635 }; 636 637 if (!cross_endian) { 638 error_report("vhost-user trying to send unhandled ioctl"); 639 return -1; 640 } 641 642 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 643 return -1; 644 } 645 646 return 0; 647 } 648 649 static int vhost_set_vring(struct vhost_dev *dev, 650 unsigned long int request, 651 struct vhost_vring_state *ring) 652 { 653 VhostUserMsg msg = { 654 .hdr.request = request, 655 .hdr.flags = VHOST_USER_VERSION, 656 .payload.state = *ring, 657 .hdr.size = sizeof(msg.payload.state), 658 }; 659 660 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 661 return -1; 662 } 663 664 return 0; 665 } 666 667 static int vhost_user_set_vring_num(struct vhost_dev *dev, 668 struct vhost_vring_state *ring) 669 { 670 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 671 } 672 673 static void vhost_user_host_notifier_restore(struct vhost_dev *dev, 674 int queue_idx) 675 { 676 struct vhost_user *u = dev->opaque; 677 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 678 VirtIODevice *vdev = dev->vdev; 679 680 if (n->addr && !n->set) { 681 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true); 682 n->set = true; 683 } 684 } 685 686 static void vhost_user_host_notifier_remove(struct vhost_dev *dev, 687 int queue_idx) 688 { 689 struct vhost_user *u = dev->opaque; 690 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 691 VirtIODevice *vdev = dev->vdev; 692 693 if (n->addr && n->set) { 694 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 695 n->set = false; 696 } 697 } 698 699 static int vhost_user_set_vring_base(struct vhost_dev *dev, 700 struct vhost_vring_state *ring) 701 { 702 vhost_user_host_notifier_restore(dev, ring->index); 703 704 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 705 } 706 707 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 708 { 709 int i; 710 711 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 712 return -1; 713 } 714 715 for (i = 0; i < dev->nvqs; ++i) { 716 struct vhost_vring_state state = { 717 .index = dev->vq_index + i, 718 .num = enable, 719 }; 720 721 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 722 } 723 724 return 0; 725 } 726 727 static int vhost_user_get_vring_base(struct vhost_dev *dev, 728 struct vhost_vring_state *ring) 729 { 730 VhostUserMsg msg = { 731 .hdr.request = VHOST_USER_GET_VRING_BASE, 732 .hdr.flags = VHOST_USER_VERSION, 733 .payload.state = *ring, 734 .hdr.size = sizeof(msg.payload.state), 735 }; 736 737 vhost_user_host_notifier_remove(dev, ring->index); 738 739 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 740 return -1; 741 } 742 743 if (vhost_user_read(dev, &msg) < 0) { 744 return -1; 745 } 746 747 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 748 error_report("Received unexpected msg type. Expected %d received %d", 749 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 750 return -1; 751 } 752 753 if (msg.hdr.size != sizeof(msg.payload.state)) { 754 error_report("Received bad msg size."); 755 return -1; 756 } 757 758 *ring = msg.payload.state; 759 760 return 0; 761 } 762 763 static int vhost_set_vring_file(struct vhost_dev *dev, 764 VhostUserRequest request, 765 struct vhost_vring_file *file) 766 { 767 int fds[VHOST_MEMORY_MAX_NREGIONS]; 768 size_t fd_num = 0; 769 VhostUserMsg msg = { 770 .hdr.request = request, 771 .hdr.flags = VHOST_USER_VERSION, 772 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 773 .hdr.size = sizeof(msg.payload.u64), 774 }; 775 776 if (ioeventfd_enabled() && file->fd > 0) { 777 fds[fd_num++] = file->fd; 778 } else { 779 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 780 } 781 782 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 783 return -1; 784 } 785 786 return 0; 787 } 788 789 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 790 struct vhost_vring_file *file) 791 { 792 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 793 } 794 795 static int vhost_user_set_vring_call(struct vhost_dev *dev, 796 struct vhost_vring_file *file) 797 { 798 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 799 } 800 801 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 802 { 803 VhostUserMsg msg = { 804 .hdr.request = request, 805 .hdr.flags = VHOST_USER_VERSION, 806 .payload.u64 = u64, 807 .hdr.size = sizeof(msg.payload.u64), 808 }; 809 810 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 811 return -1; 812 } 813 814 return 0; 815 } 816 817 static int vhost_user_set_features(struct vhost_dev *dev, 818 uint64_t features) 819 { 820 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 821 } 822 823 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 824 uint64_t features) 825 { 826 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 827 } 828 829 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 830 { 831 VhostUserMsg msg = { 832 .hdr.request = request, 833 .hdr.flags = VHOST_USER_VERSION, 834 }; 835 836 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 837 return 0; 838 } 839 840 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 841 return -1; 842 } 843 844 if (vhost_user_read(dev, &msg) < 0) { 845 return -1; 846 } 847 848 if (msg.hdr.request != request) { 849 error_report("Received unexpected msg type. Expected %d received %d", 850 request, msg.hdr.request); 851 return -1; 852 } 853 854 if (msg.hdr.size != sizeof(msg.payload.u64)) { 855 error_report("Received bad msg size."); 856 return -1; 857 } 858 859 *u64 = msg.payload.u64; 860 861 return 0; 862 } 863 864 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 865 { 866 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 867 } 868 869 static int vhost_user_set_owner(struct vhost_dev *dev) 870 { 871 VhostUserMsg msg = { 872 .hdr.request = VHOST_USER_SET_OWNER, 873 .hdr.flags = VHOST_USER_VERSION, 874 }; 875 876 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 877 return -1; 878 } 879 880 return 0; 881 } 882 883 static int vhost_user_reset_device(struct vhost_dev *dev) 884 { 885 VhostUserMsg msg = { 886 .hdr.flags = VHOST_USER_VERSION, 887 }; 888 889 msg.hdr.request = virtio_has_feature(dev->protocol_features, 890 VHOST_USER_PROTOCOL_F_RESET_DEVICE) 891 ? VHOST_USER_RESET_DEVICE 892 : VHOST_USER_RESET_OWNER; 893 894 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 895 return -1; 896 } 897 898 return 0; 899 } 900 901 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 902 { 903 int ret = -1; 904 905 if (!dev->config_ops) { 906 return -1; 907 } 908 909 if (dev->config_ops->vhost_dev_config_notifier) { 910 ret = dev->config_ops->vhost_dev_config_notifier(dev); 911 } 912 913 return ret; 914 } 915 916 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 917 VhostUserVringArea *area, 918 int fd) 919 { 920 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 921 size_t page_size = qemu_real_host_page_size; 922 struct vhost_user *u = dev->opaque; 923 VhostUserState *user = u->user; 924 VirtIODevice *vdev = dev->vdev; 925 VhostUserHostNotifier *n; 926 void *addr; 927 char *name; 928 929 if (!virtio_has_feature(dev->protocol_features, 930 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 931 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 932 return -1; 933 } 934 935 n = &user->notifier[queue_idx]; 936 937 if (n->addr) { 938 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 939 object_unparent(OBJECT(&n->mr)); 940 munmap(n->addr, page_size); 941 n->addr = NULL; 942 } 943 944 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 945 return 0; 946 } 947 948 /* Sanity check. */ 949 if (area->size != page_size) { 950 return -1; 951 } 952 953 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 954 fd, area->offset); 955 if (addr == MAP_FAILED) { 956 return -1; 957 } 958 959 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 960 user, queue_idx); 961 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 962 page_size, addr); 963 g_free(name); 964 965 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 966 munmap(addr, page_size); 967 return -1; 968 } 969 970 n->addr = addr; 971 n->set = true; 972 973 return 0; 974 } 975 976 static void slave_read(void *opaque) 977 { 978 struct vhost_dev *dev = opaque; 979 struct vhost_user *u = dev->opaque; 980 VhostUserHeader hdr = { 0, }; 981 VhostUserPayload payload = { 0, }; 982 int size, ret = 0; 983 struct iovec iov; 984 struct msghdr msgh; 985 int fd[VHOST_USER_SLAVE_MAX_FDS]; 986 char control[CMSG_SPACE(sizeof(fd))]; 987 struct cmsghdr *cmsg; 988 int i, fdsize = 0; 989 990 memset(&msgh, 0, sizeof(msgh)); 991 msgh.msg_iov = &iov; 992 msgh.msg_iovlen = 1; 993 msgh.msg_control = control; 994 msgh.msg_controllen = sizeof(control); 995 996 memset(fd, -1, sizeof(fd)); 997 998 /* Read header */ 999 iov.iov_base = &hdr; 1000 iov.iov_len = VHOST_USER_HDR_SIZE; 1001 1002 do { 1003 size = recvmsg(u->slave_fd, &msgh, 0); 1004 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 1005 1006 if (size != VHOST_USER_HDR_SIZE) { 1007 error_report("Failed to read from slave."); 1008 goto err; 1009 } 1010 1011 if (msgh.msg_flags & MSG_CTRUNC) { 1012 error_report("Truncated message."); 1013 goto err; 1014 } 1015 1016 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 1017 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 1018 if (cmsg->cmsg_level == SOL_SOCKET && 1019 cmsg->cmsg_type == SCM_RIGHTS) { 1020 fdsize = cmsg->cmsg_len - CMSG_LEN(0); 1021 memcpy(fd, CMSG_DATA(cmsg), fdsize); 1022 break; 1023 } 1024 } 1025 1026 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1027 error_report("Failed to read msg header." 1028 " Size %d exceeds the maximum %zu.", hdr.size, 1029 VHOST_USER_PAYLOAD_SIZE); 1030 goto err; 1031 } 1032 1033 /* Read payload */ 1034 do { 1035 size = read(u->slave_fd, &payload, hdr.size); 1036 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 1037 1038 if (size != hdr.size) { 1039 error_report("Failed to read payload from slave."); 1040 goto err; 1041 } 1042 1043 switch (hdr.request) { 1044 case VHOST_USER_SLAVE_IOTLB_MSG: 1045 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1046 break; 1047 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1048 ret = vhost_user_slave_handle_config_change(dev); 1049 break; 1050 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1051 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1052 fd[0]); 1053 break; 1054 default: 1055 error_report("Received unexpected msg type: %d.", hdr.request); 1056 ret = -EINVAL; 1057 } 1058 1059 /* Close the remaining file descriptors. */ 1060 for (i = 0; i < fdsize; i++) { 1061 if (fd[i] != -1) { 1062 close(fd[i]); 1063 } 1064 } 1065 1066 /* 1067 * REPLY_ACK feature handling. Other reply types has to be managed 1068 * directly in their request handlers. 1069 */ 1070 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1071 struct iovec iovec[2]; 1072 1073 1074 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1075 hdr.flags |= VHOST_USER_REPLY_MASK; 1076 1077 payload.u64 = !!ret; 1078 hdr.size = sizeof(payload.u64); 1079 1080 iovec[0].iov_base = &hdr; 1081 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1082 iovec[1].iov_base = &payload; 1083 iovec[1].iov_len = hdr.size; 1084 1085 do { 1086 size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); 1087 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 1088 1089 if (size != VHOST_USER_HDR_SIZE + hdr.size) { 1090 error_report("Failed to send msg reply to slave."); 1091 goto err; 1092 } 1093 } 1094 1095 return; 1096 1097 err: 1098 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1099 close(u->slave_fd); 1100 u->slave_fd = -1; 1101 for (i = 0; i < fdsize; i++) { 1102 if (fd[i] != -1) { 1103 close(fd[i]); 1104 } 1105 } 1106 return; 1107 } 1108 1109 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1110 { 1111 VhostUserMsg msg = { 1112 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, 1113 .hdr.flags = VHOST_USER_VERSION, 1114 }; 1115 struct vhost_user *u = dev->opaque; 1116 int sv[2], ret = 0; 1117 bool reply_supported = virtio_has_feature(dev->protocol_features, 1118 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1119 1120 if (!virtio_has_feature(dev->protocol_features, 1121 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 1122 return 0; 1123 } 1124 1125 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1126 error_report("socketpair() failed"); 1127 return -1; 1128 } 1129 1130 u->slave_fd = sv[0]; 1131 qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev); 1132 1133 if (reply_supported) { 1134 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1135 } 1136 1137 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1138 if (ret) { 1139 goto out; 1140 } 1141 1142 if (reply_supported) { 1143 ret = process_message_reply(dev, &msg); 1144 } 1145 1146 out: 1147 close(sv[1]); 1148 if (ret) { 1149 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1150 close(u->slave_fd); 1151 u->slave_fd = -1; 1152 } 1153 1154 return ret; 1155 } 1156 1157 #ifdef CONFIG_LINUX 1158 /* 1159 * Called back from the postcopy fault thread when a fault is received on our 1160 * ufd. 1161 * TODO: This is Linux specific 1162 */ 1163 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1164 void *ufd) 1165 { 1166 struct vhost_dev *dev = pcfd->data; 1167 struct vhost_user *u = dev->opaque; 1168 struct uffd_msg *msg = ufd; 1169 uint64_t faultaddr = msg->arg.pagefault.address; 1170 RAMBlock *rb = NULL; 1171 uint64_t rb_offset; 1172 int i; 1173 1174 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1175 dev->mem->nregions); 1176 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1177 trace_vhost_user_postcopy_fault_handler_loop(i, 1178 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1179 if (faultaddr >= u->postcopy_client_bases[i]) { 1180 /* Ofset of the fault address in the vhost region */ 1181 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1182 if (region_offset < dev->mem->regions[i].memory_size) { 1183 rb_offset = region_offset + u->region_rb_offset[i]; 1184 trace_vhost_user_postcopy_fault_handler_found(i, 1185 region_offset, rb_offset); 1186 rb = u->region_rb[i]; 1187 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1188 rb_offset); 1189 } 1190 } 1191 } 1192 error_report("%s: Failed to find region for fault %" PRIx64, 1193 __func__, faultaddr); 1194 return -1; 1195 } 1196 1197 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1198 uint64_t offset) 1199 { 1200 struct vhost_dev *dev = pcfd->data; 1201 struct vhost_user *u = dev->opaque; 1202 int i; 1203 1204 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1205 1206 if (!u) { 1207 return 0; 1208 } 1209 /* Translate the offset into an address in the clients address space */ 1210 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1211 if (u->region_rb[i] == rb && 1212 offset >= u->region_rb_offset[i] && 1213 offset < (u->region_rb_offset[i] + 1214 dev->mem->regions[i].memory_size)) { 1215 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1216 u->postcopy_client_bases[i]; 1217 trace_vhost_user_postcopy_waker_found(client_addr); 1218 return postcopy_wake_shared(pcfd, client_addr, rb); 1219 } 1220 } 1221 1222 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1223 return 0; 1224 } 1225 #endif 1226 1227 /* 1228 * Called at the start of an inbound postcopy on reception of the 1229 * 'advise' command. 1230 */ 1231 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1232 { 1233 #ifdef CONFIG_LINUX 1234 struct vhost_user *u = dev->opaque; 1235 CharBackend *chr = u->user->chr; 1236 int ufd; 1237 VhostUserMsg msg = { 1238 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1239 .hdr.flags = VHOST_USER_VERSION, 1240 }; 1241 1242 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1243 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1244 return -1; 1245 } 1246 1247 if (vhost_user_read(dev, &msg) < 0) { 1248 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1249 return -1; 1250 } 1251 1252 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1253 error_setg(errp, "Unexpected msg type. Expected %d received %d", 1254 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1255 return -1; 1256 } 1257 1258 if (msg.hdr.size) { 1259 error_setg(errp, "Received bad msg size."); 1260 return -1; 1261 } 1262 ufd = qemu_chr_fe_get_msgfd(chr); 1263 if (ufd < 0) { 1264 error_setg(errp, "%s: Failed to get ufd", __func__); 1265 return -1; 1266 } 1267 qemu_set_nonblock(ufd); 1268 1269 /* register ufd with userfault thread */ 1270 u->postcopy_fd.fd = ufd; 1271 u->postcopy_fd.data = dev; 1272 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1273 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1274 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1275 postcopy_register_shared_ufd(&u->postcopy_fd); 1276 return 0; 1277 #else 1278 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1279 return -1; 1280 #endif 1281 } 1282 1283 /* 1284 * Called at the switch to postcopy on reception of the 'listen' command. 1285 */ 1286 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1287 { 1288 struct vhost_user *u = dev->opaque; 1289 int ret; 1290 VhostUserMsg msg = { 1291 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1292 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1293 }; 1294 u->postcopy_listen = true; 1295 trace_vhost_user_postcopy_listen(); 1296 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1297 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1298 return -1; 1299 } 1300 1301 ret = process_message_reply(dev, &msg); 1302 if (ret) { 1303 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1304 return ret; 1305 } 1306 1307 return 0; 1308 } 1309 1310 /* 1311 * Called at the end of postcopy 1312 */ 1313 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1314 { 1315 VhostUserMsg msg = { 1316 .hdr.request = VHOST_USER_POSTCOPY_END, 1317 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1318 }; 1319 int ret; 1320 struct vhost_user *u = dev->opaque; 1321 1322 trace_vhost_user_postcopy_end_entry(); 1323 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1324 error_setg(errp, "Failed to send postcopy_end to vhost"); 1325 return -1; 1326 } 1327 1328 ret = process_message_reply(dev, &msg); 1329 if (ret) { 1330 error_setg(errp, "Failed to receive reply to postcopy_end"); 1331 return ret; 1332 } 1333 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1334 close(u->postcopy_fd.fd); 1335 u->postcopy_fd.handler = NULL; 1336 1337 trace_vhost_user_postcopy_end_exit(); 1338 1339 return 0; 1340 } 1341 1342 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1343 void *opaque) 1344 { 1345 struct PostcopyNotifyData *pnd = opaque; 1346 struct vhost_user *u = container_of(notifier, struct vhost_user, 1347 postcopy_notifier); 1348 struct vhost_dev *dev = u->dev; 1349 1350 switch (pnd->reason) { 1351 case POSTCOPY_NOTIFY_PROBE: 1352 if (!virtio_has_feature(dev->protocol_features, 1353 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1354 /* TODO: Get the device name into this error somehow */ 1355 error_setg(pnd->errp, 1356 "vhost-user backend not capable of postcopy"); 1357 return -ENOENT; 1358 } 1359 break; 1360 1361 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1362 return vhost_user_postcopy_advise(dev, pnd->errp); 1363 1364 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1365 return vhost_user_postcopy_listen(dev, pnd->errp); 1366 1367 case POSTCOPY_NOTIFY_INBOUND_END: 1368 return vhost_user_postcopy_end(dev, pnd->errp); 1369 1370 default: 1371 /* We ignore notifications we don't know */ 1372 break; 1373 } 1374 1375 return 0; 1376 } 1377 1378 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) 1379 { 1380 uint64_t features, protocol_features; 1381 struct vhost_user *u; 1382 int err; 1383 1384 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1385 1386 u = g_new0(struct vhost_user, 1); 1387 u->user = opaque; 1388 u->slave_fd = -1; 1389 u->dev = dev; 1390 dev->opaque = u; 1391 1392 err = vhost_user_get_features(dev, &features); 1393 if (err < 0) { 1394 return err; 1395 } 1396 1397 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1398 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1399 1400 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 1401 &protocol_features); 1402 if (err < 0) { 1403 return err; 1404 } 1405 1406 dev->protocol_features = 1407 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; 1408 1409 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1410 /* Don't acknowledge CONFIG feature if device doesn't support it */ 1411 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 1412 } else if (!(protocol_features & 1413 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { 1414 error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG " 1415 "but backend does not support it."); 1416 return -1; 1417 } 1418 1419 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 1420 if (err < 0) { 1421 return err; 1422 } 1423 1424 /* query the max queues we support if backend supports Multiple Queue */ 1425 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 1426 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 1427 &dev->max_queues); 1428 if (err < 0) { 1429 return err; 1430 } 1431 } 1432 1433 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 1434 !(virtio_has_feature(dev->protocol_features, 1435 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 1436 virtio_has_feature(dev->protocol_features, 1437 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 1438 error_report("IOMMU support requires reply-ack and " 1439 "slave-req protocol features."); 1440 return -1; 1441 } 1442 } 1443 1444 if (dev->migration_blocker == NULL && 1445 !virtio_has_feature(dev->protocol_features, 1446 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 1447 error_setg(&dev->migration_blocker, 1448 "Migration disabled: vhost-user backend lacks " 1449 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 1450 } 1451 1452 if (dev->vq_index == 0) { 1453 err = vhost_setup_slave_channel(dev); 1454 if (err < 0) { 1455 return err; 1456 } 1457 } 1458 1459 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 1460 postcopy_add_notifier(&u->postcopy_notifier); 1461 1462 return 0; 1463 } 1464 1465 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 1466 { 1467 struct vhost_user *u; 1468 1469 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1470 1471 u = dev->opaque; 1472 if (u->postcopy_notifier.notify) { 1473 postcopy_remove_notifier(&u->postcopy_notifier); 1474 u->postcopy_notifier.notify = NULL; 1475 } 1476 u->postcopy_listen = false; 1477 if (u->postcopy_fd.handler) { 1478 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1479 close(u->postcopy_fd.fd); 1480 u->postcopy_fd.handler = NULL; 1481 } 1482 if (u->slave_fd >= 0) { 1483 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1484 close(u->slave_fd); 1485 u->slave_fd = -1; 1486 } 1487 g_free(u->region_rb); 1488 u->region_rb = NULL; 1489 g_free(u->region_rb_offset); 1490 u->region_rb_offset = NULL; 1491 u->region_rb_len = 0; 1492 g_free(u); 1493 dev->opaque = 0; 1494 1495 return 0; 1496 } 1497 1498 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 1499 { 1500 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 1501 1502 return idx; 1503 } 1504 1505 static int vhost_user_memslots_limit(struct vhost_dev *dev) 1506 { 1507 return VHOST_MEMORY_MAX_NREGIONS; 1508 } 1509 1510 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 1511 { 1512 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1513 1514 return virtio_has_feature(dev->protocol_features, 1515 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 1516 } 1517 1518 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 1519 { 1520 VhostUserMsg msg = { }; 1521 1522 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1523 1524 /* If guest supports GUEST_ANNOUNCE do nothing */ 1525 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 1526 return 0; 1527 } 1528 1529 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 1530 if (virtio_has_feature(dev->protocol_features, 1531 VHOST_USER_PROTOCOL_F_RARP)) { 1532 msg.hdr.request = VHOST_USER_SEND_RARP; 1533 msg.hdr.flags = VHOST_USER_VERSION; 1534 memcpy((char *)&msg.payload.u64, mac_addr, 6); 1535 msg.hdr.size = sizeof(msg.payload.u64); 1536 1537 return vhost_user_write(dev, &msg, NULL, 0); 1538 } 1539 return -1; 1540 } 1541 1542 static bool vhost_user_can_merge(struct vhost_dev *dev, 1543 uint64_t start1, uint64_t size1, 1544 uint64_t start2, uint64_t size2) 1545 { 1546 ram_addr_t offset; 1547 int mfd, rfd; 1548 MemoryRegion *mr; 1549 1550 mr = memory_region_from_host((void *)(uintptr_t)start1, &offset); 1551 mfd = memory_region_get_fd(mr); 1552 1553 mr = memory_region_from_host((void *)(uintptr_t)start2, &offset); 1554 rfd = memory_region_get_fd(mr); 1555 1556 return mfd == rfd; 1557 } 1558 1559 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 1560 { 1561 VhostUserMsg msg; 1562 bool reply_supported = virtio_has_feature(dev->protocol_features, 1563 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1564 1565 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 1566 return 0; 1567 } 1568 1569 msg.hdr.request = VHOST_USER_NET_SET_MTU; 1570 msg.payload.u64 = mtu; 1571 msg.hdr.size = sizeof(msg.payload.u64); 1572 msg.hdr.flags = VHOST_USER_VERSION; 1573 if (reply_supported) { 1574 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1575 } 1576 1577 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1578 return -1; 1579 } 1580 1581 /* If reply_ack supported, slave has to ack specified MTU is valid */ 1582 if (reply_supported) { 1583 return process_message_reply(dev, &msg); 1584 } 1585 1586 return 0; 1587 } 1588 1589 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 1590 struct vhost_iotlb_msg *imsg) 1591 { 1592 VhostUserMsg msg = { 1593 .hdr.request = VHOST_USER_IOTLB_MSG, 1594 .hdr.size = sizeof(msg.payload.iotlb), 1595 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1596 .payload.iotlb = *imsg, 1597 }; 1598 1599 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1600 return -EFAULT; 1601 } 1602 1603 return process_message_reply(dev, &msg); 1604 } 1605 1606 1607 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 1608 { 1609 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 1610 } 1611 1612 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 1613 uint32_t config_len) 1614 { 1615 VhostUserMsg msg = { 1616 .hdr.request = VHOST_USER_GET_CONFIG, 1617 .hdr.flags = VHOST_USER_VERSION, 1618 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 1619 }; 1620 1621 if (!virtio_has_feature(dev->protocol_features, 1622 VHOST_USER_PROTOCOL_F_CONFIG)) { 1623 return -1; 1624 } 1625 1626 if (config_len > VHOST_USER_MAX_CONFIG_SIZE) { 1627 return -1; 1628 } 1629 1630 msg.payload.config.offset = 0; 1631 msg.payload.config.size = config_len; 1632 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1633 return -1; 1634 } 1635 1636 if (vhost_user_read(dev, &msg) < 0) { 1637 return -1; 1638 } 1639 1640 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 1641 error_report("Received unexpected msg type. Expected %d received %d", 1642 VHOST_USER_GET_CONFIG, msg.hdr.request); 1643 return -1; 1644 } 1645 1646 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 1647 error_report("Received bad msg size."); 1648 return -1; 1649 } 1650 1651 memcpy(config, msg.payload.config.region, config_len); 1652 1653 return 0; 1654 } 1655 1656 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 1657 uint32_t offset, uint32_t size, uint32_t flags) 1658 { 1659 uint8_t *p; 1660 bool reply_supported = virtio_has_feature(dev->protocol_features, 1661 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1662 1663 VhostUserMsg msg = { 1664 .hdr.request = VHOST_USER_SET_CONFIG, 1665 .hdr.flags = VHOST_USER_VERSION, 1666 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 1667 }; 1668 1669 if (!virtio_has_feature(dev->protocol_features, 1670 VHOST_USER_PROTOCOL_F_CONFIG)) { 1671 return -1; 1672 } 1673 1674 if (reply_supported) { 1675 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1676 } 1677 1678 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 1679 return -1; 1680 } 1681 1682 msg.payload.config.offset = offset, 1683 msg.payload.config.size = size, 1684 msg.payload.config.flags = flags, 1685 p = msg.payload.config.region; 1686 memcpy(p, data, size); 1687 1688 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1689 return -1; 1690 } 1691 1692 if (reply_supported) { 1693 return process_message_reply(dev, &msg); 1694 } 1695 1696 return 0; 1697 } 1698 1699 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 1700 void *session_info, 1701 uint64_t *session_id) 1702 { 1703 bool crypto_session = virtio_has_feature(dev->protocol_features, 1704 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 1705 CryptoDevBackendSymSessionInfo *sess_info = session_info; 1706 VhostUserMsg msg = { 1707 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 1708 .hdr.flags = VHOST_USER_VERSION, 1709 .hdr.size = sizeof(msg.payload.session), 1710 }; 1711 1712 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1713 1714 if (!crypto_session) { 1715 error_report("vhost-user trying to send unhandled ioctl"); 1716 return -1; 1717 } 1718 1719 memcpy(&msg.payload.session.session_setup_data, sess_info, 1720 sizeof(CryptoDevBackendSymSessionInfo)); 1721 if (sess_info->key_len) { 1722 memcpy(&msg.payload.session.key, sess_info->cipher_key, 1723 sess_info->key_len); 1724 } 1725 if (sess_info->auth_key_len > 0) { 1726 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 1727 sess_info->auth_key_len); 1728 } 1729 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1730 error_report("vhost_user_write() return -1, create session failed"); 1731 return -1; 1732 } 1733 1734 if (vhost_user_read(dev, &msg) < 0) { 1735 error_report("vhost_user_read() return -1, create session failed"); 1736 return -1; 1737 } 1738 1739 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 1740 error_report("Received unexpected msg type. Expected %d received %d", 1741 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 1742 return -1; 1743 } 1744 1745 if (msg.hdr.size != sizeof(msg.payload.session)) { 1746 error_report("Received bad msg size."); 1747 return -1; 1748 } 1749 1750 if (msg.payload.session.session_id < 0) { 1751 error_report("Bad session id: %" PRId64 "", 1752 msg.payload.session.session_id); 1753 return -1; 1754 } 1755 *session_id = msg.payload.session.session_id; 1756 1757 return 0; 1758 } 1759 1760 static int 1761 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 1762 { 1763 bool crypto_session = virtio_has_feature(dev->protocol_features, 1764 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 1765 VhostUserMsg msg = { 1766 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 1767 .hdr.flags = VHOST_USER_VERSION, 1768 .hdr.size = sizeof(msg.payload.u64), 1769 }; 1770 msg.payload.u64 = session_id; 1771 1772 if (!crypto_session) { 1773 error_report("vhost-user trying to send unhandled ioctl"); 1774 return -1; 1775 } 1776 1777 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1778 error_report("vhost_user_write() return -1, close session failed"); 1779 return -1; 1780 } 1781 1782 return 0; 1783 } 1784 1785 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 1786 MemoryRegionSection *section) 1787 { 1788 bool result; 1789 1790 result = memory_region_get_fd(section->mr) >= 0; 1791 1792 return result; 1793 } 1794 1795 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 1796 uint16_t queue_size, 1797 struct vhost_inflight *inflight) 1798 { 1799 void *addr; 1800 int fd; 1801 struct vhost_user *u = dev->opaque; 1802 CharBackend *chr = u->user->chr; 1803 VhostUserMsg msg = { 1804 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 1805 .hdr.flags = VHOST_USER_VERSION, 1806 .payload.inflight.num_queues = dev->nvqs, 1807 .payload.inflight.queue_size = queue_size, 1808 .hdr.size = sizeof(msg.payload.inflight), 1809 }; 1810 1811 if (!virtio_has_feature(dev->protocol_features, 1812 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 1813 return 0; 1814 } 1815 1816 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1817 return -1; 1818 } 1819 1820 if (vhost_user_read(dev, &msg) < 0) { 1821 return -1; 1822 } 1823 1824 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 1825 error_report("Received unexpected msg type. " 1826 "Expected %d received %d", 1827 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 1828 return -1; 1829 } 1830 1831 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 1832 error_report("Received bad msg size."); 1833 return -1; 1834 } 1835 1836 if (!msg.payload.inflight.mmap_size) { 1837 return 0; 1838 } 1839 1840 fd = qemu_chr_fe_get_msgfd(chr); 1841 if (fd < 0) { 1842 error_report("Failed to get mem fd"); 1843 return -1; 1844 } 1845 1846 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 1847 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 1848 1849 if (addr == MAP_FAILED) { 1850 error_report("Failed to mmap mem fd"); 1851 close(fd); 1852 return -1; 1853 } 1854 1855 inflight->addr = addr; 1856 inflight->fd = fd; 1857 inflight->size = msg.payload.inflight.mmap_size; 1858 inflight->offset = msg.payload.inflight.mmap_offset; 1859 inflight->queue_size = queue_size; 1860 1861 return 0; 1862 } 1863 1864 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 1865 struct vhost_inflight *inflight) 1866 { 1867 VhostUserMsg msg = { 1868 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 1869 .hdr.flags = VHOST_USER_VERSION, 1870 .payload.inflight.mmap_size = inflight->size, 1871 .payload.inflight.mmap_offset = inflight->offset, 1872 .payload.inflight.num_queues = dev->nvqs, 1873 .payload.inflight.queue_size = inflight->queue_size, 1874 .hdr.size = sizeof(msg.payload.inflight), 1875 }; 1876 1877 if (!virtio_has_feature(dev->protocol_features, 1878 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 1879 return 0; 1880 } 1881 1882 if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) { 1883 return -1; 1884 } 1885 1886 return 0; 1887 } 1888 1889 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 1890 { 1891 if (user->chr) { 1892 error_setg(errp, "Cannot initialize vhost-user state"); 1893 return false; 1894 } 1895 user->chr = chr; 1896 return true; 1897 } 1898 1899 void vhost_user_cleanup(VhostUserState *user) 1900 { 1901 int i; 1902 1903 if (!user->chr) { 1904 return; 1905 } 1906 1907 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1908 if (user->notifier[i].addr) { 1909 object_unparent(OBJECT(&user->notifier[i].mr)); 1910 munmap(user->notifier[i].addr, qemu_real_host_page_size); 1911 user->notifier[i].addr = NULL; 1912 } 1913 } 1914 user->chr = NULL; 1915 } 1916 1917 const VhostOps user_ops = { 1918 .backend_type = VHOST_BACKEND_TYPE_USER, 1919 .vhost_backend_init = vhost_user_backend_init, 1920 .vhost_backend_cleanup = vhost_user_backend_cleanup, 1921 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 1922 .vhost_set_log_base = vhost_user_set_log_base, 1923 .vhost_set_mem_table = vhost_user_set_mem_table, 1924 .vhost_set_vring_addr = vhost_user_set_vring_addr, 1925 .vhost_set_vring_endian = vhost_user_set_vring_endian, 1926 .vhost_set_vring_num = vhost_user_set_vring_num, 1927 .vhost_set_vring_base = vhost_user_set_vring_base, 1928 .vhost_get_vring_base = vhost_user_get_vring_base, 1929 .vhost_set_vring_kick = vhost_user_set_vring_kick, 1930 .vhost_set_vring_call = vhost_user_set_vring_call, 1931 .vhost_set_features = vhost_user_set_features, 1932 .vhost_get_features = vhost_user_get_features, 1933 .vhost_set_owner = vhost_user_set_owner, 1934 .vhost_reset_device = vhost_user_reset_device, 1935 .vhost_get_vq_index = vhost_user_get_vq_index, 1936 .vhost_set_vring_enable = vhost_user_set_vring_enable, 1937 .vhost_requires_shm_log = vhost_user_requires_shm_log, 1938 .vhost_migration_done = vhost_user_migration_done, 1939 .vhost_backend_can_merge = vhost_user_can_merge, 1940 .vhost_net_set_mtu = vhost_user_net_set_mtu, 1941 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 1942 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 1943 .vhost_get_config = vhost_user_get_config, 1944 .vhost_set_config = vhost_user_set_config, 1945 .vhost_crypto_create_session = vhost_user_crypto_create_session, 1946 .vhost_crypto_close_session = vhost_user_crypto_close_session, 1947 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 1948 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 1949 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 1950 }; 1951