/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-net.h"
#include "chardev/char-fe.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/sockets.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "standard-headers/linux/vhost_types.h"

#ifdef CONFIG_LINUX
#include <linux/userfaultfd.h>
#endif

/* Max memory regions carried in one VHOST_USER_SET_MEM_TABLE message */
#define VHOST_MEMORY_MAX_NREGIONS    8
/* Virtio feature bit: the slave supports the protocol-features handshake */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Max file descriptors attached to a single slave-channel message */
#define VHOST_USER_SLAVE_MAX_FDS     8

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

/* Optional protocol capabilities negotiated with the slave */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_MAX
};

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

/* Master-to-slave request codes (on-the-wire values, do not reorder) */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_MAX
} VhostUserRequest;

/* Slave-to-master request codes (sent on the separate slave channel) */
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;

/* One guest memory region as described on the wire */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

/* Dirty-log shared-memory descriptor (fd travels as ancillary data) */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64

typedef struct VhostUserCryptoSession {
    /* session id for success, -1 on errors */
    int64_t session_id;
    CryptoDevBackendSymSessionInfo session_setup_data;
    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;

/* Dummy instance only used so sizeof() on members works in the macro below */
static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))

/* Describes an mmap-able host-notifier region for one vring */
typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;

typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1<<2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
        struct vhost_iotlb_msg iotlb;
        VhostUserConfig config;
        VhostUserCryptoSession session;
        VhostUserVringArea area;
        VhostUserInflight inflight;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

/* Dummy instance only used so sizeof() works in the macros below */
static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1) 203 204 struct vhost_user { 205 struct vhost_dev *dev; 206 /* Shared between vhost devs of the same virtio device */ 207 VhostUserState *user; 208 int slave_fd; 209 NotifierWithReturn postcopy_notifier; 210 struct PostCopyFD postcopy_fd; 211 uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; 212 /* Length of the region_rb and region_rb_offset arrays */ 213 size_t region_rb_len; 214 /* RAMBlock associated with a given region */ 215 RAMBlock **region_rb; 216 /* The offset from the start of the RAMBlock to the start of the 217 * vhost region. 218 */ 219 ram_addr_t *region_rb_offset; 220 221 /* True once we've entered postcopy_listen */ 222 bool postcopy_listen; 223 }; 224 225 static bool ioeventfd_enabled(void) 226 { 227 return !kvm_enabled() || kvm_eventfds_enabled(); 228 } 229 230 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 231 { 232 struct vhost_user *u = dev->opaque; 233 CharBackend *chr = u->user->chr; 234 uint8_t *p = (uint8_t *) msg; 235 int r, size = VHOST_USER_HDR_SIZE; 236 237 r = qemu_chr_fe_read_all(chr, p, size); 238 if (r != size) { 239 error_report("Failed to read msg header. Read %d instead of %d." 240 " Original request %d.", r, size, msg->hdr.request); 241 return -1; 242 } 243 244 /* validate received flags */ 245 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 246 error_report("Failed to read msg header." 
247 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 248 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 249 return -1; 250 } 251 252 return 0; 253 } 254 255 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 256 { 257 struct vhost_user *u = dev->opaque; 258 CharBackend *chr = u->user->chr; 259 uint8_t *p = (uint8_t *) msg; 260 int r, size; 261 262 if (vhost_user_read_header(dev, msg) < 0) { 263 return -1; 264 } 265 266 /* validate message size is sane */ 267 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 268 error_report("Failed to read msg header." 269 " Size %d exceeds the maximum %zu.", msg->hdr.size, 270 VHOST_USER_PAYLOAD_SIZE); 271 return -1; 272 } 273 274 if (msg->hdr.size) { 275 p += VHOST_USER_HDR_SIZE; 276 size = msg->hdr.size; 277 r = qemu_chr_fe_read_all(chr, p, size); 278 if (r != size) { 279 error_report("Failed to read msg payload." 280 " Read %d instead of %d.", r, msg->hdr.size); 281 return -1; 282 } 283 } 284 285 return 0; 286 } 287 288 static int process_message_reply(struct vhost_dev *dev, 289 const VhostUserMsg *msg) 290 { 291 VhostUserMsg msg_reply; 292 293 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 294 return 0; 295 } 296 297 if (vhost_user_read(dev, &msg_reply) < 0) { 298 return -1; 299 } 300 301 if (msg_reply.hdr.request != msg->hdr.request) { 302 error_report("Received unexpected msg type." 303 "Expected %d received %d", 304 msg->hdr.request, msg_reply.hdr.request); 305 return -1; 306 } 307 308 return msg_reply.payload.u64 ? 
-1 : 0; 309 } 310 311 static bool vhost_user_one_time_request(VhostUserRequest request) 312 { 313 switch (request) { 314 case VHOST_USER_SET_OWNER: 315 case VHOST_USER_RESET_OWNER: 316 case VHOST_USER_SET_MEM_TABLE: 317 case VHOST_USER_GET_QUEUE_NUM: 318 case VHOST_USER_NET_SET_MTU: 319 return true; 320 default: 321 return false; 322 } 323 } 324 325 /* most non-init callers ignore the error */ 326 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 327 int *fds, int fd_num) 328 { 329 struct vhost_user *u = dev->opaque; 330 CharBackend *chr = u->user->chr; 331 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 332 333 /* 334 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 335 * we just need send it once in the first time. For later such 336 * request, we just ignore it. 337 */ 338 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 339 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 340 return 0; 341 } 342 343 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 344 error_report("Failed to set msg fds."); 345 return -1; 346 } 347 348 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 349 if (ret != size) { 350 error_report("Failed to write msg." 
351 " Wrote %d instead of %d.", ret, size); 352 return -1; 353 } 354 355 return 0; 356 } 357 358 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd) 359 { 360 VhostUserMsg msg = { 361 .hdr.request = VHOST_USER_GPU_SET_SOCKET, 362 .hdr.flags = VHOST_USER_VERSION, 363 }; 364 365 return vhost_user_write(dev, &msg, &fd, 1); 366 } 367 368 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 369 struct vhost_log *log) 370 { 371 int fds[VHOST_MEMORY_MAX_NREGIONS]; 372 size_t fd_num = 0; 373 bool shmfd = virtio_has_feature(dev->protocol_features, 374 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 375 VhostUserMsg msg = { 376 .hdr.request = VHOST_USER_SET_LOG_BASE, 377 .hdr.flags = VHOST_USER_VERSION, 378 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 379 .payload.log.mmap_offset = 0, 380 .hdr.size = sizeof(msg.payload.log), 381 }; 382 383 if (shmfd && log->fd != -1) { 384 fds[fd_num++] = log->fd; 385 } 386 387 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 388 return -1; 389 } 390 391 if (shmfd) { 392 msg.hdr.size = 0; 393 if (vhost_user_read(dev, &msg) < 0) { 394 return -1; 395 } 396 397 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 398 error_report("Received unexpected msg type. 
" 399 "Expected %d received %d", 400 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 401 return -1; 402 } 403 } 404 405 return 0; 406 } 407 408 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 409 struct vhost_memory *mem) 410 { 411 struct vhost_user *u = dev->opaque; 412 int fds[VHOST_MEMORY_MAX_NREGIONS]; 413 int i, fd; 414 size_t fd_num = 0; 415 VhostUserMsg msg_reply; 416 int region_i, msg_i; 417 418 VhostUserMsg msg = { 419 .hdr.request = VHOST_USER_SET_MEM_TABLE, 420 .hdr.flags = VHOST_USER_VERSION, 421 }; 422 423 if (u->region_rb_len < dev->mem->nregions) { 424 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 425 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 426 dev->mem->nregions); 427 memset(&(u->region_rb[u->region_rb_len]), '\0', 428 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 429 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 430 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 431 u->region_rb_len = dev->mem->nregions; 432 } 433 434 for (i = 0; i < dev->mem->nregions; ++i) { 435 struct vhost_memory_region *reg = dev->mem->regions + i; 436 ram_addr_t offset; 437 MemoryRegion *mr; 438 439 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 440 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 441 &offset); 442 fd = memory_region_get_fd(mr); 443 if (fd > 0) { 444 trace_vhost_user_set_mem_table_withfd(fd_num, mr->name, 445 reg->memory_size, 446 reg->guest_phys_addr, 447 reg->userspace_addr, offset); 448 u->region_rb_offset[i] = offset; 449 u->region_rb[i] = mr->ram_block; 450 msg.payload.memory.regions[fd_num].userspace_addr = 451 reg->userspace_addr; 452 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 453 msg.payload.memory.regions[fd_num].guest_phys_addr = 454 reg->guest_phys_addr; 455 msg.payload.memory.regions[fd_num].mmap_offset = offset; 456 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 457 fds[fd_num++] = fd; 458 
} else { 459 u->region_rb_offset[i] = 0; 460 u->region_rb[i] = NULL; 461 } 462 } 463 464 msg.payload.memory.nregions = fd_num; 465 466 if (!fd_num) { 467 error_report("Failed initializing vhost-user memory map, " 468 "consider using -object memory-backend-file share=on"); 469 return -1; 470 } 471 472 msg.hdr.size = sizeof(msg.payload.memory.nregions); 473 msg.hdr.size += sizeof(msg.payload.memory.padding); 474 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 475 476 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 477 return -1; 478 } 479 480 if (vhost_user_read(dev, &msg_reply) < 0) { 481 return -1; 482 } 483 484 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 485 error_report("%s: Received unexpected msg type." 486 "Expected %d received %d", __func__, 487 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 488 return -1; 489 } 490 /* We're using the same structure, just reusing one of the 491 * fields, so it should be the same size. 492 */ 493 if (msg_reply.hdr.size != msg.hdr.size) { 494 error_report("%s: Unexpected size for postcopy reply " 495 "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); 496 return -1; 497 } 498 499 memset(u->postcopy_client_bases, 0, 500 sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); 501 502 /* They're in the same order as the regions that were sent 503 * but some of the regions were skipped (above) if they 504 * didn't have fd's 505 */ 506 for (msg_i = 0, region_i = 0; 507 region_i < dev->mem->nregions; 508 region_i++) { 509 if (msg_i < fd_num && 510 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 511 dev->mem->regions[region_i].guest_phys_addr) { 512 u->postcopy_client_bases[region_i] = 513 msg_reply.payload.memory.regions[msg_i].userspace_addr; 514 trace_vhost_user_set_mem_table_postcopy( 515 msg_reply.payload.memory.regions[msg_i].userspace_addr, 516 msg.payload.memory.regions[msg_i].userspace_addr, 517 msg_i, region_i); 518 msg_i++; 519 } 520 } 521 if (msg_i != fd_num) { 522 error_report("%s: 
postcopy reply not fully consumed " 523 "%d vs %zd", 524 __func__, msg_i, fd_num); 525 return -1; 526 } 527 /* Now we've registered this with the postcopy code, we ack to the client, 528 * because now we're in the position to be able to deal with any faults 529 * it generates. 530 */ 531 /* TODO: Use this for failure cases as well with a bad value */ 532 msg.hdr.size = sizeof(msg.payload.u64); 533 msg.payload.u64 = 0; /* OK */ 534 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 535 return -1; 536 } 537 538 return 0; 539 } 540 541 static int vhost_user_set_mem_table(struct vhost_dev *dev, 542 struct vhost_memory *mem) 543 { 544 struct vhost_user *u = dev->opaque; 545 int fds[VHOST_MEMORY_MAX_NREGIONS]; 546 int i, fd; 547 size_t fd_num = 0; 548 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 549 bool reply_supported = virtio_has_feature(dev->protocol_features, 550 VHOST_USER_PROTOCOL_F_REPLY_ACK); 551 552 if (do_postcopy) { 553 /* Postcopy has enough differences that it's best done in it's own 554 * version 555 */ 556 return vhost_user_set_mem_table_postcopy(dev, mem); 557 } 558 559 VhostUserMsg msg = { 560 .hdr.request = VHOST_USER_SET_MEM_TABLE, 561 .hdr.flags = VHOST_USER_VERSION, 562 }; 563 564 if (reply_supported) { 565 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 566 } 567 568 for (i = 0; i < dev->mem->nregions; ++i) { 569 struct vhost_memory_region *reg = dev->mem->regions + i; 570 ram_addr_t offset; 571 MemoryRegion *mr; 572 573 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 574 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 575 &offset); 576 fd = memory_region_get_fd(mr); 577 if (fd > 0) { 578 if (fd_num == VHOST_MEMORY_MAX_NREGIONS) { 579 error_report("Failed preparing vhost-user memory table msg"); 580 return -1; 581 } 582 msg.payload.memory.regions[fd_num].userspace_addr = 583 reg->userspace_addr; 584 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 585 
msg.payload.memory.regions[fd_num].guest_phys_addr = 586 reg->guest_phys_addr; 587 msg.payload.memory.regions[fd_num].mmap_offset = offset; 588 fds[fd_num++] = fd; 589 } 590 } 591 592 msg.payload.memory.nregions = fd_num; 593 594 if (!fd_num) { 595 error_report("Failed initializing vhost-user memory map, " 596 "consider using -object memory-backend-file share=on"); 597 return -1; 598 } 599 600 msg.hdr.size = sizeof(msg.payload.memory.nregions); 601 msg.hdr.size += sizeof(msg.payload.memory.padding); 602 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 603 604 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 605 return -1; 606 } 607 608 if (reply_supported) { 609 return process_message_reply(dev, &msg); 610 } 611 612 return 0; 613 } 614 615 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 616 struct vhost_vring_addr *addr) 617 { 618 VhostUserMsg msg = { 619 .hdr.request = VHOST_USER_SET_VRING_ADDR, 620 .hdr.flags = VHOST_USER_VERSION, 621 .payload.addr = *addr, 622 .hdr.size = sizeof(msg.payload.addr), 623 }; 624 625 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 626 return -1; 627 } 628 629 return 0; 630 } 631 632 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 633 struct vhost_vring_state *ring) 634 { 635 bool cross_endian = virtio_has_feature(dev->protocol_features, 636 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 637 VhostUserMsg msg = { 638 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 639 .hdr.flags = VHOST_USER_VERSION, 640 .payload.state = *ring, 641 .hdr.size = sizeof(msg.payload.state), 642 }; 643 644 if (!cross_endian) { 645 error_report("vhost-user trying to send unhandled ioctl"); 646 return -1; 647 } 648 649 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 650 return -1; 651 } 652 653 return 0; 654 } 655 656 static int vhost_set_vring(struct vhost_dev *dev, 657 unsigned long int request, 658 struct vhost_vring_state *ring) 659 { 660 VhostUserMsg msg = { 661 .hdr.request = request, 662 .hdr.flags = VHOST_USER_VERSION, 663 
.payload.state = *ring, 664 .hdr.size = sizeof(msg.payload.state), 665 }; 666 667 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 668 return -1; 669 } 670 671 return 0; 672 } 673 674 static int vhost_user_set_vring_num(struct vhost_dev *dev, 675 struct vhost_vring_state *ring) 676 { 677 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 678 } 679 680 static void vhost_user_host_notifier_restore(struct vhost_dev *dev, 681 int queue_idx) 682 { 683 struct vhost_user *u = dev->opaque; 684 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 685 VirtIODevice *vdev = dev->vdev; 686 687 if (n->addr && !n->set) { 688 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true); 689 n->set = true; 690 } 691 } 692 693 static void vhost_user_host_notifier_remove(struct vhost_dev *dev, 694 int queue_idx) 695 { 696 struct vhost_user *u = dev->opaque; 697 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 698 VirtIODevice *vdev = dev->vdev; 699 700 if (n->addr && n->set) { 701 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 702 n->set = false; 703 } 704 } 705 706 static int vhost_user_set_vring_base(struct vhost_dev *dev, 707 struct vhost_vring_state *ring) 708 { 709 vhost_user_host_notifier_restore(dev, ring->index); 710 711 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 712 } 713 714 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 715 { 716 int i; 717 718 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 719 return -1; 720 } 721 722 for (i = 0; i < dev->nvqs; ++i) { 723 struct vhost_vring_state state = { 724 .index = dev->vq_index + i, 725 .num = enable, 726 }; 727 728 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 729 } 730 731 return 0; 732 } 733 734 static int vhost_user_get_vring_base(struct vhost_dev *dev, 735 struct vhost_vring_state *ring) 736 { 737 VhostUserMsg msg = { 738 .hdr.request = VHOST_USER_GET_VRING_BASE, 739 .hdr.flags = 
VHOST_USER_VERSION, 740 .payload.state = *ring, 741 .hdr.size = sizeof(msg.payload.state), 742 }; 743 744 vhost_user_host_notifier_remove(dev, ring->index); 745 746 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 747 return -1; 748 } 749 750 if (vhost_user_read(dev, &msg) < 0) { 751 return -1; 752 } 753 754 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 755 error_report("Received unexpected msg type. Expected %d received %d", 756 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 757 return -1; 758 } 759 760 if (msg.hdr.size != sizeof(msg.payload.state)) { 761 error_report("Received bad msg size."); 762 return -1; 763 } 764 765 *ring = msg.payload.state; 766 767 return 0; 768 } 769 770 static int vhost_set_vring_file(struct vhost_dev *dev, 771 VhostUserRequest request, 772 struct vhost_vring_file *file) 773 { 774 int fds[VHOST_MEMORY_MAX_NREGIONS]; 775 size_t fd_num = 0; 776 VhostUserMsg msg = { 777 .hdr.request = request, 778 .hdr.flags = VHOST_USER_VERSION, 779 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 780 .hdr.size = sizeof(msg.payload.u64), 781 }; 782 783 if (ioeventfd_enabled() && file->fd > 0) { 784 fds[fd_num++] = file->fd; 785 } else { 786 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 787 } 788 789 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 790 return -1; 791 } 792 793 return 0; 794 } 795 796 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 797 struct vhost_vring_file *file) 798 { 799 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 800 } 801 802 static int vhost_user_set_vring_call(struct vhost_dev *dev, 803 struct vhost_vring_file *file) 804 { 805 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 806 } 807 808 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 809 { 810 VhostUserMsg msg = { 811 .hdr.request = request, 812 .hdr.flags = VHOST_USER_VERSION, 813 .payload.u64 = u64, 814 .hdr.size = sizeof(msg.payload.u64), 815 }; 816 817 if (vhost_user_write(dev, 
&msg, NULL, 0) < 0) { 818 return -1; 819 } 820 821 return 0; 822 } 823 824 static int vhost_user_set_features(struct vhost_dev *dev, 825 uint64_t features) 826 { 827 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 828 } 829 830 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 831 uint64_t features) 832 { 833 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 834 } 835 836 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 837 { 838 VhostUserMsg msg = { 839 .hdr.request = request, 840 .hdr.flags = VHOST_USER_VERSION, 841 }; 842 843 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 844 return 0; 845 } 846 847 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 848 return -1; 849 } 850 851 if (vhost_user_read(dev, &msg) < 0) { 852 return -1; 853 } 854 855 if (msg.hdr.request != request) { 856 error_report("Received unexpected msg type. Expected %d received %d", 857 request, msg.hdr.request); 858 return -1; 859 } 860 861 if (msg.hdr.size != sizeof(msg.payload.u64)) { 862 error_report("Received bad msg size."); 863 return -1; 864 } 865 866 *u64 = msg.payload.u64; 867 868 return 0; 869 } 870 871 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 872 { 873 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 874 } 875 876 static int vhost_user_set_owner(struct vhost_dev *dev) 877 { 878 VhostUserMsg msg = { 879 .hdr.request = VHOST_USER_SET_OWNER, 880 .hdr.flags = VHOST_USER_VERSION, 881 }; 882 883 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 884 return -1; 885 } 886 887 return 0; 888 } 889 890 static int vhost_user_reset_device(struct vhost_dev *dev) 891 { 892 VhostUserMsg msg = { 893 .hdr.request = VHOST_USER_RESET_OWNER, 894 .hdr.flags = VHOST_USER_VERSION, 895 }; 896 897 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 898 return -1; 899 } 900 901 return 0; 902 } 903 904 static int 
vhost_user_slave_handle_config_change(struct vhost_dev *dev) 905 { 906 int ret = -1; 907 908 if (!dev->config_ops) { 909 return -1; 910 } 911 912 if (dev->config_ops->vhost_dev_config_notifier) { 913 ret = dev->config_ops->vhost_dev_config_notifier(dev); 914 } 915 916 return ret; 917 } 918 919 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 920 VhostUserVringArea *area, 921 int fd) 922 { 923 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 924 size_t page_size = qemu_real_host_page_size; 925 struct vhost_user *u = dev->opaque; 926 VhostUserState *user = u->user; 927 VirtIODevice *vdev = dev->vdev; 928 VhostUserHostNotifier *n; 929 void *addr; 930 char *name; 931 932 if (!virtio_has_feature(dev->protocol_features, 933 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 934 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 935 return -1; 936 } 937 938 n = &user->notifier[queue_idx]; 939 940 if (n->addr) { 941 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 942 object_unparent(OBJECT(&n->mr)); 943 munmap(n->addr, page_size); 944 n->addr = NULL; 945 } 946 947 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 948 return 0; 949 } 950 951 /* Sanity check. 
*/ 952 if (area->size != page_size) { 953 return -1; 954 } 955 956 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 957 fd, area->offset); 958 if (addr == MAP_FAILED) { 959 return -1; 960 } 961 962 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 963 user, queue_idx); 964 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 965 page_size, addr); 966 g_free(name); 967 968 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 969 munmap(addr, page_size); 970 return -1; 971 } 972 973 n->addr = addr; 974 n->set = true; 975 976 return 0; 977 } 978 979 static void slave_read(void *opaque) 980 { 981 struct vhost_dev *dev = opaque; 982 struct vhost_user *u = dev->opaque; 983 VhostUserHeader hdr = { 0, }; 984 VhostUserPayload payload = { 0, }; 985 int size, ret = 0; 986 struct iovec iov; 987 struct msghdr msgh; 988 int fd[VHOST_USER_SLAVE_MAX_FDS]; 989 char control[CMSG_SPACE(sizeof(fd))]; 990 struct cmsghdr *cmsg; 991 int i, fdsize = 0; 992 993 memset(&msgh, 0, sizeof(msgh)); 994 msgh.msg_iov = &iov; 995 msgh.msg_iovlen = 1; 996 msgh.msg_control = control; 997 msgh.msg_controllen = sizeof(control); 998 999 memset(fd, -1, sizeof(fd)); 1000 1001 /* Read header */ 1002 iov.iov_base = &hdr; 1003 iov.iov_len = VHOST_USER_HDR_SIZE; 1004 1005 do { 1006 size = recvmsg(u->slave_fd, &msgh, 0); 1007 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 1008 1009 if (size != VHOST_USER_HDR_SIZE) { 1010 error_report("Failed to read from slave."); 1011 goto err; 1012 } 1013 1014 if (msgh.msg_flags & MSG_CTRUNC) { 1015 error_report("Truncated message."); 1016 goto err; 1017 } 1018 1019 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 1020 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 1021 if (cmsg->cmsg_level == SOL_SOCKET && 1022 cmsg->cmsg_type == SCM_RIGHTS) { 1023 fdsize = cmsg->cmsg_len - CMSG_LEN(0); 1024 memcpy(fd, CMSG_DATA(cmsg), fdsize); 1025 break; 1026 } 1027 } 1028 1029 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1030 
error_report("Failed to read msg header." 1031 " Size %d exceeds the maximum %zu.", hdr.size, 1032 VHOST_USER_PAYLOAD_SIZE); 1033 goto err; 1034 } 1035 1036 /* Read payload */ 1037 do { 1038 size = read(u->slave_fd, &payload, hdr.size); 1039 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 1040 1041 if (size != hdr.size) { 1042 error_report("Failed to read payload from slave."); 1043 goto err; 1044 } 1045 1046 switch (hdr.request) { 1047 case VHOST_USER_SLAVE_IOTLB_MSG: 1048 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1049 break; 1050 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1051 ret = vhost_user_slave_handle_config_change(dev); 1052 break; 1053 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1054 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1055 fd[0]); 1056 break; 1057 default: 1058 error_report("Received unexpected msg type."); 1059 ret = -EINVAL; 1060 } 1061 1062 /* Close the remaining file descriptors. */ 1063 for (i = 0; i < fdsize; i++) { 1064 if (fd[i] != -1) { 1065 close(fd[i]); 1066 } 1067 } 1068 1069 /* 1070 * REPLY_ACK feature handling. Other reply types has to be managed 1071 * directly in their request handlers. 
     */
    /*
     * Tail of slave_read(): if the slave set NEED_REPLY on its request,
     * send back a single u64 status (0 on success, 1 on failure) with the
     * REPLY flag set, reusing the request's header.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        struct iovec iovec[2];

        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        hdr.flags |= VHOST_USER_REPLY_MASK;

        /* ret is the request handler's result; reply is non-zero on error */
        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        iovec[0].iov_base = &hdr;
        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
        iovec[1].iov_base = &payload;
        iovec[1].iov_len = hdr.size;

        /* Retry on signal interruption or transient EAGAIN */
        do {
            size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
        } while (size < 0 && (errno == EINTR || errno == EAGAIN));

        if (size != VHOST_USER_HDR_SIZE + hdr.size) {
            error_report("Failed to send msg reply to slave.");
            goto err;
        }
    }

    return;

err:
    /* On any failure tear down the slave channel and close received fds */
    qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
    close(u->slave_fd);
    u->slave_fd = -1;
    for (i = 0; i < fdsize; i++) {
        if (fd[i] != -1) {
            close(fd[i]);
        }
    }
    return;
}

/*
 * Create the socketpair used for slave-initiated (backend to QEMU)
 * requests and hand one end to the backend via
 * VHOST_USER_SET_SLAVE_REQ_FD.  QEMU's end is serviced by slave_read().
 * Returns 0 on success (including when the backend lacks the SLAVE_REQ
 * protocol feature, in which case no channel is set up), negative on error.
 */
static int vhost_setup_slave_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
        return 0;
    }

    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        error_report("socketpair() failed");
        return -1;
    }

    u->slave_fd = sv[0];
    qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    /* sv[1] travels as ancillary data with the message */
    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    /* Our copy of the slave's end is no longer needed once sent */
    close(sv[1]);
    if (ret) {
        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
        close(u->slave_fd);
        u->slave_fd = -1;
    }

    return ret;
}

#ifdef CONFIG_LINUX
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    /* Find the vhost memory region containing the faulting client address */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                /* Ask the migration code to pull the shared page */
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}

/*
 * Wake any client threads blocked on the given RAMBlock offset once the
 * page has arrived.  Returns 0 when no matching region is found (not an
 * error: the block may simply not be mapped by this device).
 */
static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the client's address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif

/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type. Expected %d received %d",
                   VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
        return -1;
    }

    /* The reply carries no payload, only an fd as ancillary data */
    if (msg.hdr.size) {
        error_setg(errp, "Received bad msg size.");
        return -1;
    }
    /* The backend sends its userfaultfd along with the reply */
    ufd = qemu_chr_fe_get_msgfd(chr);
    if (ufd < 0) {
        error_setg(errp, "%s: Failed to get ufd", __func__);
        return -1;
    }
    qemu_set_nonblock(ufd);

    /* register ufd with userfault thread */
    u->postcopy_fd.fd = ufd;
    u->postcopy_fd.data = dev;
    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
    u->postcopy_fd.waker = vhost_user_postcopy_waker;
    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
    postcopy_register_shared_ufd(&u->postcopy_fd);
    return 0;
#else
    error_setg(errp, "Postcopy not supported on non-Linux systems");
    return -1;
#endif
}

/*
 * Called at the switch to postcopy on reception of the 'listen' command.
 */
static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
{
    struct vhost_user *u = dev->opaque;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    u->postcopy_listen = true;
    trace_vhost_user_postcopy_listen();
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_listen to vhost");
        return -1;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_listen");
        return ret;
    }

    return 0;
}

/*
 * Called at the end of postcopy
 */
static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_END,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    int ret;
    struct vhost_user *u = dev->opaque;

    trace_vhost_user_postcopy_end_entry();
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_end to vhost");
        return -1;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_end");
        return ret;
    }
    /* Done with the userfaultfd registered during 'advise' */
    postcopy_unregister_shared_ufd(&u->postcopy_fd);
    close(u->postcopy_fd.fd);
    u->postcopy_fd.handler = NULL;

    trace_vhost_user_postcopy_end_exit();

    return 0;
}

/*
 * Postcopy notifier: dispatches the postcopy lifecycle events to the
 * handlers above, and rejects postcopy at PROBE time if the backend
 * lacks the PAGEFAULT protocol feature.
 */
static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
                                        void *opaque)
{
    struct PostcopyNotifyData *pnd = opaque;
    struct vhost_user *u = container_of(notifier, struct vhost_user,
                                         postcopy_notifier);
    struct vhost_dev *dev = u->dev;

    switch (pnd->reason) {
    case POSTCOPY_NOTIFY_PROBE:
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
            /* TODO: Get the device name into this error somehow */
            error_setg(pnd->errp,
                       "vhost-user backend not capable of postcopy");
            return -ENOENT;
        }
        break;

    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
        return vhost_user_postcopy_advise(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
        return vhost_user_postcopy_listen(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_END:
        return vhost_user_postcopy_end(dev, pnd->errp);

    default:
        /* We ignore notifications we don't know */
        break;
    }

    return 0;
}

/*
 * Backend init entry point (VhostOps.vhost_backend_init): allocates the
 * vhost_user state, negotiates base and protocol features with the
 * backend, validates feature combinations (CONFIG, IOMMU), queries the
 * max queue count, installs a migration blocker if dirty-log sharing is
 * unavailable, sets up the slave channel and registers for postcopy
 * notifications.  Returns 0 on success, negative errno on failure.
 */
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
{
    uint64_t features, protocol_features;
    struct vhost_user *u;
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = g_new0(struct vhost_user, 1);
    u->user = opaque;
    u->slave_fd = -1;
    u->dev = dev;
    dev->opaque = u;

    err = vhost_user_get_features(dev, &features);
    if (err < 0) {
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &protocol_features);
        if (err < 0) {
            return err;
        }

        dev->protocol_features =
            protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;

        if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
            /* Don't acknowledge CONFIG feature if device doesn't support it */
            dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
        } else if (!(protocol_features &
                    (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
            error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
                    "but backend does not support it.");
            return -1;
        }

        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
        if (err < 0) {
            return err;
        }

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
            if (err < 0) {
                return err;
            }
        }

        /* IOTLB updates need the slave channel and acked replies */
        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
                !(virtio_has_feature(dev->protocol_features,
                    VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
                 virtio_has_feature(dev->protocol_features,
                    VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
            error_report("IOMMU support requires reply-ack and "
                         "slave-req protocol features.");
            return -1;
        }
    }

    /* Without shared dirty-log memory, migration cannot be supported */
    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

    err = vhost_setup_slave_channel(dev);
    if (err < 0) {
        return err;
    }

    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
    postcopy_add_notifier(&u->postcopy_notifier);

    return 0;
}

/*
 * Backend cleanup entry point: undoes everything done in
 * vhost_user_backend_init() — notifier, postcopy ufd, slave channel and
 * per-region bookkeeping — then frees the vhost_user state.
 */
static int vhost_user_backend_cleanup(struct vhost_dev *dev)
{
    struct vhost_user *u;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = dev->opaque;
    if (u->postcopy_notifier.notify) {
        postcopy_remove_notifier(&u->postcopy_notifier);
        u->postcopy_notifier.notify = NULL;
    }
    u->postcopy_listen = false;
    if (u->postcopy_fd.handler) {
        postcopy_unregister_shared_ufd(&u->postcopy_fd);
        close(u->postcopy_fd.fd);
        u->postcopy_fd.handler = NULL;
    }
    if (u->slave_fd >= 0) {
        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
        close(u->slave_fd);
        u->slave_fd = -1;
    }
    g_free(u->region_rb);
    u->region_rb = NULL;
    g_free(u->region_rb_offset);
    u->region_rb_offset = NULL;
    u->region_rb_len = 0;
    g_free(u);
    dev->opaque = 0;

    return 0;
}

/* vhost-user queue indices are absolute; just validate and return idx */
static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx;
}

/* Maximum number of memory regions a SET_MEM_TABLE message can carry */
static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    return VHOST_MEMORY_MAX_NREGIONS;
}

/* Dirty log must live in shared memory iff LOG_SHMFD was negotiated */
static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}

/*
 * After migration, ask the backend to announce the guest's presence by
 * broadcasting a RARP for mac_addr — unless the guest itself negotiated
 * VIRTIO_NET_F_GUEST_ANNOUNCE and will do so on its own.
 */
static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
{
    VhostUserMsg msg = { };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If guest supports GUEST_ANNOUNCE do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.hdr.request = VHOST_USER_SEND_RARP;
        msg.hdr.flags = VHOST_USER_VERSION;
        /* The 6-byte MAC is carried in the first bytes of the u64 payload */
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
        msg.hdr.size = sizeof(msg.payload.u64);

        return vhost_user_write(dev, &msg, NULL, 0);
    }
    return -1;
}

/*
 * Two adjacent host-memory ranges may be merged into one vhost region
 * only when they are backed by the same fd, since regions are passed to
 * the backend as (fd, offset, size) tuples.
 */
static bool vhost_user_can_merge(struct vhost_dev *dev,
                                 uint64_t start1, uint64_t size1,
                                 uint64_t start2, uint64_t size2)
{
    ram_addr_t offset;
    int mfd, rfd;
    MemoryRegion *mr;

    mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
    mfd = memory_region_get_fd(mr);

    mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
    rfd = memory_region_get_fd(mr);

    return mfd == rfd;
}

/*
 * Tell the backend the configured MTU.  A no-op (success) when the
 * NET_MTU protocol feature was not negotiated.
 */
static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
{
    VhostUserMsg msg;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
        return 0;
    }

    msg.hdr.request = VHOST_USER_NET_SET_MTU;
    msg.payload.u64 = mtu;
    msg.hdr.size = sizeof(msg.payload.u64);
    msg.hdr.flags = VHOST_USER_VERSION;
    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    /* If reply_ack supported, slave has to ack specified MTU is valid */
    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}

/*
 * Forward an IOTLB update/invalidate to the backend and wait for its
 * acknowledgement (NEED_REPLY is always set for IOTLB messages).
 */
static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
                                            struct vhost_iotlb_msg *imsg)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_IOTLB_MSG,
        .hdr.size = sizeof(msg.payload.iotlb),
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
        .payload.iotlb = *imsg,
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -EFAULT;
    }

    return process_message_reply(dev, &msg);
}


static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}

/*
 * Read config_len bytes of the device's virtio config space from the
 * backend.  Requires the CONFIG protocol feature; config_len is capped
 * at VHOST_USER_MAX_CONFIG_SIZE.
 */
static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
    };

    if (!virtio_has_feature(dev->protocol_features,
                VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -1;
    }

    if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
        return -1;
    }

    msg.payload.config.offset = 0;
    msg.payload.config.size = config_len;
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_CONFIG, msg.hdr.request);
        return -1;
    }

    /* Reply must echo exactly the size we asked for */
    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
        error_report("Received bad msg size.");
        return -1;
    }

    memcpy(config, msg.payload.config.region, config_len);

    return 0;
}

/*
 * Write 'size' bytes at 'offset' into the device's virtio config space
 * on the backend side.  Requires the CONFIG protocol feature; waits for
 * an ack when REPLY_ACK was negotiated.
 */
static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size, uint32_t flags)
{
    uint8_t *p;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
    };

    if (!virtio_has_feature(dev->protocol_features,
                VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -1;
    }

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
        return -1;
    }

    /* NOTE: the trailing commas below are comma operators, not typos that
     * change behavior; each assignment still takes effect. */
    msg.payload.config.offset = offset,
    msg.payload.config.size = size,
    msg.payload.config.flags = flags,
    p = msg.payload.config.region;
    memcpy(p, data, size);

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}

/*
 * Create a crypto (symmetric) session on the backend.  On success the
 * backend-assigned id is stored in *session_id.  Requires the
 * CRYPTO_SESSION protocol feature.
 */
static int vhost_user_crypto_create_session(struct vhost_dev *dev,
                                            void *session_info,
                                            uint64_t *session_id)
{
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    CryptoDevBackendSymSessionInfo *sess_info = session_info;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.session),
    };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -1;
    }

    /* Copy the session descriptor, then the cipher and auth keys if set */
    memcpy(&msg.payload.session.session_setup_data, sess_info,
              sizeof(CryptoDevBackendSymSessionInfo));
    if (sess_info->key_len) {
        memcpy(&msg.payload.session.key, sess_info->cipher_key,
               sess_info->key_len);
    }
    if (sess_info->auth_key_len > 0) {
        memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
               sess_info->auth_key_len);
    }
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_report("vhost_user_write() return -1, create session failed");
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        error_report("vhost_user_read() return -1, create session failed");
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.session)) {
        error_report("Received bad msg size.");
        return -1;
    }

    /* A negative session_id signals failure on the backend side */
    if (msg.payload.session.session_id < 0) {
        error_report("Bad session id: %" PRId64 "",
                              msg.payload.session.session_id);
        return -1;
    }
    *session_id = msg.payload.session.session_id;

    return 0;
}

/*
 * Close a previously created crypto session on the backend.  Fire and
 * forget: no reply is expected.
 */
static int
vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
{
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.u64),
    };
    msg.payload.u64 = session_id;

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -1;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_report("vhost_user_write() return -1, close session failed");
        return -1;
    }

    return 0;
}

/*
 * Only fd-backed memory sections can be shared with the backend, so
 * filter out any section whose MemoryRegion has no fd.
 */
static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
                                          MemoryRegionSection *section)
{
    bool result;

    result = memory_region_get_fd(section->mr) >= 0;

    return result;
}

/*
 * Fetch the inflight-I/O tracking buffer from the backend
 * (GET_INFLIGHT_FD) and mmap it.  A no-op (success) when the
 * INFLIGHT_SHMFD protocol feature is absent, or when the backend
 * reports a zero mmap size.
 */
static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
                                      uint16_t queue_size,
                                      struct vhost_inflight *inflight)
{
    void *addr;
    int fd;
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
        error_report("Received bad msg size.");
        return -1;
    }

    if (!msg.payload.inflight.mmap_size) {
        return 0;
    }

    /* The shared-memory fd arrives as ancillary data on the reply */
    fd = qemu_chr_fe_get_msgfd(chr);
    if (fd < 0) {
        error_report("Failed to get mem fd");
        return -1;
    }

    addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
                MAP_SHARED, fd, msg.payload.inflight.mmap_offset);

    if (addr == MAP_FAILED) {
        error_report("Failed to mmap mem fd");
        close(fd);
        return -1;
    }

    /* Caller owns addr/fd via the vhost_inflight structure from here on */
    inflight->addr = addr;
    inflight->fd = fd;
    inflight->size = msg.payload.inflight.mmap_size;
    inflight->offset = msg.payload.inflight.mmap_offset;
    inflight->queue_size = queue_size;

    return 0;
}

/*
 * Hand a previously obtained inflight buffer back to the (possibly
 * restarted) backend via SET_INFLIGHT_FD, so it can resume in-flight
 * request tracking.  A no-op when INFLIGHT_SHMFD was not negotiated.
 */
static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
                                      struct vhost_inflight *inflight)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.mmap_size = inflight->size,
        .payload.inflight.mmap_offset = inflight->offset,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = inflight->queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) {
        return -1;
    }

    return 0;
}

/*
 * Bind a VhostUserState to its chardev.  Fails if the state is already
 * in use (user->chr non-NULL).
 */
bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
    if (user->chr) {
        error_setg(errp, "Cannot initialize vhost-user state");
        return false;
    }
    user->chr = chr;
    return true;
}

/*
 * Release a VhostUserState: unmap and unparent any host-notifier
 * regions, then detach the chardev.  Safe to call on an uninitialized
 * state (no-op when user->chr is NULL).
 */
void vhost_user_cleanup(VhostUserState *user)
{
    int i;

    if (!user->chr) {
        return;
    }

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (user->notifier[i].addr) {
            object_unparent(OBJECT(&user->notifier[i].mr));
            munmap(user->notifier[i].addr, qemu_real_host_page_size);
            user->notifier[i].addr = NULL;
        }
    }
    user->chr = NULL;
}

/* VhostOps dispatch table for the vhost-user backend */
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        .vhost_backend_init = vhost_user_backend_init,
        .vhost_backend_cleanup = vhost_user_backend_cleanup,
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
        .vhost_migration_done = vhost_user_migration_done,
        .vhost_backend_can_merge = vhost_user_can_merge,
        .vhost_net_set_mtu = vhost_user_net_set_mtu,
        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
        .vhost_get_config = vhost_user_get_config,
        .vhost_set_config = vhost_user_set_config,
        .vhost_crypto_create_session = vhost_user_crypto_create_session,
        .vhost_crypto_close_session = vhost_user_crypto_close_session,
        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
        .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
        .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
};