1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "sysemu/kvm.h" 20 #include "qemu/error-report.h" 21 #include "qemu/sockets.h" 22 #include "sysemu/cryptodev.h" 23 #include "migration/migration.h" 24 #include "migration/postcopy-ram.h" 25 #include "trace.h" 26 27 #include <sys/ioctl.h> 28 #include <sys/socket.h> 29 #include <sys/un.h> 30 31 #include "standard-headers/linux/vhost_types.h" 32 33 #ifdef CONFIG_LINUX 34 #include <linux/userfaultfd.h> 35 #endif 36 37 #define VHOST_MEMORY_MAX_NREGIONS 8 38 #define VHOST_USER_F_PROTOCOL_FEATURES 30 39 #define VHOST_USER_SLAVE_MAX_FDS 8 40 41 /* 42 * Maximum size of virtio device config space 43 */ 44 #define VHOST_USER_MAX_CONFIG_SIZE 256 45 46 enum VhostUserProtocolFeature { 47 VHOST_USER_PROTOCOL_F_MQ = 0, 48 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 49 VHOST_USER_PROTOCOL_F_RARP = 2, 50 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 51 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 52 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 53 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 54 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 55 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 56 VHOST_USER_PROTOCOL_F_CONFIG = 9, 57 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 58 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 59 VHOST_USER_PROTOCOL_F_MAX 60 }; 61 62 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 63 64 typedef enum VhostUserRequest { 65 VHOST_USER_NONE = 0, 66 VHOST_USER_GET_FEATURES = 1, 67 VHOST_USER_SET_FEATURES = 2, 68 VHOST_USER_SET_OWNER = 3, 69 VHOST_USER_RESET_OWNER = 4, 70 VHOST_USER_SET_MEM_TABLE = 5, 71 VHOST_USER_SET_LOG_BASE = 6, 72 VHOST_USER_SET_LOG_FD = 7, 73 VHOST_USER_SET_VRING_NUM = 8, 74 VHOST_USER_SET_VRING_ADDR = 9, 75 VHOST_USER_SET_VRING_BASE = 10, 76 VHOST_USER_GET_VRING_BASE = 11, 77 VHOST_USER_SET_VRING_KICK = 12, 78 VHOST_USER_SET_VRING_CALL = 13, 79 VHOST_USER_SET_VRING_ERR = 14, 80 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 81 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 82 VHOST_USER_GET_QUEUE_NUM = 17, 83 VHOST_USER_SET_VRING_ENABLE = 18, 84 VHOST_USER_SEND_RARP = 19, 85 VHOST_USER_NET_SET_MTU = 20, 86 VHOST_USER_SET_SLAVE_REQ_FD = 21, 87 VHOST_USER_IOTLB_MSG = 22, 88 VHOST_USER_SET_VRING_ENDIAN = 23, 89 VHOST_USER_GET_CONFIG = 24, 90 VHOST_USER_SET_CONFIG = 25, 91 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 92 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 93 VHOST_USER_POSTCOPY_ADVISE = 28, 94 VHOST_USER_POSTCOPY_LISTEN = 29, 95 VHOST_USER_POSTCOPY_END = 30, 96 VHOST_USER_MAX 97 } VhostUserRequest; 98 99 typedef enum VhostUserSlaveRequest { 100 VHOST_USER_SLAVE_NONE = 0, 101 VHOST_USER_SLAVE_IOTLB_MSG = 1, 102 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 103 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 104 VHOST_USER_SLAVE_MAX 105 } VhostUserSlaveRequest; 106 107 typedef struct VhostUserMemoryRegion { 108 uint64_t guest_phys_addr; 109 uint64_t memory_size; 110 uint64_t userspace_addr; 111 uint64_t mmap_offset; 112 } VhostUserMemoryRegion; 113 114 typedef struct VhostUserMemory { 115 uint32_t nregions; 116 uint32_t padding; 117 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 118 } VhostUserMemory; 119 120 typedef struct VhostUserLog { 121 uint64_t mmap_size; 122 uint64_t mmap_offset; 123 } VhostUserLog; 124 125 typedef struct VhostUserConfig { 126 uint32_t offset; 127 uint32_t size; 128 uint32_t flags; 129 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 130 } VhostUserConfig; 131 132 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 133 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 134 135 typedef struct VhostUserCryptoSession { 136 /* session id for success, -1 on errors */ 137 int64_t session_id; 138 CryptoDevBackendSymSessionInfo session_setup_data; 139 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 140 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 141 } VhostUserCryptoSession; 142 143 static VhostUserConfig c __attribute__ ((unused)); 144 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 145 + sizeof(c.size) \ 146 + sizeof(c.flags)) 147 148 typedef struct VhostUserVringArea { 149 uint64_t u64; 150 uint64_t size; 151 uint64_t offset; 152 } VhostUserVringArea; 153 154 typedef struct { 155 VhostUserRequest request; 156 157 #define VHOST_USER_VERSION_MASK (0x3) 158 #define VHOST_USER_REPLY_MASK (0x1<<2) 159 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 160 uint32_t flags; 161 uint32_t size; /* the following payload size */ 162 } QEMU_PACKED VhostUserHeader; 163 164 typedef union { 165 #define VHOST_USER_VRING_IDX_MASK (0xff) 166 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 167 uint64_t u64; 168 struct vhost_vring_state state; 169 struct vhost_vring_addr addr; 170 VhostUserMemory memory; 171 VhostUserLog log; 172 struct vhost_iotlb_msg iotlb; 173 VhostUserConfig config; 174 VhostUserCryptoSession session; 175 VhostUserVringArea area; 176 } VhostUserPayload; 177 178 typedef struct VhostUserMsg { 179 VhostUserHeader hdr; 180 VhostUserPayload payload; 181 } QEMU_PACKED VhostUserMsg; 182 183 static VhostUserMsg m __attribute__ ((unused)); 184 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 185 186 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 187 188 /* The version of the protocol we support */ 189 #define VHOST_USER_VERSION (0x1) 190 191 struct vhost_user { 192 struct vhost_dev *dev; 193 /* Shared between vhost devs of the same virtio device */ 194 VhostUserState *user; 195 int slave_fd; 196 NotifierWithReturn postcopy_notifier; 197 struct PostCopyFD postcopy_fd; 198 uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; 199 /* Length of the region_rb and region_rb_offset arrays */ 200 size_t region_rb_len; 201 /* RAMBlock associated with a given region */ 202 RAMBlock **region_rb; 203 /* The offset from the start of the RAMBlock to the start of the 204 * vhost region. 205 */ 206 ram_addr_t *region_rb_offset; 207 208 /* True once we've entered postcopy_listen */ 209 bool postcopy_listen; 210 }; 211 212 static bool ioeventfd_enabled(void) 213 { 214 return !kvm_enabled() || kvm_eventfds_enabled(); 215 } 216 217 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 218 { 219 struct vhost_user *u = dev->opaque; 220 CharBackend *chr = u->user->chr; 221 uint8_t *p = (uint8_t *) msg; 222 int r, size = VHOST_USER_HDR_SIZE; 223 224 r = qemu_chr_fe_read_all(chr, p, size); 225 if (r != size) { 226 error_report("Failed to read msg header. Read %d instead of %d." 227 " Original request %d.", r, size, msg->hdr.request); 228 goto fail; 229 } 230 231 /* validate received flags */ 232 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 233 error_report("Failed to read msg header." 234 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 235 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 236 goto fail; 237 } 238 239 /* validate message size is sane */ 240 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 241 error_report("Failed to read msg header." 242 " Size %d exceeds the maximum %zu.", msg->hdr.size, 243 VHOST_USER_PAYLOAD_SIZE); 244 goto fail; 245 } 246 247 if (msg->hdr.size) { 248 p += VHOST_USER_HDR_SIZE; 249 size = msg->hdr.size; 250 r = qemu_chr_fe_read_all(chr, p, size); 251 if (r != size) { 252 error_report("Failed to read msg payload." 253 " Read %d instead of %d.", r, msg->hdr.size); 254 goto fail; 255 } 256 } 257 258 return 0; 259 260 fail: 261 return -1; 262 } 263 264 static int process_message_reply(struct vhost_dev *dev, 265 const VhostUserMsg *msg) 266 { 267 VhostUserMsg msg_reply; 268 269 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 270 return 0; 271 } 272 273 if (vhost_user_read(dev, &msg_reply) < 0) { 274 return -1; 275 } 276 277 if (msg_reply.hdr.request != msg->hdr.request) { 278 error_report("Received unexpected msg type." 279 "Expected %d received %d", 280 msg->hdr.request, msg_reply.hdr.request); 281 return -1; 282 } 283 284 return msg_reply.payload.u64 ? -1 : 0; 285 } 286 287 static bool vhost_user_one_time_request(VhostUserRequest request) 288 { 289 switch (request) { 290 case VHOST_USER_SET_OWNER: 291 case VHOST_USER_RESET_OWNER: 292 case VHOST_USER_SET_MEM_TABLE: 293 case VHOST_USER_GET_QUEUE_NUM: 294 case VHOST_USER_NET_SET_MTU: 295 return true; 296 default: 297 return false; 298 } 299 } 300 301 /* most non-init callers ignore the error */ 302 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 303 int *fds, int fd_num) 304 { 305 struct vhost_user *u = dev->opaque; 306 CharBackend *chr = u->user->chr; 307 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 308 309 /* 310 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 311 * we just need send it once in the first time. For later such 312 * request, we just ignore it. 313 */ 314 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 315 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 316 return 0; 317 } 318 319 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 320 error_report("Failed to set msg fds."); 321 return -1; 322 } 323 324 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 325 if (ret != size) { 326 error_report("Failed to write msg." 327 " Wrote %d instead of %d.", ret, size); 328 return -1; 329 } 330 331 return 0; 332 } 333 334 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 335 struct vhost_log *log) 336 { 337 int fds[VHOST_MEMORY_MAX_NREGIONS]; 338 size_t fd_num = 0; 339 bool shmfd = virtio_has_feature(dev->protocol_features, 340 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 341 VhostUserMsg msg = { 342 .hdr.request = VHOST_USER_SET_LOG_BASE, 343 .hdr.flags = VHOST_USER_VERSION, 344 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 345 .payload.log.mmap_offset = 0, 346 .hdr.size = sizeof(msg.payload.log), 347 }; 348 349 if (shmfd && log->fd != -1) { 350 fds[fd_num++] = log->fd; 351 } 352 353 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 354 return -1; 355 } 356 357 if (shmfd) { 358 msg.hdr.size = 0; 359 if (vhost_user_read(dev, &msg) < 0) { 360 return -1; 361 } 362 363 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 364 error_report("Received unexpected msg type. " 365 "Expected %d received %d", 366 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 367 return -1; 368 } 369 } 370 371 return 0; 372 } 373 374 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 375 struct vhost_memory *mem) 376 { 377 struct vhost_user *u = dev->opaque; 378 int fds[VHOST_MEMORY_MAX_NREGIONS]; 379 int i, fd; 380 size_t fd_num = 0; 381 VhostUserMsg msg_reply; 382 int region_i, msg_i; 383 384 VhostUserMsg msg = { 385 .hdr.request = VHOST_USER_SET_MEM_TABLE, 386 .hdr.flags = VHOST_USER_VERSION, 387 }; 388 389 if (u->region_rb_len < dev->mem->nregions) { 390 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 391 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 392 dev->mem->nregions); 393 memset(&(u->region_rb[u->region_rb_len]), '\0', 394 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 395 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 396 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 397 u->region_rb_len = dev->mem->nregions; 398 } 399 400 for (i = 0; i < dev->mem->nregions; ++i) { 401 struct vhost_memory_region *reg = dev->mem->regions + i; 402 ram_addr_t offset; 403 MemoryRegion *mr; 404 405 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 406 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 407 &offset); 408 fd = memory_region_get_fd(mr); 409 if (fd > 0) { 410 trace_vhost_user_set_mem_table_withfd(fd_num, mr->name, 411 reg->memory_size, 412 reg->guest_phys_addr, 413 reg->userspace_addr, offset); 414 u->region_rb_offset[i] = offset; 415 u->region_rb[i] = mr->ram_block; 416 msg.payload.memory.regions[fd_num].userspace_addr = 417 reg->userspace_addr; 418 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 419 msg.payload.memory.regions[fd_num].guest_phys_addr = 420 reg->guest_phys_addr; 421 msg.payload.memory.regions[fd_num].mmap_offset = offset; 422 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 423 fds[fd_num++] = fd; 424 } else { 425 u->region_rb_offset[i] = 0; 426 u->region_rb[i] = NULL; 427 } 428 } 429 430 msg.payload.memory.nregions = fd_num; 431 432 if (!fd_num) { 433 error_report("Failed initializing vhost-user memory map, " 434 "consider using -object memory-backend-file share=on"); 435 return -1; 436 } 437 438 msg.hdr.size = sizeof(msg.payload.memory.nregions); 439 msg.hdr.size += sizeof(msg.payload.memory.padding); 440 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 441 442 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 443 return -1; 444 } 445 446 if (vhost_user_read(dev, &msg_reply) < 0) { 447 return -1; 448 } 449 450 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 451 error_report("%s: Received unexpected msg type." 452 "Expected %d received %d", __func__, 453 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 454 return -1; 455 } 456 /* We're using the same structure, just reusing one of the 457 * fields, so it should be the same size. 458 */ 459 if (msg_reply.hdr.size != msg.hdr.size) { 460 error_report("%s: Unexpected size for postcopy reply " 461 "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); 462 return -1; 463 } 464 465 memset(u->postcopy_client_bases, 0, 466 sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); 467 468 /* They're in the same order as the regions that were sent 469 * but some of the regions were skipped (above) if they 470 * didn't have fd's 471 */ 472 for (msg_i = 0, region_i = 0; 473 region_i < dev->mem->nregions; 474 region_i++) { 475 if (msg_i < fd_num && 476 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 477 dev->mem->regions[region_i].guest_phys_addr) { 478 u->postcopy_client_bases[region_i] = 479 msg_reply.payload.memory.regions[msg_i].userspace_addr; 480 trace_vhost_user_set_mem_table_postcopy( 481 msg_reply.payload.memory.regions[msg_i].userspace_addr, 482 msg.payload.memory.regions[msg_i].userspace_addr, 483 msg_i, region_i); 484 msg_i++; 485 } 486 } 487 if (msg_i != fd_num) { 488 error_report("%s: postcopy reply not fully consumed " 489 "%d vs %zd", 490 __func__, msg_i, fd_num); 491 return -1; 492 } 493 /* Now we've registered this with the postcopy code, we ack to the client, 494 * because now we're in the position to be able to deal with any faults 495 * it generates. 496 */ 497 /* TODO: Use this for failure cases as well with a bad value */ 498 msg.hdr.size = sizeof(msg.payload.u64); 499 msg.payload.u64 = 0; /* OK */ 500 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 501 return -1; 502 } 503 504 return 0; 505 } 506 507 static int vhost_user_set_mem_table(struct vhost_dev *dev, 508 struct vhost_memory *mem) 509 { 510 struct vhost_user *u = dev->opaque; 511 int fds[VHOST_MEMORY_MAX_NREGIONS]; 512 int i, fd; 513 size_t fd_num = 0; 514 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 515 bool reply_supported = virtio_has_feature(dev->protocol_features, 516 VHOST_USER_PROTOCOL_F_REPLY_ACK); 517 518 if (do_postcopy) { 519 /* Postcopy has enough differences that it's best done in it's own 520 * version 521 */ 522 return vhost_user_set_mem_table_postcopy(dev, mem); 523 } 524 525 VhostUserMsg msg = { 526 .hdr.request = VHOST_USER_SET_MEM_TABLE, 527 .hdr.flags = VHOST_USER_VERSION, 528 }; 529 530 if (reply_supported) { 531 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 532 } 533 534 for (i = 0; i < dev->mem->nregions; ++i) { 535 struct vhost_memory_region *reg = dev->mem->regions + i; 536 ram_addr_t offset; 537 MemoryRegion *mr; 538 539 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 540 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 541 &offset); 542 fd = memory_region_get_fd(mr); 543 if (fd > 0) { 544 if (fd_num == VHOST_MEMORY_MAX_NREGIONS) { 545 error_report("Failed preparing vhost-user memory table msg"); 546 return -1; 547 } 548 msg.payload.memory.regions[fd_num].userspace_addr = 549 reg->userspace_addr; 550 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 551 msg.payload.memory.regions[fd_num].guest_phys_addr = 552 reg->guest_phys_addr; 553 msg.payload.memory.regions[fd_num].mmap_offset = offset; 554 fds[fd_num++] = fd; 555 } 556 } 557 558 msg.payload.memory.nregions = fd_num; 559 560 if (!fd_num) { 561 error_report("Failed initializing vhost-user memory map, " 562 "consider using -object memory-backend-file share=on"); 563 return -1; 564 } 565 566 msg.hdr.size = sizeof(msg.payload.memory.nregions); 567 msg.hdr.size += sizeof(msg.payload.memory.padding); 568 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 569 570 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 571 return -1; 572 } 573 574 if (reply_supported) { 575 return process_message_reply(dev, &msg); 576 } 577 578 return 0; 579 } 580 581 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 582 struct vhost_vring_addr *addr) 583 { 584 VhostUserMsg msg = { 585 .hdr.request = VHOST_USER_SET_VRING_ADDR, 586 .hdr.flags = VHOST_USER_VERSION, 587 .payload.addr = *addr, 588 .hdr.size = sizeof(msg.payload.addr), 589 }; 590 591 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 592 return -1; 593 } 594 595 return 0; 596 } 597 598 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 599 struct vhost_vring_state *ring) 600 { 601 bool cross_endian = virtio_has_feature(dev->protocol_features, 602 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 603 VhostUserMsg msg = { 604 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 605 .hdr.flags = VHOST_USER_VERSION, 606 .payload.state = *ring, 607 .hdr.size = sizeof(msg.payload.state), 608 }; 609 610 if (!cross_endian) { 611 error_report("vhost-user trying to send unhandled ioctl"); 612 return -1; 613 } 614 615 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 616 return -1; 617 } 618 619 return 0; 620 } 621 622 static int vhost_set_vring(struct vhost_dev *dev, 623 unsigned long int request, 624 struct vhost_vring_state *ring) 625 { 626 VhostUserMsg msg = { 627 .hdr.request = request, 628 .hdr.flags = VHOST_USER_VERSION, 629 .payload.state = *ring, 630 .hdr.size = sizeof(msg.payload.state), 631 }; 632 633 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 634 return -1; 635 } 636 637 return 0; 638 } 639 640 static int vhost_user_set_vring_num(struct vhost_dev *dev, 641 struct vhost_vring_state *ring) 642 { 643 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 644 } 645 646 static void vhost_user_host_notifier_restore(struct vhost_dev *dev, 647 int queue_idx) 648 { 649 struct vhost_user *u = dev->opaque; 650 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 651 VirtIODevice *vdev = dev->vdev; 652 653 if (n->addr && !n->set) { 654 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true); 655 n->set = true; 656 } 657 } 658 659 static void vhost_user_host_notifier_remove(struct vhost_dev *dev, 660 int queue_idx) 661 { 662 struct vhost_user *u = dev->opaque; 663 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 664 VirtIODevice *vdev = dev->vdev; 665 666 if (n->addr && n->set) { 667 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 668 n->set = false; 669 } 670 } 671 672 static int vhost_user_set_vring_base(struct vhost_dev *dev, 673 struct vhost_vring_state *ring) 674 { 675 vhost_user_host_notifier_restore(dev, ring->index); 676 677 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 678 } 679 680 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 681 { 682 int i; 683 684 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 685 return -1; 686 } 687 688 for (i = 0; i < dev->nvqs; ++i) { 689 struct vhost_vring_state state = { 690 .index = dev->vq_index + i, 691 .num = enable, 692 }; 693 694 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 695 } 696 697 return 0; 698 } 699 700 static int vhost_user_get_vring_base(struct vhost_dev *dev, 701 struct vhost_vring_state *ring) 702 { 703 VhostUserMsg msg = { 704 .hdr.request = VHOST_USER_GET_VRING_BASE, 705 .hdr.flags = VHOST_USER_VERSION, 706 .payload.state = *ring, 707 .hdr.size = sizeof(msg.payload.state), 708 }; 709 710 vhost_user_host_notifier_remove(dev, ring->index); 711 712 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 713 return -1; 714 } 715 716 if (vhost_user_read(dev, &msg) < 0) { 717 return -1; 718 } 719 720 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 721 error_report("Received unexpected msg type. Expected %d received %d", 722 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 723 return -1; 724 } 725 726 if (msg.hdr.size != sizeof(msg.payload.state)) { 727 error_report("Received bad msg size."); 728 return -1; 729 } 730 731 *ring = msg.payload.state; 732 733 return 0; 734 } 735 736 static int vhost_set_vring_file(struct vhost_dev *dev, 737 VhostUserRequest request, 738 struct vhost_vring_file *file) 739 { 740 int fds[VHOST_MEMORY_MAX_NREGIONS]; 741 size_t fd_num = 0; 742 VhostUserMsg msg = { 743 .hdr.request = request, 744 .hdr.flags = VHOST_USER_VERSION, 745 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 746 .hdr.size = sizeof(msg.payload.u64), 747 }; 748 749 if (ioeventfd_enabled() && file->fd > 0) { 750 fds[fd_num++] = file->fd; 751 } else { 752 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 753 } 754 755 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 756 return -1; 757 } 758 759 return 0; 760 } 761 762 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 763 struct vhost_vring_file *file) 764 { 765 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 766 } 767 768 static int vhost_user_set_vring_call(struct vhost_dev *dev, 769 struct vhost_vring_file *file) 770 { 771 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 772 } 773 774 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 775 { 776 VhostUserMsg msg = { 777 .hdr.request = request, 778 .hdr.flags = VHOST_USER_VERSION, 779 .payload.u64 = u64, 780 .hdr.size = sizeof(msg.payload.u64), 781 }; 782 783 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 784 return -1; 785 } 786 787 return 0; 788 } 789 790 static int vhost_user_set_features(struct vhost_dev *dev, 791 uint64_t features) 792 { 793 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 794 } 795 796 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 797 uint64_t features) 798 { 799 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 800 } 801 802 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 803 { 804 VhostUserMsg msg = { 805 .hdr.request = request, 806 .hdr.flags = VHOST_USER_VERSION, 807 }; 808 809 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 810 return 0; 811 } 812 813 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 814 return -1; 815 } 816 817 if (vhost_user_read(dev, &msg) < 0) { 818 return -1; 819 } 820 821 if (msg.hdr.request != request) { 822 error_report("Received unexpected msg type. Expected %d received %d", 823 request, msg.hdr.request); 824 return -1; 825 } 826 827 if (msg.hdr.size != sizeof(msg.payload.u64)) { 828 error_report("Received bad msg size."); 829 return -1; 830 } 831 832 *u64 = msg.payload.u64; 833 834 return 0; 835 } 836 837 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 838 { 839 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 840 } 841 842 static int vhost_user_set_owner(struct vhost_dev *dev) 843 { 844 VhostUserMsg msg = { 845 .hdr.request = VHOST_USER_SET_OWNER, 846 .hdr.flags = VHOST_USER_VERSION, 847 }; 848 849 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 850 return -1; 851 } 852 853 return 0; 854 } 855 856 static int vhost_user_reset_device(struct vhost_dev *dev) 857 { 858 VhostUserMsg msg = { 859 .hdr.request = VHOST_USER_RESET_OWNER, 860 .hdr.flags = VHOST_USER_VERSION, 861 }; 862 863 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 864 return -1; 865 } 866 867 return 0; 868 } 869 870 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 871 { 872 int ret = -1; 873 874 if (!dev->config_ops) { 875 return -1; 876 } 877 878 if (dev->config_ops->vhost_dev_config_notifier) { 879 ret = dev->config_ops->vhost_dev_config_notifier(dev); 880 } 881 882 return ret; 883 } 884 885 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 886 VhostUserVringArea *area, 887 int fd) 888 { 889 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 890 size_t page_size = qemu_real_host_page_size; 891 struct vhost_user *u = dev->opaque; 892 VhostUserState *user = u->user; 893 VirtIODevice *vdev = dev->vdev; 894 VhostUserHostNotifier *n; 895 void *addr; 896 char *name; 897 898 if (!virtio_has_feature(dev->protocol_features, 899 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 900 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 901 return -1; 902 } 903 904 n = &user->notifier[queue_idx]; 905 906 if (n->addr) { 907 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 908 object_unparent(OBJECT(&n->mr)); 909 munmap(n->addr, page_size); 910 n->addr = NULL; 911 } 912 913 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 914 return 0; 915 } 916 917 /* Sanity check. */ 918 if (area->size != page_size) { 919 return -1; 920 } 921 922 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 923 fd, area->offset); 924 if (addr == MAP_FAILED) { 925 return -1; 926 } 927 928 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 929 user, queue_idx); 930 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 931 page_size, addr); 932 g_free(name); 933 934 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 935 munmap(addr, page_size); 936 return -1; 937 } 938 939 n->addr = addr; 940 n->set = true; 941 942 return 0; 943 } 944 945 static void slave_read(void *opaque) 946 { 947 struct vhost_dev *dev = opaque; 948 struct vhost_user *u = dev->opaque; 949 VhostUserHeader hdr = { 0, }; 950 VhostUserPayload payload = { 0, }; 951 int size, ret = 0; 952 struct iovec iov; 953 struct msghdr msgh; 954 int fd[VHOST_USER_SLAVE_MAX_FDS]; 955 char control[CMSG_SPACE(sizeof(fd))]; 956 struct cmsghdr *cmsg; 957 int i, fdsize = 0; 958 959 memset(&msgh, 0, sizeof(msgh)); 960 msgh.msg_iov = &iov; 961 msgh.msg_iovlen = 1; 962 msgh.msg_control = control; 963 msgh.msg_controllen = sizeof(control); 964 965 memset(fd, -1, sizeof(fd)); 966 967 /* Read header */ 968 iov.iov_base = &hdr; 969 iov.iov_len = VHOST_USER_HDR_SIZE; 970 971 size = recvmsg(u->slave_fd, &msgh, 0); 972 if (size != VHOST_USER_HDR_SIZE) { 973 error_report("Failed to read from slave."); 974 goto err; 975 } 976 977 if (msgh.msg_flags & MSG_CTRUNC) { 978 error_report("Truncated message."); 979 goto err; 980 } 981 982 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 983 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 984 if (cmsg->cmsg_level == SOL_SOCKET && 985 cmsg->cmsg_type == SCM_RIGHTS) { 986 fdsize = cmsg->cmsg_len - CMSG_LEN(0); 987 memcpy(fd, CMSG_DATA(cmsg), fdsize); 988 break; 989 } 990 } 991 992 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 993 error_report("Failed to read msg header." 994 " Size %d exceeds the maximum %zu.", hdr.size, 995 VHOST_USER_PAYLOAD_SIZE); 996 goto err; 997 } 998 999 /* Read payload */ 1000 size = read(u->slave_fd, &payload, hdr.size); 1001 if (size != hdr.size) { 1002 error_report("Failed to read payload from slave."); 1003 goto err; 1004 } 1005 1006 switch (hdr.request) { 1007 case VHOST_USER_SLAVE_IOTLB_MSG: 1008 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1009 break; 1010 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1011 ret = vhost_user_slave_handle_config_change(dev); 1012 break; 1013 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1014 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1015 fd[0]); 1016 break; 1017 default: 1018 error_report("Received unexpected msg type."); 1019 ret = -EINVAL; 1020 } 1021 1022 /* Close the remaining file descriptors. */ 1023 for (i = 0; i < fdsize; i++) { 1024 if (fd[i] != -1) { 1025 close(fd[i]); 1026 } 1027 } 1028 1029 /* 1030 * REPLY_ACK feature handling. Other reply types has to be managed 1031 * directly in their request handlers. 1032 */ 1033 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1034 struct iovec iovec[2]; 1035 1036 1037 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1038 hdr.flags |= VHOST_USER_REPLY_MASK; 1039 1040 payload.u64 = !!ret; 1041 hdr.size = sizeof(payload.u64); 1042 1043 iovec[0].iov_base = &hdr; 1044 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1045 iovec[1].iov_base = &payload; 1046 iovec[1].iov_len = hdr.size; 1047 1048 size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); 1049 if (size != VHOST_USER_HDR_SIZE + hdr.size) { 1050 error_report("Failed to send msg reply to slave."); 1051 goto err; 1052 } 1053 } 1054 1055 return; 1056 1057 err: 1058 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1059 close(u->slave_fd); 1060 u->slave_fd = -1; 1061 for (i = 0; i < fdsize; i++) { 1062 if (fd[i] != -1) { 1063 close(fd[i]); 1064 } 1065 } 1066 return; 1067 } 1068 1069 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1070 { 1071 VhostUserMsg msg = { 1072 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, 1073 .hdr.flags = VHOST_USER_VERSION, 1074 }; 1075 struct vhost_user *u = dev->opaque; 1076 int sv[2], ret = 0; 1077 bool reply_supported = virtio_has_feature(dev->protocol_features, 1078 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1079 1080 if (!virtio_has_feature(dev->protocol_features, 1081 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 1082 return 0; 1083 } 1084 1085 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1086 error_report("socketpair() failed"); 1087 return -1; 1088 } 1089 1090 u->slave_fd = sv[0]; 1091 qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev); 1092 1093 if (reply_supported) { 1094 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1095 } 1096 1097 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1098 if (ret) { 1099 goto out; 1100 } 1101 1102 if (reply_supported) { 1103 ret = process_message_reply(dev, &msg); 1104 } 1105 1106 out: 1107 close(sv[1]); 1108 if (ret) { 1109 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1110 close(u->slave_fd); 1111 u->slave_fd = -1; 1112 } 1113 1114 return ret; 1115 } 1116 1117 #ifdef CONFIG_LINUX 1118 /* 1119 * Called back from the postcopy fault thread when a fault is received on our 1120 * ufd. 1121 * TODO: This is Linux specific 1122 */ 1123 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1124 void *ufd) 1125 { 1126 struct vhost_dev *dev = pcfd->data; 1127 struct vhost_user *u = dev->opaque; 1128 struct uffd_msg *msg = ufd; 1129 uint64_t faultaddr = msg->arg.pagefault.address; 1130 RAMBlock *rb = NULL; 1131 uint64_t rb_offset; 1132 int i; 1133 1134 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1135 dev->mem->nregions); 1136 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1137 trace_vhost_user_postcopy_fault_handler_loop(i, 1138 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1139 if (faultaddr >= u->postcopy_client_bases[i]) { 1140 /* Ofset of the fault address in the vhost region */ 1141 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1142 if (region_offset < dev->mem->regions[i].memory_size) { 1143 rb_offset = region_offset + u->region_rb_offset[i]; 1144 trace_vhost_user_postcopy_fault_handler_found(i, 1145 region_offset, rb_offset); 1146 rb = u->region_rb[i]; 1147 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1148 rb_offset); 1149 } 1150 } 1151 } 1152 error_report("%s: Failed to find region for fault %" PRIx64, 1153 __func__, faultaddr); 1154 return -1; 1155 } 1156 1157 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1158 uint64_t offset) 1159 { 1160 struct vhost_dev *dev = pcfd->data; 1161 struct vhost_user *u = dev->opaque; 1162 int i; 1163 1164 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1165 1166 if (!u) { 1167 return 0; 1168 } 1169 /* Translate the offset into an address in the clients address space */ 1170 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1171 if (u->region_rb[i] == rb && 1172 offset >= u->region_rb_offset[i] && 1173 offset < (u->region_rb_offset[i] + 1174 dev->mem->regions[i].memory_size)) { 1175 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1176 u->postcopy_client_bases[i]; 1177 trace_vhost_user_postcopy_waker_found(client_addr); 1178 return postcopy_wake_shared(pcfd, client_addr, rb); 1179 } 1180 } 1181 1182 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1183 return 0; 1184 } 1185 #endif 1186 1187 /* 1188 * Called at the start of an inbound postcopy on reception of the 1189 * 'advise' command. 1190 */ 1191 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1192 { 1193 #ifdef CONFIG_LINUX 1194 struct vhost_user *u = dev->opaque; 1195 CharBackend *chr = u->user->chr; 1196 int ufd; 1197 VhostUserMsg msg = { 1198 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1199 .hdr.flags = VHOST_USER_VERSION, 1200 }; 1201 1202 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1203 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1204 return -1; 1205 } 1206 1207 if (vhost_user_read(dev, &msg) < 0) { 1208 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1209 return -1; 1210 } 1211 1212 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1213 error_setg(errp, "Unexpected msg type. Expected %d received %d", 1214 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1215 return -1; 1216 } 1217 1218 if (msg.hdr.size) { 1219 error_setg(errp, "Received bad msg size."); 1220 return -1; 1221 } 1222 ufd = qemu_chr_fe_get_msgfd(chr); 1223 if (ufd < 0) { 1224 error_setg(errp, "%s: Failed to get ufd", __func__); 1225 return -1; 1226 } 1227 qemu_set_nonblock(ufd); 1228 1229 /* register ufd with userfault thread */ 1230 u->postcopy_fd.fd = ufd; 1231 u->postcopy_fd.data = dev; 1232 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1233 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1234 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1235 postcopy_register_shared_ufd(&u->postcopy_fd); 1236 return 0; 1237 #else 1238 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1239 return -1; 1240 #endif 1241 } 1242 1243 /* 1244 * Called at the switch to postcopy on reception of the 'listen' command. 1245 */ 1246 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1247 { 1248 struct vhost_user *u = dev->opaque; 1249 int ret; 1250 VhostUserMsg msg = { 1251 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1252 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1253 }; 1254 u->postcopy_listen = true; 1255 trace_vhost_user_postcopy_listen(); 1256 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1257 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1258 return -1; 1259 } 1260 1261 ret = process_message_reply(dev, &msg); 1262 if (ret) { 1263 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1264 return ret; 1265 } 1266 1267 return 0; 1268 } 1269 1270 /* 1271 * Called at the end of postcopy 1272 */ 1273 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1274 { 1275 VhostUserMsg msg = { 1276 .hdr.request = VHOST_USER_POSTCOPY_END, 1277 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1278 }; 1279 int ret; 1280 struct vhost_user *u = dev->opaque; 1281 1282 trace_vhost_user_postcopy_end_entry(); 1283 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1284 error_setg(errp, "Failed to send postcopy_end to vhost"); 1285 return -1; 1286 } 1287 1288 ret = process_message_reply(dev, &msg); 1289 if (ret) { 1290 error_setg(errp, "Failed to receive reply to postcopy_end"); 1291 return ret; 1292 } 1293 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1294 close(u->postcopy_fd.fd); 1295 u->postcopy_fd.handler = NULL; 1296 1297 trace_vhost_user_postcopy_end_exit(); 1298 1299 return 0; 1300 } 1301 1302 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1303 void *opaque) 1304 { 1305 struct PostcopyNotifyData *pnd = opaque; 1306 struct vhost_user *u = container_of(notifier, struct vhost_user, 1307 postcopy_notifier); 1308 struct vhost_dev *dev = u->dev; 1309 1310 switch (pnd->reason) { 1311 case POSTCOPY_NOTIFY_PROBE: 1312 if (!virtio_has_feature(dev->protocol_features, 1313 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1314 /* TODO: Get the device name into this error somehow */ 1315 error_setg(pnd->errp, 1316 "vhost-user backend not capable of postcopy"); 1317 return -ENOENT; 1318 } 1319 break; 1320 1321 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1322 return vhost_user_postcopy_advise(dev, pnd->errp); 1323 1324 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1325 return vhost_user_postcopy_listen(dev, pnd->errp); 1326 1327 case POSTCOPY_NOTIFY_INBOUND_END: 1328 return vhost_user_postcopy_end(dev, pnd->errp); 1329 1330 default: 1331 /* We ignore notifications we don't know */ 1332 break; 1333 } 1334 1335 return 0; 1336 } 1337 1338 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) 1339 { 1340 uint64_t features, protocol_features; 1341 struct vhost_user *u; 1342 int err; 1343 1344 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1345 1346 u = g_new0(struct vhost_user, 1); 1347 u->user = opaque; 1348 u->slave_fd = -1; 1349 u->dev = dev; 1350 dev->opaque = u; 1351 1352 err = vhost_user_get_features(dev, &features); 1353 if (err < 0) { 1354 return err; 1355 } 1356 1357 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1358 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1359 1360 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 1361 &protocol_features); 1362 if (err < 0) { 1363 return err; 1364 } 1365 1366 dev->protocol_features = 1367 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; 1368 1369 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1370 /* Don't acknowledge CONFIG feature if device doesn't support it */ 1371 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 1372 } else if (!(protocol_features & 1373 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { 1374 error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG " 1375 "but backend does not support it."); 1376 return -1; 1377 } 1378 1379 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 1380 if (err < 0) { 1381 return err; 1382 } 1383 1384 /* query the max queues we support if backend supports Multiple Queue */ 1385 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 1386 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 1387 &dev->max_queues); 1388 if (err < 0) { 1389 return err; 1390 } 1391 } 1392 1393 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 1394 !(virtio_has_feature(dev->protocol_features, 1395 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 1396 virtio_has_feature(dev->protocol_features, 1397 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 1398 error_report("IOMMU support requires reply-ack and " 1399 "slave-req protocol features."); 1400 return -1; 1401 } 1402 } 1403 1404 if (dev->migration_blocker == NULL && 1405 !virtio_has_feature(dev->protocol_features, 1406 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 1407 error_setg(&dev->migration_blocker, 1408 "Migration disabled: vhost-user backend lacks " 1409 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 1410 } 1411 1412 err = vhost_setup_slave_channel(dev); 1413 if (err < 0) { 1414 return err; 1415 } 1416 1417 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 1418 postcopy_add_notifier(&u->postcopy_notifier); 1419 1420 return 0; 1421 } 1422 1423 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 1424 { 1425 struct vhost_user *u; 1426 1427 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1428 1429 u = dev->opaque; 1430 if (u->postcopy_notifier.notify) { 1431 postcopy_remove_notifier(&u->postcopy_notifier); 1432 u->postcopy_notifier.notify = NULL; 1433 } 1434 u->postcopy_listen = false; 1435 if (u->postcopy_fd.handler) { 1436 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1437 close(u->postcopy_fd.fd); 1438 u->postcopy_fd.handler = NULL; 1439 } 1440 if (u->slave_fd >= 0) { 1441 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1442 close(u->slave_fd); 1443 u->slave_fd = -1; 1444 } 1445 g_free(u->region_rb); 1446 u->region_rb = NULL; 1447 g_free(u->region_rb_offset); 1448 u->region_rb_offset = NULL; 1449 u->region_rb_len = 0; 1450 g_free(u); 1451 dev->opaque = 0; 1452 1453 return 0; 1454 } 1455 1456 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 1457 { 1458 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 1459 1460 return idx; 1461 } 1462 1463 static int vhost_user_memslots_limit(struct vhost_dev *dev) 1464 { 1465 return VHOST_MEMORY_MAX_NREGIONS; 1466 } 1467 1468 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 1469 { 1470 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1471 1472 return virtio_has_feature(dev->protocol_features, 1473 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 1474 } 1475 1476 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 1477 { 1478 VhostUserMsg msg = { }; 1479 1480 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1481 1482 /* If guest supports GUEST_ANNOUNCE do nothing */ 1483 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 1484 return 0; 1485 } 1486 1487 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 1488 if (virtio_has_feature(dev->protocol_features, 1489 VHOST_USER_PROTOCOL_F_RARP)) { 1490 msg.hdr.request = VHOST_USER_SEND_RARP; 1491 msg.hdr.flags = VHOST_USER_VERSION; 1492 memcpy((char *)&msg.payload.u64, mac_addr, 6); 1493 msg.hdr.size = sizeof(msg.payload.u64); 1494 1495 return vhost_user_write(dev, &msg, NULL, 0); 1496 } 1497 return -1; 1498 } 1499 1500 static bool vhost_user_can_merge(struct vhost_dev *dev, 1501 uint64_t start1, uint64_t size1, 1502 uint64_t start2, uint64_t size2) 1503 { 1504 ram_addr_t offset; 1505 int mfd, rfd; 1506 MemoryRegion *mr; 1507 1508 mr = memory_region_from_host((void *)(uintptr_t)start1, &offset); 1509 mfd = memory_region_get_fd(mr); 1510 1511 mr = memory_region_from_host((void *)(uintptr_t)start2, &offset); 1512 rfd = memory_region_get_fd(mr); 1513 1514 return mfd == rfd; 1515 } 1516 1517 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 1518 { 1519 VhostUserMsg msg; 1520 bool reply_supported = virtio_has_feature(dev->protocol_features, 1521 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1522 1523 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 1524 return 0; 1525 } 1526 1527 msg.hdr.request = VHOST_USER_NET_SET_MTU; 1528 msg.payload.u64 = mtu; 1529 msg.hdr.size = sizeof(msg.payload.u64); 1530 msg.hdr.flags = VHOST_USER_VERSION; 1531 if (reply_supported) { 1532 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1533 } 1534 1535 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1536 return -1; 1537 } 1538 1539 /* If reply_ack supported, slave has to ack specified MTU is valid */ 1540 if (reply_supported) { 1541 return process_message_reply(dev, &msg); 1542 } 1543 1544 return 0; 1545 } 1546 1547 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 1548 struct vhost_iotlb_msg *imsg) 1549 { 1550 VhostUserMsg msg = { 1551 .hdr.request = VHOST_USER_IOTLB_MSG, 1552 .hdr.size = sizeof(msg.payload.iotlb), 1553 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1554 .payload.iotlb = *imsg, 1555 }; 1556 1557 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1558 return -EFAULT; 1559 } 1560 1561 return process_message_reply(dev, &msg); 1562 } 1563 1564 1565 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 1566 { 1567 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 1568 } 1569 1570 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 1571 uint32_t config_len) 1572 { 1573 VhostUserMsg msg = { 1574 .hdr.request = VHOST_USER_GET_CONFIG, 1575 .hdr.flags = VHOST_USER_VERSION, 1576 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 1577 }; 1578 1579 if (!virtio_has_feature(dev->protocol_features, 1580 VHOST_USER_PROTOCOL_F_CONFIG)) { 1581 return -1; 1582 } 1583 1584 if (config_len > VHOST_USER_MAX_CONFIG_SIZE) { 1585 return -1; 1586 } 1587 1588 msg.payload.config.offset = 0; 1589 msg.payload.config.size = config_len; 1590 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1591 return -1; 1592 } 1593 1594 if (vhost_user_read(dev, &msg) < 0) { 1595 return -1; 1596 } 1597 1598 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 1599 error_report("Received unexpected msg type. Expected %d received %d", 1600 VHOST_USER_GET_CONFIG, msg.hdr.request); 1601 return -1; 1602 } 1603 1604 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 1605 error_report("Received bad msg size."); 1606 return -1; 1607 } 1608 1609 memcpy(config, msg.payload.config.region, config_len); 1610 1611 return 0; 1612 } 1613 1614 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 1615 uint32_t offset, uint32_t size, uint32_t flags) 1616 { 1617 uint8_t *p; 1618 bool reply_supported = virtio_has_feature(dev->protocol_features, 1619 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1620 1621 VhostUserMsg msg = { 1622 .hdr.request = VHOST_USER_SET_CONFIG, 1623 .hdr.flags = VHOST_USER_VERSION, 1624 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 1625 }; 1626 1627 if (!virtio_has_feature(dev->protocol_features, 1628 VHOST_USER_PROTOCOL_F_CONFIG)) { 1629 return -1; 1630 } 1631 1632 if (reply_supported) { 1633 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1634 } 1635 1636 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 1637 return -1; 1638 } 1639 1640 msg.payload.config.offset = offset, 1641 msg.payload.config.size = size, 1642 msg.payload.config.flags = flags, 1643 p = msg.payload.config.region; 1644 memcpy(p, data, size); 1645 1646 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1647 return -1; 1648 } 1649 1650 if (reply_supported) { 1651 return process_message_reply(dev, &msg); 1652 } 1653 1654 return 0; 1655 } 1656 1657 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 1658 void *session_info, 1659 uint64_t *session_id) 1660 { 1661 bool crypto_session = virtio_has_feature(dev->protocol_features, 1662 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 1663 CryptoDevBackendSymSessionInfo *sess_info = session_info; 1664 VhostUserMsg msg = { 1665 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 1666 .hdr.flags = VHOST_USER_VERSION, 1667 .hdr.size = sizeof(msg.payload.session), 1668 }; 1669 1670 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1671 1672 if (!crypto_session) { 1673 error_report("vhost-user trying to send unhandled ioctl"); 1674 return -1; 1675 } 1676 1677 memcpy(&msg.payload.session.session_setup_data, sess_info, 1678 sizeof(CryptoDevBackendSymSessionInfo)); 1679 if (sess_info->key_len) { 1680 memcpy(&msg.payload.session.key, sess_info->cipher_key, 1681 sess_info->key_len); 1682 } 1683 if (sess_info->auth_key_len > 0) { 1684 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 1685 sess_info->auth_key_len); 1686 } 1687 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1688 error_report("vhost_user_write() return -1, create session failed"); 1689 return -1; 1690 } 1691 1692 if (vhost_user_read(dev, &msg) < 0) { 1693 error_report("vhost_user_read() return -1, create session failed"); 1694 return -1; 1695 } 1696 1697 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 1698 error_report("Received unexpected msg type. Expected %d received %d", 1699 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 1700 return -1; 1701 } 1702 1703 if (msg.hdr.size != sizeof(msg.payload.session)) { 1704 error_report("Received bad msg size."); 1705 return -1; 1706 } 1707 1708 if (msg.payload.session.session_id < 0) { 1709 error_report("Bad session id: %" PRId64 "", 1710 msg.payload.session.session_id); 1711 return -1; 1712 } 1713 *session_id = msg.payload.session.session_id; 1714 1715 return 0; 1716 } 1717 1718 static int 1719 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 1720 { 1721 bool crypto_session = virtio_has_feature(dev->protocol_features, 1722 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 1723 VhostUserMsg msg = { 1724 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 1725 .hdr.flags = VHOST_USER_VERSION, 1726 .hdr.size = sizeof(msg.payload.u64), 1727 }; 1728 msg.payload.u64 = session_id; 1729 1730 if (!crypto_session) { 1731 error_report("vhost-user trying to send unhandled ioctl"); 1732 return -1; 1733 } 1734 1735 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1736 error_report("vhost_user_write() return -1, close session failed"); 1737 return -1; 1738 } 1739 1740 return 0; 1741 } 1742 1743 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 1744 MemoryRegionSection *section) 1745 { 1746 bool result; 1747 1748 result = memory_region_get_fd(section->mr) >= 0; 1749 1750 return result; 1751 } 1752 1753 VhostUserState *vhost_user_init(void) 1754 { 1755 VhostUserState *user = g_new0(struct VhostUserState, 1); 1756 1757 return user; 1758 } 1759 1760 void vhost_user_cleanup(VhostUserState *user) 1761 { 1762 int i; 1763 1764 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1765 if (user->notifier[i].addr) { 1766 object_unparent(OBJECT(&user->notifier[i].mr)); 1767 munmap(user->notifier[i].addr, qemu_real_host_page_size); 1768 user->notifier[i].addr = NULL; 1769 } 1770 } 1771 } 1772 1773 const VhostOps user_ops = { 1774 .backend_type = VHOST_BACKEND_TYPE_USER, 1775 .vhost_backend_init = vhost_user_backend_init, 1776 .vhost_backend_cleanup = vhost_user_backend_cleanup, 1777 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 1778 .vhost_set_log_base = vhost_user_set_log_base, 1779 .vhost_set_mem_table = vhost_user_set_mem_table, 1780 .vhost_set_vring_addr = vhost_user_set_vring_addr, 1781 .vhost_set_vring_endian = vhost_user_set_vring_endian, 1782 .vhost_set_vring_num = vhost_user_set_vring_num, 1783 .vhost_set_vring_base = vhost_user_set_vring_base, 1784 .vhost_get_vring_base = vhost_user_get_vring_base, 1785 .vhost_set_vring_kick = vhost_user_set_vring_kick, 1786 .vhost_set_vring_call = vhost_user_set_vring_call, 1787 .vhost_set_features = vhost_user_set_features, 1788 .vhost_get_features = vhost_user_get_features, 1789 .vhost_set_owner = vhost_user_set_owner, 1790 .vhost_reset_device = vhost_user_reset_device, 1791 .vhost_get_vq_index = vhost_user_get_vq_index, 1792 .vhost_set_vring_enable = vhost_user_set_vring_enable, 1793 .vhost_requires_shm_log = vhost_user_requires_shm_log, 1794 .vhost_migration_done = vhost_user_migration_done, 1795 .vhost_backend_can_merge = vhost_user_can_merge, 1796 .vhost_net_set_mtu = vhost_user_net_set_mtu, 1797 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 1798 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 1799 .vhost_get_config = vhost_user_get_config, 1800 .vhost_set_config = vhost_user_set_config, 1801 .vhost_crypto_create_session = vhost_user_crypto_create_session, 1802 .vhost_crypto_close_session = vhost_user_crypto_close_session, 1803 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 1804 }; 1805