1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "sysemu/kvm.h" 20 #include "qemu/error-report.h" 21 #include "qemu/sockets.h" 22 #include "sysemu/cryptodev.h" 23 #include "migration/migration.h" 24 #include "migration/postcopy-ram.h" 25 #include "trace.h" 26 27 #include <sys/ioctl.h> 28 #include <sys/socket.h> 29 #include <sys/un.h> 30 #include <linux/vhost.h> 31 #include <linux/userfaultfd.h> 32 33 #define VHOST_MEMORY_MAX_NREGIONS 8 34 #define VHOST_USER_F_PROTOCOL_FEATURES 30 35 #define VHOST_USER_SLAVE_MAX_FDS 8 36 37 /* 38 * Maximum size of virtio device config space 39 */ 40 #define VHOST_USER_MAX_CONFIG_SIZE 256 41 42 enum VhostUserProtocolFeature { 43 VHOST_USER_PROTOCOL_F_MQ = 0, 44 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 45 VHOST_USER_PROTOCOL_F_RARP = 2, 46 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 47 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 48 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 49 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 50 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 51 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 52 VHOST_USER_PROTOCOL_F_CONFIG = 9, 53 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 54 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 55 VHOST_USER_PROTOCOL_F_MAX 56 }; 57 58 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 59 60 typedef enum VhostUserRequest { 61 VHOST_USER_NONE = 0, 62 VHOST_USER_GET_FEATURES = 1, 63 VHOST_USER_SET_FEATURES = 2, 64 VHOST_USER_SET_OWNER = 3, 65 VHOST_USER_RESET_OWNER = 4, 66 VHOST_USER_SET_MEM_TABLE = 5, 67 VHOST_USER_SET_LOG_BASE = 6, 68 VHOST_USER_SET_LOG_FD = 7, 69 VHOST_USER_SET_VRING_NUM = 8, 70 VHOST_USER_SET_VRING_ADDR = 9, 71 VHOST_USER_SET_VRING_BASE = 10, 72 VHOST_USER_GET_VRING_BASE = 11, 73 VHOST_USER_SET_VRING_KICK = 12, 74 VHOST_USER_SET_VRING_CALL = 13, 75 VHOST_USER_SET_VRING_ERR = 14, 76 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 77 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 78 VHOST_USER_GET_QUEUE_NUM = 17, 79 VHOST_USER_SET_VRING_ENABLE = 18, 80 VHOST_USER_SEND_RARP = 19, 81 VHOST_USER_NET_SET_MTU = 20, 82 VHOST_USER_SET_SLAVE_REQ_FD = 21, 83 VHOST_USER_IOTLB_MSG = 22, 84 VHOST_USER_SET_VRING_ENDIAN = 23, 85 VHOST_USER_GET_CONFIG = 24, 86 VHOST_USER_SET_CONFIG = 25, 87 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 88 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 89 VHOST_USER_POSTCOPY_ADVISE = 28, 90 VHOST_USER_POSTCOPY_LISTEN = 29, 91 VHOST_USER_POSTCOPY_END = 30, 92 VHOST_USER_MAX 93 } VhostUserRequest; 94 95 typedef enum VhostUserSlaveRequest { 96 VHOST_USER_SLAVE_NONE = 0, 97 VHOST_USER_SLAVE_IOTLB_MSG = 1, 98 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 99 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 100 VHOST_USER_SLAVE_MAX 101 } VhostUserSlaveRequest; 102 103 typedef struct VhostUserMemoryRegion { 104 uint64_t guest_phys_addr; 105 uint64_t memory_size; 106 uint64_t userspace_addr; 107 uint64_t mmap_offset; 108 } VhostUserMemoryRegion; 109 110 typedef struct VhostUserMemory { 111 uint32_t nregions; 112 uint32_t padding; 113 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 114 } VhostUserMemory; 115 116 typedef struct VhostUserLog { 117 uint64_t mmap_size; 118 uint64_t mmap_offset; 119 } VhostUserLog; 120 121 typedef struct VhostUserConfig { 122 uint32_t offset; 123 uint32_t size; 124 uint32_t flags; 125 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 126 } VhostUserConfig; 127 128 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 129 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 130 131 typedef struct VhostUserCryptoSession { 132 /* session id for success, -1 on errors */ 133 int64_t session_id; 134 CryptoDevBackendSymSessionInfo session_setup_data; 135 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 136 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 137 } VhostUserCryptoSession; 138 139 static VhostUserConfig c __attribute__ ((unused)); 140 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 141 + sizeof(c.size) \ 142 + sizeof(c.flags)) 143 144 typedef struct VhostUserVringArea { 145 uint64_t u64; 146 uint64_t size; 147 uint64_t offset; 148 } VhostUserVringArea; 149 150 typedef struct { 151 VhostUserRequest request; 152 153 #define VHOST_USER_VERSION_MASK (0x3) 154 #define VHOST_USER_REPLY_MASK (0x1<<2) 155 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 156 uint32_t flags; 157 uint32_t size; /* the following payload size */ 158 } QEMU_PACKED VhostUserHeader; 159 160 typedef union { 161 #define VHOST_USER_VRING_IDX_MASK (0xff) 162 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 163 uint64_t u64; 164 struct vhost_vring_state state; 165 struct vhost_vring_addr addr; 166 VhostUserMemory memory; 167 VhostUserLog log; 168 struct vhost_iotlb_msg iotlb; 169 VhostUserConfig config; 170 VhostUserCryptoSession session; 171 VhostUserVringArea area; 172 } VhostUserPayload; 173 174 typedef struct VhostUserMsg { 175 VhostUserHeader hdr; 176 VhostUserPayload payload; 177 } QEMU_PACKED VhostUserMsg; 178 179 static VhostUserMsg m __attribute__ ((unused)); 180 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 181 182 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 183 184 /* The version of the protocol we support */ 185 #define VHOST_USER_VERSION (0x1) 186 187 struct vhost_user { 188 struct vhost_dev *dev; 189 /* Shared between vhost devs of the same virtio device */ 190 VhostUserState *user; 191 int slave_fd; 192 NotifierWithReturn postcopy_notifier; 193 struct PostCopyFD postcopy_fd; 194 uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; 195 /* Length of the region_rb and region_rb_offset arrays */ 196 size_t region_rb_len; 197 /* RAMBlock associated with a given region */ 198 RAMBlock **region_rb; 199 /* The offset from the start of the RAMBlock to the start of the 200 * vhost region. 201 */ 202 ram_addr_t *region_rb_offset; 203 204 /* True once we've entered postcopy_listen */ 205 bool postcopy_listen; 206 }; 207 208 static bool ioeventfd_enabled(void) 209 { 210 return !kvm_enabled() || kvm_eventfds_enabled(); 211 } 212 213 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 214 { 215 struct vhost_user *u = dev->opaque; 216 CharBackend *chr = u->user->chr; 217 uint8_t *p = (uint8_t *) msg; 218 int r, size = VHOST_USER_HDR_SIZE; 219 220 r = qemu_chr_fe_read_all(chr, p, size); 221 if (r != size) { 222 error_report("Failed to read msg header. Read %d instead of %d." 223 " Original request %d.", r, size, msg->hdr.request); 224 goto fail; 225 } 226 227 /* validate received flags */ 228 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 229 error_report("Failed to read msg header." 230 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 231 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 232 goto fail; 233 } 234 235 /* validate message size is sane */ 236 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 237 error_report("Failed to read msg header." 238 " Size %d exceeds the maximum %zu.", msg->hdr.size, 239 VHOST_USER_PAYLOAD_SIZE); 240 goto fail; 241 } 242 243 if (msg->hdr.size) { 244 p += VHOST_USER_HDR_SIZE; 245 size = msg->hdr.size; 246 r = qemu_chr_fe_read_all(chr, p, size); 247 if (r != size) { 248 error_report("Failed to read msg payload." 249 " Read %d instead of %d.", r, msg->hdr.size); 250 goto fail; 251 } 252 } 253 254 return 0; 255 256 fail: 257 return -1; 258 } 259 260 static int process_message_reply(struct vhost_dev *dev, 261 const VhostUserMsg *msg) 262 { 263 VhostUserMsg msg_reply; 264 265 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 266 return 0; 267 } 268 269 if (vhost_user_read(dev, &msg_reply) < 0) { 270 return -1; 271 } 272 273 if (msg_reply.hdr.request != msg->hdr.request) { 274 error_report("Received unexpected msg type." 275 "Expected %d received %d", 276 msg->hdr.request, msg_reply.hdr.request); 277 return -1; 278 } 279 280 return msg_reply.payload.u64 ? -1 : 0; 281 } 282 283 static bool vhost_user_one_time_request(VhostUserRequest request) 284 { 285 switch (request) { 286 case VHOST_USER_SET_OWNER: 287 case VHOST_USER_RESET_OWNER: 288 case VHOST_USER_SET_MEM_TABLE: 289 case VHOST_USER_GET_QUEUE_NUM: 290 case VHOST_USER_NET_SET_MTU: 291 return true; 292 default: 293 return false; 294 } 295 } 296 297 /* most non-init callers ignore the error */ 298 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 299 int *fds, int fd_num) 300 { 301 struct vhost_user *u = dev->opaque; 302 CharBackend *chr = u->user->chr; 303 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 304 305 /* 306 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 307 * we just need send it once in the first time. For later such 308 * request, we just ignore it. 309 */ 310 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 311 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 312 return 0; 313 } 314 315 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 316 error_report("Failed to set msg fds."); 317 return -1; 318 } 319 320 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 321 if (ret != size) { 322 error_report("Failed to write msg." 323 " Wrote %d instead of %d.", ret, size); 324 return -1; 325 } 326 327 return 0; 328 } 329 330 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 331 struct vhost_log *log) 332 { 333 int fds[VHOST_MEMORY_MAX_NREGIONS]; 334 size_t fd_num = 0; 335 bool shmfd = virtio_has_feature(dev->protocol_features, 336 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 337 VhostUserMsg msg = { 338 .hdr.request = VHOST_USER_SET_LOG_BASE, 339 .hdr.flags = VHOST_USER_VERSION, 340 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 341 .payload.log.mmap_offset = 0, 342 .hdr.size = sizeof(msg.payload.log), 343 }; 344 345 if (shmfd && log->fd != -1) { 346 fds[fd_num++] = log->fd; 347 } 348 349 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 350 return -1; 351 } 352 353 if (shmfd) { 354 msg.hdr.size = 0; 355 if (vhost_user_read(dev, &msg) < 0) { 356 return -1; 357 } 358 359 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 360 error_report("Received unexpected msg type. " 361 "Expected %d received %d", 362 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 363 return -1; 364 } 365 } 366 367 return 0; 368 } 369 370 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 371 struct vhost_memory *mem) 372 { 373 struct vhost_user *u = dev->opaque; 374 int fds[VHOST_MEMORY_MAX_NREGIONS]; 375 int i, fd; 376 size_t fd_num = 0; 377 VhostUserMsg msg_reply; 378 int region_i, msg_i; 379 380 VhostUserMsg msg = { 381 .hdr.request = VHOST_USER_SET_MEM_TABLE, 382 .hdr.flags = VHOST_USER_VERSION, 383 }; 384 385 if (u->region_rb_len < dev->mem->nregions) { 386 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 387 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 388 dev->mem->nregions); 389 memset(&(u->region_rb[u->region_rb_len]), '\0', 390 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 391 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 392 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 393 u->region_rb_len = dev->mem->nregions; 394 } 395 396 for (i = 0; i < dev->mem->nregions; ++i) { 397 struct vhost_memory_region *reg = dev->mem->regions + i; 398 ram_addr_t offset; 399 MemoryRegion *mr; 400 401 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 402 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 403 &offset); 404 fd = memory_region_get_fd(mr); 405 if (fd > 0) { 406 trace_vhost_user_set_mem_table_withfd(fd_num, mr->name, 407 reg->memory_size, 408 reg->guest_phys_addr, 409 reg->userspace_addr, offset); 410 u->region_rb_offset[i] = offset; 411 u->region_rb[i] = mr->ram_block; 412 msg.payload.memory.regions[fd_num].userspace_addr = 413 reg->userspace_addr; 414 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 415 msg.payload.memory.regions[fd_num].guest_phys_addr = 416 reg->guest_phys_addr; 417 msg.payload.memory.regions[fd_num].mmap_offset = offset; 418 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 419 fds[fd_num++] = fd; 420 } else { 421 u->region_rb_offset[i] = 0; 422 u->region_rb[i] = NULL; 423 } 424 } 425 426 msg.payload.memory.nregions = fd_num; 427 428 if (!fd_num) { 429 error_report("Failed initializing vhost-user memory map, " 430 "consider using -object memory-backend-file share=on"); 431 return -1; 432 } 433 434 msg.hdr.size = sizeof(msg.payload.memory.nregions); 435 msg.hdr.size += sizeof(msg.payload.memory.padding); 436 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 437 438 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 439 return -1; 440 } 441 442 if (vhost_user_read(dev, &msg_reply) < 0) { 443 return -1; 444 } 445 446 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 447 error_report("%s: Received unexpected msg type." 448 "Expected %d received %d", __func__, 449 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 450 return -1; 451 } 452 /* We're using the same structure, just reusing one of the 453 * fields, so it should be the same size. 454 */ 455 if (msg_reply.hdr.size != msg.hdr.size) { 456 error_report("%s: Unexpected size for postcopy reply " 457 "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); 458 return -1; 459 } 460 461 memset(u->postcopy_client_bases, 0, 462 sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); 463 464 /* They're in the same order as the regions that were sent 465 * but some of the regions were skipped (above) if they 466 * didn't have fd's 467 */ 468 for (msg_i = 0, region_i = 0; 469 region_i < dev->mem->nregions; 470 region_i++) { 471 if (msg_i < fd_num && 472 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 473 dev->mem->regions[region_i].guest_phys_addr) { 474 u->postcopy_client_bases[region_i] = 475 msg_reply.payload.memory.regions[msg_i].userspace_addr; 476 trace_vhost_user_set_mem_table_postcopy( 477 msg_reply.payload.memory.regions[msg_i].userspace_addr, 478 msg.payload.memory.regions[msg_i].userspace_addr, 479 msg_i, region_i); 480 msg_i++; 481 } 482 } 483 if (msg_i != fd_num) { 484 error_report("%s: postcopy reply not fully consumed " 485 "%d vs %zd", 486 __func__, msg_i, fd_num); 487 return -1; 488 } 489 /* Now we've registered this with the postcopy code, we ack to the client, 490 * because now we're in the position to be able to deal with any faults 491 * it generates. 492 */ 493 /* TODO: Use this for failure cases as well with a bad value */ 494 msg.hdr.size = sizeof(msg.payload.u64); 495 msg.payload.u64 = 0; /* OK */ 496 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 497 return -1; 498 } 499 500 return 0; 501 } 502 503 static int vhost_user_set_mem_table(struct vhost_dev *dev, 504 struct vhost_memory *mem) 505 { 506 struct vhost_user *u = dev->opaque; 507 int fds[VHOST_MEMORY_MAX_NREGIONS]; 508 int i, fd; 509 size_t fd_num = 0; 510 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 511 bool reply_supported = virtio_has_feature(dev->protocol_features, 512 VHOST_USER_PROTOCOL_F_REPLY_ACK); 513 514 if (do_postcopy) { 515 /* Postcopy has enough differences that it's best done in it's own 516 * version 517 */ 518 return vhost_user_set_mem_table_postcopy(dev, mem); 519 } 520 521 VhostUserMsg msg = { 522 .hdr.request = VHOST_USER_SET_MEM_TABLE, 523 .hdr.flags = VHOST_USER_VERSION, 524 }; 525 526 if (reply_supported) { 527 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 528 } 529 530 for (i = 0; i < dev->mem->nregions; ++i) { 531 struct vhost_memory_region *reg = dev->mem->regions + i; 532 ram_addr_t offset; 533 MemoryRegion *mr; 534 535 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 536 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 537 &offset); 538 fd = memory_region_get_fd(mr); 539 if (fd > 0) { 540 if (fd_num == VHOST_MEMORY_MAX_NREGIONS) { 541 error_report("Failed preparing vhost-user memory table msg"); 542 return -1; 543 } 544 msg.payload.memory.regions[fd_num].userspace_addr = 545 reg->userspace_addr; 546 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 547 msg.payload.memory.regions[fd_num].guest_phys_addr = 548 reg->guest_phys_addr; 549 msg.payload.memory.regions[fd_num].mmap_offset = offset; 550 fds[fd_num++] = fd; 551 } 552 } 553 554 msg.payload.memory.nregions = fd_num; 555 556 if (!fd_num) { 557 error_report("Failed initializing vhost-user memory map, " 558 "consider using -object memory-backend-file share=on"); 559 return -1; 560 } 561 562 msg.hdr.size = sizeof(msg.payload.memory.nregions); 563 msg.hdr.size += sizeof(msg.payload.memory.padding); 564 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 565 566 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 567 return -1; 568 } 569 570 if (reply_supported) { 571 return process_message_reply(dev, &msg); 572 } 573 574 return 0; 575 } 576 577 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 578 struct vhost_vring_addr *addr) 579 { 580 VhostUserMsg msg = { 581 .hdr.request = VHOST_USER_SET_VRING_ADDR, 582 .hdr.flags = VHOST_USER_VERSION, 583 .payload.addr = *addr, 584 .hdr.size = sizeof(msg.payload.addr), 585 }; 586 587 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 588 return -1; 589 } 590 591 return 0; 592 } 593 594 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 595 struct vhost_vring_state *ring) 596 { 597 bool cross_endian = virtio_has_feature(dev->protocol_features, 598 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 599 VhostUserMsg msg = { 600 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 601 .hdr.flags = VHOST_USER_VERSION, 602 .payload.state = *ring, 603 .hdr.size = sizeof(msg.payload.state), 604 }; 605 606 if (!cross_endian) { 607 error_report("vhost-user trying to send unhandled ioctl"); 608 return -1; 609 } 610 611 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 612 return -1; 613 } 614 615 return 0; 616 } 617 618 static int vhost_set_vring(struct vhost_dev *dev, 619 unsigned long int request, 620 struct vhost_vring_state *ring) 621 { 622 VhostUserMsg msg = { 623 .hdr.request = request, 624 .hdr.flags = VHOST_USER_VERSION, 625 .payload.state = *ring, 626 .hdr.size = sizeof(msg.payload.state), 627 }; 628 629 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 630 return -1; 631 } 632 633 return 0; 634 } 635 636 static int vhost_user_set_vring_num(struct vhost_dev *dev, 637 struct vhost_vring_state *ring) 638 { 639 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 640 } 641 642 static void vhost_user_host_notifier_restore(struct vhost_dev *dev, 643 int queue_idx) 644 { 645 struct vhost_user *u = dev->opaque; 646 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 647 VirtIODevice *vdev = dev->vdev; 648 649 if (n->addr && !n->set) { 650 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true); 651 n->set = true; 652 } 653 } 654 655 static void vhost_user_host_notifier_remove(struct vhost_dev *dev, 656 int queue_idx) 657 { 658 struct vhost_user *u = dev->opaque; 659 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 660 VirtIODevice *vdev = dev->vdev; 661 662 if (n->addr && n->set) { 663 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 664 n->set = false; 665 } 666 } 667 668 static int vhost_user_set_vring_base(struct vhost_dev *dev, 669 struct vhost_vring_state *ring) 670 { 671 vhost_user_host_notifier_restore(dev, ring->index); 672 673 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 674 } 675 676 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 677 { 678 int i; 679 680 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 681 return -1; 682 } 683 684 for (i = 0; i < dev->nvqs; ++i) { 685 struct vhost_vring_state state = { 686 .index = dev->vq_index + i, 687 .num = enable, 688 }; 689 690 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 691 } 692 693 return 0; 694 } 695 696 static int vhost_user_get_vring_base(struct vhost_dev *dev, 697 struct vhost_vring_state *ring) 698 { 699 VhostUserMsg msg = { 700 .hdr.request = VHOST_USER_GET_VRING_BASE, 701 .hdr.flags = VHOST_USER_VERSION, 702 .payload.state = *ring, 703 .hdr.size = sizeof(msg.payload.state), 704 }; 705 706 vhost_user_host_notifier_remove(dev, ring->index); 707 708 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 709 return -1; 710 } 711 712 if (vhost_user_read(dev, &msg) < 0) { 713 return -1; 714 } 715 716 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 717 error_report("Received unexpected msg type. Expected %d received %d", 718 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 719 return -1; 720 } 721 722 if (msg.hdr.size != sizeof(msg.payload.state)) { 723 error_report("Received bad msg size."); 724 return -1; 725 } 726 727 *ring = msg.payload.state; 728 729 return 0; 730 } 731 732 static int vhost_set_vring_file(struct vhost_dev *dev, 733 VhostUserRequest request, 734 struct vhost_vring_file *file) 735 { 736 int fds[VHOST_MEMORY_MAX_NREGIONS]; 737 size_t fd_num = 0; 738 VhostUserMsg msg = { 739 .hdr.request = request, 740 .hdr.flags = VHOST_USER_VERSION, 741 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 742 .hdr.size = sizeof(msg.payload.u64), 743 }; 744 745 if (ioeventfd_enabled() && file->fd > 0) { 746 fds[fd_num++] = file->fd; 747 } else { 748 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 749 } 750 751 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 752 return -1; 753 } 754 755 return 0; 756 } 757 758 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 759 struct vhost_vring_file *file) 760 { 761 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 762 } 763 764 static int vhost_user_set_vring_call(struct vhost_dev *dev, 765 struct vhost_vring_file *file) 766 { 767 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 768 } 769 770 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 771 { 772 VhostUserMsg msg = { 773 .hdr.request = request, 774 .hdr.flags = VHOST_USER_VERSION, 775 .payload.u64 = u64, 776 .hdr.size = sizeof(msg.payload.u64), 777 }; 778 779 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 780 return -1; 781 } 782 783 return 0; 784 } 785 786 static int vhost_user_set_features(struct vhost_dev *dev, 787 uint64_t features) 788 { 789 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 790 } 791 792 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 793 uint64_t features) 794 { 795 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 796 } 797 798 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 799 { 800 VhostUserMsg msg = { 801 .hdr.request = request, 802 .hdr.flags = VHOST_USER_VERSION, 803 }; 804 805 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 806 return 0; 807 } 808 809 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 810 return -1; 811 } 812 813 if (vhost_user_read(dev, &msg) < 0) { 814 return -1; 815 } 816 817 if (msg.hdr.request != request) { 818 error_report("Received unexpected msg type. Expected %d received %d", 819 request, msg.hdr.request); 820 return -1; 821 } 822 823 if (msg.hdr.size != sizeof(msg.payload.u64)) { 824 error_report("Received bad msg size."); 825 return -1; 826 } 827 828 *u64 = msg.payload.u64; 829 830 return 0; 831 } 832 833 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 834 { 835 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 836 } 837 838 static int vhost_user_set_owner(struct vhost_dev *dev) 839 { 840 VhostUserMsg msg = { 841 .hdr.request = VHOST_USER_SET_OWNER, 842 .hdr.flags = VHOST_USER_VERSION, 843 }; 844 845 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 846 return -1; 847 } 848 849 return 0; 850 } 851 852 static int vhost_user_reset_device(struct vhost_dev *dev) 853 { 854 VhostUserMsg msg = { 855 .hdr.request = VHOST_USER_RESET_OWNER, 856 .hdr.flags = VHOST_USER_VERSION, 857 }; 858 859 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 860 return -1; 861 } 862 863 return 0; 864 } 865 866 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 867 { 868 int ret = -1; 869 870 if (!dev->config_ops) { 871 return -1; 872 } 873 874 if (dev->config_ops->vhost_dev_config_notifier) { 875 ret = dev->config_ops->vhost_dev_config_notifier(dev); 876 } 877 878 return ret; 879 } 880 881 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 882 VhostUserVringArea *area, 883 int fd) 884 { 885 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 886 size_t page_size = qemu_real_host_page_size; 887 struct vhost_user *u = dev->opaque; 888 VhostUserState *user = u->user; 889 VirtIODevice *vdev = dev->vdev; 890 VhostUserHostNotifier *n; 891 void *addr; 892 char *name; 893 894 if (!virtio_has_feature(dev->protocol_features, 895 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 896 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 897 return -1; 898 } 899 900 n = &user->notifier[queue_idx]; 901 902 if (n->addr) { 903 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 904 object_unparent(OBJECT(&n->mr)); 905 munmap(n->addr, page_size); 906 n->addr = NULL; 907 } 908 909 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 910 return 0; 911 } 912 913 /* Sanity check. */ 914 if (area->size != page_size) { 915 return -1; 916 } 917 918 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 919 fd, area->offset); 920 if (addr == MAP_FAILED) { 921 return -1; 922 } 923 924 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 925 user, queue_idx); 926 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 927 page_size, addr); 928 g_free(name); 929 930 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 931 munmap(addr, page_size); 932 return -1; 933 } 934 935 n->addr = addr; 936 n->set = true; 937 938 return 0; 939 } 940 941 static void slave_read(void *opaque) 942 { 943 struct vhost_dev *dev = opaque; 944 struct vhost_user *u = dev->opaque; 945 VhostUserHeader hdr = { 0, }; 946 VhostUserPayload payload = { 0, }; 947 int size, ret = 0; 948 struct iovec iov; 949 struct msghdr msgh; 950 int fd[VHOST_USER_SLAVE_MAX_FDS]; 951 char control[CMSG_SPACE(sizeof(fd))]; 952 struct cmsghdr *cmsg; 953 int i, fdsize = 0; 954 955 memset(&msgh, 0, sizeof(msgh)); 956 msgh.msg_iov = &iov; 957 msgh.msg_iovlen = 1; 958 msgh.msg_control = control; 959 msgh.msg_controllen = sizeof(control); 960 961 memset(fd, -1, sizeof(fd)); 962 963 /* Read header */ 964 iov.iov_base = &hdr; 965 iov.iov_len = VHOST_USER_HDR_SIZE; 966 967 size = recvmsg(u->slave_fd, &msgh, 0); 968 if (size != VHOST_USER_HDR_SIZE) { 969 error_report("Failed to read from slave."); 970 goto err; 971 } 972 973 if (msgh.msg_flags & MSG_CTRUNC) { 974 error_report("Truncated message."); 975 goto err; 976 } 977 978 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 979 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 980 if (cmsg->cmsg_level == SOL_SOCKET && 981 cmsg->cmsg_type == SCM_RIGHTS) { 982 fdsize = cmsg->cmsg_len - CMSG_LEN(0); 983 memcpy(fd, CMSG_DATA(cmsg), fdsize); 984 break; 985 } 986 } 987 988 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 989 error_report("Failed to read msg header." 990 " Size %d exceeds the maximum %zu.", hdr.size, 991 VHOST_USER_PAYLOAD_SIZE); 992 goto err; 993 } 994 995 /* Read payload */ 996 size = read(u->slave_fd, &payload, hdr.size); 997 if (size != hdr.size) { 998 error_report("Failed to read payload from slave."); 999 goto err; 1000 } 1001 1002 switch (hdr.request) { 1003 case VHOST_USER_SLAVE_IOTLB_MSG: 1004 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1005 break; 1006 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1007 ret = vhost_user_slave_handle_config_change(dev); 1008 break; 1009 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1010 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1011 fd[0]); 1012 break; 1013 default: 1014 error_report("Received unexpected msg type."); 1015 ret = -EINVAL; 1016 } 1017 1018 /* Close the remaining file descriptors. */ 1019 for (i = 0; i < fdsize; i++) { 1020 if (fd[i] != -1) { 1021 close(fd[i]); 1022 } 1023 } 1024 1025 /* 1026 * REPLY_ACK feature handling. Other reply types has to be managed 1027 * directly in their request handlers. 1028 */ 1029 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1030 struct iovec iovec[2]; 1031 1032 1033 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1034 hdr.flags |= VHOST_USER_REPLY_MASK; 1035 1036 payload.u64 = !!ret; 1037 hdr.size = sizeof(payload.u64); 1038 1039 iovec[0].iov_base = &hdr; 1040 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1041 iovec[1].iov_base = &payload; 1042 iovec[1].iov_len = hdr.size; 1043 1044 size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); 1045 if (size != VHOST_USER_HDR_SIZE + hdr.size) { 1046 error_report("Failed to send msg reply to slave."); 1047 goto err; 1048 } 1049 } 1050 1051 return; 1052 1053 err: 1054 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1055 close(u->slave_fd); 1056 u->slave_fd = -1; 1057 for (i = 0; i < fdsize; i++) { 1058 if (fd[i] != -1) { 1059 close(fd[i]); 1060 } 1061 } 1062 return; 1063 } 1064 1065 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1066 { 1067 VhostUserMsg msg = { 1068 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, 1069 .hdr.flags = VHOST_USER_VERSION, 1070 }; 1071 struct vhost_user *u = dev->opaque; 1072 int sv[2], ret = 0; 1073 bool reply_supported = virtio_has_feature(dev->protocol_features, 1074 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1075 1076 if (!virtio_has_feature(dev->protocol_features, 1077 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 1078 return 0; 1079 } 1080 1081 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1082 error_report("socketpair() failed"); 1083 return -1; 1084 } 1085 1086 u->slave_fd = sv[0]; 1087 qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev); 1088 1089 if (reply_supported) { 1090 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1091 } 1092 1093 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1094 if (ret) { 1095 goto out; 1096 } 1097 1098 if (reply_supported) { 1099 ret = process_message_reply(dev, &msg); 1100 } 1101 1102 out: 1103 close(sv[1]); 1104 if (ret) { 1105 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1106 close(u->slave_fd); 1107 u->slave_fd = -1; 1108 } 1109 1110 return ret; 1111 } 1112 1113 /* 1114 * Called back from the postcopy fault thread when a fault is received on our 1115 * ufd. 1116 * TODO: This is Linux specific 1117 */ 1118 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1119 void *ufd) 1120 { 1121 struct vhost_dev *dev = pcfd->data; 1122 struct vhost_user *u = dev->opaque; 1123 struct uffd_msg *msg = ufd; 1124 uint64_t faultaddr = msg->arg.pagefault.address; 1125 RAMBlock *rb = NULL; 1126 uint64_t rb_offset; 1127 int i; 1128 1129 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1130 dev->mem->nregions); 1131 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1132 trace_vhost_user_postcopy_fault_handler_loop(i, 1133 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1134 if (faultaddr >= u->postcopy_client_bases[i]) { 1135 /* Ofset of the fault address in the vhost region */ 1136 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1137 if (region_offset < dev->mem->regions[i].memory_size) { 1138 rb_offset = region_offset + u->region_rb_offset[i]; 1139 trace_vhost_user_postcopy_fault_handler_found(i, 1140 region_offset, rb_offset); 1141 rb = u->region_rb[i]; 1142 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1143 rb_offset); 1144 } 1145 } 1146 } 1147 error_report("%s: Failed to find region for fault %" PRIx64, 1148 __func__, faultaddr); 1149 return -1; 1150 } 1151 1152 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1153 uint64_t offset) 1154 { 1155 struct vhost_dev *dev = pcfd->data; 1156 struct vhost_user *u = dev->opaque; 1157 int i; 1158 1159 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1160 1161 if (!u) { 1162 return 0; 1163 } 1164 /* Translate the offset into an address in the clients address space */ 1165 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1166 if (u->region_rb[i] == rb && 1167 offset >= u->region_rb_offset[i] && 1168 offset < (u->region_rb_offset[i] + 1169 dev->mem->regions[i].memory_size)) { 1170 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1171 u->postcopy_client_bases[i]; 1172 trace_vhost_user_postcopy_waker_found(client_addr); 1173 return postcopy_wake_shared(pcfd, client_addr, rb); 1174 } 1175 } 1176 1177 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1178 return 0; 1179 } 1180 1181 /* 1182 * Called at the start of an inbound postcopy on reception of the 1183 * 'advise' command. 1184 */ 1185 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1186 { 1187 struct vhost_user *u = dev->opaque; 1188 CharBackend *chr = u->user->chr; 1189 int ufd; 1190 VhostUserMsg msg = { 1191 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1192 .hdr.flags = VHOST_USER_VERSION, 1193 }; 1194 1195 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1196 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1197 return -1; 1198 } 1199 1200 if (vhost_user_read(dev, &msg) < 0) { 1201 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1202 return -1; 1203 } 1204 1205 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1206 error_setg(errp, "Unexpected msg type. Expected %d received %d", 1207 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1208 return -1; 1209 } 1210 1211 if (msg.hdr.size) { 1212 error_setg(errp, "Received bad msg size."); 1213 return -1; 1214 } 1215 ufd = qemu_chr_fe_get_msgfd(chr); 1216 if (ufd < 0) { 1217 error_setg(errp, "%s: Failed to get ufd", __func__); 1218 return -1; 1219 } 1220 qemu_set_nonblock(ufd); 1221 1222 /* register ufd with userfault thread */ 1223 u->postcopy_fd.fd = ufd; 1224 u->postcopy_fd.data = dev; 1225 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1226 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1227 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1228 postcopy_register_shared_ufd(&u->postcopy_fd); 1229 return 0; 1230 } 1231 1232 /* 1233 * Called at the switch to postcopy on reception of the 'listen' command. 1234 */ 1235 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1236 { 1237 struct vhost_user *u = dev->opaque; 1238 int ret; 1239 VhostUserMsg msg = { 1240 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1241 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1242 }; 1243 u->postcopy_listen = true; 1244 trace_vhost_user_postcopy_listen(); 1245 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1246 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1247 return -1; 1248 } 1249 1250 ret = process_message_reply(dev, &msg); 1251 if (ret) { 1252 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1253 return ret; 1254 } 1255 1256 return 0; 1257 } 1258 1259 /* 1260 * Called at the end of postcopy 1261 */ 1262 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1263 { 1264 VhostUserMsg msg = { 1265 .hdr.request = VHOST_USER_POSTCOPY_END, 1266 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1267 }; 1268 int ret; 1269 struct vhost_user *u = dev->opaque; 1270 1271 trace_vhost_user_postcopy_end_entry(); 1272 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1273 error_setg(errp, "Failed to send postcopy_end to vhost"); 1274 return -1; 1275 } 1276 1277 ret = process_message_reply(dev, &msg); 1278 if (ret) { 1279 error_setg(errp, "Failed to receive reply to postcopy_end"); 1280 return ret; 1281 } 1282 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1283 close(u->postcopy_fd.fd); 1284 u->postcopy_fd.handler = NULL; 1285 1286 trace_vhost_user_postcopy_end_exit(); 1287 1288 return 0; 1289 } 1290 1291 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1292 void *opaque) 1293 { 1294 struct PostcopyNotifyData *pnd = opaque; 1295 struct vhost_user *u = container_of(notifier, struct vhost_user, 1296 postcopy_notifier); 1297 struct vhost_dev *dev = u->dev; 1298 1299 switch (pnd->reason) { 1300 case POSTCOPY_NOTIFY_PROBE: 1301 if (!virtio_has_feature(dev->protocol_features, 1302 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1303 /* TODO: Get the device name into this error somehow */ 1304 error_setg(pnd->errp, 1305 "vhost-user backend not capable of postcopy"); 1306 return -ENOENT; 1307 } 1308 break; 1309 1310 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1311 return vhost_user_postcopy_advise(dev, pnd->errp); 1312 1313 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1314 return vhost_user_postcopy_listen(dev, pnd->errp); 1315 1316 case POSTCOPY_NOTIFY_INBOUND_END: 1317 return vhost_user_postcopy_end(dev, pnd->errp); 1318 1319 default: 1320 /* We ignore notifications we don't know */ 1321 break; 1322 } 1323 1324 return 0; 1325 } 1326 1327 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) 1328 { 1329 uint64_t features, protocol_features; 1330 struct vhost_user *u; 1331 int err; 1332 1333 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1334 1335 u = g_new0(struct vhost_user, 1); 1336 u->user = opaque; 1337 u->slave_fd = -1; 1338 u->dev = dev; 1339 dev->opaque = u; 1340 1341 err = vhost_user_get_features(dev, &features); 1342 if (err < 0) { 1343 return err; 1344 } 1345 1346 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1347 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1348 1349 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 1350 &protocol_features); 1351 if (err < 0) { 1352 return err; 1353 } 1354 1355 dev->protocol_features = 1356 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; 1357 1358 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1359 /* Don't acknowledge CONFIG feature if device doesn't support it */ 1360 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 1361 } else if (!(protocol_features & 1362 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { 1363 error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG " 1364 "but backend does not support it."); 1365 return -1; 1366 } 1367 1368 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 1369 if (err < 0) { 1370 return err; 1371 } 1372 1373 /* query the max queues we support if backend supports Multiple Queue */ 1374 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 1375 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 1376 &dev->max_queues); 1377 if (err < 0) { 1378 return err; 1379 } 1380 } 1381 1382 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 1383 !(virtio_has_feature(dev->protocol_features, 1384 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 1385 virtio_has_feature(dev->protocol_features, 1386 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 1387 error_report("IOMMU support requires reply-ack and " 1388 "slave-req protocol features."); 1389 return -1; 1390 } 1391 } 1392 1393 if (dev->migration_blocker == NULL && 1394 !virtio_has_feature(dev->protocol_features, 1395 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 1396 error_setg(&dev->migration_blocker, 1397 "Migration disabled: vhost-user backend lacks " 1398 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 1399 } 1400 1401 err = vhost_setup_slave_channel(dev); 1402 if (err < 0) { 1403 return err; 1404 } 1405 1406 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 1407 postcopy_add_notifier(&u->postcopy_notifier); 1408 1409 return 0; 1410 } 1411 1412 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 1413 { 1414 struct vhost_user *u; 1415 1416 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1417 1418 u = dev->opaque; 1419 if (u->postcopy_notifier.notify) { 1420 postcopy_remove_notifier(&u->postcopy_notifier); 1421 u->postcopy_notifier.notify = NULL; 1422 } 1423 u->postcopy_listen = false; 1424 if (u->postcopy_fd.handler) { 1425 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1426 close(u->postcopy_fd.fd); 1427 u->postcopy_fd.handler = NULL; 1428 } 1429 if (u->slave_fd >= 0) { 1430 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1431 close(u->slave_fd); 1432 u->slave_fd = -1; 1433 } 1434 g_free(u->region_rb); 1435 u->region_rb = NULL; 1436 g_free(u->region_rb_offset); 1437 u->region_rb_offset = NULL; 1438 u->region_rb_len = 0; 1439 g_free(u); 1440 dev->opaque = 0; 1441 1442 return 0; 1443 } 1444 1445 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 1446 { 1447 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 1448 1449 return idx; 1450 } 1451 1452 static int vhost_user_memslots_limit(struct vhost_dev *dev) 1453 { 1454 return VHOST_MEMORY_MAX_NREGIONS; 1455 } 1456 1457 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 1458 { 1459 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1460 1461 return virtio_has_feature(dev->protocol_features, 1462 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 1463 } 1464 1465 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 1466 { 1467 VhostUserMsg msg = { }; 1468 1469 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1470 1471 /* If guest supports GUEST_ANNOUNCE do nothing */ 1472 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 1473 return 0; 1474 } 1475 1476 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 1477 if (virtio_has_feature(dev->protocol_features, 1478 VHOST_USER_PROTOCOL_F_RARP)) { 1479 msg.hdr.request = VHOST_USER_SEND_RARP; 1480 msg.hdr.flags = VHOST_USER_VERSION; 1481 memcpy((char *)&msg.payload.u64, mac_addr, 6); 1482 msg.hdr.size = sizeof(msg.payload.u64); 1483 1484 return vhost_user_write(dev, &msg, NULL, 0); 1485 } 1486 return -1; 1487 } 1488 1489 static bool vhost_user_can_merge(struct vhost_dev *dev, 1490 uint64_t start1, uint64_t size1, 1491 uint64_t start2, uint64_t size2) 1492 { 1493 ram_addr_t offset; 1494 int mfd, rfd; 1495 MemoryRegion *mr; 1496 1497 mr = memory_region_from_host((void *)(uintptr_t)start1, &offset); 1498 mfd = memory_region_get_fd(mr); 1499 1500 mr = memory_region_from_host((void *)(uintptr_t)start2, &offset); 1501 rfd = memory_region_get_fd(mr); 1502 1503 return mfd == rfd; 1504 } 1505 1506 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 1507 { 1508 VhostUserMsg msg; 1509 bool reply_supported = virtio_has_feature(dev->protocol_features, 1510 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1511 1512 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 1513 return 0; 1514 } 1515 1516 msg.hdr.request = VHOST_USER_NET_SET_MTU; 1517 msg.payload.u64 = mtu; 1518 msg.hdr.size = sizeof(msg.payload.u64); 1519 msg.hdr.flags = VHOST_USER_VERSION; 1520 if (reply_supported) { 1521 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1522 } 1523 1524 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1525 return -1; 1526 } 1527 1528 /* If reply_ack supported, slave has to ack specified MTU is valid */ 1529 if (reply_supported) { 1530 return process_message_reply(dev, &msg); 1531 } 1532 1533 return 0; 1534 } 1535 1536 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 1537 struct vhost_iotlb_msg *imsg) 1538 { 1539 VhostUserMsg msg = { 1540 .hdr.request = VHOST_USER_IOTLB_MSG, 1541 .hdr.size = sizeof(msg.payload.iotlb), 1542 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1543 .payload.iotlb = *imsg, 1544 }; 1545 1546 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1547 return -EFAULT; 1548 } 1549 1550 return process_message_reply(dev, &msg); 1551 } 1552 1553 1554 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 1555 { 1556 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 1557 } 1558 1559 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 1560 uint32_t config_len) 1561 { 1562 VhostUserMsg msg = { 1563 .hdr.request = VHOST_USER_GET_CONFIG, 1564 .hdr.flags = VHOST_USER_VERSION, 1565 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 1566 }; 1567 1568 if (!virtio_has_feature(dev->protocol_features, 1569 VHOST_USER_PROTOCOL_F_CONFIG)) { 1570 return -1; 1571 } 1572 1573 if (config_len > VHOST_USER_MAX_CONFIG_SIZE) { 1574 return -1; 1575 } 1576 1577 msg.payload.config.offset = 0; 1578 msg.payload.config.size = config_len; 1579 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1580 return -1; 1581 } 1582 1583 if (vhost_user_read(dev, &msg) < 0) { 1584 return -1; 1585 } 1586 1587 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 1588 error_report("Received unexpected msg type. Expected %d received %d", 1589 VHOST_USER_GET_CONFIG, msg.hdr.request); 1590 return -1; 1591 } 1592 1593 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 1594 error_report("Received bad msg size."); 1595 return -1; 1596 } 1597 1598 memcpy(config, msg.payload.config.region, config_len); 1599 1600 return 0; 1601 } 1602 1603 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 1604 uint32_t offset, uint32_t size, uint32_t flags) 1605 { 1606 uint8_t *p; 1607 bool reply_supported = virtio_has_feature(dev->protocol_features, 1608 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1609 1610 VhostUserMsg msg = { 1611 .hdr.request = VHOST_USER_SET_CONFIG, 1612 .hdr.flags = VHOST_USER_VERSION, 1613 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 1614 }; 1615 1616 if (!virtio_has_feature(dev->protocol_features, 1617 VHOST_USER_PROTOCOL_F_CONFIG)) { 1618 return -1; 1619 } 1620 1621 if (reply_supported) { 1622 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1623 } 1624 1625 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 1626 return -1; 1627 } 1628 1629 msg.payload.config.offset = offset, 1630 msg.payload.config.size = size, 1631 msg.payload.config.flags = flags, 1632 p = msg.payload.config.region; 1633 memcpy(p, data, size); 1634 1635 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1636 return -1; 1637 } 1638 1639 if (reply_supported) { 1640 return process_message_reply(dev, &msg); 1641 } 1642 1643 return 0; 1644 } 1645 1646 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 1647 void *session_info, 1648 uint64_t *session_id) 1649 { 1650 bool crypto_session = virtio_has_feature(dev->protocol_features, 1651 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 1652 CryptoDevBackendSymSessionInfo *sess_info = session_info; 1653 VhostUserMsg msg = { 1654 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 1655 .hdr.flags = VHOST_USER_VERSION, 1656 .hdr.size = sizeof(msg.payload.session), 1657 }; 1658 1659 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1660 1661 if (!crypto_session) { 1662 error_report("vhost-user trying to send unhandled ioctl"); 1663 return -1; 1664 } 1665 1666 memcpy(&msg.payload.session.session_setup_data, sess_info, 1667 sizeof(CryptoDevBackendSymSessionInfo)); 1668 if (sess_info->key_len) { 1669 memcpy(&msg.payload.session.key, sess_info->cipher_key, 1670 sess_info->key_len); 1671 } 1672 if (sess_info->auth_key_len > 0) { 1673 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 1674 sess_info->auth_key_len); 1675 } 1676 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1677 error_report("vhost_user_write() return -1, create session failed"); 1678 return -1; 1679 } 1680 1681 if (vhost_user_read(dev, &msg) < 0) { 1682 error_report("vhost_user_read() return -1, create session failed"); 1683 return -1; 1684 } 1685 1686 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 1687 error_report("Received unexpected msg type. Expected %d received %d", 1688 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 1689 return -1; 1690 } 1691 1692 if (msg.hdr.size != sizeof(msg.payload.session)) { 1693 error_report("Received bad msg size."); 1694 return -1; 1695 } 1696 1697 if (msg.payload.session.session_id < 0) { 1698 error_report("Bad session id: %" PRId64 "", 1699 msg.payload.session.session_id); 1700 return -1; 1701 } 1702 *session_id = msg.payload.session.session_id; 1703 1704 return 0; 1705 } 1706 1707 static int 1708 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 1709 { 1710 bool crypto_session = virtio_has_feature(dev->protocol_features, 1711 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 1712 VhostUserMsg msg = { 1713 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 1714 .hdr.flags = VHOST_USER_VERSION, 1715 .hdr.size = sizeof(msg.payload.u64), 1716 }; 1717 msg.payload.u64 = session_id; 1718 1719 if (!crypto_session) { 1720 error_report("vhost-user trying to send unhandled ioctl"); 1721 return -1; 1722 } 1723 1724 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1725 error_report("vhost_user_write() return -1, close session failed"); 1726 return -1; 1727 } 1728 1729 return 0; 1730 } 1731 1732 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 1733 MemoryRegionSection *section) 1734 { 1735 bool result; 1736 1737 result = memory_region_get_fd(section->mr) >= 0; 1738 1739 return result; 1740 } 1741 1742 VhostUserState *vhost_user_init(void) 1743 { 1744 VhostUserState *user = g_new0(struct VhostUserState, 1); 1745 1746 return user; 1747 } 1748 1749 void vhost_user_cleanup(VhostUserState *user) 1750 { 1751 int i; 1752 1753 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1754 if (user->notifier[i].addr) { 1755 object_unparent(OBJECT(&user->notifier[i].mr)); 1756 munmap(user->notifier[i].addr, qemu_real_host_page_size); 1757 user->notifier[i].addr = NULL; 1758 } 1759 } 1760 } 1761 1762 const VhostOps user_ops = { 1763 .backend_type = VHOST_BACKEND_TYPE_USER, 1764 .vhost_backend_init = vhost_user_backend_init, 1765 .vhost_backend_cleanup = vhost_user_backend_cleanup, 1766 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 1767 .vhost_set_log_base = vhost_user_set_log_base, 1768 .vhost_set_mem_table = vhost_user_set_mem_table, 1769 .vhost_set_vring_addr = vhost_user_set_vring_addr, 1770 .vhost_set_vring_endian = vhost_user_set_vring_endian, 1771 .vhost_set_vring_num = vhost_user_set_vring_num, 1772 .vhost_set_vring_base = vhost_user_set_vring_base, 1773 .vhost_get_vring_base = vhost_user_get_vring_base, 1774 .vhost_set_vring_kick = vhost_user_set_vring_kick, 1775 .vhost_set_vring_call = vhost_user_set_vring_call, 1776 .vhost_set_features = vhost_user_set_features, 1777 .vhost_get_features = vhost_user_get_features, 1778 .vhost_set_owner = vhost_user_set_owner, 1779 .vhost_reset_device = vhost_user_reset_device, 1780 .vhost_get_vq_index = vhost_user_get_vq_index, 1781 .vhost_set_vring_enable = vhost_user_set_vring_enable, 1782 .vhost_requires_shm_log = vhost_user_requires_shm_log, 1783 .vhost_migration_done = vhost_user_migration_done, 1784 .vhost_backend_can_merge = vhost_user_can_merge, 1785 .vhost_net_set_mtu = vhost_user_net_set_mtu, 1786 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 1787 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 1788 .vhost_get_config = vhost_user_get_config, 1789 .vhost_set_config = vhost_user_set_config, 1790 .vhost_crypto_create_session = vhost_user_crypto_create_session, 1791 .vhost_crypto_close_session = vhost_user_crypto_close_session, 1792 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 1793 }; 1794