1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "sysemu/kvm.h" 20 #include "qemu/error-report.h" 21 #include "qemu/sockets.h" 22 #include "sysemu/cryptodev.h" 23 #include "migration/migration.h" 24 #include "migration/postcopy-ram.h" 25 #include "trace.h" 26 27 #include <sys/ioctl.h> 28 #include <sys/socket.h> 29 #include <sys/un.h> 30 #include <linux/vhost.h> 31 #include <linux/userfaultfd.h> 32 33 #define VHOST_MEMORY_MAX_NREGIONS 8 34 #define VHOST_USER_F_PROTOCOL_FEATURES 30 35 #define VHOST_USER_SLAVE_MAX_FDS 8 36 37 /* 38 * Maximum size of virtio device config space 39 */ 40 #define VHOST_USER_MAX_CONFIG_SIZE 256 41 42 enum VhostUserProtocolFeature { 43 VHOST_USER_PROTOCOL_F_MQ = 0, 44 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 45 VHOST_USER_PROTOCOL_F_RARP = 2, 46 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 47 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 48 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 49 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 50 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 51 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 52 VHOST_USER_PROTOCOL_F_CONFIG = 9, 53 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 54 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 55 VHOST_USER_PROTOCOL_F_MAX 56 }; 57 58 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 59 60 typedef enum VhostUserRequest { 61 VHOST_USER_NONE = 0, 62 VHOST_USER_GET_FEATURES = 1, 63 VHOST_USER_SET_FEATURES = 2, 64 VHOST_USER_SET_OWNER = 3, 65 VHOST_USER_RESET_OWNER = 4, 66 VHOST_USER_SET_MEM_TABLE = 5, 67 VHOST_USER_SET_LOG_BASE = 6, 68 VHOST_USER_SET_LOG_FD = 7, 69 VHOST_USER_SET_VRING_NUM = 8, 70 VHOST_USER_SET_VRING_ADDR = 9, 71 VHOST_USER_SET_VRING_BASE = 10, 72 VHOST_USER_GET_VRING_BASE = 11, 73 VHOST_USER_SET_VRING_KICK = 12, 74 VHOST_USER_SET_VRING_CALL = 13, 75 VHOST_USER_SET_VRING_ERR = 14, 76 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 77 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 78 VHOST_USER_GET_QUEUE_NUM = 17, 79 VHOST_USER_SET_VRING_ENABLE = 18, 80 VHOST_USER_SEND_RARP = 19, 81 VHOST_USER_NET_SET_MTU = 20, 82 VHOST_USER_SET_SLAVE_REQ_FD = 21, 83 VHOST_USER_IOTLB_MSG = 22, 84 VHOST_USER_SET_VRING_ENDIAN = 23, 85 VHOST_USER_GET_CONFIG = 24, 86 VHOST_USER_SET_CONFIG = 25, 87 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 88 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 89 VHOST_USER_POSTCOPY_ADVISE = 28, 90 VHOST_USER_POSTCOPY_LISTEN = 29, 91 VHOST_USER_POSTCOPY_END = 30, 92 VHOST_USER_MAX 93 } VhostUserRequest; 94 95 typedef enum VhostUserSlaveRequest { 96 VHOST_USER_SLAVE_NONE = 0, 97 VHOST_USER_SLAVE_IOTLB_MSG = 1, 98 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 99 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 100 VHOST_USER_SLAVE_MAX 101 } VhostUserSlaveRequest; 102 103 typedef struct VhostUserMemoryRegion { 104 uint64_t guest_phys_addr; 105 uint64_t memory_size; 106 uint64_t userspace_addr; 107 uint64_t mmap_offset; 108 } VhostUserMemoryRegion; 109 110 typedef struct VhostUserMemory { 111 uint32_t nregions; 112 uint32_t padding; 113 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 114 } VhostUserMemory; 115 116 typedef struct VhostUserLog { 117 uint64_t mmap_size; 118 uint64_t mmap_offset; 119 } VhostUserLog; 120 121 typedef struct VhostUserConfig { 122 uint32_t offset; 123 uint32_t size; 124 uint32_t flags; 125 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 126 } VhostUserConfig; 127 128 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 129 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 130 131 typedef struct VhostUserCryptoSession { 132 /* session id for success, -1 on errors */ 133 int64_t session_id; 134 CryptoDevBackendSymSessionInfo session_setup_data; 135 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 136 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 137 } VhostUserCryptoSession; 138 139 static VhostUserConfig c __attribute__ ((unused)); 140 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 141 + sizeof(c.size) \ 142 + sizeof(c.flags)) 143 144 typedef struct VhostUserVringArea { 145 uint64_t u64; 146 uint64_t size; 147 uint64_t offset; 148 } VhostUserVringArea; 149 150 typedef struct { 151 VhostUserRequest request; 152 153 #define VHOST_USER_VERSION_MASK (0x3) 154 #define VHOST_USER_REPLY_MASK (0x1<<2) 155 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 156 uint32_t flags; 157 uint32_t size; /* the following payload size */ 158 } QEMU_PACKED VhostUserHeader; 159 160 typedef union { 161 #define VHOST_USER_VRING_IDX_MASK (0xff) 162 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 163 uint64_t u64; 164 struct vhost_vring_state state; 165 struct vhost_vring_addr addr; 166 VhostUserMemory memory; 167 VhostUserLog log; 168 struct vhost_iotlb_msg iotlb; 169 VhostUserConfig config; 170 VhostUserCryptoSession session; 171 VhostUserVringArea area; 172 } VhostUserPayload; 173 174 typedef struct VhostUserMsg { 175 VhostUserHeader hdr; 176 VhostUserPayload payload; 177 } QEMU_PACKED VhostUserMsg; 178 179 static VhostUserMsg m __attribute__ ((unused)); 180 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 181 182 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 183 184 /* The version of the protocol we support */ 185 #define VHOST_USER_VERSION (0x1) 186 187 struct vhost_user { 188 struct vhost_dev *dev; 189 /* Shared between vhost devs of the same virtio device */ 190 VhostUserState *user; 191 int slave_fd; 192 NotifierWithReturn postcopy_notifier; 193 struct PostCopyFD postcopy_fd; 194 uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; 195 /* Length of the region_rb and region_rb_offset arrays */ 196 size_t region_rb_len; 197 /* RAMBlock associated with a given region */ 198 RAMBlock **region_rb; 199 /* The offset from the start of the RAMBlock to the start of the 200 * vhost region. 201 */ 202 ram_addr_t *region_rb_offset; 203 204 /* True once we've entered postcopy_listen */ 205 bool postcopy_listen; 206 }; 207 208 static bool ioeventfd_enabled(void) 209 { 210 return kvm_enabled() && kvm_eventfds_enabled(); 211 } 212 213 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 214 { 215 struct vhost_user *u = dev->opaque; 216 CharBackend *chr = u->user->chr; 217 uint8_t *p = (uint8_t *) msg; 218 int r, size = VHOST_USER_HDR_SIZE; 219 220 r = qemu_chr_fe_read_all(chr, p, size); 221 if (r != size) { 222 error_report("Failed to read msg header. Read %d instead of %d." 223 " Original request %d.", r, size, msg->hdr.request); 224 goto fail; 225 } 226 227 /* validate received flags */ 228 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 229 error_report("Failed to read msg header." 230 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 231 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 232 goto fail; 233 } 234 235 /* validate message size is sane */ 236 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 237 error_report("Failed to read msg header." 238 " Size %d exceeds the maximum %zu.", msg->hdr.size, 239 VHOST_USER_PAYLOAD_SIZE); 240 goto fail; 241 } 242 243 if (msg->hdr.size) { 244 p += VHOST_USER_HDR_SIZE; 245 size = msg->hdr.size; 246 r = qemu_chr_fe_read_all(chr, p, size); 247 if (r != size) { 248 error_report("Failed to read msg payload." 249 " Read %d instead of %d.", r, msg->hdr.size); 250 goto fail; 251 } 252 } 253 254 return 0; 255 256 fail: 257 return -1; 258 } 259 260 static int process_message_reply(struct vhost_dev *dev, 261 const VhostUserMsg *msg) 262 { 263 VhostUserMsg msg_reply; 264 265 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 266 return 0; 267 } 268 269 if (vhost_user_read(dev, &msg_reply) < 0) { 270 return -1; 271 } 272 273 if (msg_reply.hdr.request != msg->hdr.request) { 274 error_report("Received unexpected msg type." 275 "Expected %d received %d", 276 msg->hdr.request, msg_reply.hdr.request); 277 return -1; 278 } 279 280 return msg_reply.payload.u64 ? -1 : 0; 281 } 282 283 static bool vhost_user_one_time_request(VhostUserRequest request) 284 { 285 switch (request) { 286 case VHOST_USER_SET_OWNER: 287 case VHOST_USER_RESET_OWNER: 288 case VHOST_USER_SET_MEM_TABLE: 289 case VHOST_USER_GET_QUEUE_NUM: 290 case VHOST_USER_NET_SET_MTU: 291 return true; 292 default: 293 return false; 294 } 295 } 296 297 /* most non-init callers ignore the error */ 298 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 299 int *fds, int fd_num) 300 { 301 struct vhost_user *u = dev->opaque; 302 CharBackend *chr = u->user->chr; 303 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 304 305 /* 306 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 307 * we just need send it once in the first time. For later such 308 * request, we just ignore it. 309 */ 310 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 311 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 312 return 0; 313 } 314 315 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 316 error_report("Failed to set msg fds."); 317 return -1; 318 } 319 320 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 321 if (ret != size) { 322 error_report("Failed to write msg." 323 " Wrote %d instead of %d.", ret, size); 324 return -1; 325 } 326 327 return 0; 328 } 329 330 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 331 struct vhost_log *log) 332 { 333 int fds[VHOST_MEMORY_MAX_NREGIONS]; 334 size_t fd_num = 0; 335 bool shmfd = virtio_has_feature(dev->protocol_features, 336 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 337 VhostUserMsg msg = { 338 .hdr.request = VHOST_USER_SET_LOG_BASE, 339 .hdr.flags = VHOST_USER_VERSION, 340 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 341 .payload.log.mmap_offset = 0, 342 .hdr.size = sizeof(msg.payload.log), 343 }; 344 345 if (shmfd && log->fd != -1) { 346 fds[fd_num++] = log->fd; 347 } 348 349 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 350 return -1; 351 } 352 353 if (shmfd) { 354 msg.hdr.size = 0; 355 if (vhost_user_read(dev, &msg) < 0) { 356 return -1; 357 } 358 359 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 360 error_report("Received unexpected msg type. " 361 "Expected %d received %d", 362 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 363 return -1; 364 } 365 } 366 367 return 0; 368 } 369 370 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 371 struct vhost_memory *mem) 372 { 373 struct vhost_user *u = dev->opaque; 374 int fds[VHOST_MEMORY_MAX_NREGIONS]; 375 int i, fd; 376 size_t fd_num = 0; 377 bool reply_supported = virtio_has_feature(dev->protocol_features, 378 VHOST_USER_PROTOCOL_F_REPLY_ACK); 379 VhostUserMsg msg_reply; 380 int region_i, msg_i; 381 382 VhostUserMsg msg = { 383 .hdr.request = VHOST_USER_SET_MEM_TABLE, 384 .hdr.flags = VHOST_USER_VERSION, 385 }; 386 387 if (reply_supported) { 388 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 389 } 390 391 if (u->region_rb_len < dev->mem->nregions) { 392 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 393 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 394 dev->mem->nregions); 395 memset(&(u->region_rb[u->region_rb_len]), '\0', 396 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 397 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 398 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 399 u->region_rb_len = dev->mem->nregions; 400 } 401 402 for (i = 0; i < dev->mem->nregions; ++i) { 403 struct vhost_memory_region *reg = dev->mem->regions + i; 404 ram_addr_t offset; 405 MemoryRegion *mr; 406 407 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 408 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 409 &offset); 410 fd = memory_region_get_fd(mr); 411 if (fd > 0) { 412 trace_vhost_user_set_mem_table_withfd(fd_num, mr->name, 413 reg->memory_size, 414 reg->guest_phys_addr, 415 reg->userspace_addr, offset); 416 u->region_rb_offset[i] = offset; 417 u->region_rb[i] = mr->ram_block; 418 msg.payload.memory.regions[fd_num].userspace_addr = 419 reg->userspace_addr; 420 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 421 msg.payload.memory.regions[fd_num].guest_phys_addr = 422 reg->guest_phys_addr; 423 msg.payload.memory.regions[fd_num].mmap_offset = offset; 424 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 425 fds[fd_num++] = fd; 426 } else { 427 u->region_rb_offset[i] = 0; 428 u->region_rb[i] = NULL; 429 } 430 } 431 432 msg.payload.memory.nregions = fd_num; 433 434 if (!fd_num) { 435 error_report("Failed initializing vhost-user memory map, " 436 "consider using -object memory-backend-file share=on"); 437 return -1; 438 } 439 440 msg.hdr.size = sizeof(msg.payload.memory.nregions); 441 msg.hdr.size += sizeof(msg.payload.memory.padding); 442 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 443 444 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 445 return -1; 446 } 447 448 if (vhost_user_read(dev, &msg_reply) < 0) { 449 return -1; 450 } 451 452 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 453 error_report("%s: Received unexpected msg type." 454 "Expected %d received %d", __func__, 455 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 456 return -1; 457 } 458 /* We're using the same structure, just reusing one of the 459 * fields, so it should be the same size. 460 */ 461 if (msg_reply.hdr.size != msg.hdr.size) { 462 error_report("%s: Unexpected size for postcopy reply " 463 "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); 464 return -1; 465 } 466 467 memset(u->postcopy_client_bases, 0, 468 sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); 469 470 /* They're in the same order as the regions that were sent 471 * but some of the regions were skipped (above) if they 472 * didn't have fd's 473 */ 474 for (msg_i = 0, region_i = 0; 475 region_i < dev->mem->nregions; 476 region_i++) { 477 if (msg_i < fd_num && 478 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 479 dev->mem->regions[region_i].guest_phys_addr) { 480 u->postcopy_client_bases[region_i] = 481 msg_reply.payload.memory.regions[msg_i].userspace_addr; 482 trace_vhost_user_set_mem_table_postcopy( 483 msg_reply.payload.memory.regions[msg_i].userspace_addr, 484 msg.payload.memory.regions[msg_i].userspace_addr, 485 msg_i, region_i); 486 msg_i++; 487 } 488 } 489 if (msg_i != fd_num) { 490 error_report("%s: postcopy reply not fully consumed " 491 "%d vs %zd", 492 __func__, msg_i, fd_num); 493 return -1; 494 } 495 /* Now we've registered this with the postcopy code, we ack to the client, 496 * because now we're in the position to be able to deal with any faults 497 * it generates. 498 */ 499 /* TODO: Use this for failure cases as well with a bad value */ 500 msg.hdr.size = sizeof(msg.payload.u64); 501 msg.payload.u64 = 0; /* OK */ 502 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 503 return -1; 504 } 505 506 if (reply_supported) { 507 return process_message_reply(dev, &msg); 508 } 509 510 return 0; 511 } 512 513 static int vhost_user_set_mem_table(struct vhost_dev *dev, 514 struct vhost_memory *mem) 515 { 516 struct vhost_user *u = dev->opaque; 517 int fds[VHOST_MEMORY_MAX_NREGIONS]; 518 int i, fd; 519 size_t fd_num = 0; 520 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 521 bool reply_supported = virtio_has_feature(dev->protocol_features, 522 VHOST_USER_PROTOCOL_F_REPLY_ACK) && 523 !do_postcopy; 524 525 if (do_postcopy) { 526 /* Postcopy has enough differences that it's best done in it's own 527 * version 528 */ 529 return vhost_user_set_mem_table_postcopy(dev, mem); 530 } 531 532 VhostUserMsg msg = { 533 .hdr.request = VHOST_USER_SET_MEM_TABLE, 534 .hdr.flags = VHOST_USER_VERSION, 535 }; 536 537 if (reply_supported) { 538 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 539 } 540 541 for (i = 0; i < dev->mem->nregions; ++i) { 542 struct vhost_memory_region *reg = dev->mem->regions + i; 543 ram_addr_t offset; 544 MemoryRegion *mr; 545 546 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 547 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 548 &offset); 549 fd = memory_region_get_fd(mr); 550 if (fd > 0) { 551 if (fd_num == VHOST_MEMORY_MAX_NREGIONS) { 552 error_report("Failed preparing vhost-user memory table msg"); 553 return -1; 554 } 555 msg.payload.memory.regions[fd_num].userspace_addr = 556 reg->userspace_addr; 557 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 558 msg.payload.memory.regions[fd_num].guest_phys_addr = 559 reg->guest_phys_addr; 560 msg.payload.memory.regions[fd_num].mmap_offset = offset; 561 fds[fd_num++] = fd; 562 } 563 } 564 565 msg.payload.memory.nregions = fd_num; 566 567 if (!fd_num) { 568 error_report("Failed initializing vhost-user memory map, " 569 "consider using -object memory-backend-file share=on"); 570 return -1; 571 } 572 573 msg.hdr.size = sizeof(msg.payload.memory.nregions); 574 msg.hdr.size += sizeof(msg.payload.memory.padding); 575 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 576 577 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 578 return -1; 579 } 580 581 if (reply_supported) { 582 return process_message_reply(dev, &msg); 583 } 584 585 return 0; 586 } 587 588 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 589 struct vhost_vring_addr *addr) 590 { 591 VhostUserMsg msg = { 592 .hdr.request = VHOST_USER_SET_VRING_ADDR, 593 .hdr.flags = VHOST_USER_VERSION, 594 .payload.addr = *addr, 595 .hdr.size = sizeof(msg.payload.addr), 596 }; 597 598 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 599 return -1; 600 } 601 602 return 0; 603 } 604 605 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 606 struct vhost_vring_state *ring) 607 { 608 bool cross_endian = virtio_has_feature(dev->protocol_features, 609 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 610 VhostUserMsg msg = { 611 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 612 .hdr.flags = VHOST_USER_VERSION, 613 .payload.state = *ring, 614 .hdr.size = sizeof(msg.payload.state), 615 }; 616 617 if (!cross_endian) { 618 error_report("vhost-user trying to send unhandled ioctl"); 619 return -1; 620 } 621 622 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 623 return -1; 624 } 625 626 return 0; 627 } 628 629 static int vhost_set_vring(struct vhost_dev *dev, 630 unsigned long int request, 631 struct vhost_vring_state *ring) 632 { 633 VhostUserMsg msg = { 634 .hdr.request = request, 635 .hdr.flags = VHOST_USER_VERSION, 636 .payload.state = *ring, 637 .hdr.size = sizeof(msg.payload.state), 638 }; 639 640 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 641 return -1; 642 } 643 644 return 0; 645 } 646 647 static int vhost_user_set_vring_num(struct vhost_dev *dev, 648 struct vhost_vring_state *ring) 649 { 650 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 651 } 652 653 static void vhost_user_host_notifier_restore(struct vhost_dev *dev, 654 int queue_idx) 655 { 656 struct vhost_user *u = dev->opaque; 657 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 658 VirtIODevice *vdev = dev->vdev; 659 660 if (n->addr && !n->set) { 661 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true); 662 n->set = true; 663 } 664 } 665 666 static void vhost_user_host_notifier_remove(struct vhost_dev *dev, 667 int queue_idx) 668 { 669 struct vhost_user *u = dev->opaque; 670 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 671 VirtIODevice *vdev = dev->vdev; 672 673 if (n->addr && n->set) { 674 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 675 n->set = false; 676 } 677 } 678 679 static int vhost_user_set_vring_base(struct vhost_dev *dev, 680 struct vhost_vring_state *ring) 681 { 682 vhost_user_host_notifier_restore(dev, ring->index); 683 684 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 685 } 686 687 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 688 { 689 int i; 690 691 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 692 return -1; 693 } 694 695 for (i = 0; i < dev->nvqs; ++i) { 696 struct vhost_vring_state state = { 697 .index = dev->vq_index + i, 698 .num = enable, 699 }; 700 701 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 702 } 703 704 return 0; 705 } 706 707 static int vhost_user_get_vring_base(struct vhost_dev *dev, 708 struct vhost_vring_state *ring) 709 { 710 VhostUserMsg msg = { 711 .hdr.request = VHOST_USER_GET_VRING_BASE, 712 .hdr.flags = VHOST_USER_VERSION, 713 .payload.state = *ring, 714 .hdr.size = sizeof(msg.payload.state), 715 }; 716 717 vhost_user_host_notifier_remove(dev, ring->index); 718 719 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 720 return -1; 721 } 722 723 if (vhost_user_read(dev, &msg) < 0) { 724 return -1; 725 } 726 727 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 728 error_report("Received unexpected msg type. Expected %d received %d", 729 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 730 return -1; 731 } 732 733 if (msg.hdr.size != sizeof(msg.payload.state)) { 734 error_report("Received bad msg size."); 735 return -1; 736 } 737 738 *ring = msg.payload.state; 739 740 return 0; 741 } 742 743 static int vhost_set_vring_file(struct vhost_dev *dev, 744 VhostUserRequest request, 745 struct vhost_vring_file *file) 746 { 747 int fds[VHOST_MEMORY_MAX_NREGIONS]; 748 size_t fd_num = 0; 749 VhostUserMsg msg = { 750 .hdr.request = request, 751 .hdr.flags = VHOST_USER_VERSION, 752 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 753 .hdr.size = sizeof(msg.payload.u64), 754 }; 755 756 if (ioeventfd_enabled() && file->fd > 0) { 757 fds[fd_num++] = file->fd; 758 } else { 759 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 760 } 761 762 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 763 return -1; 764 } 765 766 return 0; 767 } 768 769 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 770 struct vhost_vring_file *file) 771 { 772 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 773 } 774 775 static int vhost_user_set_vring_call(struct vhost_dev *dev, 776 struct vhost_vring_file *file) 777 { 778 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 779 } 780 781 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 782 { 783 VhostUserMsg msg = { 784 .hdr.request = request, 785 .hdr.flags = VHOST_USER_VERSION, 786 .payload.u64 = u64, 787 .hdr.size = sizeof(msg.payload.u64), 788 }; 789 790 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 791 return -1; 792 } 793 794 return 0; 795 } 796 797 static int vhost_user_set_features(struct vhost_dev *dev, 798 uint64_t features) 799 { 800 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 801 } 802 803 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 804 uint64_t features) 805 { 806 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 807 } 808 809 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 810 { 811 VhostUserMsg msg = { 812 .hdr.request = request, 813 .hdr.flags = VHOST_USER_VERSION, 814 }; 815 816 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 817 return 0; 818 } 819 820 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 821 return -1; 822 } 823 824 if (vhost_user_read(dev, &msg) < 0) { 825 return -1; 826 } 827 828 if (msg.hdr.request != request) { 829 error_report("Received unexpected msg type. Expected %d received %d", 830 request, msg.hdr.request); 831 return -1; 832 } 833 834 if (msg.hdr.size != sizeof(msg.payload.u64)) { 835 error_report("Received bad msg size."); 836 return -1; 837 } 838 839 *u64 = msg.payload.u64; 840 841 return 0; 842 } 843 844 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 845 { 846 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 847 } 848 849 static int vhost_user_set_owner(struct vhost_dev *dev) 850 { 851 VhostUserMsg msg = { 852 .hdr.request = VHOST_USER_SET_OWNER, 853 .hdr.flags = VHOST_USER_VERSION, 854 }; 855 856 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 857 return -1; 858 } 859 860 return 0; 861 } 862 863 static int vhost_user_reset_device(struct vhost_dev *dev) 864 { 865 VhostUserMsg msg = { 866 .hdr.request = VHOST_USER_RESET_OWNER, 867 .hdr.flags = VHOST_USER_VERSION, 868 }; 869 870 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 871 return -1; 872 } 873 874 return 0; 875 } 876 877 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 878 { 879 int ret = -1; 880 881 if (!dev->config_ops) { 882 return -1; 883 } 884 885 if (dev->config_ops->vhost_dev_config_notifier) { 886 ret = dev->config_ops->vhost_dev_config_notifier(dev); 887 } 888 889 return ret; 890 } 891 892 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 893 VhostUserVringArea *area, 894 int fd) 895 { 896 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 897 size_t page_size = qemu_real_host_page_size; 898 struct vhost_user *u = dev->opaque; 899 VhostUserState *user = u->user; 900 VirtIODevice *vdev = dev->vdev; 901 VhostUserHostNotifier *n; 902 void *addr; 903 char *name; 904 905 if (!virtio_has_feature(dev->protocol_features, 906 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 907 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 908 return -1; 909 } 910 911 n = &user->notifier[queue_idx]; 912 913 if (n->addr) { 914 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 915 object_unparent(OBJECT(&n->mr)); 916 munmap(n->addr, page_size); 917 n->addr = NULL; 918 } 919 920 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 921 return 0; 922 } 923 924 /* Sanity check. */ 925 if (area->size != page_size) { 926 return -1; 927 } 928 929 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 930 fd, area->offset); 931 if (addr == MAP_FAILED) { 932 return -1; 933 } 934 935 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 936 user, queue_idx); 937 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 938 page_size, addr); 939 g_free(name); 940 941 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 942 munmap(addr, page_size); 943 return -1; 944 } 945 946 n->addr = addr; 947 n->set = true; 948 949 return 0; 950 } 951 952 static void slave_read(void *opaque) 953 { 954 struct vhost_dev *dev = opaque; 955 struct vhost_user *u = dev->opaque; 956 VhostUserHeader hdr = { 0, }; 957 VhostUserPayload payload = { 0, }; 958 int size, ret = 0; 959 struct iovec iov; 960 struct msghdr msgh; 961 int fd[VHOST_USER_SLAVE_MAX_FDS]; 962 char control[CMSG_SPACE(sizeof(fd))]; 963 struct cmsghdr *cmsg; 964 int i, fdsize = 0; 965 966 memset(&msgh, 0, sizeof(msgh)); 967 msgh.msg_iov = &iov; 968 msgh.msg_iovlen = 1; 969 msgh.msg_control = control; 970 msgh.msg_controllen = sizeof(control); 971 972 memset(fd, -1, sizeof(fd)); 973 974 /* Read header */ 975 iov.iov_base = &hdr; 976 iov.iov_len = VHOST_USER_HDR_SIZE; 977 978 size = recvmsg(u->slave_fd, &msgh, 0); 979 if (size != VHOST_USER_HDR_SIZE) { 980 error_report("Failed to read from slave."); 981 goto err; 982 } 983 984 if (msgh.msg_flags & MSG_CTRUNC) { 985 error_report("Truncated message."); 986 goto err; 987 } 988 989 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 990 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 991 if (cmsg->cmsg_level == SOL_SOCKET && 992 cmsg->cmsg_type == SCM_RIGHTS) { 993 fdsize = cmsg->cmsg_len - CMSG_LEN(0); 994 memcpy(fd, CMSG_DATA(cmsg), fdsize); 995 break; 996 } 997 } 998 999 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1000 error_report("Failed to read msg header." 1001 " Size %d exceeds the maximum %zu.", hdr.size, 1002 VHOST_USER_PAYLOAD_SIZE); 1003 goto err; 1004 } 1005 1006 /* Read payload */ 1007 size = read(u->slave_fd, &payload, hdr.size); 1008 if (size != hdr.size) { 1009 error_report("Failed to read payload from slave."); 1010 goto err; 1011 } 1012 1013 switch (hdr.request) { 1014 case VHOST_USER_SLAVE_IOTLB_MSG: 1015 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1016 break; 1017 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1018 ret = vhost_user_slave_handle_config_change(dev); 1019 break; 1020 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1021 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1022 fd[0]); 1023 break; 1024 default: 1025 error_report("Received unexpected msg type."); 1026 ret = -EINVAL; 1027 } 1028 1029 /* Close the remaining file descriptors. */ 1030 for (i = 0; i < fdsize; i++) { 1031 if (fd[i] != -1) { 1032 close(fd[i]); 1033 } 1034 } 1035 1036 /* 1037 * REPLY_ACK feature handling. Other reply types has to be managed 1038 * directly in their request handlers. 1039 */ 1040 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1041 struct iovec iovec[2]; 1042 1043 1044 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1045 hdr.flags |= VHOST_USER_REPLY_MASK; 1046 1047 payload.u64 = !!ret; 1048 hdr.size = sizeof(payload.u64); 1049 1050 iovec[0].iov_base = &hdr; 1051 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1052 iovec[1].iov_base = &payload; 1053 iovec[1].iov_len = hdr.size; 1054 1055 size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); 1056 if (size != VHOST_USER_HDR_SIZE + hdr.size) { 1057 error_report("Failed to send msg reply to slave."); 1058 goto err; 1059 } 1060 } 1061 1062 return; 1063 1064 err: 1065 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1066 close(u->slave_fd); 1067 u->slave_fd = -1; 1068 for (i = 0; i < fdsize; i++) { 1069 if (fd[i] != -1) { 1070 close(fd[i]); 1071 } 1072 } 1073 return; 1074 } 1075 1076 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1077 { 1078 VhostUserMsg msg = { 1079 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, 1080 .hdr.flags = VHOST_USER_VERSION, 1081 }; 1082 struct vhost_user *u = dev->opaque; 1083 int sv[2], ret = 0; 1084 bool reply_supported = virtio_has_feature(dev->protocol_features, 1085 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1086 1087 if (!virtio_has_feature(dev->protocol_features, 1088 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 1089 return 0; 1090 } 1091 1092 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1093 error_report("socketpair() failed"); 1094 return -1; 1095 } 1096 1097 u->slave_fd = sv[0]; 1098 qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev); 1099 1100 if (reply_supported) { 1101 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1102 } 1103 1104 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1105 if (ret) { 1106 goto out; 1107 } 1108 1109 if (reply_supported) { 1110 ret = process_message_reply(dev, &msg); 1111 } 1112 1113 out: 1114 close(sv[1]); 1115 if (ret) { 1116 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1117 close(u->slave_fd); 1118 u->slave_fd = -1; 1119 } 1120 1121 return ret; 1122 } 1123 1124 /* 1125 * Called back from the postcopy fault thread when a fault is received on our 1126 * ufd. 1127 * TODO: This is Linux specific 1128 */ 1129 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1130 void *ufd) 1131 { 1132 struct vhost_dev *dev = pcfd->data; 1133 struct vhost_user *u = dev->opaque; 1134 struct uffd_msg *msg = ufd; 1135 uint64_t faultaddr = msg->arg.pagefault.address; 1136 RAMBlock *rb = NULL; 1137 uint64_t rb_offset; 1138 int i; 1139 1140 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1141 dev->mem->nregions); 1142 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1143 trace_vhost_user_postcopy_fault_handler_loop(i, 1144 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1145 if (faultaddr >= u->postcopy_client_bases[i]) { 1146 /* Ofset of the fault address in the vhost region */ 1147 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1148 if (region_offset < dev->mem->regions[i].memory_size) { 1149 rb_offset = region_offset + u->region_rb_offset[i]; 1150 trace_vhost_user_postcopy_fault_handler_found(i, 1151 region_offset, rb_offset); 1152 rb = u->region_rb[i]; 1153 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1154 rb_offset); 1155 } 1156 } 1157 } 1158 error_report("%s: Failed to find region for fault %" PRIx64, 1159 __func__, faultaddr); 1160 return -1; 1161 } 1162 1163 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1164 uint64_t offset) 1165 { 1166 struct vhost_dev *dev = pcfd->data; 1167 struct vhost_user *u = dev->opaque; 1168 int i; 1169 1170 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1171 1172 if (!u) { 1173 return 0; 1174 } 1175 /* Translate the offset into an address in the clients address space */ 1176 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1177 if (u->region_rb[i] == rb && 1178 offset >= u->region_rb_offset[i] && 1179 offset < (u->region_rb_offset[i] + 1180 dev->mem->regions[i].memory_size)) { 1181 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1182 u->postcopy_client_bases[i]; 1183 trace_vhost_user_postcopy_waker_found(client_addr); 1184 return postcopy_wake_shared(pcfd, client_addr, rb); 1185 } 1186 } 1187 1188 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1189 return 0; 1190 } 1191 1192 /* 1193 * Called at the start of an inbound postcopy on reception of the 1194 * 'advise' command. 1195 */ 1196 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1197 { 1198 struct vhost_user *u = dev->opaque; 1199 CharBackend *chr = u->user->chr; 1200 int ufd; 1201 VhostUserMsg msg = { 1202 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1203 .hdr.flags = VHOST_USER_VERSION, 1204 }; 1205 1206 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1207 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1208 return -1; 1209 } 1210 1211 if (vhost_user_read(dev, &msg) < 0) { 1212 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1213 return -1; 1214 } 1215 1216 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1217 error_setg(errp, "Unexpected msg type. Expected %d received %d", 1218 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1219 return -1; 1220 } 1221 1222 if (msg.hdr.size) { 1223 error_setg(errp, "Received bad msg size."); 1224 return -1; 1225 } 1226 ufd = qemu_chr_fe_get_msgfd(chr); 1227 if (ufd < 0) { 1228 error_setg(errp, "%s: Failed to get ufd", __func__); 1229 return -1; 1230 } 1231 qemu_set_nonblock(ufd); 1232 1233 /* register ufd with userfault thread */ 1234 u->postcopy_fd.fd = ufd; 1235 u->postcopy_fd.data = dev; 1236 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1237 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1238 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1239 postcopy_register_shared_ufd(&u->postcopy_fd); 1240 return 0; 1241 } 1242 1243 /* 1244 * Called at the switch to postcopy on reception of the 'listen' command. 1245 */ 1246 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1247 { 1248 struct vhost_user *u = dev->opaque; 1249 int ret; 1250 VhostUserMsg msg = { 1251 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1252 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1253 }; 1254 u->postcopy_listen = true; 1255 trace_vhost_user_postcopy_listen(); 1256 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1257 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1258 return -1; 1259 } 1260 1261 ret = process_message_reply(dev, &msg); 1262 if (ret) { 1263 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1264 return ret; 1265 } 1266 1267 return 0; 1268 } 1269 1270 /* 1271 * Called at the end of postcopy 1272 */ 1273 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1274 { 1275 VhostUserMsg msg = { 1276 .hdr.request = VHOST_USER_POSTCOPY_END, 1277 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1278 }; 1279 int ret; 1280 struct vhost_user *u = dev->opaque; 1281 1282 trace_vhost_user_postcopy_end_entry(); 1283 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1284 error_setg(errp, "Failed to send postcopy_end to vhost"); 1285 return -1; 1286 } 1287 1288 ret = process_message_reply(dev, &msg); 1289 if (ret) { 1290 error_setg(errp, "Failed to receive reply to postcopy_end"); 1291 return ret; 1292 } 1293 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1294 u->postcopy_fd.handler = NULL; 1295 1296 trace_vhost_user_postcopy_end_exit(); 1297 1298 return 0; 1299 } 1300 1301 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1302 void *opaque) 1303 { 1304 struct PostcopyNotifyData *pnd = opaque; 1305 struct vhost_user *u = container_of(notifier, struct vhost_user, 1306 postcopy_notifier); 1307 struct vhost_dev *dev = u->dev; 1308 1309 switch (pnd->reason) { 1310 case POSTCOPY_NOTIFY_PROBE: 1311 if (!virtio_has_feature(dev->protocol_features, 1312 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1313 /* TODO: Get the device name into this error somehow */ 1314 error_setg(pnd->errp, 1315 "vhost-user backend not capable of postcopy"); 1316 return -ENOENT; 1317 } 1318 break; 1319 1320 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1321 return vhost_user_postcopy_advise(dev, pnd->errp); 1322 1323 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1324 return vhost_user_postcopy_listen(dev, pnd->errp); 1325 1326 case POSTCOPY_NOTIFY_INBOUND_END: 1327 return vhost_user_postcopy_end(dev, pnd->errp); 1328 1329 default: 1330 /* We ignore notifications we don't know */ 1331 break; 1332 } 1333 1334 return 0; 1335 } 1336 1337 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) 1338 { 1339 uint64_t features, protocol_features; 1340 struct vhost_user *u; 1341 int err; 1342 1343 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1344 1345 u = g_new0(struct vhost_user, 1); 1346 u->user = opaque; 1347 u->slave_fd = -1; 1348 u->dev = dev; 1349 dev->opaque = u; 1350 1351 err = vhost_user_get_features(dev, &features); 1352 if (err < 0) { 1353 return err; 1354 } 1355 1356 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1357 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1358 1359 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 1360 &protocol_features); 1361 if (err < 0) { 1362 return err; 1363 } 1364 1365 dev->protocol_features = 1366 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; 1367 1368 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1369 /* Don't acknowledge CONFIG feature if device doesn't support it */ 1370 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 1371 } else if (!(protocol_features & 1372 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { 1373 error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG " 1374 "but backend does not support it."); 1375 return -1; 1376 } 1377 1378 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 1379 if (err < 0) { 1380 return err; 1381 } 1382 1383 /* query the max queues we support if backend supports Multiple Queue */ 1384 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 1385 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 1386 &dev->max_queues); 1387 if (err < 0) { 1388 return err; 1389 } 1390 } 1391 1392 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 1393 !(virtio_has_feature(dev->protocol_features, 1394 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 1395 virtio_has_feature(dev->protocol_features, 1396 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 1397 error_report("IOMMU support requires reply-ack and " 1398 "slave-req protocol features."); 1399 return -1; 1400 } 1401 } 1402 1403 if (dev->migration_blocker == NULL && 1404 !virtio_has_feature(dev->protocol_features, 1405 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 1406 error_setg(&dev->migration_blocker, 1407 "Migration disabled: vhost-user backend lacks " 1408 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 1409 } 1410 1411 err = vhost_setup_slave_channel(dev); 1412 if (err < 0) { 1413 return err; 1414 } 1415 1416 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 1417 postcopy_add_notifier(&u->postcopy_notifier); 1418 1419 return 0; 1420 } 1421 1422 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 1423 { 1424 struct vhost_user *u; 1425 1426 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1427 1428 u = dev->opaque; 1429 if (u->postcopy_notifier.notify) { 1430 postcopy_remove_notifier(&u->postcopy_notifier); 1431 u->postcopy_notifier.notify = NULL; 1432 } 1433 if (u->slave_fd >= 0) { 1434 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1435 close(u->slave_fd); 1436 u->slave_fd = -1; 1437 } 1438 g_free(u->region_rb); 1439 u->region_rb = NULL; 1440 g_free(u->region_rb_offset); 1441 u->region_rb_offset = NULL; 1442 u->region_rb_len = 0; 1443 g_free(u); 1444 dev->opaque = 0; 1445 1446 return 0; 1447 } 1448 1449 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 1450 { 1451 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 1452 1453 return idx; 1454 } 1455 1456 static int vhost_user_memslots_limit(struct vhost_dev *dev) 1457 { 1458 return VHOST_MEMORY_MAX_NREGIONS; 1459 } 1460 1461 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 1462 { 1463 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1464 1465 return virtio_has_feature(dev->protocol_features, 1466 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 1467 } 1468 1469 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 1470 { 1471 VhostUserMsg msg = { }; 1472 1473 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1474 1475 /* If guest supports GUEST_ANNOUNCE do nothing */ 1476 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 1477 return 0; 1478 } 1479 1480 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 1481 if (virtio_has_feature(dev->protocol_features, 1482 VHOST_USER_PROTOCOL_F_RARP)) { 1483 msg.hdr.request = VHOST_USER_SEND_RARP; 1484 msg.hdr.flags = VHOST_USER_VERSION; 1485 memcpy((char *)&msg.payload.u64, mac_addr, 6); 1486 msg.hdr.size = sizeof(msg.payload.u64); 1487 1488 return vhost_user_write(dev, &msg, NULL, 0); 1489 } 1490 return -1; 1491 } 1492 1493 static bool vhost_user_can_merge(struct vhost_dev *dev, 1494 uint64_t start1, uint64_t size1, 1495 uint64_t start2, uint64_t size2) 1496 { 1497 ram_addr_t offset; 1498 int mfd, rfd; 1499 MemoryRegion *mr; 1500 1501 mr = memory_region_from_host((void *)(uintptr_t)start1, &offset); 1502 mfd = memory_region_get_fd(mr); 1503 1504 mr = memory_region_from_host((void *)(uintptr_t)start2, &offset); 1505 rfd = memory_region_get_fd(mr); 1506 1507 return mfd == rfd; 1508 } 1509 1510 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 1511 { 1512 VhostUserMsg msg; 1513 bool reply_supported = virtio_has_feature(dev->protocol_features, 1514 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1515 1516 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 1517 return 0; 1518 } 1519 1520 msg.hdr.request = VHOST_USER_NET_SET_MTU; 1521 msg.payload.u64 = mtu; 1522 msg.hdr.size = sizeof(msg.payload.u64); 1523 msg.hdr.flags = VHOST_USER_VERSION; 1524 if (reply_supported) { 1525 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1526 } 1527 1528 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1529 return -1; 1530 } 1531 1532 /* If reply_ack supported, slave has to ack specified MTU is valid */ 1533 if (reply_supported) { 1534 return process_message_reply(dev, &msg); 1535 } 1536 1537 return 0; 1538 } 1539 1540 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 1541 struct vhost_iotlb_msg *imsg) 1542 { 1543 VhostUserMsg msg = { 1544 .hdr.request = VHOST_USER_IOTLB_MSG, 1545 .hdr.size = sizeof(msg.payload.iotlb), 1546 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1547 .payload.iotlb = *imsg, 1548 }; 1549 1550 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1551 return -EFAULT; 1552 } 1553 1554 return process_message_reply(dev, &msg); 1555 } 1556 1557 1558 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 1559 { 1560 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 1561 } 1562 1563 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 1564 uint32_t config_len) 1565 { 1566 VhostUserMsg msg = { 1567 .hdr.request = VHOST_USER_GET_CONFIG, 1568 .hdr.flags = VHOST_USER_VERSION, 1569 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 1570 }; 1571 1572 if (!virtio_has_feature(dev->protocol_features, 1573 VHOST_USER_PROTOCOL_F_CONFIG)) { 1574 return -1; 1575 } 1576 1577 if (config_len > VHOST_USER_MAX_CONFIG_SIZE) { 1578 return -1; 1579 } 1580 1581 msg.payload.config.offset = 0; 1582 msg.payload.config.size = config_len; 1583 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1584 return -1; 1585 } 1586 1587 if (vhost_user_read(dev, &msg) < 0) { 1588 return -1; 1589 } 1590 1591 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 1592 error_report("Received unexpected msg type. Expected %d received %d", 1593 VHOST_USER_GET_CONFIG, msg.hdr.request); 1594 return -1; 1595 } 1596 1597 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 1598 error_report("Received bad msg size."); 1599 return -1; 1600 } 1601 1602 memcpy(config, msg.payload.config.region, config_len); 1603 1604 return 0; 1605 } 1606 1607 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 1608 uint32_t offset, uint32_t size, uint32_t flags) 1609 { 1610 uint8_t *p; 1611 bool reply_supported = virtio_has_feature(dev->protocol_features, 1612 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1613 1614 VhostUserMsg msg = { 1615 .hdr.request = VHOST_USER_SET_CONFIG, 1616 .hdr.flags = VHOST_USER_VERSION, 1617 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 1618 }; 1619 1620 if (!virtio_has_feature(dev->protocol_features, 1621 VHOST_USER_PROTOCOL_F_CONFIG)) { 1622 return -1; 1623 } 1624 1625 if (reply_supported) { 1626 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1627 } 1628 1629 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 1630 return -1; 1631 } 1632 1633 msg.payload.config.offset = offset, 1634 msg.payload.config.size = size, 1635 msg.payload.config.flags = flags, 1636 p = msg.payload.config.region; 1637 memcpy(p, data, size); 1638 1639 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1640 return -1; 1641 } 1642 1643 if (reply_supported) { 1644 return process_message_reply(dev, &msg); 1645 } 1646 1647 return 0; 1648 } 1649 1650 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 1651 void *session_info, 1652 uint64_t *session_id) 1653 { 1654 bool crypto_session = virtio_has_feature(dev->protocol_features, 1655 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 1656 CryptoDevBackendSymSessionInfo *sess_info = session_info; 1657 VhostUserMsg msg = { 1658 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 1659 .hdr.flags = VHOST_USER_VERSION, 1660 .hdr.size = sizeof(msg.payload.session), 1661 }; 1662 1663 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1664 1665 if (!crypto_session) { 1666 error_report("vhost-user trying to send unhandled ioctl"); 1667 return -1; 1668 } 1669 1670 memcpy(&msg.payload.session.session_setup_data, sess_info, 1671 sizeof(CryptoDevBackendSymSessionInfo)); 1672 if (sess_info->key_len) { 1673 memcpy(&msg.payload.session.key, sess_info->cipher_key, 1674 sess_info->key_len); 1675 } 1676 if (sess_info->auth_key_len > 0) { 1677 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 1678 sess_info->auth_key_len); 1679 } 1680 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1681 error_report("vhost_user_write() return -1, create session failed"); 1682 return -1; 1683 } 1684 1685 if (vhost_user_read(dev, &msg) < 0) { 1686 error_report("vhost_user_read() return -1, create session failed"); 1687 return -1; 1688 } 1689 1690 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 1691 error_report("Received unexpected msg type. Expected %d received %d", 1692 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 1693 return -1; 1694 } 1695 1696 if (msg.hdr.size != sizeof(msg.payload.session)) { 1697 error_report("Received bad msg size."); 1698 return -1; 1699 } 1700 1701 if (msg.payload.session.session_id < 0) { 1702 error_report("Bad session id: %" PRId64 "", 1703 msg.payload.session.session_id); 1704 return -1; 1705 } 1706 *session_id = msg.payload.session.session_id; 1707 1708 return 0; 1709 } 1710 1711 static int 1712 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 1713 { 1714 bool crypto_session = virtio_has_feature(dev->protocol_features, 1715 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 1716 VhostUserMsg msg = { 1717 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 1718 .hdr.flags = VHOST_USER_VERSION, 1719 .hdr.size = sizeof(msg.payload.u64), 1720 }; 1721 msg.payload.u64 = session_id; 1722 1723 if (!crypto_session) { 1724 error_report("vhost-user trying to send unhandled ioctl"); 1725 return -1; 1726 } 1727 1728 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1729 error_report("vhost_user_write() return -1, close session failed"); 1730 return -1; 1731 } 1732 1733 return 0; 1734 } 1735 1736 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 1737 MemoryRegionSection *section) 1738 { 1739 bool result; 1740 1741 result = memory_region_get_fd(section->mr) >= 0; 1742 1743 return result; 1744 } 1745 1746 VhostUserState *vhost_user_init(void) 1747 { 1748 VhostUserState *user = g_new0(struct VhostUserState, 1); 1749 1750 return user; 1751 } 1752 1753 void vhost_user_cleanup(VhostUserState *user) 1754 { 1755 int i; 1756 1757 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1758 if (user->notifier[i].addr) { 1759 object_unparent(OBJECT(&user->notifier[i].mr)); 1760 munmap(user->notifier[i].addr, qemu_real_host_page_size); 1761 user->notifier[i].addr = NULL; 1762 } 1763 } 1764 } 1765 1766 const VhostOps user_ops = { 1767 .backend_type = VHOST_BACKEND_TYPE_USER, 1768 .vhost_backend_init = vhost_user_backend_init, 1769 .vhost_backend_cleanup = vhost_user_backend_cleanup, 1770 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 1771 .vhost_set_log_base = vhost_user_set_log_base, 1772 .vhost_set_mem_table = vhost_user_set_mem_table, 1773 .vhost_set_vring_addr = vhost_user_set_vring_addr, 1774 .vhost_set_vring_endian = vhost_user_set_vring_endian, 1775 .vhost_set_vring_num = vhost_user_set_vring_num, 1776 .vhost_set_vring_base = vhost_user_set_vring_base, 1777 .vhost_get_vring_base = vhost_user_get_vring_base, 1778 .vhost_set_vring_kick = vhost_user_set_vring_kick, 1779 .vhost_set_vring_call = vhost_user_set_vring_call, 1780 .vhost_set_features = vhost_user_set_features, 1781 .vhost_get_features = vhost_user_get_features, 1782 .vhost_set_owner = vhost_user_set_owner, 1783 .vhost_reset_device = vhost_user_reset_device, 1784 .vhost_get_vq_index = vhost_user_get_vq_index, 1785 .vhost_set_vring_enable = vhost_user_set_vring_enable, 1786 .vhost_requires_shm_log = vhost_user_requires_shm_log, 1787 .vhost_migration_done = vhost_user_migration_done, 1788 .vhost_backend_can_merge = vhost_user_can_merge, 1789 .vhost_net_set_mtu = vhost_user_net_set_mtu, 1790 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 1791 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 1792 .vhost_get_config = vhost_user_get_config, 1793 .vhost_set_config = vhost_user_set_config, 1794 .vhost_crypto_create_session = vhost_user_crypto_create_session, 1795 .vhost_crypto_close_session = vhost_user_crypto_close_session, 1796 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 1797 }; 1798