1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include "qapi/error.h" 13 #include "hw/virtio/vhost.h" 14 #include "hw/virtio/vhost-user.h" 15 #include "hw/virtio/vhost-backend.h" 16 #include "hw/virtio/virtio.h" 17 #include "hw/virtio/virtio-net.h" 18 #include "chardev/char-fe.h" 19 #include "sysemu/kvm.h" 20 #include "qemu/error-report.h" 21 #include "qemu/sockets.h" 22 #include "sysemu/cryptodev.h" 23 #include "migration/migration.h" 24 #include "migration/postcopy-ram.h" 25 #include "trace.h" 26 27 #include <sys/ioctl.h> 28 #include <sys/socket.h> 29 #include <sys/un.h> 30 31 #include "standard-headers/linux/vhost_types.h" 32 33 #ifdef CONFIG_LINUX 34 #include <linux/userfaultfd.h> 35 #endif 36 37 #define VHOST_MEMORY_MAX_NREGIONS 8 38 #define VHOST_USER_F_PROTOCOL_FEATURES 30 39 #define VHOST_USER_SLAVE_MAX_FDS 8 40 41 /* 42 * Maximum size of virtio device config space 43 */ 44 #define VHOST_USER_MAX_CONFIG_SIZE 256 45 46 enum VhostUserProtocolFeature { 47 VHOST_USER_PROTOCOL_F_MQ = 0, 48 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 49 VHOST_USER_PROTOCOL_F_RARP = 2, 50 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 51 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 52 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 53 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 54 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 55 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 56 VHOST_USER_PROTOCOL_F_CONFIG = 9, 57 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 58 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 59 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 60 VHOST_USER_PROTOCOL_F_MAX 61 }; 62 63 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 64 65 typedef enum VhostUserRequest { 66 VHOST_USER_NONE = 0, 67 VHOST_USER_GET_FEATURES = 1, 68 VHOST_USER_SET_FEATURES = 2, 69 VHOST_USER_SET_OWNER = 3, 70 VHOST_USER_RESET_OWNER = 4, 71 VHOST_USER_SET_MEM_TABLE = 5, 72 VHOST_USER_SET_LOG_BASE = 6, 73 VHOST_USER_SET_LOG_FD = 7, 74 VHOST_USER_SET_VRING_NUM = 8, 75 VHOST_USER_SET_VRING_ADDR = 9, 76 VHOST_USER_SET_VRING_BASE = 10, 77 VHOST_USER_GET_VRING_BASE = 11, 78 VHOST_USER_SET_VRING_KICK = 12, 79 VHOST_USER_SET_VRING_CALL = 13, 80 VHOST_USER_SET_VRING_ERR = 14, 81 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 82 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 83 VHOST_USER_GET_QUEUE_NUM = 17, 84 VHOST_USER_SET_VRING_ENABLE = 18, 85 VHOST_USER_SEND_RARP = 19, 86 VHOST_USER_NET_SET_MTU = 20, 87 VHOST_USER_SET_SLAVE_REQ_FD = 21, 88 VHOST_USER_IOTLB_MSG = 22, 89 VHOST_USER_SET_VRING_ENDIAN = 23, 90 VHOST_USER_GET_CONFIG = 24, 91 VHOST_USER_SET_CONFIG = 25, 92 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 93 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 94 VHOST_USER_POSTCOPY_ADVISE = 28, 95 VHOST_USER_POSTCOPY_LISTEN = 29, 96 VHOST_USER_POSTCOPY_END = 30, 97 VHOST_USER_GET_INFLIGHT_FD = 31, 98 VHOST_USER_SET_INFLIGHT_FD = 32, 99 VHOST_USER_MAX 100 } VhostUserRequest; 101 102 typedef enum VhostUserSlaveRequest { 103 VHOST_USER_SLAVE_NONE = 0, 104 VHOST_USER_SLAVE_IOTLB_MSG = 1, 105 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 106 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 107 VHOST_USER_SLAVE_MAX 108 } VhostUserSlaveRequest; 109 110 typedef struct VhostUserMemoryRegion { 111 uint64_t guest_phys_addr; 112 uint64_t memory_size; 113 uint64_t userspace_addr; 114 uint64_t mmap_offset; 115 } VhostUserMemoryRegion; 116 117 typedef struct VhostUserMemory { 118 uint32_t nregions; 119 uint32_t padding; 120 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 121 } VhostUserMemory; 122 123 typedef struct VhostUserLog { 124 uint64_t mmap_size; 125 uint64_t mmap_offset; 126 } VhostUserLog; 127 128 typedef struct VhostUserConfig { 129 uint32_t offset; 130 uint32_t size; 131 uint32_t flags; 132 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 133 } VhostUserConfig; 134 135 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512 136 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64 137 138 typedef struct VhostUserCryptoSession { 139 /* session id for success, -1 on errors */ 140 int64_t session_id; 141 CryptoDevBackendSymSessionInfo session_setup_data; 142 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN]; 143 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN]; 144 } VhostUserCryptoSession; 145 146 static VhostUserConfig c __attribute__ ((unused)); 147 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 148 + sizeof(c.size) \ 149 + sizeof(c.flags)) 150 151 typedef struct VhostUserVringArea { 152 uint64_t u64; 153 uint64_t size; 154 uint64_t offset; 155 } VhostUserVringArea; 156 157 typedef struct VhostUserInflight { 158 uint64_t mmap_size; 159 uint64_t mmap_offset; 160 uint16_t num_queues; 161 uint16_t queue_size; 162 } VhostUserInflight; 163 164 typedef struct { 165 VhostUserRequest request; 166 167 #define VHOST_USER_VERSION_MASK (0x3) 168 #define VHOST_USER_REPLY_MASK (0x1<<2) 169 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 170 uint32_t flags; 171 uint32_t size; /* the following payload size */ 172 } QEMU_PACKED VhostUserHeader; 173 174 typedef union { 175 #define VHOST_USER_VRING_IDX_MASK (0xff) 176 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 177 uint64_t u64; 178 struct vhost_vring_state state; 179 struct vhost_vring_addr addr; 180 VhostUserMemory memory; 181 VhostUserLog log; 182 struct vhost_iotlb_msg iotlb; 183 VhostUserConfig config; 184 VhostUserCryptoSession session; 185 VhostUserVringArea area; 186 VhostUserInflight inflight; 187 } VhostUserPayload; 188 189 typedef struct VhostUserMsg { 190 VhostUserHeader hdr; 191 VhostUserPayload payload; 192 } QEMU_PACKED VhostUserMsg; 193 194 static VhostUserMsg m __attribute__ ((unused)); 195 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) 196 197 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) 198 199 /* The version of the protocol we support */ 200 #define VHOST_USER_VERSION (0x1) 201 202 struct vhost_user { 203 struct vhost_dev *dev; 204 /* Shared between vhost devs of the same virtio device */ 205 VhostUserState *user; 206 int slave_fd; 207 NotifierWithReturn postcopy_notifier; 208 struct PostCopyFD postcopy_fd; 209 uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; 210 /* Length of the region_rb and region_rb_offset arrays */ 211 size_t region_rb_len; 212 /* RAMBlock associated with a given region */ 213 RAMBlock **region_rb; 214 /* The offset from the start of the RAMBlock to the start of the 215 * vhost region. 216 */ 217 ram_addr_t *region_rb_offset; 218 219 /* True once we've entered postcopy_listen */ 220 bool postcopy_listen; 221 }; 222 223 static bool ioeventfd_enabled(void) 224 { 225 return !kvm_enabled() || kvm_eventfds_enabled(); 226 } 227 228 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) 229 { 230 struct vhost_user *u = dev->opaque; 231 CharBackend *chr = u->user->chr; 232 uint8_t *p = (uint8_t *) msg; 233 int r, size = VHOST_USER_HDR_SIZE; 234 235 r = qemu_chr_fe_read_all(chr, p, size); 236 if (r != size) { 237 error_report("Failed to read msg header. Read %d instead of %d." 238 " Original request %d.", r, size, msg->hdr.request); 239 return -1; 240 } 241 242 /* validate received flags */ 243 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 244 error_report("Failed to read msg header." 245 " Flags 0x%x instead of 0x%x.", msg->hdr.flags, 246 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 247 return -1; 248 } 249 250 return 0; 251 } 252 253 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 254 { 255 struct vhost_user *u = dev->opaque; 256 CharBackend *chr = u->user->chr; 257 uint8_t *p = (uint8_t *) msg; 258 int r, size; 259 260 if (vhost_user_read_header(dev, msg) < 0) { 261 return -1; 262 } 263 264 /* validate message size is sane */ 265 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { 266 error_report("Failed to read msg header." 267 " Size %d exceeds the maximum %zu.", msg->hdr.size, 268 VHOST_USER_PAYLOAD_SIZE); 269 return -1; 270 } 271 272 if (msg->hdr.size) { 273 p += VHOST_USER_HDR_SIZE; 274 size = msg->hdr.size; 275 r = qemu_chr_fe_read_all(chr, p, size); 276 if (r != size) { 277 error_report("Failed to read msg payload." 278 " Read %d instead of %d.", r, msg->hdr.size); 279 return -1; 280 } 281 } 282 283 return 0; 284 } 285 286 static int process_message_reply(struct vhost_dev *dev, 287 const VhostUserMsg *msg) 288 { 289 VhostUserMsg msg_reply; 290 291 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 292 return 0; 293 } 294 295 if (vhost_user_read(dev, &msg_reply) < 0) { 296 return -1; 297 } 298 299 if (msg_reply.hdr.request != msg->hdr.request) { 300 error_report("Received unexpected msg type." 301 "Expected %d received %d", 302 msg->hdr.request, msg_reply.hdr.request); 303 return -1; 304 } 305 306 return msg_reply.payload.u64 ? -1 : 0; 307 } 308 309 static bool vhost_user_one_time_request(VhostUserRequest request) 310 { 311 switch (request) { 312 case VHOST_USER_SET_OWNER: 313 case VHOST_USER_RESET_OWNER: 314 case VHOST_USER_SET_MEM_TABLE: 315 case VHOST_USER_GET_QUEUE_NUM: 316 case VHOST_USER_NET_SET_MTU: 317 return true; 318 default: 319 return false; 320 } 321 } 322 323 /* most non-init callers ignore the error */ 324 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 325 int *fds, int fd_num) 326 { 327 struct vhost_user *u = dev->opaque; 328 CharBackend *chr = u->user->chr; 329 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; 330 331 /* 332 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 333 * we just need send it once in the first time. For later such 334 * request, we just ignore it. 335 */ 336 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { 337 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 338 return 0; 339 } 340 341 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { 342 error_report("Failed to set msg fds."); 343 return -1; 344 } 345 346 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); 347 if (ret != size) { 348 error_report("Failed to write msg." 349 " Wrote %d instead of %d.", ret, size); 350 return -1; 351 } 352 353 return 0; 354 } 355 356 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, 357 struct vhost_log *log) 358 { 359 int fds[VHOST_MEMORY_MAX_NREGIONS]; 360 size_t fd_num = 0; 361 bool shmfd = virtio_has_feature(dev->protocol_features, 362 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 363 VhostUserMsg msg = { 364 .hdr.request = VHOST_USER_SET_LOG_BASE, 365 .hdr.flags = VHOST_USER_VERSION, 366 .payload.log.mmap_size = log->size * sizeof(*(log->log)), 367 .payload.log.mmap_offset = 0, 368 .hdr.size = sizeof(msg.payload.log), 369 }; 370 371 if (shmfd && log->fd != -1) { 372 fds[fd_num++] = log->fd; 373 } 374 375 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 376 return -1; 377 } 378 379 if (shmfd) { 380 msg.hdr.size = 0; 381 if (vhost_user_read(dev, &msg) < 0) { 382 return -1; 383 } 384 385 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { 386 error_report("Received unexpected msg type. " 387 "Expected %d received %d", 388 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 389 return -1; 390 } 391 } 392 393 return 0; 394 } 395 396 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 397 struct vhost_memory *mem) 398 { 399 struct vhost_user *u = dev->opaque; 400 int fds[VHOST_MEMORY_MAX_NREGIONS]; 401 int i, fd; 402 size_t fd_num = 0; 403 VhostUserMsg msg_reply; 404 int region_i, msg_i; 405 406 VhostUserMsg msg = { 407 .hdr.request = VHOST_USER_SET_MEM_TABLE, 408 .hdr.flags = VHOST_USER_VERSION, 409 }; 410 411 if (u->region_rb_len < dev->mem->nregions) { 412 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 413 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 414 dev->mem->nregions); 415 memset(&(u->region_rb[u->region_rb_len]), '\0', 416 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 417 memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 418 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 419 u->region_rb_len = dev->mem->nregions; 420 } 421 422 for (i = 0; i < dev->mem->nregions; ++i) { 423 struct vhost_memory_region *reg = dev->mem->regions + i; 424 ram_addr_t offset; 425 MemoryRegion *mr; 426 427 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 428 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 429 &offset); 430 fd = memory_region_get_fd(mr); 431 if (fd > 0) { 432 trace_vhost_user_set_mem_table_withfd(fd_num, mr->name, 433 reg->memory_size, 434 reg->guest_phys_addr, 435 reg->userspace_addr, offset); 436 u->region_rb_offset[i] = offset; 437 u->region_rb[i] = mr->ram_block; 438 msg.payload.memory.regions[fd_num].userspace_addr = 439 reg->userspace_addr; 440 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 441 msg.payload.memory.regions[fd_num].guest_phys_addr = 442 reg->guest_phys_addr; 443 msg.payload.memory.regions[fd_num].mmap_offset = offset; 444 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 445 fds[fd_num++] = fd; 446 } else { 447 u->region_rb_offset[i] = 0; 448 u->region_rb[i] = NULL; 449 } 450 } 451 452 msg.payload.memory.nregions = fd_num; 453 454 if (!fd_num) { 455 error_report("Failed initializing vhost-user memory map, " 456 "consider using -object memory-backend-file share=on"); 457 return -1; 458 } 459 460 msg.hdr.size = sizeof(msg.payload.memory.nregions); 461 msg.hdr.size += sizeof(msg.payload.memory.padding); 462 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 463 464 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 465 return -1; 466 } 467 468 if (vhost_user_read(dev, &msg_reply) < 0) { 469 return -1; 470 } 471 472 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 473 error_report("%s: Received unexpected msg type." 474 "Expected %d received %d", __func__, 475 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 476 return -1; 477 } 478 /* We're using the same structure, just reusing one of the 479 * fields, so it should be the same size. 480 */ 481 if (msg_reply.hdr.size != msg.hdr.size) { 482 error_report("%s: Unexpected size for postcopy reply " 483 "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); 484 return -1; 485 } 486 487 memset(u->postcopy_client_bases, 0, 488 sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); 489 490 /* They're in the same order as the regions that were sent 491 * but some of the regions were skipped (above) if they 492 * didn't have fd's 493 */ 494 for (msg_i = 0, region_i = 0; 495 region_i < dev->mem->nregions; 496 region_i++) { 497 if (msg_i < fd_num && 498 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 499 dev->mem->regions[region_i].guest_phys_addr) { 500 u->postcopy_client_bases[region_i] = 501 msg_reply.payload.memory.regions[msg_i].userspace_addr; 502 trace_vhost_user_set_mem_table_postcopy( 503 msg_reply.payload.memory.regions[msg_i].userspace_addr, 504 msg.payload.memory.regions[msg_i].userspace_addr, 505 msg_i, region_i); 506 msg_i++; 507 } 508 } 509 if (msg_i != fd_num) { 510 error_report("%s: postcopy reply not fully consumed " 511 "%d vs %zd", 512 __func__, msg_i, fd_num); 513 return -1; 514 } 515 /* Now we've registered this with the postcopy code, we ack to the client, 516 * because now we're in the position to be able to deal with any faults 517 * it generates. 518 */ 519 /* TODO: Use this for failure cases as well with a bad value */ 520 msg.hdr.size = sizeof(msg.payload.u64); 521 msg.payload.u64 = 0; /* OK */ 522 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 523 return -1; 524 } 525 526 return 0; 527 } 528 529 static int vhost_user_set_mem_table(struct vhost_dev *dev, 530 struct vhost_memory *mem) 531 { 532 struct vhost_user *u = dev->opaque; 533 int fds[VHOST_MEMORY_MAX_NREGIONS]; 534 int i, fd; 535 size_t fd_num = 0; 536 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 537 bool reply_supported = virtio_has_feature(dev->protocol_features, 538 VHOST_USER_PROTOCOL_F_REPLY_ACK); 539 540 if (do_postcopy) { 541 /* Postcopy has enough differences that it's best done in it's own 542 * version 543 */ 544 return vhost_user_set_mem_table_postcopy(dev, mem); 545 } 546 547 VhostUserMsg msg = { 548 .hdr.request = VHOST_USER_SET_MEM_TABLE, 549 .hdr.flags = VHOST_USER_VERSION, 550 }; 551 552 if (reply_supported) { 553 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 554 } 555 556 for (i = 0; i < dev->mem->nregions; ++i) { 557 struct vhost_memory_region *reg = dev->mem->regions + i; 558 ram_addr_t offset; 559 MemoryRegion *mr; 560 561 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 562 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 563 &offset); 564 fd = memory_region_get_fd(mr); 565 if (fd > 0) { 566 if (fd_num == VHOST_MEMORY_MAX_NREGIONS) { 567 error_report("Failed preparing vhost-user memory table msg"); 568 return -1; 569 } 570 msg.payload.memory.regions[fd_num].userspace_addr = 571 reg->userspace_addr; 572 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 573 msg.payload.memory.regions[fd_num].guest_phys_addr = 574 reg->guest_phys_addr; 575 msg.payload.memory.regions[fd_num].mmap_offset = offset; 576 fds[fd_num++] = fd; 577 } 578 } 579 580 msg.payload.memory.nregions = fd_num; 581 582 if (!fd_num) { 583 error_report("Failed initializing vhost-user memory map, " 584 "consider using -object memory-backend-file share=on"); 585 return -1; 586 } 587 588 msg.hdr.size = sizeof(msg.payload.memory.nregions); 589 msg.hdr.size += sizeof(msg.payload.memory.padding); 590 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 591 592 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 593 return -1; 594 } 595 596 if (reply_supported) { 597 return process_message_reply(dev, &msg); 598 } 599 600 return 0; 601 } 602 603 static int vhost_user_set_vring_addr(struct vhost_dev *dev, 604 struct vhost_vring_addr *addr) 605 { 606 VhostUserMsg msg = { 607 .hdr.request = VHOST_USER_SET_VRING_ADDR, 608 .hdr.flags = VHOST_USER_VERSION, 609 .payload.addr = *addr, 610 .hdr.size = sizeof(msg.payload.addr), 611 }; 612 613 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 614 return -1; 615 } 616 617 return 0; 618 } 619 620 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 621 struct vhost_vring_state *ring) 622 { 623 bool cross_endian = virtio_has_feature(dev->protocol_features, 624 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 625 VhostUserMsg msg = { 626 .hdr.request = VHOST_USER_SET_VRING_ENDIAN, 627 .hdr.flags = VHOST_USER_VERSION, 628 .payload.state = *ring, 629 .hdr.size = sizeof(msg.payload.state), 630 }; 631 632 if (!cross_endian) { 633 error_report("vhost-user trying to send unhandled ioctl"); 634 return -1; 635 } 636 637 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 638 return -1; 639 } 640 641 return 0; 642 } 643 644 static int vhost_set_vring(struct vhost_dev *dev, 645 unsigned long int request, 646 struct vhost_vring_state *ring) 647 { 648 VhostUserMsg msg = { 649 .hdr.request = request, 650 .hdr.flags = VHOST_USER_VERSION, 651 .payload.state = *ring, 652 .hdr.size = sizeof(msg.payload.state), 653 }; 654 655 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 656 return -1; 657 } 658 659 return 0; 660 } 661 662 static int vhost_user_set_vring_num(struct vhost_dev *dev, 663 struct vhost_vring_state *ring) 664 { 665 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); 666 } 667 668 static void vhost_user_host_notifier_restore(struct vhost_dev *dev, 669 int queue_idx) 670 { 671 struct vhost_user *u = dev->opaque; 672 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 673 VirtIODevice *vdev = dev->vdev; 674 675 if (n->addr && !n->set) { 676 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true); 677 n->set = true; 678 } 679 } 680 681 static void vhost_user_host_notifier_remove(struct vhost_dev *dev, 682 int queue_idx) 683 { 684 struct vhost_user *u = dev->opaque; 685 VhostUserHostNotifier *n = &u->user->notifier[queue_idx]; 686 VirtIODevice *vdev = dev->vdev; 687 688 if (n->addr && n->set) { 689 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 690 n->set = false; 691 } 692 } 693 694 static int vhost_user_set_vring_base(struct vhost_dev *dev, 695 struct vhost_vring_state *ring) 696 { 697 vhost_user_host_notifier_restore(dev, ring->index); 698 699 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); 700 } 701 702 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 703 { 704 int i; 705 706 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { 707 return -1; 708 } 709 710 for (i = 0; i < dev->nvqs; ++i) { 711 struct vhost_vring_state state = { 712 .index = dev->vq_index + i, 713 .num = enable, 714 }; 715 716 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); 717 } 718 719 return 0; 720 } 721 722 static int vhost_user_get_vring_base(struct vhost_dev *dev, 723 struct vhost_vring_state *ring) 724 { 725 VhostUserMsg msg = { 726 .hdr.request = VHOST_USER_GET_VRING_BASE, 727 .hdr.flags = VHOST_USER_VERSION, 728 .payload.state = *ring, 729 .hdr.size = sizeof(msg.payload.state), 730 }; 731 732 vhost_user_host_notifier_remove(dev, ring->index); 733 734 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 735 return -1; 736 } 737 738 if (vhost_user_read(dev, &msg) < 0) { 739 return -1; 740 } 741 742 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { 743 error_report("Received unexpected msg type. Expected %d received %d", 744 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 745 return -1; 746 } 747 748 if (msg.hdr.size != sizeof(msg.payload.state)) { 749 error_report("Received bad msg size."); 750 return -1; 751 } 752 753 *ring = msg.payload.state; 754 755 return 0; 756 } 757 758 static int vhost_set_vring_file(struct vhost_dev *dev, 759 VhostUserRequest request, 760 struct vhost_vring_file *file) 761 { 762 int fds[VHOST_MEMORY_MAX_NREGIONS]; 763 size_t fd_num = 0; 764 VhostUserMsg msg = { 765 .hdr.request = request, 766 .hdr.flags = VHOST_USER_VERSION, 767 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 768 .hdr.size = sizeof(msg.payload.u64), 769 }; 770 771 if (ioeventfd_enabled() && file->fd > 0) { 772 fds[fd_num++] = file->fd; 773 } else { 774 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 775 } 776 777 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 778 return -1; 779 } 780 781 return 0; 782 } 783 784 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 785 struct vhost_vring_file *file) 786 { 787 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 788 } 789 790 static int vhost_user_set_vring_call(struct vhost_dev *dev, 791 struct vhost_vring_file *file) 792 { 793 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 794 } 795 796 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) 797 { 798 VhostUserMsg msg = { 799 .hdr.request = request, 800 .hdr.flags = VHOST_USER_VERSION, 801 .payload.u64 = u64, 802 .hdr.size = sizeof(msg.payload.u64), 803 }; 804 805 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 806 return -1; 807 } 808 809 return 0; 810 } 811 812 static int vhost_user_set_features(struct vhost_dev *dev, 813 uint64_t features) 814 { 815 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); 816 } 817 818 static int vhost_user_set_protocol_features(struct vhost_dev *dev, 819 uint64_t features) 820 { 821 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); 822 } 823 824 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 825 { 826 VhostUserMsg msg = { 827 .hdr.request = request, 828 .hdr.flags = VHOST_USER_VERSION, 829 }; 830 831 if (vhost_user_one_time_request(request) && dev->vq_index != 0) { 832 return 0; 833 } 834 835 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 836 return -1; 837 } 838 839 if (vhost_user_read(dev, &msg) < 0) { 840 return -1; 841 } 842 843 if (msg.hdr.request != request) { 844 error_report("Received unexpected msg type. Expected %d received %d", 845 request, msg.hdr.request); 846 return -1; 847 } 848 849 if (msg.hdr.size != sizeof(msg.payload.u64)) { 850 error_report("Received bad msg size."); 851 return -1; 852 } 853 854 *u64 = msg.payload.u64; 855 856 return 0; 857 } 858 859 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 860 { 861 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); 862 } 863 864 static int vhost_user_set_owner(struct vhost_dev *dev) 865 { 866 VhostUserMsg msg = { 867 .hdr.request = VHOST_USER_SET_OWNER, 868 .hdr.flags = VHOST_USER_VERSION, 869 }; 870 871 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 872 return -1; 873 } 874 875 return 0; 876 } 877 878 static int vhost_user_reset_device(struct vhost_dev *dev) 879 { 880 VhostUserMsg msg = { 881 .hdr.request = VHOST_USER_RESET_OWNER, 882 .hdr.flags = VHOST_USER_VERSION, 883 }; 884 885 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 886 return -1; 887 } 888 889 return 0; 890 } 891 892 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) 893 { 894 int ret = -1; 895 896 if (!dev->config_ops) { 897 return -1; 898 } 899 900 if (dev->config_ops->vhost_dev_config_notifier) { 901 ret = dev->config_ops->vhost_dev_config_notifier(dev); 902 } 903 904 return ret; 905 } 906 907 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, 908 VhostUserVringArea *area, 909 int fd) 910 { 911 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK; 912 size_t page_size = qemu_real_host_page_size; 913 struct vhost_user *u = dev->opaque; 914 VhostUserState *user = u->user; 915 VirtIODevice *vdev = dev->vdev; 916 VhostUserHostNotifier *n; 917 void *addr; 918 char *name; 919 920 if (!virtio_has_feature(dev->protocol_features, 921 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) || 922 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) { 923 return -1; 924 } 925 926 n = &user->notifier[queue_idx]; 927 928 if (n->addr) { 929 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); 930 object_unparent(OBJECT(&n->mr)); 931 munmap(n->addr, page_size); 932 n->addr = NULL; 933 } 934 935 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { 936 return 0; 937 } 938 939 /* Sanity check. */ 940 if (area->size != page_size) { 941 return -1; 942 } 943 944 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, 945 fd, area->offset); 946 if (addr == MAP_FAILED) { 947 return -1; 948 } 949 950 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", 951 user, queue_idx); 952 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, 953 page_size, addr); 954 g_free(name); 955 956 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { 957 munmap(addr, page_size); 958 return -1; 959 } 960 961 n->addr = addr; 962 n->set = true; 963 964 return 0; 965 } 966 967 static void slave_read(void *opaque) 968 { 969 struct vhost_dev *dev = opaque; 970 struct vhost_user *u = dev->opaque; 971 VhostUserHeader hdr = { 0, }; 972 VhostUserPayload payload = { 0, }; 973 int size, ret = 0; 974 struct iovec iov; 975 struct msghdr msgh; 976 int fd[VHOST_USER_SLAVE_MAX_FDS]; 977 char control[CMSG_SPACE(sizeof(fd))]; 978 struct cmsghdr *cmsg; 979 int i, fdsize = 0; 980 981 memset(&msgh, 0, sizeof(msgh)); 982 msgh.msg_iov = &iov; 983 msgh.msg_iovlen = 1; 984 msgh.msg_control = control; 985 msgh.msg_controllen = sizeof(control); 986 987 memset(fd, -1, sizeof(fd)); 988 989 /* Read header */ 990 iov.iov_base = &hdr; 991 iov.iov_len = VHOST_USER_HDR_SIZE; 992 993 do { 994 size = recvmsg(u->slave_fd, &msgh, 0); 995 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 996 997 if (size != VHOST_USER_HDR_SIZE) { 998 error_report("Failed to read from slave."); 999 goto err; 1000 } 1001 1002 if (msgh.msg_flags & MSG_CTRUNC) { 1003 error_report("Truncated message."); 1004 goto err; 1005 } 1006 1007 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; 1008 cmsg = CMSG_NXTHDR(&msgh, cmsg)) { 1009 if (cmsg->cmsg_level == SOL_SOCKET && 1010 cmsg->cmsg_type == SCM_RIGHTS) { 1011 fdsize = cmsg->cmsg_len - CMSG_LEN(0); 1012 memcpy(fd, CMSG_DATA(cmsg), fdsize); 1013 break; 1014 } 1015 } 1016 1017 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1018 error_report("Failed to read msg header." 1019 " Size %d exceeds the maximum %zu.", hdr.size, 1020 VHOST_USER_PAYLOAD_SIZE); 1021 goto err; 1022 } 1023 1024 /* Read payload */ 1025 do { 1026 size = read(u->slave_fd, &payload, hdr.size); 1027 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 1028 1029 if (size != hdr.size) { 1030 error_report("Failed to read payload from slave."); 1031 goto err; 1032 } 1033 1034 switch (hdr.request) { 1035 case VHOST_USER_SLAVE_IOTLB_MSG: 1036 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1037 break; 1038 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : 1039 ret = vhost_user_slave_handle_config_change(dev); 1040 break; 1041 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: 1042 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, 1043 fd[0]); 1044 break; 1045 default: 1046 error_report("Received unexpected msg type."); 1047 ret = -EINVAL; 1048 } 1049 1050 /* Close the remaining file descriptors. */ 1051 for (i = 0; i < fdsize; i++) { 1052 if (fd[i] != -1) { 1053 close(fd[i]); 1054 } 1055 } 1056 1057 /* 1058 * REPLY_ACK feature handling. Other reply types has to be managed 1059 * directly in their request handlers. 1060 */ 1061 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1062 struct iovec iovec[2]; 1063 1064 1065 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; 1066 hdr.flags |= VHOST_USER_REPLY_MASK; 1067 1068 payload.u64 = !!ret; 1069 hdr.size = sizeof(payload.u64); 1070 1071 iovec[0].iov_base = &hdr; 1072 iovec[0].iov_len = VHOST_USER_HDR_SIZE; 1073 iovec[1].iov_base = &payload; 1074 iovec[1].iov_len = hdr.size; 1075 1076 do { 1077 size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); 1078 } while (size < 0 && (errno == EINTR || errno == EAGAIN)); 1079 1080 if (size != VHOST_USER_HDR_SIZE + hdr.size) { 1081 error_report("Failed to send msg reply to slave."); 1082 goto err; 1083 } 1084 } 1085 1086 return; 1087 1088 err: 1089 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1090 close(u->slave_fd); 1091 u->slave_fd = -1; 1092 for (i = 0; i < fdsize; i++) { 1093 if (fd[i] != -1) { 1094 close(fd[i]); 1095 } 1096 } 1097 return; 1098 } 1099 1100 static int vhost_setup_slave_channel(struct vhost_dev *dev) 1101 { 1102 VhostUserMsg msg = { 1103 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, 1104 .hdr.flags = VHOST_USER_VERSION, 1105 }; 1106 struct vhost_user *u = dev->opaque; 1107 int sv[2], ret = 0; 1108 bool reply_supported = virtio_has_feature(dev->protocol_features, 1109 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1110 1111 if (!virtio_has_feature(dev->protocol_features, 1112 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { 1113 return 0; 1114 } 1115 1116 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 1117 error_report("socketpair() failed"); 1118 return -1; 1119 } 1120 1121 u->slave_fd = sv[0]; 1122 qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev); 1123 1124 if (reply_supported) { 1125 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1126 } 1127 1128 ret = vhost_user_write(dev, &msg, &sv[1], 1); 1129 if (ret) { 1130 goto out; 1131 } 1132 1133 if (reply_supported) { 1134 ret = process_message_reply(dev, &msg); 1135 } 1136 1137 out: 1138 close(sv[1]); 1139 if (ret) { 1140 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1141 close(u->slave_fd); 1142 u->slave_fd = -1; 1143 } 1144 1145 return ret; 1146 } 1147 1148 #ifdef CONFIG_LINUX 1149 /* 1150 * Called back from the postcopy fault thread when a fault is received on our 1151 * ufd. 1152 * TODO: This is Linux specific 1153 */ 1154 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 1155 void *ufd) 1156 { 1157 struct vhost_dev *dev = pcfd->data; 1158 struct vhost_user *u = dev->opaque; 1159 struct uffd_msg *msg = ufd; 1160 uint64_t faultaddr = msg->arg.pagefault.address; 1161 RAMBlock *rb = NULL; 1162 uint64_t rb_offset; 1163 int i; 1164 1165 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 1166 dev->mem->nregions); 1167 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1168 trace_vhost_user_postcopy_fault_handler_loop(i, 1169 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 1170 if (faultaddr >= u->postcopy_client_bases[i]) { 1171 /* Ofset of the fault address in the vhost region */ 1172 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 1173 if (region_offset < dev->mem->regions[i].memory_size) { 1174 rb_offset = region_offset + u->region_rb_offset[i]; 1175 trace_vhost_user_postcopy_fault_handler_found(i, 1176 region_offset, rb_offset); 1177 rb = u->region_rb[i]; 1178 return postcopy_request_shared_page(pcfd, rb, faultaddr, 1179 rb_offset); 1180 } 1181 } 1182 } 1183 error_report("%s: Failed to find region for fault %" PRIx64, 1184 __func__, faultaddr); 1185 return -1; 1186 } 1187 1188 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1189 uint64_t offset) 1190 { 1191 struct vhost_dev *dev = pcfd->data; 1192 struct vhost_user *u = dev->opaque; 1193 int i; 1194 1195 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1196 1197 if (!u) { 1198 return 0; 1199 } 1200 /* Translate the offset into an address in the clients address space */ 1201 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1202 if (u->region_rb[i] == rb && 1203 offset >= u->region_rb_offset[i] && 1204 offset < (u->region_rb_offset[i] + 1205 dev->mem->regions[i].memory_size)) { 1206 uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1207 u->postcopy_client_bases[i]; 1208 trace_vhost_user_postcopy_waker_found(client_addr); 1209 return postcopy_wake_shared(pcfd, client_addr, rb); 1210 } 1211 } 1212 1213 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1214 return 0; 1215 } 1216 #endif 1217 1218 /* 1219 * Called at the start of an inbound postcopy on reception of the 1220 * 'advise' command. 1221 */ 1222 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1223 { 1224 #ifdef CONFIG_LINUX 1225 struct vhost_user *u = dev->opaque; 1226 CharBackend *chr = u->user->chr; 1227 int ufd; 1228 VhostUserMsg msg = { 1229 .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1230 .hdr.flags = VHOST_USER_VERSION, 1231 }; 1232 1233 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1234 error_setg(errp, "Failed to send postcopy_advise to vhost"); 1235 return -1; 1236 } 1237 1238 if (vhost_user_read(dev, &msg) < 0) { 1239 error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1240 return -1; 1241 } 1242 1243 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1244 error_setg(errp, "Unexpected msg type. Expected %d received %d", 1245 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1246 return -1; 1247 } 1248 1249 if (msg.hdr.size) { 1250 error_setg(errp, "Received bad msg size."); 1251 return -1; 1252 } 1253 ufd = qemu_chr_fe_get_msgfd(chr); 1254 if (ufd < 0) { 1255 error_setg(errp, "%s: Failed to get ufd", __func__); 1256 return -1; 1257 } 1258 qemu_set_nonblock(ufd); 1259 1260 /* register ufd with userfault thread */ 1261 u->postcopy_fd.fd = ufd; 1262 u->postcopy_fd.data = dev; 1263 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1264 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1265 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1266 postcopy_register_shared_ufd(&u->postcopy_fd); 1267 return 0; 1268 #else 1269 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1270 return -1; 1271 #endif 1272 } 1273 1274 /* 1275 * Called at the switch to postcopy on reception of the 'listen' command. 1276 */ 1277 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1278 { 1279 struct vhost_user *u = dev->opaque; 1280 int ret; 1281 VhostUserMsg msg = { 1282 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1283 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1284 }; 1285 u->postcopy_listen = true; 1286 trace_vhost_user_postcopy_listen(); 1287 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1288 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1289 return -1; 1290 } 1291 1292 ret = process_message_reply(dev, &msg); 1293 if (ret) { 1294 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1295 return ret; 1296 } 1297 1298 return 0; 1299 } 1300 1301 /* 1302 * Called at the end of postcopy 1303 */ 1304 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1305 { 1306 VhostUserMsg msg = { 1307 .hdr.request = VHOST_USER_POSTCOPY_END, 1308 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1309 }; 1310 int ret; 1311 struct vhost_user *u = dev->opaque; 1312 1313 trace_vhost_user_postcopy_end_entry(); 1314 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1315 error_setg(errp, "Failed to send postcopy_end to vhost"); 1316 return -1; 1317 } 1318 1319 ret = process_message_reply(dev, &msg); 1320 if (ret) { 1321 error_setg(errp, "Failed to receive reply to postcopy_end"); 1322 return ret; 1323 } 1324 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1325 close(u->postcopy_fd.fd); 1326 u->postcopy_fd.handler = NULL; 1327 1328 trace_vhost_user_postcopy_end_exit(); 1329 1330 return 0; 1331 } 1332 1333 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1334 void *opaque) 1335 { 1336 struct PostcopyNotifyData *pnd = opaque; 1337 struct vhost_user *u = container_of(notifier, struct vhost_user, 1338 postcopy_notifier); 1339 struct vhost_dev *dev = u->dev; 1340 1341 switch (pnd->reason) { 1342 case POSTCOPY_NOTIFY_PROBE: 1343 if (!virtio_has_feature(dev->protocol_features, 1344 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1345 /* TODO: Get the device name into this error somehow */ 1346 error_setg(pnd->errp, 1347 "vhost-user backend not capable of postcopy"); 1348 return -ENOENT; 1349 } 1350 break; 1351 1352 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1353 return vhost_user_postcopy_advise(dev, pnd->errp); 1354 1355 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1356 return vhost_user_postcopy_listen(dev, pnd->errp); 1357 1358 case POSTCOPY_NOTIFY_INBOUND_END: 1359 return vhost_user_postcopy_end(dev, pnd->errp); 1360 1361 default: 1362 /* We ignore notifications we don't know */ 1363 break; 1364 } 1365 1366 return 0; 1367 } 1368 1369 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) 1370 { 1371 uint64_t features, protocol_features; 1372 struct vhost_user *u; 1373 int err; 1374 1375 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1376 1377 u = g_new0(struct vhost_user, 1); 1378 u->user = opaque; 1379 u->slave_fd = -1; 1380 u->dev = dev; 1381 dev->opaque = u; 1382 1383 err = vhost_user_get_features(dev, &features); 1384 if (err < 0) { 1385 return err; 1386 } 1387 1388 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 1389 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 1390 1391 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 1392 &protocol_features); 1393 if (err < 0) { 1394 return err; 1395 } 1396 1397 dev->protocol_features = 1398 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; 1399 1400 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { 1401 /* Don't acknowledge CONFIG feature if device doesn't support it */ 1402 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 1403 } else if (!(protocol_features & 1404 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { 1405 error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG " 1406 "but backend does not support it."); 1407 return -1; 1408 } 1409 1410 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 1411 if (err < 0) { 1412 return err; 1413 } 1414 1415 /* query the max queues we support if backend supports Multiple Queue */ 1416 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 1417 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 1418 &dev->max_queues); 1419 if (err < 0) { 1420 return err; 1421 } 1422 } 1423 1424 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 1425 !(virtio_has_feature(dev->protocol_features, 1426 VHOST_USER_PROTOCOL_F_SLAVE_REQ) && 1427 virtio_has_feature(dev->protocol_features, 1428 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 1429 error_report("IOMMU support requires reply-ack and " 1430 "slave-req protocol features."); 1431 return -1; 1432 } 1433 } 1434 1435 if (dev->migration_blocker == NULL && 1436 !virtio_has_feature(dev->protocol_features, 1437 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 1438 error_setg(&dev->migration_blocker, 1439 "Migration disabled: vhost-user backend lacks " 1440 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 1441 } 1442 1443 err = vhost_setup_slave_channel(dev); 1444 if (err < 0) { 1445 return err; 1446 } 1447 1448 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 1449 postcopy_add_notifier(&u->postcopy_notifier); 1450 1451 return 0; 1452 } 1453 1454 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 1455 { 1456 struct vhost_user *u; 1457 1458 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1459 1460 u = dev->opaque; 1461 if (u->postcopy_notifier.notify) { 1462 postcopy_remove_notifier(&u->postcopy_notifier); 1463 u->postcopy_notifier.notify = NULL; 1464 } 1465 u->postcopy_listen = false; 1466 if (u->postcopy_fd.handler) { 1467 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1468 close(u->postcopy_fd.fd); 1469 u->postcopy_fd.handler = NULL; 1470 } 1471 if (u->slave_fd >= 0) { 1472 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 1473 close(u->slave_fd); 1474 u->slave_fd = -1; 1475 } 1476 g_free(u->region_rb); 1477 u->region_rb = NULL; 1478 g_free(u->region_rb_offset); 1479 u->region_rb_offset = NULL; 1480 u->region_rb_len = 0; 1481 g_free(u); 1482 dev->opaque = 0; 1483 1484 return 0; 1485 } 1486 1487 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 1488 { 1489 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 1490 1491 return idx; 1492 } 1493 1494 static int vhost_user_memslots_limit(struct vhost_dev *dev) 1495 { 1496 return VHOST_MEMORY_MAX_NREGIONS; 1497 } 1498 1499 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 1500 { 1501 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1502 1503 return virtio_has_feature(dev->protocol_features, 1504 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 1505 } 1506 1507 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 1508 { 1509 VhostUserMsg msg = { }; 1510 1511 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1512 1513 /* If guest supports GUEST_ANNOUNCE do nothing */ 1514 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 1515 return 0; 1516 } 1517 1518 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 1519 if (virtio_has_feature(dev->protocol_features, 1520 VHOST_USER_PROTOCOL_F_RARP)) { 1521 msg.hdr.request = VHOST_USER_SEND_RARP; 1522 msg.hdr.flags = VHOST_USER_VERSION; 1523 memcpy((char *)&msg.payload.u64, mac_addr, 6); 1524 msg.hdr.size = sizeof(msg.payload.u64); 1525 1526 return vhost_user_write(dev, &msg, NULL, 0); 1527 } 1528 return -1; 1529 } 1530 1531 static bool vhost_user_can_merge(struct vhost_dev *dev, 1532 uint64_t start1, uint64_t size1, 1533 uint64_t start2, uint64_t size2) 1534 { 1535 ram_addr_t offset; 1536 int mfd, rfd; 1537 MemoryRegion *mr; 1538 1539 mr = memory_region_from_host((void *)(uintptr_t)start1, &offset); 1540 mfd = memory_region_get_fd(mr); 1541 1542 mr = memory_region_from_host((void *)(uintptr_t)start2, &offset); 1543 rfd = memory_region_get_fd(mr); 1544 1545 return mfd == rfd; 1546 } 1547 1548 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 1549 { 1550 VhostUserMsg msg; 1551 bool reply_supported = virtio_has_feature(dev->protocol_features, 1552 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1553 1554 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 1555 return 0; 1556 } 1557 1558 msg.hdr.request = VHOST_USER_NET_SET_MTU; 1559 msg.payload.u64 = mtu; 1560 msg.hdr.size = sizeof(msg.payload.u64); 1561 msg.hdr.flags = VHOST_USER_VERSION; 1562 if (reply_supported) { 1563 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1564 } 1565 1566 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1567 return -1; 1568 } 1569 1570 /* If reply_ack supported, slave has to ack specified MTU is valid */ 1571 if (reply_supported) { 1572 return process_message_reply(dev, &msg); 1573 } 1574 1575 return 0; 1576 } 1577 1578 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 1579 struct vhost_iotlb_msg *imsg) 1580 { 1581 VhostUserMsg msg = { 1582 .hdr.request = VHOST_USER_IOTLB_MSG, 1583 .hdr.size = sizeof(msg.payload.iotlb), 1584 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1585 .payload.iotlb = *imsg, 1586 }; 1587 1588 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1589 return -EFAULT; 1590 } 1591 1592 return process_message_reply(dev, &msg); 1593 } 1594 1595 1596 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 1597 { 1598 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 1599 } 1600 1601 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 1602 uint32_t config_len) 1603 { 1604 VhostUserMsg msg = { 1605 .hdr.request = VHOST_USER_GET_CONFIG, 1606 .hdr.flags = VHOST_USER_VERSION, 1607 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 1608 }; 1609 1610 if (!virtio_has_feature(dev->protocol_features, 1611 VHOST_USER_PROTOCOL_F_CONFIG)) { 1612 return -1; 1613 } 1614 1615 if (config_len > VHOST_USER_MAX_CONFIG_SIZE) { 1616 return -1; 1617 } 1618 1619 msg.payload.config.offset = 0; 1620 msg.payload.config.size = config_len; 1621 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1622 return -1; 1623 } 1624 1625 if (vhost_user_read(dev, &msg) < 0) { 1626 return -1; 1627 } 1628 1629 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 1630 error_report("Received unexpected msg type. Expected %d received %d", 1631 VHOST_USER_GET_CONFIG, msg.hdr.request); 1632 return -1; 1633 } 1634 1635 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 1636 error_report("Received bad msg size."); 1637 return -1; 1638 } 1639 1640 memcpy(config, msg.payload.config.region, config_len); 1641 1642 return 0; 1643 } 1644 1645 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 1646 uint32_t offset, uint32_t size, uint32_t flags) 1647 { 1648 uint8_t *p; 1649 bool reply_supported = virtio_has_feature(dev->protocol_features, 1650 VHOST_USER_PROTOCOL_F_REPLY_ACK); 1651 1652 VhostUserMsg msg = { 1653 .hdr.request = VHOST_USER_SET_CONFIG, 1654 .hdr.flags = VHOST_USER_VERSION, 1655 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 1656 }; 1657 1658 if (!virtio_has_feature(dev->protocol_features, 1659 VHOST_USER_PROTOCOL_F_CONFIG)) { 1660 return -1; 1661 } 1662 1663 if (reply_supported) { 1664 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1665 } 1666 1667 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 1668 return -1; 1669 } 1670 1671 msg.payload.config.offset = offset, 1672 msg.payload.config.size = size, 1673 msg.payload.config.flags = flags, 1674 p = msg.payload.config.region; 1675 memcpy(p, data, size); 1676 1677 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1678 return -1; 1679 } 1680 1681 if (reply_supported) { 1682 return process_message_reply(dev, &msg); 1683 } 1684 1685 return 0; 1686 } 1687 1688 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 1689 void *session_info, 1690 uint64_t *session_id) 1691 { 1692 bool crypto_session = virtio_has_feature(dev->protocol_features, 1693 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 1694 CryptoDevBackendSymSessionInfo *sess_info = session_info; 1695 VhostUserMsg msg = { 1696 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 1697 .hdr.flags = VHOST_USER_VERSION, 1698 .hdr.size = sizeof(msg.payload.session), 1699 }; 1700 1701 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 1702 1703 if (!crypto_session) { 1704 error_report("vhost-user trying to send unhandled ioctl"); 1705 return -1; 1706 } 1707 1708 memcpy(&msg.payload.session.session_setup_data, sess_info, 1709 sizeof(CryptoDevBackendSymSessionInfo)); 1710 if (sess_info->key_len) { 1711 memcpy(&msg.payload.session.key, sess_info->cipher_key, 1712 sess_info->key_len); 1713 } 1714 if (sess_info->auth_key_len > 0) { 1715 memcpy(&msg.payload.session.auth_key, sess_info->auth_key, 1716 sess_info->auth_key_len); 1717 } 1718 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1719 error_report("vhost_user_write() return -1, create session failed"); 1720 return -1; 1721 } 1722 1723 if (vhost_user_read(dev, &msg) < 0) { 1724 error_report("vhost_user_read() return -1, create session failed"); 1725 return -1; 1726 } 1727 1728 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 1729 error_report("Received unexpected msg type. Expected %d received %d", 1730 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 1731 return -1; 1732 } 1733 1734 if (msg.hdr.size != sizeof(msg.payload.session)) { 1735 error_report("Received bad msg size."); 1736 return -1; 1737 } 1738 1739 if (msg.payload.session.session_id < 0) { 1740 error_report("Bad session id: %" PRId64 "", 1741 msg.payload.session.session_id); 1742 return -1; 1743 } 1744 *session_id = msg.payload.session.session_id; 1745 1746 return 0; 1747 } 1748 1749 static int 1750 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 1751 { 1752 bool crypto_session = virtio_has_feature(dev->protocol_features, 1753 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 1754 VhostUserMsg msg = { 1755 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 1756 .hdr.flags = VHOST_USER_VERSION, 1757 .hdr.size = sizeof(msg.payload.u64), 1758 }; 1759 msg.payload.u64 = session_id; 1760 1761 if (!crypto_session) { 1762 error_report("vhost-user trying to send unhandled ioctl"); 1763 return -1; 1764 } 1765 1766 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1767 error_report("vhost_user_write() return -1, close session failed"); 1768 return -1; 1769 } 1770 1771 return 0; 1772 } 1773 1774 static bool vhost_user_mem_section_filter(struct vhost_dev *dev, 1775 MemoryRegionSection *section) 1776 { 1777 bool result; 1778 1779 result = memory_region_get_fd(section->mr) >= 0; 1780 1781 return result; 1782 } 1783 1784 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 1785 uint16_t queue_size, 1786 struct vhost_inflight *inflight) 1787 { 1788 void *addr; 1789 int fd; 1790 struct vhost_user *u = dev->opaque; 1791 CharBackend *chr = u->user->chr; 1792 VhostUserMsg msg = { 1793 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 1794 .hdr.flags = VHOST_USER_VERSION, 1795 .payload.inflight.num_queues = dev->nvqs, 1796 .payload.inflight.queue_size = queue_size, 1797 .hdr.size = sizeof(msg.payload.inflight), 1798 }; 1799 1800 if (!virtio_has_feature(dev->protocol_features, 1801 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 1802 return 0; 1803 } 1804 1805 if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1806 return -1; 1807 } 1808 1809 if (vhost_user_read(dev, &msg) < 0) { 1810 return -1; 1811 } 1812 1813 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 1814 error_report("Received unexpected msg type. " 1815 "Expected %d received %d", 1816 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 1817 return -1; 1818 } 1819 1820 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 1821 error_report("Received bad msg size."); 1822 return -1; 1823 } 1824 1825 if (!msg.payload.inflight.mmap_size) { 1826 return 0; 1827 } 1828 1829 fd = qemu_chr_fe_get_msgfd(chr); 1830 if (fd < 0) { 1831 error_report("Failed to get mem fd"); 1832 return -1; 1833 } 1834 1835 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 1836 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 1837 1838 if (addr == MAP_FAILED) { 1839 error_report("Failed to mmap mem fd"); 1840 close(fd); 1841 return -1; 1842 } 1843 1844 inflight->addr = addr; 1845 inflight->fd = fd; 1846 inflight->size = msg.payload.inflight.mmap_size; 1847 inflight->offset = msg.payload.inflight.mmap_offset; 1848 inflight->queue_size = queue_size; 1849 1850 return 0; 1851 } 1852 1853 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 1854 struct vhost_inflight *inflight) 1855 { 1856 VhostUserMsg msg = { 1857 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 1858 .hdr.flags = VHOST_USER_VERSION, 1859 .payload.inflight.mmap_size = inflight->size, 1860 .payload.inflight.mmap_offset = inflight->offset, 1861 .payload.inflight.num_queues = dev->nvqs, 1862 .payload.inflight.queue_size = inflight->queue_size, 1863 .hdr.size = sizeof(msg.payload.inflight), 1864 }; 1865 1866 if (!virtio_has_feature(dev->protocol_features, 1867 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 1868 return 0; 1869 } 1870 1871 if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) { 1872 return -1; 1873 } 1874 1875 return 0; 1876 } 1877 1878 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 1879 { 1880 if (user->chr) { 1881 error_setg(errp, "Cannot initialize vhost-user state"); 1882 return false; 1883 } 1884 user->chr = chr; 1885 return true; 1886 } 1887 1888 void vhost_user_cleanup(VhostUserState *user) 1889 { 1890 int i; 1891 1892 if (!user->chr) { 1893 return; 1894 } 1895 1896 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { 1897 if (user->notifier[i].addr) { 1898 object_unparent(OBJECT(&user->notifier[i].mr)); 1899 munmap(user->notifier[i].addr, qemu_real_host_page_size); 1900 user->notifier[i].addr = NULL; 1901 } 1902 } 1903 user->chr = NULL; 1904 } 1905 1906 const VhostOps user_ops = { 1907 .backend_type = VHOST_BACKEND_TYPE_USER, 1908 .vhost_backend_init = vhost_user_backend_init, 1909 .vhost_backend_cleanup = vhost_user_backend_cleanup, 1910 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 1911 .vhost_set_log_base = vhost_user_set_log_base, 1912 .vhost_set_mem_table = vhost_user_set_mem_table, 1913 .vhost_set_vring_addr = vhost_user_set_vring_addr, 1914 .vhost_set_vring_endian = vhost_user_set_vring_endian, 1915 .vhost_set_vring_num = vhost_user_set_vring_num, 1916 .vhost_set_vring_base = vhost_user_set_vring_base, 1917 .vhost_get_vring_base = vhost_user_get_vring_base, 1918 .vhost_set_vring_kick = vhost_user_set_vring_kick, 1919 .vhost_set_vring_call = vhost_user_set_vring_call, 1920 .vhost_set_features = vhost_user_set_features, 1921 .vhost_get_features = vhost_user_get_features, 1922 .vhost_set_owner = vhost_user_set_owner, 1923 .vhost_reset_device = vhost_user_reset_device, 1924 .vhost_get_vq_index = vhost_user_get_vq_index, 1925 .vhost_set_vring_enable = vhost_user_set_vring_enable, 1926 .vhost_requires_shm_log = vhost_user_requires_shm_log, 1927 .vhost_migration_done = vhost_user_migration_done, 1928 .vhost_backend_can_merge = vhost_user_can_merge, 1929 .vhost_net_set_mtu = vhost_user_net_set_mtu, 1930 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 1931 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 1932 .vhost_get_config = vhost_user_get_config, 1933 .vhost_set_config = vhost_user_set_config, 1934 .vhost_crypto_create_session = vhost_user_crypto_create_session, 1935 .vhost_crypto_close_session = vhost_user_crypto_close_session, 1936 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, 1937 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 1938 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 1939 }; 1940