1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "hw/virtio/vhost.h" 12 #include "hw/virtio/vhost-backend.h" 13 #include "sysemu/char.h" 14 #include "sysemu/kvm.h" 15 #include "qemu/error-report.h" 16 #include "qemu/sockets.h" 17 #include "exec/ram_addr.h" 18 19 #include <fcntl.h> 20 #include <unistd.h> 21 #include <sys/ioctl.h> 22 #include <sys/socket.h> 23 #include <sys/un.h> 24 #include <linux/vhost.h> 25 26 #define VHOST_MEMORY_MAX_NREGIONS 8 27 #define VHOST_USER_F_PROTOCOL_FEATURES 30 28 #define VHOST_USER_PROTOCOL_FEATURE_MASK 0x1ULL 29 30 #define VHOST_USER_PROTOCOL_F_MQ 0 31 32 typedef enum VhostUserRequest { 33 VHOST_USER_NONE = 0, 34 VHOST_USER_GET_FEATURES = 1, 35 VHOST_USER_SET_FEATURES = 2, 36 VHOST_USER_SET_OWNER = 3, 37 VHOST_USER_RESET_DEVICE = 4, 38 VHOST_USER_SET_MEM_TABLE = 5, 39 VHOST_USER_SET_LOG_BASE = 6, 40 VHOST_USER_SET_LOG_FD = 7, 41 VHOST_USER_SET_VRING_NUM = 8, 42 VHOST_USER_SET_VRING_ADDR = 9, 43 VHOST_USER_SET_VRING_BASE = 10, 44 VHOST_USER_GET_VRING_BASE = 11, 45 VHOST_USER_SET_VRING_KICK = 12, 46 VHOST_USER_SET_VRING_CALL = 13, 47 VHOST_USER_SET_VRING_ERR = 14, 48 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 49 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 50 VHOST_USER_GET_QUEUE_NUM = 17, 51 VHOST_USER_SET_VRING_ENABLE = 18, 52 VHOST_USER_MAX 53 } VhostUserRequest; 54 55 typedef struct VhostUserMemoryRegion { 56 uint64_t guest_phys_addr; 57 uint64_t memory_size; 58 uint64_t userspace_addr; 59 uint64_t mmap_offset; 60 } VhostUserMemoryRegion; 61 62 typedef struct VhostUserMemory { 63 uint32_t nregions; 64 uint32_t padding; 65 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 66 } VhostUserMemory; 67 68 typedef struct VhostUserMsg { 69 VhostUserRequest request; 70 71 #define VHOST_USER_VERSION_MASK (0x3) 72 #define VHOST_USER_REPLY_MASK (0x1<<2) 73 uint32_t flags; 74 uint32_t size; /* the following payload size */ 75 union { 76 #define VHOST_USER_VRING_IDX_MASK (0xff) 77 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 78 uint64_t u64; 79 struct vhost_vring_state state; 80 struct vhost_vring_addr addr; 81 VhostUserMemory memory; 82 }; 83 } QEMU_PACKED VhostUserMsg; 84 85 static VhostUserMsg m __attribute__ ((unused)); 86 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ 87 + sizeof(m.flags) \ 88 + sizeof(m.size)) 89 90 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) 91 92 /* The version of the protocol we support */ 93 #define VHOST_USER_VERSION (0x1) 94 95 static bool ioeventfd_enabled(void) 96 { 97 return kvm_enabled() && kvm_eventfds_enabled(); 98 } 99 100 static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = { 101 -1, /* VHOST_USER_NONE */ 102 VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */ 103 VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */ 104 VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */ 105 VHOST_RESET_DEVICE, /* VHOST_USER_RESET_DEVICE */ 106 VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */ 107 VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */ 108 VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */ 109 VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */ 110 VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */ 111 VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */ 112 VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ 113 VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ 114 VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ 115 VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ 116 }; 117 118 static VhostUserRequest vhost_user_request_translate(unsigned long int request) 119 { 120 VhostUserRequest idx; 121 122 for (idx = 0; idx < VHOST_USER_MAX; idx++) { 123 if (ioctl_to_vhost_user_request[idx] == request) { 124 break; 125 } 126 } 127 128 return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx; 129 } 130 131 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 132 { 133 CharDriverState *chr = dev->opaque; 134 uint8_t *p = (uint8_t *) msg; 135 int r, size = VHOST_USER_HDR_SIZE; 136 137 r = qemu_chr_fe_read_all(chr, p, size); 138 if (r != size) { 139 error_report("Failed to read msg header. Read %d instead of %d.", r, 140 size); 141 goto fail; 142 } 143 144 /* validate received flags */ 145 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 146 error_report("Failed to read msg header." 147 " Flags 0x%x instead of 0x%x.", msg->flags, 148 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 149 goto fail; 150 } 151 152 /* validate message size is sane */ 153 if (msg->size > VHOST_USER_PAYLOAD_SIZE) { 154 error_report("Failed to read msg header." 155 " Size %d exceeds the maximum %zu.", msg->size, 156 VHOST_USER_PAYLOAD_SIZE); 157 goto fail; 158 } 159 160 if (msg->size) { 161 p += VHOST_USER_HDR_SIZE; 162 size = msg->size; 163 r = qemu_chr_fe_read_all(chr, p, size); 164 if (r != size) { 165 error_report("Failed to read msg payload." 166 " Read %d instead of %d.", r, msg->size); 167 goto fail; 168 } 169 } 170 171 return 0; 172 173 fail: 174 return -1; 175 } 176 177 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 178 int *fds, int fd_num) 179 { 180 CharDriverState *chr = dev->opaque; 181 int size = VHOST_USER_HDR_SIZE + msg->size; 182 183 if (fd_num) { 184 qemu_chr_fe_set_msgfds(chr, fds, fd_num); 185 } 186 187 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 188 0 : -1; 189 } 190 191 static bool vhost_user_one_time_request(VhostUserRequest request) 192 { 193 switch (request) { 194 case VHOST_USER_SET_OWNER: 195 case VHOST_USER_RESET_DEVICE: 196 case VHOST_USER_SET_MEM_TABLE: 197 case VHOST_USER_GET_QUEUE_NUM: 198 return true; 199 default: 200 return false; 201 } 202 } 203 204 static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, 205 void *arg) 206 { 207 VhostUserMsg msg; 208 VhostUserRequest msg_request; 209 struct vhost_vring_file *file = 0; 210 int need_reply = 0; 211 int fds[VHOST_MEMORY_MAX_NREGIONS]; 212 int i, fd; 213 size_t fd_num = 0; 214 215 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 216 217 /* only translate vhost ioctl requests */ 218 if (request > VHOST_USER_MAX) { 219 msg_request = vhost_user_request_translate(request); 220 } else { 221 msg_request = request; 222 } 223 224 /* 225 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, 226 * we just need send it once in the first time. For later such 227 * request, we just ignore it. 228 */ 229 if (vhost_user_one_time_request(msg_request) && dev->vq_index != 0) { 230 return 0; 231 } 232 233 msg.request = msg_request; 234 msg.flags = VHOST_USER_VERSION; 235 msg.size = 0; 236 237 switch (msg_request) { 238 case VHOST_USER_GET_FEATURES: 239 case VHOST_USER_GET_PROTOCOL_FEATURES: 240 case VHOST_USER_GET_QUEUE_NUM: 241 need_reply = 1; 242 break; 243 244 case VHOST_USER_SET_FEATURES: 245 case VHOST_USER_SET_LOG_BASE: 246 case VHOST_USER_SET_PROTOCOL_FEATURES: 247 msg.u64 = *((__u64 *) arg); 248 msg.size = sizeof(m.u64); 249 break; 250 251 case VHOST_USER_SET_OWNER: 252 case VHOST_USER_RESET_DEVICE: 253 break; 254 255 case VHOST_USER_SET_MEM_TABLE: 256 for (i = 0; i < dev->mem->nregions; ++i) { 257 struct vhost_memory_region *reg = dev->mem->regions + i; 258 ram_addr_t ram_addr; 259 260 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 261 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr); 262 fd = qemu_get_ram_fd(ram_addr); 263 if (fd > 0) { 264 msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 265 msg.memory.regions[fd_num].memory_size = reg->memory_size; 266 msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 267 msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr - 268 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); 269 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 270 fds[fd_num++] = fd; 271 } 272 } 273 274 msg.memory.nregions = fd_num; 275 276 if (!fd_num) { 277 error_report("Failed initializing vhost-user memory map, " 278 "consider using -object memory-backend-file share=on"); 279 return -1; 280 } 281 282 msg.size = sizeof(m.memory.nregions); 283 msg.size += sizeof(m.memory.padding); 284 msg.size += fd_num * sizeof(VhostUserMemoryRegion); 285 286 break; 287 288 case VHOST_USER_SET_LOG_FD: 289 fds[fd_num++] = *((int *) arg); 290 break; 291 292 case VHOST_USER_SET_VRING_NUM: 293 case VHOST_USER_SET_VRING_BASE: 294 case VHOST_USER_SET_VRING_ENABLE: 295 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 296 msg.size = sizeof(m.state); 297 break; 298 299 case VHOST_USER_GET_VRING_BASE: 300 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 301 msg.size = sizeof(m.state); 302 need_reply = 1; 303 break; 304 305 case VHOST_USER_SET_VRING_ADDR: 306 memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr)); 307 msg.size = sizeof(m.addr); 308 break; 309 310 case VHOST_USER_SET_VRING_KICK: 311 case VHOST_USER_SET_VRING_CALL: 312 case VHOST_USER_SET_VRING_ERR: 313 file = arg; 314 msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK; 315 msg.size = sizeof(m.u64); 316 if (ioeventfd_enabled() && file->fd > 0) { 317 fds[fd_num++] = file->fd; 318 } else { 319 msg.u64 |= VHOST_USER_VRING_NOFD_MASK; 320 } 321 break; 322 default: 323 error_report("vhost-user trying to send unhandled ioctl"); 324 return -1; 325 break; 326 } 327 328 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 329 return 0; 330 } 331 332 if (need_reply) { 333 if (vhost_user_read(dev, &msg) < 0) { 334 return 0; 335 } 336 337 if (msg_request != msg.request) { 338 error_report("Received unexpected msg type." 339 " Expected %d received %d", msg_request, msg.request); 340 return -1; 341 } 342 343 switch (msg_request) { 344 case VHOST_USER_GET_FEATURES: 345 case VHOST_USER_GET_PROTOCOL_FEATURES: 346 case VHOST_USER_GET_QUEUE_NUM: 347 if (msg.size != sizeof(m.u64)) { 348 error_report("Received bad msg size."); 349 return -1; 350 } 351 *((__u64 *) arg) = msg.u64; 352 break; 353 case VHOST_USER_GET_VRING_BASE: 354 if (msg.size != sizeof(m.state)) { 355 error_report("Received bad msg size."); 356 return -1; 357 } 358 memcpy(arg, &msg.state, sizeof(struct vhost_vring_state)); 359 break; 360 default: 361 error_report("Received unexpected msg type."); 362 return -1; 363 break; 364 } 365 } 366 367 return 0; 368 } 369 370 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 371 { 372 unsigned long long features; 373 int err; 374 375 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 376 377 dev->opaque = opaque; 378 379 err = vhost_user_call(dev, VHOST_USER_GET_FEATURES, &features); 380 if (err < 0) { 381 return err; 382 } 383 384 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 385 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 386 387 err = vhost_user_call(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &features); 388 if (err < 0) { 389 return err; 390 } 391 392 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; 393 err = vhost_user_call(dev, VHOST_USER_SET_PROTOCOL_FEATURES, 394 &dev->protocol_features); 395 if (err < 0) { 396 return err; 397 } 398 399 /* query the max queues we support if backend supports Multiple Queue */ 400 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 401 err = vhost_user_call(dev, VHOST_USER_GET_QUEUE_NUM, &dev->max_queues); 402 if (err < 0) { 403 return err; 404 } 405 } 406 } 407 408 return 0; 409 } 410 411 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) 412 { 413 struct vhost_vring_state state = { 414 .index = dev->vq_index, 415 .num = enable, 416 }; 417 418 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 419 420 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) { 421 return -1; 422 } 423 424 return vhost_user_call(dev, VHOST_USER_SET_VRING_ENABLE, &state); 425 } 426 427 static int vhost_user_cleanup(struct vhost_dev *dev) 428 { 429 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 430 431 dev->opaque = 0; 432 433 return 0; 434 } 435 436 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 437 { 438 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 439 440 return idx; 441 } 442 443 static int vhost_user_memslots_limit(struct vhost_dev *dev) 444 { 445 return VHOST_MEMORY_MAX_NREGIONS; 446 } 447 448 const VhostOps user_ops = { 449 .backend_type = VHOST_BACKEND_TYPE_USER, 450 .vhost_call = vhost_user_call, 451 .vhost_backend_init = vhost_user_init, 452 .vhost_backend_cleanup = vhost_user_cleanup, 453 .vhost_backend_get_vq_index = vhost_user_get_vq_index, 454 .vhost_backend_set_vring_enable = vhost_user_set_vring_enable, 455 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 456 }; 457