/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "sysemu/char.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "exec/ram_addr.h"

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <linux/vhost.h>

#define VHOST_MEMORY_MAX_NREGIONS    8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x1ULL

#define VHOST_USER_PROTOCOL_F_MQ    0

typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_DEVICE = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_MAX
} VhostUserRequest;

typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserMsg {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1<<2)
    uint32_t flags;
    uint32_t size; /* the following payload size */
    union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
    };
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
                            + sizeof(m.flags) \
                            + sizeof(m.size))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)

/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)

static bool ioeventfd_enabled(void)
{
    return kvm_enabled() && kvm_eventfds_enabled();
}

static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
    -1,                     /* VHOST_USER_NONE */
    VHOST_GET_FEATURES,     /* VHOST_USER_GET_FEATURES */
    VHOST_SET_FEATURES,     /* VHOST_USER_SET_FEATURES */
    VHOST_SET_OWNER,        /* VHOST_USER_SET_OWNER */
    VHOST_RESET_DEVICE,     /* VHOST_USER_RESET_DEVICE */
    VHOST_SET_MEM_TABLE,    /* VHOST_USER_SET_MEM_TABLE */
    VHOST_SET_LOG_BASE,     /* VHOST_USER_SET_LOG_BASE */
    VHOST_SET_LOG_FD,       /* VHOST_USER_SET_LOG_FD */
    VHOST_SET_VRING_NUM,    /* VHOST_USER_SET_VRING_NUM */
    VHOST_SET_VRING_ADDR,   /* VHOST_USER_SET_VRING_ADDR */
    VHOST_SET_VRING_BASE,   /* VHOST_USER_SET_VRING_BASE */
    VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
    VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
    VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
    VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
};
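/*
 * Map a vhost kernel ioctl request onto the equivalent vhost-user message
 * type; returns VHOST_USER_NONE when the ioctl has no vhost-user
 * counterpart.
 */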
static VhostUserRequest vhost_user_request_translate(unsigned long int request)
{
    VhostUserRequest idx;

    for (idx = 0; idx < VHOST_USER_MAX; idx++) {
        if (ioctl_to_vhost_user_request[idx] == request) {
            break;
        }
    }

    return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx;
}

static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    CharDriverState *chr = dev->opaque;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        error_report("Failed to read msg header. Read %d instead of %d.", r,
                     size);
        goto fail;
    }

    /* validate received flags */
    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                     " Flags 0x%x instead of 0x%x.", msg->flags,
                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        goto fail;
    }

    /* validate message size is sane */
    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", msg->size,
                     VHOST_USER_PAYLOAD_SIZE);
        goto fail;
    }

    if (msg->size) {
        p += VHOST_USER_HDR_SIZE;
        size = msg->size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, msg->size);
            goto fail;
        }
    }

    return 0;

fail:
    return -1;
}

static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    CharDriverState *chr = dev->opaque;
    int size = VHOST_USER_HDR_SIZE + msg->size;

    if (fd_num) {
        qemu_chr_fe_set_msgfds(chr, fds, fd_num);
    }

    return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
            0 : -1;
}

static bool vhost_user_one_time_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_DEVICE:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
        return true;
    default:
        return false;
    }
}
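/*
 * Central request dispatcher: build the vhost-user message for the given
 * request, attach any file descriptors that go with it (memory region fds,
 * kick/call/err eventfds, the log fd) and, for GET-style requests, read
 * back and validate the backend's reply.
 */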
static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
        void *arg)
{
    VhostUserMsg msg;
    VhostUserRequest msg_request;
    struct vhost_vring_file *file = 0;
    int need_reply = 0;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int i, fd;
    size_t fd_num = 0;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* only translate vhost ioctl requests */
    if (request > VHOST_USER_MAX) {
        msg_request = vhost_user_request_translate(request);
    } else {
        msg_request = request;
    }

    /*
     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we only need to send it the first time; later occurrences of such
     * requests are simply ignored.
     */
    if (vhost_user_one_time_request(msg_request) && dev->vq_index != 0) {
        return 0;
    }

    msg.request = msg_request;
    msg.flags = VHOST_USER_VERSION;
    msg.size = 0;

    switch (msg_request) {
    case VHOST_USER_GET_FEATURES:
    case VHOST_USER_GET_PROTOCOL_FEATURES:
    case VHOST_USER_GET_QUEUE_NUM:
        need_reply = 1;
        break;

    case VHOST_USER_SET_FEATURES:
    case VHOST_USER_SET_LOG_BASE:
    case VHOST_USER_SET_PROTOCOL_FEATURES:
        msg.u64 = *((__u64 *) arg);
        msg.size = sizeof(m.u64);
        break;

    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_DEVICE:
        break;

    case VHOST_USER_SET_MEM_TABLE:
        for (i = 0; i < dev->mem->nregions; ++i) {
            struct vhost_memory_region *reg = dev->mem->regions + i;
            ram_addr_t ram_addr;

            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
            qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr,
                                    &ram_addr);
            fd = qemu_get_ram_fd(ram_addr);
            if (fd > 0) {
                msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
                msg.memory.regions[fd_num].memory_size = reg->memory_size;
                msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
                msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
                    (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
                assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
                fds[fd_num++] = fd;
            }
        }

        msg.memory.nregions = fd_num;

        if (!fd_num) {
            error_report("Failed initializing vhost-user memory map, "
                         "consider using -object memory-backend-file share=on");
            return -1;
        }

        msg.size = sizeof(m.memory.nregions);
        msg.size += sizeof(m.memory.padding);
        msg.size += fd_num * sizeof(VhostUserMemoryRegion);

        break;

    case VHOST_USER_SET_LOG_FD:
        fds[fd_num++] = *((int *) arg);
        break;

    case VHOST_USER_SET_VRING_NUM:
    case VHOST_USER_SET_VRING_BASE:
        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
        msg.size = sizeof(m.state);
        break;

    case VHOST_USER_GET_VRING_BASE:
        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
        msg.size = sizeof(m.state);
        need_reply = 1;
        break;

    case VHOST_USER_SET_VRING_ADDR:
        memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
        msg.size = sizeof(m.addr);
        break;

    case VHOST_USER_SET_VRING_KICK:
    case VHOST_USER_SET_VRING_CALL:
    case VHOST_USER_SET_VRING_ERR:
        file = arg;
        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
        msg.size = sizeof(m.u64);
        if (ioeventfd_enabled() && file->fd > 0) {
            fds[fd_num++] = file->fd;
        } else {
            msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
        }
        break;
    default:
        error_report("vhost-user trying to send unhandled ioctl");
        return -1;
        break;
    }

    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
        return 0;
    }

    if (need_reply) {
        if (vhost_user_read(dev, &msg) < 0) {
            return 0;
        }

        if (msg_request != msg.request) {
            error_report("Received unexpected msg type."
                         " Expected %d received %d", msg_request, msg.request);
            return -1;
        }

        switch (msg_request) {
        case VHOST_USER_GET_FEATURES:
        case VHOST_USER_GET_PROTOCOL_FEATURES:
        case VHOST_USER_GET_QUEUE_NUM:
            if (msg.size != sizeof(m.u64)) {
                error_report("Received bad msg size.");
                return -1;
            }
            *((__u64 *) arg) = msg.u64;
            break;
        case VHOST_USER_GET_VRING_BASE:
            if (msg.size != sizeof(m.state)) {
                error_report("Received bad msg size.");
                return -1;
            }
            memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
            break;
        default:
            error_report("Received unexpected msg type.");
            return -1;
            break;
        }
    }

    return 0;
}
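/*
 * Backend setup: remember the chardev handle in dev->opaque, negotiate the
 * feature and protocol-feature bits with the backend and, when
 * VHOST_USER_PROTOCOL_F_MQ is offered, query the maximum number of queues.
 */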
337 " Expected %d received %d", msg_request, msg.request); 338 return -1; 339 } 340 341 switch (msg_request) { 342 case VHOST_USER_GET_FEATURES: 343 case VHOST_USER_GET_PROTOCOL_FEATURES: 344 case VHOST_USER_GET_QUEUE_NUM: 345 if (msg.size != sizeof(m.u64)) { 346 error_report("Received bad msg size."); 347 return -1; 348 } 349 *((__u64 *) arg) = msg.u64; 350 break; 351 case VHOST_USER_GET_VRING_BASE: 352 if (msg.size != sizeof(m.state)) { 353 error_report("Received bad msg size."); 354 return -1; 355 } 356 memcpy(arg, &msg.state, sizeof(struct vhost_vring_state)); 357 break; 358 default: 359 error_report("Received unexpected msg type."); 360 return -1; 361 break; 362 } 363 } 364 365 return 0; 366 } 367 368 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 369 { 370 unsigned long long features; 371 int err; 372 373 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 374 375 dev->opaque = opaque; 376 377 err = vhost_user_call(dev, VHOST_USER_GET_FEATURES, &features); 378 if (err < 0) { 379 return err; 380 } 381 382 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 383 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 384 385 err = vhost_user_call(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &features); 386 if (err < 0) { 387 return err; 388 } 389 390 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; 391 err = vhost_user_call(dev, VHOST_USER_SET_PROTOCOL_FEATURES, 392 &dev->protocol_features); 393 if (err < 0) { 394 return err; 395 } 396 397 /* query the max queues we support if backend supports Multiple Queue */ 398 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 399 err = vhost_user_call(dev, VHOST_USER_GET_QUEUE_NUM, &dev->max_queues); 400 if (err < 0) { 401 return err; 402 } 403 } 404 } 405 406 return 0; 407 } 408 409 static int vhost_user_cleanup(struct vhost_dev *dev) 410 { 411 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 412 413 dev->opaque = 0; 414 415 return 0; 416 } 417 418 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 419 { 420 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 421 422 return idx; 423 } 424 425 const VhostOps user_ops = { 426 .backend_type = VHOST_BACKEND_TYPE_USER, 427 .vhost_call = vhost_user_call, 428 .vhost_backend_init = vhost_user_init, 429 .vhost_backend_cleanup = vhost_user_cleanup, 430 .vhost_backend_get_vq_index = vhost_user_get_vq_index, 431 }; 432