1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "hw/virtio/vhost.h" 12 #include "hw/virtio/vhost-backend.h" 13 #include "sysemu/char.h" 14 #include "sysemu/kvm.h" 15 #include "qemu/error-report.h" 16 #include "qemu/sockets.h" 17 #include "exec/ram_addr.h" 18 19 #include <fcntl.h> 20 #include <unistd.h> 21 #include <sys/ioctl.h> 22 #include <sys/socket.h> 23 #include <sys/un.h> 24 #include <linux/vhost.h> 25 26 #define VHOST_MEMORY_MAX_NREGIONS 8 27 #define VHOST_USER_F_PROTOCOL_FEATURES 30 28 #define VHOST_USER_PROTOCOL_FEATURE_MASK 0x1ULL 29 30 #define VHOST_USER_PROTOCOL_F_MQ 0 31 32 typedef enum VhostUserRequest { 33 VHOST_USER_NONE = 0, 34 VHOST_USER_GET_FEATURES = 1, 35 VHOST_USER_SET_FEATURES = 2, 36 VHOST_USER_SET_OWNER = 3, 37 VHOST_USER_RESET_DEVICE = 4, 38 VHOST_USER_SET_MEM_TABLE = 5, 39 VHOST_USER_SET_LOG_BASE = 6, 40 VHOST_USER_SET_LOG_FD = 7, 41 VHOST_USER_SET_VRING_NUM = 8, 42 VHOST_USER_SET_VRING_ADDR = 9, 43 VHOST_USER_SET_VRING_BASE = 10, 44 VHOST_USER_GET_VRING_BASE = 11, 45 VHOST_USER_SET_VRING_KICK = 12, 46 VHOST_USER_SET_VRING_CALL = 13, 47 VHOST_USER_SET_VRING_ERR = 14, 48 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 49 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 50 VHOST_USER_GET_QUEUE_NUM = 17, 51 VHOST_USER_MAX 52 } VhostUserRequest; 53 54 typedef struct VhostUserMemoryRegion { 55 uint64_t guest_phys_addr; 56 uint64_t memory_size; 57 uint64_t userspace_addr; 58 uint64_t mmap_offset; 59 } VhostUserMemoryRegion; 60 61 typedef struct VhostUserMemory { 62 uint32_t nregions; 63 uint32_t padding; 64 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 65 } VhostUserMemory; 66 67 typedef struct VhostUserMsg { 68 VhostUserRequest request; 69 70 #define VHOST_USER_VERSION_MASK (0x3) 71 #define VHOST_USER_REPLY_MASK (0x1<<2) 72 uint32_t flags; 73 uint32_t size; /* the following payload size */ 74 union { 75 #define VHOST_USER_VRING_IDX_MASK (0xff) 76 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 77 uint64_t u64; 78 struct vhost_vring_state state; 79 struct vhost_vring_addr addr; 80 VhostUserMemory memory; 81 }; 82 } QEMU_PACKED VhostUserMsg; 83 84 static VhostUserMsg m __attribute__ ((unused)); 85 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ 86 + sizeof(m.flags) \ 87 + sizeof(m.size)) 88 89 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) 90 91 /* The version of the protocol we support */ 92 #define VHOST_USER_VERSION (0x1) 93 94 static bool ioeventfd_enabled(void) 95 { 96 return kvm_enabled() && kvm_eventfds_enabled(); 97 } 98 99 static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = { 100 -1, /* VHOST_USER_NONE */ 101 VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */ 102 VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */ 103 VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */ 104 VHOST_RESET_DEVICE, /* VHOST_USER_RESET_DEVICE */ 105 VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */ 106 VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */ 107 VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */ 108 VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */ 109 VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */ 110 VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */ 111 VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ 112 VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ 113 VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ 114 VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ 115 }; 116 117 static VhostUserRequest vhost_user_request_translate(unsigned long int request) 118 { 119 VhostUserRequest idx; 120 121 for (idx = 0; idx < VHOST_USER_MAX; idx++) { 122 if (ioctl_to_vhost_user_request[idx] == request) { 123 break; 124 } 125 } 126 127 return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx; 128 } 129 130 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 131 { 132 CharDriverState *chr = dev->opaque; 133 uint8_t *p = (uint8_t *) msg; 134 int r, size = VHOST_USER_HDR_SIZE; 135 136 r = qemu_chr_fe_read_all(chr, p, size); 137 if (r != size) { 138 error_report("Failed to read msg header. Read %d instead of %d.", r, 139 size); 140 goto fail; 141 } 142 143 /* validate received flags */ 144 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 145 error_report("Failed to read msg header." 146 " Flags 0x%x instead of 0x%x.", msg->flags, 147 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 148 goto fail; 149 } 150 151 /* validate message size is sane */ 152 if (msg->size > VHOST_USER_PAYLOAD_SIZE) { 153 error_report("Failed to read msg header." 154 " Size %d exceeds the maximum %zu.", msg->size, 155 VHOST_USER_PAYLOAD_SIZE); 156 goto fail; 157 } 158 159 if (msg->size) { 160 p += VHOST_USER_HDR_SIZE; 161 size = msg->size; 162 r = qemu_chr_fe_read_all(chr, p, size); 163 if (r != size) { 164 error_report("Failed to read msg payload." 165 " Read %d instead of %d.", r, msg->size); 166 goto fail; 167 } 168 } 169 170 return 0; 171 172 fail: 173 return -1; 174 } 175 176 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 177 int *fds, int fd_num) 178 { 179 CharDriverState *chr = dev->opaque; 180 int size = VHOST_USER_HDR_SIZE + msg->size; 181 182 if (fd_num) { 183 qemu_chr_fe_set_msgfds(chr, fds, fd_num); 184 } 185 186 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 187 0 : -1; 188 } 189 190 static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, 191 void *arg) 192 { 193 VhostUserMsg msg; 194 VhostUserRequest msg_request; 195 struct vhost_vring_file *file = 0; 196 int need_reply = 0; 197 int fds[VHOST_MEMORY_MAX_NREGIONS]; 198 int i, fd; 199 size_t fd_num = 0; 200 201 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 202 203 /* only translate vhost ioctl requests */ 204 if (request > VHOST_USER_MAX) { 205 msg_request = vhost_user_request_translate(request); 206 } else { 207 msg_request = request; 208 } 209 210 msg.request = msg_request; 211 msg.flags = VHOST_USER_VERSION; 212 msg.size = 0; 213 214 switch (msg_request) { 215 case VHOST_USER_GET_FEATURES: 216 case VHOST_USER_GET_PROTOCOL_FEATURES: 217 case VHOST_USER_GET_QUEUE_NUM: 218 need_reply = 1; 219 break; 220 221 case VHOST_USER_SET_FEATURES: 222 case VHOST_USER_SET_LOG_BASE: 223 case VHOST_USER_SET_PROTOCOL_FEATURES: 224 msg.u64 = *((__u64 *) arg); 225 msg.size = sizeof(m.u64); 226 break; 227 228 case VHOST_USER_SET_OWNER: 229 case VHOST_USER_RESET_DEVICE: 230 break; 231 232 case VHOST_USER_SET_MEM_TABLE: 233 for (i = 0; i < dev->mem->nregions; ++i) { 234 struct vhost_memory_region *reg = dev->mem->regions + i; 235 ram_addr_t ram_addr; 236 237 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 238 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr); 239 fd = qemu_get_ram_fd(ram_addr); 240 if (fd > 0) { 241 msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 242 msg.memory.regions[fd_num].memory_size = reg->memory_size; 243 msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 244 msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr - 245 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); 246 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 247 fds[fd_num++] = fd; 248 } 249 } 250 251 msg.memory.nregions = fd_num; 252 253 if (!fd_num) { 254 error_report("Failed initializing vhost-user memory map, " 255 "consider using -object memory-backend-file share=on"); 256 return -1; 257 } 258 259 msg.size = sizeof(m.memory.nregions); 260 msg.size += sizeof(m.memory.padding); 261 msg.size += fd_num * sizeof(VhostUserMemoryRegion); 262 263 break; 264 265 case VHOST_USER_SET_LOG_FD: 266 fds[fd_num++] = *((int *) arg); 267 break; 268 269 case VHOST_USER_SET_VRING_NUM: 270 case VHOST_USER_SET_VRING_BASE: 271 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 272 msg.size = sizeof(m.state); 273 break; 274 275 case VHOST_USER_GET_VRING_BASE: 276 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 277 msg.size = sizeof(m.state); 278 need_reply = 1; 279 break; 280 281 case VHOST_USER_SET_VRING_ADDR: 282 memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr)); 283 msg.size = sizeof(m.addr); 284 break; 285 286 case VHOST_USER_SET_VRING_KICK: 287 case VHOST_USER_SET_VRING_CALL: 288 case VHOST_USER_SET_VRING_ERR: 289 file = arg; 290 msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK; 291 msg.size = sizeof(m.u64); 292 if (ioeventfd_enabled() && file->fd > 0) { 293 fds[fd_num++] = file->fd; 294 } else { 295 msg.u64 |= VHOST_USER_VRING_NOFD_MASK; 296 } 297 break; 298 default: 299 error_report("vhost-user trying to send unhandled ioctl"); 300 return -1; 301 break; 302 } 303 304 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 305 return 0; 306 } 307 308 if (need_reply) { 309 if (vhost_user_read(dev, &msg) < 0) { 310 return 0; 311 } 312 313 if (msg_request != msg.request) { 314 error_report("Received unexpected msg type." 315 " Expected %d received %d", msg_request, msg.request); 316 return -1; 317 } 318 319 switch (msg_request) { 320 case VHOST_USER_GET_FEATURES: 321 case VHOST_USER_GET_PROTOCOL_FEATURES: 322 case VHOST_USER_GET_QUEUE_NUM: 323 if (msg.size != sizeof(m.u64)) { 324 error_report("Received bad msg size."); 325 return -1; 326 } 327 *((__u64 *) arg) = msg.u64; 328 break; 329 case VHOST_USER_GET_VRING_BASE: 330 if (msg.size != sizeof(m.state)) { 331 error_report("Received bad msg size."); 332 return -1; 333 } 334 memcpy(arg, &msg.state, sizeof(struct vhost_vring_state)); 335 break; 336 default: 337 error_report("Received unexpected msg type."); 338 return -1; 339 break; 340 } 341 } 342 343 return 0; 344 } 345 346 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 347 { 348 unsigned long long features; 349 int err; 350 351 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 352 353 dev->opaque = opaque; 354 355 err = vhost_user_call(dev, VHOST_USER_GET_FEATURES, &features); 356 if (err < 0) { 357 return err; 358 } 359 360 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 361 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 362 363 err = vhost_user_call(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &features); 364 if (err < 0) { 365 return err; 366 } 367 368 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; 369 err = vhost_user_call(dev, VHOST_USER_SET_PROTOCOL_FEATURES, 370 &dev->protocol_features); 371 if (err < 0) { 372 return err; 373 } 374 375 /* query the max queues we support if backend supports Multiple Queue */ 376 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 377 err = vhost_user_call(dev, VHOST_USER_GET_QUEUE_NUM, &dev->max_queues); 378 if (err < 0) { 379 return err; 380 } 381 } 382 } 383 384 return 0; 385 } 386 387 static int vhost_user_cleanup(struct vhost_dev *dev) 388 { 389 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 390 391 dev->opaque = 0; 392 393 return 0; 394 } 395 396 const VhostOps user_ops = { 397 .backend_type = VHOST_BACKEND_TYPE_USER, 398 .vhost_call = vhost_user_call, 399 .vhost_backend_init = vhost_user_init, 400 .vhost_backend_cleanup = vhost_user_cleanup 401 }; 402