1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "hw/virtio/vhost.h" 12 #include "hw/virtio/vhost-backend.h" 13 #include "sysemu/char.h" 14 #include "sysemu/kvm.h" 15 #include "qemu/error-report.h" 16 #include "qemu/sockets.h" 17 #include "exec/ram_addr.h" 18 19 #include <fcntl.h> 20 #include <unistd.h> 21 #include <sys/ioctl.h> 22 #include <sys/socket.h> 23 #include <sys/un.h> 24 #include <linux/vhost.h> 25 26 #define VHOST_MEMORY_MAX_NREGIONS 8 27 #define VHOST_USER_F_PROTOCOL_FEATURES 30 28 #define VHOST_USER_PROTOCOL_FEATURE_MASK 0x0ULL 29 30 typedef enum VhostUserRequest { 31 VHOST_USER_NONE = 0, 32 VHOST_USER_GET_FEATURES = 1, 33 VHOST_USER_SET_FEATURES = 2, 34 VHOST_USER_SET_OWNER = 3, 35 VHOST_USER_RESET_DEVICE = 4, 36 VHOST_USER_SET_MEM_TABLE = 5, 37 VHOST_USER_SET_LOG_BASE = 6, 38 VHOST_USER_SET_LOG_FD = 7, 39 VHOST_USER_SET_VRING_NUM = 8, 40 VHOST_USER_SET_VRING_ADDR = 9, 41 VHOST_USER_SET_VRING_BASE = 10, 42 VHOST_USER_GET_VRING_BASE = 11, 43 VHOST_USER_SET_VRING_KICK = 12, 44 VHOST_USER_SET_VRING_CALL = 13, 45 VHOST_USER_SET_VRING_ERR = 14, 46 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 47 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 48 VHOST_USER_MAX 49 } VhostUserRequest; 50 51 typedef struct VhostUserMemoryRegion { 52 uint64_t guest_phys_addr; 53 uint64_t memory_size; 54 uint64_t userspace_addr; 55 uint64_t mmap_offset; 56 } VhostUserMemoryRegion; 57 58 typedef struct VhostUserMemory { 59 uint32_t nregions; 60 uint32_t padding; 61 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 62 } VhostUserMemory; 63 64 typedef struct VhostUserMsg { 65 VhostUserRequest request; 66 67 #define VHOST_USER_VERSION_MASK (0x3) 68 #define VHOST_USER_REPLY_MASK (0x1<<2) 69 uint32_t flags; 70 uint32_t size; /* the following payload size */ 71 union { 72 #define VHOST_USER_VRING_IDX_MASK (0xff) 73 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 74 uint64_t u64; 75 struct vhost_vring_state state; 76 struct vhost_vring_addr addr; 77 VhostUserMemory memory; 78 }; 79 } QEMU_PACKED VhostUserMsg; 80 81 static VhostUserMsg m __attribute__ ((unused)); 82 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ 83 + sizeof(m.flags) \ 84 + sizeof(m.size)) 85 86 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) 87 88 /* The version of the protocol we support */ 89 #define VHOST_USER_VERSION (0x1) 90 91 static bool ioeventfd_enabled(void) 92 { 93 return kvm_enabled() && kvm_eventfds_enabled(); 94 } 95 96 static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = { 97 -1, /* VHOST_USER_NONE */ 98 VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */ 99 VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */ 100 VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */ 101 VHOST_RESET_DEVICE, /* VHOST_USER_RESET_DEVICE */ 102 VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */ 103 VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */ 104 VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */ 105 VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */ 106 VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */ 107 VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */ 108 VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ 109 VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ 110 VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ 111 VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ 112 }; 113 114 static VhostUserRequest vhost_user_request_translate(unsigned long int request) 115 { 116 VhostUserRequest idx; 117 118 for (idx = 0; idx < VHOST_USER_MAX; idx++) { 119 if (ioctl_to_vhost_user_request[idx] == request) { 120 break; 121 } 122 } 123 124 return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx; 125 } 126 127 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 128 { 129 CharDriverState *chr = dev->opaque; 130 uint8_t *p = (uint8_t *) msg; 131 int r, size = VHOST_USER_HDR_SIZE; 132 133 r = qemu_chr_fe_read_all(chr, p, size); 134 if (r != size) { 135 error_report("Failed to read msg header. Read %d instead of %d.", r, 136 size); 137 goto fail; 138 } 139 140 /* validate received flags */ 141 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 142 error_report("Failed to read msg header." 143 " Flags 0x%x instead of 0x%x.", msg->flags, 144 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 145 goto fail; 146 } 147 148 /* validate message size is sane */ 149 if (msg->size > VHOST_USER_PAYLOAD_SIZE) { 150 error_report("Failed to read msg header." 151 " Size %d exceeds the maximum %zu.", msg->size, 152 VHOST_USER_PAYLOAD_SIZE); 153 goto fail; 154 } 155 156 if (msg->size) { 157 p += VHOST_USER_HDR_SIZE; 158 size = msg->size; 159 r = qemu_chr_fe_read_all(chr, p, size); 160 if (r != size) { 161 error_report("Failed to read msg payload." 162 " Read %d instead of %d.", r, msg->size); 163 goto fail; 164 } 165 } 166 167 return 0; 168 169 fail: 170 return -1; 171 } 172 173 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 174 int *fds, int fd_num) 175 { 176 CharDriverState *chr = dev->opaque; 177 int size = VHOST_USER_HDR_SIZE + msg->size; 178 179 if (fd_num) { 180 qemu_chr_fe_set_msgfds(chr, fds, fd_num); 181 } 182 183 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 184 0 : -1; 185 } 186 187 static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, 188 void *arg) 189 { 190 VhostUserMsg msg; 191 VhostUserRequest msg_request; 192 struct vhost_vring_file *file = 0; 193 int need_reply = 0; 194 int fds[VHOST_MEMORY_MAX_NREGIONS]; 195 int i, fd; 196 size_t fd_num = 0; 197 198 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 199 200 /* only translate vhost ioctl requests */ 201 if (request > VHOST_USER_MAX) { 202 msg_request = vhost_user_request_translate(request); 203 } else { 204 msg_request = request; 205 } 206 207 msg.request = msg_request; 208 msg.flags = VHOST_USER_VERSION; 209 msg.size = 0; 210 211 switch (msg_request) { 212 case VHOST_USER_GET_FEATURES: 213 case VHOST_USER_GET_PROTOCOL_FEATURES: 214 need_reply = 1; 215 break; 216 217 case VHOST_USER_SET_FEATURES: 218 case VHOST_USER_SET_LOG_BASE: 219 case VHOST_USER_SET_PROTOCOL_FEATURES: 220 msg.u64 = *((__u64 *) arg); 221 msg.size = sizeof(m.u64); 222 break; 223 224 case VHOST_USER_SET_OWNER: 225 case VHOST_USER_RESET_DEVICE: 226 break; 227 228 case VHOST_USER_SET_MEM_TABLE: 229 for (i = 0; i < dev->mem->nregions; ++i) { 230 struct vhost_memory_region *reg = dev->mem->regions + i; 231 ram_addr_t ram_addr; 232 233 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 234 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr); 235 fd = qemu_get_ram_fd(ram_addr); 236 if (fd > 0) { 237 msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 238 msg.memory.regions[fd_num].memory_size = reg->memory_size; 239 msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 240 msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr - 241 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); 242 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 243 fds[fd_num++] = fd; 244 } 245 } 246 247 msg.memory.nregions = fd_num; 248 249 if (!fd_num) { 250 error_report("Failed initializing vhost-user memory map, " 251 "consider using -object memory-backend-file share=on"); 252 return -1; 253 } 254 255 msg.size = sizeof(m.memory.nregions); 256 msg.size += sizeof(m.memory.padding); 257 msg.size += fd_num * sizeof(VhostUserMemoryRegion); 258 259 break; 260 261 case VHOST_USER_SET_LOG_FD: 262 fds[fd_num++] = *((int *) arg); 263 break; 264 265 case VHOST_USER_SET_VRING_NUM: 266 case VHOST_USER_SET_VRING_BASE: 267 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 268 msg.size = sizeof(m.state); 269 break; 270 271 case VHOST_USER_GET_VRING_BASE: 272 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 273 msg.size = sizeof(m.state); 274 need_reply = 1; 275 break; 276 277 case VHOST_USER_SET_VRING_ADDR: 278 memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr)); 279 msg.size = sizeof(m.addr); 280 break; 281 282 case VHOST_USER_SET_VRING_KICK: 283 case VHOST_USER_SET_VRING_CALL: 284 case VHOST_USER_SET_VRING_ERR: 285 file = arg; 286 msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK; 287 msg.size = sizeof(m.u64); 288 if (ioeventfd_enabled() && file->fd > 0) { 289 fds[fd_num++] = file->fd; 290 } else { 291 msg.u64 |= VHOST_USER_VRING_NOFD_MASK; 292 } 293 break; 294 default: 295 error_report("vhost-user trying to send unhandled ioctl"); 296 return -1; 297 break; 298 } 299 300 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 301 return 0; 302 } 303 304 if (need_reply) { 305 if (vhost_user_read(dev, &msg) < 0) { 306 return 0; 307 } 308 309 if (msg_request != msg.request) { 310 error_report("Received unexpected msg type." 311 " Expected %d received %d", msg_request, msg.request); 312 return -1; 313 } 314 315 switch (msg_request) { 316 case VHOST_USER_GET_FEATURES: 317 case VHOST_USER_GET_PROTOCOL_FEATURES: 318 if (msg.size != sizeof(m.u64)) { 319 error_report("Received bad msg size."); 320 return -1; 321 } 322 *((__u64 *) arg) = msg.u64; 323 break; 324 case VHOST_USER_GET_VRING_BASE: 325 if (msg.size != sizeof(m.state)) { 326 error_report("Received bad msg size."); 327 return -1; 328 } 329 memcpy(arg, &msg.state, sizeof(struct vhost_vring_state)); 330 break; 331 default: 332 error_report("Received unexpected msg type."); 333 return -1; 334 break; 335 } 336 } 337 338 return 0; 339 } 340 341 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 342 { 343 unsigned long long features; 344 int err; 345 346 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 347 348 dev->opaque = opaque; 349 350 err = vhost_user_call(dev, VHOST_USER_GET_FEATURES, &features); 351 if (err < 0) { 352 return err; 353 } 354 355 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 356 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 357 358 err = vhost_user_call(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &features); 359 if (err < 0) { 360 return err; 361 } 362 363 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; 364 err = vhost_user_call(dev, VHOST_USER_SET_PROTOCOL_FEATURES, 365 &dev->protocol_features); 366 if (err < 0) { 367 return err; 368 } 369 } 370 371 return 0; 372 } 373 374 static int vhost_user_cleanup(struct vhost_dev *dev) 375 { 376 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 377 378 dev->opaque = 0; 379 380 return 0; 381 } 382 383 const VhostOps user_ops = { 384 .backend_type = VHOST_BACKEND_TYPE_USER, 385 .vhost_call = vhost_user_call, 386 .vhost_backend_init = vhost_user_init, 387 .vhost_backend_cleanup = vhost_user_cleanup 388 }; 389