1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "hw/virtio/vhost.h" 12 #include "hw/virtio/vhost-backend.h" 13 #include "sysemu/char.h" 14 #include "sysemu/kvm.h" 15 #include "qemu/error-report.h" 16 #include "qemu/sockets.h" 17 #include "exec/ram_addr.h" 18 19 #include <fcntl.h> 20 #include <unistd.h> 21 #include <sys/ioctl.h> 22 #include <sys/socket.h> 23 #include <sys/un.h> 24 #include <linux/vhost.h> 25 26 #define VHOST_MEMORY_MAX_NREGIONS 8 27 28 typedef enum VhostUserRequest { 29 VHOST_USER_NONE = 0, 30 VHOST_USER_GET_FEATURES = 1, 31 VHOST_USER_SET_FEATURES = 2, 32 VHOST_USER_SET_OWNER = 3, 33 VHOST_USER_RESET_OWNER = 4, 34 VHOST_USER_SET_MEM_TABLE = 5, 35 VHOST_USER_SET_LOG_BASE = 6, 36 VHOST_USER_SET_LOG_FD = 7, 37 VHOST_USER_SET_VRING_NUM = 8, 38 VHOST_USER_SET_VRING_ADDR = 9, 39 VHOST_USER_SET_VRING_BASE = 10, 40 VHOST_USER_GET_VRING_BASE = 11, 41 VHOST_USER_SET_VRING_KICK = 12, 42 VHOST_USER_SET_VRING_CALL = 13, 43 VHOST_USER_SET_VRING_ERR = 14, 44 VHOST_USER_MAX 45 } VhostUserRequest; 46 47 typedef struct VhostUserMemoryRegion { 48 uint64_t guest_phys_addr; 49 uint64_t memory_size; 50 uint64_t userspace_addr; 51 uint64_t mmap_offset; 52 } VhostUserMemoryRegion; 53 54 typedef struct VhostUserMemory { 55 uint32_t nregions; 56 uint32_t padding; 57 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 58 } VhostUserMemory; 59 60 typedef struct VhostUserMsg { 61 VhostUserRequest request; 62 63 #define VHOST_USER_VERSION_MASK (0x3) 64 #define VHOST_USER_REPLY_MASK (0x1<<2) 65 uint32_t flags; 66 uint32_t size; /* the following payload size */ 67 union { 68 #define VHOST_USER_VRING_IDX_MASK (0xff) 69 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 70 uint64_t u64; 71 struct vhost_vring_state state; 72 struct vhost_vring_addr addr; 73 VhostUserMemory memory; 74 }; 75 } QEMU_PACKED VhostUserMsg; 76 77 static VhostUserMsg m __attribute__ ((unused)); 78 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ 79 + sizeof(m.flags) \ 80 + sizeof(m.size)) 81 82 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) 83 84 /* The version of the protocol we support */ 85 #define VHOST_USER_VERSION (0x1) 86 87 static bool ioeventfd_enabled(void) 88 { 89 return kvm_enabled() && kvm_eventfds_enabled(); 90 } 91 92 static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = { 93 -1, /* VHOST_USER_NONE */ 94 VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */ 95 VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */ 96 VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */ 97 VHOST_RESET_OWNER, /* VHOST_USER_RESET_OWNER */ 98 VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */ 99 VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */ 100 VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */ 101 VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */ 102 VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */ 103 VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */ 104 VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ 105 VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ 106 VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ 107 VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ 108 }; 109 110 static VhostUserRequest vhost_user_request_translate(unsigned long int request) 111 { 112 VhostUserRequest idx; 113 114 for (idx = 0; idx < VHOST_USER_MAX; idx++) { 115 if (ioctl_to_vhost_user_request[idx] == request) { 116 break; 117 } 118 } 119 120 return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx; 121 } 122 123 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 124 { 125 CharDriverState *chr = dev->opaque; 126 uint8_t *p = (uint8_t *) msg; 127 int r, size = VHOST_USER_HDR_SIZE; 128 129 r = qemu_chr_fe_read_all(chr, p, size); 130 if (r != size) { 131 error_report("Failed to read msg header. Read %d instead of %d.", r, 132 size); 133 goto fail; 134 } 135 136 /* validate received flags */ 137 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 138 error_report("Failed to read msg header." 139 " Flags 0x%x instead of 0x%x.", msg->flags, 140 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 141 goto fail; 142 } 143 144 /* validate message size is sane */ 145 if (msg->size > VHOST_USER_PAYLOAD_SIZE) { 146 error_report("Failed to read msg header." 147 " Size %d exceeds the maximum %zu.", msg->size, 148 VHOST_USER_PAYLOAD_SIZE); 149 goto fail; 150 } 151 152 if (msg->size) { 153 p += VHOST_USER_HDR_SIZE; 154 size = msg->size; 155 r = qemu_chr_fe_read_all(chr, p, size); 156 if (r != size) { 157 error_report("Failed to read msg payload." 158 " Read %d instead of %d.", r, msg->size); 159 goto fail; 160 } 161 } 162 163 return 0; 164 165 fail: 166 return -1; 167 } 168 169 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 170 int *fds, int fd_num) 171 { 172 CharDriverState *chr = dev->opaque; 173 int size = VHOST_USER_HDR_SIZE + msg->size; 174 175 if (fd_num) { 176 qemu_chr_fe_set_msgfds(chr, fds, fd_num); 177 } 178 179 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 180 0 : -1; 181 } 182 183 static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, 184 void *arg) 185 { 186 VhostUserMsg msg; 187 VhostUserRequest msg_request; 188 struct vhost_vring_file *file = 0; 189 int need_reply = 0; 190 int fds[VHOST_MEMORY_MAX_NREGIONS]; 191 int i, fd; 192 size_t fd_num = 0; 193 194 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 195 196 msg_request = vhost_user_request_translate(request); 197 msg.request = msg_request; 198 msg.flags = VHOST_USER_VERSION; 199 msg.size = 0; 200 201 switch (request) { 202 case VHOST_GET_FEATURES: 203 need_reply = 1; 204 break; 205 206 case VHOST_SET_FEATURES: 207 case VHOST_SET_LOG_BASE: 208 msg.u64 = *((__u64 *) arg); 209 msg.size = sizeof(m.u64); 210 break; 211 212 case VHOST_SET_OWNER: 213 break; 214 215 case VHOST_RESET_OWNER: 216 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 217 msg.state.index += dev->vq_index; 218 msg.size = sizeof(m.state); 219 break; 220 221 case VHOST_SET_MEM_TABLE: 222 for (i = 0; i < dev->mem->nregions; ++i) { 223 struct vhost_memory_region *reg = dev->mem->regions + i; 224 ram_addr_t ram_addr; 225 226 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 227 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr); 228 fd = qemu_get_ram_fd(ram_addr); 229 if (fd > 0) { 230 msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 231 msg.memory.regions[fd_num].memory_size = reg->memory_size; 232 msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 233 msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr - 234 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); 235 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 236 fds[fd_num++] = fd; 237 } 238 } 239 240 msg.memory.nregions = fd_num; 241 242 if (!fd_num) { 243 error_report("Failed initializing vhost-user memory map, " 244 "consider using -object memory-backend-file share=on"); 245 return -1; 246 } 247 248 msg.size = sizeof(m.memory.nregions); 249 msg.size += sizeof(m.memory.padding); 250 msg.size += fd_num * sizeof(VhostUserMemoryRegion); 251 252 break; 253 254 case VHOST_SET_LOG_FD: 255 fds[fd_num++] = *((int *) arg); 256 break; 257 258 case VHOST_SET_VRING_NUM: 259 case VHOST_SET_VRING_BASE: 260 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 261 msg.state.index += dev->vq_index; 262 msg.size = sizeof(m.state); 263 break; 264 265 case VHOST_GET_VRING_BASE: 266 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 267 msg.state.index += dev->vq_index; 268 msg.size = sizeof(m.state); 269 need_reply = 1; 270 break; 271 272 case VHOST_SET_VRING_ADDR: 273 memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr)); 274 msg.addr.index += dev->vq_index; 275 msg.size = sizeof(m.addr); 276 break; 277 278 case VHOST_SET_VRING_KICK: 279 case VHOST_SET_VRING_CALL: 280 case VHOST_SET_VRING_ERR: 281 file = arg; 282 msg.u64 = (file->index + dev->vq_index) & VHOST_USER_VRING_IDX_MASK; 283 msg.size = sizeof(m.u64); 284 if (ioeventfd_enabled() && file->fd > 0) { 285 fds[fd_num++] = file->fd; 286 } else { 287 msg.u64 |= VHOST_USER_VRING_NOFD_MASK; 288 } 289 break; 290 default: 291 error_report("vhost-user trying to send unhandled ioctl"); 292 return -1; 293 break; 294 } 295 296 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 297 return 0; 298 } 299 300 if (need_reply) { 301 if (vhost_user_read(dev, &msg) < 0) { 302 return 0; 303 } 304 305 if (msg_request != msg.request) { 306 error_report("Received unexpected msg type." 307 " Expected %d received %d", msg_request, msg.request); 308 return -1; 309 } 310 311 switch (msg_request) { 312 case VHOST_USER_GET_FEATURES: 313 if (msg.size != sizeof(m.u64)) { 314 error_report("Received bad msg size."); 315 return -1; 316 } 317 *((__u64 *) arg) = msg.u64; 318 break; 319 case VHOST_USER_GET_VRING_BASE: 320 if (msg.size != sizeof(m.state)) { 321 error_report("Received bad msg size."); 322 return -1; 323 } 324 msg.state.index -= dev->vq_index; 325 memcpy(arg, &msg.state, sizeof(struct vhost_vring_state)); 326 break; 327 default: 328 error_report("Received unexpected msg type."); 329 return -1; 330 break; 331 } 332 } 333 334 return 0; 335 } 336 337 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 338 { 339 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 340 341 dev->opaque = opaque; 342 343 return 0; 344 } 345 346 static int vhost_user_cleanup(struct vhost_dev *dev) 347 { 348 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 349 350 dev->opaque = 0; 351 352 return 0; 353 } 354 355 const VhostOps user_ops = { 356 .backend_type = VHOST_BACKEND_TYPE_USER, 357 .vhost_call = vhost_user_call, 358 .vhost_backend_init = vhost_user_init, 359 .vhost_backend_cleanup = vhost_user_cleanup 360 }; 361