1 /* 2 * vhost-user 3 * 4 * Copyright (c) 2013 Virtual Open Systems Sarl. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "hw/virtio/vhost.h" 12 #include "hw/virtio/vhost-backend.h" 13 #include "sysemu/char.h" 14 #include "sysemu/kvm.h" 15 #include "qemu/error-report.h" 16 #include "qemu/sockets.h" 17 #include "exec/ram_addr.h" 18 19 #include <fcntl.h> 20 #include <unistd.h> 21 #include <sys/ioctl.h> 22 #include <sys/socket.h> 23 #include <sys/un.h> 24 #include <linux/vhost.h> 25 26 #define VHOST_MEMORY_MAX_NREGIONS 8 27 28 typedef enum VhostUserRequest { 29 VHOST_USER_NONE = 0, 30 VHOST_USER_GET_FEATURES = 1, 31 VHOST_USER_SET_FEATURES = 2, 32 VHOST_USER_SET_OWNER = 3, 33 VHOST_USER_RESET_OWNER = 4, 34 VHOST_USER_SET_MEM_TABLE = 5, 35 VHOST_USER_SET_LOG_BASE = 6, 36 VHOST_USER_SET_LOG_FD = 7, 37 VHOST_USER_SET_VRING_NUM = 8, 38 VHOST_USER_SET_VRING_ADDR = 9, 39 VHOST_USER_SET_VRING_BASE = 10, 40 VHOST_USER_GET_VRING_BASE = 11, 41 VHOST_USER_SET_VRING_KICK = 12, 42 VHOST_USER_SET_VRING_CALL = 13, 43 VHOST_USER_SET_VRING_ERR = 14, 44 VHOST_USER_MAX 45 } VhostUserRequest; 46 47 typedef struct VhostUserMemoryRegion { 48 uint64_t guest_phys_addr; 49 uint64_t memory_size; 50 uint64_t userspace_addr; 51 uint64_t mmap_offset; 52 } VhostUserMemoryRegion; 53 54 typedef struct VhostUserMemory { 55 uint32_t nregions; 56 uint32_t padding; 57 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 58 } VhostUserMemory; 59 60 typedef struct VhostUserMsg { 61 VhostUserRequest request; 62 63 #define VHOST_USER_VERSION_MASK (0x3) 64 #define VHOST_USER_REPLY_MASK (0x1<<2) 65 uint32_t flags; 66 uint32_t size; /* the following payload size */ 67 union { 68 #define VHOST_USER_VRING_IDX_MASK (0xff) 69 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 70 uint64_t u64; 71 struct vhost_vring_state state; 72 struct vhost_vring_addr addr; 73 VhostUserMemory memory; 74 }; 75 } QEMU_PACKED VhostUserMsg; 76 77 static VhostUserMsg m __attribute__ ((unused)); 78 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \ 79 + sizeof(m.flags) \ 80 + sizeof(m.size)) 81 82 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) 83 84 /* The version of the protocol we support */ 85 #define VHOST_USER_VERSION (0x1) 86 87 static bool ioeventfd_enabled(void) 88 { 89 return kvm_enabled() && kvm_eventfds_enabled(); 90 } 91 92 static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = { 93 -1, /* VHOST_USER_NONE */ 94 VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */ 95 VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */ 96 VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */ 97 VHOST_RESET_OWNER, /* VHOST_USER_RESET_OWNER */ 98 VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */ 99 VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */ 100 VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */ 101 VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */ 102 VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */ 103 VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */ 104 VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ 105 VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ 106 VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ 107 VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ 108 }; 109 110 static VhostUserRequest vhost_user_request_translate(unsigned long int request) 111 { 112 VhostUserRequest idx; 113 114 for (idx = 0; idx < VHOST_USER_MAX; idx++) { 115 if (ioctl_to_vhost_user_request[idx] == request) { 116 break; 117 } 118 } 119 120 return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx; 121 } 122 123 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) 124 { 125 CharDriverState *chr = dev->opaque; 126 uint8_t *p = (uint8_t *) msg; 127 int r, size = VHOST_USER_HDR_SIZE; 128 129 r = qemu_chr_fe_read_all(chr, p, size); 130 if (r != size) { 131 error_report("Failed to read msg header. Read %d instead of %d.\n", r, 132 size); 133 goto fail; 134 } 135 136 /* validate received flags */ 137 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { 138 error_report("Failed to read msg header." 139 " Flags 0x%x instead of 0x%x.\n", msg->flags, 140 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); 141 goto fail; 142 } 143 144 /* validate message size is sane */ 145 if (msg->size > VHOST_USER_PAYLOAD_SIZE) { 146 error_report("Failed to read msg header." 147 " Size %d exceeds the maximum %zu.\n", msg->size, 148 VHOST_USER_PAYLOAD_SIZE); 149 goto fail; 150 } 151 152 if (msg->size) { 153 p += VHOST_USER_HDR_SIZE; 154 size = msg->size; 155 r = qemu_chr_fe_read_all(chr, p, size); 156 if (r != size) { 157 error_report("Failed to read msg payload." 158 " Read %d instead of %d.\n", r, msg->size); 159 goto fail; 160 } 161 } 162 163 return 0; 164 165 fail: 166 return -1; 167 } 168 169 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 170 int *fds, int fd_num) 171 { 172 CharDriverState *chr = dev->opaque; 173 int size = VHOST_USER_HDR_SIZE + msg->size; 174 175 if (fd_num) { 176 qemu_chr_fe_set_msgfds(chr, fds, fd_num); 177 } 178 179 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? 180 0 : -1; 181 } 182 183 static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, 184 void *arg) 185 { 186 VhostUserMsg msg; 187 VhostUserRequest msg_request; 188 struct vhost_vring_file *file = 0; 189 int need_reply = 0; 190 int fds[VHOST_MEMORY_MAX_NREGIONS]; 191 int i, fd; 192 size_t fd_num = 0; 193 194 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 195 196 msg_request = vhost_user_request_translate(request); 197 msg.request = msg_request; 198 msg.flags = VHOST_USER_VERSION; 199 msg.size = 0; 200 201 switch (request) { 202 case VHOST_GET_FEATURES: 203 need_reply = 1; 204 break; 205 206 case VHOST_SET_FEATURES: 207 case VHOST_SET_LOG_BASE: 208 msg.u64 = *((__u64 *) arg); 209 msg.size = sizeof(m.u64); 210 break; 211 212 case VHOST_SET_OWNER: 213 case VHOST_RESET_OWNER: 214 break; 215 216 case VHOST_SET_MEM_TABLE: 217 for (i = 0; i < dev->mem->nregions; ++i) { 218 struct vhost_memory_region *reg = dev->mem->regions + i; 219 ram_addr_t ram_addr; 220 221 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 222 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr); 223 fd = qemu_get_ram_fd(ram_addr); 224 if (fd > 0) { 225 msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 226 msg.memory.regions[fd_num].memory_size = reg->memory_size; 227 msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 228 msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr - 229 (uintptr_t) qemu_get_ram_block_host_ptr(reg->guest_phys_addr); 230 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 231 fds[fd_num++] = fd; 232 } 233 } 234 235 msg.memory.nregions = fd_num; 236 237 if (!fd_num) { 238 error_report("Failed initializing vhost-user memory map\n" 239 "consider using -object memory-backend-file share=on\n"); 240 return -1; 241 } 242 243 msg.size = sizeof(m.memory.nregions); 244 msg.size += sizeof(m.memory.padding); 245 msg.size += fd_num * sizeof(VhostUserMemoryRegion); 246 247 break; 248 249 case VHOST_SET_LOG_FD: 250 fds[fd_num++] = *((int *) arg); 251 break; 252 253 case VHOST_SET_VRING_NUM: 254 case VHOST_SET_VRING_BASE: 255 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 256 msg.size = sizeof(m.state); 257 break; 258 259 case VHOST_GET_VRING_BASE: 260 memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); 261 msg.size = sizeof(m.state); 262 need_reply = 1; 263 break; 264 265 case VHOST_SET_VRING_ADDR: 266 memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr)); 267 msg.size = sizeof(m.addr); 268 break; 269 270 case VHOST_SET_VRING_KICK: 271 case VHOST_SET_VRING_CALL: 272 case VHOST_SET_VRING_ERR: 273 file = arg; 274 msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK; 275 msg.size = sizeof(m.u64); 276 if (ioeventfd_enabled() && file->fd > 0) { 277 fds[fd_num++] = file->fd; 278 } else { 279 msg.u64 |= VHOST_USER_VRING_NOFD_MASK; 280 } 281 break; 282 default: 283 error_report("vhost-user trying to send unhandled ioctl\n"); 284 return -1; 285 break; 286 } 287 288 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 289 return 0; 290 } 291 292 if (need_reply) { 293 if (vhost_user_read(dev, &msg) < 0) { 294 return 0; 295 } 296 297 if (msg_request != msg.request) { 298 error_report("Received unexpected msg type." 299 " Expected %d received %d\n", msg_request, msg.request); 300 return -1; 301 } 302 303 switch (msg_request) { 304 case VHOST_USER_GET_FEATURES: 305 if (msg.size != sizeof(m.u64)) { 306 error_report("Received bad msg size.\n"); 307 return -1; 308 } 309 *((__u64 *) arg) = msg.u64; 310 break; 311 case VHOST_USER_GET_VRING_BASE: 312 if (msg.size != sizeof(m.state)) { 313 error_report("Received bad msg size.\n"); 314 return -1; 315 } 316 memcpy(arg, &msg.state, sizeof(struct vhost_vring_state)); 317 break; 318 default: 319 error_report("Received unexpected msg type.\n"); 320 return -1; 321 break; 322 } 323 } 324 325 return 0; 326 } 327 328 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 329 { 330 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 331 332 dev->opaque = opaque; 333 334 return 0; 335 } 336 337 static int vhost_user_cleanup(struct vhost_dev *dev) 338 { 339 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 340 341 dev->opaque = 0; 342 343 return 0; 344 } 345 346 const VhostOps user_ops = { 347 .backend_type = VHOST_BACKEND_TYPE_USER, 348 .vhost_call = vhost_user_call, 349 .vhost_backend_init = vhost_user_init, 350 .vhost_backend_cleanup = vhost_user_cleanup 351 }; 352