1 /* 2 * Copyright 6WIND S.A., 2014 3 * 4 * This work is licensed under the terms of the GNU GPL, version 2 or 5 * (at your option) any later version. See the COPYING file in the 6 * top-level directory. 7 */ 8 #include "qemu-common.h" 9 #include "qemu/sockets.h" 10 11 #include <sys/mman.h> 12 #include <sys/types.h> 13 #include <sys/socket.h> 14 #include <sys/un.h> 15 #ifdef CONFIG_LINUX 16 #include <sys/vfs.h> 17 #endif 18 19 #include "ivshmem-server.h" 20 21 /* log a message on stdout if verbose=1 */ 22 #define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \ 23 if ((server)->verbose) { \ 24 printf(fmt, ## __VA_ARGS__); \ 25 } \ 26 } while (0) 27 28 /** maximum size of a huge page, used by ivshmem_server_ftruncate() */ 29 #define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024) 30 31 /** default listen backlog (number of sockets not accepted) */ 32 #define IVSHMEM_SERVER_LISTEN_BACKLOG 10 33 34 /* send message to a client unix socket */ 35 static int 36 ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd) 37 { 38 int ret; 39 struct msghdr msg; 40 struct iovec iov[1]; 41 union { 42 struct cmsghdr cmsg; 43 char control[CMSG_SPACE(sizeof(int))]; 44 } msg_control; 45 struct cmsghdr *cmsg; 46 47 peer_id = GINT64_TO_LE(peer_id); 48 iov[0].iov_base = &peer_id; 49 iov[0].iov_len = sizeof(peer_id); 50 51 memset(&msg, 0, sizeof(msg)); 52 msg.msg_iov = iov; 53 msg.msg_iovlen = 1; 54 55 /* if fd is specified, add it in a cmsg */ 56 if (fd >= 0) { 57 memset(&msg_control, 0, sizeof(msg_control)); 58 msg.msg_control = &msg_control; 59 msg.msg_controllen = sizeof(msg_control); 60 cmsg = CMSG_FIRSTHDR(&msg); 61 cmsg->cmsg_level = SOL_SOCKET; 62 cmsg->cmsg_type = SCM_RIGHTS; 63 cmsg->cmsg_len = CMSG_LEN(sizeof(int)); 64 memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd)); 65 } 66 67 ret = sendmsg(sock_fd, &msg, 0); 68 if (ret <= 0) { 69 return -1; 70 } 71 72 return 0; 73 } 74 75 /* free a peer when the server advertises a disconnection or when the 76 * server is freed */ 77 static void 78 ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer) 79 { 80 unsigned vector; 81 IvshmemServerPeer *other_peer; 82 83 IVSHMEM_SERVER_DEBUG(server, "free peer %" PRId64 "\n", peer->id); 84 close(peer->sock_fd); 85 QTAILQ_REMOVE(&server->peer_list, peer, next); 86 87 /* advertise the deletion to other peers */ 88 QTAILQ_FOREACH(other_peer, &server->peer_list, next) { 89 ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, -1); 90 } 91 92 for (vector = 0; vector < peer->vectors_count; vector++) { 93 event_notifier_cleanup(&peer->vectors[vector]); 94 } 95 96 g_free(peer); 97 } 98 99 /* send the peer id and the shm_fd just after a new client connection */ 100 static int 101 ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer) 102 { 103 int ret; 104 105 /* send our protocol version first */ 106 ret = ivshmem_server_send_one_msg(peer->sock_fd, IVSHMEM_PROTOCOL_VERSION, 107 -1); 108 if (ret < 0) { 109 IVSHMEM_SERVER_DEBUG(server, "cannot send version: %s\n", 110 strerror(errno)); 111 return -1; 112 } 113 114 /* send the peer id to the client */ 115 ret = ivshmem_server_send_one_msg(peer->sock_fd, peer->id, -1); 116 if (ret < 0) { 117 IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n", 118 strerror(errno)); 119 return -1; 120 } 121 122 /* send the shm_fd */ 123 ret = ivshmem_server_send_one_msg(peer->sock_fd, -1, server->shm_fd); 124 if (ret < 0) { 125 IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n", 126 strerror(errno)); 127 return -1; 128 } 129 130 return 0; 131 } 132 133 /* handle message on listening unix socket (new client connection) */ 134 static int 135 ivshmem_server_handle_new_conn(IvshmemServer *server) 136 { 137 IvshmemServerPeer *peer, *other_peer; 138 struct sockaddr_un unaddr; 139 socklen_t unaddr_len; 140 int newfd; 141 unsigned i; 142 143 /* accept the incoming connection */ 144 unaddr_len = sizeof(unaddr); 145 newfd = qemu_accept(server->sock_fd, 146 (struct sockaddr *)&unaddr, &unaddr_len); 147 148 if (newfd < 0) { 149 IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n", strerror(errno)); 150 return -1; 151 } 152 153 qemu_set_nonblock(newfd); 154 IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd); 155 156 /* allocate new structure for this peer */ 157 peer = g_malloc0(sizeof(*peer)); 158 peer->sock_fd = newfd; 159 160 /* get an unused peer id */ 161 /* XXX: this could use id allocation such as Linux IDA, or simply 162 * a free-list */ 163 for (i = 0; i < G_MAXUINT16; i++) { 164 if (ivshmem_server_search_peer(server, server->cur_id) == NULL) { 165 break; 166 } 167 server->cur_id++; 168 } 169 if (i == G_MAXUINT16) { 170 IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n"); 171 close(newfd); 172 g_free(peer); 173 return -1; 174 } 175 peer->id = server->cur_id++; 176 177 /* create eventfd, one per vector */ 178 peer->vectors_count = server->n_vectors; 179 for (i = 0; i < peer->vectors_count; i++) { 180 if (event_notifier_init(&peer->vectors[i], FALSE) < 0) { 181 IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n"); 182 goto fail; 183 } 184 } 185 186 /* send peer id and shm fd */ 187 if (ivshmem_server_send_initial_info(server, peer) < 0) { 188 IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n"); 189 goto fail; 190 } 191 192 /* advertise the new peer to others */ 193 QTAILQ_FOREACH(other_peer, &server->peer_list, next) { 194 for (i = 0; i < peer->vectors_count; i++) { 195 ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, 196 peer->vectors[i].wfd); 197 } 198 } 199 200 /* advertise the other peers to the new one */ 201 QTAILQ_FOREACH(other_peer, &server->peer_list, next) { 202 for (i = 0; i < peer->vectors_count; i++) { 203 ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id, 204 other_peer->vectors[i].wfd); 205 } 206 } 207 208 /* advertise the new peer to itself */ 209 for (i = 0; i < peer->vectors_count; i++) { 210 ivshmem_server_send_one_msg(peer->sock_fd, peer->id, 211 event_notifier_get_fd(&peer->vectors[i])); 212 } 213 214 QTAILQ_INSERT_TAIL(&server->peer_list, peer, next); 215 IVSHMEM_SERVER_DEBUG(server, "new peer id = %" PRId64 "\n", 216 peer->id); 217 return 0; 218 219 fail: 220 while (i--) { 221 event_notifier_cleanup(&peer->vectors[i]); 222 } 223 close(newfd); 224 g_free(peer); 225 return -1; 226 } 227 228 /* Try to ftruncate a file to next power of 2 of shmsize. 229 * If it fails; all power of 2 above shmsize are tested until 230 * we reach the maximum huge page size. This is useful 231 * if the shm file is in a hugetlbfs that cannot be truncated to the 232 * shm_size value. */ 233 static int 234 ivshmem_server_ftruncate(int fd, unsigned shmsize) 235 { 236 int ret; 237 struct stat mapstat; 238 239 /* align shmsize to next power of 2 */ 240 shmsize = pow2ceil(shmsize); 241 242 if (fstat(fd, &mapstat) != -1 && mapstat.st_size == shmsize) { 243 return 0; 244 } 245 246 while (shmsize <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE) { 247 ret = ftruncate(fd, shmsize); 248 if (ret == 0) { 249 return ret; 250 } 251 shmsize *= 2; 252 } 253 254 return -1; 255 } 256 257 /* Init a new ivshmem server */ 258 int 259 ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path, 260 const char *shm_path, size_t shm_size, unsigned n_vectors, 261 bool verbose) 262 { 263 int ret; 264 265 memset(server, 0, sizeof(*server)); 266 server->verbose = verbose; 267 268 ret = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path), 269 "%s", unix_sock_path); 270 if (ret < 0 || ret >= sizeof(server->unix_sock_path)) { 271 IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n"); 272 return -1; 273 } 274 ret = snprintf(server->shm_path, sizeof(server->shm_path), 275 "%s", shm_path); 276 if (ret < 0 || ret >= sizeof(server->shm_path)) { 277 IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n"); 278 return -1; 279 } 280 281 server->shm_size = shm_size; 282 server->n_vectors = n_vectors; 283 284 QTAILQ_INIT(&server->peer_list); 285 286 return 0; 287 } 288 289 #ifdef CONFIG_LINUX 290 291 #define HUGETLBFS_MAGIC 0x958458f6 292 293 static long gethugepagesize(const char *path) 294 { 295 struct statfs fs; 296 int ret; 297 298 do { 299 ret = statfs(path, &fs); 300 } while (ret != 0 && errno == EINTR); 301 302 if (ret != 0) { 303 return -1; 304 } 305 306 if (fs.f_type != HUGETLBFS_MAGIC) { 307 return -1; 308 } 309 310 return fs.f_bsize; 311 } 312 #endif 313 314 /* open shm, create and bind to the unix socket */ 315 int 316 ivshmem_server_start(IvshmemServer *server) 317 { 318 struct sockaddr_un sun; 319 int shm_fd, sock_fd, ret; 320 321 /* open shm file */ 322 #ifdef CONFIG_LINUX 323 long hpagesize; 324 325 hpagesize = gethugepagesize(server->shm_path); 326 if (hpagesize < 0 && errno != ENOENT) { 327 IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n", 328 server->shm_path, strerror(errno)); 329 } 330 331 if (hpagesize > 0) { 332 gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path); 333 IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path); 334 shm_fd = mkstemp(filename); 335 unlink(filename); 336 g_free(filename); 337 } else 338 #endif 339 { 340 IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n", 341 server->shm_path); 342 shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU); 343 } 344 345 if (shm_fd < 0) { 346 fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path, 347 strerror(errno)); 348 return -1; 349 } 350 if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) { 351 fprintf(stderr, "ftruncate(%s) failed: %s\n", server->shm_path, 352 strerror(errno)); 353 goto err_close_shm; 354 } 355 356 IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n", 357 server->unix_sock_path); 358 359 /* create the unix listening socket */ 360 sock_fd = socket(AF_UNIX, SOCK_STREAM, 0); 361 if (sock_fd < 0) { 362 IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n", 363 strerror(errno)); 364 goto err_close_shm; 365 } 366 367 sun.sun_family = AF_UNIX; 368 ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s", 369 server->unix_sock_path); 370 if (ret < 0 || ret >= sizeof(sun.sun_path)) { 371 IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n"); 372 goto err_close_sock; 373 } 374 if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) { 375 IVSHMEM_SERVER_DEBUG(server, "cannot connect to %s: %s\n", sun.sun_path, 376 strerror(errno)); 377 goto err_close_sock; 378 } 379 380 if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) { 381 IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n", strerror(errno)); 382 goto err_close_sock; 383 } 384 385 server->sock_fd = sock_fd; 386 server->shm_fd = shm_fd; 387 388 return 0; 389 390 err_close_sock: 391 close(sock_fd); 392 err_close_shm: 393 close(shm_fd); 394 return -1; 395 } 396 397 /* close connections to clients, the unix socket and the shm fd */ 398 void 399 ivshmem_server_close(IvshmemServer *server) 400 { 401 IvshmemServerPeer *peer, *npeer; 402 403 IVSHMEM_SERVER_DEBUG(server, "close server\n"); 404 405 QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, npeer) { 406 ivshmem_server_free_peer(server, peer); 407 } 408 409 unlink(server->unix_sock_path); 410 close(server->sock_fd); 411 close(server->shm_fd); 412 server->sock_fd = -1; 413 server->shm_fd = -1; 414 } 415 416 /* get the fd_set according to the unix socket and the peer list */ 417 void 418 ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd) 419 { 420 IvshmemServerPeer *peer; 421 422 if (server->sock_fd == -1) { 423 return; 424 } 425 426 FD_SET(server->sock_fd, fds); 427 if (server->sock_fd >= *maxfd) { 428 *maxfd = server->sock_fd + 1; 429 } 430 431 QTAILQ_FOREACH(peer, &server->peer_list, next) { 432 FD_SET(peer->sock_fd, fds); 433 if (peer->sock_fd >= *maxfd) { 434 *maxfd = peer->sock_fd + 1; 435 } 436 } 437 } 438 439 /* process incoming messages on the sockets in fd_set */ 440 int 441 ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd) 442 { 443 IvshmemServerPeer *peer, *peer_next; 444 445 if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds) && 446 ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) { 447 IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() " 448 "failed\n"); 449 return -1; 450 } 451 452 QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, peer_next) { 453 /* any message from a peer socket result in a close() */ 454 IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n", peer->sock_fd); 455 if (peer->sock_fd < maxfd && FD_ISSET(peer->sock_fd, fds)) { 456 ivshmem_server_free_peer(server, peer); 457 } 458 } 459 460 return 0; 461 } 462 463 /* lookup peer from its id */ 464 IvshmemServerPeer * 465 ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id) 466 { 467 IvshmemServerPeer *peer; 468 469 QTAILQ_FOREACH(peer, &server->peer_list, next) { 470 if (peer->id == peer_id) { 471 return peer; 472 } 473 } 474 return NULL; 475 } 476 477 /* dump our info, the list of peers their vectors on stdout */ 478 void 479 ivshmem_server_dump(const IvshmemServer *server) 480 { 481 const IvshmemServerPeer *peer; 482 unsigned vector; 483 484 /* dump peers */ 485 QTAILQ_FOREACH(peer, &server->peer_list, next) { 486 printf("peer_id = %" PRId64 "\n", peer->id); 487 488 for (vector = 0; vector < peer->vectors_count; vector++) { 489 printf(" vector %d is enabled (fd=%d)\n", vector, 490 event_notifier_get_fd(&peer->vectors[vector])); 491 } 492 } 493 } 494