1 /*
2  * Copyright 6WIND S.A., 2014
3  *
4  * This work is licensed under the terms of the GNU GPL, version 2 or
5  * (at your option) any later version.  See the COPYING file in the
6  * top-level directory.
7  */
8 #include "qemu-common.h"
9 #include "qemu/sockets.h"
10 
11 #include <sys/mman.h>
12 #include <sys/types.h>
13 #include <sys/socket.h>
14 #include <sys/un.h>
15 #ifdef CONFIG_LINUX
16 #include <sys/vfs.h>
17 #endif
18 
19 #include "ivshmem-server.h"
20 
/*
 * Log a printf-style message on stdout, but only when the verbose flag of
 * @server is non-zero.  The do { } while (0) wrapper lets the macro be used
 * like a single statement.
 */
#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
        if ((server)->verbose) {         \
            printf(fmt, ## __VA_ARGS__); \
        }                                \
    } while (0)

/** maximum size of a huge page (1 GiB); ivshmem_server_ftruncate() does not
 * try sizes above this value */
#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)

/** backlog passed to listen() on the server socket (number of pending
 * connections that have not been accepted yet) */
#define IVSHMEM_SERVER_LISTEN_BACKLOG 10
33 
/*
 * Send one message to a client unix socket.
 *
 * The payload is @peer_id converted to little-endian.  When @fd is >= 0 it
 * is attached to the message as SCM_RIGHTS ancillary data; a negative @fd
 * sends the payload alone.
 *
 * Returns 0 when sendmsg() sent something, -1 otherwise (errno is the one
 * left by sendmsg()).
 */
static int
ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd)
{
    ssize_t ret;
    struct msghdr msg;
    struct iovec iov[1];
    union {
        struct cmsghdr cmsg;
        char control[CMSG_SPACE(sizeof(int))];
    } msg_control;
    struct cmsghdr *cmsg;

    peer_id = GINT64_TO_LE(peer_id);
    iov[0].iov_base = &peer_id;
    iov[0].iov_len = sizeof(peer_id);

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = iov;
    msg.msg_iovlen = 1;

    /* if fd is specified, add it in a cmsg */
    if (fd >= 0) {
        memset(&msg_control, 0, sizeof(msg_control));
        msg.msg_control = &msg_control;
        msg.msg_controllen = sizeof(msg_control);
        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
    }

    /* a signal can interrupt sendmsg() before anything is transmitted;
     * that is not a failure of the peer, so simply try again */
    do {
        ret = sendmsg(sock_fd, &msg, 0);
    } while (ret < 0 && errno == EINTR);

    if (ret <= 0) {
        return -1;
    }

    return 0;
}
74 
75 /* free a peer when the server advertises a disconnection or when the
76  * server is freed */
77 static void
78 ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer)
79 {
80     unsigned vector;
81     IvshmemServerPeer *other_peer;
82 
83     IVSHMEM_SERVER_DEBUG(server, "free peer %" PRId64 "\n", peer->id);
84     close(peer->sock_fd);
85     QTAILQ_REMOVE(&server->peer_list, peer, next);
86 
87     /* advertise the deletion to other peers */
88     QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
89         ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, -1);
90     }
91 
92     for (vector = 0; vector < peer->vectors_count; vector++) {
93         event_notifier_cleanup(&peer->vectors[vector]);
94     }
95 
96     g_free(peer);
97 }
98 
99 /* send the peer id and the shm_fd just after a new client connection */
100 static int
101 ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer)
102 {
103     int ret;
104 
105     /* send our protocol version first */
106     ret = ivshmem_server_send_one_msg(peer->sock_fd, IVSHMEM_PROTOCOL_VERSION,
107                                       -1);
108     if (ret < 0) {
109         IVSHMEM_SERVER_DEBUG(server, "cannot send version: %s\n",
110                              strerror(errno));
111         return -1;
112     }
113 
114     /* send the peer id to the client */
115     ret = ivshmem_server_send_one_msg(peer->sock_fd, peer->id, -1);
116     if (ret < 0) {
117         IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n",
118                              strerror(errno));
119         return -1;
120     }
121 
122     /* send the shm_fd */
123     ret = ivshmem_server_send_one_msg(peer->sock_fd, -1, server->shm_fd);
124     if (ret < 0) {
125         IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n",
126                              strerror(errno));
127         return -1;
128     }
129 
130     return 0;
131 }
132 
133 /* handle message on listening unix socket (new client connection) */
134 static int
135 ivshmem_server_handle_new_conn(IvshmemServer *server)
136 {
137     IvshmemServerPeer *peer, *other_peer;
138     struct sockaddr_un unaddr;
139     socklen_t unaddr_len;
140     int newfd;
141     unsigned i;
142 
143     /* accept the incoming connection */
144     unaddr_len = sizeof(unaddr);
145     newfd = qemu_accept(server->sock_fd,
146                         (struct sockaddr *)&unaddr, &unaddr_len);
147 
148     if (newfd < 0) {
149         IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n", strerror(errno));
150         return -1;
151     }
152 
153     qemu_set_nonblock(newfd);
154     IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd);
155 
156     /* allocate new structure for this peer */
157     peer = g_malloc0(sizeof(*peer));
158     peer->sock_fd = newfd;
159 
160     /* get an unused peer id */
161     /* XXX: this could use id allocation such as Linux IDA, or simply
162      * a free-list */
163     for (i = 0; i < G_MAXUINT16; i++) {
164         if (ivshmem_server_search_peer(server, server->cur_id) == NULL) {
165             break;
166         }
167         server->cur_id++;
168     }
169     if (i == G_MAXUINT16) {
170         IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n");
171         close(newfd);
172         g_free(peer);
173         return -1;
174     }
175     peer->id = server->cur_id++;
176 
177     /* create eventfd, one per vector */
178     peer->vectors_count = server->n_vectors;
179     for (i = 0; i < peer->vectors_count; i++) {
180         if (event_notifier_init(&peer->vectors[i], FALSE) < 0) {
181             IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n");
182             goto fail;
183         }
184     }
185 
186     /* send peer id and shm fd */
187     if (ivshmem_server_send_initial_info(server, peer) < 0) {
188         IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n");
189         goto fail;
190     }
191 
192     /* advertise the new peer to others */
193     QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
194         for (i = 0; i < peer->vectors_count; i++) {
195             ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id,
196                                         peer->vectors[i].wfd);
197         }
198     }
199 
200     /* advertise the other peers to the new one */
201     QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
202         for (i = 0; i < peer->vectors_count; i++) {
203             ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id,
204                                         other_peer->vectors[i].wfd);
205         }
206     }
207 
208     /* advertise the new peer to itself */
209     for (i = 0; i < peer->vectors_count; i++) {
210         ivshmem_server_send_one_msg(peer->sock_fd, peer->id,
211                                     event_notifier_get_fd(&peer->vectors[i]));
212     }
213 
214     QTAILQ_INSERT_TAIL(&server->peer_list, peer, next);
215     IVSHMEM_SERVER_DEBUG(server, "new peer id = %" PRId64 "\n",
216                          peer->id);
217     return 0;
218 
219 fail:
220     while (i--) {
221         event_notifier_cleanup(&peer->vectors[i]);
222     }
223     close(newfd);
224     g_free(peer);
225     return -1;
226 }
227 
228 /* Try to ftruncate a file to next power of 2 of shmsize.
229  * If it fails; all power of 2 above shmsize are tested until
230  * we reach the maximum huge page size. This is useful
231  * if the shm file is in a hugetlbfs that cannot be truncated to the
232  * shm_size value. */
233 static int
234 ivshmem_server_ftruncate(int fd, unsigned shmsize)
235 {
236     int ret;
237     struct stat mapstat;
238 
239     /* align shmsize to next power of 2 */
240     shmsize = pow2ceil(shmsize);
241 
242     if (fstat(fd, &mapstat) != -1 && mapstat.st_size == shmsize) {
243         return 0;
244     }
245 
246     while (shmsize <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE) {
247         ret = ftruncate(fd, shmsize);
248         if (ret == 0) {
249             return ret;
250         }
251         shmsize *= 2;
252     }
253 
254     return -1;
255 }
256 
257 /* Init a new ivshmem server */
258 int
259 ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
260                     const char *shm_path, size_t shm_size, unsigned n_vectors,
261                     bool verbose)
262 {
263     int ret;
264 
265     memset(server, 0, sizeof(*server));
266     server->verbose = verbose;
267 
268     ret = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path),
269                    "%s", unix_sock_path);
270     if (ret < 0 || ret >= sizeof(server->unix_sock_path)) {
271         IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
272         return -1;
273     }
274     ret = snprintf(server->shm_path, sizeof(server->shm_path),
275                    "%s", shm_path);
276     if (ret < 0 || ret >= sizeof(server->shm_path)) {
277         IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n");
278         return -1;
279     }
280 
281     server->shm_size = shm_size;
282     server->n_vectors = n_vectors;
283 
284     QTAILQ_INIT(&server->peer_list);
285 
286     return 0;
287 }
288 
289 #ifdef CONFIG_LINUX
290 
#define HUGETLBFS_MAGIC       0x958458f6

/*
 * Return the block size reported by statfs() for @path when the filesystem
 * type is HUGETLBFS_MAGIC.  Return -1 when statfs() fails or when the
 * filesystem type is something else.  statfs() is retried for as long as it
 * fails with EINTR.
 */
static long gethugepagesize(const char *path)
{
    struct statfs info;

    while (statfs(path, &info) != 0) {
        if (errno != EINTR) {
            return -1;
        }
    }

    if (info.f_type == HUGETLBFS_MAGIC) {
        return info.f_bsize;
    }

    return -1;
}
312 #endif
313 
314 /* open shm, create and bind to the unix socket */
315 int
316 ivshmem_server_start(IvshmemServer *server)
317 {
318     struct sockaddr_un sun;
319     int shm_fd, sock_fd, ret;
320 
321     /* open shm file */
322 #ifdef CONFIG_LINUX
323     long hpagesize;
324 
325     hpagesize = gethugepagesize(server->shm_path);
326     if (hpagesize < 0 && errno != ENOENT) {
327         IVSHMEM_SERVER_DEBUG(server, "cannot stat shm file %s: %s\n",
328                              server->shm_path, strerror(errno));
329     }
330 
331     if (hpagesize > 0) {
332         gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
333         IVSHMEM_SERVER_DEBUG(server, "Using hugepages: %s\n", server->shm_path);
334         shm_fd = mkstemp(filename);
335         unlink(filename);
336         g_free(filename);
337     } else
338 #endif
339     {
340         IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
341                              server->shm_path);
342         shm_fd = shm_open(server->shm_path, O_CREAT|O_RDWR, S_IRWXU);
343     }
344 
345     if (shm_fd < 0) {
346         fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
347                 strerror(errno));
348         return -1;
349     }
350     if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) {
351         fprintf(stderr, "ftruncate(%s) failed: %s\n", server->shm_path,
352                 strerror(errno));
353         goto err_close_shm;
354     }
355 
356     IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n",
357                          server->unix_sock_path);
358 
359     /* create the unix listening socket */
360     sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
361     if (sock_fd < 0) {
362         IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n",
363                              strerror(errno));
364         goto err_close_shm;
365     }
366 
367     sun.sun_family = AF_UNIX;
368     ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
369                    server->unix_sock_path);
370     if (ret < 0 || ret >= sizeof(sun.sun_path)) {
371         IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
372         goto err_close_sock;
373     }
374     if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
375         IVSHMEM_SERVER_DEBUG(server, "cannot connect to %s: %s\n", sun.sun_path,
376                              strerror(errno));
377         goto err_close_sock;
378     }
379 
380     if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) {
381         IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n", strerror(errno));
382         goto err_close_sock;
383     }
384 
385     server->sock_fd = sock_fd;
386     server->shm_fd = shm_fd;
387 
388     return 0;
389 
390 err_close_sock:
391     close(sock_fd);
392 err_close_shm:
393     close(shm_fd);
394     return -1;
395 }
396 
397 /* close connections to clients, the unix socket and the shm fd */
398 void
399 ivshmem_server_close(IvshmemServer *server)
400 {
401     IvshmemServerPeer *peer, *npeer;
402 
403     IVSHMEM_SERVER_DEBUG(server, "close server\n");
404 
405     QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, npeer) {
406         ivshmem_server_free_peer(server, peer);
407     }
408 
409     unlink(server->unix_sock_path);
410     close(server->sock_fd);
411     close(server->shm_fd);
412     server->sock_fd = -1;
413     server->shm_fd = -1;
414 }
415 
416 /* get the fd_set according to the unix socket and the peer list */
417 void
418 ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd)
419 {
420     IvshmemServerPeer *peer;
421 
422     if (server->sock_fd == -1) {
423         return;
424     }
425 
426     FD_SET(server->sock_fd, fds);
427     if (server->sock_fd >= *maxfd) {
428         *maxfd = server->sock_fd + 1;
429     }
430 
431     QTAILQ_FOREACH(peer, &server->peer_list, next) {
432         FD_SET(peer->sock_fd, fds);
433         if (peer->sock_fd >= *maxfd) {
434             *maxfd = peer->sock_fd + 1;
435         }
436     }
437 }
438 
439 /* process incoming messages on the sockets in fd_set */
440 int
441 ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd)
442 {
443     IvshmemServerPeer *peer, *peer_next;
444 
445     if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds) &&
446         ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) {
447         IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() "
448                              "failed\n");
449         return -1;
450     }
451 
452     QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, peer_next) {
453         /* any message from a peer socket result in a close() */
454         IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n", peer->sock_fd);
455         if (peer->sock_fd < maxfd && FD_ISSET(peer->sock_fd, fds)) {
456             ivshmem_server_free_peer(server, peer);
457         }
458     }
459 
460     return 0;
461 }
462 
463 /* lookup peer from its id */
464 IvshmemServerPeer *
465 ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id)
466 {
467     IvshmemServerPeer *peer;
468 
469     QTAILQ_FOREACH(peer, &server->peer_list, next) {
470         if (peer->id == peer_id) {
471             return peer;
472         }
473     }
474     return NULL;
475 }
476 
477 /* dump our info, the list of peers their vectors on stdout */
478 void
479 ivshmem_server_dump(const IvshmemServer *server)
480 {
481     const IvshmemServerPeer *peer;
482     unsigned vector;
483 
484     /* dump peers */
485     QTAILQ_FOREACH(peer, &server->peer_list, next) {
486         printf("peer_id = %" PRId64 "\n", peer->id);
487 
488         for (vector = 0; vector < peer->vectors_count; vector++) {
489             printf("  vector %d is enabled (fd=%d)\n", vector,
490                    event_notifier_get_fd(&peer->vectors[vector]));
491         }
492     }
493 }
494