xref: /openbmc/qemu/util/oslib-posix.c (revision 77a8257e)
1 /*
2  * os-posix-lib.c
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2010 Red Hat, Inc.
6  *
7  * QEMU library functions on POSIX which are shared between QEMU and
8  * the QEMU tools.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a copy
11  * of this software and associated documentation files (the "Software"), to deal
12  * in the Software without restriction, including without limitation the rights
13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  * copies of the Software, and to permit persons to whom the Software is
15  * furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  * THE SOFTWARE.
27  */
28 
29 /* The following block of code temporarily renames the daemon() function so the
30    compiler does not see the warning associated with it in stdlib.h on OSX */
31 #ifdef __APPLE__
32 #define daemon qemu_fake_daemon_function
33 #include <stdlib.h>
34 #undef daemon
35 extern int daemon(int, int);
36 #endif
37 
38 #if defined(__linux__) && (defined(__x86_64__) || defined(__arm__))
39    /* Use 2 MiB alignment so transparent hugepages can be used by KVM.
40       Valgrind does not support alignments larger than 1 MiB,
41       therefore we need special code which handles running on Valgrind. */
42 #  define QEMU_VMALLOC_ALIGN (512 * 4096)
43 #elif defined(__linux__) && defined(__s390x__)
44    /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
45 #  define QEMU_VMALLOC_ALIGN (256 * 4096)
46 #else
47 #  define QEMU_VMALLOC_ALIGN getpagesize()
48 #endif
49 #define HUGETLBFS_MAGIC       0x958458f6
50 
51 #include <termios.h>
52 #include <unistd.h>
53 
54 #include <glib/gprintf.h>
55 
56 #include "config-host.h"
57 #include "sysemu/sysemu.h"
58 #include "trace.h"
59 #include "qemu/sockets.h"
60 #include <sys/mman.h>
61 #include <libgen.h>
62 #include <setjmp.h>
63 #include <sys/signal.h>
64 
65 #ifdef CONFIG_LINUX
66 #include <sys/syscall.h>
67 #include <sys/vfs.h>
68 #endif
69 
70 #ifdef __FreeBSD__
71 #include <sys/sysctl.h>
72 #endif
73 
/*
 * Return an identifier for the calling thread.
 *
 * On Linux this is the value of the gettid system call; on every other
 * host the process id from getpid() is returned instead.
 */
int qemu_get_thread_id(void)
{
    int id;

#if defined(__linux__)
    id = syscall(SYS_gettid);
#else
    id = getpid();
#endif
    return id;
}
82 
/*
 * Thin wrapper around daemon(): both arguments are forwarded unchanged and
 * the result of daemon() is returned as-is.
 */
int qemu_daemon(int nochdir, int noclose)
{
    int rc = daemon(nochdir, noclose);

    return rc;
}
87 
/*
 * Abort the process if an allocation failed.
 *
 * A non-NULL @ptr is handed back unchanged, so the call can wrap an
 * allocation expression.  A NULL @ptr is reported on stderr together with
 * the text for the current errno, after which abort() is called.
 */
void *qemu_oom_check(void *ptr)
{
    if (ptr != NULL) {
        return ptr;
    }

    fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
    abort();
}
96 
/*
 * Try to allocate @size bytes aligned to @alignment.
 *
 * Alignments smaller than a pointer are raised to sizeof(void *).
 * Returns the allocation, or NULL on failure; in the posix_memalign()
 * branch errno is set to the error code that call returned.
 */
void *qemu_try_memalign(size_t alignment, size_t size)
{
    void *mem;

    /* Never request less than pointer alignment. */
    if (alignment < sizeof(void *)) {
        alignment = sizeof(void *);
    }

#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
    {
        int err = posix_memalign(&mem, alignment, size);

        if (err != 0) {
            /* posix_memalign() reports errors through its return value. */
            errno = err;
            mem = NULL;
        }
    }
#elif defined(CONFIG_BSD)
    /* valloc() takes no alignment argument; @alignment is not passed on. */
    mem = valloc(size);
#else
    mem = memalign(alignment, size);
#endif
    trace_qemu_memalign(alignment, size, mem);
    return mem;
}
120 
/*
 * Aligned allocation that does not return on failure: the result of
 * qemu_try_memalign() is passed through qemu_oom_check(), which aborts
 * when it is NULL.
 */
void *qemu_memalign(size_t alignment, size_t size)
{
    void *mem = qemu_try_memalign(alignment, size);

    return qemu_oom_check(mem);
}
125 
126 /* alloc shared memory pages */
127 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
128 {
129     size_t align = QEMU_VMALLOC_ALIGN;
130     size_t total = size + align - getpagesize();
131     void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
132                      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
133     size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
134 
135     if (ptr == MAP_FAILED) {
136         return NULL;
137     }
138 
139     if (alignment) {
140         *alignment = align;
141     }
142     ptr += offset;
143     total -= offset;
144 
145     if (offset > 0) {
146         munmap(ptr - offset, offset);
147     }
148     if (total > size) {
149         munmap(ptr + size, total - size);
150     }
151 
152     trace_qemu_anon_ram_alloc(size, ptr);
153     return ptr;
154 }
155 
/*
 * Release @ptr with free() after emitting the qemu_vfree trace event.
 */
void qemu_vfree(void *ptr)
{
    trace_qemu_vfree(ptr);

    free(ptr);
}
161 
/*
 * Unmap @size bytes starting at @ptr.  The trace event is always emitted;
 * a NULL @ptr is otherwise ignored.
 */
void qemu_anon_ram_free(void *ptr, size_t size)
{
    trace_qemu_anon_ram_free(ptr, size);

    if (ptr == NULL) {
        return;
    }
    munmap(ptr, size);
}
169 
/*
 * Clear O_NONBLOCK in the file status flags of @fd, leaving the other
 * flags as reported by F_GETFL.
 */
void qemu_set_block(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    flags &= ~O_NONBLOCK;
    fcntl(fd, F_SETFL, flags);
}
176 
/*
 * Set O_NONBLOCK in the file status flags of @fd, leaving the other
 * flags as reported by F_GETFL.
 */
void qemu_set_nonblock(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    flags |= O_NONBLOCK;
    fcntl(fd, F_SETFL, flags);
}
183 
/*
 * Enable SO_REUSEADDR on socket @fd.
 *
 * The setsockopt() call is asserted to succeed; its result (0 whenever the
 * assertion holds) is returned to the caller.
 */
int socket_set_fast_reuse(int fd)
{
    int enable = 1;
    int ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
                         (const char *)&enable, sizeof(enable));

    assert(ret == 0);
    return ret;
}
195 
/*
 * Set FD_CLOEXEC in the file descriptor flags of @fd, leaving the other
 * flags as reported by F_GETFD.
 */
void qemu_set_cloexec(int fd)
{
    int fdflags = fcntl(fd, F_GETFD);

    fdflags |= FD_CLOEXEC;
    fcntl(fd, F_SETFD, fdflags);
}
202 
/*
 * Create a pipe with FD_CLOEXEC set on both file descriptors.
 *
 * When built with CONFIG_PIPE2, pipe2(O_CLOEXEC) is tried first and its
 * result is returned unless it failed with ENOSYS.  Otherwise a plain
 * pipe() is created and both ends are marked close-on-exec afterwards.
 *
 * Returns the result of the pipe2()/pipe() call that was used.
 */
int qemu_pipe(int pipefd[2])
{
    int ret;

#ifdef CONFIG_PIPE2
    ret = pipe2(pipefd, O_CLOEXEC);
    /* Only an ENOSYS failure falls through to the pipe() path. */
    if (!(ret == -1 && errno == ENOSYS)) {
        return ret;
    }
#endif
    ret = pipe(pipefd);
    if (ret != 0) {
        return ret;
    }

    qemu_set_cloexec(pipefd[0]);
    qemu_set_cloexec(pipefd[1]);
    return ret;
}
224 
225 int qemu_utimens(const char *path, const struct timespec *times)
226 {
227     struct timeval tv[2], tv_now;
228     struct stat st;
229     int i;
230 #ifdef CONFIG_UTIMENSAT
231     int ret;
232 
233     ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
234     if (ret != -1 || errno != ENOSYS) {
235         return ret;
236     }
237 #endif
238     /* Fallback: use utimes() instead of utimensat() */
239 
240     /* happy if special cases */
241     if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
242         return 0;
243     }
244     if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
245         return utimes(path, NULL);
246     }
247 
248     /* prepare for hard cases */
249     if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
250         gettimeofday(&tv_now, NULL);
251     }
252     if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
253         stat(path, &st);
254     }
255 
256     for (i = 0; i < 2; i++) {
257         if (times[i].tv_nsec == UTIME_NOW) {
258             tv[i].tv_sec = tv_now.tv_sec;
259             tv[i].tv_usec = tv_now.tv_usec;
260         } else if (times[i].tv_nsec == UTIME_OMIT) {
261             tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
262             tv[i].tv_usec = 0;
263         } else {
264             tv[i].tv_sec = times[i].tv_sec;
265             tv[i].tv_usec = times[i].tv_nsec / 1000;
266         }
267     }
268 
269     return utimes(path, &tv[0]);
270 }
271 
272 char *
273 qemu_get_local_state_pathname(const char *relative_pathname)
274 {
275     return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
276                            relative_pathname);
277 }
278 
/*
 * Switch the ECHO, ECHONL, ICANON and IEXTEN local-mode flags of the
 * terminal behind @fd on (@echo true) or off (@echo false).
 *
 * If the current settings cannot be read (for example because @fd is not a
 * terminal), nothing is changed: modifying and writing back an
 * uninitialised termios structure would apply indeterminate settings.
 */
void qemu_set_tty_echo(int fd, bool echo)
{
    struct termios tty;

    if (tcgetattr(fd, &tty) != 0) {
        return;
    }

    if (echo) {
        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
    } else {
        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
    }

    tcsetattr(fd, TCSANOW, &tty);
}
293 
294 static char exec_dir[PATH_MAX];
295 
296 void qemu_init_exec_dir(const char *argv0)
297 {
298     char *dir;
299     char *p = NULL;
300     char buf[PATH_MAX];
301 
302     assert(!exec_dir[0]);
303 
304 #if defined(__linux__)
305     {
306         int len;
307         len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
308         if (len > 0) {
309             buf[len] = 0;
310             p = buf;
311         }
312     }
313 #elif defined(__FreeBSD__)
314     {
315         static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
316         size_t len = sizeof(buf) - 1;
317 
318         *buf = '\0';
319         if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
320             *buf) {
321             buf[sizeof(buf) - 1] = '\0';
322             p = buf;
323         }
324     }
325 #endif
326     /* If we don't have any way of figuring out the actual executable
327        location then try argv[0].  */
328     if (!p) {
329         if (!argv0) {
330             return;
331         }
332         p = realpath(argv0, buf);
333         if (!p) {
334             return;
335         }
336     }
337     dir = dirname(p);
338 
339     pstrcpy(exec_dir, sizeof(exec_dir), dir);
340 }
341 
342 char *qemu_get_exec_dir(void)
343 {
344     return g_strdup(exec_dir);
345 }
346 
347 static sigjmp_buf sigjump;
348 
349 static void sigbus_handler(int signal)
350 {
351     siglongjmp(sigjump, 1);
352 }
353 
/*
 * Return the page size to use for memory backed by @fd.
 *
 * With CONFIG_LINUX, if @fd is not -1 and refers to a filesystem whose
 * type is HUGETLBFS_MAGIC, that filesystem's block size is returned.
 * In every other case the result is getpagesize().
 */
static size_t fd_getpagesize(int fd)
{
#ifdef CONFIG_LINUX
    if (fd != -1) {
        struct statfs fs;
        int rc;

        /* Retry for as long as the call is interrupted by a signal. */
        for (;;) {
            rc = fstatfs(fd, &fs);
            if (rc == 0 || errno != EINTR) {
                break;
            }
        }

        if (rc == 0 && fs.f_type == HUGETLBFS_MAGIC) {
            return fs.f_bsize;
        }
    }
#endif

    return getpagesize();
}
373 
374 void os_mem_prealloc(int fd, char *area, size_t memory)
375 {
376     int ret;
377     struct sigaction act, oldact;
378     sigset_t set, oldset;
379 
380     memset(&act, 0, sizeof(act));
381     act.sa_handler = &sigbus_handler;
382     act.sa_flags = 0;
383 
384     ret = sigaction(SIGBUS, &act, &oldact);
385     if (ret) {
386         perror("os_mem_prealloc: failed to install signal handler");
387         exit(1);
388     }
389 
390     /* unblock SIGBUS */
391     sigemptyset(&set);
392     sigaddset(&set, SIGBUS);
393     pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
394 
395     if (sigsetjmp(sigjump, 1)) {
396         fprintf(stderr, "os_mem_prealloc: Insufficient free host memory "
397                         "pages available to allocate guest RAM\n");
398         exit(1);
399     } else {
400         int i;
401         size_t hpagesize = fd_getpagesize(fd);
402         size_t numpages = DIV_ROUND_UP(memory, hpagesize);
403 
404         /* MAP_POPULATE silently ignores failures */
405         for (i = 0; i < numpages; i++) {
406             memset(area + (hpagesize * i), 0, 1);
407         }
408 
409         ret = sigaction(SIGBUS, &oldact, NULL);
410         if (ret) {
411             perror("os_mem_prealloc: failed to reinstall signal handler");
412             exit(1);
413         }
414 
415         pthread_sigmask(SIG_SETMASK, &oldset, NULL);
416     }
417 }
418