xref: /openbmc/qemu/util/oslib-posix.c (revision 9dc44aa5)
1baacf047SPaolo Bonzini /*
2baacf047SPaolo Bonzini  * os-posix-lib.c
3baacf047SPaolo Bonzini  *
4baacf047SPaolo Bonzini  * Copyright (c) 2003-2008 Fabrice Bellard
5baacf047SPaolo Bonzini  * Copyright (c) 2010 Red Hat, Inc.
6baacf047SPaolo Bonzini  *
7baacf047SPaolo Bonzini  * QEMU library functions on POSIX which are shared between QEMU and
8baacf047SPaolo Bonzini  * the QEMU tools.
9baacf047SPaolo Bonzini  *
10baacf047SPaolo Bonzini  * Permission is hereby granted, free of charge, to any person obtaining a copy
11baacf047SPaolo Bonzini  * of this software and associated documentation files (the "Software"), to deal
12baacf047SPaolo Bonzini  * in the Software without restriction, including without limitation the rights
13baacf047SPaolo Bonzini  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14baacf047SPaolo Bonzini  * copies of the Software, and to permit persons to whom the Software is
15baacf047SPaolo Bonzini  * furnished to do so, subject to the following conditions:
16baacf047SPaolo Bonzini  *
17baacf047SPaolo Bonzini  * The above copyright notice and this permission notice shall be included in
18baacf047SPaolo Bonzini  * all copies or substantial portions of the Software.
19baacf047SPaolo Bonzini  *
20baacf047SPaolo Bonzini  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21baacf047SPaolo Bonzini  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22baacf047SPaolo Bonzini  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23baacf047SPaolo Bonzini  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24baacf047SPaolo Bonzini  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25baacf047SPaolo Bonzini  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26baacf047SPaolo Bonzini  * THE SOFTWARE.
27baacf047SPaolo Bonzini  */
28baacf047SPaolo Bonzini 
29aafd7584SPeter Maydell #include "qemu/osdep.h"
3013401ba0SStefan Hajnoczi #include <termios.h>
3113401ba0SStefan Hajnoczi 
32e2ea3515SLaszlo Ersek #include <glib/gprintf.h>
33e2ea3515SLaszlo Ersek 
34baacf047SPaolo Bonzini #include "sysemu/sysemu.h"
35baacf047SPaolo Bonzini #include "trace.h"
36da34e65cSMarkus Armbruster #include "qapi/error.h"
37baacf047SPaolo Bonzini #include "qemu/sockets.h"
3810f5bff6SFam Zheng #include <libgen.h>
3938183310SPaolo Bonzini #include <sys/signal.h>
40f348b6d1SVeronia Bahaa #include "qemu/cutils.h"
41baacf047SPaolo Bonzini 
42baacf047SPaolo Bonzini #ifdef CONFIG_LINUX
43baacf047SPaolo Bonzini #include <sys/syscall.h>
44baacf047SPaolo Bonzini #endif
45baacf047SPaolo Bonzini 
4641975b26SAndreas Färber #ifdef __FreeBSD__
4741975b26SAndreas Färber #include <sys/sysctl.h>
48a7764f15SEd Maste #include <sys/user.h>
497dc9ae43SMichal Privoznik #include <libutil.h>
5041975b26SAndreas Färber #endif
5141975b26SAndreas Färber 
52a9c94277SMarkus Armbruster #include "qemu/mmap-alloc.h"
53794e8f30SMichael S. Tsirkin 
547d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE
557d992e4dSPeter Lieven #include "qemu/error-report.h"
567d992e4dSPeter Lieven #endif
577d992e4dSPeter Lieven 
581e356fc1SJitendra Kolhe #define MAX_MEM_PREALLOC_THREAD_COUNT (MIN(sysconf(_SC_NPROCESSORS_ONLN), 16))
591e356fc1SJitendra Kolhe 
601e356fc1SJitendra Kolhe struct MemsetThread {
611e356fc1SJitendra Kolhe     char *addr;
621e356fc1SJitendra Kolhe     uint64_t numpages;
631e356fc1SJitendra Kolhe     uint64_t hpagesize;
641e356fc1SJitendra Kolhe     QemuThread pgthread;
651e356fc1SJitendra Kolhe     sigjmp_buf env;
661e356fc1SJitendra Kolhe };
671e356fc1SJitendra Kolhe typedef struct MemsetThread MemsetThread;
681e356fc1SJitendra Kolhe 
691e356fc1SJitendra Kolhe static MemsetThread *memset_thread;
701e356fc1SJitendra Kolhe static int memset_num_threads;
711e356fc1SJitendra Kolhe static bool memset_thread_failed;
721e356fc1SJitendra Kolhe 
/*
 * Return an identifier for the calling thread: the kernel thread id on
 * Linux, the process id everywhere else.
 */
int qemu_get_thread_id(void)
{
    int tid;

#if defined(__linux__)
    tid = syscall(SYS_gettid);
#else
    tid = getpid();
#endif
    return tid;
}
81baacf047SPaolo Bonzini 
/* Thin wrapper around daemon(3); arguments and result are passed through. */
int qemu_daemon(int nochdir, int noclose)
{
    int rc = daemon(nochdir, noclose);

    return rc;
}
86baacf047SPaolo Bonzini 
/*
 * Pass @ptr through unchanged when it is non-NULL; otherwise print a
 * diagnostic (including strerror(errno)) to stderr and abort().
 */
void *qemu_oom_check(void *ptr)
{
    if (ptr != NULL) {
        return ptr;
    }

    fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
    abort();
}
95baacf047SPaolo Bonzini 
/*
 * Allocate @size bytes aligned to at least @alignment bytes.
 *
 * Alignments smaller than a pointer are raised to sizeof(void *).
 * Returns NULL on failure; on the posix_memalign() path errno is set to
 * the error code it returned.  Note that the CONFIG_BSD path uses
 * valloc(), which does not take @alignment into account.
 */
void *qemu_try_memalign(size_t alignment, size_t size)
{
    void *mem;

    alignment = (alignment < sizeof(void *)) ? sizeof(void *) : alignment;

#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
    int rc = posix_memalign(&mem, alignment, size);

    if (rc != 0) {
        errno = rc;
        mem = NULL;
    }
#elif defined(CONFIG_BSD)
    mem = valloc(size);
#else
    mem = memalign(alignment, size);
#endif
    trace_qemu_memalign(alignment, size, mem);
    return mem;
}
119baacf047SPaolo Bonzini 
/*
 * Like qemu_try_memalign(), but the result is run through
 * qemu_oom_check(), so a failed allocation aborts instead of returning NULL.
 */
void *qemu_memalign(size_t alignment, size_t size)
{
    void *mem = qemu_try_memalign(alignment, size);

    return qemu_oom_check(mem);
}
1247d2a35ccSKevin Wolf 
125baacf047SPaolo Bonzini /* alloc shared memory pages */
126a2b257d6SIgor Mammedov void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
127baacf047SPaolo Bonzini {
128baacf047SPaolo Bonzini     size_t align = QEMU_VMALLOC_ALIGN;
129794e8f30SMichael S. Tsirkin     void *ptr = qemu_ram_mmap(-1, size, align, false);
130baacf047SPaolo Bonzini 
1317dda5dc8SPaolo Bonzini     if (ptr == MAP_FAILED) {
13239228250SMarkus Armbruster         return NULL;
133baacf047SPaolo Bonzini     }
134baacf047SPaolo Bonzini 
135a2b257d6SIgor Mammedov     if (alignment) {
136a2b257d6SIgor Mammedov         *alignment = align;
137a2b257d6SIgor Mammedov     }
138c2dfc5baSMichael S. Tsirkin 
1396eebf958SPaolo Bonzini     trace_qemu_anon_ram_alloc(size, ptr);
140baacf047SPaolo Bonzini     return ptr;
141baacf047SPaolo Bonzini }
142baacf047SPaolo Bonzini 
/* Emit the qemu_vfree trace event for @ptr, then hand it to free(). */
void qemu_vfree(void *ptr)
{
    trace_qemu_vfree(ptr);

    free(ptr);
}
148baacf047SPaolo Bonzini 
/*
 * Emit the qemu_anon_ram_free trace event, then unmap the @size bytes at
 * @ptr with qemu_ram_munmap().
 */
void qemu_anon_ram_free(void *ptr, size_t size)
{
    trace_qemu_anon_ram_free(ptr, size);

    qemu_ram_munmap(ptr, size);
}
154e7a09b92SPaolo Bonzini 
/* Clear O_NONBLOCK in the file status flags of @fd.  Errors are ignored. */
void qemu_set_block(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
}
161baacf047SPaolo Bonzini 
/* Set O_NONBLOCK in the file status flags of @fd.  Errors are ignored. */
void qemu_set_nonblock(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
168baacf047SPaolo Bonzini 
/*
 * Enable SO_REUSEADDR on socket @fd.
 *
 * Failure of setsockopt() trips an assertion; the (zero) result is
 * returned otherwise.
 */
int socket_set_fast_reuse(int fd)
{
    int enable = 1;
    int rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
                        (const char *)&enable, sizeof(enable));

    assert(rc == 0);

    return rc;
}
180606600a1SSebastian Ottlik 
/* Set FD_CLOEXEC in the descriptor flags of @fd.  Errors are ignored. */
void qemu_set_cloexec(int fd)
{
    int fdflags = fcntl(fd, F_GETFD);

    fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC);
}
187baacf047SPaolo Bonzini 
/*
 * Create a pipe whose two descriptors both have FD_CLOEXEC set.
 *
 * When CONFIG_PIPE2 is defined, pipe2(O_CLOEXEC) is tried first and its
 * result returned unless it failed with ENOSYS; in that case, or without
 * CONFIG_PIPE2, pipe() is used and the flag is set afterwards.
 * Returns the result of the pipe2()/pipe() call.
 */
int qemu_pipe(int pipefd[2])
{
    int rc;

#ifdef CONFIG_PIPE2
    rc = pipe2(pipefd, O_CLOEXEC);
    if (!(rc == -1 && errno == ENOSYS)) {
        return rc;
    }
#endif
    rc = pipe(pipefd);
    if (rc != 0) {
        return rc;
    }

    qemu_set_cloexec(pipefd[0]);
    qemu_set_cloexec(pipefd[1]);
    return rc;
}
209baacf047SPaolo Bonzini 
210baacf047SPaolo Bonzini int qemu_utimens(const char *path, const struct timespec *times)
211baacf047SPaolo Bonzini {
212baacf047SPaolo Bonzini     struct timeval tv[2], tv_now;
213baacf047SPaolo Bonzini     struct stat st;
214baacf047SPaolo Bonzini     int i;
215baacf047SPaolo Bonzini #ifdef CONFIG_UTIMENSAT
216baacf047SPaolo Bonzini     int ret;
217baacf047SPaolo Bonzini 
218baacf047SPaolo Bonzini     ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
219baacf047SPaolo Bonzini     if (ret != -1 || errno != ENOSYS) {
220baacf047SPaolo Bonzini         return ret;
221baacf047SPaolo Bonzini     }
222baacf047SPaolo Bonzini #endif
223baacf047SPaolo Bonzini     /* Fallback: use utimes() instead of utimensat() */
224baacf047SPaolo Bonzini 
225baacf047SPaolo Bonzini     /* happy if special cases */
226baacf047SPaolo Bonzini     if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
227baacf047SPaolo Bonzini         return 0;
228baacf047SPaolo Bonzini     }
229baacf047SPaolo Bonzini     if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
230baacf047SPaolo Bonzini         return utimes(path, NULL);
231baacf047SPaolo Bonzini     }
232baacf047SPaolo Bonzini 
233baacf047SPaolo Bonzini     /* prepare for hard cases */
234baacf047SPaolo Bonzini     if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
235baacf047SPaolo Bonzini         gettimeofday(&tv_now, NULL);
236baacf047SPaolo Bonzini     }
237baacf047SPaolo Bonzini     if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
238baacf047SPaolo Bonzini         stat(path, &st);
239baacf047SPaolo Bonzini     }
240baacf047SPaolo Bonzini 
241baacf047SPaolo Bonzini     for (i = 0; i < 2; i++) {
242baacf047SPaolo Bonzini         if (times[i].tv_nsec == UTIME_NOW) {
243baacf047SPaolo Bonzini             tv[i].tv_sec = tv_now.tv_sec;
244baacf047SPaolo Bonzini             tv[i].tv_usec = tv_now.tv_usec;
245baacf047SPaolo Bonzini         } else if (times[i].tv_nsec == UTIME_OMIT) {
246baacf047SPaolo Bonzini             tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
247baacf047SPaolo Bonzini             tv[i].tv_usec = 0;
248baacf047SPaolo Bonzini         } else {
249baacf047SPaolo Bonzini             tv[i].tv_sec = times[i].tv_sec;
250baacf047SPaolo Bonzini             tv[i].tv_usec = times[i].tv_nsec / 1000;
251baacf047SPaolo Bonzini         }
252baacf047SPaolo Bonzini     }
253baacf047SPaolo Bonzini 
254baacf047SPaolo Bonzini     return utimes(path, &tv[0]);
255baacf047SPaolo Bonzini }
256e2ea3515SLaszlo Ersek 
257e2ea3515SLaszlo Ersek char *
258e2ea3515SLaszlo Ersek qemu_get_local_state_pathname(const char *relative_pathname)
259e2ea3515SLaszlo Ersek {
260e2ea3515SLaszlo Ersek     return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
261e2ea3515SLaszlo Ersek                            relative_pathname);
262e2ea3515SLaszlo Ersek }
26313401ba0SStefan Hajnoczi 
/*
 * Switch the ECHO, ECHONL, ICANON and IEXTEN local-mode flags of the
 * terminal referred to by @fd on (@echo true) or off (@echo false).
 *
 * If the current attributes cannot be read (for example because @fd is
 * not a terminal) nothing is changed; previously an uninitialised
 * termios structure would have been modified and written back.
 */
void qemu_set_tty_echo(int fd, bool echo)
{
    struct termios tty;

    if (tcgetattr(fd, &tty) != 0) {
        return;
    }

    if (echo) {
        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
    } else {
        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
    }

    tcsetattr(fd, TCSANOW, &tty);
}
27810f5bff6SFam Zheng 
27910f5bff6SFam Zheng static char exec_dir[PATH_MAX];
28010f5bff6SFam Zheng 
28110f5bff6SFam Zheng void qemu_init_exec_dir(const char *argv0)
28210f5bff6SFam Zheng {
28310f5bff6SFam Zheng     char *dir;
28410f5bff6SFam Zheng     char *p = NULL;
28510f5bff6SFam Zheng     char buf[PATH_MAX];
28610f5bff6SFam Zheng 
28710f5bff6SFam Zheng     assert(!exec_dir[0]);
28810f5bff6SFam Zheng 
28910f5bff6SFam Zheng #if defined(__linux__)
29010f5bff6SFam Zheng     {
29110f5bff6SFam Zheng         int len;
29210f5bff6SFam Zheng         len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
29310f5bff6SFam Zheng         if (len > 0) {
29410f5bff6SFam Zheng             buf[len] = 0;
29510f5bff6SFam Zheng             p = buf;
29610f5bff6SFam Zheng         }
29710f5bff6SFam Zheng     }
29810f5bff6SFam Zheng #elif defined(__FreeBSD__)
29910f5bff6SFam Zheng     {
30010f5bff6SFam Zheng         static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
30110f5bff6SFam Zheng         size_t len = sizeof(buf) - 1;
30210f5bff6SFam Zheng 
30310f5bff6SFam Zheng         *buf = '\0';
30410f5bff6SFam Zheng         if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
30510f5bff6SFam Zheng             *buf) {
30610f5bff6SFam Zheng             buf[sizeof(buf) - 1] = '\0';
30710f5bff6SFam Zheng             p = buf;
30810f5bff6SFam Zheng         }
30910f5bff6SFam Zheng     }
31010f5bff6SFam Zheng #endif
31110f5bff6SFam Zheng     /* If we don't have any way of figuring out the actual executable
31210f5bff6SFam Zheng        location then try argv[0].  */
31310f5bff6SFam Zheng     if (!p) {
31410f5bff6SFam Zheng         if (!argv0) {
31510f5bff6SFam Zheng             return;
31610f5bff6SFam Zheng         }
31710f5bff6SFam Zheng         p = realpath(argv0, buf);
31810f5bff6SFam Zheng         if (!p) {
31910f5bff6SFam Zheng             return;
32010f5bff6SFam Zheng         }
32110f5bff6SFam Zheng     }
32255ad781cSWei Jiangang     dir = g_path_get_dirname(p);
32310f5bff6SFam Zheng 
32410f5bff6SFam Zheng     pstrcpy(exec_dir, sizeof(exec_dir), dir);
32555ad781cSWei Jiangang 
32655ad781cSWei Jiangang     g_free(dir);
32710f5bff6SFam Zheng }
32810f5bff6SFam Zheng 
32910f5bff6SFam Zheng char *qemu_get_exec_dir(void)
33010f5bff6SFam Zheng {
33110f5bff6SFam Zheng     return g_strdup(exec_dir);
33210f5bff6SFam Zheng }
33338183310SPaolo Bonzini 
33438183310SPaolo Bonzini static void sigbus_handler(int signal)
33538183310SPaolo Bonzini {
3361e356fc1SJitendra Kolhe     int i;
3371e356fc1SJitendra Kolhe     if (memset_thread) {
3381e356fc1SJitendra Kolhe         for (i = 0; i < memset_num_threads; i++) {
3391e356fc1SJitendra Kolhe             if (qemu_thread_is_self(&memset_thread[i].pgthread)) {
3401e356fc1SJitendra Kolhe                 siglongjmp(memset_thread[i].env, 1);
3411e356fc1SJitendra Kolhe             }
3421e356fc1SJitendra Kolhe         }
3431e356fc1SJitendra Kolhe     }
34438183310SPaolo Bonzini }
34538183310SPaolo Bonzini 
3461e356fc1SJitendra Kolhe static void *do_touch_pages(void *arg)
3471e356fc1SJitendra Kolhe {
3481e356fc1SJitendra Kolhe     MemsetThread *memset_args = (MemsetThread *)arg;
3491e356fc1SJitendra Kolhe     char *addr = memset_args->addr;
3501e356fc1SJitendra Kolhe     uint64_t numpages = memset_args->numpages;
3511e356fc1SJitendra Kolhe     uint64_t hpagesize = memset_args->hpagesize;
3521e356fc1SJitendra Kolhe     sigset_t set, oldset;
3531e356fc1SJitendra Kolhe     int i = 0;
3541e356fc1SJitendra Kolhe 
3551e356fc1SJitendra Kolhe     /* unblock SIGBUS */
3561e356fc1SJitendra Kolhe     sigemptyset(&set);
3571e356fc1SJitendra Kolhe     sigaddset(&set, SIGBUS);
3581e356fc1SJitendra Kolhe     pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
3591e356fc1SJitendra Kolhe 
3601e356fc1SJitendra Kolhe     if (sigsetjmp(memset_args->env, 1)) {
3611e356fc1SJitendra Kolhe         memset_thread_failed = true;
3621e356fc1SJitendra Kolhe     } else {
3631e356fc1SJitendra Kolhe         for (i = 0; i < numpages; i++) {
364*9dc44aa5SDaniel P. Berrange             /*
365*9dc44aa5SDaniel P. Berrange              * Read & write back the same value, so we don't
366*9dc44aa5SDaniel P. Berrange              * corrupt existing user/app data that might be
367*9dc44aa5SDaniel P. Berrange              * stored.
368*9dc44aa5SDaniel P. Berrange              *
369*9dc44aa5SDaniel P. Berrange              * 'volatile' to stop compiler optimizing this away
370*9dc44aa5SDaniel P. Berrange              * to a no-op
371*9dc44aa5SDaniel P. Berrange              *
372*9dc44aa5SDaniel P. Berrange              * TODO: get a better solution from kernel so we
373*9dc44aa5SDaniel P. Berrange              * don't need to write at all so we don't cause
374*9dc44aa5SDaniel P. Berrange              * wear on the storage backing the region...
375*9dc44aa5SDaniel P. Berrange              */
376*9dc44aa5SDaniel P. Berrange             *(volatile char *)addr = *addr;
3771e356fc1SJitendra Kolhe             addr += hpagesize;
3781e356fc1SJitendra Kolhe         }
3791e356fc1SJitendra Kolhe     }
3801e356fc1SJitendra Kolhe     pthread_sigmask(SIG_SETMASK, &oldset, NULL);
3811e356fc1SJitendra Kolhe     return NULL;
3821e356fc1SJitendra Kolhe }
3831e356fc1SJitendra Kolhe 
3841e356fc1SJitendra Kolhe static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
3851e356fc1SJitendra Kolhe                             int smp_cpus)
3861e356fc1SJitendra Kolhe {
3871e356fc1SJitendra Kolhe     uint64_t numpages_per_thread, size_per_thread;
3881e356fc1SJitendra Kolhe     char *addr = area;
3891e356fc1SJitendra Kolhe     int i = 0;
3901e356fc1SJitendra Kolhe 
3911e356fc1SJitendra Kolhe     memset_thread_failed = false;
3921e356fc1SJitendra Kolhe     memset_num_threads = MIN(smp_cpus, MAX_MEM_PREALLOC_THREAD_COUNT);
3931e356fc1SJitendra Kolhe     memset_thread = g_new0(MemsetThread, memset_num_threads);
3941e356fc1SJitendra Kolhe     numpages_per_thread = (numpages / memset_num_threads);
3951e356fc1SJitendra Kolhe     size_per_thread = (hpagesize * numpages_per_thread);
3961e356fc1SJitendra Kolhe     for (i = 0; i < memset_num_threads; i++) {
3971e356fc1SJitendra Kolhe         memset_thread[i].addr = addr;
3981e356fc1SJitendra Kolhe         memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
3991e356fc1SJitendra Kolhe                                     numpages : numpages_per_thread;
4001e356fc1SJitendra Kolhe         memset_thread[i].hpagesize = hpagesize;
4011e356fc1SJitendra Kolhe         qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
4021e356fc1SJitendra Kolhe                            do_touch_pages, &memset_thread[i],
4031e356fc1SJitendra Kolhe                            QEMU_THREAD_JOINABLE);
4041e356fc1SJitendra Kolhe         addr += size_per_thread;
4051e356fc1SJitendra Kolhe         numpages -= numpages_per_thread;
4061e356fc1SJitendra Kolhe     }
4071e356fc1SJitendra Kolhe     for (i = 0; i < memset_num_threads; i++) {
4081e356fc1SJitendra Kolhe         qemu_thread_join(&memset_thread[i].pgthread);
4091e356fc1SJitendra Kolhe     }
4101e356fc1SJitendra Kolhe     g_free(memset_thread);
4111e356fc1SJitendra Kolhe     memset_thread = NULL;
4121e356fc1SJitendra Kolhe 
4131e356fc1SJitendra Kolhe     return memset_thread_failed;
4141e356fc1SJitendra Kolhe }
4151e356fc1SJitendra Kolhe 
4161e356fc1SJitendra Kolhe void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
4171e356fc1SJitendra Kolhe                      Error **errp)
41838183310SPaolo Bonzini {
419b7bf8f56SStefan Weil     int ret;
42038183310SPaolo Bonzini     struct sigaction act, oldact;
4211e356fc1SJitendra Kolhe     size_t hpagesize = qemu_fd_getpagesize(fd);
4221e356fc1SJitendra Kolhe     size_t numpages = DIV_ROUND_UP(memory, hpagesize);
42338183310SPaolo Bonzini 
42438183310SPaolo Bonzini     memset(&act, 0, sizeof(act));
42538183310SPaolo Bonzini     act.sa_handler = &sigbus_handler;
42638183310SPaolo Bonzini     act.sa_flags = 0;
42738183310SPaolo Bonzini 
42838183310SPaolo Bonzini     ret = sigaction(SIGBUS, &act, &oldact);
42938183310SPaolo Bonzini     if (ret) {
430056b68afSIgor Mammedov         error_setg_errno(errp, errno,
431056b68afSIgor Mammedov             "os_mem_prealloc: failed to install signal handler");
432056b68afSIgor Mammedov         return;
43338183310SPaolo Bonzini     }
43438183310SPaolo Bonzini 
4351e356fc1SJitendra Kolhe     /* touch pages simultaneously */
4361e356fc1SJitendra Kolhe     if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) {
437056b68afSIgor Mammedov         error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
438404ac83eSMichal Privoznik             "pages available to allocate guest RAM\n");
439056b68afSIgor Mammedov     }
44038183310SPaolo Bonzini 
44138183310SPaolo Bonzini     ret = sigaction(SIGBUS, &oldact, NULL);
44238183310SPaolo Bonzini     if (ret) {
443056b68afSIgor Mammedov         /* Terminate QEMU since it can't recover from error */
44438183310SPaolo Bonzini         perror("os_mem_prealloc: failed to reinstall signal handler");
44538183310SPaolo Bonzini         exit(1);
44638183310SPaolo Bonzini     }
44738183310SPaolo Bonzini }
448d57e4e48SDaniel P. Berrange 
449d57e4e48SDaniel P. Berrange 
/* Terminal attributes of fd 0 as saved by term_init(). */
static struct termios oldtty;

/* Put the saved attributes back on fd 0; the result is not checked. */
static void term_exit(void)
{
    struct termios *saved = &oldtty;

    tcsetattr(0, TCSANOW, saved);
}
456d57e4e48SDaniel P. Berrange 
457d57e4e48SDaniel P. Berrange static void term_init(void)
458d57e4e48SDaniel P. Berrange {
459d57e4e48SDaniel P. Berrange     struct termios tty;
460d57e4e48SDaniel P. Berrange 
461d57e4e48SDaniel P. Berrange     tcgetattr(0, &tty);
462d57e4e48SDaniel P. Berrange     oldtty = tty;
463d57e4e48SDaniel P. Berrange 
464d57e4e48SDaniel P. Berrange     tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
465d57e4e48SDaniel P. Berrange                           |INLCR|IGNCR|ICRNL|IXON);
466d57e4e48SDaniel P. Berrange     tty.c_oflag |= OPOST;
467d57e4e48SDaniel P. Berrange     tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
468d57e4e48SDaniel P. Berrange     tty.c_cflag &= ~(CSIZE|PARENB);
469d57e4e48SDaniel P. Berrange     tty.c_cflag |= CS8;
470d57e4e48SDaniel P. Berrange     tty.c_cc[VMIN] = 1;
471d57e4e48SDaniel P. Berrange     tty.c_cc[VTIME] = 0;
472d57e4e48SDaniel P. Berrange 
473d57e4e48SDaniel P. Berrange     tcsetattr(0, TCSANOW, &tty);
474d57e4e48SDaniel P. Berrange 
475d57e4e48SDaniel P. Berrange     atexit(term_exit);
476d57e4e48SDaniel P. Berrange }
477d57e4e48SDaniel P. Berrange 
/*
 * Prompt with "password: " and read a line from fd 0 with terminal echo
 * disabled.
 *
 * At most @buf_size - 1 characters are stored in @buf; further input up
 * to the line end is read and discarded.  @buf is NUL-terminated whenever
 * @buf_size is positive.
 *
 * Returns 0 when a line terminator ('\r' or '\n') was read, -1 on end of
 * input or on a read error other than EAGAIN/EINTR.
 */
int qemu_read_password(char *buf, int buf_size)
{
    uint8_t ch;
    int i, ret;

    printf("password: ");
    fflush(stdout);
    term_init();
    i = 0;
    for (;;) {
        ret = read(0, &ch, 1);
        if (ret == -1) {
            if (errno == EAGAIN || errno == EINTR) {
                continue;
            } else {
                break;
            }
        } else if (ret == 0) {
            ret = -1;
            break;
        } else {
            if (ch == '\r' ||
                ch == '\n') {
                ret = 0;
                break;
            }
            if (i < (buf_size - 1)) {
                buf[i++] = ch;
            }
        }
    }
    term_exit();
    /* With no room at all, even the terminator would be out of bounds */
    if (buf_size > 0) {
        buf[i] = '\0';
    }
    printf("\n");
    return ret;
}
51457cb38b3SDaniel P. Berrange 
51557cb38b3SDaniel P. Berrange 
/*
 * Look up a name for process @pid.
 *
 * On FreeBSD this is a copy of the ki_comm field returned by
 * kinfo_getproc(); elsewhere it is the content of /proc/<pid>/cmdline as
 * read by g_file_get_contents().  Returns NULL when the lookup fails.
 */
char *qemu_get_pid_name(pid_t pid)
{
    char *result = NULL;

#if defined(__FreeBSD__)
    /* BSDs don't have /proc, but they provide a nice substitute */
    struct kinfo_proc *info = kinfo_getproc(pid);

    if (info) {
        result = g_strdup(info->ki_comm);
        free(info);
    }
#else
    /* Assume a system with reasonable procfs */
    size_t content_len;
    char *cmdline_path = g_strdup_printf("/proc/%d/cmdline", pid);

    g_file_get_contents(cmdline_path, &result, &content_len, NULL);
    g_free(cmdline_path);
#endif

    return result;
}
5407dc9ae43SMichal Privoznik 
5417dc9ae43SMichal Privoznik 
54257cb38b3SDaniel P. Berrange pid_t qemu_fork(Error **errp)
54357cb38b3SDaniel P. Berrange {
54457cb38b3SDaniel P. Berrange     sigset_t oldmask, newmask;
54557cb38b3SDaniel P. Berrange     struct sigaction sig_action;
54657cb38b3SDaniel P. Berrange     int saved_errno;
54757cb38b3SDaniel P. Berrange     pid_t pid;
54857cb38b3SDaniel P. Berrange 
54957cb38b3SDaniel P. Berrange     /*
55057cb38b3SDaniel P. Berrange      * Need to block signals now, so that child process can safely
55157cb38b3SDaniel P. Berrange      * kill off caller's signal handlers without a race.
55257cb38b3SDaniel P. Berrange      */
55357cb38b3SDaniel P. Berrange     sigfillset(&newmask);
55457cb38b3SDaniel P. Berrange     if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
55557cb38b3SDaniel P. Berrange         error_setg_errno(errp, errno,
55657cb38b3SDaniel P. Berrange                          "cannot block signals");
55757cb38b3SDaniel P. Berrange         return -1;
55857cb38b3SDaniel P. Berrange     }
55957cb38b3SDaniel P. Berrange 
56057cb38b3SDaniel P. Berrange     pid = fork();
56157cb38b3SDaniel P. Berrange     saved_errno = errno;
56257cb38b3SDaniel P. Berrange 
56357cb38b3SDaniel P. Berrange     if (pid < 0) {
56457cb38b3SDaniel P. Berrange         /* attempt to restore signal mask, but ignore failure, to
56557cb38b3SDaniel P. Berrange          * avoid obscuring the fork failure */
56657cb38b3SDaniel P. Berrange         (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
56757cb38b3SDaniel P. Berrange         error_setg_errno(errp, saved_errno,
56857cb38b3SDaniel P. Berrange                          "cannot fork child process");
56957cb38b3SDaniel P. Berrange         errno = saved_errno;
57057cb38b3SDaniel P. Berrange         return -1;
57157cb38b3SDaniel P. Berrange     } else if (pid) {
57257cb38b3SDaniel P. Berrange         /* parent process */
57357cb38b3SDaniel P. Berrange 
57457cb38b3SDaniel P. Berrange         /* Restore our original signal mask now that the child is
57557cb38b3SDaniel P. Berrange          * safely running. Only documented failures are EFAULT (not
57657cb38b3SDaniel P. Berrange          * possible, since we are using just-grabbed mask) or EINVAL
57757cb38b3SDaniel P. Berrange          * (not possible, since we are using correct arguments).  */
57857cb38b3SDaniel P. Berrange         (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
57957cb38b3SDaniel P. Berrange     } else {
58057cb38b3SDaniel P. Berrange         /* child process */
58157cb38b3SDaniel P. Berrange         size_t i;
58257cb38b3SDaniel P. Berrange 
58357cb38b3SDaniel P. Berrange         /* Clear out all signal handlers from parent so nothing
58457cb38b3SDaniel P. Berrange          * unexpected can happen in our child once we unblock
58557cb38b3SDaniel P. Berrange          * signals */
58657cb38b3SDaniel P. Berrange         sig_action.sa_handler = SIG_DFL;
58757cb38b3SDaniel P. Berrange         sig_action.sa_flags = 0;
58857cb38b3SDaniel P. Berrange         sigemptyset(&sig_action.sa_mask);
58957cb38b3SDaniel P. Berrange 
59057cb38b3SDaniel P. Berrange         for (i = 1; i < NSIG; i++) {
59157cb38b3SDaniel P. Berrange             /* Only possible errors are EFAULT or EINVAL The former
59257cb38b3SDaniel P. Berrange              * won't happen, the latter we expect, so no need to check
59357cb38b3SDaniel P. Berrange              * return value */
59457cb38b3SDaniel P. Berrange             (void)sigaction(i, &sig_action, NULL);
59557cb38b3SDaniel P. Berrange         }
59657cb38b3SDaniel P. Berrange 
59757cb38b3SDaniel P. Berrange         /* Unmask all signals in child, since we've no idea what the
59857cb38b3SDaniel P. Berrange          * caller's done with their signal mask and don't want to
59957cb38b3SDaniel P. Berrange          * propagate that to children */
60057cb38b3SDaniel P. Berrange         sigemptyset(&newmask);
60157cb38b3SDaniel P. Berrange         if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
60257cb38b3SDaniel P. Berrange             Error *local_err = NULL;
60357cb38b3SDaniel P. Berrange             error_setg_errno(&local_err, errno,
60457cb38b3SDaniel P. Berrange                              "cannot unblock signals");
60557cb38b3SDaniel P. Berrange             error_report_err(local_err);
60657cb38b3SDaniel P. Berrange             _exit(1);
60757cb38b3SDaniel P. Berrange         }
60857cb38b3SDaniel P. Berrange     }
60957cb38b3SDaniel P. Berrange     return pid;
61057cb38b3SDaniel P. Berrange }
6118737d9e0SPeter Lieven 
6128737d9e0SPeter Lieven void *qemu_alloc_stack(size_t *sz)
6138737d9e0SPeter Lieven {
6148737d9e0SPeter Lieven     void *ptr, *guardpage;
6157d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE
6167d992e4dSPeter Lieven     void *ptr2;
6177d992e4dSPeter Lieven #endif
6188737d9e0SPeter Lieven     size_t pagesz = getpagesize();
6198737d9e0SPeter Lieven #ifdef _SC_THREAD_STACK_MIN
6208737d9e0SPeter Lieven     /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
6218737d9e0SPeter Lieven     long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
6228737d9e0SPeter Lieven     *sz = MAX(MAX(min_stack_sz, 0), *sz);
6238737d9e0SPeter Lieven #endif
6248737d9e0SPeter Lieven     /* adjust stack size to a multiple of the page size */
6258737d9e0SPeter Lieven     *sz = ROUND_UP(*sz, pagesz);
6268737d9e0SPeter Lieven     /* allocate one extra page for the guard page */
6278737d9e0SPeter Lieven     *sz += pagesz;
6288737d9e0SPeter Lieven 
6298737d9e0SPeter Lieven     ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE,
6308737d9e0SPeter Lieven                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
6318737d9e0SPeter Lieven     if (ptr == MAP_FAILED) {
6328737d9e0SPeter Lieven         abort();
6338737d9e0SPeter Lieven     }
6348737d9e0SPeter Lieven 
6358737d9e0SPeter Lieven #if defined(HOST_IA64)
6368737d9e0SPeter Lieven     /* separate register stack */
6378737d9e0SPeter Lieven     guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
6388737d9e0SPeter Lieven #elif defined(HOST_HPPA)
6398737d9e0SPeter Lieven     /* stack grows up */
6408737d9e0SPeter Lieven     guardpage = ptr + *sz - pagesz;
6418737d9e0SPeter Lieven #else
6428737d9e0SPeter Lieven     /* stack grows down */
6438737d9e0SPeter Lieven     guardpage = ptr;
6448737d9e0SPeter Lieven #endif
6458737d9e0SPeter Lieven     if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
6468737d9e0SPeter Lieven         abort();
6478737d9e0SPeter Lieven     }
6488737d9e0SPeter Lieven 
6497d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE
6507d992e4dSPeter Lieven     for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
6517d992e4dSPeter Lieven         *(uint32_t *)ptr2 = 0xdeadbeaf;
6527d992e4dSPeter Lieven     }
6537d992e4dSPeter Lieven #endif
6547d992e4dSPeter Lieven 
6558737d9e0SPeter Lieven     return ptr;
6568737d9e0SPeter Lieven }
6578737d9e0SPeter Lieven 
#ifdef CONFIG_DEBUG_STACK_USAGE
/* Largest stack usage, in bytes, reported so far by the current thread. */
static __thread unsigned int max_stack_usage;
#endif

/*
 * Unmap a stack mapping.
 *
 * @stack: base address of the mapping
 * @sz: size of the mapping in bytes
 *
 * When CONFIG_DEBUG_STACK_USAGE is defined, the mapping is first scanned
 * upwards, starting one page above @stack, for the first 32-bit word that
 * no longer holds the 0xdeadbeaf fill pattern.  The distance from that
 * word to the end of the mapping is taken as the stack usage, and a
 * message is reported whenever it exceeds this thread's previous maximum.
 */
void qemu_free_stack(void *stack, size_t sz)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
    char *base = stack;
    char *limit = base + sz;
    char *cursor = base + getpagesize();
    unsigned int usage;

    /* Skip over every word that still carries the fill pattern. */
    while (cursor < limit && *(uint32_t *)cursor == 0xdeadbeaf) {
        cursor += sizeof(uint32_t);
    }

    usage = sz - (uintptr_t) (cursor - base);
    if (usage > max_stack_usage) {
        error_report("thread %d max stack usage increased from %u to %u",
                     qemu_get_thread_id(), max_stack_usage, usage);
        max_stack_usage = usage;
    }
#endif

    munmap(stack, sz);
}
684d98d4072SPaolo Bonzini 
685d98d4072SPaolo Bonzini void sigaction_invoke(struct sigaction *action,
686d98d4072SPaolo Bonzini                       struct qemu_signalfd_siginfo *info)
687d98d4072SPaolo Bonzini {
688d98d4072SPaolo Bonzini     siginfo_t si = { 0 };
689d98d4072SPaolo Bonzini     si.si_signo = info->ssi_signo;
690d98d4072SPaolo Bonzini     si.si_errno = info->ssi_errno;
691d98d4072SPaolo Bonzini     si.si_code = info->ssi_code;
692d98d4072SPaolo Bonzini 
693d98d4072SPaolo Bonzini     /* Convert the minimal set of fields defined by POSIX.
694d98d4072SPaolo Bonzini      * Positive si_code values are reserved for kernel-generated
695d98d4072SPaolo Bonzini      * signals, where the valid siginfo fields are determined by
696d98d4072SPaolo Bonzini      * the signal number.  But according to POSIX, it is unspecified
697d98d4072SPaolo Bonzini      * whether SI_USER and SI_QUEUE have values less than or equal to
698d98d4072SPaolo Bonzini      * zero.
699d98d4072SPaolo Bonzini      */
700d98d4072SPaolo Bonzini     if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE ||
701d98d4072SPaolo Bonzini         info->ssi_code <= 0) {
702d98d4072SPaolo Bonzini         /* SIGTERM, etc.  */
703d98d4072SPaolo Bonzini         si.si_pid = info->ssi_pid;
704d98d4072SPaolo Bonzini         si.si_uid = info->ssi_uid;
705d98d4072SPaolo Bonzini     } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE ||
706d98d4072SPaolo Bonzini                info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) {
707d98d4072SPaolo Bonzini         si.si_addr = (void *)(uintptr_t)info->ssi_addr;
708d98d4072SPaolo Bonzini     } else if (info->ssi_signo == SIGCHLD) {
709d98d4072SPaolo Bonzini         si.si_pid = info->ssi_pid;
710d98d4072SPaolo Bonzini         si.si_status = info->ssi_status;
711d98d4072SPaolo Bonzini         si.si_uid = info->ssi_uid;
712d98d4072SPaolo Bonzini     } else if (info->ssi_signo == SIGIO) {
713d98d4072SPaolo Bonzini         si.si_band = info->ssi_band;
714d98d4072SPaolo Bonzini     }
715d98d4072SPaolo Bonzini     action->sa_sigaction(info->ssi_signo, &si, NULL);
716d98d4072SPaolo Bonzini }
717