/*
 * os-posix-lib.c
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2010 Red Hat, Inc.
 *
 * QEMU library functions on POSIX which are shared between QEMU and
 * the QEMU tools.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include <termios.h>

#include <glib/gprintf.h>

#include "sysemu/sysemu.h"
#include "trace.h"
#include "qapi/error.h"
#include "qemu/sockets.h"
#include <libgen.h>
#include <sys/signal.h>
#include "qemu/cutils.h"

#ifdef CONFIG_LINUX
#include <sys/syscall.h>
#endif

#ifdef __FreeBSD__
#include <sys/sysctl.h>
#include <sys/user.h>
#include <libutil.h>
#endif

#include "qemu/mmap-alloc.h"
Tsirkin 547d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE 557d992e4dSPeter Lieven #include "qemu/error-report.h" 567d992e4dSPeter Lieven #endif 577d992e4dSPeter Lieven 581e356fc1SJitendra Kolhe #define MAX_MEM_PREALLOC_THREAD_COUNT (MIN(sysconf(_SC_NPROCESSORS_ONLN), 16)) 591e356fc1SJitendra Kolhe 601e356fc1SJitendra Kolhe struct MemsetThread { 611e356fc1SJitendra Kolhe char *addr; 621e356fc1SJitendra Kolhe uint64_t numpages; 631e356fc1SJitendra Kolhe uint64_t hpagesize; 641e356fc1SJitendra Kolhe QemuThread pgthread; 651e356fc1SJitendra Kolhe sigjmp_buf env; 661e356fc1SJitendra Kolhe }; 671e356fc1SJitendra Kolhe typedef struct MemsetThread MemsetThread; 681e356fc1SJitendra Kolhe 691e356fc1SJitendra Kolhe static MemsetThread *memset_thread; 701e356fc1SJitendra Kolhe static int memset_num_threads; 711e356fc1SJitendra Kolhe static bool memset_thread_failed; 721e356fc1SJitendra Kolhe 73baacf047SPaolo Bonzini int qemu_get_thread_id(void) 74baacf047SPaolo Bonzini { 75baacf047SPaolo Bonzini #if defined(__linux__) 76baacf047SPaolo Bonzini return syscall(SYS_gettid); 77baacf047SPaolo Bonzini #else 78baacf047SPaolo Bonzini return getpid(); 79baacf047SPaolo Bonzini #endif 80baacf047SPaolo Bonzini } 81baacf047SPaolo Bonzini 82baacf047SPaolo Bonzini int qemu_daemon(int nochdir, int noclose) 83baacf047SPaolo Bonzini { 84baacf047SPaolo Bonzini return daemon(nochdir, noclose); 85baacf047SPaolo Bonzini } 86baacf047SPaolo Bonzini 87baacf047SPaolo Bonzini void *qemu_oom_check(void *ptr) 88baacf047SPaolo Bonzini { 89baacf047SPaolo Bonzini if (ptr == NULL) { 90baacf047SPaolo Bonzini fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno)); 91baacf047SPaolo Bonzini abort(); 92baacf047SPaolo Bonzini } 93baacf047SPaolo Bonzini return ptr; 94baacf047SPaolo Bonzini } 95baacf047SPaolo Bonzini 967d2a35ccSKevin Wolf void *qemu_try_memalign(size_t alignment, size_t size) 97baacf047SPaolo Bonzini { 98baacf047SPaolo Bonzini void *ptr; 99e5354657SKevin Wolf 100e5354657SKevin Wolf 
if (alignment < sizeof(void*)) { 101e5354657SKevin Wolf alignment = sizeof(void*); 102e5354657SKevin Wolf } 103e5354657SKevin Wolf 104baacf047SPaolo Bonzini #if defined(_POSIX_C_SOURCE) && !defined(__sun__) 105baacf047SPaolo Bonzini int ret; 106baacf047SPaolo Bonzini ret = posix_memalign(&ptr, alignment, size); 107baacf047SPaolo Bonzini if (ret != 0) { 1087d2a35ccSKevin Wolf errno = ret; 1097d2a35ccSKevin Wolf ptr = NULL; 110baacf047SPaolo Bonzini } 111baacf047SPaolo Bonzini #elif defined(CONFIG_BSD) 1127d2a35ccSKevin Wolf ptr = valloc(size); 113baacf047SPaolo Bonzini #else 1147d2a35ccSKevin Wolf ptr = memalign(alignment, size); 115baacf047SPaolo Bonzini #endif 116baacf047SPaolo Bonzini trace_qemu_memalign(alignment, size, ptr); 117baacf047SPaolo Bonzini return ptr; 118baacf047SPaolo Bonzini } 119baacf047SPaolo Bonzini 1207d2a35ccSKevin Wolf void *qemu_memalign(size_t alignment, size_t size) 1217d2a35ccSKevin Wolf { 1227d2a35ccSKevin Wolf return qemu_oom_check(qemu_try_memalign(alignment, size)); 1237d2a35ccSKevin Wolf } 1247d2a35ccSKevin Wolf 125baacf047SPaolo Bonzini /* alloc shared memory pages */ 126a2b257d6SIgor Mammedov void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) 127baacf047SPaolo Bonzini { 128baacf047SPaolo Bonzini size_t align = QEMU_VMALLOC_ALIGN; 129794e8f30SMichael S. Tsirkin void *ptr = qemu_ram_mmap(-1, size, align, false); 130baacf047SPaolo Bonzini 1317dda5dc8SPaolo Bonzini if (ptr == MAP_FAILED) { 13239228250SMarkus Armbruster return NULL; 133baacf047SPaolo Bonzini } 134baacf047SPaolo Bonzini 135a2b257d6SIgor Mammedov if (alignment) { 136a2b257d6SIgor Mammedov *alignment = align; 137a2b257d6SIgor Mammedov } 138c2dfc5baSMichael S. 
Tsirkin 1396eebf958SPaolo Bonzini trace_qemu_anon_ram_alloc(size, ptr); 140baacf047SPaolo Bonzini return ptr; 141baacf047SPaolo Bonzini } 142baacf047SPaolo Bonzini 143baacf047SPaolo Bonzini void qemu_vfree(void *ptr) 144baacf047SPaolo Bonzini { 145baacf047SPaolo Bonzini trace_qemu_vfree(ptr); 146baacf047SPaolo Bonzini free(ptr); 147baacf047SPaolo Bonzini } 148baacf047SPaolo Bonzini 149e7a09b92SPaolo Bonzini void qemu_anon_ram_free(void *ptr, size_t size) 150e7a09b92SPaolo Bonzini { 151e7a09b92SPaolo Bonzini trace_qemu_anon_ram_free(ptr, size); 152794e8f30SMichael S. Tsirkin qemu_ram_munmap(ptr, size); 153e7a09b92SPaolo Bonzini } 154e7a09b92SPaolo Bonzini 155f9e8caccSStefan Hajnoczi void qemu_set_block(int fd) 156baacf047SPaolo Bonzini { 157baacf047SPaolo Bonzini int f; 158baacf047SPaolo Bonzini f = fcntl(fd, F_GETFL); 159baacf047SPaolo Bonzini fcntl(fd, F_SETFL, f & ~O_NONBLOCK); 160baacf047SPaolo Bonzini } 161baacf047SPaolo Bonzini 162f9e8caccSStefan Hajnoczi void qemu_set_nonblock(int fd) 163baacf047SPaolo Bonzini { 164baacf047SPaolo Bonzini int f; 165baacf047SPaolo Bonzini f = fcntl(fd, F_GETFL); 166baacf047SPaolo Bonzini fcntl(fd, F_SETFL, f | O_NONBLOCK); 167baacf047SPaolo Bonzini } 168baacf047SPaolo Bonzini 169606600a1SSebastian Ottlik int socket_set_fast_reuse(int fd) 170606600a1SSebastian Ottlik { 171606600a1SSebastian Ottlik int val = 1, ret; 172606600a1SSebastian Ottlik 173606600a1SSebastian Ottlik ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 174606600a1SSebastian Ottlik (const char *)&val, sizeof(val)); 175606600a1SSebastian Ottlik 176606600a1SSebastian Ottlik assert(ret == 0); 177606600a1SSebastian Ottlik 178606600a1SSebastian Ottlik return ret; 179606600a1SSebastian Ottlik } 180606600a1SSebastian Ottlik 181baacf047SPaolo Bonzini void qemu_set_cloexec(int fd) 182baacf047SPaolo Bonzini { 183baacf047SPaolo Bonzini int f; 184baacf047SPaolo Bonzini f = fcntl(fd, F_GETFD); 185baacf047SPaolo Bonzini fcntl(fd, F_SETFD, f | FD_CLOEXEC); 186baacf047SPaolo 
Bonzini } 187baacf047SPaolo Bonzini 188baacf047SPaolo Bonzini /* 189baacf047SPaolo Bonzini * Creates a pipe with FD_CLOEXEC set on both file descriptors 190baacf047SPaolo Bonzini */ 191baacf047SPaolo Bonzini int qemu_pipe(int pipefd[2]) 192baacf047SPaolo Bonzini { 193baacf047SPaolo Bonzini int ret; 194baacf047SPaolo Bonzini 195baacf047SPaolo Bonzini #ifdef CONFIG_PIPE2 196baacf047SPaolo Bonzini ret = pipe2(pipefd, O_CLOEXEC); 197baacf047SPaolo Bonzini if (ret != -1 || errno != ENOSYS) { 198baacf047SPaolo Bonzini return ret; 199baacf047SPaolo Bonzini } 200baacf047SPaolo Bonzini #endif 201baacf047SPaolo Bonzini ret = pipe(pipefd); 202baacf047SPaolo Bonzini if (ret == 0) { 203baacf047SPaolo Bonzini qemu_set_cloexec(pipefd[0]); 204baacf047SPaolo Bonzini qemu_set_cloexec(pipefd[1]); 205baacf047SPaolo Bonzini } 206baacf047SPaolo Bonzini 207baacf047SPaolo Bonzini return ret; 208baacf047SPaolo Bonzini } 209baacf047SPaolo Bonzini 210baacf047SPaolo Bonzini int qemu_utimens(const char *path, const struct timespec *times) 211baacf047SPaolo Bonzini { 212baacf047SPaolo Bonzini struct timeval tv[2], tv_now; 213baacf047SPaolo Bonzini struct stat st; 214baacf047SPaolo Bonzini int i; 215baacf047SPaolo Bonzini #ifdef CONFIG_UTIMENSAT 216baacf047SPaolo Bonzini int ret; 217baacf047SPaolo Bonzini 218baacf047SPaolo Bonzini ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW); 219baacf047SPaolo Bonzini if (ret != -1 || errno != ENOSYS) { 220baacf047SPaolo Bonzini return ret; 221baacf047SPaolo Bonzini } 222baacf047SPaolo Bonzini #endif 223baacf047SPaolo Bonzini /* Fallback: use utimes() instead of utimensat() */ 224baacf047SPaolo Bonzini 225baacf047SPaolo Bonzini /* happy if special cases */ 226baacf047SPaolo Bonzini if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) { 227baacf047SPaolo Bonzini return 0; 228baacf047SPaolo Bonzini } 229baacf047SPaolo Bonzini if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) { 230baacf047SPaolo Bonzini return 
utimes(path, NULL); 231baacf047SPaolo Bonzini } 232baacf047SPaolo Bonzini 233baacf047SPaolo Bonzini /* prepare for hard cases */ 234baacf047SPaolo Bonzini if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) { 235baacf047SPaolo Bonzini gettimeofday(&tv_now, NULL); 236baacf047SPaolo Bonzini } 237baacf047SPaolo Bonzini if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) { 238baacf047SPaolo Bonzini stat(path, &st); 239baacf047SPaolo Bonzini } 240baacf047SPaolo Bonzini 241baacf047SPaolo Bonzini for (i = 0; i < 2; i++) { 242baacf047SPaolo Bonzini if (times[i].tv_nsec == UTIME_NOW) { 243baacf047SPaolo Bonzini tv[i].tv_sec = tv_now.tv_sec; 244baacf047SPaolo Bonzini tv[i].tv_usec = tv_now.tv_usec; 245baacf047SPaolo Bonzini } else if (times[i].tv_nsec == UTIME_OMIT) { 246baacf047SPaolo Bonzini tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime; 247baacf047SPaolo Bonzini tv[i].tv_usec = 0; 248baacf047SPaolo Bonzini } else { 249baacf047SPaolo Bonzini tv[i].tv_sec = times[i].tv_sec; 250baacf047SPaolo Bonzini tv[i].tv_usec = times[i].tv_nsec / 1000; 251baacf047SPaolo Bonzini } 252baacf047SPaolo Bonzini } 253baacf047SPaolo Bonzini 254baacf047SPaolo Bonzini return utimes(path, &tv[0]); 255baacf047SPaolo Bonzini } 256e2ea3515SLaszlo Ersek 257e2ea3515SLaszlo Ersek char * 258e2ea3515SLaszlo Ersek qemu_get_local_state_pathname(const char *relative_pathname) 259e2ea3515SLaszlo Ersek { 260e2ea3515SLaszlo Ersek return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR, 261e2ea3515SLaszlo Ersek relative_pathname); 262e2ea3515SLaszlo Ersek } 26313401ba0SStefan Hajnoczi 26413401ba0SStefan Hajnoczi void qemu_set_tty_echo(int fd, bool echo) 26513401ba0SStefan Hajnoczi { 26613401ba0SStefan Hajnoczi struct termios tty; 26713401ba0SStefan Hajnoczi 26813401ba0SStefan Hajnoczi tcgetattr(fd, &tty); 26913401ba0SStefan Hajnoczi 27013401ba0SStefan Hajnoczi if (echo) { 27113401ba0SStefan Hajnoczi tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN; 27213401ba0SStefan 
Hajnoczi } else { 27313401ba0SStefan Hajnoczi tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN); 27413401ba0SStefan Hajnoczi } 27513401ba0SStefan Hajnoczi 27613401ba0SStefan Hajnoczi tcsetattr(fd, TCSANOW, &tty); 27713401ba0SStefan Hajnoczi } 27810f5bff6SFam Zheng 27910f5bff6SFam Zheng static char exec_dir[PATH_MAX]; 28010f5bff6SFam Zheng 28110f5bff6SFam Zheng void qemu_init_exec_dir(const char *argv0) 28210f5bff6SFam Zheng { 28310f5bff6SFam Zheng char *dir; 28410f5bff6SFam Zheng char *p = NULL; 28510f5bff6SFam Zheng char buf[PATH_MAX]; 28610f5bff6SFam Zheng 28710f5bff6SFam Zheng assert(!exec_dir[0]); 28810f5bff6SFam Zheng 28910f5bff6SFam Zheng #if defined(__linux__) 29010f5bff6SFam Zheng { 29110f5bff6SFam Zheng int len; 29210f5bff6SFam Zheng len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); 29310f5bff6SFam Zheng if (len > 0) { 29410f5bff6SFam Zheng buf[len] = 0; 29510f5bff6SFam Zheng p = buf; 29610f5bff6SFam Zheng } 29710f5bff6SFam Zheng } 29810f5bff6SFam Zheng #elif defined(__FreeBSD__) 29910f5bff6SFam Zheng { 30010f5bff6SFam Zheng static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; 30110f5bff6SFam Zheng size_t len = sizeof(buf) - 1; 30210f5bff6SFam Zheng 30310f5bff6SFam Zheng *buf = '\0'; 30410f5bff6SFam Zheng if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) && 30510f5bff6SFam Zheng *buf) { 30610f5bff6SFam Zheng buf[sizeof(buf) - 1] = '\0'; 30710f5bff6SFam Zheng p = buf; 30810f5bff6SFam Zheng } 30910f5bff6SFam Zheng } 31010f5bff6SFam Zheng #endif 31110f5bff6SFam Zheng /* If we don't have any way of figuring out the actual executable 31210f5bff6SFam Zheng location then try argv[0]. 
*/ 31310f5bff6SFam Zheng if (!p) { 31410f5bff6SFam Zheng if (!argv0) { 31510f5bff6SFam Zheng return; 31610f5bff6SFam Zheng } 31710f5bff6SFam Zheng p = realpath(argv0, buf); 31810f5bff6SFam Zheng if (!p) { 31910f5bff6SFam Zheng return; 32010f5bff6SFam Zheng } 32110f5bff6SFam Zheng } 32255ad781cSWei Jiangang dir = g_path_get_dirname(p); 32310f5bff6SFam Zheng 32410f5bff6SFam Zheng pstrcpy(exec_dir, sizeof(exec_dir), dir); 32555ad781cSWei Jiangang 32655ad781cSWei Jiangang g_free(dir); 32710f5bff6SFam Zheng } 32810f5bff6SFam Zheng 32910f5bff6SFam Zheng char *qemu_get_exec_dir(void) 33010f5bff6SFam Zheng { 33110f5bff6SFam Zheng return g_strdup(exec_dir); 33210f5bff6SFam Zheng } 33338183310SPaolo Bonzini 33438183310SPaolo Bonzini static void sigbus_handler(int signal) 33538183310SPaolo Bonzini { 3361e356fc1SJitendra Kolhe int i; 3371e356fc1SJitendra Kolhe if (memset_thread) { 3381e356fc1SJitendra Kolhe for (i = 0; i < memset_num_threads; i++) { 3391e356fc1SJitendra Kolhe if (qemu_thread_is_self(&memset_thread[i].pgthread)) { 3401e356fc1SJitendra Kolhe siglongjmp(memset_thread[i].env, 1); 3411e356fc1SJitendra Kolhe } 3421e356fc1SJitendra Kolhe } 3431e356fc1SJitendra Kolhe } 34438183310SPaolo Bonzini } 34538183310SPaolo Bonzini 3461e356fc1SJitendra Kolhe static void *do_touch_pages(void *arg) 3471e356fc1SJitendra Kolhe { 3481e356fc1SJitendra Kolhe MemsetThread *memset_args = (MemsetThread *)arg; 3491e356fc1SJitendra Kolhe char *addr = memset_args->addr; 3501e356fc1SJitendra Kolhe uint64_t numpages = memset_args->numpages; 3511e356fc1SJitendra Kolhe uint64_t hpagesize = memset_args->hpagesize; 3521e356fc1SJitendra Kolhe sigset_t set, oldset; 3531e356fc1SJitendra Kolhe int i = 0; 3541e356fc1SJitendra Kolhe 3551e356fc1SJitendra Kolhe /* unblock SIGBUS */ 3561e356fc1SJitendra Kolhe sigemptyset(&set); 3571e356fc1SJitendra Kolhe sigaddset(&set, SIGBUS); 3581e356fc1SJitendra Kolhe pthread_sigmask(SIG_UNBLOCK, &set, &oldset); 3591e356fc1SJitendra Kolhe 3601e356fc1SJitendra Kolhe if 
(sigsetjmp(memset_args->env, 1)) { 3611e356fc1SJitendra Kolhe memset_thread_failed = true; 3621e356fc1SJitendra Kolhe } else { 3631e356fc1SJitendra Kolhe for (i = 0; i < numpages; i++) { 364*9dc44aa5SDaniel P. Berrange /* 365*9dc44aa5SDaniel P. Berrange * Read & write back the same value, so we don't 366*9dc44aa5SDaniel P. Berrange * corrupt existing user/app data that might be 367*9dc44aa5SDaniel P. Berrange * stored. 368*9dc44aa5SDaniel P. Berrange * 369*9dc44aa5SDaniel P. Berrange * 'volatile' to stop compiler optimizing this away 370*9dc44aa5SDaniel P. Berrange * to a no-op 371*9dc44aa5SDaniel P. Berrange * 372*9dc44aa5SDaniel P. Berrange * TODO: get a better solution from kernel so we 373*9dc44aa5SDaniel P. Berrange * don't need to write at all so we don't cause 374*9dc44aa5SDaniel P. Berrange * wear on the storage backing the region... 375*9dc44aa5SDaniel P. Berrange */ 376*9dc44aa5SDaniel P. Berrange *(volatile char *)addr = *addr; 3771e356fc1SJitendra Kolhe addr += hpagesize; 3781e356fc1SJitendra Kolhe } 3791e356fc1SJitendra Kolhe } 3801e356fc1SJitendra Kolhe pthread_sigmask(SIG_SETMASK, &oldset, NULL); 3811e356fc1SJitendra Kolhe return NULL; 3821e356fc1SJitendra Kolhe } 3831e356fc1SJitendra Kolhe 3841e356fc1SJitendra Kolhe static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, 3851e356fc1SJitendra Kolhe int smp_cpus) 3861e356fc1SJitendra Kolhe { 3871e356fc1SJitendra Kolhe uint64_t numpages_per_thread, size_per_thread; 3881e356fc1SJitendra Kolhe char *addr = area; 3891e356fc1SJitendra Kolhe int i = 0; 3901e356fc1SJitendra Kolhe 3911e356fc1SJitendra Kolhe memset_thread_failed = false; 3921e356fc1SJitendra Kolhe memset_num_threads = MIN(smp_cpus, MAX_MEM_PREALLOC_THREAD_COUNT); 3931e356fc1SJitendra Kolhe memset_thread = g_new0(MemsetThread, memset_num_threads); 3941e356fc1SJitendra Kolhe numpages_per_thread = (numpages / memset_num_threads); 3951e356fc1SJitendra Kolhe size_per_thread = (hpagesize * numpages_per_thread); 
3961e356fc1SJitendra Kolhe for (i = 0; i < memset_num_threads; i++) { 3971e356fc1SJitendra Kolhe memset_thread[i].addr = addr; 3981e356fc1SJitendra Kolhe memset_thread[i].numpages = (i == (memset_num_threads - 1)) ? 3991e356fc1SJitendra Kolhe numpages : numpages_per_thread; 4001e356fc1SJitendra Kolhe memset_thread[i].hpagesize = hpagesize; 4011e356fc1SJitendra Kolhe qemu_thread_create(&memset_thread[i].pgthread, "touch_pages", 4021e356fc1SJitendra Kolhe do_touch_pages, &memset_thread[i], 4031e356fc1SJitendra Kolhe QEMU_THREAD_JOINABLE); 4041e356fc1SJitendra Kolhe addr += size_per_thread; 4051e356fc1SJitendra Kolhe numpages -= numpages_per_thread; 4061e356fc1SJitendra Kolhe } 4071e356fc1SJitendra Kolhe for (i = 0; i < memset_num_threads; i++) { 4081e356fc1SJitendra Kolhe qemu_thread_join(&memset_thread[i].pgthread); 4091e356fc1SJitendra Kolhe } 4101e356fc1SJitendra Kolhe g_free(memset_thread); 4111e356fc1SJitendra Kolhe memset_thread = NULL; 4121e356fc1SJitendra Kolhe 4131e356fc1SJitendra Kolhe return memset_thread_failed; 4141e356fc1SJitendra Kolhe } 4151e356fc1SJitendra Kolhe 4161e356fc1SJitendra Kolhe void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, 4171e356fc1SJitendra Kolhe Error **errp) 41838183310SPaolo Bonzini { 419b7bf8f56SStefan Weil int ret; 42038183310SPaolo Bonzini struct sigaction act, oldact; 4211e356fc1SJitendra Kolhe size_t hpagesize = qemu_fd_getpagesize(fd); 4221e356fc1SJitendra Kolhe size_t numpages = DIV_ROUND_UP(memory, hpagesize); 42338183310SPaolo Bonzini 42438183310SPaolo Bonzini memset(&act, 0, sizeof(act)); 42538183310SPaolo Bonzini act.sa_handler = &sigbus_handler; 42638183310SPaolo Bonzini act.sa_flags = 0; 42738183310SPaolo Bonzini 42838183310SPaolo Bonzini ret = sigaction(SIGBUS, &act, &oldact); 42938183310SPaolo Bonzini if (ret) { 430056b68afSIgor Mammedov error_setg_errno(errp, errno, 431056b68afSIgor Mammedov "os_mem_prealloc: failed to install signal handler"); 432056b68afSIgor Mammedov return; 
43338183310SPaolo Bonzini } 43438183310SPaolo Bonzini 4351e356fc1SJitendra Kolhe /* touch pages simultaneously */ 4361e356fc1SJitendra Kolhe if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) { 437056b68afSIgor Mammedov error_setg(errp, "os_mem_prealloc: Insufficient free host memory " 438404ac83eSMichal Privoznik "pages available to allocate guest RAM\n"); 439056b68afSIgor Mammedov } 44038183310SPaolo Bonzini 44138183310SPaolo Bonzini ret = sigaction(SIGBUS, &oldact, NULL); 44238183310SPaolo Bonzini if (ret) { 443056b68afSIgor Mammedov /* Terminate QEMU since it can't recover from error */ 44438183310SPaolo Bonzini perror("os_mem_prealloc: failed to reinstall signal handler"); 44538183310SPaolo Bonzini exit(1); 44638183310SPaolo Bonzini } 44738183310SPaolo Bonzini } 448d57e4e48SDaniel P. Berrange 449d57e4e48SDaniel P. Berrange 450d57e4e48SDaniel P. Berrange static struct termios oldtty; 451d57e4e48SDaniel P. Berrange 452d57e4e48SDaniel P. Berrange static void term_exit(void) 453d57e4e48SDaniel P. Berrange { 454d57e4e48SDaniel P. Berrange tcsetattr(0, TCSANOW, &oldtty); 455d57e4e48SDaniel P. Berrange } 456d57e4e48SDaniel P. Berrange 457d57e4e48SDaniel P. Berrange static void term_init(void) 458d57e4e48SDaniel P. Berrange { 459d57e4e48SDaniel P. Berrange struct termios tty; 460d57e4e48SDaniel P. Berrange 461d57e4e48SDaniel P. Berrange tcgetattr(0, &tty); 462d57e4e48SDaniel P. Berrange oldtty = tty; 463d57e4e48SDaniel P. Berrange 464d57e4e48SDaniel P. Berrange tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP 465d57e4e48SDaniel P. Berrange |INLCR|IGNCR|ICRNL|IXON); 466d57e4e48SDaniel P. Berrange tty.c_oflag |= OPOST; 467d57e4e48SDaniel P. Berrange tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN); 468d57e4e48SDaniel P. Berrange tty.c_cflag &= ~(CSIZE|PARENB); 469d57e4e48SDaniel P. Berrange tty.c_cflag |= CS8; 470d57e4e48SDaniel P. Berrange tty.c_cc[VMIN] = 1; 471d57e4e48SDaniel P. Berrange tty.c_cc[VTIME] = 0; 472d57e4e48SDaniel P. Berrange 473d57e4e48SDaniel P. 
Berrange tcsetattr(0, TCSANOW, &tty); 474d57e4e48SDaniel P. Berrange 475d57e4e48SDaniel P. Berrange atexit(term_exit); 476d57e4e48SDaniel P. Berrange } 477d57e4e48SDaniel P. Berrange 478d57e4e48SDaniel P. Berrange int qemu_read_password(char *buf, int buf_size) 479d57e4e48SDaniel P. Berrange { 480d57e4e48SDaniel P. Berrange uint8_t ch; 481d57e4e48SDaniel P. Berrange int i, ret; 482d57e4e48SDaniel P. Berrange 483d57e4e48SDaniel P. Berrange printf("password: "); 484d57e4e48SDaniel P. Berrange fflush(stdout); 485d57e4e48SDaniel P. Berrange term_init(); 486d57e4e48SDaniel P. Berrange i = 0; 487d57e4e48SDaniel P. Berrange for (;;) { 488d57e4e48SDaniel P. Berrange ret = read(0, &ch, 1); 489d57e4e48SDaniel P. Berrange if (ret == -1) { 490d57e4e48SDaniel P. Berrange if (errno == EAGAIN || errno == EINTR) { 491d57e4e48SDaniel P. Berrange continue; 492d57e4e48SDaniel P. Berrange } else { 493d57e4e48SDaniel P. Berrange break; 494d57e4e48SDaniel P. Berrange } 495d57e4e48SDaniel P. Berrange } else if (ret == 0) { 496d57e4e48SDaniel P. Berrange ret = -1; 497d57e4e48SDaniel P. Berrange break; 498d57e4e48SDaniel P. Berrange } else { 4996a11d518SDaniel P. Berrange if (ch == '\r' || 5006a11d518SDaniel P. Berrange ch == '\n') { 501d57e4e48SDaniel P. Berrange ret = 0; 502d57e4e48SDaniel P. Berrange break; 503d57e4e48SDaniel P. Berrange } 504d57e4e48SDaniel P. Berrange if (i < (buf_size - 1)) { 505d57e4e48SDaniel P. Berrange buf[i++] = ch; 506d57e4e48SDaniel P. Berrange } 507d57e4e48SDaniel P. Berrange } 508d57e4e48SDaniel P. Berrange } 509d57e4e48SDaniel P. Berrange term_exit(); 510d57e4e48SDaniel P. Berrange buf[i] = '\0'; 511d57e4e48SDaniel P. Berrange printf("\n"); 512d57e4e48SDaniel P. Berrange return ret; 513d57e4e48SDaniel P. Berrange } 51457cb38b3SDaniel P. Berrange 51557cb38b3SDaniel P. 
Berrange 5167dc9ae43SMichal Privoznik char *qemu_get_pid_name(pid_t pid) 5177dc9ae43SMichal Privoznik { 5187dc9ae43SMichal Privoznik char *name = NULL; 5197dc9ae43SMichal Privoznik 5207dc9ae43SMichal Privoznik #if defined(__FreeBSD__) 5217dc9ae43SMichal Privoznik /* BSDs don't have /proc, but they provide a nice substitute */ 5227dc9ae43SMichal Privoznik struct kinfo_proc *proc = kinfo_getproc(pid); 5237dc9ae43SMichal Privoznik 5247dc9ae43SMichal Privoznik if (proc) { 5257dc9ae43SMichal Privoznik name = g_strdup(proc->ki_comm); 5267dc9ae43SMichal Privoznik free(proc); 5277dc9ae43SMichal Privoznik } 5287dc9ae43SMichal Privoznik #else 5297dc9ae43SMichal Privoznik /* Assume a system with reasonable procfs */ 5307dc9ae43SMichal Privoznik char *pid_path; 5317dc9ae43SMichal Privoznik size_t len; 5327dc9ae43SMichal Privoznik 5337dc9ae43SMichal Privoznik pid_path = g_strdup_printf("/proc/%d/cmdline", pid); 5347dc9ae43SMichal Privoznik g_file_get_contents(pid_path, &name, &len, NULL); 5357dc9ae43SMichal Privoznik g_free(pid_path); 5367dc9ae43SMichal Privoznik #endif 5377dc9ae43SMichal Privoznik 5387dc9ae43SMichal Privoznik return name; 5397dc9ae43SMichal Privoznik } 5407dc9ae43SMichal Privoznik 5417dc9ae43SMichal Privoznik 54257cb38b3SDaniel P. Berrange pid_t qemu_fork(Error **errp) 54357cb38b3SDaniel P. Berrange { 54457cb38b3SDaniel P. Berrange sigset_t oldmask, newmask; 54557cb38b3SDaniel P. Berrange struct sigaction sig_action; 54657cb38b3SDaniel P. Berrange int saved_errno; 54757cb38b3SDaniel P. Berrange pid_t pid; 54857cb38b3SDaniel P. Berrange 54957cb38b3SDaniel P. Berrange /* 55057cb38b3SDaniel P. Berrange * Need to block signals now, so that child process can safely 55157cb38b3SDaniel P. Berrange * kill off caller's signal handlers without a race. 55257cb38b3SDaniel P. Berrange */ 55357cb38b3SDaniel P. Berrange sigfillset(&newmask); 55457cb38b3SDaniel P. Berrange if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) { 55557cb38b3SDaniel P. 
Berrange error_setg_errno(errp, errno, 55657cb38b3SDaniel P. Berrange "cannot block signals"); 55757cb38b3SDaniel P. Berrange return -1; 55857cb38b3SDaniel P. Berrange } 55957cb38b3SDaniel P. Berrange 56057cb38b3SDaniel P. Berrange pid = fork(); 56157cb38b3SDaniel P. Berrange saved_errno = errno; 56257cb38b3SDaniel P. Berrange 56357cb38b3SDaniel P. Berrange if (pid < 0) { 56457cb38b3SDaniel P. Berrange /* attempt to restore signal mask, but ignore failure, to 56557cb38b3SDaniel P. Berrange * avoid obscuring the fork failure */ 56657cb38b3SDaniel P. Berrange (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); 56757cb38b3SDaniel P. Berrange error_setg_errno(errp, saved_errno, 56857cb38b3SDaniel P. Berrange "cannot fork child process"); 56957cb38b3SDaniel P. Berrange errno = saved_errno; 57057cb38b3SDaniel P. Berrange return -1; 57157cb38b3SDaniel P. Berrange } else if (pid) { 57257cb38b3SDaniel P. Berrange /* parent process */ 57357cb38b3SDaniel P. Berrange 57457cb38b3SDaniel P. Berrange /* Restore our original signal mask now that the child is 57557cb38b3SDaniel P. Berrange * safely running. Only documented failures are EFAULT (not 57657cb38b3SDaniel P. Berrange * possible, since we are using just-grabbed mask) or EINVAL 57757cb38b3SDaniel P. Berrange * (not possible, since we are using correct arguments). */ 57857cb38b3SDaniel P. Berrange (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); 57957cb38b3SDaniel P. Berrange } else { 58057cb38b3SDaniel P. Berrange /* child process */ 58157cb38b3SDaniel P. Berrange size_t i; 58257cb38b3SDaniel P. Berrange 58357cb38b3SDaniel P. Berrange /* Clear out all signal handlers from parent so nothing 58457cb38b3SDaniel P. Berrange * unexpected can happen in our child once we unblock 58557cb38b3SDaniel P. Berrange * signals */ 58657cb38b3SDaniel P. Berrange sig_action.sa_handler = SIG_DFL; 58757cb38b3SDaniel P. Berrange sig_action.sa_flags = 0; 58857cb38b3SDaniel P. Berrange sigemptyset(&sig_action.sa_mask); 58957cb38b3SDaniel P. 
Berrange 59057cb38b3SDaniel P. Berrange for (i = 1; i < NSIG; i++) { 59157cb38b3SDaniel P. Berrange /* Only possible errors are EFAULT or EINVAL The former 59257cb38b3SDaniel P. Berrange * won't happen, the latter we expect, so no need to check 59357cb38b3SDaniel P. Berrange * return value */ 59457cb38b3SDaniel P. Berrange (void)sigaction(i, &sig_action, NULL); 59557cb38b3SDaniel P. Berrange } 59657cb38b3SDaniel P. Berrange 59757cb38b3SDaniel P. Berrange /* Unmask all signals in child, since we've no idea what the 59857cb38b3SDaniel P. Berrange * caller's done with their signal mask and don't want to 59957cb38b3SDaniel P. Berrange * propagate that to children */ 60057cb38b3SDaniel P. Berrange sigemptyset(&newmask); 60157cb38b3SDaniel P. Berrange if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) { 60257cb38b3SDaniel P. Berrange Error *local_err = NULL; 60357cb38b3SDaniel P. Berrange error_setg_errno(&local_err, errno, 60457cb38b3SDaniel P. Berrange "cannot unblock signals"); 60557cb38b3SDaniel P. Berrange error_report_err(local_err); 60657cb38b3SDaniel P. Berrange _exit(1); 60757cb38b3SDaniel P. Berrange } 60857cb38b3SDaniel P. Berrange } 60957cb38b3SDaniel P. Berrange return pid; 61057cb38b3SDaniel P. 
Berrange } 6118737d9e0SPeter Lieven 6128737d9e0SPeter Lieven void *qemu_alloc_stack(size_t *sz) 6138737d9e0SPeter Lieven { 6148737d9e0SPeter Lieven void *ptr, *guardpage; 6157d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE 6167d992e4dSPeter Lieven void *ptr2; 6177d992e4dSPeter Lieven #endif 6188737d9e0SPeter Lieven size_t pagesz = getpagesize(); 6198737d9e0SPeter Lieven #ifdef _SC_THREAD_STACK_MIN 6208737d9e0SPeter Lieven /* avoid stacks smaller than _SC_THREAD_STACK_MIN */ 6218737d9e0SPeter Lieven long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN); 6228737d9e0SPeter Lieven *sz = MAX(MAX(min_stack_sz, 0), *sz); 6238737d9e0SPeter Lieven #endif 6248737d9e0SPeter Lieven /* adjust stack size to a multiple of the page size */ 6258737d9e0SPeter Lieven *sz = ROUND_UP(*sz, pagesz); 6268737d9e0SPeter Lieven /* allocate one extra page for the guard page */ 6278737d9e0SPeter Lieven *sz += pagesz; 6288737d9e0SPeter Lieven 6298737d9e0SPeter Lieven ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, 6308737d9e0SPeter Lieven MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 6318737d9e0SPeter Lieven if (ptr == MAP_FAILED) { 6328737d9e0SPeter Lieven abort(); 6338737d9e0SPeter Lieven } 6348737d9e0SPeter Lieven 6358737d9e0SPeter Lieven #if defined(HOST_IA64) 6368737d9e0SPeter Lieven /* separate register stack */ 6378737d9e0SPeter Lieven guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz); 6388737d9e0SPeter Lieven #elif defined(HOST_HPPA) 6398737d9e0SPeter Lieven /* stack grows up */ 6408737d9e0SPeter Lieven guardpage = ptr + *sz - pagesz; 6418737d9e0SPeter Lieven #else 6428737d9e0SPeter Lieven /* stack grows down */ 6438737d9e0SPeter Lieven guardpage = ptr; 6448737d9e0SPeter Lieven #endif 6458737d9e0SPeter Lieven if (mprotect(guardpage, pagesz, PROT_NONE) != 0) { 6468737d9e0SPeter Lieven abort(); 6478737d9e0SPeter Lieven } 6488737d9e0SPeter Lieven 6497d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE 6507d992e4dSPeter Lieven for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) { 
6517d992e4dSPeter Lieven *(uint32_t *)ptr2 = 0xdeadbeaf; 6527d992e4dSPeter Lieven } 6537d992e4dSPeter Lieven #endif 6547d992e4dSPeter Lieven 6558737d9e0SPeter Lieven return ptr; 6568737d9e0SPeter Lieven } 6578737d9e0SPeter Lieven 6587d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE 6597d992e4dSPeter Lieven static __thread unsigned int max_stack_usage; 6607d992e4dSPeter Lieven #endif 6617d992e4dSPeter Lieven 6628737d9e0SPeter Lieven void qemu_free_stack(void *stack, size_t sz) 6638737d9e0SPeter Lieven { 6647d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE 6657d992e4dSPeter Lieven unsigned int usage; 6667d992e4dSPeter Lieven void *ptr; 6677d992e4dSPeter Lieven 6687d992e4dSPeter Lieven for (ptr = stack + getpagesize(); ptr < stack + sz; 6697d992e4dSPeter Lieven ptr += sizeof(uint32_t)) { 6707d992e4dSPeter Lieven if (*(uint32_t *)ptr != 0xdeadbeaf) { 6717d992e4dSPeter Lieven break; 6727d992e4dSPeter Lieven } 6737d992e4dSPeter Lieven } 6747d992e4dSPeter Lieven usage = sz - (uintptr_t) (ptr - stack); 6757d992e4dSPeter Lieven if (usage > max_stack_usage) { 6767d992e4dSPeter Lieven error_report("thread %d max stack usage increased from %u to %u", 6777d992e4dSPeter Lieven qemu_get_thread_id(), max_stack_usage, usage); 6787d992e4dSPeter Lieven max_stack_usage = usage; 6797d992e4dSPeter Lieven } 6807d992e4dSPeter Lieven #endif 6817d992e4dSPeter Lieven 6828737d9e0SPeter Lieven munmap(stack, sz); 6838737d9e0SPeter Lieven } 684d98d4072SPaolo Bonzini 685d98d4072SPaolo Bonzini void sigaction_invoke(struct sigaction *action, 686d98d4072SPaolo Bonzini struct qemu_signalfd_siginfo *info) 687d98d4072SPaolo Bonzini { 688d98d4072SPaolo Bonzini siginfo_t si = { 0 }; 689d98d4072SPaolo Bonzini si.si_signo = info->ssi_signo; 690d98d4072SPaolo Bonzini si.si_errno = info->ssi_errno; 691d98d4072SPaolo Bonzini si.si_code = info->ssi_code; 692d98d4072SPaolo Bonzini 693d98d4072SPaolo Bonzini /* Convert the minimal set of fields defined by POSIX. 
694d98d4072SPaolo Bonzini * Positive si_code values are reserved for kernel-generated 695d98d4072SPaolo Bonzini * signals, where the valid siginfo fields are determined by 696d98d4072SPaolo Bonzini * the signal number. But according to POSIX, it is unspecified 697d98d4072SPaolo Bonzini * whether SI_USER and SI_QUEUE have values less than or equal to 698d98d4072SPaolo Bonzini * zero. 699d98d4072SPaolo Bonzini */ 700d98d4072SPaolo Bonzini if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE || 701d98d4072SPaolo Bonzini info->ssi_code <= 0) { 702d98d4072SPaolo Bonzini /* SIGTERM, etc. */ 703d98d4072SPaolo Bonzini si.si_pid = info->ssi_pid; 704d98d4072SPaolo Bonzini si.si_uid = info->ssi_uid; 705d98d4072SPaolo Bonzini } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE || 706d98d4072SPaolo Bonzini info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) { 707d98d4072SPaolo Bonzini si.si_addr = (void *)(uintptr_t)info->ssi_addr; 708d98d4072SPaolo Bonzini } else if (info->ssi_signo == SIGCHLD) { 709d98d4072SPaolo Bonzini si.si_pid = info->ssi_pid; 710d98d4072SPaolo Bonzini si.si_status = info->ssi_status; 711d98d4072SPaolo Bonzini si.si_uid = info->ssi_uid; 712d98d4072SPaolo Bonzini } else if (info->ssi_signo == SIGIO) { 713d98d4072SPaolo Bonzini si.si_band = info->ssi_band; 714d98d4072SPaolo Bonzini } 715d98d4072SPaolo Bonzini action->sa_sigaction(info->ssi_signo, &si, NULL); 716d98d4072SPaolo Bonzini } 717