/*
 * os-posix-lib.c
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2010 Red Hat, Inc.
 *
 * QEMU library functions on POSIX which are shared between QEMU and
 * the QEMU tools.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
27baacf047SPaolo Bonzini */ 28baacf047SPaolo Bonzini 29aafd7584SPeter Maydell #include "qemu/osdep.h" 3013401ba0SStefan Hajnoczi #include <termios.h> 3113401ba0SStefan Hajnoczi 32e2ea3515SLaszlo Ersek #include <glib/gprintf.h> 33e2ea3515SLaszlo Ersek 34baacf047SPaolo Bonzini #include "sysemu/sysemu.h" 35baacf047SPaolo Bonzini #include "trace.h" 36da34e65cSMarkus Armbruster #include "qapi/error.h" 3729b838c0SDavid Hildenbrand #include "qemu/error-report.h" 38b85ea5faSPeter Maydell #include "qemu/madvise.h" 39baacf047SPaolo Bonzini #include "qemu/sockets.h" 40db725815SMarkus Armbruster #include "qemu/thread.h" 4110f5bff6SFam Zheng #include <libgen.h> 42f348b6d1SVeronia Bahaa #include "qemu/cutils.h" 43c905a368SDaniele Buono #include "qemu/compiler.h" 4489aec641SDavid Hildenbrand #include "qemu/units.h" 45baacf047SPaolo Bonzini 46baacf047SPaolo Bonzini #ifdef CONFIG_LINUX 47baacf047SPaolo Bonzini #include <sys/syscall.h> 48baacf047SPaolo Bonzini #endif 49baacf047SPaolo Bonzini 5041975b26SAndreas Färber #ifdef __FreeBSD__ 5141975b26SAndreas Färber #include <sys/sysctl.h> 52a7764f15SEd Maste #include <sys/user.h> 539548a891SDavid Carlier #include <sys/thr.h> 547dc9ae43SMichal Privoznik #include <libutil.h> 5541975b26SAndreas Färber #endif 5641975b26SAndreas Färber 57094611b4SKamil Rytarowski #ifdef __NetBSD__ 58094611b4SKamil Rytarowski #include <sys/sysctl.h> 599548a891SDavid Carlier #include <lwp.h> 60094611b4SKamil Rytarowski #endif 61094611b4SKamil Rytarowski 622032e243SDavid CARLIER #ifdef __APPLE__ 632032e243SDavid CARLIER #include <mach-o/dyld.h> 642032e243SDavid CARLIER #endif 652032e243SDavid CARLIER 662b9b9e70SDavid CARLIER #ifdef __HAIKU__ 672b9b9e70SDavid CARLIER #include <kernel/image.h> 682b9b9e70SDavid CARLIER #endif 692b9b9e70SDavid CARLIER 70a9c94277SMarkus Armbruster #include "qemu/mmap-alloc.h" 71794e8f30SMichael S. 
Tsirkin 727d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE 737d992e4dSPeter Lieven #include "qemu/error-report.h" 747d992e4dSPeter Lieven #endif 757d992e4dSPeter Lieven 76dfd0dcc7SJitendra Kolhe #define MAX_MEM_PREALLOC_THREAD_COUNT 16 771e356fc1SJitendra Kolhe 78dba50678SDavid Hildenbrand struct MemsetThread; 79dba50678SDavid Hildenbrand 80dba50678SDavid Hildenbrand typedef struct MemsetContext { 81dba50678SDavid Hildenbrand bool all_threads_created; 82dba50678SDavid Hildenbrand bool any_thread_failed; 83dba50678SDavid Hildenbrand struct MemsetThread *threads; 84dba50678SDavid Hildenbrand int num_threads; 85dba50678SDavid Hildenbrand } MemsetContext; 86dba50678SDavid Hildenbrand 871e356fc1SJitendra Kolhe struct MemsetThread { 881e356fc1SJitendra Kolhe char *addr; 89e947d47dSStefan Weil size_t numpages; 90e947d47dSStefan Weil size_t hpagesize; 911e356fc1SJitendra Kolhe QemuThread pgthread; 921e356fc1SJitendra Kolhe sigjmp_buf env; 93dba50678SDavid Hildenbrand MemsetContext *context; 941e356fc1SJitendra Kolhe }; 951e356fc1SJitendra Kolhe typedef struct MemsetThread MemsetThread; 961e356fc1SJitendra Kolhe 97dba50678SDavid Hildenbrand /* used by sigbus_handler() */ 98dba50678SDavid Hildenbrand static MemsetContext *sigbus_memset_context; 9929b838c0SDavid Hildenbrand struct sigaction sigbus_oldact; 100a960d664SDavid Hildenbrand static QemuMutex sigbus_mutex; 1011e356fc1SJitendra Kolhe 102037fb5ebSbauerchen static QemuMutex page_mutex; 103037fb5ebSbauerchen static QemuCond page_cond; 104037fb5ebSbauerchen 105baacf047SPaolo Bonzini int qemu_get_thread_id(void) 106baacf047SPaolo Bonzini { 107baacf047SPaolo Bonzini #if defined(__linux__) 108baacf047SPaolo Bonzini return syscall(SYS_gettid); 1099548a891SDavid Carlier #elif defined(__FreeBSD__) 1109548a891SDavid Carlier /* thread id is up to INT_MAX */ 1119548a891SDavid Carlier long tid; 1129548a891SDavid Carlier thr_self(&tid); 1139548a891SDavid Carlier return (int)tid; 1149548a891SDavid Carlier #elif 
defined(__NetBSD__) 1159548a891SDavid Carlier return _lwp_self(); 1168edbca51SDavid CARLIER #elif defined(__OpenBSD__) 1178edbca51SDavid CARLIER return getthrid(); 118baacf047SPaolo Bonzini #else 119baacf047SPaolo Bonzini return getpid(); 120baacf047SPaolo Bonzini #endif 121baacf047SPaolo Bonzini } 122baacf047SPaolo Bonzini 123baacf047SPaolo Bonzini int qemu_daemon(int nochdir, int noclose) 124baacf047SPaolo Bonzini { 125baacf047SPaolo Bonzini return daemon(nochdir, noclose); 126baacf047SPaolo Bonzini } 127baacf047SPaolo Bonzini 1289e6bdef2SMarc-André Lureau bool qemu_write_pidfile(const char *path, Error **errp) 1299e6bdef2SMarc-André Lureau { 1309e6bdef2SMarc-André Lureau int fd; 1319e6bdef2SMarc-André Lureau char pidstr[32]; 1329e6bdef2SMarc-André Lureau 1339e6bdef2SMarc-André Lureau while (1) { 1349e6bdef2SMarc-André Lureau struct stat a, b; 13535f7f3fbSMarc-André Lureau struct flock lock = { 13635f7f3fbSMarc-André Lureau .l_type = F_WRLCK, 13735f7f3fbSMarc-André Lureau .l_whence = SEEK_SET, 13835f7f3fbSMarc-André Lureau .l_len = 0, 13935f7f3fbSMarc-André Lureau }; 1409e6bdef2SMarc-André Lureau 141*1b34d08fSMarc-André Lureau fd = qemu_create(path, O_WRONLY, S_IRUSR | S_IWUSR, errp); 1429e6bdef2SMarc-André Lureau if (fd == -1) { 1439e6bdef2SMarc-André Lureau return false; 1449e6bdef2SMarc-André Lureau } 1459e6bdef2SMarc-André Lureau 1469e6bdef2SMarc-André Lureau if (fstat(fd, &b) < 0) { 1479e6bdef2SMarc-André Lureau error_setg_errno(errp, errno, "Cannot stat file"); 1489e6bdef2SMarc-André Lureau goto fail_close; 1499e6bdef2SMarc-André Lureau } 1509e6bdef2SMarc-André Lureau 15135f7f3fbSMarc-André Lureau if (fcntl(fd, F_SETLK, &lock)) { 1529e6bdef2SMarc-André Lureau error_setg_errno(errp, errno, "Cannot lock pid file"); 1539e6bdef2SMarc-André Lureau goto fail_close; 1549e6bdef2SMarc-André Lureau } 1559e6bdef2SMarc-André Lureau 1569e6bdef2SMarc-André Lureau /* 1579e6bdef2SMarc-André Lureau * Now make sure the path we locked is the same one that now 
1589e6bdef2SMarc-André Lureau * exists on the filesystem. 1599e6bdef2SMarc-André Lureau */ 1609e6bdef2SMarc-André Lureau if (stat(path, &a) < 0) { 1619e6bdef2SMarc-André Lureau /* 1629e6bdef2SMarc-André Lureau * PID file disappeared, someone else must be racing with 1639e6bdef2SMarc-André Lureau * us, so try again. 1649e6bdef2SMarc-André Lureau */ 1659e6bdef2SMarc-André Lureau close(fd); 1669e6bdef2SMarc-André Lureau continue; 1679e6bdef2SMarc-André Lureau } 1689e6bdef2SMarc-André Lureau 1699e6bdef2SMarc-André Lureau if (a.st_ino == b.st_ino) { 1709e6bdef2SMarc-André Lureau break; 1719e6bdef2SMarc-André Lureau } 1729e6bdef2SMarc-André Lureau 1739e6bdef2SMarc-André Lureau /* 1749e6bdef2SMarc-André Lureau * PID file was recreated, someone else must be racing with 1759e6bdef2SMarc-André Lureau * us, so try again. 1769e6bdef2SMarc-André Lureau */ 1779e6bdef2SMarc-André Lureau close(fd); 1789e6bdef2SMarc-André Lureau } 1799e6bdef2SMarc-André Lureau 1809e6bdef2SMarc-André Lureau if (ftruncate(fd, 0) < 0) { 1819e6bdef2SMarc-André Lureau error_setg_errno(errp, errno, "Failed to truncate pid file"); 1829e6bdef2SMarc-André Lureau goto fail_unlink; 1839e6bdef2SMarc-André Lureau } 1849e6bdef2SMarc-André Lureau 1859e6bdef2SMarc-André Lureau snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid()); 18696eb9b2bSMarc-André Lureau if (qemu_write_full(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) { 1879e6bdef2SMarc-André Lureau error_setg(errp, "Failed to write pid file"); 1889e6bdef2SMarc-André Lureau goto fail_unlink; 1899e6bdef2SMarc-André Lureau } 1909e6bdef2SMarc-André Lureau 1919e6bdef2SMarc-André Lureau return true; 1929e6bdef2SMarc-André Lureau 1939e6bdef2SMarc-André Lureau fail_unlink: 1949e6bdef2SMarc-André Lureau unlink(path); 1959e6bdef2SMarc-André Lureau fail_close: 1969e6bdef2SMarc-André Lureau close(fd); 1979e6bdef2SMarc-André Lureau return false; 1989e6bdef2SMarc-André Lureau } 1999e6bdef2SMarc-André Lureau 200baacf047SPaolo Bonzini /* alloc shared memory pages */ 
2018dbe22c6SDavid Hildenbrand void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared, 2028dbe22c6SDavid Hildenbrand bool noreserve) 203baacf047SPaolo Bonzini { 2048dbe22c6SDavid Hildenbrand const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) | 2058dbe22c6SDavid Hildenbrand (noreserve ? QEMU_MAP_NORESERVE : 0); 206baacf047SPaolo Bonzini size_t align = QEMU_VMALLOC_ALIGN; 207b444f5c0SDavid Hildenbrand void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0); 208baacf047SPaolo Bonzini 2097dda5dc8SPaolo Bonzini if (ptr == MAP_FAILED) { 21039228250SMarkus Armbruster return NULL; 211baacf047SPaolo Bonzini } 212baacf047SPaolo Bonzini 213a2b257d6SIgor Mammedov if (alignment) { 214a2b257d6SIgor Mammedov *alignment = align; 215a2b257d6SIgor Mammedov } 216c2dfc5baSMichael S. Tsirkin 2176eebf958SPaolo Bonzini trace_qemu_anon_ram_alloc(size, ptr); 218baacf047SPaolo Bonzini return ptr; 219baacf047SPaolo Bonzini } 220baacf047SPaolo Bonzini 221e7a09b92SPaolo Bonzini void qemu_anon_ram_free(void *ptr, size_t size) 222e7a09b92SPaolo Bonzini { 223e7a09b92SPaolo Bonzini trace_qemu_anon_ram_free(ptr, size); 22453adb9d4SMurilo Opsfelder Araujo qemu_ram_munmap(-1, ptr, size); 225e7a09b92SPaolo Bonzini } 226e7a09b92SPaolo Bonzini 227f9e8caccSStefan Hajnoczi void qemu_set_block(int fd) 228baacf047SPaolo Bonzini { 229baacf047SPaolo Bonzini int f; 230baacf047SPaolo Bonzini f = fcntl(fd, F_GETFL); 231da93b820SLi Qiang assert(f != -1); 232da93b820SLi Qiang f = fcntl(fd, F_SETFL, f & ~O_NONBLOCK); 233da93b820SLi Qiang assert(f != -1); 234baacf047SPaolo Bonzini } 235baacf047SPaolo Bonzini 236894022e6SLaurent Vivier int qemu_try_set_nonblock(int fd) 237baacf047SPaolo Bonzini { 238baacf047SPaolo Bonzini int f; 239baacf047SPaolo Bonzini f = fcntl(fd, F_GETFL); 24002cdcc96SPhilippe Mathieu-Daudé if (f == -1) { 241894022e6SLaurent Vivier return -errno; 242894022e6SLaurent Vivier } 243894022e6SLaurent Vivier if (fcntl(fd, F_SETFL, f | O_NONBLOCK) == -1) { 
244894022e6SLaurent Vivier return -errno; 245894022e6SLaurent Vivier } 246894022e6SLaurent Vivier return 0; 247894022e6SLaurent Vivier } 248894022e6SLaurent Vivier 249894022e6SLaurent Vivier void qemu_set_nonblock(int fd) 250894022e6SLaurent Vivier { 251894022e6SLaurent Vivier int f; 252894022e6SLaurent Vivier f = qemu_try_set_nonblock(fd); 253894022e6SLaurent Vivier assert(f == 0); 254baacf047SPaolo Bonzini } 255baacf047SPaolo Bonzini 256606600a1SSebastian Ottlik int socket_set_fast_reuse(int fd) 257606600a1SSebastian Ottlik { 258606600a1SSebastian Ottlik int val = 1, ret; 259606600a1SSebastian Ottlik 260606600a1SSebastian Ottlik ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 261606600a1SSebastian Ottlik (const char *)&val, sizeof(val)); 262606600a1SSebastian Ottlik 263606600a1SSebastian Ottlik assert(ret == 0); 264606600a1SSebastian Ottlik 265606600a1SSebastian Ottlik return ret; 266606600a1SSebastian Ottlik } 267606600a1SSebastian Ottlik 268baacf047SPaolo Bonzini void qemu_set_cloexec(int fd) 269baacf047SPaolo Bonzini { 270baacf047SPaolo Bonzini int f; 271baacf047SPaolo Bonzini f = fcntl(fd, F_GETFD); 2727e6478e7SStefano Stabellini assert(f != -1); 2737e6478e7SStefano Stabellini f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); 2747e6478e7SStefano Stabellini assert(f != -1); 275baacf047SPaolo Bonzini } 276baacf047SPaolo Bonzini 277baacf047SPaolo Bonzini /* 278baacf047SPaolo Bonzini * Creates a pipe with FD_CLOEXEC set on both file descriptors 279baacf047SPaolo Bonzini */ 280baacf047SPaolo Bonzini int qemu_pipe(int pipefd[2]) 281baacf047SPaolo Bonzini { 282baacf047SPaolo Bonzini int ret; 283baacf047SPaolo Bonzini 284baacf047SPaolo Bonzini #ifdef CONFIG_PIPE2 285baacf047SPaolo Bonzini ret = pipe2(pipefd, O_CLOEXEC); 286baacf047SPaolo Bonzini if (ret != -1 || errno != ENOSYS) { 287baacf047SPaolo Bonzini return ret; 288baacf047SPaolo Bonzini } 289baacf047SPaolo Bonzini #endif 290baacf047SPaolo Bonzini ret = pipe(pipefd); 291baacf047SPaolo Bonzini if (ret == 0) { 
292baacf047SPaolo Bonzini qemu_set_cloexec(pipefd[0]); 293baacf047SPaolo Bonzini qemu_set_cloexec(pipefd[1]); 294baacf047SPaolo Bonzini } 295baacf047SPaolo Bonzini 296baacf047SPaolo Bonzini return ret; 297baacf047SPaolo Bonzini } 298baacf047SPaolo Bonzini 299e2ea3515SLaszlo Ersek char * 300e2ea3515SLaszlo Ersek qemu_get_local_state_pathname(const char *relative_pathname) 301e2ea3515SLaszlo Ersek { 302fcb4f59cSPaolo Bonzini g_autofree char *dir = g_strdup_printf("%s/%s", 303fcb4f59cSPaolo Bonzini CONFIG_QEMU_LOCALSTATEDIR, 304e2ea3515SLaszlo Ersek relative_pathname); 305fcb4f59cSPaolo Bonzini return get_relocated_path(dir); 306e2ea3515SLaszlo Ersek } 30713401ba0SStefan Hajnoczi 30813401ba0SStefan Hajnoczi void qemu_set_tty_echo(int fd, bool echo) 30913401ba0SStefan Hajnoczi { 31013401ba0SStefan Hajnoczi struct termios tty; 31113401ba0SStefan Hajnoczi 31213401ba0SStefan Hajnoczi tcgetattr(fd, &tty); 31313401ba0SStefan Hajnoczi 31413401ba0SStefan Hajnoczi if (echo) { 31513401ba0SStefan Hajnoczi tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN; 31613401ba0SStefan Hajnoczi } else { 31713401ba0SStefan Hajnoczi tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN); 31813401ba0SStefan Hajnoczi } 31913401ba0SStefan Hajnoczi 32013401ba0SStefan Hajnoczi tcsetattr(fd, TCSANOW, &tty); 32113401ba0SStefan Hajnoczi } 32210f5bff6SFam Zheng 3239386a4a7SPaolo Bonzini static const char *exec_dir; 32410f5bff6SFam Zheng 32510f5bff6SFam Zheng void qemu_init_exec_dir(const char *argv0) 32610f5bff6SFam Zheng { 32710f5bff6SFam Zheng char *p = NULL; 32810f5bff6SFam Zheng char buf[PATH_MAX]; 32910f5bff6SFam Zheng 3309386a4a7SPaolo Bonzini if (exec_dir) { 331a4c13869SPaolo Bonzini return; 332a4c13869SPaolo Bonzini } 33310f5bff6SFam Zheng 33410f5bff6SFam Zheng #if defined(__linux__) 33510f5bff6SFam Zheng { 33610f5bff6SFam Zheng int len; 33710f5bff6SFam Zheng len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); 33810f5bff6SFam Zheng if (len > 0) { 33910f5bff6SFam Zheng buf[len] = 0; 34010f5bff6SFam 
Zheng p = buf; 34110f5bff6SFam Zheng } 34210f5bff6SFam Zheng } 343094611b4SKamil Rytarowski #elif defined(__FreeBSD__) \ 344094611b4SKamil Rytarowski || (defined(__NetBSD__) && defined(KERN_PROC_PATHNAME)) 34510f5bff6SFam Zheng { 346094611b4SKamil Rytarowski #if defined(__FreeBSD__) 34710f5bff6SFam Zheng static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; 348094611b4SKamil Rytarowski #else 349094611b4SKamil Rytarowski static int mib[4] = {CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME}; 350094611b4SKamil Rytarowski #endif 35110f5bff6SFam Zheng size_t len = sizeof(buf) - 1; 35210f5bff6SFam Zheng 35310f5bff6SFam Zheng *buf = '\0'; 35410f5bff6SFam Zheng if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) && 35510f5bff6SFam Zheng *buf) { 35610f5bff6SFam Zheng buf[sizeof(buf) - 1] = '\0'; 35710f5bff6SFam Zheng p = buf; 35810f5bff6SFam Zheng } 35910f5bff6SFam Zheng } 3602032e243SDavid CARLIER #elif defined(__APPLE__) 3612032e243SDavid CARLIER { 3622032e243SDavid CARLIER char fpath[PATH_MAX]; 3632032e243SDavid CARLIER uint32_t len = sizeof(fpath); 3642032e243SDavid CARLIER if (_NSGetExecutablePath(fpath, &len) == 0) { 3652032e243SDavid CARLIER p = realpath(fpath, buf); 3662032e243SDavid CARLIER if (!p) { 3672032e243SDavid CARLIER return; 3682032e243SDavid CARLIER } 3692032e243SDavid CARLIER } 3702032e243SDavid CARLIER } 3712b9b9e70SDavid CARLIER #elif defined(__HAIKU__) 3722b9b9e70SDavid CARLIER { 3732b9b9e70SDavid CARLIER image_info ii; 3742b9b9e70SDavid CARLIER int32_t c = 0; 3752b9b9e70SDavid CARLIER 3762b9b9e70SDavid CARLIER *buf = '\0'; 3772b9b9e70SDavid CARLIER while (get_next_image_info(0, &c, &ii) == B_OK) { 3782b9b9e70SDavid CARLIER if (ii.type == B_APP_IMAGE) { 3792b9b9e70SDavid CARLIER strncpy(buf, ii.name, sizeof(buf)); 3802b9b9e70SDavid CARLIER buf[sizeof(buf) - 1] = 0; 3812b9b9e70SDavid CARLIER p = buf; 3822b9b9e70SDavid CARLIER break; 3832b9b9e70SDavid CARLIER } 3842b9b9e70SDavid CARLIER } 3852b9b9e70SDavid CARLIER } 38610f5bff6SFam Zheng 
#endif 38710f5bff6SFam Zheng /* If we don't have any way of figuring out the actual executable 38810f5bff6SFam Zheng location then try argv[0]. */ 3899386a4a7SPaolo Bonzini if (!p && argv0) { 39010f5bff6SFam Zheng p = realpath(argv0, buf); 39110f5bff6SFam Zheng } 3929386a4a7SPaolo Bonzini if (p) { 3939386a4a7SPaolo Bonzini exec_dir = g_path_get_dirname(p); 3949386a4a7SPaolo Bonzini } else { 3959386a4a7SPaolo Bonzini exec_dir = CONFIG_BINDIR; 39610f5bff6SFam Zheng } 39710f5bff6SFam Zheng } 39810f5bff6SFam Zheng 399a4c13869SPaolo Bonzini const char *qemu_get_exec_dir(void) 40010f5bff6SFam Zheng { 401a4c13869SPaolo Bonzini return exec_dir; 40210f5bff6SFam Zheng } 40338183310SPaolo Bonzini 40429b838c0SDavid Hildenbrand #ifdef CONFIG_LINUX 40529b838c0SDavid Hildenbrand static void sigbus_handler(int signal, siginfo_t *siginfo, void *ctx) 40629b838c0SDavid Hildenbrand #else /* CONFIG_LINUX */ 40738183310SPaolo Bonzini static void sigbus_handler(int signal) 40829b838c0SDavid Hildenbrand #endif /* CONFIG_LINUX */ 40938183310SPaolo Bonzini { 4101e356fc1SJitendra Kolhe int i; 411dba50678SDavid Hildenbrand 412dba50678SDavid Hildenbrand if (sigbus_memset_context) { 413dba50678SDavid Hildenbrand for (i = 0; i < sigbus_memset_context->num_threads; i++) { 414dba50678SDavid Hildenbrand MemsetThread *thread = &sigbus_memset_context->threads[i]; 415dba50678SDavid Hildenbrand 416dba50678SDavid Hildenbrand if (qemu_thread_is_self(&thread->pgthread)) { 417dba50678SDavid Hildenbrand siglongjmp(thread->env, 1); 4181e356fc1SJitendra Kolhe } 4191e356fc1SJitendra Kolhe } 4201e356fc1SJitendra Kolhe } 42129b838c0SDavid Hildenbrand 42229b838c0SDavid Hildenbrand #ifdef CONFIG_LINUX 42329b838c0SDavid Hildenbrand /* 42429b838c0SDavid Hildenbrand * We assume that the MCE SIGBUS handler could have been registered. We 42529b838c0SDavid Hildenbrand * should never receive BUS_MCEERR_AO on any of our threads, but only on 42629b838c0SDavid Hildenbrand * the main thread registered for PR_MCE_KILL_EARLY. 
Further, we should not 42729b838c0SDavid Hildenbrand * receive BUS_MCEERR_AR triggered by action of other threads on one of 42829b838c0SDavid Hildenbrand * our threads. So, no need to check for unrelated SIGBUS when seeing one 42929b838c0SDavid Hildenbrand * for our threads. 43029b838c0SDavid Hildenbrand * 43129b838c0SDavid Hildenbrand * We will forward to the MCE handler, which will either handle the SIGBUS 43229b838c0SDavid Hildenbrand * or reinstall the default SIGBUS handler and reraise the SIGBUS. The 43329b838c0SDavid Hildenbrand * default SIGBUS handler will crash the process, so we don't care. 43429b838c0SDavid Hildenbrand */ 43529b838c0SDavid Hildenbrand if (sigbus_oldact.sa_flags & SA_SIGINFO) { 43629b838c0SDavid Hildenbrand sigbus_oldact.sa_sigaction(signal, siginfo, ctx); 43729b838c0SDavid Hildenbrand return; 43829b838c0SDavid Hildenbrand } 43929b838c0SDavid Hildenbrand #endif /* CONFIG_LINUX */ 44029b838c0SDavid Hildenbrand warn_report("os_mem_prealloc: unrelated SIGBUS detected and ignored"); 44138183310SPaolo Bonzini } 44238183310SPaolo Bonzini 4431e356fc1SJitendra Kolhe static void *do_touch_pages(void *arg) 4441e356fc1SJitendra Kolhe { 4451e356fc1SJitendra Kolhe MemsetThread *memset_args = (MemsetThread *)arg; 4461e356fc1SJitendra Kolhe sigset_t set, oldset; 4476c427ab9SDavid Hildenbrand int ret = 0; 4481e356fc1SJitendra Kolhe 449037fb5ebSbauerchen /* 450037fb5ebSbauerchen * On Linux, the page faults from the loop below can cause mmap_sem 451037fb5ebSbauerchen * contention with allocation of the thread stacks. Do not start 452037fb5ebSbauerchen * clearing until all threads have been created. 
453037fb5ebSbauerchen */ 454037fb5ebSbauerchen qemu_mutex_lock(&page_mutex); 455dba50678SDavid Hildenbrand while (!memset_args->context->all_threads_created) { 456037fb5ebSbauerchen qemu_cond_wait(&page_cond, &page_mutex); 457037fb5ebSbauerchen } 458037fb5ebSbauerchen qemu_mutex_unlock(&page_mutex); 459037fb5ebSbauerchen 4601e356fc1SJitendra Kolhe /* unblock SIGBUS */ 4611e356fc1SJitendra Kolhe sigemptyset(&set); 4621e356fc1SJitendra Kolhe sigaddset(&set, SIGBUS); 4631e356fc1SJitendra Kolhe pthread_sigmask(SIG_UNBLOCK, &set, &oldset); 4641e356fc1SJitendra Kolhe 4651e356fc1SJitendra Kolhe if (sigsetjmp(memset_args->env, 1)) { 4666c427ab9SDavid Hildenbrand ret = -EFAULT; 4671e356fc1SJitendra Kolhe } else { 468e947d47dSStefan Weil char *addr = memset_args->addr; 469e947d47dSStefan Weil size_t numpages = memset_args->numpages; 470e947d47dSStefan Weil size_t hpagesize = memset_args->hpagesize; 471e947d47dSStefan Weil size_t i; 4721e356fc1SJitendra Kolhe for (i = 0; i < numpages; i++) { 4739dc44aa5SDaniel P. Berrange /* 4749dc44aa5SDaniel P. Berrange * Read & write back the same value, so we don't 4759dc44aa5SDaniel P. Berrange * corrupt existing user/app data that might be 4769dc44aa5SDaniel P. Berrange * stored. 4779dc44aa5SDaniel P. Berrange * 4789dc44aa5SDaniel P. Berrange * 'volatile' to stop compiler optimizing this away 4799dc44aa5SDaniel P. Berrange * to a no-op 4809dc44aa5SDaniel P. Berrange */ 4819dc44aa5SDaniel P. 
Berrange *(volatile char *)addr = *addr; 4821e356fc1SJitendra Kolhe addr += hpagesize; 4831e356fc1SJitendra Kolhe } 4841e356fc1SJitendra Kolhe } 4851e356fc1SJitendra Kolhe pthread_sigmask(SIG_SETMASK, &oldset, NULL); 4866c427ab9SDavid Hildenbrand return (void *)(uintptr_t)ret; 4871e356fc1SJitendra Kolhe } 4881e356fc1SJitendra Kolhe 489a384bfa3SDavid Hildenbrand static void *do_madv_populate_write_pages(void *arg) 490a384bfa3SDavid Hildenbrand { 491a384bfa3SDavid Hildenbrand MemsetThread *memset_args = (MemsetThread *)arg; 492a384bfa3SDavid Hildenbrand const size_t size = memset_args->numpages * memset_args->hpagesize; 493a384bfa3SDavid Hildenbrand char * const addr = memset_args->addr; 494a384bfa3SDavid Hildenbrand int ret = 0; 495a384bfa3SDavid Hildenbrand 496a384bfa3SDavid Hildenbrand /* See do_touch_pages(). */ 497a384bfa3SDavid Hildenbrand qemu_mutex_lock(&page_mutex); 498dba50678SDavid Hildenbrand while (!memset_args->context->all_threads_created) { 499a384bfa3SDavid Hildenbrand qemu_cond_wait(&page_cond, &page_mutex); 500a384bfa3SDavid Hildenbrand } 501a384bfa3SDavid Hildenbrand qemu_mutex_unlock(&page_mutex); 502a384bfa3SDavid Hildenbrand 503a384bfa3SDavid Hildenbrand if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) { 504a384bfa3SDavid Hildenbrand ret = -errno; 505a384bfa3SDavid Hildenbrand } 506a384bfa3SDavid Hildenbrand return (void *)(uintptr_t)ret; 507a384bfa3SDavid Hildenbrand } 508a384bfa3SDavid Hildenbrand 50989aec641SDavid Hildenbrand static inline int get_memset_num_threads(size_t hpagesize, size_t numpages, 51089aec641SDavid Hildenbrand int smp_cpus) 511dfd0dcc7SJitendra Kolhe { 512dfd0dcc7SJitendra Kolhe long host_procs = sysconf(_SC_NPROCESSORS_ONLN); 513dfd0dcc7SJitendra Kolhe int ret = 1; 514dfd0dcc7SJitendra Kolhe 515dfd0dcc7SJitendra Kolhe if (host_procs > 0) { 516dfd0dcc7SJitendra Kolhe ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus); 517dfd0dcc7SJitendra Kolhe } 51889aec641SDavid Hildenbrand 
51989aec641SDavid Hildenbrand /* Especially with gigantic pages, don't create more threads than pages. */ 52089aec641SDavid Hildenbrand ret = MIN(ret, numpages); 52189aec641SDavid Hildenbrand /* Don't start threads to prealloc comparatively little memory. */ 52289aec641SDavid Hildenbrand ret = MIN(ret, MAX(1, hpagesize * numpages / (64 * MiB))); 52389aec641SDavid Hildenbrand 524dfd0dcc7SJitendra Kolhe /* In case sysconf() fails, we fall back to single threaded */ 525dfd0dcc7SJitendra Kolhe return ret; 526dfd0dcc7SJitendra Kolhe } 527dfd0dcc7SJitendra Kolhe 5286c427ab9SDavid Hildenbrand static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, 529a384bfa3SDavid Hildenbrand int smp_cpus, bool use_madv_populate_write) 5301e356fc1SJitendra Kolhe { 53178b3f67aSPaolo Bonzini static gsize initialized = 0; 532dba50678SDavid Hildenbrand MemsetContext context = { 53389aec641SDavid Hildenbrand .num_threads = get_memset_num_threads(hpagesize, numpages, smp_cpus), 534dba50678SDavid Hildenbrand }; 535037fb5ebSbauerchen size_t numpages_per_thread, leftover; 536a384bfa3SDavid Hildenbrand void *(*touch_fn)(void *); 5376c427ab9SDavid Hildenbrand int ret = 0, i = 0; 5381e356fc1SJitendra Kolhe char *addr = area; 5391e356fc1SJitendra Kolhe 54078b3f67aSPaolo Bonzini if (g_once_init_enter(&initialized)) { 54178b3f67aSPaolo Bonzini qemu_mutex_init(&page_mutex); 54278b3f67aSPaolo Bonzini qemu_cond_init(&page_cond); 54378b3f67aSPaolo Bonzini g_once_init_leave(&initialized, 1); 54478b3f67aSPaolo Bonzini } 54578b3f67aSPaolo Bonzini 546a384bfa3SDavid Hildenbrand if (use_madv_populate_write) { 547ac86e5c3SDavid Hildenbrand /* Avoid creating a single thread for MADV_POPULATE_WRITE */ 548ac86e5c3SDavid Hildenbrand if (context.num_threads == 1) { 549ac86e5c3SDavid Hildenbrand if (qemu_madvise(area, hpagesize * numpages, 550ac86e5c3SDavid Hildenbrand QEMU_MADV_POPULATE_WRITE)) { 551ac86e5c3SDavid Hildenbrand return -errno; 552ac86e5c3SDavid Hildenbrand } 553ac86e5c3SDavid 
Hildenbrand return 0; 554ac86e5c3SDavid Hildenbrand } 555a384bfa3SDavid Hildenbrand touch_fn = do_madv_populate_write_pages; 556a384bfa3SDavid Hildenbrand } else { 557a384bfa3SDavid Hildenbrand touch_fn = do_touch_pages; 558a384bfa3SDavid Hildenbrand } 559a384bfa3SDavid Hildenbrand 560dba50678SDavid Hildenbrand context.threads = g_new0(MemsetThread, context.num_threads); 561dba50678SDavid Hildenbrand numpages_per_thread = numpages / context.num_threads; 562dba50678SDavid Hildenbrand leftover = numpages % context.num_threads; 563dba50678SDavid Hildenbrand for (i = 0; i < context.num_threads; i++) { 564dba50678SDavid Hildenbrand context.threads[i].addr = addr; 565dba50678SDavid Hildenbrand context.threads[i].numpages = numpages_per_thread + (i < leftover); 566dba50678SDavid Hildenbrand context.threads[i].hpagesize = hpagesize; 567dba50678SDavid Hildenbrand context.threads[i].context = &context; 568dba50678SDavid Hildenbrand qemu_thread_create(&context.threads[i].pgthread, "touch_pages", 569dba50678SDavid Hildenbrand touch_fn, &context.threads[i], 5701e356fc1SJitendra Kolhe QEMU_THREAD_JOINABLE); 571dba50678SDavid Hildenbrand addr += context.threads[i].numpages * hpagesize; 572dba50678SDavid Hildenbrand } 573dba50678SDavid Hildenbrand 574dba50678SDavid Hildenbrand if (!use_madv_populate_write) { 575dba50678SDavid Hildenbrand sigbus_memset_context = &context; 5761e356fc1SJitendra Kolhe } 577278fb162SBauerchen 578278fb162SBauerchen qemu_mutex_lock(&page_mutex); 579dba50678SDavid Hildenbrand context.all_threads_created = true; 580037fb5ebSbauerchen qemu_cond_broadcast(&page_cond); 581278fb162SBauerchen qemu_mutex_unlock(&page_mutex); 582037fb5ebSbauerchen 583dba50678SDavid Hildenbrand for (i = 0; i < context.num_threads; i++) { 584dba50678SDavid Hildenbrand int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread); 5856c427ab9SDavid Hildenbrand 5866c427ab9SDavid Hildenbrand if (tmp) { 5876c427ab9SDavid Hildenbrand ret = tmp; 5886c427ab9SDavid Hildenbrand } 
5891e356fc1SJitendra Kolhe } 590dba50678SDavid Hildenbrand 591dba50678SDavid Hildenbrand if (!use_madv_populate_write) { 592dba50678SDavid Hildenbrand sigbus_memset_context = NULL; 593dba50678SDavid Hildenbrand } 594dba50678SDavid Hildenbrand g_free(context.threads); 5951e356fc1SJitendra Kolhe 5966c427ab9SDavid Hildenbrand return ret; 5971e356fc1SJitendra Kolhe } 5981e356fc1SJitendra Kolhe 599a384bfa3SDavid Hildenbrand static bool madv_populate_write_possible(char *area, size_t pagesize) 600a384bfa3SDavid Hildenbrand { 601a384bfa3SDavid Hildenbrand return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) || 602a384bfa3SDavid Hildenbrand errno != EINVAL; 603a384bfa3SDavid Hildenbrand } 604a384bfa3SDavid Hildenbrand 6051e356fc1SJitendra Kolhe void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, 6061e356fc1SJitendra Kolhe Error **errp) 60738183310SPaolo Bonzini { 608a960d664SDavid Hildenbrand static gsize initialized; 609b7bf8f56SStefan Weil int ret; 6101e356fc1SJitendra Kolhe size_t hpagesize = qemu_fd_getpagesize(fd); 6111e356fc1SJitendra Kolhe size_t numpages = DIV_ROUND_UP(memory, hpagesize); 612a384bfa3SDavid Hildenbrand bool use_madv_populate_write; 61329b838c0SDavid Hildenbrand struct sigaction act; 61438183310SPaolo Bonzini 615a384bfa3SDavid Hildenbrand /* 616a384bfa3SDavid Hildenbrand * Sense on every invocation, as MADV_POPULATE_WRITE cannot be used for 617a384bfa3SDavid Hildenbrand * some special mappings, such as mapping /dev/mem. 
618a384bfa3SDavid Hildenbrand */ 619a384bfa3SDavid Hildenbrand use_madv_populate_write = madv_populate_write_possible(area, hpagesize); 620a384bfa3SDavid Hildenbrand 621a384bfa3SDavid Hildenbrand if (!use_madv_populate_write) { 622a960d664SDavid Hildenbrand if (g_once_init_enter(&initialized)) { 623a960d664SDavid Hildenbrand qemu_mutex_init(&sigbus_mutex); 624a960d664SDavid Hildenbrand g_once_init_leave(&initialized, 1); 625a960d664SDavid Hildenbrand } 626a960d664SDavid Hildenbrand 627a960d664SDavid Hildenbrand qemu_mutex_lock(&sigbus_mutex); 62838183310SPaolo Bonzini memset(&act, 0, sizeof(act)); 62929b838c0SDavid Hildenbrand #ifdef CONFIG_LINUX 63029b838c0SDavid Hildenbrand act.sa_sigaction = &sigbus_handler; 63129b838c0SDavid Hildenbrand act.sa_flags = SA_SIGINFO; 63229b838c0SDavid Hildenbrand #else /* CONFIG_LINUX */ 63338183310SPaolo Bonzini act.sa_handler = &sigbus_handler; 63438183310SPaolo Bonzini act.sa_flags = 0; 63529b838c0SDavid Hildenbrand #endif /* CONFIG_LINUX */ 63638183310SPaolo Bonzini 63729b838c0SDavid Hildenbrand ret = sigaction(SIGBUS, &act, &sigbus_oldact); 63838183310SPaolo Bonzini if (ret) { 639dd4fc605SDavid Hildenbrand qemu_mutex_unlock(&sigbus_mutex); 640056b68afSIgor Mammedov error_setg_errno(errp, errno, 641056b68afSIgor Mammedov "os_mem_prealloc: failed to install signal handler"); 642056b68afSIgor Mammedov return; 64338183310SPaolo Bonzini } 644a384bfa3SDavid Hildenbrand } 64538183310SPaolo Bonzini 6461e356fc1SJitendra Kolhe /* touch pages simultaneously */ 647a384bfa3SDavid Hildenbrand ret = touch_all_pages(area, hpagesize, numpages, smp_cpus, 648a384bfa3SDavid Hildenbrand use_madv_populate_write); 6496c427ab9SDavid Hildenbrand if (ret) { 6506c427ab9SDavid Hildenbrand error_setg_errno(errp, -ret, 6516c427ab9SDavid Hildenbrand "os_mem_prealloc: preallocating memory failed"); 652056b68afSIgor Mammedov } 65338183310SPaolo Bonzini 654a384bfa3SDavid Hildenbrand if (!use_madv_populate_write) { 65529b838c0SDavid Hildenbrand ret = 
sigaction(SIGBUS, &sigbus_oldact, NULL); 65638183310SPaolo Bonzini if (ret) { 657056b68afSIgor Mammedov /* Terminate QEMU since it can't recover from error */ 65838183310SPaolo Bonzini perror("os_mem_prealloc: failed to reinstall signal handler"); 65938183310SPaolo Bonzini exit(1); 66038183310SPaolo Bonzini } 661a960d664SDavid Hildenbrand qemu_mutex_unlock(&sigbus_mutex); 66238183310SPaolo Bonzini } 663a384bfa3SDavid Hildenbrand } 664d57e4e48SDaniel P. Berrange 6657dc9ae43SMichal Privoznik char *qemu_get_pid_name(pid_t pid) 6667dc9ae43SMichal Privoznik { 6677dc9ae43SMichal Privoznik char *name = NULL; 6687dc9ae43SMichal Privoznik 6697dc9ae43SMichal Privoznik #if defined(__FreeBSD__) 6707dc9ae43SMichal Privoznik /* BSDs don't have /proc, but they provide a nice substitute */ 6717dc9ae43SMichal Privoznik struct kinfo_proc *proc = kinfo_getproc(pid); 6727dc9ae43SMichal Privoznik 6737dc9ae43SMichal Privoznik if (proc) { 6747dc9ae43SMichal Privoznik name = g_strdup(proc->ki_comm); 6757dc9ae43SMichal Privoznik free(proc); 6767dc9ae43SMichal Privoznik } 6777dc9ae43SMichal Privoznik #else 6787dc9ae43SMichal Privoznik /* Assume a system with reasonable procfs */ 6797dc9ae43SMichal Privoznik char *pid_path; 6807dc9ae43SMichal Privoznik size_t len; 6817dc9ae43SMichal Privoznik 6827dc9ae43SMichal Privoznik pid_path = g_strdup_printf("/proc/%d/cmdline", pid); 6837dc9ae43SMichal Privoznik g_file_get_contents(pid_path, &name, &len, NULL); 6847dc9ae43SMichal Privoznik g_free(pid_path); 6857dc9ae43SMichal Privoznik #endif 6867dc9ae43SMichal Privoznik 6877dc9ae43SMichal Privoznik return name; 6887dc9ae43SMichal Privoznik } 6897dc9ae43SMichal Privoznik 6907dc9ae43SMichal Privoznik 69157cb38b3SDaniel P. Berrange pid_t qemu_fork(Error **errp) 69257cb38b3SDaniel P. Berrange { 69357cb38b3SDaniel P. Berrange sigset_t oldmask, newmask; 69457cb38b3SDaniel P. Berrange struct sigaction sig_action; 69557cb38b3SDaniel P. Berrange int saved_errno; 69657cb38b3SDaniel P. 
Berrange pid_t pid; 69757cb38b3SDaniel P. Berrange 69857cb38b3SDaniel P. Berrange /* 69957cb38b3SDaniel P. Berrange * Need to block signals now, so that child process can safely 70057cb38b3SDaniel P. Berrange * kill off caller's signal handlers without a race. 70157cb38b3SDaniel P. Berrange */ 70257cb38b3SDaniel P. Berrange sigfillset(&newmask); 70357cb38b3SDaniel P. Berrange if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) { 70457cb38b3SDaniel P. Berrange error_setg_errno(errp, errno, 70557cb38b3SDaniel P. Berrange "cannot block signals"); 70657cb38b3SDaniel P. Berrange return -1; 70757cb38b3SDaniel P. Berrange } 70857cb38b3SDaniel P. Berrange 70957cb38b3SDaniel P. Berrange pid = fork(); 71057cb38b3SDaniel P. Berrange saved_errno = errno; 71157cb38b3SDaniel P. Berrange 71257cb38b3SDaniel P. Berrange if (pid < 0) { 71357cb38b3SDaniel P. Berrange /* attempt to restore signal mask, but ignore failure, to 71457cb38b3SDaniel P. Berrange * avoid obscuring the fork failure */ 71557cb38b3SDaniel P. Berrange (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); 71657cb38b3SDaniel P. Berrange error_setg_errno(errp, saved_errno, 71757cb38b3SDaniel P. Berrange "cannot fork child process"); 71857cb38b3SDaniel P. Berrange errno = saved_errno; 71957cb38b3SDaniel P. Berrange return -1; 72057cb38b3SDaniel P. Berrange } else if (pid) { 72157cb38b3SDaniel P. Berrange /* parent process */ 72257cb38b3SDaniel P. Berrange 72357cb38b3SDaniel P. Berrange /* Restore our original signal mask now that the child is 72457cb38b3SDaniel P. Berrange * safely running. Only documented failures are EFAULT (not 72557cb38b3SDaniel P. Berrange * possible, since we are using just-grabbed mask) or EINVAL 72657cb38b3SDaniel P. Berrange * (not possible, since we are using correct arguments). */ 72757cb38b3SDaniel P. Berrange (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); 72857cb38b3SDaniel P. Berrange } else { 72957cb38b3SDaniel P. Berrange /* child process */ 73057cb38b3SDaniel P. 
Berrange size_t i; 73157cb38b3SDaniel P. Berrange 73257cb38b3SDaniel P. Berrange /* Clear out all signal handlers from parent so nothing 73357cb38b3SDaniel P. Berrange * unexpected can happen in our child once we unblock 73457cb38b3SDaniel P. Berrange * signals */ 73557cb38b3SDaniel P. Berrange sig_action.sa_handler = SIG_DFL; 73657cb38b3SDaniel P. Berrange sig_action.sa_flags = 0; 73757cb38b3SDaniel P. Berrange sigemptyset(&sig_action.sa_mask); 73857cb38b3SDaniel P. Berrange 73957cb38b3SDaniel P. Berrange for (i = 1; i < NSIG; i++) { 74057cb38b3SDaniel P. Berrange /* Only possible errors are EFAULT or EINVAL The former 74157cb38b3SDaniel P. Berrange * won't happen, the latter we expect, so no need to check 74257cb38b3SDaniel P. Berrange * return value */ 74357cb38b3SDaniel P. Berrange (void)sigaction(i, &sig_action, NULL); 74457cb38b3SDaniel P. Berrange } 74557cb38b3SDaniel P. Berrange 74657cb38b3SDaniel P. Berrange /* Unmask all signals in child, since we've no idea what the 74757cb38b3SDaniel P. Berrange * caller's done with their signal mask and don't want to 74857cb38b3SDaniel P. Berrange * propagate that to children */ 74957cb38b3SDaniel P. Berrange sigemptyset(&newmask); 75057cb38b3SDaniel P. Berrange if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) { 75157cb38b3SDaniel P. Berrange Error *local_err = NULL; 75257cb38b3SDaniel P. Berrange error_setg_errno(&local_err, errno, 75357cb38b3SDaniel P. Berrange "cannot unblock signals"); 75457cb38b3SDaniel P. Berrange error_report_err(local_err); 75557cb38b3SDaniel P. Berrange _exit(1); 75657cb38b3SDaniel P. Berrange } 75757cb38b3SDaniel P. Berrange } 75857cb38b3SDaniel P. Berrange return pid; 75957cb38b3SDaniel P. 
Berrange } 7608737d9e0SPeter Lieven 7618737d9e0SPeter Lieven void *qemu_alloc_stack(size_t *sz) 7628737d9e0SPeter Lieven { 7638737d9e0SPeter Lieven void *ptr, *guardpage; 764fc3d1badSBrad Smith int flags; 7657d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE 7667d992e4dSPeter Lieven void *ptr2; 7677d992e4dSPeter Lieven #endif 7688e3b0cbbSMarc-André Lureau size_t pagesz = qemu_real_host_page_size(); 7698737d9e0SPeter Lieven #ifdef _SC_THREAD_STACK_MIN 7708737d9e0SPeter Lieven /* avoid stacks smaller than _SC_THREAD_STACK_MIN */ 7718737d9e0SPeter Lieven long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN); 7728737d9e0SPeter Lieven *sz = MAX(MAX(min_stack_sz, 0), *sz); 7738737d9e0SPeter Lieven #endif 7748737d9e0SPeter Lieven /* adjust stack size to a multiple of the page size */ 7758737d9e0SPeter Lieven *sz = ROUND_UP(*sz, pagesz); 7768737d9e0SPeter Lieven /* allocate one extra page for the guard page */ 7778737d9e0SPeter Lieven *sz += pagesz; 7788737d9e0SPeter Lieven 779fc3d1badSBrad Smith flags = MAP_PRIVATE | MAP_ANONYMOUS; 780fc3d1badSBrad Smith #if defined(MAP_STACK) && defined(__OpenBSD__) 781fc3d1badSBrad Smith /* Only enable MAP_STACK on OpenBSD. Other OS's such as 782fc3d1badSBrad Smith * Linux/FreeBSD/NetBSD have a flag with the same name 783fc3d1badSBrad Smith * but have differing functionality. OpenBSD will SEGV 784fc3d1badSBrad Smith * if it spots execution with a stack pointer pointing 785fc3d1badSBrad Smith * at memory that was not allocated with MAP_STACK. 
786fc3d1badSBrad Smith */ 787fc3d1badSBrad Smith flags |= MAP_STACK; 788fc3d1badSBrad Smith #endif 789fc3d1badSBrad Smith 790fc3d1badSBrad Smith ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, flags, -1, 0); 7918737d9e0SPeter Lieven if (ptr == MAP_FAILED) { 792e916a6e8SEduardo Habkost perror("failed to allocate memory for stack"); 7938737d9e0SPeter Lieven abort(); 7948737d9e0SPeter Lieven } 7958737d9e0SPeter Lieven 7968737d9e0SPeter Lieven #if defined(HOST_IA64) 7978737d9e0SPeter Lieven /* separate register stack */ 7988737d9e0SPeter Lieven guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz); 7998737d9e0SPeter Lieven #elif defined(HOST_HPPA) 8008737d9e0SPeter Lieven /* stack grows up */ 8018737d9e0SPeter Lieven guardpage = ptr + *sz - pagesz; 8028737d9e0SPeter Lieven #else 8038737d9e0SPeter Lieven /* stack grows down */ 8048737d9e0SPeter Lieven guardpage = ptr; 8058737d9e0SPeter Lieven #endif 8068737d9e0SPeter Lieven if (mprotect(guardpage, pagesz, PROT_NONE) != 0) { 807e916a6e8SEduardo Habkost perror("failed to set up stack guard page"); 8088737d9e0SPeter Lieven abort(); 8098737d9e0SPeter Lieven } 8108737d9e0SPeter Lieven 8117d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE 8127d992e4dSPeter Lieven for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) { 8137d992e4dSPeter Lieven *(uint32_t *)ptr2 = 0xdeadbeaf; 8147d992e4dSPeter Lieven } 8157d992e4dSPeter Lieven #endif 8167d992e4dSPeter Lieven 8178737d9e0SPeter Lieven return ptr; 8188737d9e0SPeter Lieven } 8198737d9e0SPeter Lieven 8207d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE 8217d992e4dSPeter Lieven static __thread unsigned int max_stack_usage; 8227d992e4dSPeter Lieven #endif 8237d992e4dSPeter Lieven 8248737d9e0SPeter Lieven void qemu_free_stack(void *stack, size_t sz) 8258737d9e0SPeter Lieven { 8267d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE 8277d992e4dSPeter Lieven unsigned int usage; 8287d992e4dSPeter Lieven void *ptr; 8297d992e4dSPeter Lieven 8308e3b0cbbSMarc-André Lureau 
for (ptr = stack + qemu_real_host_page_size(); ptr < stack + sz; 8317d992e4dSPeter Lieven ptr += sizeof(uint32_t)) { 8327d992e4dSPeter Lieven if (*(uint32_t *)ptr != 0xdeadbeaf) { 8337d992e4dSPeter Lieven break; 8347d992e4dSPeter Lieven } 8357d992e4dSPeter Lieven } 8367d992e4dSPeter Lieven usage = sz - (uintptr_t) (ptr - stack); 8377d992e4dSPeter Lieven if (usage > max_stack_usage) { 8387d992e4dSPeter Lieven error_report("thread %d max stack usage increased from %u to %u", 8397d992e4dSPeter Lieven qemu_get_thread_id(), max_stack_usage, usage); 8407d992e4dSPeter Lieven max_stack_usage = usage; 8417d992e4dSPeter Lieven } 8427d992e4dSPeter Lieven #endif 8437d992e4dSPeter Lieven 8448737d9e0SPeter Lieven munmap(stack, sz); 8458737d9e0SPeter Lieven } 846d98d4072SPaolo Bonzini 847c905a368SDaniele Buono /* 848c905a368SDaniele Buono * Disable CFI checks. 849c905a368SDaniele Buono * We are going to call a signal hander directly. Such handler may or may not 850c905a368SDaniele Buono * have been defined in our binary, so there's no guarantee that the pointer 851c905a368SDaniele Buono * used to set the handler is a cfi-valid pointer. Since the handlers are 852c905a368SDaniele Buono * stored in kernel memory, changing the handler to an attacker-defined 853c905a368SDaniele Buono * function requires being able to call a sigaction() syscall, 854c905a368SDaniele Buono * which is not as easy as overwriting a pointer in memory. 855c905a368SDaniele Buono */ 856c905a368SDaniele Buono QEMU_DISABLE_CFI 857d98d4072SPaolo Bonzini void sigaction_invoke(struct sigaction *action, 858d98d4072SPaolo Bonzini struct qemu_signalfd_siginfo *info) 859d98d4072SPaolo Bonzini { 86002ffa034SPeter Maydell siginfo_t si = {}; 861d98d4072SPaolo Bonzini si.si_signo = info->ssi_signo; 862d98d4072SPaolo Bonzini si.si_errno = info->ssi_errno; 863d98d4072SPaolo Bonzini si.si_code = info->ssi_code; 864d98d4072SPaolo Bonzini 865d98d4072SPaolo Bonzini /* Convert the minimal set of fields defined by POSIX. 
866d98d4072SPaolo Bonzini * Positive si_code values are reserved for kernel-generated 867d98d4072SPaolo Bonzini * signals, where the valid siginfo fields are determined by 868d98d4072SPaolo Bonzini * the signal number. But according to POSIX, it is unspecified 869d98d4072SPaolo Bonzini * whether SI_USER and SI_QUEUE have values less than or equal to 870d98d4072SPaolo Bonzini * zero. 871d98d4072SPaolo Bonzini */ 872d98d4072SPaolo Bonzini if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE || 873d98d4072SPaolo Bonzini info->ssi_code <= 0) { 874d98d4072SPaolo Bonzini /* SIGTERM, etc. */ 875d98d4072SPaolo Bonzini si.si_pid = info->ssi_pid; 876d98d4072SPaolo Bonzini si.si_uid = info->ssi_uid; 877d98d4072SPaolo Bonzini } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE || 878d98d4072SPaolo Bonzini info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) { 879d98d4072SPaolo Bonzini si.si_addr = (void *)(uintptr_t)info->ssi_addr; 880d98d4072SPaolo Bonzini } else if (info->ssi_signo == SIGCHLD) { 881d98d4072SPaolo Bonzini si.si_pid = info->ssi_pid; 882d98d4072SPaolo Bonzini si.si_status = info->ssi_status; 883d98d4072SPaolo Bonzini si.si_uid = info->ssi_uid; 884d98d4072SPaolo Bonzini } 885d98d4072SPaolo Bonzini action->sa_sigaction(info->ssi_signo, &si, NULL); 886d98d4072SPaolo Bonzini } 887e47f4765SMichal Privoznik 888ad06ef0eSAlex Bennée size_t qemu_get_host_physmem(void) 889ad06ef0eSAlex Bennée { 890ad06ef0eSAlex Bennée #ifdef _SC_PHYS_PAGES 891ad06ef0eSAlex Bennée long pages = sysconf(_SC_PHYS_PAGES); 892ad06ef0eSAlex Bennée if (pages > 0) { 8938e3b0cbbSMarc-André Lureau if (pages > SIZE_MAX / qemu_real_host_page_size()) { 894ad06ef0eSAlex Bennée return SIZE_MAX; 895ad06ef0eSAlex Bennée } else { 8968e3b0cbbSMarc-André Lureau return pages * qemu_real_host_page_size(); 897ad06ef0eSAlex Bennée } 898ad06ef0eSAlex Bennée } 899ad06ef0eSAlex Bennée #endif 900ad06ef0eSAlex Bennée return 0; 901ad06ef0eSAlex Bennée } 902e9c4e0a8SMarc-André Lureau 
/*
 * OR @flag into the file status flags of @fd.
 *
 * Returns 0 on success, or a negative errno value if either the F_GETFL or
 * the F_SETFL request fails.
 */
int fcntl_setfl(int fd, int flag)
{
    int current = fcntl(fd, F_GETFL);

    if (current == -1 || fcntl(fd, F_SETFL, current | flag) == -1) {
        return -errno;
    }
    return 0;
}

/*
 * Synchronously flush the mapping that covers [@addr, @addr + @length).
 *
 * The start address is rounded down to a host page boundary; the length is
 * grown by the same amount and then rounded up to a whole number of pages.
 * @fd is not used by this implementation.
 *
 * Returns the result of msync() with MS_SYNC.
 */
int qemu_msync(void *addr, size_t length, int fd)
{
    const size_t offset_mask = qemu_real_host_page_size() - 1;
    const size_t page_mask = ~offset_mask;
    const uintptr_t start = (uintptr_t)addr;
    size_t span;

    /* Account for the bytes gained by moving the start down to its page */
    span = length + (start & offset_mask);
    /* Round the size up to a multiple of the page size */
    span = (span + offset_mask) & page_mask;

    return msync((void *)(start & page_mask), span, MS_SYNC);
}