1baacf047SPaolo Bonzini /*
2baacf047SPaolo Bonzini * os-posix-lib.c
3baacf047SPaolo Bonzini *
4baacf047SPaolo Bonzini * Copyright (c) 2003-2008 Fabrice Bellard
5baacf047SPaolo Bonzini * Copyright (c) 2010 Red Hat, Inc.
6baacf047SPaolo Bonzini *
7baacf047SPaolo Bonzini * QEMU library functions on POSIX which are shared between QEMU and
8baacf047SPaolo Bonzini * the QEMU tools.
9baacf047SPaolo Bonzini *
10baacf047SPaolo Bonzini * Permission is hereby granted, free of charge, to any person obtaining a copy
11baacf047SPaolo Bonzini * of this software and associated documentation files (the "Software"), to deal
12baacf047SPaolo Bonzini * in the Software without restriction, including without limitation the rights
13baacf047SPaolo Bonzini * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14baacf047SPaolo Bonzini * copies of the Software, and to permit persons to whom the Software is
15baacf047SPaolo Bonzini * furnished to do so, subject to the following conditions:
16baacf047SPaolo Bonzini *
17baacf047SPaolo Bonzini * The above copyright notice and this permission notice shall be included in
18baacf047SPaolo Bonzini * all copies or substantial portions of the Software.
19baacf047SPaolo Bonzini *
20baacf047SPaolo Bonzini * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21baacf047SPaolo Bonzini * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22baacf047SPaolo Bonzini * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23baacf047SPaolo Bonzini * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24baacf047SPaolo Bonzini * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25baacf047SPaolo Bonzini * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26baacf047SPaolo Bonzini * THE SOFTWARE.
27baacf047SPaolo Bonzini */
28baacf047SPaolo Bonzini
29aafd7584SPeter Maydell #include "qemu/osdep.h"
3013401ba0SStefan Hajnoczi #include <termios.h>
3113401ba0SStefan Hajnoczi
32e2ea3515SLaszlo Ersek #include <glib/gprintf.h>
33e2ea3515SLaszlo Ersek
34baacf047SPaolo Bonzini #include "sysemu/sysemu.h"
35baacf047SPaolo Bonzini #include "trace.h"
36da34e65cSMarkus Armbruster #include "qapi/error.h"
3729b838c0SDavid Hildenbrand #include "qemu/error-report.h"
38b85ea5faSPeter Maydell #include "qemu/madvise.h"
39baacf047SPaolo Bonzini #include "qemu/sockets.h"
40db725815SMarkus Armbruster #include "qemu/thread.h"
4110f5bff6SFam Zheng #include <libgen.h>
42f348b6d1SVeronia Bahaa #include "qemu/cutils.h"
4389aec641SDavid Hildenbrand #include "qemu/units.h"
44e2de2c49SDavid Hildenbrand #include "qemu/thread-context.h"
4504accf43SMark Kanda #include "qemu/main-loop.h"
46baacf047SPaolo Bonzini
47baacf047SPaolo Bonzini #ifdef CONFIG_LINUX
48baacf047SPaolo Bonzini #include <sys/syscall.h>
49baacf047SPaolo Bonzini #endif
50baacf047SPaolo Bonzini
5141975b26SAndreas Färber #ifdef __FreeBSD__
529548a891SDavid Carlier #include <sys/thr.h>
5306680b15SMarc-André Lureau #include <sys/user.h>
547dc9ae43SMichal Privoznik #include <libutil.h>
5541975b26SAndreas Färber #endif
5641975b26SAndreas Färber
57094611b4SKamil Rytarowski #ifdef __NetBSD__
589548a891SDavid Carlier #include <lwp.h>
59094611b4SKamil Rytarowski #endif
60094611b4SKamil Rytarowski
61a9c94277SMarkus Armbruster #include "qemu/mmap-alloc.h"
62794e8f30SMichael S. Tsirkin
/* Hard cap on the number of preallocation worker threads. */
#define MAX_MEM_PREALLOC_THREAD_COUNT 16

struct MemsetThread;

/*
 * All pending asynchronous preallocation requests; touch_all_pages()
 * queues entries here and qemu_finish_async_prealloc_mem() drains them.
 */
static QLIST_HEAD(, MemsetContext) memset_contexts =
    QLIST_HEAD_INITIALIZER(memset_contexts);

/* One preallocation request: a set of worker threads touching a region. */
typedef struct MemsetContext {
    /* Set under page_mutex once all workers exist; workers wait for it. */
    bool all_threads_created;
    /* NOTE(review): not read/written anywhere in this file — confirm use. */
    bool any_thread_failed;
    struct MemsetThread *threads;
    int num_threads;
    QLIST_ENTRY(MemsetContext) next;
} MemsetContext;

/* Per-worker slice of the region being preallocated. */
struct MemsetThread {
    char *addr;             /* start of this thread's slice */
    size_t numpages;        /* number of (huge)pages in the slice */
    size_t hpagesize;       /* page size used for stepping through addr */
    QemuThread pgthread;
    sigjmp_buf env;         /* siglongjmp() target for SIGBUS recovery */
    MemsetContext *context; /* back-pointer to the owning request */
};
typedef struct MemsetThread MemsetThread;

/* used by sigbus_handler() */
static MemsetContext *sigbus_memset_context;
struct sigaction sigbus_oldact;
static QemuMutex sigbus_mutex;

/* Protect/signal MemsetContext::all_threads_created for the workers. */
static QemuMutex page_mutex;
static QemuCond page_cond;
95037fb5ebSbauerchen
/*
 * Return an integer id for the calling thread.
 *
 * Uses the native kernel thread id where available (Linux gettid,
 * FreeBSD thr_self, NetBSD _lwp_self, OpenBSD getthrid) and falls
 * back to the process id elsewhere.
 */
int qemu_get_thread_id(void)
{
#if defined(__linux__)
    return syscall(SYS_gettid);
#elif defined(__FreeBSD__)
    /* thread id is up to INT_MAX */
    long tid;
    thr_self(&tid);
    return (int)tid;
#elif defined(__NetBSD__)
    return _lwp_self();
#elif defined(__OpenBSD__)
    return getthrid();
#else
    return getpid();
#endif
}
113baacf047SPaolo Bonzini
/* Detach into the background; thin wrapper around daemon(3). */
int qemu_daemon(int nochdir, int noclose)
{
    int rc = daemon(nochdir, noclose);

    return rc;
}
118baacf047SPaolo Bonzini
/*
 * Create the pid file at @path, take an exclusive fcntl() write lock on
 * it, and write our pid into it.
 *
 * After locking, the fstat()/stat() inode comparison detects another
 * process having unlinked or recreated the file between our open() and
 * the lock acquisition; in that case we close and retry from scratch.
 *
 * Returns true on success; on failure sets @errp and returns false.
 * The fcntl() lock is released automatically when the process exits.
 */
bool qemu_write_pidfile(const char *path, Error **errp)
{
    int fd;
    char pidstr[32];

    while (1) {
        struct stat a, b;
        struct flock lock = {
            .l_type = F_WRLCK,
            .l_whence = SEEK_SET,
            .l_len = 0,     /* length 0 == lock the whole file */
        };

        fd = qemu_create(path, O_WRONLY, S_IRUSR | S_IWUSR, errp);
        if (fd == -1) {
            return false;
        }

        if (fstat(fd, &b) < 0) {
            error_setg_errno(errp, errno, "Cannot stat file");
            goto fail_close;
        }

        if (fcntl(fd, F_SETLK, &lock)) {
            error_setg_errno(errp, errno, "Cannot lock pid file");
            goto fail_close;
        }

        /*
         * Now make sure the path we locked is the same one that now
         * exists on the filesystem.
         */
        if (stat(path, &a) < 0) {
            /*
             * PID file disappeared, someone else must be racing with
             * us, so try again.
             */
            close(fd);
            continue;
        }

        if (a.st_ino == b.st_ino) {
            break;
        }

        /*
         * PID file was recreated, someone else must be racing with
         * us, so try again.
         */
        close(fd);
    }

    if (ftruncate(fd, 0) < 0) {
        error_setg_errno(errp, errno, "Failed to truncate pid file");
        goto fail_unlink;
    }

    snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid());
    if (qemu_write_full(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
        error_setg(errp, "Failed to write pid file");
        goto fail_unlink;
    }

    return true;

fail_unlink:
    unlink(path);
fail_close:
    close(fd);
    return false;
}
1909e6bdef2SMarc-André Lureau
191baacf047SPaolo Bonzini /* alloc shared memory pages */
qemu_anon_ram_alloc(size_t size,uint64_t * alignment,bool shared,bool noreserve)1928dbe22c6SDavid Hildenbrand void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
1938dbe22c6SDavid Hildenbrand bool noreserve)
194baacf047SPaolo Bonzini {
1958dbe22c6SDavid Hildenbrand const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) |
1968dbe22c6SDavid Hildenbrand (noreserve ? QEMU_MAP_NORESERVE : 0);
197baacf047SPaolo Bonzini size_t align = QEMU_VMALLOC_ALIGN;
198b444f5c0SDavid Hildenbrand void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0);
199baacf047SPaolo Bonzini
2007dda5dc8SPaolo Bonzini if (ptr == MAP_FAILED) {
20139228250SMarkus Armbruster return NULL;
202baacf047SPaolo Bonzini }
203baacf047SPaolo Bonzini
204a2b257d6SIgor Mammedov if (alignment) {
205a2b257d6SIgor Mammedov *alignment = align;
206a2b257d6SIgor Mammedov }
207c2dfc5baSMichael S. Tsirkin
2086eebf958SPaolo Bonzini trace_qemu_anon_ram_alloc(size, ptr);
209baacf047SPaolo Bonzini return ptr;
210baacf047SPaolo Bonzini }
211baacf047SPaolo Bonzini
/* Release a region previously returned by qemu_anon_ram_alloc(). */
void qemu_anon_ram_free(void *ptr, size_t size)
{
    trace_qemu_anon_ram_free(ptr, size);
    qemu_ram_munmap(-1, ptr, size);
}
217e7a09b92SPaolo Bonzini
/* Switch @fd to blocking mode (best effort; any error is ignored). */
void qemu_socket_set_block(int fd)
{
    g_unix_set_fd_nonblocking(fd, false, NULL);
}
222baacf047SPaolo Bonzini
/* Switch @fd to non-blocking mode; returns 0 on success, -errno on failure. */
int qemu_socket_try_set_nonblock(int fd)
{
    return g_unix_set_fd_nonblocking(fd, true, NULL) ? 0 : -errno;
}
227894022e6SLaurent Vivier
/* Switch @fd to non-blocking mode; failure is considered fatal. */
void qemu_socket_set_nonblock(int fd)
{
    int rc;

    rc = qemu_socket_try_set_nonblock(fd);
    assert(rc == 0);
}
234baacf047SPaolo Bonzini
/*
 * Enable SO_REUSEADDR on @fd so the address can be rebound quickly.
 * Always returns 0; a setsockopt() failure trips the assertion.
 */
int socket_set_fast_reuse(int fd)
{
    int enable = 1;
    int rc;

    rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
                    (const char *)&enable, sizeof(enable));
    assert(rc == 0);

    return rc;
}
246606600a1SSebastian Ottlik
/* Mark @fd close-on-exec; fcntl() failure is considered fatal. */
void qemu_set_cloexec(int fd)
{
    int flags;

    flags = fcntl(fd, F_GETFD);
    assert(flags != -1);
    flags = fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
    assert(flags != -1);
}
255baacf047SPaolo Bonzini
/*
 * socketpair() wrapper that marks both descriptors close-on-exec,
 * atomically via SOCK_CLOEXEC when the platform accepts it.
 * Returns socketpair()'s result (0 on success, -1 with errno set).
 */
int qemu_socketpair(int domain, int type, int protocol, int sv[2])
{
    int rc;

#ifdef SOCK_CLOEXEC
    /* Prefer the race-free variant; EINVAL means the flag is unsupported. */
    rc = socketpair(domain, type | SOCK_CLOEXEC, protocol, sv);
    if (rc != -1 || errno != EINVAL) {
        return rc;
    }
#endif
    /* Fall back and apply FD_CLOEXEC after creation. */
    rc = socketpair(domain, type, protocol, sv);
    if (rc == 0) {
        qemu_set_cloexec(sv[0]);
        qemu_set_cloexec(sv[1]);
    }

    return rc;
}
2743c63b4e9SGuoyi Tu
/*
 * Return the local state directory (CONFIG_QEMU_LOCALSTATEDIR), adjusted
 * for a relocated installation; see get_relocated_path() for ownership
 * of the returned string.
 */
char *
qemu_get_local_state_dir(void)
{
    return get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR);
}
28013401ba0SStefan Hajnoczi
/*
 * Turn terminal echo (plus canonical mode and extended input
 * processing) on or off for the tty open on @fd.
 */
void qemu_set_tty_echo(int fd, bool echo)
{
    const tcflag_t mask = ECHO | ECHONL | ICANON | IEXTEN;
    struct termios tty;

    tcgetattr(fd, &tty);

    if (echo) {
        tty.c_lflag |= mask;
    } else {
        tty.c_lflag &= ~mask;
    }

    tcsetattr(fd, TCSANOW, &tty);
}
29510f5bff6SFam Zheng
/*
 * SIGBUS handler installed while preallocation threads touch pages
 * manually (the non-MADV_POPULATE_WRITE path).  If the fault happened
 * on one of our memset threads, siglongjmp() back into it so it can
 * abort cleanly; otherwise forward to a pre-existing SA_SIGINFO
 * handler (Linux MCE) or just warn.
 */
#ifdef CONFIG_LINUX
static void sigbus_handler(int signal, siginfo_t *siginfo, void *ctx)
#else /* CONFIG_LINUX */
static void sigbus_handler(int signal)
#endif /* CONFIG_LINUX */
{
    int i;

    if (sigbus_memset_context) {
        for (i = 0; i < sigbus_memset_context->num_threads; i++) {
            MemsetThread *thread = &sigbus_memset_context->threads[i];

            if (qemu_thread_is_self(&thread->pgthread)) {
                /* Does not return; unwinds to do_touch_pages(). */
                siglongjmp(thread->env, 1);
            }
        }
    }

#ifdef CONFIG_LINUX
    /*
     * We assume that the MCE SIGBUS handler could have been registered. We
     * should never receive BUS_MCEERR_AO on any of our threads, but only on
     * the main thread registered for PR_MCE_KILL_EARLY. Further, we should not
     * receive BUS_MCEERR_AR triggered by action of other threads on one of
     * our threads. So, no need to check for unrelated SIGBUS when seeing one
     * for our threads.
     *
     * We will forward to the MCE handler, which will either handle the SIGBUS
     * or reinstall the default SIGBUS handler and reraise the SIGBUS. The
     * default SIGBUS handler will crash the process, so we don't care.
     */
    if (sigbus_oldact.sa_flags & SA_SIGINFO) {
        sigbus_oldact.sa_sigaction(signal, siginfo, ctx);
        return;
    }
#endif /* CONFIG_LINUX */
    warn_report("qemu_prealloc_mem: unrelated SIGBUS detected and ignored");
}
33438183310SPaolo Bonzini
/*
 * Worker thread: fault in every page of this thread's slice by reading
 * and writing back one byte per (huge)page.
 *
 * Returns 0, or -EFAULT (cast through uintptr_t) if a SIGBUS aborted
 * the loop via siglongjmp() from sigbus_handler().
 */
static void *do_touch_pages(void *arg)
{
    MemsetThread *memset_args = (MemsetThread *)arg;
    sigset_t set, oldset;
    int ret = 0;

    /*
     * On Linux, the page faults from the loop below can cause mmap_sem
     * contention with allocation of the thread stacks. Do not start
     * clearing until all threads have been created.
     */
    qemu_mutex_lock(&page_mutex);
    while (!memset_args->context->all_threads_created) {
        qemu_cond_wait(&page_cond, &page_mutex);
    }
    qemu_mutex_unlock(&page_mutex);

    /* unblock SIGBUS */
    sigemptyset(&set);
    sigaddset(&set, SIGBUS);
    pthread_sigmask(SIG_UNBLOCK, &set, &oldset);

    if (sigsetjmp(memset_args->env, 1)) {
        /* Re-entered here via siglongjmp() from sigbus_handler(). */
        ret = -EFAULT;
    } else {
        char *addr = memset_args->addr;
        size_t numpages = memset_args->numpages;
        size_t hpagesize = memset_args->hpagesize;
        size_t i;
        for (i = 0; i < numpages; i++) {
            /*
             * Read & write back the same value, so we don't
             * corrupt existing user/app data that might be
             * stored.
             *
             * 'volatile' to stop compiler optimizing this away
             * to a no-op
             */
            *(volatile char *)addr = *addr;
            addr += hpagesize;
        }
    }
    /* Restore the caller's signal mask before exiting. */
    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
    return (void *)(uintptr_t)ret;
}
3801e356fc1SJitendra Kolhe
/*
 * Worker thread: populate this thread's slice in one go using
 * QEMU_MADV_POPULATE_WRITE.
 *
 * Returns 0 on success, or -errno (cast through uintptr_t) on failure.
 */
static void *do_madv_populate_write_pages(void *arg)
{
    MemsetThread *memset_args = (MemsetThread *)arg;
    const size_t size = memset_args->numpages * memset_args->hpagesize;
    char * const addr = memset_args->addr;
    int ret = 0;

    /* See do_touch_pages(). */
    qemu_mutex_lock(&page_mutex);
    while (!memset_args->context->all_threads_created) {
        qemu_cond_wait(&page_cond, &page_mutex);
    }
    qemu_mutex_unlock(&page_mutex);

    /* A zero-sized slice is a no-op, not an error. */
    if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) {
        ret = -errno;
    }
    return (void *)(uintptr_t)ret;
}
400a384bfa3SDavid Hildenbrand
get_memset_num_threads(size_t hpagesize,size_t numpages,int max_threads)40189aec641SDavid Hildenbrand static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
4026556aadcSDavid Hildenbrand int max_threads)
403dfd0dcc7SJitendra Kolhe {
404dfd0dcc7SJitendra Kolhe long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
405dfd0dcc7SJitendra Kolhe int ret = 1;
406dfd0dcc7SJitendra Kolhe
407dfd0dcc7SJitendra Kolhe if (host_procs > 0) {
4086556aadcSDavid Hildenbrand ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), max_threads);
409dfd0dcc7SJitendra Kolhe }
41089aec641SDavid Hildenbrand
41189aec641SDavid Hildenbrand /* Especially with gigantic pages, don't create more threads than pages. */
41289aec641SDavid Hildenbrand ret = MIN(ret, numpages);
41389aec641SDavid Hildenbrand /* Don't start threads to prealloc comparatively little memory. */
41489aec641SDavid Hildenbrand ret = MIN(ret, MAX(1, hpagesize * numpages / (64 * MiB)));
41589aec641SDavid Hildenbrand
416dfd0dcc7SJitendra Kolhe /* In case sysconf() fails, we fall back to single threaded */
417dfd0dcc7SJitendra Kolhe return ret;
418dfd0dcc7SJitendra Kolhe }
419dfd0dcc7SJitendra Kolhe
/*
 * Join every worker thread of @context, free the context, and return
 * 0 on success or the last non-zero worker exit value.
 */
static int wait_and_free_mem_prealloc_context(MemsetContext *context)
{
    int result = 0;

    for (int i = 0; i < context->num_threads; i++) {
        int err = (uintptr_t)qemu_thread_join(&context->threads[i].pgthread);

        if (err) {
            result = err;
        }
    }
    g_free(context->threads);
    g_free(context);
    return result;
}
43504accf43SMark Kanda
/*
 * Preallocate @numpages pages of size @hpagesize starting at @area by
 * splitting the region across up to @max_threads worker threads
 * (placed via @tc when given).  With @async, the request is only
 * queued on memset_contexts and finished later by
 * qemu_finish_async_prealloc_mem().
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
                           int max_threads, ThreadContext *tc, bool async,
                           bool use_madv_populate_write)
{
    static gsize initialized = 0;
    MemsetContext *context = g_malloc0(sizeof(MemsetContext));
    size_t numpages_per_thread, leftover;
    void *(*touch_fn)(void *);
    int ret, i = 0;
    char *addr = area;

    /*
     * Asynchronous preallocation is only allowed when using MADV_POPULATE_WRITE
     * and prealloc context for thread placement.
     */
    if (!use_madv_populate_write || !tc) {
        async = false;
    }

    context->num_threads =
        get_memset_num_threads(hpagesize, numpages, max_threads);

    /* One-time init of the globals shared with the worker threads. */
    if (g_once_init_enter(&initialized)) {
        qemu_mutex_init(&page_mutex);
        qemu_cond_init(&page_cond);
        g_once_init_leave(&initialized, 1);
    }

    if (use_madv_populate_write) {
        /*
         * Avoid creating a single thread for MADV_POPULATE_WRITE when
         * preallocating synchronously.
         */
        if (context->num_threads == 1 && !async) {
            ret = 0;
            if (qemu_madvise(area, hpagesize * numpages,
                             QEMU_MADV_POPULATE_WRITE)) {
                ret = -errno;
            }
            g_free(context);
            return ret;
        }
        touch_fn = do_madv_populate_write_pages;
    } else {
        touch_fn = do_touch_pages;
    }

    /* Split the pages evenly; the first 'leftover' threads get one extra. */
    context->threads = g_new0(MemsetThread, context->num_threads);
    numpages_per_thread = numpages / context->num_threads;
    leftover = numpages % context->num_threads;
    for (i = 0; i < context->num_threads; i++) {
        context->threads[i].addr = addr;
        context->threads[i].numpages = numpages_per_thread + (i < leftover);
        context->threads[i].hpagesize = hpagesize;
        context->threads[i].context = context;
        if (tc) {
            thread_context_create_thread(tc, &context->threads[i].pgthread,
                                         "touch_pages",
                                         touch_fn, &context->threads[i],
                                         QEMU_THREAD_JOINABLE);
        } else {
            qemu_thread_create(&context->threads[i].pgthread, "touch_pages",
                               touch_fn, &context->threads[i],
                               QEMU_THREAD_JOINABLE);
        }
        addr += context->threads[i].numpages * hpagesize;
    }

    if (async) {
        /*
         * async requests currently require the BQL. Add it to the list and kick
         * preallocation off during qemu_finish_async_prealloc_mem().
         */
        assert(bql_locked());
        QLIST_INSERT_HEAD(&memset_contexts, context, next);
        return 0;
    }

    /* Only the manual-touch path needs SIGBUS recovery state. */
    if (!use_madv_populate_write) {
        sigbus_memset_context = context;
    }

    /* Release the workers blocked in do_touch_pages()/..._write_pages(). */
    qemu_mutex_lock(&page_mutex);
    context->all_threads_created = true;
    qemu_cond_broadcast(&page_cond);
    qemu_mutex_unlock(&page_mutex);

    ret = wait_and_free_mem_prealloc_context(context);

    if (!use_madv_populate_write) {
        sigbus_memset_context = NULL;
    }
    return ret;
}
53004accf43SMark Kanda
/*
 * Kick off and wait for every asynchronous preallocation request queued
 * by touch_all_pages().  Must be called with the BQL held.
 *
 * Returns true on success; sets @errp and returns false if any request
 * failed (the last failure's errno is reported).
 */
bool qemu_finish_async_prealloc_mem(Error **errp)
{
    int ret = 0, tmp;
    MemsetContext *context, *next_context;

    /* Waiting for preallocation requires the BQL. */
    assert(bql_locked());
    if (QLIST_EMPTY(&memset_contexts)) {
        return true;
    }

    /* Release all waiting worker threads in one broadcast. */
    qemu_mutex_lock(&page_mutex);
    QLIST_FOREACH(context, &memset_contexts, next) {
        context->all_threads_created = true;
    }
    qemu_cond_broadcast(&page_cond);
    qemu_mutex_unlock(&page_mutex);

    QLIST_FOREACH_SAFE(context, &memset_contexts, next, next_context) {
        QLIST_REMOVE(context, next);
        tmp = wait_and_free_mem_prealloc_context(context);
        if (tmp) {
            /* Keep draining so every context is reaped before reporting. */
            ret = tmp;
        }
    }

    if (ret) {
        error_setg_errno(errp, -ret,
                         "qemu_prealloc_mem: preallocating memory failed");
        return false;
    }
    return true;
}
5641e356fc1SJitendra Kolhe
madv_populate_write_possible(char * area,size_t pagesize)565a384bfa3SDavid Hildenbrand static bool madv_populate_write_possible(char *area, size_t pagesize)
566a384bfa3SDavid Hildenbrand {
567a384bfa3SDavid Hildenbrand return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) ||
568a384bfa3SDavid Hildenbrand errno != EINVAL;
569a384bfa3SDavid Hildenbrand }
570a384bfa3SDavid Hildenbrand
/*
 * Preallocate @sz bytes starting at @area, using the page size of @fd.
 *
 * Prefers MADV_POPULATE_WRITE when the kernel supports it for this
 * mapping; otherwise each page is touched manually under a temporarily
 * installed SIGBUS handler.  With @async (MADV path + @tc only), the
 * work is merely queued; see qemu_finish_async_prealloc_mem().
 *
 * Returns true on success; sets @errp and returns false on failure.
 * Exits the process if the original SIGBUS handler cannot be restored.
 */
bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
                       ThreadContext *tc, bool async, Error **errp)
{
    static gsize initialized;
    int ret;
    size_t hpagesize = qemu_fd_getpagesize(fd);
    size_t numpages = DIV_ROUND_UP(sz, hpagesize);
    bool use_madv_populate_write;
    struct sigaction act;
    bool rv = true;

    /*
     * Sense on every invocation, as MADV_POPULATE_WRITE cannot be used for
     * some special mappings, such as mapping /dev/mem.
     */
    use_madv_populate_write = madv_populate_write_possible(area, hpagesize);

    if (!use_madv_populate_write) {
        if (g_once_init_enter(&initialized)) {
            qemu_mutex_init(&sigbus_mutex);
            g_once_init_leave(&initialized, 1);
        }

        /* Serialize users of the process-wide SIGBUS handler. */
        qemu_mutex_lock(&sigbus_mutex);
        memset(&act, 0, sizeof(act));
#ifdef CONFIG_LINUX
        act.sa_sigaction = &sigbus_handler;
        act.sa_flags = SA_SIGINFO;
#else /* CONFIG_LINUX */
        act.sa_handler = &sigbus_handler;
        act.sa_flags = 0;
#endif /* CONFIG_LINUX */

        ret = sigaction(SIGBUS, &act, &sigbus_oldact);
        if (ret) {
            qemu_mutex_unlock(&sigbus_mutex);
            error_setg_errno(errp, errno,
                "qemu_prealloc_mem: failed to install signal handler");
            return false;
        }
    }

    /* touch pages simultaneously */
    ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, async,
                          use_madv_populate_write);
    if (ret) {
        error_setg_errno(errp, -ret,
                         "qemu_prealloc_mem: preallocating memory failed");
        rv = false;
    }

    if (!use_madv_populate_write) {
        ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
        if (ret) {
            /* Terminate QEMU since it can't recover from error */
            perror("qemu_prealloc_mem: failed to reinstall signal handler");
            exit(1);
        }
        qemu_mutex_unlock(&sigbus_mutex);
    }
    return rv;
}
633d57e4e48SDaniel P. Berrange
/*
 * Return the command name of process @pid as a newly allocated string
 * (allocated with glib; caller must g_free() it), or NULL when the name
 * cannot be determined.
 */
char *qemu_get_pid_name(pid_t pid)
{
    char *name = NULL;

#if defined(__FreeBSD__)
    /* BSDs don't have /proc, but they provide a nice substitute */
    struct kinfo_proc *kp = kinfo_getproc(pid);

    if (kp) {
        name = g_strdup(kp->ki_comm);
        free(kp);
    }
#else
    /* Assume a system with reasonable procfs */
    size_t len;
    char *cmdline_path = g_strdup_printf("/proc/%d/cmdline", pid);

    /* On failure @name stays NULL; the error itself is not interesting. */
    g_file_get_contents(cmdline_path, &name, &len, NULL);
    g_free(cmdline_path);
#endif

    return name;
}
6587dc9ae43SMichal Privoznik
6597dc9ae43SMichal Privoznik
qemu_alloc_stack(size_t * sz)6608737d9e0SPeter Lieven void *qemu_alloc_stack(size_t *sz)
6618737d9e0SPeter Lieven {
662a1eaa628SAkihiko Odaki void *ptr;
663fc3d1badSBrad Smith int flags;
6647d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE
6657d992e4dSPeter Lieven void *ptr2;
6667d992e4dSPeter Lieven #endif
6678e3b0cbbSMarc-André Lureau size_t pagesz = qemu_real_host_page_size();
6688737d9e0SPeter Lieven #ifdef _SC_THREAD_STACK_MIN
6698737d9e0SPeter Lieven /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
6708737d9e0SPeter Lieven long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
6718737d9e0SPeter Lieven *sz = MAX(MAX(min_stack_sz, 0), *sz);
6728737d9e0SPeter Lieven #endif
6738737d9e0SPeter Lieven /* adjust stack size to a multiple of the page size */
6748737d9e0SPeter Lieven *sz = ROUND_UP(*sz, pagesz);
6758737d9e0SPeter Lieven /* allocate one extra page for the guard page */
6768737d9e0SPeter Lieven *sz += pagesz;
6778737d9e0SPeter Lieven
678fc3d1badSBrad Smith flags = MAP_PRIVATE | MAP_ANONYMOUS;
679fc3d1badSBrad Smith #if defined(MAP_STACK) && defined(__OpenBSD__)
680fc3d1badSBrad Smith /* Only enable MAP_STACK on OpenBSD. Other OS's such as
681fc3d1badSBrad Smith * Linux/FreeBSD/NetBSD have a flag with the same name
682fc3d1badSBrad Smith * but have differing functionality. OpenBSD will SEGV
683fc3d1badSBrad Smith * if it spots execution with a stack pointer pointing
684fc3d1badSBrad Smith * at memory that was not allocated with MAP_STACK.
685fc3d1badSBrad Smith */
686fc3d1badSBrad Smith flags |= MAP_STACK;
687fc3d1badSBrad Smith #endif
688fc3d1badSBrad Smith
689fc3d1badSBrad Smith ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, flags, -1, 0);
6908737d9e0SPeter Lieven if (ptr == MAP_FAILED) {
691e916a6e8SEduardo Habkost perror("failed to allocate memory for stack");
6928737d9e0SPeter Lieven abort();
6938737d9e0SPeter Lieven }
6948737d9e0SPeter Lieven
695a1eaa628SAkihiko Odaki /* Stack grows down -- guard page at the bottom. */
696a1eaa628SAkihiko Odaki if (mprotect(ptr, pagesz, PROT_NONE) != 0) {
697e916a6e8SEduardo Habkost perror("failed to set up stack guard page");
6988737d9e0SPeter Lieven abort();
6998737d9e0SPeter Lieven }
7008737d9e0SPeter Lieven
7017d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE
7027d992e4dSPeter Lieven for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
7037d992e4dSPeter Lieven *(uint32_t *)ptr2 = 0xdeadbeaf;
7047d992e4dSPeter Lieven }
7057d992e4dSPeter Lieven #endif
7067d992e4dSPeter Lieven
7078737d9e0SPeter Lieven return ptr;
7088737d9e0SPeter Lieven }
7098737d9e0SPeter Lieven
#ifdef CONFIG_DEBUG_STACK_USAGE
/* Per-thread high-water mark of stack usage; updated by qemu_free_stack(). */
static __thread unsigned int max_stack_usage;
#endif
7137d992e4dSPeter Lieven
qemu_free_stack(void * stack,size_t sz)7148737d9e0SPeter Lieven void qemu_free_stack(void *stack, size_t sz)
7158737d9e0SPeter Lieven {
7167d992e4dSPeter Lieven #ifdef CONFIG_DEBUG_STACK_USAGE
7177d992e4dSPeter Lieven unsigned int usage;
7187d992e4dSPeter Lieven void *ptr;
7197d992e4dSPeter Lieven
7208e3b0cbbSMarc-André Lureau for (ptr = stack + qemu_real_host_page_size(); ptr < stack + sz;
7217d992e4dSPeter Lieven ptr += sizeof(uint32_t)) {
7227d992e4dSPeter Lieven if (*(uint32_t *)ptr != 0xdeadbeaf) {
7237d992e4dSPeter Lieven break;
7247d992e4dSPeter Lieven }
7257d992e4dSPeter Lieven }
7267d992e4dSPeter Lieven usage = sz - (uintptr_t) (ptr - stack);
7277d992e4dSPeter Lieven if (usage > max_stack_usage) {
7287d992e4dSPeter Lieven error_report("thread %d max stack usage increased from %u to %u",
7297d992e4dSPeter Lieven qemu_get_thread_id(), max_stack_usage, usage);
7307d992e4dSPeter Lieven max_stack_usage = usage;
7317d992e4dSPeter Lieven }
7327d992e4dSPeter Lieven #endif
7337d992e4dSPeter Lieven
7348737d9e0SPeter Lieven munmap(stack, sz);
7358737d9e0SPeter Lieven }
736d98d4072SPaolo Bonzini
737c905a368SDaniele Buono /*
738c905a368SDaniele Buono * Disable CFI checks.
739d02d06f8SMichael Tokarev * We are going to call a signal handler directly. Such handler may or may not
740c905a368SDaniele Buono * have been defined in our binary, so there's no guarantee that the pointer
741c905a368SDaniele Buono * used to set the handler is a cfi-valid pointer. Since the handlers are
742c905a368SDaniele Buono * stored in kernel memory, changing the handler to an attacker-defined
743c905a368SDaniele Buono * function requires being able to call a sigaction() syscall,
744c905a368SDaniele Buono * which is not as easy as overwriting a pointer in memory.
745c905a368SDaniele Buono */
746c905a368SDaniele Buono QEMU_DISABLE_CFI
sigaction_invoke(struct sigaction * action,struct qemu_signalfd_siginfo * info)747d98d4072SPaolo Bonzini void sigaction_invoke(struct sigaction *action,
748d98d4072SPaolo Bonzini struct qemu_signalfd_siginfo *info)
749d98d4072SPaolo Bonzini {
75002ffa034SPeter Maydell siginfo_t si = {};
751d98d4072SPaolo Bonzini si.si_signo = info->ssi_signo;
752d98d4072SPaolo Bonzini si.si_errno = info->ssi_errno;
753d98d4072SPaolo Bonzini si.si_code = info->ssi_code;
754d98d4072SPaolo Bonzini
755d98d4072SPaolo Bonzini /* Convert the minimal set of fields defined by POSIX.
756d98d4072SPaolo Bonzini * Positive si_code values are reserved for kernel-generated
757d98d4072SPaolo Bonzini * signals, where the valid siginfo fields are determined by
758d98d4072SPaolo Bonzini * the signal number. But according to POSIX, it is unspecified
759d98d4072SPaolo Bonzini * whether SI_USER and SI_QUEUE have values less than or equal to
760d98d4072SPaolo Bonzini * zero.
761d98d4072SPaolo Bonzini */
762d98d4072SPaolo Bonzini if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE ||
763d98d4072SPaolo Bonzini info->ssi_code <= 0) {
764d98d4072SPaolo Bonzini /* SIGTERM, etc. */
765d98d4072SPaolo Bonzini si.si_pid = info->ssi_pid;
766d98d4072SPaolo Bonzini si.si_uid = info->ssi_uid;
767d98d4072SPaolo Bonzini } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE ||
768d98d4072SPaolo Bonzini info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) {
769d98d4072SPaolo Bonzini si.si_addr = (void *)(uintptr_t)info->ssi_addr;
770d98d4072SPaolo Bonzini } else if (info->ssi_signo == SIGCHLD) {
771d98d4072SPaolo Bonzini si.si_pid = info->ssi_pid;
772d98d4072SPaolo Bonzini si.si_status = info->ssi_status;
773d98d4072SPaolo Bonzini si.si_uid = info->ssi_uid;
774d98d4072SPaolo Bonzini }
775d98d4072SPaolo Bonzini action->sa_sigaction(info->ssi_signo, &si, NULL);
776d98d4072SPaolo Bonzini }
777e47f4765SMichal Privoznik
/*
 * Return the host's physical memory size in bytes, saturating at
 * SIZE_MAX on overflow.  Returns 0 when the size cannot be determined
 * (no _SC_PHYS_PAGES, or sysconf() failed).
 */
size_t qemu_get_host_physmem(void)
{
#ifdef _SC_PHYS_PAGES
    long pages = sysconf(_SC_PHYS_PAGES);

    if (pages > 0) {
        size_t pagesz = qemu_real_host_page_size();

        /* Saturate rather than wrap when pages * pagesz overflows size_t. */
        return pages > SIZE_MAX / pagesz ? SIZE_MAX : pages * pagesz;
    }
#endif
    return 0;
}
792e9c4e0a8SMarc-André Lureau
qemu_msync(void * addr,size_t length,int fd)79373991a92SMarc-André Lureau int qemu_msync(void *addr, size_t length, int fd)
79473991a92SMarc-André Lureau {
79573991a92SMarc-André Lureau size_t align_mask = ~(qemu_real_host_page_size() - 1);
79673991a92SMarc-André Lureau
79773991a92SMarc-André Lureau /**
79873991a92SMarc-André Lureau * There are no strict reqs as per the length of mapping
79973991a92SMarc-André Lureau * to be synced. Still the length needs to follow the address
80073991a92SMarc-André Lureau * alignment changes. Additionally - round the size to the multiple
80173991a92SMarc-André Lureau * of PAGE_SIZE
80273991a92SMarc-André Lureau */
80373991a92SMarc-André Lureau length += ((uintptr_t)addr & (qemu_real_host_page_size() - 1));
80473991a92SMarc-André Lureau length = (length + ~align_mask) & align_mask;
80573991a92SMarc-André Lureau
80673991a92SMarc-André Lureau addr = (void *)((uintptr_t)addr & align_mask);
80773991a92SMarc-André Lureau
80873991a92SMarc-André Lureau return msync(addr, length, MS_SYNC);
80973991a92SMarc-André Lureau }
8104ec5ebeaSClément Léger
/*
 * Close every open file descriptor except those listed in @skip by
 * enumerating /proc/self/fd.  @skip must be sorted in ascending order
 * (the "fd < skip[i]" early break below relies on that ordering).
 * Returns false when /proc is unavailable, true otherwise.
 */
static bool qemu_close_all_open_fd_proc(const int *skip, unsigned int nskip)
{
    struct dirent *de;
    int fd, dfd;
    DIR *dir;
    unsigned int skip_start = 0, skip_end = nskip;

    dir = opendir("/proc/self/fd");
    if (!dir) {
        /* If /proc is not mounted, there is nothing that can be done. */
        return false;
    }
    /* Avoid closing the directory. */
    dfd = dirfd(dir);

    for (de = readdir(dir); de; de = readdir(dir)) {
        bool close_fd = true;

        if (de->d_name[0] == '.') {
            continue;
        }
        fd = atoi(de->d_name);
        if (fd == dfd) {
            continue;
        }

        for (unsigned int i = skip_start; i < skip_end; i++) {
            if (fd < skip[i]) {
                /* We are below the next skipped fd, break */
                break;
            } else if (fd == skip[i]) {
                close_fd = false;
                /*
                 * Each fd appears at most once, so once a boundary entry
                 * of the skip list has matched it can be excluded from
                 * future scans.  (Bug fix: the upper-boundary test used
                 * "i == skip_end", which is unreachable inside a loop
                 * bounded by i < skip_end, so skip_end never shrank.)
                 */
                if (i == skip_start) {
                    skip_start++;
                } else if (i == skip_end - 1) {
                    skip_end--;
                }
                break;
            }
        }

        if (close_fd) {
            close(fd);
        }
    }
    closedir(dir);

    return true;
}
861ffa28f9cSClément Léger
/*
 * Close all file descriptors below @open_max, except those in @skip
 * (sorted ascending), using the close_range() syscall.  Returns false
 * when close_range() is unavailable or fails, so the caller can fall
 * back to another method.
 */
static bool qemu_close_all_open_fd_close_range(const int *skip,
                                               unsigned int nskip,
                                               int open_max)
{
#ifdef CONFIG_CLOSE_RANGE
    int highest = open_max - 1;
    int lo = 0, hi;
    unsigned int next_skip = 0;

    do {
        /* Skip over any fds to be preserved that sit right at the start. */
        while (next_skip < nskip && lo == skip[next_skip]) {
            next_skip++;
            lo++;
        }

        /* The range to close ends just before the next preserved fd. */
        hi = highest;
        if (next_skip < nskip) {
            hi = MIN(skip[next_skip] - 1, highest);
        }

        /* The adjustments above may have emptied the range. */
        if (lo > hi) {
            break;
        }

        if (close_range(lo, hi, 0) < 0) {
            return false;
        }

        lo = hi + 1;
    } while (hi < highest);

    return true;
#else
    return false;
#endif
}
904*7532ca57SClément Léger
/*
 * Fallback: blindly close every fd below @open_max, one close() call per
 * fd, except those in @skip (sorted ascending).
 */
static void qemu_close_all_open_fd_fallback(const int *skip, unsigned int nskip,
                                            int open_max)
{
    unsigned int next_skip = 0;
    int fd = 0;

    while (fd < open_max) {
        if (next_skip < nskip && fd == skip[next_skip]) {
            next_skip++;
        } else {
            close(fd);
        }
        fd++;
    }
}
919ffa28f9cSClément Léger
920ffa28f9cSClément Léger /*
921ffa28f9cSClément Léger * Close all open file descriptors.
922ffa28f9cSClément Léger */
void qemu_close_all_open_fd(const int *skip, unsigned int nskip)
{
    int open_max = sysconf(_SC_OPEN_MAX);

    /* A non-empty skip list requires a valid array. */
    assert(skip != NULL || nskip == 0);

    /* Try the cheapest mechanism first, falling back in turn. */
    if (qemu_close_all_open_fd_close_range(skip, nskip, open_max)) {
        return;
    }
    if (qemu_close_all_open_fd_proc(skip, nskip)) {
        return;
    }
    qemu_close_all_open_fd_fallback(skip, nskip, open_max);
}
934