/*
 * memfd.c
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * QEMU library functions on POSIX which are shared between QEMU and
 * the QEMU tools.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
27f04cf923SMarc-André Lureau
28f04cf923SMarc-André Lureau #include "qemu/osdep.h"
29f04cf923SMarc-André Lureau
300f2956f9SMarc-André Lureau #include "qapi/error.h"
31f04cf923SMarc-André Lureau #include "qemu/memfd.h"
322ef8c0c9SMarc-André Lureau #include "qemu/host-utils.h"
33f04cf923SMarc-André Lureau
#if defined CONFIG_LINUX && !defined CONFIG_MEMFD
#include <sys/syscall.h>
#include <asm/unistd.h>

/*
 * Fallback definition of memfd_create(), compiled only on Linux builds
 * where CONFIG_MEMFD is not defined.
 *
 * Issues the raw system call when the headers provide its number;
 * otherwise fails with -1 and errno set to ENOSYS.
 */
int memfd_create(const char *name, unsigned int flags)
{
#ifdef __NR_memfd_create
    return syscall(__NR_memfd_create, name, flags);
#else
    errno = ENOSYS;
    return -1;
#endif
}
#endif
48f04cf923SMarc-André Lureau
qemu_memfd_create(const char * name,size_t size,bool hugetlb,uint64_t hugetlbsize,unsigned int seals,Error ** errp)49c5b2a9e0SMarc-André Lureau int qemu_memfd_create(const char *name, size_t size, bool hugetlb,
502ef8c0c9SMarc-André Lureau uint64_t hugetlbsize, unsigned int seals, Error **errp)
51dcff1035SMarc-André Lureau {
522ef8c0c9SMarc-André Lureau int htsize = hugetlbsize ? ctz64(hugetlbsize) : 0;
532ef8c0c9SMarc-André Lureau
544f938cbdSPeter Maydell if (htsize && 1ULL << htsize != hugetlbsize) {
552ef8c0c9SMarc-André Lureau error_setg(errp, "Hugepage size must be a power of 2");
562ef8c0c9SMarc-André Lureau return -1;
572ef8c0c9SMarc-André Lureau }
582ef8c0c9SMarc-André Lureau
592ef8c0c9SMarc-André Lureau htsize = htsize << MFD_HUGE_SHIFT;
602ef8c0c9SMarc-André Lureau
61dcff1035SMarc-André Lureau #ifdef CONFIG_LINUX
620f2956f9SMarc-André Lureau int mfd = -1;
63dcff1035SMarc-André Lureau unsigned int flags = MFD_CLOEXEC;
64dcff1035SMarc-André Lureau
65dcff1035SMarc-André Lureau if (seals) {
66dcff1035SMarc-André Lureau flags |= MFD_ALLOW_SEALING;
67dcff1035SMarc-André Lureau }
68c5b2a9e0SMarc-André Lureau if (hugetlb) {
69c5b2a9e0SMarc-André Lureau flags |= MFD_HUGETLB;
702ef8c0c9SMarc-André Lureau flags |= htsize;
71c5b2a9e0SMarc-André Lureau }
72dcff1035SMarc-André Lureau mfd = memfd_create(name, flags);
73dcff1035SMarc-André Lureau if (mfd < 0) {
74edaed6c7SIlya Maximets error_setg_errno(errp, errno,
75edaed6c7SIlya Maximets "failed to create memfd with flags 0x%x", flags);
760f2956f9SMarc-André Lureau goto err;
77dcff1035SMarc-André Lureau }
78dcff1035SMarc-André Lureau
79dcff1035SMarc-André Lureau if (ftruncate(mfd, size) == -1) {
80edaed6c7SIlya Maximets error_setg_errno(errp, errno, "failed to resize memfd to %zu", size);
810f2956f9SMarc-André Lureau goto err;
82dcff1035SMarc-André Lureau }
83dcff1035SMarc-André Lureau
84dcff1035SMarc-André Lureau if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) {
85edaed6c7SIlya Maximets error_setg_errno(errp, errno, "failed to add seals 0x%x", seals);
860f2956f9SMarc-André Lureau goto err;
87dcff1035SMarc-André Lureau }
88dcff1035SMarc-André Lureau
89dcff1035SMarc-André Lureau return mfd;
900f2956f9SMarc-André Lureau
910f2956f9SMarc-André Lureau err:
920f2956f9SMarc-André Lureau if (mfd >= 0) {
930f2956f9SMarc-André Lureau close(mfd);
940f2956f9SMarc-André Lureau }
95edaed6c7SIlya Maximets #else
96edaed6c7SIlya Maximets error_setg_errno(errp, ENOSYS, "failed to create memfd");
970f2956f9SMarc-André Lureau #endif
980f2956f9SMarc-André Lureau return -1;
99dcff1035SMarc-André Lureau }
100dcff1035SMarc-André Lureau
101d3592199SMarc-André Lureau /*
102d3592199SMarc-André Lureau * This is a best-effort helper for shared memory allocation, with
103d3592199SMarc-André Lureau * optional sealing. The helper will do his best to allocate using
104d3592199SMarc-André Lureau * memfd with sealing, but may fallback on other methods without
105d3592199SMarc-André Lureau * sealing.
106d3592199SMarc-André Lureau */
qemu_memfd_alloc(const char * name,size_t size,unsigned int seals,int * fd,Error ** errp)107d3592199SMarc-André Lureau void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
1080f2956f9SMarc-André Lureau int *fd, Error **errp)
109d3592199SMarc-André Lureau {
110d3592199SMarc-André Lureau void *ptr;
1112ef8c0c9SMarc-André Lureau int mfd = qemu_memfd_create(name, size, false, 0, seals, NULL);
112d3592199SMarc-André Lureau
113dcff1035SMarc-André Lureau /* some systems have memfd without sealing */
114dcff1035SMarc-André Lureau if (mfd == -1) {
1152ef8c0c9SMarc-André Lureau mfd = qemu_memfd_create(name, size, false, 0, 0, NULL);
116d3592199SMarc-André Lureau }
117d3592199SMarc-André Lureau
118d3592199SMarc-André Lureau if (mfd == -1) {
11935f9b6efSMarc-André Lureau const char *tmpdir = g_get_tmp_dir();
12035f9b6efSMarc-André Lureau gchar *fname;
12135f9b6efSMarc-André Lureau
12235f9b6efSMarc-André Lureau fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);
12335f9b6efSMarc-André Lureau mfd = mkstemp(fname);
12435f9b6efSMarc-André Lureau unlink(fname);
12535f9b6efSMarc-André Lureau g_free(fname);
12635f9b6efSMarc-André Lureau
1270f2956f9SMarc-André Lureau if (mfd == -1 ||
1280f2956f9SMarc-André Lureau ftruncate(mfd, size) == -1) {
1290f2956f9SMarc-André Lureau goto err;
13035f9b6efSMarc-André Lureau }
13135f9b6efSMarc-André Lureau }
13235f9b6efSMarc-André Lureau
133d3592199SMarc-André Lureau ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
134d3592199SMarc-André Lureau if (ptr == MAP_FAILED) {
1350f2956f9SMarc-André Lureau goto err;
136d3592199SMarc-André Lureau }
137d3592199SMarc-André Lureau
138d3592199SMarc-André Lureau *fd = mfd;
139d3592199SMarc-André Lureau return ptr;
1400f2956f9SMarc-André Lureau
1410f2956f9SMarc-André Lureau err:
1420f2956f9SMarc-André Lureau error_setg_errno(errp, errno, "failed to allocate shared memory");
1430f2956f9SMarc-André Lureau if (mfd >= 0) {
1440f2956f9SMarc-André Lureau close(mfd);
1450f2956f9SMarc-André Lureau }
1460f2956f9SMarc-André Lureau return NULL;
147d3592199SMarc-André Lureau }
148d3592199SMarc-André Lureau
/*
 * Release a mapping and its backing descriptor.
 *
 * @ptr: mapping to unmap, or NULL to skip the munmap()
 * @size: length passed to munmap()
 * @fd: descriptor to close, or -1 to skip the close()
 */
void qemu_memfd_free(void *ptr, size_t size, int fd)
{
    if (ptr) {
        munmap(ptr, size);
    }

    if (fd != -1) {
        close(fd);
    }
}
15931190ed7SMarc-André Lureau
/* States of the cached probe result kept by qemu_memfd_alloc_check(). */
enum {
    MEMFD_KO = 0,   /* probe ran and the allocation failed */
    MEMFD_OK = 1,   /* probe ran and the allocation succeeded */
    MEMFD_TODO = 2  /* probe has not run yet */
};
16531190ed7SMarc-André Lureau
166648abbfbSMarc-André Lureau /**
167648abbfbSMarc-André Lureau * qemu_memfd_alloc_check():
168648abbfbSMarc-André Lureau *
169648abbfbSMarc-André Lureau * Check if qemu_memfd_alloc() can allocate, including using a
170648abbfbSMarc-André Lureau * fallback implementation when host doesn't support memfd.
171648abbfbSMarc-André Lureau */
qemu_memfd_alloc_check(void)172648abbfbSMarc-André Lureau bool qemu_memfd_alloc_check(void)
17331190ed7SMarc-André Lureau {
17431190ed7SMarc-André Lureau static int memfd_check = MEMFD_TODO;
17531190ed7SMarc-André Lureau
17631190ed7SMarc-André Lureau if (memfd_check == MEMFD_TODO) {
17731190ed7SMarc-André Lureau int fd;
17831190ed7SMarc-André Lureau void *ptr;
17931190ed7SMarc-André Lureau
1801e7ec6cfSDima Stepanov fd = -1;
1810f2956f9SMarc-André Lureau ptr = qemu_memfd_alloc("test", 4096, 0, &fd, NULL);
18231190ed7SMarc-André Lureau memfd_check = ptr ? MEMFD_OK : MEMFD_KO;
18331190ed7SMarc-André Lureau qemu_memfd_free(ptr, 4096, fd);
18431190ed7SMarc-André Lureau }
18531190ed7SMarc-André Lureau
18631190ed7SMarc-André Lureau return memfd_check == MEMFD_OK;
18731190ed7SMarc-André Lureau }
188648abbfbSMarc-André Lureau
/**
 * qemu_memfd_check():
 * @flags: extra flags OR-ed with MFD_CLOEXEC for the probe
 *
 * Check if host supports memfd by creating, then immediately closing,
 * a memfd with the requested flags.
 *
 * Returns: true if the memfd could be created; always false when
 * CONFIG_LINUX is not defined.
 */
bool qemu_memfd_check(unsigned int flags)
{
#ifdef CONFIG_LINUX
    int mfd = memfd_create("test", flags | MFD_CLOEXEC);

    if (mfd >= 0) {
        close(mfd);
        return true;
    }
#endif

    return false;
}
207