1f04cf923SMarc-André Lureau /*
2f04cf923SMarc-André Lureau * memfd.c
3f04cf923SMarc-André Lureau *
4f04cf923SMarc-André Lureau * Copyright (c) 2015 Red Hat, Inc.
5f04cf923SMarc-André Lureau *
6f04cf923SMarc-André Lureau * QEMU library functions on POSIX which are shared between QEMU and
7f04cf923SMarc-André Lureau * the QEMU tools.
8f04cf923SMarc-André Lureau *
9f04cf923SMarc-André Lureau * Permission is hereby granted, free of charge, to any person obtaining a copy
10f04cf923SMarc-André Lureau * of this software and associated documentation files (the "Software"), to deal
11f04cf923SMarc-André Lureau * in the Software without restriction, including without limitation the rights
12f04cf923SMarc-André Lureau * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13f04cf923SMarc-André Lureau * copies of the Software, and to permit persons to whom the Software is
14f04cf923SMarc-André Lureau * furnished to do so, subject to the following conditions:
15f04cf923SMarc-André Lureau *
16f04cf923SMarc-André Lureau * The above copyright notice and this permission notice shall be included in
17f04cf923SMarc-André Lureau * all copies or substantial portions of the Software.
18f04cf923SMarc-André Lureau *
19f04cf923SMarc-André Lureau * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20f04cf923SMarc-André Lureau * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21f04cf923SMarc-André Lureau * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22f04cf923SMarc-André Lureau * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23f04cf923SMarc-André Lureau * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24f04cf923SMarc-André Lureau * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25f04cf923SMarc-André Lureau * THE SOFTWARE.
26f04cf923SMarc-André Lureau */
27f04cf923SMarc-André Lureau
28f04cf923SMarc-André Lureau #include "qemu/osdep.h"
29f04cf923SMarc-André Lureau
300f2956f9SMarc-André Lureau #include "qapi/error.h"
31*c90204b6SMarc-André Lureau #include "qemu/error-report.h"
32f04cf923SMarc-André Lureau #include "qemu/memfd.h"
332ef8c0c9SMarc-André Lureau #include "qemu/host-utils.h"
34f04cf923SMarc-André Lureau
3575e5b70eSPaolo Bonzini #if defined CONFIG_LINUX && !defined CONFIG_MEMFD
36f04cf923SMarc-André Lureau #include <sys/syscall.h>
37f04cf923SMarc-André Lureau #include <asm/unistd.h>
38f04cf923SMarc-André Lureau
/*
 * Local memfd_create() definition, compiled only when CONFIG_LINUX is
 * defined and CONFIG_MEMFD is not.
 *
 * Issues the raw system call when the headers define __NR_memfd_create;
 * otherwise sets errno to ENOSYS and returns -1.
 */
int memfd_create(const char *name, unsigned int flags)
{
#ifndef __NR_memfd_create
    errno = ENOSYS;
    return -1;
#else
    long ret = syscall(__NR_memfd_create, name, flags);

    return ret;
#endif
}
48f04cf923SMarc-André Lureau #endif
49f04cf923SMarc-André Lureau
qemu_memfd_create(const char * name,size_t size,bool hugetlb,uint64_t hugetlbsize,unsigned int seals,Error ** errp)50c5b2a9e0SMarc-André Lureau int qemu_memfd_create(const char *name, size_t size, bool hugetlb,
512ef8c0c9SMarc-André Lureau uint64_t hugetlbsize, unsigned int seals, Error **errp)
52dcff1035SMarc-André Lureau {
532ef8c0c9SMarc-André Lureau int htsize = hugetlbsize ? ctz64(hugetlbsize) : 0;
542ef8c0c9SMarc-André Lureau
554f938cbdSPeter Maydell if (htsize && 1ULL << htsize != hugetlbsize) {
562ef8c0c9SMarc-André Lureau error_setg(errp, "Hugepage size must be a power of 2");
572ef8c0c9SMarc-André Lureau return -1;
582ef8c0c9SMarc-André Lureau }
592ef8c0c9SMarc-André Lureau
602ef8c0c9SMarc-André Lureau htsize = htsize << MFD_HUGE_SHIFT;
612ef8c0c9SMarc-André Lureau
62dcff1035SMarc-André Lureau #ifdef CONFIG_LINUX
630f2956f9SMarc-André Lureau int mfd = -1;
64dcff1035SMarc-André Lureau unsigned int flags = MFD_CLOEXEC;
65dcff1035SMarc-André Lureau
66dcff1035SMarc-André Lureau if (seals) {
67dcff1035SMarc-André Lureau flags |= MFD_ALLOW_SEALING;
68dcff1035SMarc-André Lureau }
69c5b2a9e0SMarc-André Lureau if (hugetlb) {
70c5b2a9e0SMarc-André Lureau flags |= MFD_HUGETLB;
712ef8c0c9SMarc-André Lureau flags |= htsize;
72c5b2a9e0SMarc-André Lureau }
73dcff1035SMarc-André Lureau mfd = memfd_create(name, flags);
74dcff1035SMarc-André Lureau if (mfd < 0) {
75edaed6c7SIlya Maximets error_setg_errno(errp, errno,
76edaed6c7SIlya Maximets "failed to create memfd with flags 0x%x", flags);
770f2956f9SMarc-André Lureau goto err;
78dcff1035SMarc-André Lureau }
79dcff1035SMarc-André Lureau
80dcff1035SMarc-André Lureau if (ftruncate(mfd, size) == -1) {
81edaed6c7SIlya Maximets error_setg_errno(errp, errno, "failed to resize memfd to %zu", size);
820f2956f9SMarc-André Lureau goto err;
83dcff1035SMarc-André Lureau }
84dcff1035SMarc-André Lureau
85dcff1035SMarc-André Lureau if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) {
86edaed6c7SIlya Maximets error_setg_errno(errp, errno, "failed to add seals 0x%x", seals);
870f2956f9SMarc-André Lureau goto err;
88dcff1035SMarc-André Lureau }
89dcff1035SMarc-André Lureau
90dcff1035SMarc-André Lureau return mfd;
910f2956f9SMarc-André Lureau
920f2956f9SMarc-André Lureau err:
930f2956f9SMarc-André Lureau if (mfd >= 0) {
940f2956f9SMarc-André Lureau close(mfd);
950f2956f9SMarc-André Lureau }
96edaed6c7SIlya Maximets #else
97edaed6c7SIlya Maximets error_setg_errno(errp, ENOSYS, "failed to create memfd");
980f2956f9SMarc-André Lureau #endif
990f2956f9SMarc-André Lureau return -1;
100dcff1035SMarc-André Lureau }
101dcff1035SMarc-André Lureau
102d3592199SMarc-André Lureau /*
103d3592199SMarc-André Lureau * This is a best-effort helper for shared memory allocation, with
104d3592199SMarc-André Lureau * optional sealing. The helper will do his best to allocate using
105d3592199SMarc-André Lureau * memfd with sealing, but may fallback on other methods without
106d3592199SMarc-André Lureau * sealing.
107d3592199SMarc-André Lureau */
qemu_memfd_alloc(const char * name,size_t size,unsigned int seals,int * fd,Error ** errp)108d3592199SMarc-André Lureau void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
1090f2956f9SMarc-André Lureau int *fd, Error **errp)
110d3592199SMarc-André Lureau {
111d3592199SMarc-André Lureau void *ptr;
1122ef8c0c9SMarc-André Lureau int mfd = qemu_memfd_create(name, size, false, 0, seals, NULL);
113d3592199SMarc-André Lureau
114dcff1035SMarc-André Lureau /* some systems have memfd without sealing */
115dcff1035SMarc-André Lureau if (mfd == -1) {
1162ef8c0c9SMarc-André Lureau mfd = qemu_memfd_create(name, size, false, 0, 0, NULL);
117d3592199SMarc-André Lureau }
118d3592199SMarc-André Lureau
119d3592199SMarc-André Lureau if (mfd == -1) {
12035f9b6efSMarc-André Lureau const char *tmpdir = g_get_tmp_dir();
12135f9b6efSMarc-André Lureau gchar *fname;
12235f9b6efSMarc-André Lureau
12335f9b6efSMarc-André Lureau fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);
12435f9b6efSMarc-André Lureau mfd = mkstemp(fname);
12535f9b6efSMarc-André Lureau unlink(fname);
12635f9b6efSMarc-André Lureau g_free(fname);
12735f9b6efSMarc-André Lureau
1280f2956f9SMarc-André Lureau if (mfd == -1 ||
1290f2956f9SMarc-André Lureau ftruncate(mfd, size) == -1) {
1300f2956f9SMarc-André Lureau goto err;
13135f9b6efSMarc-André Lureau }
13235f9b6efSMarc-André Lureau }
13335f9b6efSMarc-André Lureau
134d3592199SMarc-André Lureau ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
135d3592199SMarc-André Lureau if (ptr == MAP_FAILED) {
1360f2956f9SMarc-André Lureau goto err;
137d3592199SMarc-André Lureau }
138d3592199SMarc-André Lureau
139d3592199SMarc-André Lureau *fd = mfd;
140d3592199SMarc-André Lureau return ptr;
1410f2956f9SMarc-André Lureau
1420f2956f9SMarc-André Lureau err:
1430f2956f9SMarc-André Lureau error_setg_errno(errp, errno, "failed to allocate shared memory");
1440f2956f9SMarc-André Lureau if (mfd >= 0) {
1450f2956f9SMarc-André Lureau close(mfd);
1460f2956f9SMarc-André Lureau }
1470f2956f9SMarc-André Lureau return NULL;
148d3592199SMarc-André Lureau }
149d3592199SMarc-André Lureau
/*
 * Release a mapping and descriptor pair: unmap @size bytes at @ptr when
 * @ptr is non-NULL, and close @fd unless it is -1. A failure of either
 * call is reported through error_report() and otherwise ignored.
 */
void qemu_memfd_free(void *ptr, size_t size, int fd)
{
    if (ptr && munmap(ptr, size) != 0) {
        error_report("memfd munmap() failed: %s", strerror(errno));
    }

    if (fd != -1 && close(fd) != 0) {
        error_report("memfd close() failed: %s", strerror(errno));
    }
}
16431190ed7SMarc-André Lureau
/* Cached result states used by qemu_memfd_alloc_check(). */
enum {
    MEMFD_KO   = 0, /* probe allocation failed */
    MEMFD_OK   = 1, /* probe allocation succeeded */
    MEMFD_TODO = 2  /* not probed yet */
};
17031190ed7SMarc-André Lureau
171648abbfbSMarc-André Lureau /**
172648abbfbSMarc-André Lureau * qemu_memfd_alloc_check():
173648abbfbSMarc-André Lureau *
174648abbfbSMarc-André Lureau * Check if qemu_memfd_alloc() can allocate, including using a
175648abbfbSMarc-André Lureau * fallback implementation when host doesn't support memfd.
176648abbfbSMarc-André Lureau */
qemu_memfd_alloc_check(void)177648abbfbSMarc-André Lureau bool qemu_memfd_alloc_check(void)
17831190ed7SMarc-André Lureau {
17931190ed7SMarc-André Lureau static int memfd_check = MEMFD_TODO;
18031190ed7SMarc-André Lureau
18131190ed7SMarc-André Lureau if (memfd_check == MEMFD_TODO) {
18231190ed7SMarc-André Lureau int fd;
18331190ed7SMarc-André Lureau void *ptr;
18431190ed7SMarc-André Lureau
1851e7ec6cfSDima Stepanov fd = -1;
1860f2956f9SMarc-André Lureau ptr = qemu_memfd_alloc("test", 4096, 0, &fd, NULL);
18731190ed7SMarc-André Lureau memfd_check = ptr ? MEMFD_OK : MEMFD_KO;
18831190ed7SMarc-André Lureau qemu_memfd_free(ptr, 4096, fd);
18931190ed7SMarc-André Lureau }
19031190ed7SMarc-André Lureau
19131190ed7SMarc-André Lureau return memfd_check == MEMFD_OK;
19231190ed7SMarc-André Lureau }
193648abbfbSMarc-André Lureau
/**
 * qemu_memfd_check():
 * @flags: memfd_create() flags to probe; MFD_CLOEXEC is always added
 *
 * Check if the host supports memfd with the given flags by creating a
 * throw-away memfd and closing it again.
 *
 * Returns: true if the memfd could be created, false otherwise (always
 * false when built without CONFIG_LINUX).
 */
bool qemu_memfd_check(unsigned int flags)
{
#ifdef CONFIG_LINUX
    int probe_fd = memfd_create("test", flags | MFD_CLOEXEC);

    if (probe_fd < 0) {
        return false;
    }

    close(probe_fd);
    return true;
#else
    return false;
#endif
}
212