xref: /openbmc/qemu/util/mmap-alloc.c (revision 52f2b8961409be834abaee5189bff2cc9e372851)
1 /*
2  * Support for RAM backed by mmaped host memory.
3  *
4  * Copyright (c) 2015 Red Hat, Inc.
5  *
6  * Authors:
7  *  Michael S. Tsirkin <mst@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or
10  * later.  See the COPYING file in the top-level directory.
11  */
12 
13 #ifdef CONFIG_LINUX
14 #include <linux/mman.h>
15 #else  /* !CONFIG_LINUX */
16 #define MAP_SYNC              0x0
17 #define MAP_SHARED_VALIDATE   0x0
18 #endif /* CONFIG_LINUX */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/mmap-alloc.h"
22 #include "qemu/host-utils.h"
23 
24 #define HUGETLBFS_MAGIC       0x958458f6
25 
26 #ifdef CONFIG_LINUX
27 #include <sys/vfs.h>
28 #endif
29 
/*
 * Return the page size backing the open file descriptor @fd.
 *
 * On Linux, a descriptor that lives on hugetlbfs reports that filesystem's
 * block size (the huge page size).  Passing -1, a failing fstatfs(), or a
 * file on any other filesystem falls through to the default: the
 * QEMU_VMALLOC_ALIGN value on SPARC Linux, getpagesize() everywhere else.
 */
size_t qemu_fd_getpagesize(int fd)
{
#ifdef CONFIG_LINUX
    if (fd != -1) {
        struct statfs stfs;
        int rc;

        /* Keep retrying while the call is merely interrupted by a signal. */
        for (;;) {
            rc = fstatfs(fd, &stfs);
            if (rc == 0 || errno != EINTR) {
                break;
            }
        }

        /* Other failures are not fatal here; use the default below. */
        if (rc == 0 && stfs.f_type == HUGETLBFS_MAGIC) {
            return stfs.f_bsize;
        }
    }
#ifdef __sparc__
    /* SPARC Linux needs greater alignment than the pagesize */
    return QEMU_VMALLOC_ALIGN;
#endif
#endif

    return getpagesize();
}
53 
/*
 * Return the page size of the filesystem that holds @mem_path.
 *
 * On Linux, a path on hugetlbfs yields that filesystem's block size (the
 * huge page size).  A NULL path, or a path on any other filesystem, yields
 * the default: QEMU_VMALLOC_ALIGN on SPARC Linux, getpagesize() otherwise.
 *
 * A statfs() failure other than EINTR is fatal: a message is written to
 * stderr and the process exits with status 1.
 */
size_t qemu_mempath_getpagesize(const char *mem_path)
{
#ifdef CONFIG_LINUX
    if (mem_path != NULL) {
        struct statfs stfs;

        /* Retry on EINTR; give up (fatally) on any other error. */
        while (statfs(mem_path, &stfs) != 0) {
            if (errno == EINTR) {
                continue;
            }
            fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                    strerror(errno));
            exit(1);
        }

        if (stfs.f_type == HUGETLBFS_MAGIC) {
            /* Backed by huge pages: report the huge page size. */
            return stfs.f_bsize;
        }
    }
#ifdef __sparc__
    /* SPARC Linux needs greater alignment than the pagesize */
    return QEMU_VMALLOC_ALIGN;
#endif
#endif

    return getpagesize();
}
84 
85 void *qemu_ram_mmap(int fd,
86                     size_t size,
87                     size_t align,
88                     bool shared,
89                     bool is_pmem)
90 {
91     int flags;
92     int map_sync_flags = 0;
93     int guardfd;
94     size_t offset;
95     size_t pagesize;
96     size_t total;
97     void *guardptr;
98     void *ptr;
99 
100     /*
101      * Note: this always allocates at least one extra page of virtual address
102      * space, even if size is already aligned.
103      */
104     total = size + align;
105 
106 #if defined(__powerpc64__) && defined(__linux__)
107     /* On ppc64 mappings in the same segment (aka slice) must share the same
108      * page size. Since we will be re-allocating part of this segment
109      * from the supplied fd, we should make sure to use the same page size, to
110      * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
111      * avoid allocating backing store memory.
112      * We do this unless we are using the system page size, in which case
113      * anonymous memory is OK.
114      */
115     flags = MAP_PRIVATE;
116     pagesize = qemu_fd_getpagesize(fd);
117     if (fd == -1 || pagesize == getpagesize()) {
118         guardfd = -1;
119         flags |= MAP_ANONYMOUS;
120     } else {
121         guardfd = fd;
122         flags |= MAP_NORESERVE;
123     }
124 #else
125     guardfd = -1;
126     pagesize = getpagesize();
127     flags = MAP_PRIVATE | MAP_ANONYMOUS;
128 #endif
129 
130     guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0);
131 
132     if (guardptr == MAP_FAILED) {
133         return MAP_FAILED;
134     }
135 
136     assert(is_power_of_2(align));
137     /* Always align to host page size */
138     assert(align >= pagesize);
139 
140     flags = MAP_FIXED;
141     flags |= fd == -1 ? MAP_ANONYMOUS : 0;
142     flags |= shared ? MAP_SHARED : MAP_PRIVATE;
143     if (shared && is_pmem) {
144         map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
145     }
146 
147     offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
148 
149     ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
150                flags | map_sync_flags, fd, 0);
151 
152     if (ptr == MAP_FAILED && map_sync_flags) {
153         if (errno == ENOTSUP) {
154             char *proc_link, *file_name;
155             int len;
156             proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
157             file_name = g_malloc0(PATH_MAX);
158             len = readlink(proc_link, file_name, PATH_MAX - 1);
159             if (len < 0) {
160                 len = 0;
161             }
162             file_name[len] = '\0';
163             fprintf(stderr, "Warning: requesting persistence across crashes "
164                     "for backend file %s failed. Proceeding without "
165                     "persistence, data might become corrupted in case of host "
166                     "crash.\n", file_name);
167             g_free(proc_link);
168             g_free(file_name);
169         }
170         /*
171          * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
172          * we will remove these flags to handle compatibility.
173          */
174         ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
175                    flags, fd, 0);
176     }
177 
178     if (ptr == MAP_FAILED) {
179         munmap(guardptr, total);
180         return MAP_FAILED;
181     }
182 
183     if (offset > 0) {
184         munmap(guardptr, offset);
185     }
186 
187     /*
188      * Leave a single PROT_NONE page allocated after the RAM block, to serve as
189      * a guard page guarding against potential buffer overflows.
190      */
191     total -= offset;
192     if (total > size + pagesize) {
193         munmap(ptr + size + pagesize, total - size - pagesize);
194     }
195 
196     return ptr;
197 }
198 
/*
 * Release a RAM block of @size bytes at @ptr together with the guard page
 * that follows it.  A NULL @ptr is ignored.
 *
 * @fd is only consulted on ppc64 Linux, where the guard page has the page
 * size reported by qemu_fd_getpagesize(); elsewhere the guard page is one
 * host page.
 */
void qemu_ram_munmap(int fd, void *ptr, size_t size)
{
    size_t guard_len;

    if (!ptr) {
        return;
    }

#if defined(__powerpc64__) && defined(__linux__)
    guard_len = qemu_fd_getpagesize(fd);
#else
    guard_len = getpagesize();
#endif

    /* One call covers both the RAM block and the trailing guard page. */
    munmap(ptr, size + guard_len);
}
213