xref: /openbmc/qemu/backends/hostmem.c (revision a4d50b1d)
1 /*
2  * QEMU Host Memory Backend
3  *
4  * Copyright (C) 2013-2014 Red Hat Inc
5  *
6  * Authors:
7  *   Igor Mammedov <imammedo@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "sysemu/hostmem.h"
14 #include "hw/boards.h"
15 #include "qapi/error.h"
16 #include "qapi/visitor.h"
17 #include "qapi-types.h"
18 #include "qapi-visit.h"
19 #include "qemu/config-file.h"
20 #include "qom/object_interfaces.h"
21 
#ifdef CONFIG_NUMA
#include <numaif.h>
/*
 * backend->policy is handed to mbind() unchanged further down in this file,
 * so every HOST_MEM_POLICY_* value has to equal its MPOL_* counterpart.
 * Fail the build if the two enumerations ever diverge.
 */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
#endif
29 
30 static void
31 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
32                              void *opaque, Error **errp)
33 {
34     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
35     uint64_t value = backend->size;
36 
37     visit_type_size(v, name, &value, errp);
38 }
39 
40 static void
41 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
42                              void *opaque, Error **errp)
43 {
44     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
45     Error *local_err = NULL;
46     uint64_t value;
47 
48     if (memory_region_size(&backend->mr)) {
49         error_setg(&local_err, "cannot change property value");
50         goto out;
51     }
52 
53     visit_type_size(v, name, &value, &local_err);
54     if (local_err) {
55         goto out;
56     }
57     if (!value) {
58         error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
59                    PRIu64 "'", object_get_typename(obj), name, value);
60         goto out;
61     }
62     backend->size = value;
63 out:
64     error_propagate(errp, local_err);
65 }
66 
67 static uint16List **host_memory_append_node(uint16List **node,
68                                             unsigned long value)
69 {
70      *node = g_malloc0(sizeof(**node));
71      (*node)->value = value;
72      return &(*node)->next;
73 }
74 
75 static void
76 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
77                                    void *opaque, Error **errp)
78 {
79     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
80     uint16List *host_nodes = NULL;
81     uint16List **node = &host_nodes;
82     unsigned long value;
83 
84     value = find_first_bit(backend->host_nodes, MAX_NODES);
85 
86     node = host_memory_append_node(node, value);
87 
88     if (value == MAX_NODES) {
89         goto out;
90     }
91 
92     do {
93         value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
94         if (value == MAX_NODES) {
95             break;
96         }
97 
98         node = host_memory_append_node(node, value);
99     } while (true);
100 
101 out:
102     visit_type_uint16List(v, name, &host_nodes, errp);
103 }
104 
105 static void
106 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
107                                    void *opaque, Error **errp)
108 {
109 #ifdef CONFIG_NUMA
110     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
111     uint16List *l = NULL;
112 
113     visit_type_uint16List(v, name, &l, errp);
114 
115     while (l) {
116         bitmap_set(backend->host_nodes, l->value, 1);
117         l = l->next;
118     }
119 #else
120     error_setg(errp, "NUMA node binding are not supported by this QEMU");
121 #endif
122 }
123 
124 static int
125 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
126 {
127     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
128     return backend->policy;
129 }
130 
131 static void
132 host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
133 {
134     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
135     backend->policy = policy;
136 
137 #ifndef CONFIG_NUMA
138     if (policy != HOST_MEM_POLICY_DEFAULT) {
139         error_setg(errp, "NUMA policies are not supported by this QEMU");
140     }
141 #endif
142 }
143 
144 static bool host_memory_backend_get_merge(Object *obj, Error **errp)
145 {
146     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
147 
148     return backend->merge;
149 }
150 
151 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
152 {
153     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
154 
155     if (!memory_region_size(&backend->mr)) {
156         backend->merge = value;
157         return;
158     }
159 
160     if (value != backend->merge) {
161         void *ptr = memory_region_get_ram_ptr(&backend->mr);
162         uint64_t sz = memory_region_size(&backend->mr);
163 
164         qemu_madvise(ptr, sz,
165                      value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
166         backend->merge = value;
167     }
168 }
169 
170 static bool host_memory_backend_get_dump(Object *obj, Error **errp)
171 {
172     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
173 
174     return backend->dump;
175 }
176 
177 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
178 {
179     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
180 
181     if (!memory_region_size(&backend->mr)) {
182         backend->dump = value;
183         return;
184     }
185 
186     if (value != backend->dump) {
187         void *ptr = memory_region_get_ram_ptr(&backend->mr);
188         uint64_t sz = memory_region_size(&backend->mr);
189 
190         qemu_madvise(ptr, sz,
191                      value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
192         backend->dump = value;
193     }
194 }
195 
196 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
197 {
198     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
199 
200     return backend->prealloc || backend->force_prealloc;
201 }
202 
203 static void host_memory_backend_set_prealloc(Object *obj, bool value,
204                                              Error **errp)
205 {
206     Error *local_err = NULL;
207     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
208 
209     if (backend->force_prealloc) {
210         if (value) {
211             error_setg(errp,
212                        "remove -mem-prealloc to use the prealloc property");
213             return;
214         }
215     }
216 
217     if (!memory_region_size(&backend->mr)) {
218         backend->prealloc = value;
219         return;
220     }
221 
222     if (value && !backend->prealloc) {
223         int fd = memory_region_get_fd(&backend->mr);
224         void *ptr = memory_region_get_ram_ptr(&backend->mr);
225         uint64_t sz = memory_region_size(&backend->mr);
226 
227         os_mem_prealloc(fd, ptr, sz, &local_err);
228         if (local_err) {
229             error_propagate(errp, local_err);
230             return;
231         }
232         backend->prealloc = true;
233     }
234 }
235 
236 static void host_memory_backend_init(Object *obj)
237 {
238     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
239     MachineState *machine = MACHINE(qdev_get_machine());
240 
241     backend->merge = machine_mem_merge(machine);
242     backend->dump = machine_dump_guest_core(machine);
243     backend->prealloc = mem_prealloc;
244 
245     object_property_add_bool(obj, "merge",
246                         host_memory_backend_get_merge,
247                         host_memory_backend_set_merge, NULL);
248     object_property_add_bool(obj, "dump",
249                         host_memory_backend_get_dump,
250                         host_memory_backend_set_dump, NULL);
251     object_property_add_bool(obj, "prealloc",
252                         host_memory_backend_get_prealloc,
253                         host_memory_backend_set_prealloc, NULL);
254     object_property_add(obj, "size", "int",
255                         host_memory_backend_get_size,
256                         host_memory_backend_set_size, NULL, NULL, NULL);
257     object_property_add(obj, "host-nodes", "int",
258                         host_memory_backend_get_host_nodes,
259                         host_memory_backend_set_host_nodes, NULL, NULL, NULL);
260     object_property_add_enum(obj, "policy", "HostMemPolicy",
261                              HostMemPolicy_lookup,
262                              host_memory_backend_get_policy,
263                              host_memory_backend_set_policy, NULL);
264 }
265 
266 MemoryRegion *
267 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
268 {
269     return memory_region_size(&backend->mr) ? &backend->mr : NULL;
270 }
271 
272 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
273 {
274     backend->is_mapped = mapped;
275 }
276 
277 bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
278 {
279     return backend->is_mapped;
280 }
281 
282 static void
283 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
284 {
285     HostMemoryBackend *backend = MEMORY_BACKEND(uc);
286     HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
287     Error *local_err = NULL;
288     void *ptr;
289     uint64_t sz;
290 
291     if (bc->alloc) {
292         bc->alloc(backend, &local_err);
293         if (local_err) {
294             goto out;
295         }
296 
297         ptr = memory_region_get_ram_ptr(&backend->mr);
298         sz = memory_region_size(&backend->mr);
299 
300         if (backend->merge) {
301             qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
302         }
303         if (!backend->dump) {
304             qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
305         }
306 #ifdef CONFIG_NUMA
307         unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
308         /* lastbit == MAX_NODES means maxnode = 0 */
309         unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
310         /* ensure policy won't be ignored in case memory is preallocated
311          * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
312          * this doesn't catch hugepage case. */
313         unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
314 
315         /* check for invalid host-nodes and policies and give more verbose
316          * error messages than mbind(). */
317         if (maxnode && backend->policy == MPOL_DEFAULT) {
318             error_setg(errp, "host-nodes must be empty for policy default,"
319                        " or you should explicitly specify a policy other"
320                        " than default");
321             return;
322         } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
323             error_setg(errp, "host-nodes must be set for policy %s",
324                        HostMemPolicy_lookup[backend->policy]);
325             return;
326         }
327 
328         /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
329          * as argument to mbind() due to an old Linux bug (feature?) which
330          * cuts off the last specified node. This means backend->host_nodes
331          * must have MAX_NODES+1 bits available.
332          */
333         assert(sizeof(backend->host_nodes) >=
334                BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
335         assert(maxnode <= MAX_NODES);
336         if (mbind(ptr, sz, backend->policy,
337                   maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
338             if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
339                 error_setg_errno(errp, errno,
340                                  "cannot bind memory to host NUMA nodes");
341                 return;
342             }
343         }
344 #endif
345         /* Preallocate memory after the NUMA policy has been instantiated.
346          * This is necessary to guarantee memory is allocated with
347          * specified NUMA policy in place.
348          */
349         if (backend->prealloc) {
350             os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
351                             &local_err);
352             if (local_err) {
353                 goto out;
354             }
355         }
356     }
357 out:
358     error_propagate(errp, local_err);
359 }
360 
361 static bool
362 host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
363 {
364     if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
365         return false;
366     } else {
367         return true;
368     }
369 }
370 
371 static void
372 host_memory_backend_class_init(ObjectClass *oc, void *data)
373 {
374     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
375 
376     ucc->complete = host_memory_backend_memory_complete;
377     ucc->can_be_deleted = host_memory_backend_can_be_deleted;
378 }
379 
380 static const TypeInfo host_memory_backend_info = {
381     .name = TYPE_MEMORY_BACKEND,
382     .parent = TYPE_OBJECT,
383     .abstract = true,
384     .class_size = sizeof(HostMemoryBackendClass),
385     .class_init = host_memory_backend_class_init,
386     .instance_size = sizeof(HostMemoryBackend),
387     .instance_init = host_memory_backend_init,
388     .interfaces = (InterfaceInfo[]) {
389         { TYPE_USER_CREATABLE },
390         { }
391     }
392 };
393 
394 static void register_types(void)
395 {
396     type_register_static(&host_memory_backend_info);
397 }
398 
399 type_init(register_types);
400