xref: /openbmc/qemu/backends/hostmem.c (revision 4a09d0bb)
1 /*
2  * QEMU Host Memory Backend
3  *
4  * Copyright (C) 2013-2014 Red Hat Inc
5  *
6  * Authors:
7  *   Igor Mammedov <imammedo@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "sysemu/hostmem.h"
14 #include "hw/boards.h"
15 #include "qapi/error.h"
16 #include "qapi/visitor.h"
17 #include "qapi-types.h"
18 #include "qapi-visit.h"
19 #include "qemu/config-file.h"
20 #include "qom/object_interfaces.h"
21 
22 #ifdef CONFIG_NUMA
23 #include <numaif.h>
24 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
25 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
26 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
27 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
28 #endif
29 
30 static void
31 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
32                              void *opaque, Error **errp)
33 {
34     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
35     uint64_t value = backend->size;
36 
37     visit_type_size(v, name, &value, errp);
38 }
39 
40 static void
41 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
42                              void *opaque, Error **errp)
43 {
44     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
45     Error *local_err = NULL;
46     uint64_t value;
47 
48     if (memory_region_size(&backend->mr)) {
49         error_setg(&local_err, "cannot change property value");
50         goto out;
51     }
52 
53     visit_type_size(v, name, &value, &local_err);
54     if (local_err) {
55         goto out;
56     }
57     if (!value) {
58         error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
59                    PRIu64 "'", object_get_typename(obj), name, value);
60         goto out;
61     }
62     backend->size = value;
63 out:
64     error_propagate(errp, local_err);
65 }
66 
67 static uint16List **host_memory_append_node(uint16List **node,
68                                             unsigned long value)
69 {
70      *node = g_malloc0(sizeof(**node));
71      (*node)->value = value;
72      return &(*node)->next;
73 }
74 
75 static void
76 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
77                                    void *opaque, Error **errp)
78 {
79     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
80     uint16List *host_nodes = NULL;
81     uint16List **node = &host_nodes;
82     unsigned long value;
83 
84     value = find_first_bit(backend->host_nodes, MAX_NODES);
85 
86     node = host_memory_append_node(node, value);
87 
88     if (value == MAX_NODES) {
89         goto out;
90     }
91 
92     do {
93         value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
94         if (value == MAX_NODES) {
95             break;
96         }
97 
98         node = host_memory_append_node(node, value);
99     } while (true);
100 
101 out:
102     visit_type_uint16List(v, name, &host_nodes, errp);
103 }
104 
105 static void
106 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
107                                    void *opaque, Error **errp)
108 {
109 #ifdef CONFIG_NUMA
110     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
111     uint16List *l = NULL;
112 
113     visit_type_uint16List(v, name, &l, errp);
114 
115     while (l) {
116         bitmap_set(backend->host_nodes, l->value, 1);
117         l = l->next;
118     }
119 #else
120     error_setg(errp, "NUMA node binding are not supported by this QEMU");
121 #endif
122 }
123 
124 static int
125 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
126 {
127     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
128     return backend->policy;
129 }
130 
131 static void
132 host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
133 {
134     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
135     backend->policy = policy;
136 
137 #ifndef CONFIG_NUMA
138     if (policy != HOST_MEM_POLICY_DEFAULT) {
139         error_setg(errp, "NUMA policies are not supported by this QEMU");
140     }
141 #endif
142 }
143 
144 static bool host_memory_backend_get_merge(Object *obj, Error **errp)
145 {
146     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
147 
148     return backend->merge;
149 }
150 
151 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
152 {
153     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
154 
155     if (!memory_region_size(&backend->mr)) {
156         backend->merge = value;
157         return;
158     }
159 
160     if (value != backend->merge) {
161         void *ptr = memory_region_get_ram_ptr(&backend->mr);
162         uint64_t sz = memory_region_size(&backend->mr);
163 
164         qemu_madvise(ptr, sz,
165                      value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
166         backend->merge = value;
167     }
168 }
169 
170 static bool host_memory_backend_get_dump(Object *obj, Error **errp)
171 {
172     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
173 
174     return backend->dump;
175 }
176 
177 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
178 {
179     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
180 
181     if (!memory_region_size(&backend->mr)) {
182         backend->dump = value;
183         return;
184     }
185 
186     if (value != backend->dump) {
187         void *ptr = memory_region_get_ram_ptr(&backend->mr);
188         uint64_t sz = memory_region_size(&backend->mr);
189 
190         qemu_madvise(ptr, sz,
191                      value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
192         backend->dump = value;
193     }
194 }
195 
196 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
197 {
198     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
199 
200     return backend->prealloc || backend->force_prealloc;
201 }
202 
203 static void host_memory_backend_set_prealloc(Object *obj, bool value,
204                                              Error **errp)
205 {
206     Error *local_err = NULL;
207     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
208 
209     if (backend->force_prealloc) {
210         if (value) {
211             error_setg(errp,
212                        "remove -mem-prealloc to use the prealloc property");
213             return;
214         }
215     }
216 
217     if (!memory_region_size(&backend->mr)) {
218         backend->prealloc = value;
219         return;
220     }
221 
222     if (value && !backend->prealloc) {
223         int fd = memory_region_get_fd(&backend->mr);
224         void *ptr = memory_region_get_ram_ptr(&backend->mr);
225         uint64_t sz = memory_region_size(&backend->mr);
226 
227         os_mem_prealloc(fd, ptr, sz, &local_err);
228         if (local_err) {
229             error_propagate(errp, local_err);
230             return;
231         }
232         backend->prealloc = true;
233     }
234 }
235 
236 static void host_memory_backend_init(Object *obj)
237 {
238     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
239     MachineState *machine = MACHINE(qdev_get_machine());
240 
241     backend->merge = machine_mem_merge(machine);
242     backend->dump = machine_dump_guest_core(machine);
243     backend->prealloc = mem_prealloc;
244 }
245 
246 MemoryRegion *
247 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
248 {
249     return memory_region_size(&backend->mr) ? &backend->mr : NULL;
250 }
251 
252 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
253 {
254     backend->is_mapped = mapped;
255 }
256 
257 bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
258 {
259     return backend->is_mapped;
260 }
261 
262 static void
263 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
264 {
265     HostMemoryBackend *backend = MEMORY_BACKEND(uc);
266     HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
267     Error *local_err = NULL;
268     void *ptr;
269     uint64_t sz;
270 
271     if (bc->alloc) {
272         bc->alloc(backend, &local_err);
273         if (local_err) {
274             goto out;
275         }
276 
277         ptr = memory_region_get_ram_ptr(&backend->mr);
278         sz = memory_region_size(&backend->mr);
279 
280         if (backend->merge) {
281             qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
282         }
283         if (!backend->dump) {
284             qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
285         }
286 #ifdef CONFIG_NUMA
287         unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
288         /* lastbit == MAX_NODES means maxnode = 0 */
289         unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
290         /* ensure policy won't be ignored in case memory is preallocated
291          * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
292          * this doesn't catch hugepage case. */
293         unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
294 
295         /* check for invalid host-nodes and policies and give more verbose
296          * error messages than mbind(). */
297         if (maxnode && backend->policy == MPOL_DEFAULT) {
298             error_setg(errp, "host-nodes must be empty for policy default,"
299                        " or you should explicitly specify a policy other"
300                        " than default");
301             return;
302         } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
303             error_setg(errp, "host-nodes must be set for policy %s",
304                        HostMemPolicy_lookup[backend->policy]);
305             return;
306         }
307 
308         /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
309          * as argument to mbind() due to an old Linux bug (feature?) which
310          * cuts off the last specified node. This means backend->host_nodes
311          * must have MAX_NODES+1 bits available.
312          */
313         assert(sizeof(backend->host_nodes) >=
314                BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
315         assert(maxnode <= MAX_NODES);
316         if (mbind(ptr, sz, backend->policy,
317                   maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
318             if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
319                 error_setg_errno(errp, errno,
320                                  "cannot bind memory to host NUMA nodes");
321                 return;
322             }
323         }
324 #endif
325         /* Preallocate memory after the NUMA policy has been instantiated.
326          * This is necessary to guarantee memory is allocated with
327          * specified NUMA policy in place.
328          */
329         if (backend->prealloc) {
330             os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
331                             &local_err);
332             if (local_err) {
333                 goto out;
334             }
335         }
336     }
337 out:
338     error_propagate(errp, local_err);
339 }
340 
341 static bool
342 host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
343 {
344     if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
345         return false;
346     } else {
347         return true;
348     }
349 }
350 
351 static char *get_id(Object *o, Error **errp)
352 {
353     HostMemoryBackend *backend = MEMORY_BACKEND(o);
354 
355     return g_strdup(backend->id);
356 }
357 
358 static void set_id(Object *o, const char *str, Error **errp)
359 {
360     HostMemoryBackend *backend = MEMORY_BACKEND(o);
361 
362     if (backend->id) {
363         error_setg(errp, "cannot change property value");
364         return;
365     }
366     backend->id = g_strdup(str);
367 }
368 
369 static void
370 host_memory_backend_class_init(ObjectClass *oc, void *data)
371 {
372     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
373 
374     ucc->complete = host_memory_backend_memory_complete;
375     ucc->can_be_deleted = host_memory_backend_can_be_deleted;
376 
377     object_class_property_add_bool(oc, "merge",
378         host_memory_backend_get_merge,
379         host_memory_backend_set_merge, &error_abort);
380     object_class_property_add_bool(oc, "dump",
381         host_memory_backend_get_dump,
382         host_memory_backend_set_dump, &error_abort);
383     object_class_property_add_bool(oc, "prealloc",
384         host_memory_backend_get_prealloc,
385         host_memory_backend_set_prealloc, &error_abort);
386     object_class_property_add(oc, "size", "int",
387         host_memory_backend_get_size,
388         host_memory_backend_set_size,
389         NULL, NULL, &error_abort);
390     object_class_property_add(oc, "host-nodes", "int",
391         host_memory_backend_get_host_nodes,
392         host_memory_backend_set_host_nodes,
393         NULL, NULL, &error_abort);
394     object_class_property_add_enum(oc, "policy", "HostMemPolicy",
395         HostMemPolicy_lookup,
396         host_memory_backend_get_policy,
397         host_memory_backend_set_policy, &error_abort);
398     object_class_property_add_str(oc, "id", get_id, set_id, &error_abort);
399 }
400 
401 static void host_memory_backend_finalize(Object *o)
402 {
403     HostMemoryBackend *backend = MEMORY_BACKEND(o);
404     g_free(backend->id);
405 }
406 
407 static const TypeInfo host_memory_backend_info = {
408     .name = TYPE_MEMORY_BACKEND,
409     .parent = TYPE_OBJECT,
410     .abstract = true,
411     .class_size = sizeof(HostMemoryBackendClass),
412     .class_init = host_memory_backend_class_init,
413     .instance_size = sizeof(HostMemoryBackend),
414     .instance_init = host_memory_backend_init,
415     .instance_finalize = host_memory_backend_finalize,
416     .interfaces = (InterfaceInfo[]) {
417         { TYPE_USER_CREATABLE },
418         { }
419     }
420 };
421 
422 static void register_types(void)
423 {
424     type_register_static(&host_memory_backend_info);
425 }
426 
427 type_init(register_types);
428