xref: /openbmc/qemu/backends/hostmem.c (revision 073d9f2c)
1 /*
2  * QEMU Host Memory Backend
3  *
4  * Copyright (C) 2013-2014 Red Hat Inc
5  *
6  * Authors:
7  *   Igor Mammedov <imammedo@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "sysemu/hostmem.h"
15 #include "hw/boards.h"
16 #include "qapi/error.h"
17 #include "qapi/qapi-builtin-visit.h"
18 #include "qapi/visitor.h"
19 #include "qemu/config-file.h"
20 #include "qom/object_interfaces.h"
21 #include "qemu/mmap-alloc.h"
22 
#ifdef CONFIG_NUMA
#include <numaif.h>
/*
 * backend->policy (a HOST_MEM_POLICY_* value) is handed to mbind() as-is
 * further down in this file, so each HOST_MEM_POLICY_* constant must be
 * numerically equal to its MPOL_* counterpart.  Break the build otherwise.
 */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
#endif
30 
31 char *
32 host_memory_backend_get_name(HostMemoryBackend *backend)
33 {
34     if (!backend->use_canonical_path) {
35         return object_get_canonical_path_component(OBJECT(backend));
36     }
37 
38     return object_get_canonical_path(OBJECT(backend));
39 }
40 
41 static void
42 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
43                              void *opaque, Error **errp)
44 {
45     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
46     uint64_t value = backend->size;
47 
48     visit_type_size(v, name, &value, errp);
49 }
50 
51 static void
52 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
53                              void *opaque, Error **errp)
54 {
55     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
56     Error *local_err = NULL;
57     uint64_t value;
58 
59     if (host_memory_backend_mr_inited(backend)) {
60         error_setg(&local_err, "cannot change property value");
61         goto out;
62     }
63 
64     visit_type_size(v, name, &value, &local_err);
65     if (local_err) {
66         goto out;
67     }
68     if (!value) {
69         error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
70                    PRIu64 "'", object_get_typename(obj), name, value);
71         goto out;
72     }
73     backend->size = value;
74 out:
75     error_propagate(errp, local_err);
76 }
77 
78 static void
79 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
80                                    void *opaque, Error **errp)
81 {
82     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
83     uint16List *host_nodes = NULL;
84     uint16List **node = &host_nodes;
85     unsigned long value;
86 
87     value = find_first_bit(backend->host_nodes, MAX_NODES);
88     if (value == MAX_NODES) {
89         return;
90     }
91 
92     *node = g_malloc0(sizeof(**node));
93     (*node)->value = value;
94     node = &(*node)->next;
95 
96     do {
97         value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
98         if (value == MAX_NODES) {
99             break;
100         }
101 
102         *node = g_malloc0(sizeof(**node));
103         (*node)->value = value;
104         node = &(*node)->next;
105     } while (true);
106 
107     visit_type_uint16List(v, name, &host_nodes, errp);
108 }
109 
110 static void
111 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
112                                    void *opaque, Error **errp)
113 {
114 #ifdef CONFIG_NUMA
115     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
116     uint16List *l, *host_nodes = NULL;
117 
118     visit_type_uint16List(v, name, &host_nodes, errp);
119 
120     for (l = host_nodes; l; l = l->next) {
121         if (l->value >= MAX_NODES) {
122             error_setg(errp, "Invalid host-nodes value: %d", l->value);
123             goto out;
124         }
125     }
126 
127     for (l = host_nodes; l; l = l->next) {
128         bitmap_set(backend->host_nodes, l->value, 1);
129     }
130 
131 out:
132     qapi_free_uint16List(host_nodes);
133 #else
134     error_setg(errp, "NUMA node binding are not supported by this QEMU");
135 #endif
136 }
137 
138 static int
139 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
140 {
141     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
142     return backend->policy;
143 }
144 
145 static void
146 host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
147 {
148     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
149     backend->policy = policy;
150 
151 #ifndef CONFIG_NUMA
152     if (policy != HOST_MEM_POLICY_DEFAULT) {
153         error_setg(errp, "NUMA policies are not supported by this QEMU");
154     }
155 #endif
156 }
157 
158 static bool host_memory_backend_get_merge(Object *obj, Error **errp)
159 {
160     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
161 
162     return backend->merge;
163 }
164 
165 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
166 {
167     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
168 
169     if (!host_memory_backend_mr_inited(backend)) {
170         backend->merge = value;
171         return;
172     }
173 
174     if (value != backend->merge) {
175         void *ptr = memory_region_get_ram_ptr(&backend->mr);
176         uint64_t sz = memory_region_size(&backend->mr);
177 
178         qemu_madvise(ptr, sz,
179                      value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
180         backend->merge = value;
181     }
182 }
183 
184 static bool host_memory_backend_get_dump(Object *obj, Error **errp)
185 {
186     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
187 
188     return backend->dump;
189 }
190 
191 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
192 {
193     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
194 
195     if (!host_memory_backend_mr_inited(backend)) {
196         backend->dump = value;
197         return;
198     }
199 
200     if (value != backend->dump) {
201         void *ptr = memory_region_get_ram_ptr(&backend->mr);
202         uint64_t sz = memory_region_size(&backend->mr);
203 
204         qemu_madvise(ptr, sz,
205                      value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
206         backend->dump = value;
207     }
208 }
209 
210 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
211 {
212     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
213 
214     return backend->prealloc || backend->force_prealloc;
215 }
216 
217 static void host_memory_backend_set_prealloc(Object *obj, bool value,
218                                              Error **errp)
219 {
220     Error *local_err = NULL;
221     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
222 
223     if (backend->force_prealloc) {
224         if (value) {
225             error_setg(errp,
226                        "remove -mem-prealloc to use the prealloc property");
227             return;
228         }
229     }
230 
231     if (!host_memory_backend_mr_inited(backend)) {
232         backend->prealloc = value;
233         return;
234     }
235 
236     if (value && !backend->prealloc) {
237         int fd = memory_region_get_fd(&backend->mr);
238         void *ptr = memory_region_get_ram_ptr(&backend->mr);
239         uint64_t sz = memory_region_size(&backend->mr);
240 
241         os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err);
242         if (local_err) {
243             error_propagate(errp, local_err);
244             return;
245         }
246         backend->prealloc = true;
247     }
248 }
249 
250 static void host_memory_backend_init(Object *obj)
251 {
252     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
253     MachineState *machine = MACHINE(qdev_get_machine());
254 
255     backend->merge = machine_mem_merge(machine);
256     backend->dump = machine_dump_guest_core(machine);
257     backend->prealloc = mem_prealloc;
258 }
259 
260 static void host_memory_backend_post_init(Object *obj)
261 {
262     object_apply_compat_props(obj);
263 }
264 
265 bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
266 {
267     /*
268      * NOTE: We forbid zero-length memory backend, so here zero means
269      * "we haven't inited the backend memory region yet".
270      */
271     return memory_region_size(&backend->mr) != 0;
272 }
273 
274 MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
275 {
276     return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
277 }
278 
279 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
280 {
281     backend->is_mapped = mapped;
282 }
283 
284 bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
285 {
286     return backend->is_mapped;
287 }
288 
289 #ifdef __linux__
290 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
291 {
292     Object *obj = OBJECT(memdev);
293     char *path = object_property_get_str(obj, "mem-path", NULL);
294     size_t pagesize = qemu_mempath_getpagesize(path);
295 
296     g_free(path);
297     return pagesize;
298 }
299 #else
300 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
301 {
302     return getpagesize();
303 }
304 #endif
305 
306 static void
307 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
308 {
309     HostMemoryBackend *backend = MEMORY_BACKEND(uc);
310     HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
311     Error *local_err = NULL;
312     void *ptr;
313     uint64_t sz;
314 
315     if (bc->alloc) {
316         bc->alloc(backend, &local_err);
317         if (local_err) {
318             goto out;
319         }
320 
321         ptr = memory_region_get_ram_ptr(&backend->mr);
322         sz = memory_region_size(&backend->mr);
323 
324         if (backend->merge) {
325             qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
326         }
327         if (!backend->dump) {
328             qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
329         }
330 #ifdef CONFIG_NUMA
331         unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
332         /* lastbit == MAX_NODES means maxnode = 0 */
333         unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
334         /* ensure policy won't be ignored in case memory is preallocated
335          * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
336          * this doesn't catch hugepage case. */
337         unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
338 
339         /* check for invalid host-nodes and policies and give more verbose
340          * error messages than mbind(). */
341         if (maxnode && backend->policy == MPOL_DEFAULT) {
342             error_setg(errp, "host-nodes must be empty for policy default,"
343                        " or you should explicitly specify a policy other"
344                        " than default");
345             return;
346         } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
347             error_setg(errp, "host-nodes must be set for policy %s",
348                        HostMemPolicy_str(backend->policy));
349             return;
350         }
351 
352         /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
353          * as argument to mbind() due to an old Linux bug (feature?) which
354          * cuts off the last specified node. This means backend->host_nodes
355          * must have MAX_NODES+1 bits available.
356          */
357         assert(sizeof(backend->host_nodes) >=
358                BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
359         assert(maxnode <= MAX_NODES);
360         if (mbind(ptr, sz, backend->policy,
361                   maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
362             if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
363                 error_setg_errno(errp, errno,
364                                  "cannot bind memory to host NUMA nodes");
365                 return;
366             }
367         }
368 #endif
369         /* Preallocate memory after the NUMA policy has been instantiated.
370          * This is necessary to guarantee memory is allocated with
371          * specified NUMA policy in place.
372          */
373         if (backend->prealloc) {
374             os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
375                             smp_cpus, &local_err);
376             if (local_err) {
377                 goto out;
378             }
379         }
380     }
381 out:
382     error_propagate(errp, local_err);
383 }
384 
385 static bool
386 host_memory_backend_can_be_deleted(UserCreatable *uc)
387 {
388     if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
389         return false;
390     } else {
391         return true;
392     }
393 }
394 
395 static bool host_memory_backend_get_share(Object *o, Error **errp)
396 {
397     HostMemoryBackend *backend = MEMORY_BACKEND(o);
398 
399     return backend->share;
400 }
401 
402 static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
403 {
404     HostMemoryBackend *backend = MEMORY_BACKEND(o);
405 
406     if (host_memory_backend_mr_inited(backend)) {
407         error_setg(errp, "cannot change property value");
408         return;
409     }
410     backend->share = value;
411 }
412 
413 static bool
414 host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
415 {
416     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
417 
418     return backend->use_canonical_path;
419 }
420 
421 static void
422 host_memory_backend_set_use_canonical_path(Object *obj, bool value,
423                                            Error **errp)
424 {
425     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
426 
427     backend->use_canonical_path = value;
428 }
429 
430 static void
431 host_memory_backend_class_init(ObjectClass *oc, void *data)
432 {
433     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
434 
435     ucc->complete = host_memory_backend_memory_complete;
436     ucc->can_be_deleted = host_memory_backend_can_be_deleted;
437 
438     object_class_property_add_bool(oc, "merge",
439         host_memory_backend_get_merge,
440         host_memory_backend_set_merge, &error_abort);
441     object_class_property_set_description(oc, "merge",
442         "Mark memory as mergeable", &error_abort);
443     object_class_property_add_bool(oc, "dump",
444         host_memory_backend_get_dump,
445         host_memory_backend_set_dump, &error_abort);
446     object_class_property_set_description(oc, "dump",
447         "Set to 'off' to exclude from core dump", &error_abort);
448     object_class_property_add_bool(oc, "prealloc",
449         host_memory_backend_get_prealloc,
450         host_memory_backend_set_prealloc, &error_abort);
451     object_class_property_set_description(oc, "prealloc",
452         "Preallocate memory", &error_abort);
453     object_class_property_add(oc, "size", "int",
454         host_memory_backend_get_size,
455         host_memory_backend_set_size,
456         NULL, NULL, &error_abort);
457     object_class_property_set_description(oc, "size",
458         "Size of the memory region (ex: 500M)", &error_abort);
459     object_class_property_add(oc, "host-nodes", "int",
460         host_memory_backend_get_host_nodes,
461         host_memory_backend_set_host_nodes,
462         NULL, NULL, &error_abort);
463     object_class_property_set_description(oc, "host-nodes",
464         "Binds memory to the list of NUMA host nodes", &error_abort);
465     object_class_property_add_enum(oc, "policy", "HostMemPolicy",
466         &HostMemPolicy_lookup,
467         host_memory_backend_get_policy,
468         host_memory_backend_set_policy, &error_abort);
469     object_class_property_set_description(oc, "policy",
470         "Set the NUMA policy", &error_abort);
471     object_class_property_add_bool(oc, "share",
472         host_memory_backend_get_share, host_memory_backend_set_share,
473         &error_abort);
474     object_class_property_set_description(oc, "share",
475         "Mark the memory as private to QEMU or shared", &error_abort);
476     object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
477         host_memory_backend_get_use_canonical_path,
478         host_memory_backend_set_use_canonical_path, &error_abort);
479 }
480 
481 static const TypeInfo host_memory_backend_info = {
482     .name = TYPE_MEMORY_BACKEND,
483     .parent = TYPE_OBJECT,
484     .abstract = true,
485     .class_size = sizeof(HostMemoryBackendClass),
486     .class_init = host_memory_backend_class_init,
487     .instance_size = sizeof(HostMemoryBackend),
488     .instance_init = host_memory_backend_init,
489     .instance_post_init = host_memory_backend_post_init,
490     .interfaces = (InterfaceInfo[]) {
491         { TYPE_USER_CREATABLE },
492         { }
493     }
494 };
495 
496 static void register_types(void)
497 {
498     type_register_static(&host_memory_backend_info);
499 }
500 
501 type_init(register_types);
502