1 /* 2 * QEMU Host Memory Backend 3 * 4 * Copyright (C) 2013-2014 Red Hat Inc 5 * 6 * Authors: 7 * Igor Mammedov <imammedo@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "sysemu/hostmem.h" 14 #include "hw/boards.h" 15 #include "qapi/visitor.h" 16 #include "qapi-types.h" 17 #include "qapi-visit.h" 18 #include "qemu/config-file.h" 19 #include "qom/object_interfaces.h" 20 21 #ifdef CONFIG_NUMA 22 #include <numaif.h> 23 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); 24 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); 25 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); 26 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); 27 #endif 28 29 static void 30 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name, 31 void *opaque, Error **errp) 32 { 33 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 34 uint64_t value = backend->size; 35 36 visit_type_size(v, name, &value, errp); 37 } 38 39 static void 40 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, 41 void *opaque, Error **errp) 42 { 43 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 44 Error *local_err = NULL; 45 uint64_t value; 46 47 if (memory_region_size(&backend->mr)) { 48 error_setg(&local_err, "cannot change property value"); 49 goto out; 50 } 51 52 visit_type_size(v, name, &value, &local_err); 53 if (local_err) { 54 goto out; 55 } 56 if (!value) { 57 error_setg(&local_err, "Property '%s.%s' doesn't take value '%" 58 PRIu64 "'", object_get_typename(obj), name, value); 59 goto out; 60 } 61 backend->size = value; 62 out: 63 error_propagate(errp, local_err); 64 } 65 66 static void 67 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, 68 void *opaque, Error **errp) 69 { 70 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 71 uint16List *host_nodes = NULL; 72 uint16List **node = &host_nodes; 73 unsigned long value; 74 75 value = find_first_bit(backend->host_nodes, MAX_NODES); 76 if (value == MAX_NODES) { 77 return; 78 } 79 80 *node = g_malloc0(sizeof(**node)); 81 (*node)->value = value; 82 node = &(*node)->next; 83 84 do { 85 value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); 86 if (value == MAX_NODES) { 87 break; 88 } 89 90 *node = g_malloc0(sizeof(**node)); 91 (*node)->value = value; 92 node = &(*node)->next; 93 } while (true); 94 95 visit_type_uint16List(v, name, &host_nodes, errp); 96 } 97 98 static void 99 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name, 100 void *opaque, Error **errp) 101 { 102 #ifdef CONFIG_NUMA 103 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 104 uint16List *l = NULL; 105 106 visit_type_uint16List(v, name, &l, errp); 107 108 while (l) { 109 bitmap_set(backend->host_nodes, l->value, 1); 110 l = l->next; 111 } 112 #else 113 error_setg(errp, "NUMA node binding are not supported by this QEMU"); 114 #endif 115 } 116 117 static int 118 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED) 119 { 120 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 121 return backend->policy; 122 } 123 124 static void 125 host_memory_backend_set_policy(Object *obj, int policy, Error **errp) 126 { 127 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 128 backend->policy = policy; 129 130 #ifndef CONFIG_NUMA 131 if (policy != HOST_MEM_POLICY_DEFAULT) { 132 error_setg(errp, "NUMA policies are not supported by this QEMU"); 133 } 134 #endif 135 } 136 137 static bool host_memory_backend_get_merge(Object *obj, Error **errp) 138 { 139 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 140 141 return backend->merge; 142 } 143 144 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) 145 { 146 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 147 148 if (!memory_region_size(&backend->mr)) { 149 backend->merge = value; 150 return; 151 } 152 153 if (value != backend->merge) { 154 void *ptr = memory_region_get_ram_ptr(&backend->mr); 155 uint64_t sz = memory_region_size(&backend->mr); 156 157 qemu_madvise(ptr, sz, 158 value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); 159 backend->merge = value; 160 } 161 } 162 163 static bool host_memory_backend_get_dump(Object *obj, Error **errp) 164 { 165 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 166 167 return backend->dump; 168 } 169 170 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) 171 { 172 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 173 174 if (!memory_region_size(&backend->mr)) { 175 backend->dump = value; 176 return; 177 } 178 179 if (value != backend->dump) { 180 void *ptr = memory_region_get_ram_ptr(&backend->mr); 181 uint64_t sz = memory_region_size(&backend->mr); 182 183 qemu_madvise(ptr, sz, 184 value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); 185 backend->dump = value; 186 } 187 } 188 189 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) 190 { 191 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 192 193 return backend->prealloc || backend->force_prealloc; 194 } 195 196 static void host_memory_backend_set_prealloc(Object *obj, bool value, 197 Error **errp) 198 { 199 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 200 201 if (backend->force_prealloc) { 202 if (value) { 203 error_setg(errp, 204 "remove -mem-prealloc to use the prealloc property"); 205 return; 206 } 207 } 208 209 if (!memory_region_size(&backend->mr)) { 210 backend->prealloc = value; 211 return; 212 } 213 214 if (value && !backend->prealloc) { 215 int fd = memory_region_get_fd(&backend->mr); 216 void *ptr = memory_region_get_ram_ptr(&backend->mr); 217 uint64_t sz = memory_region_size(&backend->mr); 218 219 os_mem_prealloc(fd, ptr, sz); 220 backend->prealloc = true; 221 } 222 } 223 224 static void host_memory_backend_init(Object *obj) 225 { 226 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 227 MachineState *machine = MACHINE(qdev_get_machine()); 228 229 backend->merge = machine_mem_merge(machine); 230 backend->dump = machine_dump_guest_core(machine); 231 backend->prealloc = mem_prealloc; 232 233 object_property_add_bool(obj, "merge", 234 host_memory_backend_get_merge, 235 host_memory_backend_set_merge, NULL); 236 object_property_add_bool(obj, "dump", 237 host_memory_backend_get_dump, 238 host_memory_backend_set_dump, NULL); 239 object_property_add_bool(obj, "prealloc", 240 host_memory_backend_get_prealloc, 241 host_memory_backend_set_prealloc, NULL); 242 object_property_add(obj, "size", "int", 243 host_memory_backend_get_size, 244 host_memory_backend_set_size, NULL, NULL, NULL); 245 object_property_add(obj, "host-nodes", "int", 246 host_memory_backend_get_host_nodes, 247 host_memory_backend_set_host_nodes, NULL, NULL, NULL); 248 object_property_add_enum(obj, "policy", "HostMemPolicy", 249 HostMemPolicy_lookup, 250 host_memory_backend_get_policy, 251 host_memory_backend_set_policy, NULL); 252 } 253 254 MemoryRegion * 255 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp) 256 { 257 return memory_region_size(&backend->mr) ? &backend->mr : NULL; 258 } 259 260 static void 261 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) 262 { 263 HostMemoryBackend *backend = MEMORY_BACKEND(uc); 264 HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); 265 Error *local_err = NULL; 266 void *ptr; 267 uint64_t sz; 268 269 if (bc->alloc) { 270 bc->alloc(backend, &local_err); 271 if (local_err) { 272 error_propagate(errp, local_err); 273 return; 274 } 275 276 ptr = memory_region_get_ram_ptr(&backend->mr); 277 sz = memory_region_size(&backend->mr); 278 279 if (backend->merge) { 280 qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); 281 } 282 if (!backend->dump) { 283 qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); 284 } 285 #ifdef CONFIG_NUMA 286 unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); 287 /* lastbit == MAX_NODES means maxnode = 0 */ 288 unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); 289 /* ensure policy won't be ignored in case memory is preallocated 290 * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so 291 * this doesn't catch hugepage case. */ 292 unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; 293 294 /* check for invalid host-nodes and policies and give more verbose 295 * error messages than mbind(). */ 296 if (maxnode && backend->policy == MPOL_DEFAULT) { 297 error_setg(errp, "host-nodes must be empty for policy default," 298 " or you should explicitly specify a policy other" 299 " than default"); 300 return; 301 } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { 302 error_setg(errp, "host-nodes must be set for policy %s", 303 HostMemPolicy_lookup[backend->policy]); 304 return; 305 } 306 307 /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 308 * as argument to mbind() due to an old Linux bug (feature?) which 309 * cuts off the last specified node. This means backend->host_nodes 310 * must have MAX_NODES+1 bits available. 311 */ 312 assert(sizeof(backend->host_nodes) >= 313 BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); 314 assert(maxnode <= MAX_NODES); 315 if (mbind(ptr, sz, backend->policy, 316 maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { 317 if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { 318 error_setg_errno(errp, errno, 319 "cannot bind memory to host NUMA nodes"); 320 return; 321 } 322 } 323 #endif 324 /* Preallocate memory after the NUMA policy has been instantiated. 325 * This is necessary to guarantee memory is allocated with 326 * specified NUMA policy in place. 327 */ 328 if (backend->prealloc) { 329 os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz); 330 } 331 } 332 } 333 334 static bool 335 host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp) 336 { 337 MemoryRegion *mr; 338 339 mr = host_memory_backend_get_memory(MEMORY_BACKEND(uc), errp); 340 if (memory_region_is_mapped(mr)) { 341 return false; 342 } else { 343 return true; 344 } 345 } 346 347 static void 348 host_memory_backend_class_init(ObjectClass *oc, void *data) 349 { 350 UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); 351 352 ucc->complete = host_memory_backend_memory_complete; 353 ucc->can_be_deleted = host_memory_backend_can_be_deleted; 354 } 355 356 static const TypeInfo host_memory_backend_info = { 357 .name = TYPE_MEMORY_BACKEND, 358 .parent = TYPE_OBJECT, 359 .abstract = true, 360 .class_size = sizeof(HostMemoryBackendClass), 361 .class_init = host_memory_backend_class_init, 362 .instance_size = sizeof(HostMemoryBackend), 363 .instance_init = host_memory_backend_init, 364 .interfaces = (InterfaceInfo[]) { 365 { TYPE_USER_CREATABLE }, 366 { } 367 } 368 }; 369 370 static void register_types(void) 371 { 372 type_register_static(&host_memory_backend_info); 373 } 374 375 type_init(register_types); 376