/*
 * QEMU Host Memory Backend
 *
 * Copyright (C) 2013-2014 Red Hat Inc
 *
 * Authors:
 *   Igor Mammedov <imammedo@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "sysemu/hostmem.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
#include "qapi/visitor.h"
#include "qemu/config-file.h"
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"

#ifdef CONFIG_NUMA
#include <numaif.h>
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
#endif

/*
 * Return the full canonical QOM path when
 * "x-use-canonical-path-for-ramblock-id" is enabled, otherwise just the
 * leaf path component.
 */
char *
host_memory_backend_get_name(HostMemoryBackend *backend)
{
    if (!backend->use_canonical_path) {
        return object_get_canonical_path_component(OBJECT(backend));
    }

    return object_get_canonical_path(OBJECT(backend));
}

static void
host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint64_t value = backend->size;

    visit_type_size(v, name, &value, errp);
}

static void
host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    Error *local_err = NULL;
    uint64_t value;

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(&local_err, "cannot change property %s of %s",
                   name, object_get_typename(obj));
        goto out;
    }

    visit_type_size(v, name, &value, &local_err);
    if (local_err) {
        goto out;
    }
    if (!value) {
        error_setg(&local_err,
                   "property '%s' of %s doesn't take value '%" PRIu64 "'",
                   name, object_get_typename(obj), value);
        goto out;
    }
    backend->size = value;
out:
    error_propagate(errp, local_err);
}

static void
host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint16List *host_nodes = NULL;
    uint16List **node = &host_nodes;
    unsigned long value;

    value = find_first_bit(backend->host_nodes, MAX_NODES);
    if (value == MAX_NODES) {
        goto ret;
    }

    *node = g_malloc0(sizeof(**node));
    (*node)->value = value;
    node = &(*node)->next;

    do {
        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
        if (value == MAX_NODES) {
            break;
        }

        *node = g_malloc0(sizeof(**node));
        (*node)->value = value;
        node = &(*node)->next;
    } while (true);

ret:
    visit_type_uint16List(v, name, &host_nodes, errp);
    /* The output visitor does not take ownership of the list; free it. */
    qapi_free_uint16List(host_nodes);
}

static void
host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
#ifdef CONFIG_NUMA
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint16List *l, *host_nodes = NULL;

    visit_type_uint16List(v, name, &host_nodes, errp);

    for (l = host_nodes; l; l = l->next) {
        if (l->value >= MAX_NODES) {
            error_setg(errp, "Invalid host-nodes value: %d", l->value);
            goto out;
        }
    }

    for (l = host_nodes; l; l = l->next) {
        bitmap_set(backend->host_nodes, l->value, 1);
    }

out:
    qapi_free_uint16List(host_nodes);
#else
    error_setg(errp, "NUMA node binding is not supported by this QEMU");
#endif
}

static int
host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    return backend->policy;
}

static void
host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    backend->policy = policy;

#ifndef CONFIG_NUMA
    if (policy != HOST_MEM_POLICY_DEFAULT) {
        error_setg(errp, "NUMA policies are not supported by this QEMU");
    }
#endif
}

static bool host_memory_backend_get_merge(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->merge;
}

static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!host_memory_backend_mr_inited(backend)) {
        backend->merge = value;
        return;
    }

    if (value != backend->merge) {
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        qemu_madvise(ptr, sz,
                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
        backend->merge = value;
    }
}

static bool host_memory_backend_get_dump(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->dump;
}

static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!host_memory_backend_mr_inited(backend)) {
        backend->dump = value;
        return;
    }

    if (value != backend->dump) {
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        qemu_madvise(ptr, sz,
                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
        backend->dump = value;
    }
}

static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->prealloc || backend->force_prealloc;
}

static void host_memory_backend_set_prealloc(Object *obj, bool value,
                                             Error **errp)
{
    Error *local_err = NULL;
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    MachineState *ms = MACHINE(qdev_get_machine());

    if (backend->force_prealloc) {
        if (value) {
            error_setg(errp,
                       "remove -mem-prealloc to use the prealloc property");
            return;
        }
    }

    if (!host_memory_backend_mr_inited(backend)) {
        backend->prealloc = value;
        return;
    }

    if (value && !backend->prealloc) {
        int fd = memory_region_get_fd(&backend->mr);
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        os_mem_prealloc(fd, ptr, sz, ms->smp.cpus, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        backend->prealloc = true;
    }
}

static void host_memory_backend_init(Object *obj)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    MachineState *machine = MACHINE(qdev_get_machine());

    /* Defaults follow the machine-wide settings. */
    backend->merge = machine_mem_merge(machine);
    backend->dump = machine_dump_guest_core(machine);
    backend->prealloc = mem_prealloc;
}

static void host_memory_backend_post_init(Object *obj)
{
    object_apply_compat_props(obj);
}

bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
{
    /*
     * NOTE: We forbid zero-length memory backend, so here zero means
     * "we haven't inited the backend memory region yet".
     */
    return memory_region_size(&backend->mr) != 0;
}

MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
{
    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
}

void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
{
    backend->is_mapped = mapped;
}

bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
{
    return backend->is_mapped;
}

#ifdef __linux__
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
    Object *obj = OBJECT(memdev);
    char *path = object_property_get_str(obj, "mem-path", NULL);
    size_t pagesize = qemu_mempath_getpagesize(path);

    g_free(path);
    return pagesize;
}
#else
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
    return getpagesize();
}
#endif

/*
 * UserCreatable completion hook: allocate the backing memory, apply the
 * merge/dump madvise settings and NUMA binding, then optionally preallocate.
 */
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
    MachineState *ms = MACHINE(qdev_get_machine());
    Error *local_err = NULL;
    void *ptr;
    uint64_t sz;

    if (bc->alloc) {
        bc->alloc(backend, &local_err);
        if (local_err) {
            goto out;
        }

        ptr = memory_region_get_ram_ptr(&backend->mr);
        sz = memory_region_size(&backend->mr);

        if (backend->merge) {
            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
        }
        if (!backend->dump) {
            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
        }
#ifdef CONFIG_NUMA
        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
        /* lastbit == MAX_NODES means maxnode = 0 */
        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
        /* ensure policy won't be ignored in case memory is preallocated
         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
         * this doesn't catch hugepage case. */
        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;

        /* check for invalid host-nodes and policies and give more verbose
         * error messages than mbind(). */
        if (maxnode && backend->policy == MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be empty for policy default,"
                       " or you should explicitly specify a policy other"
                       " than default");
            return;
        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be set for policy %s",
                       HostMemPolicy_str(backend->policy));
            return;
        }

        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
         * as argument to mbind() due to an old Linux bug (feature?) which
         * cuts off the last specified node. This means backend->host_nodes
         * must have MAX_NODES+1 bits available.
         */
        assert(sizeof(backend->host_nodes) >=
               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
        assert(maxnode <= MAX_NODES);
        if (mbind(ptr, sz, backend->policy,
                  maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
                error_setg_errno(errp, errno,
                                 "cannot bind memory to host NUMA nodes");
                return;
            }
        }
#endif
        /* Preallocate memory after the NUMA policy has been instantiated.
         * This is necessary to guarantee memory is allocated with
         * specified NUMA policy in place.
         */
        if (backend->prealloc) {
            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
                            ms->smp.cpus, &local_err);
            if (local_err) {
                goto out;
            }
        }
    }
out:
    error_propagate(errp, local_err);
}

static bool
host_memory_backend_can_be_deleted(UserCreatable *uc)
{
    /* A backend that is mapped into a guest cannot be deleted. */
    return !host_memory_backend_is_mapped(MEMORY_BACKEND(uc));
}

static bool host_memory_backend_get_share(Object *o, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    return backend->share;
}

static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property value");
        return;
    }
    backend->share = value;
}

static bool
host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->use_canonical_path;
}

static void
host_memory_backend_set_use_canonical_path(Object *obj, bool value,
                                           Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    backend->use_canonical_path = value;
}

static void
host_memory_backend_class_init(ObjectClass *oc, void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->complete = host_memory_backend_memory_complete;
    ucc->can_be_deleted = host_memory_backend_can_be_deleted;

    object_class_property_add_bool(oc, "merge",
        host_memory_backend_get_merge,
        host_memory_backend_set_merge, &error_abort);
    object_class_property_set_description(oc, "merge",
        "Mark memory as mergeable", &error_abort);
    object_class_property_add_bool(oc, "dump",
        host_memory_backend_get_dump,
        host_memory_backend_set_dump, &error_abort);
    object_class_property_set_description(oc, "dump",
        "Set to 'off' to exclude from core dump", &error_abort);
    object_class_property_add_bool(oc, "prealloc",
        host_memory_backend_get_prealloc,
        host_memory_backend_set_prealloc, &error_abort);
    object_class_property_set_description(oc, "prealloc",
        "Preallocate memory", &error_abort);
    object_class_property_add(oc, "size", "int",
        host_memory_backend_get_size,
        host_memory_backend_set_size,
        NULL, NULL, &error_abort);
    object_class_property_set_description(oc, "size",
        "Size of the memory region (ex: 500M)", &error_abort);
    object_class_property_add(oc, "host-nodes", "int",
        host_memory_backend_get_host_nodes,
        host_memory_backend_set_host_nodes,
        NULL, NULL, &error_abort);
    object_class_property_set_description(oc, "host-nodes",
        "Binds memory to the list of NUMA host nodes", &error_abort);
    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
        &HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy, &error_abort);
    object_class_property_set_description(oc, "policy",
        "Set the NUMA policy", &error_abort);
    object_class_property_add_bool(oc, "share",
        host_memory_backend_get_share, host_memory_backend_set_share,
        &error_abort);
    object_class_property_set_description(oc, "share",
        "Mark the memory as private to QEMU or shared", &error_abort);
    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
        host_memory_backend_get_use_canonical_path,
        host_memory_backend_set_use_canonical_path, &error_abort);
}

static const TypeInfo host_memory_backend_info = {
    .name = TYPE_MEMORY_BACKEND,
    .parent = TYPE_OBJECT,
    .abstract = true,
    .class_size = sizeof(HostMemoryBackendClass),
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
    .instance_post_init = host_memory_backend_post_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};

static void register_types(void)
{
    type_register_static(&host_memory_backend_info);
}

type_init(register_types);