1 /* 2 * QEMU Host Memory Backend 3 * 4 * Copyright (C) 2013-2014 Red Hat Inc 5 * 6 * Authors: 7 * Igor Mammedov <imammedo@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "sysemu/hostmem.h" 15 #include "hw/boards.h" 16 #include "qapi/error.h" 17 #include "qapi/qapi-builtin-visit.h" 18 #include "qapi/visitor.h" 19 #include "qemu/config-file.h" 20 #include "qom/object_interfaces.h" 21 #include "qemu/mmap-alloc.h" 22 23 #ifdef CONFIG_NUMA 24 #include <numaif.h> 25 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); 26 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); 27 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); 28 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); 29 #endif 30 31 char * 32 host_memory_backend_get_name(HostMemoryBackend *backend) 33 { 34 if (!backend->use_canonical_path) { 35 return g_strdup(object_get_canonical_path_component(OBJECT(backend))); 36 } 37 38 return object_get_canonical_path(OBJECT(backend)); 39 } 40 41 static void 42 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name, 43 void *opaque, Error **errp) 44 { 45 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 46 uint64_t value = backend->size; 47 48 visit_type_size(v, name, &value, errp); 49 } 50 51 static void 52 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, 53 void *opaque, Error **errp) 54 { 55 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 56 uint64_t value; 57 58 if (host_memory_backend_mr_inited(backend)) { 59 error_setg(errp, "cannot change property %s of %s ", name, 60 object_get_typename(obj)); 61 return; 62 } 63 64 if (!visit_type_size(v, name, &value, errp)) { 65 return; 66 } 67 if (!value) { 68 error_setg(errp, 69 "property '%s' of %s doesn't take value '%" PRIu64 "'", 70 name, object_get_typename(obj), value); 71 return; 72 } 73 backend->size = value; 74 } 75 76 static void 77 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, 78 void *opaque, Error **errp) 79 { 80 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 81 uint16List *host_nodes = NULL; 82 uint16List **tail = &host_nodes; 83 unsigned long value; 84 85 value = find_first_bit(backend->host_nodes, MAX_NODES); 86 if (value == MAX_NODES) { 87 goto ret; 88 } 89 90 QAPI_LIST_APPEND(tail, value); 91 92 do { 93 value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); 94 if (value == MAX_NODES) { 95 break; 96 } 97 98 QAPI_LIST_APPEND(tail, value); 99 } while (true); 100 101 ret: 102 visit_type_uint16List(v, name, &host_nodes, errp); 103 qapi_free_uint16List(host_nodes); 104 } 105 106 static void 107 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name, 108 void *opaque, Error **errp) 109 { 110 #ifdef CONFIG_NUMA 111 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 112 uint16List *l, *host_nodes = NULL; 113 114 visit_type_uint16List(v, name, &host_nodes, errp); 115 116 for (l = host_nodes; l; l = l->next) { 117 if (l->value >= MAX_NODES) { 118 error_setg(errp, "Invalid host-nodes value: %d", l->value); 119 goto out; 120 } 121 } 122 123 for (l = host_nodes; l; l = l->next) { 124 bitmap_set(backend->host_nodes, l->value, 1); 125 } 126 127 out: 128 qapi_free_uint16List(host_nodes); 129 #else 130 error_setg(errp, "NUMA node binding are not supported by this QEMU"); 131 #endif 132 } 133 134 static int 135 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED) 136 { 137 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 138 return backend->policy; 139 } 140 141 static void 142 host_memory_backend_set_policy(Object *obj, int policy, Error **errp) 143 { 144 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 145 backend->policy = policy; 146 147 #ifndef CONFIG_NUMA 148 if (policy != HOST_MEM_POLICY_DEFAULT) { 149 error_setg(errp, "NUMA policies are not supported by this QEMU"); 150 } 151 #endif 152 } 153 154 static bool host_memory_backend_get_merge(Object *obj, Error **errp) 155 { 156 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 157 158 return backend->merge; 159 } 160 161 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) 162 { 163 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 164 165 if (!host_memory_backend_mr_inited(backend)) { 166 backend->merge = value; 167 return; 168 } 169 170 if (value != backend->merge) { 171 void *ptr = memory_region_get_ram_ptr(&backend->mr); 172 uint64_t sz = memory_region_size(&backend->mr); 173 174 qemu_madvise(ptr, sz, 175 value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); 176 backend->merge = value; 177 } 178 } 179 180 static bool host_memory_backend_get_dump(Object *obj, Error **errp) 181 { 182 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 183 184 return backend->dump; 185 } 186 187 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) 188 { 189 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 190 191 if (!host_memory_backend_mr_inited(backend)) { 192 backend->dump = value; 193 return; 194 } 195 196 if (value != backend->dump) { 197 void *ptr = memory_region_get_ram_ptr(&backend->mr); 198 uint64_t sz = memory_region_size(&backend->mr); 199 200 qemu_madvise(ptr, sz, 201 value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); 202 backend->dump = value; 203 } 204 } 205 206 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) 207 { 208 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 209 210 return backend->prealloc; 211 } 212 213 static void host_memory_backend_set_prealloc(Object *obj, bool value, 214 Error **errp) 215 { 216 Error *local_err = NULL; 217 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 218 219 if (!backend->reserve && value) { 220 error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible"); 221 return; 222 } 223 224 if (!host_memory_backend_mr_inited(backend)) { 225 backend->prealloc = value; 226 return; 227 } 228 229 if (value && !backend->prealloc) { 230 int fd = memory_region_get_fd(&backend->mr); 231 void *ptr = memory_region_get_ram_ptr(&backend->mr); 232 uint64_t sz = memory_region_size(&backend->mr); 233 234 os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err); 235 if (local_err) { 236 error_propagate(errp, local_err); 237 return; 238 } 239 backend->prealloc = true; 240 } 241 } 242 243 static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v, 244 const char *name, void *opaque, Error **errp) 245 { 246 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 247 visit_type_uint32(v, name, &backend->prealloc_threads, errp); 248 } 249 250 static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v, 251 const char *name, void *opaque, Error **errp) 252 { 253 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 254 uint32_t value; 255 256 if (!visit_type_uint32(v, name, &value, errp)) { 257 return; 258 } 259 if (value <= 0) { 260 error_setg(errp, "property '%s' of %s doesn't take value '%d'", name, 261 object_get_typename(obj), value); 262 return; 263 } 264 backend->prealloc_threads = value; 265 } 266 267 static void host_memory_backend_init(Object *obj) 268 { 269 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 270 MachineState *machine = MACHINE(qdev_get_machine()); 271 272 /* TODO: convert access to globals to compat properties */ 273 backend->merge = machine_mem_merge(machine); 274 backend->dump = machine_dump_guest_core(machine); 275 backend->reserve = true; 276 backend->prealloc_threads = 1; 277 } 278 279 static void host_memory_backend_post_init(Object *obj) 280 { 281 object_apply_compat_props(obj); 282 } 283 284 bool host_memory_backend_mr_inited(HostMemoryBackend *backend) 285 { 286 /* 287 * NOTE: We forbid zero-length memory backend, so here zero means 288 * "we haven't inited the backend memory region yet". 289 */ 290 return memory_region_size(&backend->mr) != 0; 291 } 292 293 MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend) 294 { 295 return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL; 296 } 297 298 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped) 299 { 300 backend->is_mapped = mapped; 301 } 302 303 bool host_memory_backend_is_mapped(HostMemoryBackend *backend) 304 { 305 return backend->is_mapped; 306 } 307 308 #ifdef __linux__ 309 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) 310 { 311 Object *obj = OBJECT(memdev); 312 char *path = object_property_get_str(obj, "mem-path", NULL); 313 size_t pagesize = qemu_mempath_getpagesize(path); 314 315 g_free(path); 316 return pagesize; 317 } 318 #else 319 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) 320 { 321 return qemu_real_host_page_size; 322 } 323 #endif 324 325 static void 326 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) 327 { 328 HostMemoryBackend *backend = MEMORY_BACKEND(uc); 329 HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); 330 Error *local_err = NULL; 331 void *ptr; 332 uint64_t sz; 333 334 if (bc->alloc) { 335 bc->alloc(backend, &local_err); 336 if (local_err) { 337 goto out; 338 } 339 340 ptr = memory_region_get_ram_ptr(&backend->mr); 341 sz = memory_region_size(&backend->mr); 342 343 if (backend->merge) { 344 qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); 345 } 346 if (!backend->dump) { 347 qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); 348 } 349 #ifdef CONFIG_NUMA 350 unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); 351 /* lastbit == MAX_NODES means maxnode = 0 */ 352 unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); 353 /* ensure policy won't be ignored in case memory is preallocated 354 * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so 355 * this doesn't catch hugepage case. */ 356 unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; 357 358 /* check for invalid host-nodes and policies and give more verbose 359 * error messages than mbind(). */ 360 if (maxnode && backend->policy == MPOL_DEFAULT) { 361 error_setg(errp, "host-nodes must be empty for policy default," 362 " or you should explicitly specify a policy other" 363 " than default"); 364 return; 365 } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { 366 error_setg(errp, "host-nodes must be set for policy %s", 367 HostMemPolicy_str(backend->policy)); 368 return; 369 } 370 371 /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 372 * as argument to mbind() due to an old Linux bug (feature?) which 373 * cuts off the last specified node. This means backend->host_nodes 374 * must have MAX_NODES+1 bits available. 375 */ 376 assert(sizeof(backend->host_nodes) >= 377 BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); 378 assert(maxnode <= MAX_NODES); 379 380 if (maxnode && 381 mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1, 382 flags)) { 383 if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { 384 error_setg_errno(errp, errno, 385 "cannot bind memory to host NUMA nodes"); 386 return; 387 } 388 } 389 #endif 390 /* Preallocate memory after the NUMA policy has been instantiated. 391 * This is necessary to guarantee memory is allocated with 392 * specified NUMA policy in place. 393 */ 394 if (backend->prealloc) { 395 os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz, 396 backend->prealloc_threads, &local_err); 397 if (local_err) { 398 goto out; 399 } 400 } 401 } 402 out: 403 error_propagate(errp, local_err); 404 } 405 406 static bool 407 host_memory_backend_can_be_deleted(UserCreatable *uc) 408 { 409 if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) { 410 return false; 411 } else { 412 return true; 413 } 414 } 415 416 static bool host_memory_backend_get_share(Object *o, Error **errp) 417 { 418 HostMemoryBackend *backend = MEMORY_BACKEND(o); 419 420 return backend->share; 421 } 422 423 static void host_memory_backend_set_share(Object *o, bool value, Error **errp) 424 { 425 HostMemoryBackend *backend = MEMORY_BACKEND(o); 426 427 if (host_memory_backend_mr_inited(backend)) { 428 error_setg(errp, "cannot change property value"); 429 return; 430 } 431 backend->share = value; 432 } 433 434 #ifdef CONFIG_LINUX 435 static bool host_memory_backend_get_reserve(Object *o, Error **errp) 436 { 437 HostMemoryBackend *backend = MEMORY_BACKEND(o); 438 439 return backend->reserve; 440 } 441 442 static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp) 443 { 444 HostMemoryBackend *backend = MEMORY_BACKEND(o); 445 446 if (host_memory_backend_mr_inited(backend)) { 447 error_setg(errp, "cannot change property value"); 448 return; 449 } 450 if (backend->prealloc && !value) { 451 error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible"); 452 return; 453 } 454 backend->reserve = value; 455 } 456 #endif /* CONFIG_LINUX */ 457 458 static bool 459 host_memory_backend_get_use_canonical_path(Object *obj, Error **errp) 460 { 461 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 462 463 return backend->use_canonical_path; 464 } 465 466 static void 467 host_memory_backend_set_use_canonical_path(Object *obj, bool value, 468 Error **errp) 469 { 470 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 471 472 backend->use_canonical_path = value; 473 } 474 475 static void 476 host_memory_backend_class_init(ObjectClass *oc, void *data) 477 { 478 UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); 479 480 ucc->complete = host_memory_backend_memory_complete; 481 ucc->can_be_deleted = host_memory_backend_can_be_deleted; 482 483 object_class_property_add_bool(oc, "merge", 484 host_memory_backend_get_merge, 485 host_memory_backend_set_merge); 486 object_class_property_set_description(oc, "merge", 487 "Mark memory as mergeable"); 488 object_class_property_add_bool(oc, "dump", 489 host_memory_backend_get_dump, 490 host_memory_backend_set_dump); 491 object_class_property_set_description(oc, "dump", 492 "Set to 'off' to exclude from core dump"); 493 object_class_property_add_bool(oc, "prealloc", 494 host_memory_backend_get_prealloc, 495 host_memory_backend_set_prealloc); 496 object_class_property_set_description(oc, "prealloc", 497 "Preallocate memory"); 498 object_class_property_add(oc, "prealloc-threads", "int", 499 host_memory_backend_get_prealloc_threads, 500 host_memory_backend_set_prealloc_threads, 501 NULL, NULL); 502 object_class_property_set_description(oc, "prealloc-threads", 503 "Number of CPU threads to use for prealloc"); 504 object_class_property_add(oc, "size", "int", 505 host_memory_backend_get_size, 506 host_memory_backend_set_size, 507 NULL, NULL); 508 object_class_property_set_description(oc, "size", 509 "Size of the memory region (ex: 500M)"); 510 object_class_property_add(oc, "host-nodes", "int", 511 host_memory_backend_get_host_nodes, 512 host_memory_backend_set_host_nodes, 513 NULL, NULL); 514 object_class_property_set_description(oc, "host-nodes", 515 "Binds memory to the list of NUMA host nodes"); 516 object_class_property_add_enum(oc, "policy", "HostMemPolicy", 517 &HostMemPolicy_lookup, 518 host_memory_backend_get_policy, 519 host_memory_backend_set_policy); 520 object_class_property_set_description(oc, "policy", 521 "Set the NUMA policy"); 522 object_class_property_add_bool(oc, "share", 523 host_memory_backend_get_share, host_memory_backend_set_share); 524 object_class_property_set_description(oc, "share", 525 "Mark the memory as private to QEMU or shared"); 526 #ifdef CONFIG_LINUX 527 object_class_property_add_bool(oc, "reserve", 528 host_memory_backend_get_reserve, host_memory_backend_set_reserve); 529 object_class_property_set_description(oc, "reserve", 530 "Reserve swap space (or huge pages) if applicable"); 531 #endif /* CONFIG_LINUX */ 532 /* 533 * Do not delete/rename option. This option must be considered stable 534 * (as if it didn't have the 'x-' prefix including deprecation period) as 535 * long as 4.0 and older machine types exists. 536 * Option will be used by upper layers to override (disable) canonical path 537 * for ramblock-id set by compat properties on old machine types ( <= 4.0), 538 * to keep migration working when backend is used for main RAM with 539 * -machine memory-backend= option (main RAM historically used prefix-less 540 * ramblock-id). 541 */ 542 object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id", 543 host_memory_backend_get_use_canonical_path, 544 host_memory_backend_set_use_canonical_path); 545 } 546 547 static const TypeInfo host_memory_backend_info = { 548 .name = TYPE_MEMORY_BACKEND, 549 .parent = TYPE_OBJECT, 550 .abstract = true, 551 .class_size = sizeof(HostMemoryBackendClass), 552 .class_init = host_memory_backend_class_init, 553 .instance_size = sizeof(HostMemoryBackend), 554 .instance_init = host_memory_backend_init, 555 .instance_post_init = host_memory_backend_post_init, 556 .interfaces = (InterfaceInfo[]) { 557 { TYPE_USER_CREATABLE }, 558 { } 559 } 560 }; 561 562 static void register_types(void) 563 { 564 type_register_static(&host_memory_backend_info); 565 } 566 567 type_init(register_types); 568