/*
 * QEMU Host Memory Backend
 *
 * Copyright (C) 2013-2014 Red Hat Inc
 *
 * Authors:
 *   Igor Mammedov <imammedo@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "sysemu/hostmem.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
#include "qapi/visitor.h"
#include "qemu/config-file.h"
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"

#ifdef CONFIG_NUMA
#include <numaif.h>
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
#endif

char *
host_memory_backend_get_name(HostMemoryBackend *backend)
{
    if (!backend->use_canonical_path) {
        return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
    }

    return object_get_canonical_path(OBJECT(backend));
}

static void
host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint64_t value = backend->size;

    visit_type_size(v, name, &value, errp);
}

static void
host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint64_t value;

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property '%s' of %s", name,
                   object_get_typename(obj));
        return;
    }

    if (!visit_type_size(v, name, &value, errp)) {
        return;
    }
    if (!value) {
        error_setg(errp,
                   "property '%s' of %s doesn't take value '%" PRIu64 "'",
                   name, object_get_typename(obj), value);
        return;
    }
    backend->size = value;
}

static void
host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint16List *host_nodes = NULL;
    uint16List **tail = &host_nodes;
    unsigned long value;

    value = find_first_bit(backend->host_nodes, MAX_NODES);
    while (value != MAX_NODES) {
        QAPI_LIST_APPEND(tail, value);
        value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
    }

    visit_type_uint16List(v, name, &host_nodes, errp);
    qapi_free_uint16List(host_nodes);
}

static void
host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
{
#ifdef CONFIG_NUMA
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint16List *l, *host_nodes = NULL;

    visit_type_uint16List(v, name, &host_nodes, errp);

    for (l = host_nodes; l; l = l->next) {
        if (l->value >= MAX_NODES) {
            error_setg(errp, "Invalid host-nodes value: %d", l->value);
            goto out;
        }
    }

    for (l = host_nodes; l; l = l->next) {
        bitmap_set(backend->host_nodes, l->value, 1);
    }

out:
    qapi_free_uint16List(host_nodes);
#else
    error_setg(errp, "NUMA node binding is not supported by this QEMU");
#endif
}

static int
host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    return backend->policy;
}
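
/*
 * For illustration: "policy" (below) and "host-nodes" (above) are the QOM
 * properties behind command lines such as
 *
 *   -object memory-backend-ram,id=mem0,size=4G,policy=bind,host-nodes=0
 *
 * Setting them only records the request; the mbind() call that actually
 * applies the policy happens later, in host_memory_backend_memory_complete().
 */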
static void
host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    backend->policy = policy;

#ifndef CONFIG_NUMA
    if (policy != HOST_MEM_POLICY_DEFAULT) {
        error_setg(errp, "NUMA policies are not supported by this QEMU");
    }
#endif
}

static bool host_memory_backend_get_merge(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->merge;
}

static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!host_memory_backend_mr_inited(backend)) {
        backend->merge = value;
        return;
    }

    if (value != backend->merge) {
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        qemu_madvise(ptr, sz,
                     value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
        backend->merge = value;
    }
}

static bool host_memory_backend_get_dump(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->dump;
}

static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!host_memory_backend_mr_inited(backend)) {
        backend->dump = value;
        return;
    }

    if (value != backend->dump) {
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        qemu_madvise(ptr, sz,
                     value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
        backend->dump = value;
    }
}

static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->prealloc;
}

static void host_memory_backend_set_prealloc(Object *obj, bool value,
                                             Error **errp)
{
    Error *local_err = NULL;
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!host_memory_backend_mr_inited(backend)) {
        backend->prealloc = value;
        return;
    }

    if (value && !backend->prealloc) {
        int fd = memory_region_get_fd(&backend->mr);
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

        os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        backend->prealloc = true;
    }
}

static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v,
    const char *name, void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    visit_type_uint32(v, name, &backend->prealloc_threads, errp);
}

static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v,
    const char *name, void *opaque, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    uint32_t value;

    if (!visit_type_uint32(v, name, &value, errp)) {
        return;
    }
    if (value == 0) {
        error_setg(errp, "property '%s' of %s doesn't take value '%u'", name,
                   object_get_typename(obj), value);
        return;
    }
    backend->prealloc_threads = value;
}
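
/*
 * instance_init runs when the object is created, before any user-supplied
 * properties are applied; instance_post_init then layers machine compat
 * properties on top, so old machine types can override the defaults
 * picked up from the machine state here.
 */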
static void host_memory_backend_init(Object *obj)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    MachineState *machine = MACHINE(qdev_get_machine());

    /* TODO: convert access to globals to compat properties */
    backend->merge = machine_mem_merge(machine);
    backend->dump = machine_dump_guest_core(machine);
    backend->prealloc_threads = 1;
}

static void host_memory_backend_post_init(Object *obj)
{
    object_apply_compat_props(obj);
}

bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
{
    /*
     * NOTE: We forbid zero-length memory backends, so here zero means
     * "we haven't inited the backend memory region yet".
     */
    return memory_region_size(&backend->mr) != 0;
}

MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
{
    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
}

void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
{
    backend->is_mapped = mapped;
}

bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
{
    return backend->is_mapped;
}

#ifdef __linux__
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
    Object *obj = OBJECT(memdev);
    char *path = object_property_get_str(obj, "mem-path", NULL);
    size_t pagesize = qemu_mempath_getpagesize(path);

    g_free(path);
    return pagesize;
}
#else
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
    return qemu_real_host_page_size;
}
#endif
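
/*
 * Completion handler, invoked by user_creatable_complete() once all
 * properties have been set.  The ordering below is deliberate: the
 * subclass alloc() hook creates the region, qemu_madvise() applies the
 * merge/dump hints, mbind() installs the NUMA policy, and only then is
 * the memory preallocated, so that preallocated pages land on the
 * requested host nodes.
 */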
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
    Error *local_err = NULL;
    void *ptr;
    uint64_t sz;

    if (bc->alloc) {
        bc->alloc(backend, &local_err);
        if (local_err) {
            goto out;
        }

        ptr = memory_region_get_ram_ptr(&backend->mr);
        sz = memory_region_size(&backend->mr);

        if (backend->merge) {
            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
        }
        if (!backend->dump) {
            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
        }
#ifdef CONFIG_NUMA
        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
        /* lastbit == MAX_NODES means maxnode = 0 */
        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
        /*
         * Ensure the policy won't be ignored in case memory is preallocated
         * before mbind(). Note: MPOL_MF_STRICT is ignored on hugepages, so
         * this doesn't catch the hugepage case.
         */
        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;

        /*
         * Check for invalid host-nodes and policies and give more verbose
         * error messages than mbind() itself would.
         */
        if (maxnode && backend->policy == MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be empty for policy default,"
                       " or you should explicitly specify a policy other"
                       " than default");
            return;
        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be set for policy %s",
                       HostMemPolicy_str(backend->policy));
            return;
        }

        /*
         * We can have up to MAX_NODES nodes, but we need to pass maxnode+1
         * as argument to mbind() due to an old Linux bug (feature?) which
         * cuts off the last specified node. This means backend->host_nodes
         * must have MAX_NODES+1 bits available.
         */
        assert(sizeof(backend->host_nodes) >=
               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
        assert(maxnode <= MAX_NODES);

        if (maxnode &&
            mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1,
                  flags)) {
            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
                error_setg_errno(errp, errno,
                                 "cannot bind memory to host NUMA nodes");
                return;
            }
        }
#endif
        /*
         * Preallocate memory after the NUMA policy has been instantiated.
         * This is necessary to guarantee memory is allocated with the
         * specified NUMA policy in place.
         */
        if (backend->prealloc) {
            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
                            backend->prealloc_threads, &local_err);
            if (local_err) {
                goto out;
            }
        }
    }
out:
    error_propagate(errp, local_err);
}

static bool
host_memory_backend_can_be_deleted(UserCreatable *uc)
{
    return !host_memory_backend_is_mapped(MEMORY_BACKEND(uc));
}

static bool host_memory_backend_get_share(Object *o, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    return backend->share;
}

static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(errp, "cannot change property value");
        return;
    }
    backend->share = value;
}

static bool
host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    return backend->use_canonical_path;
}

static void
host_memory_backend_set_use_canonical_path(Object *obj, bool value,
                                           Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    backend->use_canonical_path = value;
}
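
/*
 * For illustration: TYPE_MEMORY_BACKEND is abstract, so users instantiate
 * one of its concrete subclasses, such as memory-backend-ram or
 * memory-backend-file, e.g.
 *
 *   -object memory-backend-ram,id=mem0,size=512M,prealloc=on
 *
 * The properties registered below are shared by all of those subclasses.
 */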
static void
host_memory_backend_class_init(ObjectClass *oc, void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->complete = host_memory_backend_memory_complete;
    ucc->can_be_deleted = host_memory_backend_can_be_deleted;

    object_class_property_add_bool(oc, "merge",
        host_memory_backend_get_merge,
        host_memory_backend_set_merge);
    object_class_property_set_description(oc, "merge",
        "Mark memory as mergeable");
    object_class_property_add_bool(oc, "dump",
        host_memory_backend_get_dump,
        host_memory_backend_set_dump);
    object_class_property_set_description(oc, "dump",
        "Set to 'off' to exclude from core dump");
    object_class_property_add_bool(oc, "prealloc",
        host_memory_backend_get_prealloc,
        host_memory_backend_set_prealloc);
    object_class_property_set_description(oc, "prealloc",
        "Preallocate memory");
    object_class_property_add(oc, "prealloc-threads", "int",
        host_memory_backend_get_prealloc_threads,
        host_memory_backend_set_prealloc_threads,
        NULL, NULL);
    object_class_property_set_description(oc, "prealloc-threads",
        "Number of CPU threads to use for prealloc");
    object_class_property_add(oc, "size", "int",
        host_memory_backend_get_size,
        host_memory_backend_set_size,
        NULL, NULL);
    object_class_property_set_description(oc, "size",
        "Size of the memory region (ex: 500M)");
    object_class_property_add(oc, "host-nodes", "int",
        host_memory_backend_get_host_nodes,
        host_memory_backend_set_host_nodes,
        NULL, NULL);
    object_class_property_set_description(oc, "host-nodes",
        "Binds memory to the list of NUMA host nodes");
    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
        &HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy);
    object_class_property_set_description(oc, "policy",
        "Set the NUMA policy");
    object_class_property_add_bool(oc, "share",
        host_memory_backend_get_share, host_memory_backend_set_share);
    object_class_property_set_description(oc, "share",
        "Mark the memory as private to QEMU or shared");
    /*
     * Do not delete/rename this option.  It must be considered stable
     * (as if it didn't have the 'x-' prefix, including the deprecation
     * period) as long as 4.0 and older machine types exist.  Upper layers
     * use it to override (disable) the canonical path for the ramblock-id
     * set by compat properties on old machine types (<= 4.0), to keep
     * migration working when the backend is used for main RAM with the
     * -machine memory-backend= option (main RAM historically used a
     * prefix-less ramblock-id).
     */
    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
        host_memory_backend_get_use_canonical_path,
        host_memory_backend_set_use_canonical_path);
}

static const TypeInfo host_memory_backend_info = {
    .name = TYPE_MEMORY_BACKEND,
    .parent = TYPE_OBJECT,
    .abstract = true,
    .class_size = sizeof(HostMemoryBackendClass),
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
    .instance_post_init = host_memory_backend_post_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};

static void register_types(void)
{
    type_register_static(&host_memory_backend_info);
}

type_init(register_types);