1 /* 2 * QEMU Host Memory Backend 3 * 4 * Copyright (C) 2013-2014 Red Hat Inc 5 * 6 * Authors: 7 * Igor Mammedov <imammedo@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "sysemu/hostmem.h" 15 #include "sysemu/sysemu.h" 16 #include "hw/boards.h" 17 #include "qapi/error.h" 18 #include "qapi/qapi-builtin-visit.h" 19 #include "qapi/visitor.h" 20 #include "qemu/config-file.h" 21 #include "qom/object_interfaces.h" 22 #include "qemu/mmap-alloc.h" 23 24 #ifdef CONFIG_NUMA 25 #include <numaif.h> 26 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); 27 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); 28 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); 29 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); 30 #endif 31 32 char * 33 host_memory_backend_get_name(HostMemoryBackend *backend) 34 { 35 if (!backend->use_canonical_path) { 36 return g_strdup(object_get_canonical_path_component(OBJECT(backend))); 37 } 38 39 return object_get_canonical_path(OBJECT(backend)); 40 } 41 42 static void 43 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name, 44 void *opaque, Error **errp) 45 { 46 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 47 uint64_t value = backend->size; 48 49 visit_type_size(v, name, &value, errp); 50 } 51 52 static void 53 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, 54 void *opaque, Error **errp) 55 { 56 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 57 uint64_t value; 58 59 if (host_memory_backend_mr_inited(backend)) { 60 error_setg(errp, "cannot change property %s of %s ", name, 61 object_get_typename(obj)); 62 return; 63 } 64 65 if (!visit_type_size(v, name, &value, errp)) { 66 return; 67 } 68 if (!value) { 69 error_setg(errp, 70 "property '%s' of %s doesn't take value '%" PRIu64 "'", 71 name, object_get_typename(obj), value); 72 return; 73 } 74 backend->size = value; 75 } 76 77 static void 78 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, 79 void *opaque, Error **errp) 80 { 81 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 82 uint16List *host_nodes = NULL; 83 uint16List **node = &host_nodes; 84 unsigned long value; 85 86 value = find_first_bit(backend->host_nodes, MAX_NODES); 87 if (value == MAX_NODES) { 88 goto ret; 89 } 90 91 *node = g_malloc0(sizeof(**node)); 92 (*node)->value = value; 93 node = &(*node)->next; 94 95 do { 96 value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); 97 if (value == MAX_NODES) { 98 break; 99 } 100 101 *node = g_malloc0(sizeof(**node)); 102 (*node)->value = value; 103 node = &(*node)->next; 104 } while (true); 105 106 ret: 107 visit_type_uint16List(v, name, &host_nodes, errp); 108 } 109 110 static void 111 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name, 112 void *opaque, Error **errp) 113 { 114 #ifdef CONFIG_NUMA 115 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 116 uint16List *l, *host_nodes = NULL; 117 118 visit_type_uint16List(v, name, &host_nodes, errp); 119 120 for (l = host_nodes; l; l = l->next) { 121 if (l->value >= MAX_NODES) { 122 error_setg(errp, "Invalid host-nodes value: %d", l->value); 123 goto out; 124 } 125 } 126 127 for (l = host_nodes; l; l = l->next) { 128 bitmap_set(backend->host_nodes, l->value, 1); 129 } 130 131 out: 132 qapi_free_uint16List(host_nodes); 133 #else 134 error_setg(errp, "NUMA node binding are not supported by this QEMU"); 135 #endif 136 } 137 138 static int 139 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED) 140 { 141 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 142 return backend->policy; 143 } 144 145 static void 146 host_memory_backend_set_policy(Object *obj, int policy, Error **errp) 147 { 148 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 149 backend->policy = policy; 150 151 #ifndef CONFIG_NUMA 152 if (policy != HOST_MEM_POLICY_DEFAULT) { 153 error_setg(errp, "NUMA policies are not supported by this QEMU"); 154 } 155 #endif 156 } 157 158 static bool host_memory_backend_get_merge(Object *obj, Error **errp) 159 { 160 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 161 162 return backend->merge; 163 } 164 165 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) 166 { 167 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 168 169 if (!host_memory_backend_mr_inited(backend)) { 170 backend->merge = value; 171 return; 172 } 173 174 if (value != backend->merge) { 175 void *ptr = memory_region_get_ram_ptr(&backend->mr); 176 uint64_t sz = memory_region_size(&backend->mr); 177 178 qemu_madvise(ptr, sz, 179 value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); 180 backend->merge = value; 181 } 182 } 183 184 static bool host_memory_backend_get_dump(Object *obj, Error **errp) 185 { 186 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 187 188 return backend->dump; 189 } 190 191 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) 192 { 193 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 194 195 if (!host_memory_backend_mr_inited(backend)) { 196 backend->dump = value; 197 return; 198 } 199 200 if (value != backend->dump) { 201 void *ptr = memory_region_get_ram_ptr(&backend->mr); 202 uint64_t sz = memory_region_size(&backend->mr); 203 204 qemu_madvise(ptr, sz, 205 value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); 206 backend->dump = value; 207 } 208 } 209 210 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) 211 { 212 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 213 214 return backend->prealloc; 215 } 216 217 static void host_memory_backend_set_prealloc(Object *obj, bool value, 218 Error **errp) 219 { 220 Error *local_err = NULL; 221 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 222 223 if (!host_memory_backend_mr_inited(backend)) { 224 backend->prealloc = value; 225 return; 226 } 227 228 if (value && !backend->prealloc) { 229 int fd = memory_region_get_fd(&backend->mr); 230 void *ptr = memory_region_get_ram_ptr(&backend->mr); 231 uint64_t sz = memory_region_size(&backend->mr); 232 233 os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err); 234 if (local_err) { 235 error_propagate(errp, local_err); 236 return; 237 } 238 backend->prealloc = true; 239 } 240 } 241 242 static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v, 243 const char *name, void *opaque, Error **errp) 244 { 245 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 246 visit_type_uint32(v, name, &backend->prealloc_threads, errp); 247 } 248 249 static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v, 250 const char *name, void *opaque, Error **errp) 251 { 252 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 253 uint32_t value; 254 255 if (!visit_type_uint32(v, name, &value, errp)) { 256 return; 257 } 258 if (value <= 0) { 259 error_setg(errp, "property '%s' of %s doesn't take value '%d'", name, 260 object_get_typename(obj), value); 261 return; 262 } 263 backend->prealloc_threads = value; 264 } 265 266 static void host_memory_backend_init(Object *obj) 267 { 268 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 269 MachineState *machine = MACHINE(qdev_get_machine()); 270 271 /* TODO: convert access to globals to compat properties */ 272 backend->merge = machine_mem_merge(machine); 273 backend->dump = machine_dump_guest_core(machine); 274 backend->prealloc_threads = 1; 275 } 276 277 static void host_memory_backend_post_init(Object *obj) 278 { 279 object_apply_compat_props(obj); 280 } 281 282 bool host_memory_backend_mr_inited(HostMemoryBackend *backend) 283 { 284 /* 285 * NOTE: We forbid zero-length memory backend, so here zero means 286 * "we haven't inited the backend memory region yet". 287 */ 288 return memory_region_size(&backend->mr) != 0; 289 } 290 291 MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend) 292 { 293 return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL; 294 } 295 296 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped) 297 { 298 backend->is_mapped = mapped; 299 } 300 301 bool host_memory_backend_is_mapped(HostMemoryBackend *backend) 302 { 303 return backend->is_mapped; 304 } 305 306 #ifdef __linux__ 307 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) 308 { 309 Object *obj = OBJECT(memdev); 310 char *path = object_property_get_str(obj, "mem-path", NULL); 311 size_t pagesize = qemu_mempath_getpagesize(path); 312 313 g_free(path); 314 return pagesize; 315 } 316 #else 317 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) 318 { 319 return qemu_real_host_page_size; 320 } 321 #endif 322 323 static void 324 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) 325 { 326 HostMemoryBackend *backend = MEMORY_BACKEND(uc); 327 HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); 328 Error *local_err = NULL; 329 void *ptr; 330 uint64_t sz; 331 332 if (bc->alloc) { 333 bc->alloc(backend, &local_err); 334 if (local_err) { 335 goto out; 336 } 337 338 ptr = memory_region_get_ram_ptr(&backend->mr); 339 sz = memory_region_size(&backend->mr); 340 341 if (backend->merge) { 342 qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); 343 } 344 if (!backend->dump) { 345 qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); 346 } 347 #ifdef CONFIG_NUMA 348 unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); 349 /* lastbit == MAX_NODES means maxnode = 0 */ 350 unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); 351 /* ensure policy won't be ignored in case memory is preallocated 352 * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so 353 * this doesn't catch hugepage case. */ 354 unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; 355 356 /* check for invalid host-nodes and policies and give more verbose 357 * error messages than mbind(). */ 358 if (maxnode && backend->policy == MPOL_DEFAULT) { 359 error_setg(errp, "host-nodes must be empty for policy default," 360 " or you should explicitly specify a policy other" 361 " than default"); 362 return; 363 } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { 364 error_setg(errp, "host-nodes must be set for policy %s", 365 HostMemPolicy_str(backend->policy)); 366 return; 367 } 368 369 /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 370 * as argument to mbind() due to an old Linux bug (feature?) which 371 * cuts off the last specified node. This means backend->host_nodes 372 * must have MAX_NODES+1 bits available. 373 */ 374 assert(sizeof(backend->host_nodes) >= 375 BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); 376 assert(maxnode <= MAX_NODES); 377 378 if (maxnode && 379 mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1, 380 flags)) { 381 if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { 382 error_setg_errno(errp, errno, 383 "cannot bind memory to host NUMA nodes"); 384 return; 385 } 386 } 387 #endif 388 /* Preallocate memory after the NUMA policy has been instantiated. 389 * This is necessary to guarantee memory is allocated with 390 * specified NUMA policy in place. 391 */ 392 if (backend->prealloc) { 393 os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz, 394 backend->prealloc_threads, &local_err); 395 if (local_err) { 396 goto out; 397 } 398 } 399 } 400 out: 401 error_propagate(errp, local_err); 402 } 403 404 static bool 405 host_memory_backend_can_be_deleted(UserCreatable *uc) 406 { 407 if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) { 408 return false; 409 } else { 410 return true; 411 } 412 } 413 414 static bool host_memory_backend_get_share(Object *o, Error **errp) 415 { 416 HostMemoryBackend *backend = MEMORY_BACKEND(o); 417 418 return backend->share; 419 } 420 421 static void host_memory_backend_set_share(Object *o, bool value, Error **errp) 422 { 423 HostMemoryBackend *backend = MEMORY_BACKEND(o); 424 425 if (host_memory_backend_mr_inited(backend)) { 426 error_setg(errp, "cannot change property value"); 427 return; 428 } 429 backend->share = value; 430 } 431 432 static bool 433 host_memory_backend_get_use_canonical_path(Object *obj, Error **errp) 434 { 435 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 436 437 return backend->use_canonical_path; 438 } 439 440 static void 441 host_memory_backend_set_use_canonical_path(Object *obj, bool value, 442 Error **errp) 443 { 444 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 445 446 backend->use_canonical_path = value; 447 } 448 449 static void 450 host_memory_backend_class_init(ObjectClass *oc, void *data) 451 { 452 UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); 453 454 ucc->complete = host_memory_backend_memory_complete; 455 ucc->can_be_deleted = host_memory_backend_can_be_deleted; 456 457 object_class_property_add_bool(oc, "merge", 458 host_memory_backend_get_merge, 459 host_memory_backend_set_merge); 460 object_class_property_set_description(oc, "merge", 461 "Mark memory as mergeable"); 462 object_class_property_add_bool(oc, "dump", 463 host_memory_backend_get_dump, 464 host_memory_backend_set_dump); 465 object_class_property_set_description(oc, "dump", 466 "Set to 'off' to exclude from core dump"); 467 object_class_property_add_bool(oc, "prealloc", 468 host_memory_backend_get_prealloc, 469 host_memory_backend_set_prealloc); 470 object_class_property_set_description(oc, "prealloc", 471 "Preallocate memory"); 472 object_class_property_add(oc, "prealloc-threads", "int", 473 host_memory_backend_get_prealloc_threads, 474 host_memory_backend_set_prealloc_threads, 475 NULL, NULL); 476 object_class_property_set_description(oc, "prealloc-threads", 477 "Number of CPU threads to use for prealloc"); 478 object_class_property_add(oc, "size", "int", 479 host_memory_backend_get_size, 480 host_memory_backend_set_size, 481 NULL, NULL); 482 object_class_property_set_description(oc, "size", 483 "Size of the memory region (ex: 500M)"); 484 object_class_property_add(oc, "host-nodes", "int", 485 host_memory_backend_get_host_nodes, 486 host_memory_backend_set_host_nodes, 487 NULL, NULL); 488 object_class_property_set_description(oc, "host-nodes", 489 "Binds memory to the list of NUMA host nodes"); 490 object_class_property_add_enum(oc, "policy", "HostMemPolicy", 491 &HostMemPolicy_lookup, 492 host_memory_backend_get_policy, 493 host_memory_backend_set_policy); 494 object_class_property_set_description(oc, "policy", 495 "Set the NUMA policy"); 496 object_class_property_add_bool(oc, "share", 497 host_memory_backend_get_share, host_memory_backend_set_share); 498 object_class_property_set_description(oc, "share", 499 "Mark the memory as private to QEMU or shared"); 500 object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id", 501 host_memory_backend_get_use_canonical_path, 502 host_memory_backend_set_use_canonical_path); 503 } 504 505 static const TypeInfo host_memory_backend_info = { 506 .name = TYPE_MEMORY_BACKEND, 507 .parent = TYPE_OBJECT, 508 .abstract = true, 509 .class_size = sizeof(HostMemoryBackendClass), 510 .class_init = host_memory_backend_class_init, 511 .instance_size = sizeof(HostMemoryBackend), 512 .instance_init = host_memory_backend_init, 513 .instance_post_init = host_memory_backend_post_init, 514 .interfaces = (InterfaceInfo[]) { 515 { TYPE_USER_CREATABLE }, 516 { } 517 } 518 }; 519 520 static void register_types(void) 521 { 522 type_register_static(&host_memory_backend_info); 523 } 524 525 type_init(register_types); 526