1 /* 2 * QEMU Host Memory Backend 3 * 4 * Copyright (C) 2013-2014 Red Hat Inc 5 * 6 * Authors: 7 * Igor Mammedov <imammedo@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "sysemu/hostmem.h" 15 #include "hw/boards.h" 16 #include "qapi/error.h" 17 #include "qapi/qapi-builtin-visit.h" 18 #include "qapi/visitor.h" 19 #include "qemu/config-file.h" 20 #include "qom/object_interfaces.h" 21 #include "qemu/mmap-alloc.h" 22 23 #ifdef CONFIG_NUMA 24 #include <numaif.h> 25 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); 26 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); 27 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); 28 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); 29 #endif 30 31 char * 32 host_memory_backend_get_name(HostMemoryBackend *backend) 33 { 34 if (!backend->use_canonical_path) { 35 return object_get_canonical_path_component(OBJECT(backend)); 36 } 37 38 return object_get_canonical_path(OBJECT(backend)); 39 } 40 41 static void 42 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name, 43 void *opaque, Error **errp) 44 { 45 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 46 uint64_t value = backend->size; 47 48 visit_type_size(v, name, &value, errp); 49 } 50 51 static void 52 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, 53 void *opaque, Error **errp) 54 { 55 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 56 Error *local_err = NULL; 57 uint64_t value; 58 59 if (host_memory_backend_mr_inited(backend)) { 60 error_setg(&local_err, "cannot change property %s of %s ", 61 name, object_get_typename(obj)); 62 goto out; 63 } 64 65 visit_type_size(v, name, &value, &local_err); 66 if (local_err) { 67 goto out; 68 } 69 if (!value) { 70 error_setg(&local_err, 71 "property '%s' of %s doesn't take value '%" PRIu64 "'", 72 name, object_get_typename(obj), value); 73 goto out; 74 } 75 backend->size = value; 76 out: 77 error_propagate(errp, local_err); 78 } 79 80 static void 81 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, 82 void *opaque, Error **errp) 83 { 84 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 85 uint16List *host_nodes = NULL; 86 uint16List **node = &host_nodes; 87 unsigned long value; 88 89 value = find_first_bit(backend->host_nodes, MAX_NODES); 90 if (value == MAX_NODES) { 91 return; 92 } 93 94 *node = g_malloc0(sizeof(**node)); 95 (*node)->value = value; 96 node = &(*node)->next; 97 98 do { 99 value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); 100 if (value == MAX_NODES) { 101 break; 102 } 103 104 *node = g_malloc0(sizeof(**node)); 105 (*node)->value = value; 106 node = &(*node)->next; 107 } while (true); 108 109 visit_type_uint16List(v, name, &host_nodes, errp); 110 } 111 112 static void 113 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name, 114 void *opaque, Error **errp) 115 { 116 #ifdef CONFIG_NUMA 117 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 118 uint16List *l, *host_nodes = NULL; 119 120 visit_type_uint16List(v, name, &host_nodes, errp); 121 122 for (l = host_nodes; l; l = l->next) { 123 if (l->value >= MAX_NODES) { 124 error_setg(errp, "Invalid host-nodes value: %d", l->value); 125 goto out; 126 } 127 } 128 129 for (l = host_nodes; l; l = l->next) { 130 bitmap_set(backend->host_nodes, l->value, 1); 131 } 132 133 out: 134 qapi_free_uint16List(host_nodes); 135 #else 136 error_setg(errp, "NUMA node binding are not supported by this QEMU"); 137 #endif 138 } 139 140 static int 141 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED) 142 { 143 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 144 return backend->policy; 145 } 146 147 static void 148 host_memory_backend_set_policy(Object *obj, int policy, Error **errp) 149 { 150 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 151 backend->policy = policy; 152 153 #ifndef CONFIG_NUMA 154 if (policy != HOST_MEM_POLICY_DEFAULT) { 155 error_setg(errp, "NUMA policies are not supported by this QEMU"); 156 } 157 #endif 158 } 159 160 static bool host_memory_backend_get_merge(Object *obj, Error **errp) 161 { 162 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 163 164 return backend->merge; 165 } 166 167 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) 168 { 169 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 170 171 if (!host_memory_backend_mr_inited(backend)) { 172 backend->merge = value; 173 return; 174 } 175 176 if (value != backend->merge) { 177 void *ptr = memory_region_get_ram_ptr(&backend->mr); 178 uint64_t sz = memory_region_size(&backend->mr); 179 180 qemu_madvise(ptr, sz, 181 value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); 182 backend->merge = value; 183 } 184 } 185 186 static bool host_memory_backend_get_dump(Object *obj, Error **errp) 187 { 188 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 189 190 return backend->dump; 191 } 192 193 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) 194 { 195 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 196 197 if (!host_memory_backend_mr_inited(backend)) { 198 backend->dump = value; 199 return; 200 } 201 202 if (value != backend->dump) { 203 void *ptr = memory_region_get_ram_ptr(&backend->mr); 204 uint64_t sz = memory_region_size(&backend->mr); 205 206 qemu_madvise(ptr, sz, 207 value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); 208 backend->dump = value; 209 } 210 } 211 212 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) 213 { 214 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 215 216 return backend->prealloc || backend->force_prealloc; 217 } 218 219 static void host_memory_backend_set_prealloc(Object *obj, bool value, 220 Error **errp) 221 { 222 Error *local_err = NULL; 223 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 224 225 if (backend->force_prealloc) { 226 if (value) { 227 error_setg(errp, 228 "remove -mem-prealloc to use the prealloc property"); 229 return; 230 } 231 } 232 233 if (!host_memory_backend_mr_inited(backend)) { 234 backend->prealloc = value; 235 return; 236 } 237 238 if (value && !backend->prealloc) { 239 int fd = memory_region_get_fd(&backend->mr); 240 void *ptr = memory_region_get_ram_ptr(&backend->mr); 241 uint64_t sz = memory_region_size(&backend->mr); 242 243 os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err); 244 if (local_err) { 245 error_propagate(errp, local_err); 246 return; 247 } 248 backend->prealloc = true; 249 } 250 } 251 252 static void host_memory_backend_init(Object *obj) 253 { 254 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 255 MachineState *machine = MACHINE(qdev_get_machine()); 256 257 backend->merge = machine_mem_merge(machine); 258 backend->dump = machine_dump_guest_core(machine); 259 backend->prealloc = mem_prealloc; 260 } 261 262 static void host_memory_backend_post_init(Object *obj) 263 { 264 object_apply_compat_props(obj); 265 } 266 267 bool host_memory_backend_mr_inited(HostMemoryBackend *backend) 268 { 269 /* 270 * NOTE: We forbid zero-length memory backend, so here zero means 271 * "we haven't inited the backend memory region yet". 272 */ 273 return memory_region_size(&backend->mr) != 0; 274 } 275 276 MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend) 277 { 278 return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL; 279 } 280 281 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped) 282 { 283 backend->is_mapped = mapped; 284 } 285 286 bool host_memory_backend_is_mapped(HostMemoryBackend *backend) 287 { 288 return backend->is_mapped; 289 } 290 291 #ifdef __linux__ 292 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) 293 { 294 Object *obj = OBJECT(memdev); 295 char *path = object_property_get_str(obj, "mem-path", NULL); 296 size_t pagesize = qemu_mempath_getpagesize(path); 297 298 g_free(path); 299 return pagesize; 300 } 301 #else 302 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) 303 { 304 return getpagesize(); 305 } 306 #endif 307 308 static void 309 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) 310 { 311 HostMemoryBackend *backend = MEMORY_BACKEND(uc); 312 HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); 313 Error *local_err = NULL; 314 void *ptr; 315 uint64_t sz; 316 317 if (bc->alloc) { 318 bc->alloc(backend, &local_err); 319 if (local_err) { 320 goto out; 321 } 322 323 ptr = memory_region_get_ram_ptr(&backend->mr); 324 sz = memory_region_size(&backend->mr); 325 326 if (backend->merge) { 327 qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); 328 } 329 if (!backend->dump) { 330 qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); 331 } 332 #ifdef CONFIG_NUMA 333 unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); 334 /* lastbit == MAX_NODES means maxnode = 0 */ 335 unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); 336 /* ensure policy won't be ignored in case memory is preallocated 337 * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so 338 * this doesn't catch hugepage case. */ 339 unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; 340 341 /* check for invalid host-nodes and policies and give more verbose 342 * error messages than mbind(). */ 343 if (maxnode && backend->policy == MPOL_DEFAULT) { 344 error_setg(errp, "host-nodes must be empty for policy default," 345 " or you should explicitly specify a policy other" 346 " than default"); 347 return; 348 } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { 349 error_setg(errp, "host-nodes must be set for policy %s", 350 HostMemPolicy_str(backend->policy)); 351 return; 352 } 353 354 /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 355 * as argument to mbind() due to an old Linux bug (feature?) which 356 * cuts off the last specified node. This means backend->host_nodes 357 * must have MAX_NODES+1 bits available. 358 */ 359 assert(sizeof(backend->host_nodes) >= 360 BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); 361 assert(maxnode <= MAX_NODES); 362 if (mbind(ptr, sz, backend->policy, 363 maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { 364 if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { 365 error_setg_errno(errp, errno, 366 "cannot bind memory to host NUMA nodes"); 367 return; 368 } 369 } 370 #endif 371 /* Preallocate memory after the NUMA policy has been instantiated. 372 * This is necessary to guarantee memory is allocated with 373 * specified NUMA policy in place. 374 */ 375 if (backend->prealloc) { 376 os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz, 377 smp_cpus, &local_err); 378 if (local_err) { 379 goto out; 380 } 381 } 382 } 383 out: 384 error_propagate(errp, local_err); 385 } 386 387 static bool 388 host_memory_backend_can_be_deleted(UserCreatable *uc) 389 { 390 if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) { 391 return false; 392 } else { 393 return true; 394 } 395 } 396 397 static bool host_memory_backend_get_share(Object *o, Error **errp) 398 { 399 HostMemoryBackend *backend = MEMORY_BACKEND(o); 400 401 return backend->share; 402 } 403 404 static void host_memory_backend_set_share(Object *o, bool value, Error **errp) 405 { 406 HostMemoryBackend *backend = MEMORY_BACKEND(o); 407 408 if (host_memory_backend_mr_inited(backend)) { 409 error_setg(errp, "cannot change property value"); 410 return; 411 } 412 backend->share = value; 413 } 414 415 static bool 416 host_memory_backend_get_use_canonical_path(Object *obj, Error **errp) 417 { 418 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 419 420 return backend->use_canonical_path; 421 } 422 423 static void 424 host_memory_backend_set_use_canonical_path(Object *obj, bool value, 425 Error **errp) 426 { 427 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 428 429 backend->use_canonical_path = value; 430 } 431 432 static void 433 host_memory_backend_class_init(ObjectClass *oc, void *data) 434 { 435 UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); 436 437 ucc->complete = host_memory_backend_memory_complete; 438 ucc->can_be_deleted = host_memory_backend_can_be_deleted; 439 440 object_class_property_add_bool(oc, "merge", 441 host_memory_backend_get_merge, 442 host_memory_backend_set_merge, &error_abort); 443 object_class_property_set_description(oc, "merge", 444 "Mark memory as mergeable", &error_abort); 445 object_class_property_add_bool(oc, "dump", 446 host_memory_backend_get_dump, 447 host_memory_backend_set_dump, &error_abort); 448 object_class_property_set_description(oc, "dump", 449 "Set to 'off' to exclude from core dump", &error_abort); 450 object_class_property_add_bool(oc, "prealloc", 451 host_memory_backend_get_prealloc, 452 host_memory_backend_set_prealloc, &error_abort); 453 object_class_property_set_description(oc, "prealloc", 454 "Preallocate memory", &error_abort); 455 object_class_property_add(oc, "size", "int", 456 host_memory_backend_get_size, 457 host_memory_backend_set_size, 458 NULL, NULL, &error_abort); 459 object_class_property_set_description(oc, "size", 460 "Size of the memory region (ex: 500M)", &error_abort); 461 object_class_property_add(oc, "host-nodes", "int", 462 host_memory_backend_get_host_nodes, 463 host_memory_backend_set_host_nodes, 464 NULL, NULL, &error_abort); 465 object_class_property_set_description(oc, "host-nodes", 466 "Binds memory to the list of NUMA host nodes", &error_abort); 467 object_class_property_add_enum(oc, "policy", "HostMemPolicy", 468 &HostMemPolicy_lookup, 469 host_memory_backend_get_policy, 470 host_memory_backend_set_policy, &error_abort); 471 object_class_property_set_description(oc, "policy", 472 "Set the NUMA policy", &error_abort); 473 object_class_property_add_bool(oc, "share", 474 host_memory_backend_get_share, host_memory_backend_set_share, 475 &error_abort); 476 object_class_property_set_description(oc, "share", 477 "Mark the memory as private to QEMU or shared", &error_abort); 478 object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id", 479 host_memory_backend_get_use_canonical_path, 480 host_memory_backend_set_use_canonical_path, &error_abort); 481 } 482 483 static const TypeInfo host_memory_backend_info = { 484 .name = TYPE_MEMORY_BACKEND, 485 .parent = TYPE_OBJECT, 486 .abstract = true, 487 .class_size = sizeof(HostMemoryBackendClass), 488 .class_init = host_memory_backend_class_init, 489 .instance_size = sizeof(HostMemoryBackend), 490 .instance_init = host_memory_backend_init, 491 .instance_post_init = host_memory_backend_post_init, 492 .interfaces = (InterfaceInfo[]) { 493 { TYPE_USER_CREATABLE }, 494 { } 495 } 496 }; 497 498 static void register_types(void) 499 { 500 type_register_static(&host_memory_backend_info); 501 } 502 503 type_init(register_types); 504