1 /* 2 * QEMU Host Memory Backend 3 * 4 * Copyright (C) 2013-2014 Red Hat Inc 5 * 6 * Authors: 7 * Igor Mammedov <imammedo@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "sysemu/hostmem.h" 15 #include "hw/boards.h" 16 #include "qapi/error.h" 17 #include "qapi/qapi-builtin-visit.h" 18 #include "qapi/visitor.h" 19 #include "qemu/config-file.h" 20 #include "qom/object_interfaces.h" 21 #include "qemu/mmap-alloc.h" 22 23 #ifdef CONFIG_NUMA 24 #include <numaif.h> 25 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); 26 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); 27 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); 28 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); 29 #endif 30 31 static void 32 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name, 33 void *opaque, Error **errp) 34 { 35 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 36 uint64_t value = backend->size; 37 38 visit_type_size(v, name, &value, errp); 39 } 40 41 static void 42 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, 43 void *opaque, Error **errp) 44 { 45 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 46 Error *local_err = NULL; 47 uint64_t value; 48 49 if (host_memory_backend_mr_inited(backend)) { 50 error_setg(&local_err, "cannot change property value"); 51 goto out; 52 } 53 54 visit_type_size(v, name, &value, &local_err); 55 if (local_err) { 56 goto out; 57 } 58 if (!value) { 59 error_setg(&local_err, "Property '%s.%s' doesn't take value '%" 60 PRIu64 "'", object_get_typename(obj), name, value); 61 goto out; 62 } 63 backend->size = value; 64 out: 65 error_propagate(errp, local_err); 66 } 67 68 static void 69 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, 70 void *opaque, Error **errp) 71 { 72 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 73 uint16List *host_nodes = NULL; 74 uint16List **node = &host_nodes; 75 unsigned long value; 76 77 value = find_first_bit(backend->host_nodes, MAX_NODES); 78 if (value == MAX_NODES) { 79 return; 80 } 81 82 *node = g_malloc0(sizeof(**node)); 83 (*node)->value = value; 84 node = &(*node)->next; 85 86 do { 87 value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); 88 if (value == MAX_NODES) { 89 break; 90 } 91 92 *node = g_malloc0(sizeof(**node)); 93 (*node)->value = value; 94 node = &(*node)->next; 95 } while (true); 96 97 visit_type_uint16List(v, name, &host_nodes, errp); 98 } 99 100 static void 101 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name, 102 void *opaque, Error **errp) 103 { 104 #ifdef CONFIG_NUMA 105 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 106 uint16List *l, *host_nodes = NULL; 107 108 visit_type_uint16List(v, name, &host_nodes, errp); 109 110 for (l = host_nodes; l; l = l->next) { 111 if (l->value >= MAX_NODES) { 112 error_setg(errp, "Invalid host-nodes value: %d", l->value); 113 goto out; 114 } 115 } 116 117 for (l = host_nodes; l; l = l->next) { 118 bitmap_set(backend->host_nodes, l->value, 1); 119 } 120 121 out: 122 qapi_free_uint16List(host_nodes); 123 #else 124 error_setg(errp, "NUMA node binding are not supported by this QEMU"); 125 #endif 126 } 127 128 static int 129 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED) 130 { 131 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 132 return backend->policy; 133 } 134 135 static void 136 host_memory_backend_set_policy(Object *obj, int policy, Error **errp) 137 { 138 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 139 backend->policy = policy; 140 141 #ifndef CONFIG_NUMA 142 if (policy != HOST_MEM_POLICY_DEFAULT) { 143 error_setg(errp, "NUMA policies are not supported by this QEMU"); 144 } 145 #endif 146 } 147 148 static bool host_memory_backend_get_merge(Object *obj, Error **errp) 149 { 150 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 151 152 return backend->merge; 153 } 154 155 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) 156 { 157 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 158 159 if (!host_memory_backend_mr_inited(backend)) { 160 backend->merge = value; 161 return; 162 } 163 164 if (value != backend->merge) { 165 void *ptr = memory_region_get_ram_ptr(&backend->mr); 166 uint64_t sz = memory_region_size(&backend->mr); 167 168 qemu_madvise(ptr, sz, 169 value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); 170 backend->merge = value; 171 } 172 } 173 174 static bool host_memory_backend_get_dump(Object *obj, Error **errp) 175 { 176 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 177 178 return backend->dump; 179 } 180 181 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) 182 { 183 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 184 185 if (!host_memory_backend_mr_inited(backend)) { 186 backend->dump = value; 187 return; 188 } 189 190 if (value != backend->dump) { 191 void *ptr = memory_region_get_ram_ptr(&backend->mr); 192 uint64_t sz = memory_region_size(&backend->mr); 193 194 qemu_madvise(ptr, sz, 195 value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); 196 backend->dump = value; 197 } 198 } 199 200 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) 201 { 202 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 203 204 return backend->prealloc || backend->force_prealloc; 205 } 206 207 static void host_memory_backend_set_prealloc(Object *obj, bool value, 208 Error **errp) 209 { 210 Error *local_err = NULL; 211 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 212 213 if (backend->force_prealloc) { 214 if (value) { 215 error_setg(errp, 216 "remove -mem-prealloc to use the prealloc property"); 217 return; 218 } 219 } 220 221 if (!host_memory_backend_mr_inited(backend)) { 222 backend->prealloc = value; 223 return; 224 } 225 226 if (value && !backend->prealloc) { 227 int fd = memory_region_get_fd(&backend->mr); 228 void *ptr = memory_region_get_ram_ptr(&backend->mr); 229 uint64_t sz = memory_region_size(&backend->mr); 230 231 os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err); 232 if (local_err) { 233 error_propagate(errp, local_err); 234 return; 235 } 236 backend->prealloc = true; 237 } 238 } 239 240 static void host_memory_backend_init(Object *obj) 241 { 242 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 243 MachineState *machine = MACHINE(qdev_get_machine()); 244 245 backend->merge = machine_mem_merge(machine); 246 backend->dump = machine_dump_guest_core(machine); 247 backend->prealloc = mem_prealloc; 248 } 249 250 bool host_memory_backend_mr_inited(HostMemoryBackend *backend) 251 { 252 /* 253 * NOTE: We forbid zero-length memory backend, so here zero means 254 * "we haven't inited the backend memory region yet". 255 */ 256 return memory_region_size(&backend->mr) != 0; 257 } 258 259 MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend) 260 { 261 return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL; 262 } 263 264 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped) 265 { 266 backend->is_mapped = mapped; 267 } 268 269 bool host_memory_backend_is_mapped(HostMemoryBackend *backend) 270 { 271 return backend->is_mapped; 272 } 273 274 #ifdef __linux__ 275 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) 276 { 277 Object *obj = OBJECT(memdev); 278 char *path = object_property_get_str(obj, "mem-path", NULL); 279 size_t pagesize = qemu_mempath_getpagesize(path); 280 281 g_free(path); 282 return pagesize; 283 } 284 #else 285 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) 286 { 287 return getpagesize(); 288 } 289 #endif 290 291 static void 292 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) 293 { 294 HostMemoryBackend *backend = MEMORY_BACKEND(uc); 295 HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); 296 Error *local_err = NULL; 297 void *ptr; 298 uint64_t sz; 299 300 if (bc->alloc) { 301 bc->alloc(backend, &local_err); 302 if (local_err) { 303 goto out; 304 } 305 306 ptr = memory_region_get_ram_ptr(&backend->mr); 307 sz = memory_region_size(&backend->mr); 308 309 if (backend->merge) { 310 qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); 311 } 312 if (!backend->dump) { 313 qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); 314 } 315 #ifdef CONFIG_NUMA 316 unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); 317 /* lastbit == MAX_NODES means maxnode = 0 */ 318 unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); 319 /* ensure policy won't be ignored in case memory is preallocated 320 * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so 321 * this doesn't catch hugepage case. */ 322 unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; 323 324 /* check for invalid host-nodes and policies and give more verbose 325 * error messages than mbind(). */ 326 if (maxnode && backend->policy == MPOL_DEFAULT) { 327 error_setg(errp, "host-nodes must be empty for policy default," 328 " or you should explicitly specify a policy other" 329 " than default"); 330 return; 331 } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { 332 error_setg(errp, "host-nodes must be set for policy %s", 333 HostMemPolicy_str(backend->policy)); 334 return; 335 } 336 337 /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 338 * as argument to mbind() due to an old Linux bug (feature?) which 339 * cuts off the last specified node. This means backend->host_nodes 340 * must have MAX_NODES+1 bits available. 341 */ 342 assert(sizeof(backend->host_nodes) >= 343 BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); 344 assert(maxnode <= MAX_NODES); 345 if (mbind(ptr, sz, backend->policy, 346 maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { 347 if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { 348 error_setg_errno(errp, errno, 349 "cannot bind memory to host NUMA nodes"); 350 return; 351 } 352 } 353 #endif 354 /* Preallocate memory after the NUMA policy has been instantiated. 355 * This is necessary to guarantee memory is allocated with 356 * specified NUMA policy in place. 357 */ 358 if (backend->prealloc) { 359 os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz, 360 smp_cpus, &local_err); 361 if (local_err) { 362 goto out; 363 } 364 } 365 } 366 out: 367 error_propagate(errp, local_err); 368 } 369 370 static bool 371 host_memory_backend_can_be_deleted(UserCreatable *uc) 372 { 373 if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) { 374 return false; 375 } else { 376 return true; 377 } 378 } 379 380 static bool host_memory_backend_get_share(Object *o, Error **errp) 381 { 382 HostMemoryBackend *backend = MEMORY_BACKEND(o); 383 384 return backend->share; 385 } 386 387 static void host_memory_backend_set_share(Object *o, bool value, Error **errp) 388 { 389 HostMemoryBackend *backend = MEMORY_BACKEND(o); 390 391 if (host_memory_backend_mr_inited(backend)) { 392 error_setg(errp, "cannot change property value"); 393 return; 394 } 395 backend->share = value; 396 } 397 398 static void 399 host_memory_backend_class_init(ObjectClass *oc, void *data) 400 { 401 UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); 402 403 ucc->complete = host_memory_backend_memory_complete; 404 ucc->can_be_deleted = host_memory_backend_can_be_deleted; 405 406 object_class_property_add_bool(oc, "merge", 407 host_memory_backend_get_merge, 408 host_memory_backend_set_merge, &error_abort); 409 object_class_property_set_description(oc, "merge", 410 "Mark memory as mergeable", &error_abort); 411 object_class_property_add_bool(oc, "dump", 412 host_memory_backend_get_dump, 413 host_memory_backend_set_dump, &error_abort); 414 object_class_property_set_description(oc, "dump", 415 "Set to 'off' to exclude from core dump", &error_abort); 416 object_class_property_add_bool(oc, "prealloc", 417 host_memory_backend_get_prealloc, 418 host_memory_backend_set_prealloc, &error_abort); 419 object_class_property_set_description(oc, "prealloc", 420 "Preallocate memory", &error_abort); 421 object_class_property_add(oc, "size", "int", 422 host_memory_backend_get_size, 423 host_memory_backend_set_size, 424 NULL, NULL, &error_abort); 425 object_class_property_set_description(oc, "size", 426 "Size of the memory region (ex: 500M)", &error_abort); 427 object_class_property_add(oc, "host-nodes", "int", 428 host_memory_backend_get_host_nodes, 429 host_memory_backend_set_host_nodes, 430 NULL, NULL, &error_abort); 431 object_class_property_set_description(oc, "host-nodes", 432 "Binds memory to the list of NUMA host nodes", &error_abort); 433 object_class_property_add_enum(oc, "policy", "HostMemPolicy", 434 &HostMemPolicy_lookup, 435 host_memory_backend_get_policy, 436 host_memory_backend_set_policy, &error_abort); 437 object_class_property_set_description(oc, "policy", 438 "Set the NUMA policy", &error_abort); 439 object_class_property_add_bool(oc, "share", 440 host_memory_backend_get_share, host_memory_backend_set_share, 441 &error_abort); 442 object_class_property_set_description(oc, "share", 443 "Mark the memory as private to QEMU or shared", &error_abort); 444 } 445 446 static const TypeInfo host_memory_backend_info = { 447 .name = TYPE_MEMORY_BACKEND, 448 .parent = TYPE_OBJECT, 449 .abstract = true, 450 .class_size = sizeof(HostMemoryBackendClass), 451 .class_init = host_memory_backend_class_init, 452 .instance_size = sizeof(HostMemoryBackend), 453 .instance_init = host_memory_backend_init, 454 .interfaces = (InterfaceInfo[]) { 455 { TYPE_USER_CREATABLE }, 456 { } 457 } 458 }; 459 460 static void register_types(void) 461 { 462 type_register_static(&host_memory_backend_info); 463 } 464 465 type_init(register_types); 466