1 /* 2 * QEMU Host Memory Backend 3 * 4 * Copyright (C) 2013-2014 Red Hat Inc 5 * 6 * Authors: 7 * Igor Mammedov <imammedo@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "sysemu/hostmem.h" 14 #include "hw/boards.h" 15 #include "qapi/error.h" 16 #include "qapi/visitor.h" 17 #include "qapi-visit.h" 18 #include "qemu/config-file.h" 19 #include "qom/object_interfaces.h" 20 21 #ifdef CONFIG_NUMA 22 #include <numaif.h> 23 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT); 24 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED); 25 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND); 26 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE); 27 #endif 28 29 static void 30 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name, 31 void *opaque, Error **errp) 32 { 33 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 34 uint64_t value = backend->size; 35 36 visit_type_size(v, name, &value, errp); 37 } 38 39 static void 40 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, 41 void *opaque, Error **errp) 42 { 43 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 44 Error *local_err = NULL; 45 uint64_t value; 46 47 if (host_memory_backend_mr_inited(backend)) { 48 error_setg(&local_err, "cannot change property value"); 49 goto out; 50 } 51 52 visit_type_size(v, name, &value, &local_err); 53 if (local_err) { 54 goto out; 55 } 56 if (!value) { 57 error_setg(&local_err, "Property '%s.%s' doesn't take value '%" 58 PRIu64 "'", object_get_typename(obj), name, value); 59 goto out; 60 } 61 backend->size = value; 62 out: 63 error_propagate(errp, local_err); 64 } 65 66 static void 67 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, 68 void *opaque, Error **errp) 69 { 70 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 71 uint16List *host_nodes = NULL; 72 uint16List **node = &host_nodes; 73 unsigned long value; 74 75 value = find_first_bit(backend->host_nodes, MAX_NODES); 76 if (value == MAX_NODES) { 77 return; 78 } 79 80 *node = g_malloc0(sizeof(**node)); 81 (*node)->value = value; 82 node = &(*node)->next; 83 84 do { 85 value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); 86 if (value == MAX_NODES) { 87 break; 88 } 89 90 *node = g_malloc0(sizeof(**node)); 91 (*node)->value = value; 92 node = &(*node)->next; 93 } while (true); 94 95 visit_type_uint16List(v, name, &host_nodes, errp); 96 } 97 98 static void 99 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name, 100 void *opaque, Error **errp) 101 { 102 #ifdef CONFIG_NUMA 103 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 104 uint16List *l = NULL; 105 106 visit_type_uint16List(v, name, &l, errp); 107 108 while (l) { 109 bitmap_set(backend->host_nodes, l->value, 1); 110 l = l->next; 111 } 112 #else 113 error_setg(errp, "NUMA node binding are not supported by this QEMU"); 114 #endif 115 } 116 117 static int 118 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED) 119 { 120 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 121 return backend->policy; 122 } 123 124 static void 125 host_memory_backend_set_policy(Object *obj, int policy, Error **errp) 126 { 127 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 128 backend->policy = policy; 129 130 #ifndef CONFIG_NUMA 131 if (policy != HOST_MEM_POLICY_DEFAULT) { 132 error_setg(errp, "NUMA policies are not supported by this QEMU"); 133 } 134 #endif 135 } 136 137 static bool host_memory_backend_get_merge(Object *obj, Error **errp) 138 { 139 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 140 141 return backend->merge; 142 } 143 144 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) 145 { 146 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 147 148 if (!host_memory_backend_mr_inited(backend)) { 149 backend->merge = value; 150 return; 151 } 152 153 if (value != backend->merge) { 154 void *ptr = memory_region_get_ram_ptr(&backend->mr); 155 uint64_t sz = memory_region_size(&backend->mr); 156 157 qemu_madvise(ptr, sz, 158 value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); 159 backend->merge = value; 160 } 161 } 162 163 static bool host_memory_backend_get_dump(Object *obj, Error **errp) 164 { 165 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 166 167 return backend->dump; 168 } 169 170 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) 171 { 172 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 173 174 if (!host_memory_backend_mr_inited(backend)) { 175 backend->dump = value; 176 return; 177 } 178 179 if (value != backend->dump) { 180 void *ptr = memory_region_get_ram_ptr(&backend->mr); 181 uint64_t sz = memory_region_size(&backend->mr); 182 183 qemu_madvise(ptr, sz, 184 value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); 185 backend->dump = value; 186 } 187 } 188 189 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) 190 { 191 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 192 193 return backend->prealloc || backend->force_prealloc; 194 } 195 196 static void host_memory_backend_set_prealloc(Object *obj, bool value, 197 Error **errp) 198 { 199 Error *local_err = NULL; 200 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 201 202 if (backend->force_prealloc) { 203 if (value) { 204 error_setg(errp, 205 "remove -mem-prealloc to use the prealloc property"); 206 return; 207 } 208 } 209 210 if (!host_memory_backend_mr_inited(backend)) { 211 backend->prealloc = value; 212 return; 213 } 214 215 if (value && !backend->prealloc) { 216 int fd = memory_region_get_fd(&backend->mr); 217 void *ptr = memory_region_get_ram_ptr(&backend->mr); 218 uint64_t sz = memory_region_size(&backend->mr); 219 220 os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err); 221 if (local_err) { 222 error_propagate(errp, local_err); 223 return; 224 } 225 backend->prealloc = true; 226 } 227 } 228 229 static void host_memory_backend_init(Object *obj) 230 { 231 HostMemoryBackend *backend = MEMORY_BACKEND(obj); 232 MachineState *machine = MACHINE(qdev_get_machine()); 233 234 backend->merge = machine_mem_merge(machine); 235 backend->dump = machine_dump_guest_core(machine); 236 backend->prealloc = mem_prealloc; 237 } 238 239 bool host_memory_backend_mr_inited(HostMemoryBackend *backend) 240 { 241 /* 242 * NOTE: We forbid zero-length memory backend, so here zero means 243 * "we haven't inited the backend memory region yet". 244 */ 245 return memory_region_size(&backend->mr) != 0; 246 } 247 248 MemoryRegion * 249 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp) 250 { 251 return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL; 252 } 253 254 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped) 255 { 256 backend->is_mapped = mapped; 257 } 258 259 bool host_memory_backend_is_mapped(HostMemoryBackend *backend) 260 { 261 return backend->is_mapped; 262 } 263 264 static void 265 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) 266 { 267 HostMemoryBackend *backend = MEMORY_BACKEND(uc); 268 HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); 269 Error *local_err = NULL; 270 void *ptr; 271 uint64_t sz; 272 273 if (bc->alloc) { 274 bc->alloc(backend, &local_err); 275 if (local_err) { 276 goto out; 277 } 278 279 ptr = memory_region_get_ram_ptr(&backend->mr); 280 sz = memory_region_size(&backend->mr); 281 282 if (backend->merge) { 283 qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); 284 } 285 if (!backend->dump) { 286 qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); 287 } 288 #ifdef CONFIG_NUMA 289 unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); 290 /* lastbit == MAX_NODES means maxnode = 0 */ 291 unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); 292 /* ensure policy won't be ignored in case memory is preallocated 293 * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so 294 * this doesn't catch hugepage case. */ 295 unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; 296 297 /* check for invalid host-nodes and policies and give more verbose 298 * error messages than mbind(). */ 299 if (maxnode && backend->policy == MPOL_DEFAULT) { 300 error_setg(errp, "host-nodes must be empty for policy default," 301 " or you should explicitly specify a policy other" 302 " than default"); 303 return; 304 } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { 305 error_setg(errp, "host-nodes must be set for policy %s", 306 HostMemPolicy_str(backend->policy)); 307 return; 308 } 309 310 /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 311 * as argument to mbind() due to an old Linux bug (feature?) which 312 * cuts off the last specified node. This means backend->host_nodes 313 * must have MAX_NODES+1 bits available. 314 */ 315 assert(sizeof(backend->host_nodes) >= 316 BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); 317 assert(maxnode <= MAX_NODES); 318 if (mbind(ptr, sz, backend->policy, 319 maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { 320 if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { 321 error_setg_errno(errp, errno, 322 "cannot bind memory to host NUMA nodes"); 323 return; 324 } 325 } 326 #endif 327 /* Preallocate memory after the NUMA policy has been instantiated. 328 * This is necessary to guarantee memory is allocated with 329 * specified NUMA policy in place. 330 */ 331 if (backend->prealloc) { 332 os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz, 333 smp_cpus, &local_err); 334 if (local_err) { 335 goto out; 336 } 337 } 338 } 339 out: 340 error_propagate(errp, local_err); 341 } 342 343 static bool 344 host_memory_backend_can_be_deleted(UserCreatable *uc) 345 { 346 if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) { 347 return false; 348 } else { 349 return true; 350 } 351 } 352 353 static char *get_id(Object *o, Error **errp) 354 { 355 HostMemoryBackend *backend = MEMORY_BACKEND(o); 356 357 return g_strdup(backend->id); 358 } 359 360 static void set_id(Object *o, const char *str, Error **errp) 361 { 362 HostMemoryBackend *backend = MEMORY_BACKEND(o); 363 364 if (backend->id) { 365 error_setg(errp, "cannot change property value"); 366 return; 367 } 368 backend->id = g_strdup(str); 369 } 370 371 static void 372 host_memory_backend_class_init(ObjectClass *oc, void *data) 373 { 374 UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); 375 376 ucc->complete = host_memory_backend_memory_complete; 377 ucc->can_be_deleted = host_memory_backend_can_be_deleted; 378 379 object_class_property_add_bool(oc, "merge", 380 host_memory_backend_get_merge, 381 host_memory_backend_set_merge, &error_abort); 382 object_class_property_add_bool(oc, "dump", 383 host_memory_backend_get_dump, 384 host_memory_backend_set_dump, &error_abort); 385 object_class_property_add_bool(oc, "prealloc", 386 host_memory_backend_get_prealloc, 387 host_memory_backend_set_prealloc, &error_abort); 388 object_class_property_add(oc, "size", "int", 389 host_memory_backend_get_size, 390 host_memory_backend_set_size, 391 NULL, NULL, &error_abort); 392 object_class_property_add(oc, "host-nodes", "int", 393 host_memory_backend_get_host_nodes, 394 host_memory_backend_set_host_nodes, 395 NULL, NULL, &error_abort); 396 object_class_property_add_enum(oc, "policy", "HostMemPolicy", 397 &HostMemPolicy_lookup, 398 host_memory_backend_get_policy, 399 host_memory_backend_set_policy, &error_abort); 400 object_class_property_add_str(oc, "id", get_id, set_id, &error_abort); 401 } 402 403 static void host_memory_backend_finalize(Object *o) 404 { 405 HostMemoryBackend *backend = MEMORY_BACKEND(o); 406 g_free(backend->id); 407 } 408 409 static const TypeInfo host_memory_backend_info = { 410 .name = TYPE_MEMORY_BACKEND, 411 .parent = TYPE_OBJECT, 412 .abstract = true, 413 .class_size = sizeof(HostMemoryBackendClass), 414 .class_init = host_memory_backend_class_init, 415 .instance_size = sizeof(HostMemoryBackend), 416 .instance_init = host_memory_backend_init, 417 .instance_finalize = host_memory_backend_finalize, 418 .interfaces = (InterfaceInfo[]) { 419 { TYPE_USER_CREATABLE }, 420 { } 421 } 422 }; 423 424 static void register_types(void) 425 { 426 type_register_static(&host_memory_backend_info); 427 } 428 429 type_init(register_types); 430