1 /* 2 * QEMU PAPR Storage Class Memory Interfaces 3 * 4 * Copyright (c) 2019-2020, IBM Corporation. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "qemu/osdep.h" 25 #include "qemu/cutils.h" 26 #include "qapi/error.h" 27 #include "hw/ppc/spapr_drc.h" 28 #include "hw/ppc/spapr_nvdimm.h" 29 #include "hw/mem/nvdimm.h" 30 #include "qemu/nvdimm-utils.h" 31 #include "hw/ppc/fdt.h" 32 #include "qemu/range.h" 33 #include "hw/ppc/spapr_numa.h" 34 #include "block/thread-pool.h" 35 #include "migration/vmstate.h" 36 #include "qemu/pmem.h" 37 #include "hw/qdev-properties.h" 38 39 /* DIMM health bitmap bitmap indicators. Taken from kernel's papr_scm.c */ 40 /* SCM device is unable to persist memory contents */ 41 #define PAPR_PMEM_UNARMED PPC_BIT(0) 42 43 /* 44 * The nvdimm size should be aligned to SCM block size. 45 * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE 46 * in order to have SCM regions not to overlap with dimm memory regions. 47 * The SCM devices can have variable block sizes. For now, fixing the 48 * block size to the minimum value. 49 */ 50 #define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE 51 52 /* Have an explicit check for alignment */ 53 QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE); 54 55 #define TYPE_SPAPR_NVDIMM "spapr-nvdimm" 56 OBJECT_DECLARE_TYPE(SpaprNVDIMMDevice, SPAPRNVDIMMClass, SPAPR_NVDIMM) 57 58 struct SPAPRNVDIMMClass { 59 /* private */ 60 NVDIMMClass parent_class; 61 62 /* public */ 63 void (*realize)(NVDIMMDevice *dimm, Error **errp); 64 void (*unrealize)(NVDIMMDevice *dimm, Error **errp); 65 }; 66 67 bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, 68 uint64_t size, Error **errp) 69 { 70 const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); 71 const MachineState *ms = MACHINE(hotplug_dev); 72 PCDIMMDevice *dimm = PC_DIMM(nvdimm); 73 MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem); 74 g_autofree char *uuidstr = NULL; 75 QemuUUID uuid; 76 int ret; 77 78 if (!mc->nvdimm_supported) { 79 error_setg(errp, "NVDIMM hotplug not supported for this machine"); 80 return false; 81 } 82 83 if (!ms->nvdimms_state->is_enabled) { 84 error_setg(errp, "nvdimm device found but 'nvdimm=off' was set"); 85 return false; 86 } 87 88 if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP, 89 &error_abort) == 0) { 90 error_setg(errp, "PAPR requires NVDIMM devices to have label-size set"); 91 return false; 92 } 93 94 if (size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) { 95 error_setg(errp, "PAPR requires NVDIMM memory size (excluding label)" 96 " to be a multiple of %" PRIu64 "MB", 97 SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB); 98 return false; 99 } 100 101 uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP, 102 &error_abort); 103 ret = qemu_uuid_parse(uuidstr, &uuid); 104 g_assert(!ret); 105 106 if (qemu_uuid_is_null(&uuid)) { 107 error_setg(errp, "NVDIMM device requires the uuid to be set"); 108 return false; 109 } 110 111 if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM) && 112 (memory_region_get_fd(mr) < 0)) { 113 error_setg(errp, "spapr-nvdimm device requires the " 114 "memdev %s to be of memory-backend-file type", 115 object_get_canonical_path_component(OBJECT(dimm->hostmem))); 116 return false; 117 } 118 119 return true; 120 } 121 122 123 void spapr_add_nvdimm(DeviceState *dev, uint64_t slot) 124 { 125 SpaprDrc *drc; 126 bool hotplugged = spapr_drc_hotplugged(dev); 127 128 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot); 129 g_assert(drc); 130 131 /* 132 * pc_dimm_get_free_slot() provided a free slot at pre-plug. The 133 * corresponding DRC is thus assumed to be attachable. 134 */ 135 spapr_drc_attach(drc, dev); 136 137 if (hotplugged) { 138 spapr_hotplug_req_add_by_index(drc); 139 } 140 } 141 142 static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt, 143 int parent_offset, NVDIMMDevice *nvdimm) 144 { 145 int child_offset; 146 char *buf; 147 SpaprDrc *drc; 148 uint32_t drc_idx; 149 uint32_t node = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_NODE_PROP, 150 &error_abort); 151 uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP, 152 &error_abort); 153 uint64_t lsize = nvdimm->label_size; 154 uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, 155 NULL); 156 157 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot); 158 g_assert(drc); 159 160 drc_idx = spapr_drc_index(drc); 161 162 buf = g_strdup_printf("ibm,pmemory@%x", drc_idx); 163 child_offset = fdt_add_subnode(fdt, parent_offset, buf); 164 g_free(buf); 165 166 _FDT(child_offset); 167 168 _FDT((fdt_setprop_cell(fdt, child_offset, "reg", drc_idx))); 169 _FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory"))); 170 _FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory"))); 171 172 spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node); 173 174 buf = qemu_uuid_unparse_strdup(&nvdimm->uuid); 175 _FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf))); 176 g_free(buf); 177 178 _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,my-drc-index", drc_idx))); 179 180 _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,block-size", 181 SPAPR_MINIMUM_SCM_BLOCK_SIZE))); 182 _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,number-of-blocks", 183 size / SPAPR_MINIMUM_SCM_BLOCK_SIZE))); 184 _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,metadata-size", lsize))); 185 186 _FDT((fdt_setprop_string(fdt, child_offset, "ibm,pmem-application", 187 "operating-system"))); 188 _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0)); 189 190 if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) { 191 bool is_pmem = false, pmem_override = false; 192 PCDIMMDevice *dimm = PC_DIMM(nvdimm); 193 HostMemoryBackend *hostmem = dimm->hostmem; 194 195 is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem", NULL); 196 pmem_override = object_property_get_bool(OBJECT(nvdimm), 197 "pmem-override", NULL); 198 if (!is_pmem || pmem_override) { 199 _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required", 200 NULL, 0)); 201 } 202 } 203 204 return child_offset; 205 } 206 207 int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, 208 void *fdt, int *fdt_start_offset, Error **errp) 209 { 210 NVDIMMDevice *nvdimm = NVDIMM(drc->dev); 211 212 *fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm); 213 214 return 0; 215 } 216 217 void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt) 218 { 219 int offset = fdt_subnode_offset(fdt, 0, "ibm,persistent-memory"); 220 GSList *iter, *nvdimms = nvdimm_get_device_list(); 221 222 if (offset < 0) { 223 offset = fdt_add_subnode(fdt, 0, "ibm,persistent-memory"); 224 _FDT(offset); 225 _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); 226 _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); 227 _FDT((fdt_setprop_string(fdt, offset, "device_type", 228 "ibm,persistent-memory"))); 229 } 230 231 /* Create DT entries for cold plugged NVDIMM devices */ 232 for (iter = nvdimms; iter; iter = iter->next) { 233 NVDIMMDevice *nvdimm = iter->data; 234 235 spapr_dt_nvdimm(spapr, fdt, offset, nvdimm); 236 } 237 g_slist_free(nvdimms); 238 239 return; 240 } 241 242 static target_ulong h_scm_read_metadata(PowerPCCPU *cpu, 243 SpaprMachineState *spapr, 244 target_ulong opcode, 245 target_ulong *args) 246 { 247 uint32_t drc_index = args[0]; 248 uint64_t offset = args[1]; 249 uint64_t len = args[2]; 250 SpaprDrc *drc = spapr_drc_by_index(drc_index); 251 NVDIMMDevice *nvdimm; 252 NVDIMMClass *ddc; 253 uint64_t data = 0; 254 uint8_t buf[8] = { 0 }; 255 256 if (!drc || !drc->dev || 257 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { 258 return H_PARAMETER; 259 } 260 261 if (len != 1 && len != 2 && 262 len != 4 && len != 8) { 263 return H_P3; 264 } 265 266 nvdimm = NVDIMM(drc->dev); 267 if ((offset + len < offset) || 268 (nvdimm->label_size < len + offset)) { 269 return H_P2; 270 } 271 272 ddc = NVDIMM_GET_CLASS(nvdimm); 273 ddc->read_label_data(nvdimm, buf, len, offset); 274 275 switch (len) { 276 case 1: 277 data = ldub_p(buf); 278 break; 279 case 2: 280 data = lduw_be_p(buf); 281 break; 282 case 4: 283 data = ldl_be_p(buf); 284 break; 285 case 8: 286 data = ldq_be_p(buf); 287 break; 288 default: 289 g_assert_not_reached(); 290 } 291 292 args[0] = data; 293 294 return H_SUCCESS; 295 } 296 297 static target_ulong h_scm_write_metadata(PowerPCCPU *cpu, 298 SpaprMachineState *spapr, 299 target_ulong opcode, 300 target_ulong *args) 301 { 302 uint32_t drc_index = args[0]; 303 uint64_t offset = args[1]; 304 uint64_t data = args[2]; 305 uint64_t len = args[3]; 306 SpaprDrc *drc = spapr_drc_by_index(drc_index); 307 NVDIMMDevice *nvdimm; 308 NVDIMMClass *ddc; 309 uint8_t buf[8] = { 0 }; 310 311 if (!drc || !drc->dev || 312 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { 313 return H_PARAMETER; 314 } 315 316 if (len != 1 && len != 2 && 317 len != 4 && len != 8) { 318 return H_P4; 319 } 320 321 nvdimm = NVDIMM(drc->dev); 322 if ((offset + len < offset) || 323 (nvdimm->label_size < len + offset)) { 324 return H_P2; 325 } 326 327 switch (len) { 328 case 1: 329 if (data & 0xffffffffffffff00) { 330 return H_P2; 331 } 332 stb_p(buf, data); 333 break; 334 case 2: 335 if (data & 0xffffffffffff0000) { 336 return H_P2; 337 } 338 stw_be_p(buf, data); 339 break; 340 case 4: 341 if (data & 0xffffffff00000000) { 342 return H_P2; 343 } 344 stl_be_p(buf, data); 345 break; 346 case 8: 347 stq_be_p(buf, data); 348 break; 349 default: 350 g_assert_not_reached(); 351 } 352 353 ddc = NVDIMM_GET_CLASS(nvdimm); 354 ddc->write_label_data(nvdimm, buf, len, offset); 355 356 return H_SUCCESS; 357 } 358 359 static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr, 360 target_ulong opcode, target_ulong *args) 361 { 362 uint32_t drc_index = args[0]; 363 uint64_t starting_idx = args[1]; 364 uint64_t no_of_scm_blocks_to_bind = args[2]; 365 uint64_t target_logical_mem_addr = args[3]; 366 uint64_t continue_token = args[4]; 367 uint64_t size; 368 uint64_t total_no_of_scm_blocks; 369 SpaprDrc *drc = spapr_drc_by_index(drc_index); 370 hwaddr addr; 371 NVDIMMDevice *nvdimm; 372 373 if (!drc || !drc->dev || 374 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { 375 return H_PARAMETER; 376 } 377 378 /* 379 * Currently continue token should be zero qemu has already bound 380 * everything and this hcall doesnt return H_BUSY. 381 */ 382 if (continue_token > 0) { 383 return H_P5; 384 } 385 386 /* Currently qemu assigns the address. */ 387 if (target_logical_mem_addr != 0xffffffffffffffff) { 388 return H_OVERLAP; 389 } 390 391 nvdimm = NVDIMM(drc->dev); 392 393 size = object_property_get_uint(OBJECT(nvdimm), 394 PC_DIMM_SIZE_PROP, &error_abort); 395 396 total_no_of_scm_blocks = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; 397 398 if (starting_idx > total_no_of_scm_blocks) { 399 return H_P2; 400 } 401 402 if (((starting_idx + no_of_scm_blocks_to_bind) < starting_idx) || 403 ((starting_idx + no_of_scm_blocks_to_bind) > total_no_of_scm_blocks)) { 404 return H_P3; 405 } 406 407 addr = object_property_get_uint(OBJECT(nvdimm), 408 PC_DIMM_ADDR_PROP, &error_abort); 409 410 addr += starting_idx * SPAPR_MINIMUM_SCM_BLOCK_SIZE; 411 412 /* Already bound, Return target logical address in R5 */ 413 args[1] = addr; 414 args[2] = no_of_scm_blocks_to_bind; 415 416 return H_SUCCESS; 417 } 418 419 typedef struct SpaprNVDIMMDeviceFlushState { 420 uint64_t continue_token; 421 int64_t hcall_ret; 422 uint32_t drcidx; 423 424 QLIST_ENTRY(SpaprNVDIMMDeviceFlushState) node; 425 } SpaprNVDIMMDeviceFlushState; 426 427 typedef struct SpaprNVDIMMDevice SpaprNVDIMMDevice; 428 struct SpaprNVDIMMDevice { 429 /* private */ 430 NVDIMMDevice parent_obj; 431 432 bool hcall_flush_required; 433 uint64_t nvdimm_flush_token; 434 QLIST_HEAD(, SpaprNVDIMMDeviceFlushState) pending_nvdimm_flush_states; 435 QLIST_HEAD(, SpaprNVDIMMDeviceFlushState) completed_nvdimm_flush_states; 436 437 /* public */ 438 439 /* 440 * The 'on' value for this property forced the qemu to enable the hcall 441 * flush for the nvdimm device even if the backend is a pmem 442 */ 443 bool pmem_override; 444 }; 445 446 static int flush_worker_cb(void *opaque) 447 { 448 SpaprNVDIMMDeviceFlushState *state = opaque; 449 SpaprDrc *drc = spapr_drc_by_index(state->drcidx); 450 PCDIMMDevice *dimm; 451 HostMemoryBackend *backend; 452 int backend_fd; 453 454 g_assert(drc != NULL); 455 456 dimm = PC_DIMM(drc->dev); 457 backend = MEMORY_BACKEND(dimm->hostmem); 458 backend_fd = memory_region_get_fd(&backend->mr); 459 460 if (object_property_get_bool(OBJECT(backend), "pmem", NULL)) { 461 MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem); 462 void *ptr = memory_region_get_ram_ptr(mr); 463 size_t size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP, 464 NULL); 465 466 /* flush pmem backend */ 467 pmem_persist(ptr, size); 468 } else { 469 /* flush raw backing image */ 470 if (qemu_fdatasync(backend_fd) < 0) { 471 error_report("papr_scm: Could not sync nvdimm to backend file: %s", 472 strerror(errno)); 473 return H_HARDWARE; 474 } 475 } 476 477 return H_SUCCESS; 478 } 479 480 static void spapr_nvdimm_flush_completion_cb(void *opaque, int hcall_ret) 481 { 482 SpaprNVDIMMDeviceFlushState *state = opaque; 483 SpaprDrc *drc = spapr_drc_by_index(state->drcidx); 484 SpaprNVDIMMDevice *s_nvdimm; 485 486 g_assert(drc != NULL); 487 488 s_nvdimm = SPAPR_NVDIMM(drc->dev); 489 490 state->hcall_ret = hcall_ret; 491 QLIST_REMOVE(state, node); 492 QLIST_INSERT_HEAD(&s_nvdimm->completed_nvdimm_flush_states, state, node); 493 } 494 495 static int spapr_nvdimm_flush_post_load(void *opaque, int version_id) 496 { 497 SpaprNVDIMMDevice *s_nvdimm = (SpaprNVDIMMDevice *)opaque; 498 SpaprNVDIMMDeviceFlushState *state; 499 ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context()); 500 HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(s_nvdimm)->hostmem); 501 bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL); 502 bool pmem_override = object_property_get_bool(OBJECT(s_nvdimm), 503 "pmem-override", NULL); 504 bool dest_hcall_flush_required = pmem_override || !is_pmem; 505 506 if (!s_nvdimm->hcall_flush_required && dest_hcall_flush_required) { 507 error_report("The file backend for the spapr-nvdimm device %s at " 508 "source is a pmem, use pmem=on and pmem-override=off to " 509 "continue.", DEVICE(s_nvdimm)->id); 510 return -EINVAL; 511 } 512 if (s_nvdimm->hcall_flush_required && !dest_hcall_flush_required) { 513 error_report("The guest expects hcall-flush support for the " 514 "spapr-nvdimm device %s, use pmem_override=on to " 515 "continue.", DEVICE(s_nvdimm)->id); 516 return -EINVAL; 517 } 518 519 QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) { 520 thread_pool_submit_aio(pool, flush_worker_cb, state, 521 spapr_nvdimm_flush_completion_cb, state); 522 } 523 524 return 0; 525 } 526 527 static const VMStateDescription vmstate_spapr_nvdimm_flush_state = { 528 .name = "spapr_nvdimm_flush_state", 529 .version_id = 1, 530 .minimum_version_id = 1, 531 .fields = (VMStateField[]) { 532 VMSTATE_UINT64(continue_token, SpaprNVDIMMDeviceFlushState), 533 VMSTATE_INT64(hcall_ret, SpaprNVDIMMDeviceFlushState), 534 VMSTATE_UINT32(drcidx, SpaprNVDIMMDeviceFlushState), 535 VMSTATE_END_OF_LIST() 536 }, 537 }; 538 539 const VMStateDescription vmstate_spapr_nvdimm_states = { 540 .name = "spapr_nvdimm_states", 541 .version_id = 1, 542 .minimum_version_id = 1, 543 .post_load = spapr_nvdimm_flush_post_load, 544 .fields = (VMStateField[]) { 545 VMSTATE_BOOL(hcall_flush_required, SpaprNVDIMMDevice), 546 VMSTATE_UINT64(nvdimm_flush_token, SpaprNVDIMMDevice), 547 VMSTATE_QLIST_V(completed_nvdimm_flush_states, SpaprNVDIMMDevice, 1, 548 vmstate_spapr_nvdimm_flush_state, 549 SpaprNVDIMMDeviceFlushState, node), 550 VMSTATE_QLIST_V(pending_nvdimm_flush_states, SpaprNVDIMMDevice, 1, 551 vmstate_spapr_nvdimm_flush_state, 552 SpaprNVDIMMDeviceFlushState, node), 553 VMSTATE_END_OF_LIST() 554 }, 555 }; 556 557 /* 558 * Assign a token and reserve it for the new flush state. 559 */ 560 static SpaprNVDIMMDeviceFlushState *spapr_nvdimm_init_new_flush_state( 561 SpaprNVDIMMDevice *spapr_nvdimm) 562 { 563 SpaprNVDIMMDeviceFlushState *state; 564 565 state = g_malloc0(sizeof(*state)); 566 567 spapr_nvdimm->nvdimm_flush_token++; 568 /* Token zero is presumed as no job pending. Assert on overflow to zero */ 569 g_assert(spapr_nvdimm->nvdimm_flush_token != 0); 570 571 state->continue_token = spapr_nvdimm->nvdimm_flush_token; 572 573 QLIST_INSERT_HEAD(&spapr_nvdimm->pending_nvdimm_flush_states, state, node); 574 575 return state; 576 } 577 578 /* 579 * spapr_nvdimm_finish_flushes 580 * Waits for all pending flush requests to complete 581 * their execution and free the states 582 */ 583 void spapr_nvdimm_finish_flushes(void) 584 { 585 SpaprNVDIMMDeviceFlushState *state, *next; 586 GSList *list, *nvdimms; 587 588 /* 589 * Called on reset path, the main loop thread which calls 590 * the pending BHs has gotten out running in the reset path, 591 * finally reaching here. Other code path being guest 592 * h_client_architecture_support, thats early boot up. 593 */ 594 nvdimms = nvdimm_get_device_list(); 595 for (list = nvdimms; list; list = list->next) { 596 NVDIMMDevice *nvdimm = list->data; 597 if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) { 598 SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(nvdimm); 599 while (!QLIST_EMPTY(&s_nvdimm->pending_nvdimm_flush_states)) { 600 aio_poll(qemu_get_aio_context(), true); 601 } 602 603 QLIST_FOREACH_SAFE(state, &s_nvdimm->completed_nvdimm_flush_states, 604 node, next) { 605 QLIST_REMOVE(state, node); 606 g_free(state); 607 } 608 } 609 } 610 g_slist_free(nvdimms); 611 } 612 613 /* 614 * spapr_nvdimm_get_flush_status 615 * Fetches the status of the hcall worker and returns 616 * H_LONG_BUSY_ORDER_10_MSEC if the worker is still running. 617 */ 618 static int spapr_nvdimm_get_flush_status(SpaprNVDIMMDevice *s_nvdimm, 619 uint64_t token) 620 { 621 SpaprNVDIMMDeviceFlushState *state, *node; 622 623 QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) { 624 if (state->continue_token == token) { 625 return H_LONG_BUSY_ORDER_10_MSEC; 626 } 627 } 628 629 QLIST_FOREACH_SAFE(state, &s_nvdimm->completed_nvdimm_flush_states, 630 node, node) { 631 if (state->continue_token == token) { 632 int ret = state->hcall_ret; 633 QLIST_REMOVE(state, node); 634 g_free(state); 635 return ret; 636 } 637 } 638 639 /* If not found in complete list too, invalid token */ 640 return H_P2; 641 } 642 643 /* 644 * H_SCM_FLUSH 645 * Input: drc_index, continue-token 646 * Out: continue-token 647 * Return Value: H_SUCCESS, H_Parameter, H_P2, H_LONG_BUSY_ORDER_10_MSEC, 648 * H_UNSUPPORTED 649 * 650 * Given a DRC Index Flush the data to backend NVDIMM device. The hcall returns 651 * H_LONG_BUSY_ORDER_10_MSEC when the flush takes longer time and the hcall 652 * needs to be issued multiple times in order to be completely serviced. The 653 * continue-token from the output to be passed in the argument list of 654 * subsequent hcalls until the hcall is completely serviced at which point 655 * H_SUCCESS or other error is returned. 656 */ 657 static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr, 658 target_ulong opcode, target_ulong *args) 659 { 660 int ret; 661 uint32_t drc_index = args[0]; 662 uint64_t continue_token = args[1]; 663 SpaprDrc *drc = spapr_drc_by_index(drc_index); 664 PCDIMMDevice *dimm; 665 HostMemoryBackend *backend = NULL; 666 SpaprNVDIMMDeviceFlushState *state; 667 ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context()); 668 int fd; 669 670 if (!drc || !drc->dev || 671 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { 672 return H_PARAMETER; 673 } 674 675 dimm = PC_DIMM(drc->dev); 676 if (!object_dynamic_cast(OBJECT(dimm), TYPE_SPAPR_NVDIMM)) { 677 return H_PARAMETER; 678 } 679 if (continue_token == 0) { 680 bool is_pmem = false, pmem_override = false; 681 backend = MEMORY_BACKEND(dimm->hostmem); 682 fd = memory_region_get_fd(&backend->mr); 683 684 if (fd < 0) { 685 return H_UNSUPPORTED; 686 } 687 688 is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL); 689 pmem_override = object_property_get_bool(OBJECT(dimm), 690 "pmem-override", NULL); 691 if (is_pmem && !pmem_override) { 692 return H_UNSUPPORTED; 693 } 694 695 state = spapr_nvdimm_init_new_flush_state(SPAPR_NVDIMM(dimm)); 696 if (!state) { 697 return H_HARDWARE; 698 } 699 700 state->drcidx = drc_index; 701 702 thread_pool_submit_aio(pool, flush_worker_cb, state, 703 spapr_nvdimm_flush_completion_cb, state); 704 705 continue_token = state->continue_token; 706 } 707 708 ret = spapr_nvdimm_get_flush_status(SPAPR_NVDIMM(dimm), continue_token); 709 if (H_IS_LONG_BUSY(ret)) { 710 args[0] = continue_token; 711 } 712 713 return ret; 714 } 715 716 static target_ulong h_scm_unbind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr, 717 target_ulong opcode, target_ulong *args) 718 { 719 uint32_t drc_index = args[0]; 720 uint64_t starting_scm_logical_addr = args[1]; 721 uint64_t no_of_scm_blocks_to_unbind = args[2]; 722 uint64_t continue_token = args[3]; 723 uint64_t size_to_unbind; 724 Range blockrange = range_empty; 725 Range nvdimmrange = range_empty; 726 SpaprDrc *drc = spapr_drc_by_index(drc_index); 727 NVDIMMDevice *nvdimm; 728 uint64_t size, addr; 729 730 if (!drc || !drc->dev || 731 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { 732 return H_PARAMETER; 733 } 734 735 /* continue_token should be zero as this hcall doesn't return H_BUSY. */ 736 if (continue_token > 0) { 737 return H_P4; 738 } 739 740 /* Check if starting_scm_logical_addr is block aligned */ 741 if (!QEMU_IS_ALIGNED(starting_scm_logical_addr, 742 SPAPR_MINIMUM_SCM_BLOCK_SIZE)) { 743 return H_P2; 744 } 745 746 size_to_unbind = no_of_scm_blocks_to_unbind * SPAPR_MINIMUM_SCM_BLOCK_SIZE; 747 if (no_of_scm_blocks_to_unbind == 0 || no_of_scm_blocks_to_unbind != 748 size_to_unbind / SPAPR_MINIMUM_SCM_BLOCK_SIZE) { 749 return H_P3; 750 } 751 752 nvdimm = NVDIMM(drc->dev); 753 size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, 754 &error_abort); 755 addr = object_property_get_int(OBJECT(nvdimm), PC_DIMM_ADDR_PROP, 756 &error_abort); 757 758 range_init_nofail(&nvdimmrange, addr, size); 759 range_init_nofail(&blockrange, starting_scm_logical_addr, size_to_unbind); 760 761 if (!range_contains_range(&nvdimmrange, &blockrange)) { 762 return H_P3; 763 } 764 765 args[1] = no_of_scm_blocks_to_unbind; 766 767 /* let unplug take care of actual unbind */ 768 return H_SUCCESS; 769 } 770 771 #define H_UNBIND_SCOPE_ALL 0x1 772 #define H_UNBIND_SCOPE_DRC 0x2 773 774 static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr, 775 target_ulong opcode, target_ulong *args) 776 { 777 uint64_t target_scope = args[0]; 778 uint32_t drc_index = args[1]; 779 uint64_t continue_token = args[2]; 780 NVDIMMDevice *nvdimm; 781 uint64_t size; 782 uint64_t no_of_scm_blocks_unbound = 0; 783 784 /* continue_token should be zero as this hcall doesn't return H_BUSY. */ 785 if (continue_token > 0) { 786 return H_P4; 787 } 788 789 if (target_scope == H_UNBIND_SCOPE_DRC) { 790 SpaprDrc *drc = spapr_drc_by_index(drc_index); 791 792 if (!drc || !drc->dev || 793 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { 794 return H_P2; 795 } 796 797 nvdimm = NVDIMM(drc->dev); 798 size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, 799 &error_abort); 800 801 no_of_scm_blocks_unbound = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; 802 } else if (target_scope == H_UNBIND_SCOPE_ALL) { 803 GSList *list, *nvdimms; 804 805 nvdimms = nvdimm_get_device_list(); 806 for (list = nvdimms; list; list = list->next) { 807 nvdimm = list->data; 808 size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, 809 &error_abort); 810 811 no_of_scm_blocks_unbound += size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; 812 } 813 g_slist_free(nvdimms); 814 } else { 815 return H_PARAMETER; 816 } 817 818 args[1] = no_of_scm_blocks_unbound; 819 820 /* let unplug take care of actual unbind */ 821 return H_SUCCESS; 822 } 823 824 static target_ulong h_scm_health(PowerPCCPU *cpu, SpaprMachineState *spapr, 825 target_ulong opcode, target_ulong *args) 826 { 827 828 NVDIMMDevice *nvdimm; 829 uint64_t hbitmap = 0; 830 uint32_t drc_index = args[0]; 831 SpaprDrc *drc = spapr_drc_by_index(drc_index); 832 const uint64_t hbitmap_mask = PAPR_PMEM_UNARMED; 833 834 835 /* Ensure that the drc is valid & is valid PMEM dimm and is plugged in */ 836 if (!drc || !drc->dev || 837 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { 838 return H_PARAMETER; 839 } 840 841 nvdimm = NVDIMM(drc->dev); 842 843 /* Update if the nvdimm is unarmed and send its status via health bitmaps */ 844 if (object_property_get_bool(OBJECT(nvdimm), NVDIMM_UNARMED_PROP, NULL)) { 845 hbitmap |= PAPR_PMEM_UNARMED; 846 } 847 848 /* Update the out args with health bitmap/mask */ 849 args[0] = hbitmap; 850 args[1] = hbitmap_mask; 851 852 return H_SUCCESS; 853 } 854 855 static void spapr_scm_register_types(void) 856 { 857 /* qemu/scm specific hcalls */ 858 spapr_register_hypercall(H_SCM_READ_METADATA, h_scm_read_metadata); 859 spapr_register_hypercall(H_SCM_WRITE_METADATA, h_scm_write_metadata); 860 spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem); 861 spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem); 862 spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all); 863 spapr_register_hypercall(H_SCM_HEALTH, h_scm_health); 864 spapr_register_hypercall(H_SCM_FLUSH, h_scm_flush); 865 } 866 867 type_init(spapr_scm_register_types) 868 869 static void spapr_nvdimm_realize(NVDIMMDevice *dimm, Error **errp) 870 { 871 SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(dimm); 872 HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(dimm)->hostmem); 873 bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL); 874 bool pmem_override = object_property_get_bool(OBJECT(dimm), "pmem-override", 875 NULL); 876 if (!is_pmem || pmem_override) { 877 s_nvdimm->hcall_flush_required = true; 878 } 879 880 vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, 881 &vmstate_spapr_nvdimm_states, dimm); 882 } 883 884 static void spapr_nvdimm_unrealize(NVDIMMDevice *dimm) 885 { 886 vmstate_unregister(NULL, &vmstate_spapr_nvdimm_states, dimm); 887 } 888 889 static Property spapr_nvdimm_properties[] = { 890 #ifdef CONFIG_LIBPMEM 891 DEFINE_PROP_BOOL("pmem-override", SpaprNVDIMMDevice, pmem_override, false), 892 #endif 893 DEFINE_PROP_END_OF_LIST(), 894 }; 895 896 static void spapr_nvdimm_class_init(ObjectClass *oc, void *data) 897 { 898 DeviceClass *dc = DEVICE_CLASS(oc); 899 NVDIMMClass *nvc = NVDIMM_CLASS(oc); 900 901 nvc->realize = spapr_nvdimm_realize; 902 nvc->unrealize = spapr_nvdimm_unrealize; 903 904 device_class_set_props(dc, spapr_nvdimm_properties); 905 } 906 907 static void spapr_nvdimm_init(Object *obj) 908 { 909 SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(obj); 910 911 s_nvdimm->hcall_flush_required = false; 912 QLIST_INIT(&s_nvdimm->pending_nvdimm_flush_states); 913 QLIST_INIT(&s_nvdimm->completed_nvdimm_flush_states); 914 } 915 916 static TypeInfo spapr_nvdimm_info = { 917 .name = TYPE_SPAPR_NVDIMM, 918 .parent = TYPE_NVDIMM, 919 .class_init = spapr_nvdimm_class_init, 920 .class_size = sizeof(SPAPRNVDIMMClass), 921 .instance_size = sizeof(SpaprNVDIMMDevice), 922 .instance_init = spapr_nvdimm_init, 923 }; 924 925 static void spapr_nvdimm_register_types(void) 926 { 927 type_register_static(&spapr_nvdimm_info); 928 } 929 930 type_init(spapr_nvdimm_register_types) 931