1 // SPDX-License-Identifier: GPL-2.0 2 3 #define pr_fmt(fmt) "papr-scm: " fmt 4 5 #include <linux/of.h> 6 #include <linux/kernel.h> 7 #include <linux/module.h> 8 #include <linux/ioport.h> 9 #include <linux/slab.h> 10 #include <linux/ndctl.h> 11 #include <linux/sched.h> 12 #include <linux/libnvdimm.h> 13 #include <linux/platform_device.h> 14 #include <linux/delay.h> 15 #include <linux/seq_buf.h> 16 #include <linux/nd.h> 17 18 #include <asm/plpar_wrappers.h> 19 #include <asm/papr_pdsm.h> 20 #include <asm/mce.h> 21 #include <asm/unaligned.h> 22 #include <linux/perf_event.h> 23 24 #define BIND_ANY_ADDR (~0ul) 25 26 #define PAPR_SCM_DIMM_CMD_MASK \ 27 ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ 28 (1ul << ND_CMD_GET_CONFIG_DATA) | \ 29 (1ul << ND_CMD_SET_CONFIG_DATA) | \ 30 (1ul << ND_CMD_CALL)) 31 32 /* DIMM health bitmap bitmap indicators */ 33 /* SCM device is unable to persist memory contents */ 34 #define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) 35 /* SCM device failed to persist memory contents */ 36 #define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) 37 /* SCM device contents are persisted from previous IPL */ 38 #define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2)) 39 /* SCM device contents are not persisted from previous IPL */ 40 #define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) 41 /* SCM device memory life remaining is critically low */ 42 #define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) 43 /* SCM device will be garded off next IPL due to failure */ 44 #define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) 45 /* SCM contents cannot persist due to current platform health status */ 46 #define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) 47 /* SCM device is unable to persist memory contents in certain conditions */ 48 #define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7)) 49 /* SCM device is encrypted */ 50 #define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8)) 51 /* SCM device has been scrubbed and locked */ 52 #define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9)) 53 54 /* Bits status indicators for health bitmap indicating unarmed dimm */ 55 #define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ 56 PAPR_PMEM_HEALTH_UNHEALTHY) 57 58 /* Bits status indicators for health bitmap indicating unflushed dimm */ 59 #define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) 60 61 /* Bits status indicators for health bitmap indicating unrestored dimm */ 62 #define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) 63 64 /* Bit status indicators for smart event notification */ 65 #define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ 66 PAPR_PMEM_HEALTH_FATAL | \ 67 PAPR_PMEM_HEALTH_UNHEALTHY) 68 69 #define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS) 70 #define PAPR_SCM_PERF_STATS_VERSION 0x1 71 72 /* Struct holding a single performance metric */ 73 struct papr_scm_perf_stat { 74 u8 stat_id[8]; 75 __be64 stat_val; 76 } __packed; 77 78 /* Struct exchanged between kernel and PHYP for fetching drc perf stats */ 79 struct papr_scm_perf_stats { 80 u8 eye_catcher[8]; 81 /* Should be PAPR_SCM_PERF_STATS_VERSION */ 82 __be32 stats_version; 83 /* Number of stats following */ 84 __be32 num_statistics; 85 /* zero or more performance matrics */ 86 struct papr_scm_perf_stat scm_statistic[]; 87 } __packed; 88 89 /* private struct associated with each region */ 90 struct papr_scm_priv { 91 struct platform_device *pdev; 92 struct device_node *dn; 93 uint32_t drc_index; 94 uint64_t blocks; 95 uint64_t block_size; 96 int metadata_size; 97 bool is_volatile; 98 bool hcall_flush_required; 99 100 uint64_t bound_addr; 101 102 struct nvdimm_bus_descriptor bus_desc; 103 struct nvdimm_bus *bus; 104 struct nvdimm *nvdimm; 105 struct resource res; 106 struct nd_region *region; 107 struct nd_interleave_set nd_set; 108 struct list_head region_list; 109 110 /* Protect dimm health data from concurrent read/writes */ 111 struct mutex health_mutex; 112 113 /* Last time the health information of the dimm was updated */ 114 unsigned long lasthealth_jiffies; 115 116 /* Health information for the dimm */ 117 u64 health_bitmap; 118 119 /* Holds the last known dirty shutdown counter value */ 120 u64 dirty_shutdown_counter; 121 122 /* length of the stat buffer as expected by phyp */ 123 size_t stat_buffer_len; 124 125 /* The bits which needs to be overridden */ 126 u64 health_bitmap_inject_mask; 127 128 /* array to have event_code and stat_id mappings */ 129 u8 *nvdimm_events_map; 130 }; 131 132 static int papr_scm_pmem_flush(struct nd_region *nd_region, 133 struct bio *bio __maybe_unused) 134 { 135 struct papr_scm_priv *p = nd_region_provider_data(nd_region); 136 unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0; 137 long rc; 138 139 dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index); 140 141 do { 142 rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token); 143 token = ret_buf[0]; 144 145 /* Check if we are stalled for some time */ 146 if (H_IS_LONG_BUSY(rc)) { 147 msleep(get_longbusy_msecs(rc)); 148 rc = H_BUSY; 149 } else if (rc == H_BUSY) { 150 cond_resched(); 151 } 152 } while (rc == H_BUSY); 153 154 if (rc) { 155 dev_err(&p->pdev->dev, "flush error: %ld", rc); 156 rc = -EIO; 157 } else { 158 dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index); 159 } 160 161 return rc; 162 } 163 164 static LIST_HEAD(papr_nd_regions); 165 static DEFINE_MUTEX(papr_ndr_lock); 166 167 static int drc_pmem_bind(struct papr_scm_priv *p) 168 { 169 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 170 uint64_t saved = 0; 171 uint64_t token; 172 int64_t rc; 173 174 /* 175 * When the hypervisor cannot map all the requested memory in a single 176 * hcall it returns H_BUSY and we call again with the token until 177 * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS 178 * leave the system in an undefined state, so we wait. 179 */ 180 token = 0; 181 182 do { 183 rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0, 184 p->blocks, BIND_ANY_ADDR, token); 185 token = ret[0]; 186 if (!saved) 187 saved = ret[1]; 188 cond_resched(); 189 } while (rc == H_BUSY); 190 191 if (rc) 192 return rc; 193 194 p->bound_addr = saved; 195 dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", 196 p->drc_index, (unsigned long)saved); 197 return rc; 198 } 199 200 static void drc_pmem_unbind(struct papr_scm_priv *p) 201 { 202 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 203 uint64_t token = 0; 204 int64_t rc; 205 206 dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index); 207 208 /* NB: unbind has the same retry requirements as drc_pmem_bind() */ 209 do { 210 211 /* Unbind of all SCM resources associated with drcIndex */ 212 rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC, 213 p->drc_index, token); 214 token = ret[0]; 215 216 /* Check if we are stalled for some time */ 217 if (H_IS_LONG_BUSY(rc)) { 218 msleep(get_longbusy_msecs(rc)); 219 rc = H_BUSY; 220 } else if (rc == H_BUSY) { 221 cond_resched(); 222 } 223 224 } while (rc == H_BUSY); 225 226 if (rc) 227 dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); 228 else 229 dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n", 230 p->drc_index); 231 232 return; 233 } 234 235 static int drc_pmem_query_n_bind(struct papr_scm_priv *p) 236 { 237 unsigned long start_addr; 238 unsigned long end_addr; 239 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 240 int64_t rc; 241 242 243 rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret, 244 p->drc_index, 0); 245 if (rc) 246 goto err_out; 247 start_addr = ret[0]; 248 249 /* Make sure the full region is bound. */ 250 rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret, 251 p->drc_index, p->blocks - 1); 252 if (rc) 253 goto err_out; 254 end_addr = ret[0]; 255 256 if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size)) 257 goto err_out; 258 259 p->bound_addr = start_addr; 260 dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", p->drc_index, start_addr); 261 return rc; 262 263 err_out: 264 dev_info(&p->pdev->dev, 265 "Failed to query, trying an unbind followed by bind"); 266 drc_pmem_unbind(p); 267 return drc_pmem_bind(p); 268 } 269 270 /* 271 * Query the Dimm performance stats from PHYP and copy them (if returned) to 272 * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast 273 * (num_stats + header) bytes. 274 * - If buff_stats == NULL the return value is the size in bytes of the buffer 275 * needed to hold all supported performance-statistics. 276 * - If buff_stats != NULL and num_stats == 0 then we copy all known 277 * performance-statistics to 'buff_stat' and expect to be large enough to 278 * hold them. 279 * - if buff_stats != NULL and num_stats > 0 then copy the requested 280 * performance-statistics to buff_stats. 281 */ 282 static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, 283 struct papr_scm_perf_stats *buff_stats, 284 unsigned int num_stats) 285 { 286 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 287 size_t size; 288 s64 rc; 289 290 /* Setup the out buffer */ 291 if (buff_stats) { 292 memcpy(buff_stats->eye_catcher, 293 PAPR_SCM_PERF_STATS_EYECATCHER, 8); 294 buff_stats->stats_version = 295 cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION); 296 buff_stats->num_statistics = 297 cpu_to_be32(num_stats); 298 299 /* 300 * Calculate the buffer size based on num-stats provided 301 * or use the prefetched max buffer length 302 */ 303 if (num_stats) 304 /* Calculate size from the num_stats */ 305 size = sizeof(struct papr_scm_perf_stats) + 306 num_stats * sizeof(struct papr_scm_perf_stat); 307 else 308 size = p->stat_buffer_len; 309 } else { 310 /* In case of no out buffer ignore the size */ 311 size = 0; 312 } 313 314 /* Do the HCALL asking PHYP for info */ 315 rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index, 316 buff_stats ? virt_to_phys(buff_stats) : 0, 317 size); 318 319 /* Check if the error was due to an unknown stat-id */ 320 if (rc == H_PARTIAL) { 321 dev_err(&p->pdev->dev, 322 "Unknown performance stats, Err:0x%016lX\n", ret[0]); 323 return -ENOENT; 324 } else if (rc == H_AUTHORITY) { 325 dev_info(&p->pdev->dev, 326 "Permission denied while accessing performance stats"); 327 return -EPERM; 328 } else if (rc == H_UNSUPPORTED) { 329 dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); 330 return -EOPNOTSUPP; 331 } else if (rc != H_SUCCESS) { 332 dev_err(&p->pdev->dev, 333 "Failed to query performance stats, Err:%lld\n", rc); 334 return -EIO; 335 336 } else if (!size) { 337 /* Handle case where stat buffer size was requested */ 338 dev_dbg(&p->pdev->dev, 339 "Performance stats size %ld\n", ret[0]); 340 return ret[0]; 341 } 342 343 /* Successfully fetched the requested stats from phyp */ 344 dev_dbg(&p->pdev->dev, 345 "Performance stats returned %d stats\n", 346 be32_to_cpu(buff_stats->num_statistics)); 347 return 0; 348 } 349 350 #ifdef CONFIG_PERF_EVENTS 351 #define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu) 352 353 static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count) 354 { 355 struct papr_scm_perf_stat *stat; 356 struct papr_scm_perf_stats *stats; 357 struct papr_scm_priv *p = (struct papr_scm_priv *)dev->driver_data; 358 int rc, size; 359 360 /* Allocate request buffer enough to hold single performance stat */ 361 size = sizeof(struct papr_scm_perf_stats) + 362 sizeof(struct papr_scm_perf_stat); 363 364 if (!p || !p->nvdimm_events_map) 365 return -EINVAL; 366 367 stats = kzalloc(size, GFP_KERNEL); 368 if (!stats) 369 return -ENOMEM; 370 371 stat = &stats->scm_statistic[0]; 372 memcpy(&stat->stat_id, 373 &p->nvdimm_events_map[event->attr.config * sizeof(stat->stat_id)], 374 sizeof(stat->stat_id)); 375 stat->stat_val = 0; 376 377 rc = drc_pmem_query_stats(p, stats, 1); 378 if (rc < 0) { 379 kfree(stats); 380 return rc; 381 } 382 383 *count = be64_to_cpu(stat->stat_val); 384 kfree(stats); 385 return 0; 386 } 387 388 static int papr_scm_pmu_event_init(struct perf_event *event) 389 { 390 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 391 struct papr_scm_priv *p; 392 393 if (!nd_pmu) 394 return -EINVAL; 395 396 /* test the event attr type for PMU enumeration */ 397 if (event->attr.type != event->pmu->type) 398 return -ENOENT; 399 400 /* it does not support event sampling mode */ 401 if (is_sampling_event(event)) 402 return -EOPNOTSUPP; 403 404 /* no branch sampling */ 405 if (has_branch_stack(event)) 406 return -EOPNOTSUPP; 407 408 p = (struct papr_scm_priv *)nd_pmu->dev->driver_data; 409 if (!p) 410 return -EINVAL; 411 412 /* Invalid eventcode */ 413 if (event->attr.config == 0 || event->attr.config > 16) 414 return -EINVAL; 415 416 return 0; 417 } 418 419 static int papr_scm_pmu_add(struct perf_event *event, int flags) 420 { 421 u64 count; 422 int rc; 423 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 424 425 if (!nd_pmu) 426 return -EINVAL; 427 428 if (flags & PERF_EF_START) { 429 rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &count); 430 if (rc) 431 return rc; 432 433 local64_set(&event->hw.prev_count, count); 434 } 435 436 return 0; 437 } 438 439 static void papr_scm_pmu_read(struct perf_event *event) 440 { 441 u64 prev, now; 442 int rc; 443 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 444 445 if (!nd_pmu) 446 return; 447 448 rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &now); 449 if (rc) 450 return; 451 452 prev = local64_xchg(&event->hw.prev_count, now); 453 local64_add(now - prev, &event->count); 454 } 455 456 static void papr_scm_pmu_del(struct perf_event *event, int flags) 457 { 458 papr_scm_pmu_read(event); 459 } 460 461 static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu) 462 { 463 struct papr_scm_perf_stat *stat; 464 struct papr_scm_perf_stats *stats; 465 u32 available_events; 466 int index, rc = 0; 467 468 available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats)) 469 / sizeof(struct papr_scm_perf_stat); 470 if (available_events == 0) 471 return -EOPNOTSUPP; 472 473 /* Allocate the buffer for phyp where stats are written */ 474 stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); 475 if (!stats) { 476 rc = -ENOMEM; 477 return rc; 478 } 479 480 /* Called to get list of events supported */ 481 rc = drc_pmem_query_stats(p, stats, 0); 482 if (rc) 483 goto out; 484 485 /* 486 * Allocate memory and populate nvdimm_event_map. 487 * Allocate an extra element for NULL entry 488 */ 489 p->nvdimm_events_map = kcalloc(available_events + 1, 490 sizeof(stat->stat_id), 491 GFP_KERNEL); 492 if (!p->nvdimm_events_map) { 493 rc = -ENOMEM; 494 goto out; 495 } 496 497 /* Copy all stat_ids to event map */ 498 for (index = 0, stat = stats->scm_statistic; 499 index < available_events; index++, ++stat) { 500 memcpy(&p->nvdimm_events_map[index * sizeof(stat->stat_id)], 501 &stat->stat_id, sizeof(stat->stat_id)); 502 } 503 out: 504 kfree(stats); 505 return rc; 506 } 507 508 static void papr_scm_pmu_register(struct papr_scm_priv *p) 509 { 510 struct nvdimm_pmu *nd_pmu; 511 int rc, nodeid; 512 513 nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL); 514 if (!nd_pmu) { 515 rc = -ENOMEM; 516 goto pmu_err_print; 517 } 518 519 rc = papr_scm_pmu_check_events(p, nd_pmu); 520 if (rc) 521 goto pmu_check_events_err; 522 523 nd_pmu->pmu.task_ctx_nr = perf_invalid_context; 524 nd_pmu->pmu.name = nvdimm_name(p->nvdimm); 525 nd_pmu->pmu.event_init = papr_scm_pmu_event_init; 526 nd_pmu->pmu.read = papr_scm_pmu_read; 527 nd_pmu->pmu.add = papr_scm_pmu_add; 528 nd_pmu->pmu.del = papr_scm_pmu_del; 529 530 nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT | 531 PERF_PMU_CAP_NO_EXCLUDE; 532 533 /*updating the cpumask variable */ 534 nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev)); 535 nd_pmu->arch_cpumask = *cpumask_of_node(nodeid); 536 537 rc = register_nvdimm_pmu(nd_pmu, p->pdev); 538 if (rc) 539 goto pmu_register_err; 540 541 /* 542 * Set archdata.priv value to nvdimm_pmu structure, to handle the 543 * unregistering of pmu device. 544 */ 545 p->pdev->archdata.priv = nd_pmu; 546 return; 547 548 pmu_register_err: 549 kfree(p->nvdimm_events_map); 550 pmu_check_events_err: 551 kfree(nd_pmu); 552 pmu_err_print: 553 dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc); 554 } 555 556 #else 557 static void papr_scm_pmu_register(struct papr_scm_priv *p) { } 558 #endif 559 560 /* 561 * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the 562 * health information. 563 */ 564 static int __drc_pmem_query_health(struct papr_scm_priv *p) 565 { 566 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 567 u64 bitmap = 0; 568 long rc; 569 570 /* issue the hcall */ 571 rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); 572 if (rc == H_SUCCESS) 573 bitmap = ret[0] & ret[1]; 574 else if (rc == H_FUNCTION) 575 dev_info_once(&p->pdev->dev, 576 "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap"); 577 else { 578 579 dev_err(&p->pdev->dev, 580 "Failed to query health information, Err:%ld\n", rc); 581 return -ENXIO; 582 } 583 584 p->lasthealth_jiffies = jiffies; 585 /* Allow injecting specific health bits via inject mask. */ 586 if (p->health_bitmap_inject_mask) 587 bitmap = (bitmap & ~p->health_bitmap_inject_mask) | 588 p->health_bitmap_inject_mask; 589 WRITE_ONCE(p->health_bitmap, bitmap); 590 dev_dbg(&p->pdev->dev, 591 "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", 592 ret[0], ret[1]); 593 594 return 0; 595 } 596 597 /* Min interval in seconds for assuming stable dimm health */ 598 #define MIN_HEALTH_QUERY_INTERVAL 60 599 600 /* Query cached health info and if needed call drc_pmem_query_health */ 601 static int drc_pmem_query_health(struct papr_scm_priv *p) 602 { 603 unsigned long cache_timeout; 604 int rc; 605 606 /* Protect concurrent modifications to papr_scm_priv */ 607 rc = mutex_lock_interruptible(&p->health_mutex); 608 if (rc) 609 return rc; 610 611 /* Jiffies offset for which the health data is assumed to be same */ 612 cache_timeout = p->lasthealth_jiffies + 613 msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000); 614 615 /* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */ 616 if (time_after(jiffies, cache_timeout)) 617 rc = __drc_pmem_query_health(p); 618 else 619 /* Assume cached health data is valid */ 620 rc = 0; 621 622 mutex_unlock(&p->health_mutex); 623 return rc; 624 } 625 626 static int papr_scm_meta_get(struct papr_scm_priv *p, 627 struct nd_cmd_get_config_data_hdr *hdr) 628 { 629 unsigned long data[PLPAR_HCALL_BUFSIZE]; 630 unsigned long offset, data_offset; 631 int len, read; 632 int64_t ret; 633 634 if ((hdr->in_offset + hdr->in_length) > p->metadata_size) 635 return -EINVAL; 636 637 for (len = hdr->in_length; len; len -= read) { 638 639 data_offset = hdr->in_length - len; 640 offset = hdr->in_offset + data_offset; 641 642 if (len >= 8) 643 read = 8; 644 else if (len >= 4) 645 read = 4; 646 else if (len >= 2) 647 read = 2; 648 else 649 read = 1; 650 651 ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index, 652 offset, read); 653 654 if (ret == H_PARAMETER) /* bad DRC index */ 655 return -ENODEV; 656 if (ret) 657 return -EINVAL; /* other invalid parameter */ 658 659 switch (read) { 660 case 8: 661 *(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]); 662 break; 663 case 4: 664 *(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff); 665 break; 666 667 case 2: 668 *(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff); 669 break; 670 671 case 1: 672 *(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff); 673 break; 674 } 675 } 676 return 0; 677 } 678 679 static int papr_scm_meta_set(struct papr_scm_priv *p, 680 struct nd_cmd_set_config_hdr *hdr) 681 { 682 unsigned long offset, data_offset; 683 int len, wrote; 684 unsigned long data; 685 __be64 data_be; 686 int64_t ret; 687 688 if ((hdr->in_offset + hdr->in_length) > p->metadata_size) 689 return -EINVAL; 690 691 for (len = hdr->in_length; len; len -= wrote) { 692 693 data_offset = hdr->in_length - len; 694 offset = hdr->in_offset + data_offset; 695 696 if (len >= 8) { 697 data = *(uint64_t *)(hdr->in_buf + data_offset); 698 data_be = cpu_to_be64(data); 699 wrote = 8; 700 } else if (len >= 4) { 701 data = *(uint32_t *)(hdr->in_buf + data_offset); 702 data &= 0xffffffff; 703 data_be = cpu_to_be32(data); 704 wrote = 4; 705 } else if (len >= 2) { 706 data = *(uint16_t *)(hdr->in_buf + data_offset); 707 data &= 0xffff; 708 data_be = cpu_to_be16(data); 709 wrote = 2; 710 } else { 711 data_be = *(uint8_t *)(hdr->in_buf + data_offset); 712 data_be &= 0xff; 713 wrote = 1; 714 } 715 716 ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index, 717 offset, data_be, wrote); 718 if (ret == H_PARAMETER) /* bad DRC index */ 719 return -ENODEV; 720 if (ret) 721 return -EINVAL; /* other invalid parameter */ 722 } 723 724 return 0; 725 } 726 727 /* 728 * Do a sanity checks on the inputs args to dimm-control function and return 729 * '0' if valid. Validation of PDSM payloads happens later in 730 * papr_scm_service_pdsm. 731 */ 732 static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, 733 unsigned int buf_len) 734 { 735 unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; 736 struct nd_cmd_pkg *nd_cmd; 737 struct papr_scm_priv *p; 738 enum papr_pdsm pdsm; 739 740 /* Only dimm-specific calls are supported atm */ 741 if (!nvdimm) 742 return -EINVAL; 743 744 /* get the provider data from struct nvdimm */ 745 p = nvdimm_provider_data(nvdimm); 746 747 if (!test_bit(cmd, &cmd_mask)) { 748 dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); 749 return -EINVAL; 750 } 751 752 /* For CMD_CALL verify pdsm request */ 753 if (cmd == ND_CMD_CALL) { 754 /* Verify the envelope and envelop size */ 755 if (!buf || 756 buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { 757 dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", 758 buf_len); 759 return -EINVAL; 760 } 761 762 /* Verify that the nd_cmd_pkg.nd_family is correct */ 763 nd_cmd = (struct nd_cmd_pkg *)buf; 764 765 if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { 766 dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", 767 nd_cmd->nd_family); 768 return -EINVAL; 769 } 770 771 pdsm = (enum papr_pdsm)nd_cmd->nd_command; 772 773 /* Verify if the pdsm command is valid */ 774 if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { 775 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", 776 pdsm); 777 return -EINVAL; 778 } 779 780 /* Have enough space to hold returned 'nd_pkg_pdsm' header */ 781 if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { 782 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", 783 pdsm); 784 return -EINVAL; 785 } 786 } 787 788 /* Let the command be further processed */ 789 return 0; 790 } 791 792 static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p, 793 union nd_pdsm_payload *payload) 794 { 795 int rc, size; 796 u64 statval; 797 struct papr_scm_perf_stat *stat; 798 struct papr_scm_perf_stats *stats; 799 800 /* Silently fail if fetching performance metrics isn't supported */ 801 if (!p->stat_buffer_len) 802 return 0; 803 804 /* Allocate request buffer enough to hold single performance stat */ 805 size = sizeof(struct papr_scm_perf_stats) + 806 sizeof(struct papr_scm_perf_stat); 807 808 stats = kzalloc(size, GFP_KERNEL); 809 if (!stats) 810 return -ENOMEM; 811 812 stat = &stats->scm_statistic[0]; 813 memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id)); 814 stat->stat_val = 0; 815 816 /* Fetch the fuel gauge and populate it in payload */ 817 rc = drc_pmem_query_stats(p, stats, 1); 818 if (rc < 0) { 819 dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc); 820 goto free_stats; 821 } 822 823 statval = be64_to_cpu(stat->stat_val); 824 dev_dbg(&p->pdev->dev, 825 "Fetched fuel-gauge %llu", statval); 826 payload->health.extension_flags |= 827 PDSM_DIMM_HEALTH_RUN_GAUGE_VALID; 828 payload->health.dimm_fuel_gauge = statval; 829 830 rc = sizeof(struct nd_papr_pdsm_health); 831 832 free_stats: 833 kfree(stats); 834 return rc; 835 } 836 837 /* Add the dirty-shutdown-counter value to the pdsm */ 838 static int papr_pdsm_dsc(struct papr_scm_priv *p, 839 union nd_pdsm_payload *payload) 840 { 841 payload->health.extension_flags |= PDSM_DIMM_DSC_VALID; 842 payload->health.dimm_dsc = p->dirty_shutdown_counter; 843 844 return sizeof(struct nd_papr_pdsm_health); 845 } 846 847 /* Fetch the DIMM health info and populate it in provided package. */ 848 static int papr_pdsm_health(struct papr_scm_priv *p, 849 union nd_pdsm_payload *payload) 850 { 851 int rc; 852 853 /* Ensure dimm health mutex is taken preventing concurrent access */ 854 rc = mutex_lock_interruptible(&p->health_mutex); 855 if (rc) 856 goto out; 857 858 /* Always fetch upto date dimm health data ignoring cached values */ 859 rc = __drc_pmem_query_health(p); 860 if (rc) { 861 mutex_unlock(&p->health_mutex); 862 goto out; 863 } 864 865 /* update health struct with various flags derived from health bitmap */ 866 payload->health = (struct nd_papr_pdsm_health) { 867 .extension_flags = 0, 868 .dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK), 869 .dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK), 870 .dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK), 871 .dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), 872 .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), 873 .dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED), 874 .dimm_health = PAPR_PDSM_DIMM_HEALTHY, 875 }; 876 877 /* Update field dimm_health based on health_bitmap flags */ 878 if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL) 879 payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; 880 else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL) 881 payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; 882 else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY) 883 payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY; 884 885 /* struct populated hence can release the mutex now */ 886 mutex_unlock(&p->health_mutex); 887 888 /* Populate the fuel gauge meter in the payload */ 889 papr_pdsm_fuel_gauge(p, payload); 890 /* Populate the dirty-shutdown-counter field */ 891 papr_pdsm_dsc(p, payload); 892 893 rc = sizeof(struct nd_papr_pdsm_health); 894 895 out: 896 return rc; 897 } 898 899 /* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */ 900 static int papr_pdsm_smart_inject(struct papr_scm_priv *p, 901 union nd_pdsm_payload *payload) 902 { 903 int rc; 904 u32 supported_flags = 0; 905 u64 inject_mask = 0, clear_mask = 0; 906 u64 mask; 907 908 /* Check for individual smart error flags and update inject/clear masks */ 909 if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) { 910 supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; 911 if (payload->smart_inject.fatal_enable) 912 inject_mask |= PAPR_PMEM_HEALTH_FATAL; 913 else 914 clear_mask |= PAPR_PMEM_HEALTH_FATAL; 915 } 916 917 if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) { 918 supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; 919 if (payload->smart_inject.unsafe_shutdown_enable) 920 inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; 921 else 922 clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; 923 } 924 925 dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n", 926 inject_mask, clear_mask); 927 928 /* Prevent concurrent access to dimm health bitmap related members */ 929 rc = mutex_lock_interruptible(&p->health_mutex); 930 if (rc) 931 return rc; 932 933 /* Use inject/clear masks to set health_bitmap_inject_mask */ 934 mask = READ_ONCE(p->health_bitmap_inject_mask); 935 mask = (mask & ~clear_mask) | inject_mask; 936 WRITE_ONCE(p->health_bitmap_inject_mask, mask); 937 938 /* Invalidate cached health bitmap */ 939 p->lasthealth_jiffies = 0; 940 941 mutex_unlock(&p->health_mutex); 942 943 /* Return the supported flags back to userspace */ 944 payload->smart_inject.flags = supported_flags; 945 946 return sizeof(struct nd_papr_pdsm_health); 947 } 948 949 /* 950 * 'struct pdsm_cmd_desc' 951 * Identifies supported PDSMs' expected length of in/out payloads 952 * and pdsm service function. 953 * 954 * size_in : Size of input payload if any in the PDSM request. 955 * size_out : Size of output payload if any in the PDSM request. 956 * service : Service function for the PDSM request. Return semantics: 957 * rc < 0 : Error servicing PDSM and rc indicates the error. 958 * rc >=0 : Serviced successfully and 'rc' indicate number of 959 * bytes written to payload. 960 */ 961 struct pdsm_cmd_desc { 962 u32 size_in; 963 u32 size_out; 964 int (*service)(struct papr_scm_priv *dimm, 965 union nd_pdsm_payload *payload); 966 }; 967 968 /* Holds all supported PDSMs' command descriptors */ 969 static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { 970 [PAPR_PDSM_MIN] = { 971 .size_in = 0, 972 .size_out = 0, 973 .service = NULL, 974 }, 975 /* New PDSM command descriptors to be added below */ 976 977 [PAPR_PDSM_HEALTH] = { 978 .size_in = 0, 979 .size_out = sizeof(struct nd_papr_pdsm_health), 980 .service = papr_pdsm_health, 981 }, 982 983 [PAPR_PDSM_SMART_INJECT] = { 984 .size_in = sizeof(struct nd_papr_pdsm_smart_inject), 985 .size_out = sizeof(struct nd_papr_pdsm_smart_inject), 986 .service = papr_pdsm_smart_inject, 987 }, 988 /* Empty */ 989 [PAPR_PDSM_MAX] = { 990 .size_in = 0, 991 .size_out = 0, 992 .service = NULL, 993 }, 994 }; 995 996 /* Given a valid pdsm cmd return its command descriptor else return NULL */ 997 static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) 998 { 999 if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) 1000 return &__pdsm_cmd_descriptors[cmd]; 1001 1002 return NULL; 1003 } 1004 1005 /* 1006 * For a given pdsm request call an appropriate service function. 1007 * Returns errors if any while handling the pdsm command package. 1008 */ 1009 static int papr_scm_service_pdsm(struct papr_scm_priv *p, 1010 struct nd_cmd_pkg *pkg) 1011 { 1012 /* Get the PDSM header and PDSM command */ 1013 struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; 1014 enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; 1015 const struct pdsm_cmd_desc *pdsc; 1016 int rc; 1017 1018 /* Fetch corresponding pdsm descriptor for validation and servicing */ 1019 pdsc = pdsm_cmd_desc(pdsm); 1020 1021 /* Validate pdsm descriptor */ 1022 /* Ensure that reserved fields are 0 */ 1023 if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { 1024 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", 1025 pdsm); 1026 return -EINVAL; 1027 } 1028 1029 /* If pdsm expects some input, then ensure that the size_in matches */ 1030 if (pdsc->size_in && 1031 pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { 1032 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", 1033 pdsm, pkg->nd_size_in); 1034 return -EINVAL; 1035 } 1036 1037 /* If pdsm wants to return data, then ensure that size_out matches */ 1038 if (pdsc->size_out && 1039 pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { 1040 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", 1041 pdsm, pkg->nd_size_out); 1042 return -EINVAL; 1043 } 1044 1045 /* Service the pdsm */ 1046 if (pdsc->service) { 1047 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); 1048 1049 rc = pdsc->service(p, &pdsm_pkg->payload); 1050 1051 if (rc < 0) { 1052 /* error encountered while servicing pdsm */ 1053 pdsm_pkg->cmd_status = rc; 1054 pkg->nd_fw_size = ND_PDSM_HDR_SIZE; 1055 } else { 1056 /* pdsm serviced and 'rc' bytes written to payload */ 1057 pdsm_pkg->cmd_status = 0; 1058 pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; 1059 } 1060 } else { 1061 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", 1062 pdsm); 1063 pdsm_pkg->cmd_status = -ENOENT; 1064 pkg->nd_fw_size = ND_PDSM_HDR_SIZE; 1065 } 1066 1067 return pdsm_pkg->cmd_status; 1068 } 1069 1070 static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, 1071 struct nvdimm *nvdimm, unsigned int cmd, void *buf, 1072 unsigned int buf_len, int *cmd_rc) 1073 { 1074 struct nd_cmd_get_config_size *get_size_hdr; 1075 struct nd_cmd_pkg *call_pkg = NULL; 1076 struct papr_scm_priv *p; 1077 int rc; 1078 1079 rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); 1080 if (rc) { 1081 pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc); 1082 return rc; 1083 } 1084 1085 /* Use a local variable in case cmd_rc pointer is NULL */ 1086 if (!cmd_rc) 1087 cmd_rc = &rc; 1088 1089 p = nvdimm_provider_data(nvdimm); 1090 1091 switch (cmd) { 1092 case ND_CMD_GET_CONFIG_SIZE: 1093 get_size_hdr = buf; 1094 1095 get_size_hdr->status = 0; 1096 get_size_hdr->max_xfer = 8; 1097 get_size_hdr->config_size = p->metadata_size; 1098 *cmd_rc = 0; 1099 break; 1100 1101 case ND_CMD_GET_CONFIG_DATA: 1102 *cmd_rc = papr_scm_meta_get(p, buf); 1103 break; 1104 1105 case ND_CMD_SET_CONFIG_DATA: 1106 *cmd_rc = papr_scm_meta_set(p, buf); 1107 break; 1108 1109 case ND_CMD_CALL: 1110 call_pkg = (struct nd_cmd_pkg *)buf; 1111 *cmd_rc = papr_scm_service_pdsm(p, call_pkg); 1112 break; 1113 1114 default: 1115 dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); 1116 return -EINVAL; 1117 } 1118 1119 dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc); 1120 1121 return 0; 1122 } 1123 1124 static ssize_t health_bitmap_inject_show(struct device *dev, 1125 struct device_attribute *attr, 1126 char *buf) 1127 { 1128 struct nvdimm *dimm = to_nvdimm(dev); 1129 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1130 1131 return sprintf(buf, "%#llx\n", 1132 READ_ONCE(p->health_bitmap_inject_mask)); 1133 } 1134 1135 static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject); 1136 1137 static ssize_t perf_stats_show(struct device *dev, 1138 struct device_attribute *attr, char *buf) 1139 { 1140 int index; 1141 ssize_t rc; 1142 struct seq_buf s; 1143 struct papr_scm_perf_stat *stat; 1144 struct papr_scm_perf_stats *stats; 1145 struct nvdimm *dimm = to_nvdimm(dev); 1146 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1147 1148 if (!p->stat_buffer_len) 1149 return -ENOENT; 1150 1151 /* Allocate the buffer for phyp where stats are written */ 1152 stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); 1153 if (!stats) 1154 return -ENOMEM; 1155 1156 /* Ask phyp to return all dimm perf stats */ 1157 rc = drc_pmem_query_stats(p, stats, 0); 1158 if (rc) 1159 goto free_stats; 1160 /* 1161 * Go through the returned output buffer and print stats and 1162 * values. Since stat_id is essentially a char string of 1163 * 8 bytes, simply use the string format specifier to print it. 1164 */ 1165 seq_buf_init(&s, buf, PAGE_SIZE); 1166 for (index = 0, stat = stats->scm_statistic; 1167 index < be32_to_cpu(stats->num_statistics); 1168 ++index, ++stat) { 1169 seq_buf_printf(&s, "%.8s = 0x%016llX\n", 1170 stat->stat_id, 1171 be64_to_cpu(stat->stat_val)); 1172 } 1173 1174 free_stats: 1175 kfree(stats); 1176 return rc ? rc : (ssize_t)seq_buf_used(&s); 1177 } 1178 static DEVICE_ATTR_ADMIN_RO(perf_stats); 1179 1180 static ssize_t flags_show(struct device *dev, 1181 struct device_attribute *attr, char *buf) 1182 { 1183 struct nvdimm *dimm = to_nvdimm(dev); 1184 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1185 struct seq_buf s; 1186 u64 health; 1187 int rc; 1188 1189 rc = drc_pmem_query_health(p); 1190 if (rc) 1191 return rc; 1192 1193 /* Copy health_bitmap locally, check masks & update out buffer */ 1194 health = READ_ONCE(p->health_bitmap); 1195 1196 seq_buf_init(&s, buf, PAGE_SIZE); 1197 if (health & PAPR_PMEM_UNARMED_MASK) 1198 seq_buf_printf(&s, "not_armed "); 1199 1200 if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK) 1201 seq_buf_printf(&s, "flush_fail "); 1202 1203 if (health & PAPR_PMEM_BAD_RESTORE_MASK) 1204 seq_buf_printf(&s, "restore_fail "); 1205 1206 if (health & PAPR_PMEM_ENCRYPTED) 1207 seq_buf_printf(&s, "encrypted "); 1208 1209 if (health & PAPR_PMEM_SMART_EVENT_MASK) 1210 seq_buf_printf(&s, "smart_notify "); 1211 1212 if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED) 1213 seq_buf_printf(&s, "scrubbed locked "); 1214 1215 if (seq_buf_used(&s)) 1216 seq_buf_printf(&s, "\n"); 1217 1218 return seq_buf_used(&s); 1219 } 1220 DEVICE_ATTR_RO(flags); 1221 1222 static ssize_t dirty_shutdown_show(struct device *dev, 1223 struct device_attribute *attr, char *buf) 1224 { 1225 struct nvdimm *dimm = to_nvdimm(dev); 1226 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1227 1228 return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter); 1229 } 1230 DEVICE_ATTR_RO(dirty_shutdown); 1231 1232 static umode_t papr_nd_attribute_visible(struct kobject *kobj, 1233 struct attribute *attr, int n) 1234 { 1235 struct device *dev = kobj_to_dev(kobj); 1236 struct nvdimm *nvdimm = to_nvdimm(dev); 1237 struct papr_scm_priv *p = nvdimm_provider_data(nvdimm); 1238 1239 /* For if perf-stats not available remove perf_stats sysfs */ 1240 if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0) 1241 return 0; 1242 1243 return attr->mode; 1244 } 1245 1246 /* papr_scm specific dimm attributes */ 1247 static struct attribute *papr_nd_attributes[] = { 1248 &dev_attr_flags.attr, 1249 &dev_attr_perf_stats.attr, 1250 &dev_attr_dirty_shutdown.attr, 1251 &dev_attr_health_bitmap_inject.attr, 1252 NULL, 1253 }; 1254 1255 static const struct attribute_group papr_nd_attribute_group = { 1256 .name = "papr", 1257 .is_visible = papr_nd_attribute_visible, 1258 .attrs = papr_nd_attributes, 1259 }; 1260 1261 static const struct attribute_group *papr_nd_attr_groups[] = { 1262 &papr_nd_attribute_group, 1263 NULL, 1264 }; 1265 1266 static int papr_scm_nvdimm_init(struct papr_scm_priv *p) 1267 { 1268 struct device *dev = &p->pdev->dev; 1269 struct nd_mapping_desc mapping; 1270 struct nd_region_desc ndr_desc; 1271 unsigned long dimm_flags; 1272 int target_nid, online_nid; 1273 1274 p->bus_desc.ndctl = papr_scm_ndctl; 1275 p->bus_desc.module = THIS_MODULE; 1276 p->bus_desc.of_node = p->pdev->dev.of_node; 1277 p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); 1278 1279 /* Set the dimm command family mask to accept PDSMs */ 1280 set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask); 1281 1282 if (!p->bus_desc.provider_name) 1283 return -ENOMEM; 1284 1285 p->bus = nvdimm_bus_register(NULL, &p->bus_desc); 1286 if (!p->bus) { 1287 dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); 1288 kfree(p->bus_desc.provider_name); 1289 return -ENXIO; 1290 } 1291 1292 dimm_flags = 0; 1293 set_bit(NDD_LABELING, &dimm_flags); 1294 1295 /* 1296 * Check if the nvdimm is unarmed. No locking needed as we are still 1297 * initializing. Ignore error encountered if any. 1298 */ 1299 __drc_pmem_query_health(p); 1300 1301 if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK) 1302 set_bit(NDD_UNARMED, &dimm_flags); 1303 1304 p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, 1305 dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); 1306 if (!p->nvdimm) { 1307 dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); 1308 goto err; 1309 } 1310 1311 if (nvdimm_bus_check_dimm_count(p->bus, 1)) 1312 goto err; 1313 1314 /* now add the region */ 1315 1316 memset(&mapping, 0, sizeof(mapping)); 1317 mapping.nvdimm = p->nvdimm; 1318 mapping.start = 0; 1319 mapping.size = p->blocks * p->block_size; // XXX: potential overflow? 1320 1321 memset(&ndr_desc, 0, sizeof(ndr_desc)); 1322 target_nid = dev_to_node(&p->pdev->dev); 1323 online_nid = numa_map_to_online_node(target_nid); 1324 ndr_desc.numa_node = online_nid; 1325 ndr_desc.target_node = target_nid; 1326 ndr_desc.res = &p->res; 1327 ndr_desc.of_node = p->dn; 1328 ndr_desc.provider_data = p; 1329 ndr_desc.mapping = &mapping; 1330 ndr_desc.num_mappings = 1; 1331 ndr_desc.nd_set = &p->nd_set; 1332 1333 if (p->hcall_flush_required) { 1334 set_bit(ND_REGION_ASYNC, &ndr_desc.flags); 1335 ndr_desc.flush = papr_scm_pmem_flush; 1336 } 1337 1338 if (p->is_volatile) 1339 p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); 1340 else { 1341 set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags); 1342 p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); 1343 } 1344 if (!p->region) { 1345 dev_err(dev, "Error registering region %pR from %pOF\n", 1346 ndr_desc.res, p->dn); 1347 goto err; 1348 } 1349 if (target_nid != online_nid) 1350 dev_info(dev, "Region registered with target node %d and online node %d", 1351 target_nid, online_nid); 1352 1353 mutex_lock(&papr_ndr_lock); 1354 list_add_tail(&p->region_list, &papr_nd_regions); 1355 mutex_unlock(&papr_ndr_lock); 1356 1357 return 0; 1358 1359 err: nvdimm_bus_unregister(p->bus); 1360 kfree(p->bus_desc.provider_name); 1361 return -ENXIO; 1362 } 1363 1364 static void papr_scm_add_badblock(struct nd_region *region, 1365 struct nvdimm_bus *bus, u64 phys_addr) 1366 { 1367 u64 aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); 1368 1369 if (nvdimm_bus_add_badrange(bus, aligned_addr, L1_CACHE_BYTES)) { 1370 pr_err("Bad block registration for 0x%llx failed\n", phys_addr); 1371 return; 1372 } 1373 1374 pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n", 1375 aligned_addr, aligned_addr + L1_CACHE_BYTES); 1376 1377 nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON); 1378 } 1379 1380 static int handle_mce_ue(struct notifier_block *nb, unsigned long val, 1381 void *data) 1382 { 1383 struct machine_check_event *evt = data; 1384 struct papr_scm_priv *p; 1385 u64 phys_addr; 1386 bool found = false; 1387 1388 if (evt->error_type != MCE_ERROR_TYPE_UE) 1389 return NOTIFY_DONE; 1390 1391 if (list_empty(&papr_nd_regions)) 1392 return NOTIFY_DONE; 1393 1394 /* 1395 * The physical address obtained here is PAGE_SIZE aligned, so get the 1396 * exact address from the effective address 1397 */ 1398 phys_addr = evt->u.ue_error.physical_address + 1399 (evt->u.ue_error.effective_address & ~PAGE_MASK); 1400 1401 if (!evt->u.ue_error.physical_address_provided || 1402 !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) 1403 return NOTIFY_DONE; 1404 1405 /* mce notifier is called from a process context, so mutex is safe */ 1406 mutex_lock(&papr_ndr_lock); 1407 list_for_each_entry(p, &papr_nd_regions, region_list) { 1408 if (phys_addr >= p->res.start && phys_addr <= p->res.end) { 1409 found = true; 1410 break; 1411 } 1412 } 1413 1414 if (found) 1415 papr_scm_add_badblock(p->region, p->bus, phys_addr); 1416 1417 mutex_unlock(&papr_ndr_lock); 1418 1419 return found ? NOTIFY_OK : NOTIFY_DONE; 1420 } 1421 1422 static struct notifier_block mce_ue_nb = { 1423 .notifier_call = handle_mce_ue 1424 }; 1425 1426 static int papr_scm_probe(struct platform_device *pdev) 1427 { 1428 struct device_node *dn = pdev->dev.of_node; 1429 u32 drc_index, metadata_size; 1430 u64 blocks, block_size; 1431 struct papr_scm_priv *p; 1432 u8 uuid_raw[UUID_SIZE]; 1433 const char *uuid_str; 1434 ssize_t stat_size; 1435 uuid_t uuid; 1436 int rc; 1437 1438 /* check we have all the required DT properties */ 1439 if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) { 1440 dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn); 1441 return -ENODEV; 1442 } 1443 1444 if (of_property_read_u64(dn, "ibm,block-size", &block_size)) { 1445 dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn); 1446 return -ENODEV; 1447 } 1448 1449 if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) { 1450 dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn); 1451 return -ENODEV; 1452 } 1453 1454 if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) { 1455 dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn); 1456 return -ENODEV; 1457 } 1458 1459 1460 p = kzalloc(sizeof(*p), GFP_KERNEL); 1461 if (!p) 1462 return -ENOMEM; 1463 1464 /* Initialize the dimm mutex */ 1465 mutex_init(&p->health_mutex); 1466 1467 /* optional DT properties */ 1468 of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); 1469 1470 p->dn = dn; 1471 p->drc_index = drc_index; 1472 p->block_size = block_size; 1473 p->blocks = blocks; 1474 p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); 1475 p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); 1476 1477 if (of_property_read_u64(dn, "ibm,persistence-failed-count", 1478 &p->dirty_shutdown_counter)) 1479 p->dirty_shutdown_counter = 0; 1480 1481 /* We just need to ensure that set cookies are unique across */ 1482 uuid_parse(uuid_str, &uuid); 1483 1484 /* 1485 * The cookie1 and cookie2 are not really little endian. 1486 * We store a raw buffer representation of the 1487 * uuid string so that we can compare this with the label 1488 * area cookie irrespective of the endian configuration 1489 * with which the kernel is built. 1490 * 1491 * Historically we stored the cookie in the below format. 1492 * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa 1493 * cookie1 was 0xfd423b0b671b5172 1494 * cookie2 was 0xaabce8cae35b1d8d 1495 */ 1496 export_uuid(uuid_raw, &uuid); 1497 p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]); 1498 p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]); 1499 1500 /* might be zero */ 1501 p->metadata_size = metadata_size; 1502 p->pdev = pdev; 1503 1504 /* request the hypervisor to bind this region to somewhere in memory */ 1505 rc = drc_pmem_bind(p); 1506 1507 /* If phyp says drc memory still bound then force unbound and retry */ 1508 if (rc == H_OVERLAP) 1509 rc = drc_pmem_query_n_bind(p); 1510 1511 if (rc != H_SUCCESS) { 1512 dev_err(&p->pdev->dev, "bind err: %d\n", rc); 1513 rc = -ENXIO; 1514 goto err; 1515 } 1516 1517 /* setup the resource for the newly bound range */ 1518 p->res.start = p->bound_addr; 1519 p->res.end = p->bound_addr + p->blocks * p->block_size - 1; 1520 p->res.name = pdev->name; 1521 p->res.flags = IORESOURCE_MEM; 1522 1523 /* Try retrieving the stat buffer and see if its supported */ 1524 stat_size = drc_pmem_query_stats(p, NULL, 0); 1525 if (stat_size > 0) { 1526 p->stat_buffer_len = stat_size; 1527 dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", 1528 p->stat_buffer_len); 1529 } 1530 1531 rc = papr_scm_nvdimm_init(p); 1532 if (rc) 1533 goto err2; 1534 1535 platform_set_drvdata(pdev, p); 1536 papr_scm_pmu_register(p); 1537 1538 return 0; 1539 1540 err2: drc_pmem_unbind(p); 1541 err: kfree(p); 1542 return rc; 1543 } 1544 1545 static int papr_scm_remove(struct platform_device *pdev) 1546 { 1547 struct papr_scm_priv *p = platform_get_drvdata(pdev); 1548 1549 mutex_lock(&papr_ndr_lock); 1550 list_del(&p->region_list); 1551 mutex_unlock(&papr_ndr_lock); 1552 1553 nvdimm_bus_unregister(p->bus); 1554 drc_pmem_unbind(p); 1555 1556 if (pdev->archdata.priv) 1557 unregister_nvdimm_pmu(pdev->archdata.priv); 1558 1559 pdev->archdata.priv = NULL; 1560 kfree(p->nvdimm_events_map); 1561 kfree(p->bus_desc.provider_name); 1562 kfree(p); 1563 1564 return 0; 1565 } 1566 1567 static const struct of_device_id papr_scm_match[] = { 1568 { .compatible = "ibm,pmemory" }, 1569 { .compatible = "ibm,pmemory-v2" }, 1570 { }, 1571 }; 1572 1573 static struct platform_driver papr_scm_driver = { 1574 .probe = papr_scm_probe, 1575 .remove = papr_scm_remove, 1576 .driver = { 1577 .name = "papr_scm", 1578 .of_match_table = papr_scm_match, 1579 }, 1580 }; 1581 1582 static int __init papr_scm_init(void) 1583 { 1584 int ret; 1585 1586 ret = platform_driver_register(&papr_scm_driver); 1587 if (!ret) 1588 mce_register_notifier(&mce_ue_nb); 1589 1590 return ret; 1591 } 1592 module_init(papr_scm_init); 1593 1594 static void __exit papr_scm_exit(void) 1595 { 1596 mce_unregister_notifier(&mce_ue_nb); 1597 platform_driver_unregister(&papr_scm_driver); 1598 } 1599 module_exit(papr_scm_exit); 1600 1601 MODULE_DEVICE_TABLE(of, papr_scm_match); 1602 MODULE_LICENSE("GPL"); 1603 MODULE_AUTHOR("IBM Corporation"); 1604