1 // SPDX-License-Identifier: GPL-2.0 2 3 #define pr_fmt(fmt) "papr-scm: " fmt 4 5 #include <linux/of.h> 6 #include <linux/kernel.h> 7 #include <linux/module.h> 8 #include <linux/ioport.h> 9 #include <linux/slab.h> 10 #include <linux/ndctl.h> 11 #include <linux/sched.h> 12 #include <linux/libnvdimm.h> 13 #include <linux/platform_device.h> 14 #include <linux/delay.h> 15 #include <linux/seq_buf.h> 16 #include <linux/nd.h> 17 18 #include <asm/plpar_wrappers.h> 19 #include <asm/papr_pdsm.h> 20 #include <asm/mce.h> 21 #include <asm/unaligned.h> 22 #include <linux/perf_event.h> 23 24 #define BIND_ANY_ADDR (~0ul) 25 26 #define PAPR_SCM_DIMM_CMD_MASK \ 27 ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ 28 (1ul << ND_CMD_GET_CONFIG_DATA) | \ 29 (1ul << ND_CMD_SET_CONFIG_DATA) | \ 30 (1ul << ND_CMD_CALL)) 31 32 /* DIMM health bitmap bitmap indicators */ 33 /* SCM device is unable to persist memory contents */ 34 #define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) 35 /* SCM device failed to persist memory contents */ 36 #define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) 37 /* SCM device contents are persisted from previous IPL */ 38 #define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2)) 39 /* SCM device contents are not persisted from previous IPL */ 40 #define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) 41 /* SCM device memory life remaining is critically low */ 42 #define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) 43 /* SCM device will be garded off next IPL due to failure */ 44 #define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) 45 /* SCM contents cannot persist due to current platform health status */ 46 #define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) 47 /* SCM device is unable to persist memory contents in certain conditions */ 48 #define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7)) 49 /* SCM device is encrypted */ 50 #define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8)) 51 /* SCM device has been scrubbed and locked */ 52 #define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9)) 53 54 /* Bits status indicators for health bitmap indicating unarmed dimm */ 55 #define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ 56 PAPR_PMEM_HEALTH_UNHEALTHY) 57 58 /* Bits status indicators for health bitmap indicating unflushed dimm */ 59 #define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) 60 61 /* Bits status indicators for health bitmap indicating unrestored dimm */ 62 #define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) 63 64 /* Bit status indicators for smart event notification */ 65 #define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ 66 PAPR_PMEM_HEALTH_FATAL | \ 67 PAPR_PMEM_HEALTH_UNHEALTHY) 68 69 #define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS) 70 #define PAPR_SCM_PERF_STATS_VERSION 0x1 71 72 /* Struct holding a single performance metric */ 73 struct papr_scm_perf_stat { 74 u8 stat_id[8]; 75 __be64 stat_val; 76 } __packed; 77 78 /* Struct exchanged between kernel and PHYP for fetching drc perf stats */ 79 struct papr_scm_perf_stats { 80 u8 eye_catcher[8]; 81 /* Should be PAPR_SCM_PERF_STATS_VERSION */ 82 __be32 stats_version; 83 /* Number of stats following */ 84 __be32 num_statistics; 85 /* zero or more performance matrics */ 86 struct papr_scm_perf_stat scm_statistic[]; 87 } __packed; 88 89 /* private struct associated with each region */ 90 struct papr_scm_priv { 91 struct platform_device *pdev; 92 struct device_node *dn; 93 uint32_t drc_index; 94 uint64_t blocks; 95 uint64_t block_size; 96 int metadata_size; 97 bool is_volatile; 98 bool hcall_flush_required; 99 100 uint64_t bound_addr; 101 102 struct nvdimm_bus_descriptor bus_desc; 103 struct nvdimm_bus *bus; 104 struct nvdimm *nvdimm; 105 struct resource res; 106 struct nd_region *region; 107 struct nd_interleave_set nd_set; 108 struct list_head region_list; 109 110 /* Protect dimm health data from concurrent read/writes */ 111 struct mutex health_mutex; 112 113 /* Last time the health information of the dimm was updated */ 114 unsigned long lasthealth_jiffies; 115 116 /* Health information for the dimm */ 117 u64 health_bitmap; 118 119 /* Holds the last known dirty shutdown counter value */ 120 u64 dirty_shutdown_counter; 121 122 /* length of the stat buffer as expected by phyp */ 123 size_t stat_buffer_len; 124 125 /* The bits which needs to be overridden */ 126 u64 health_bitmap_inject_mask; 127 128 /* array to have event_code and stat_id mappings */ 129 char **nvdimm_events_map; 130 }; 131 132 static int papr_scm_pmem_flush(struct nd_region *nd_region, 133 struct bio *bio __maybe_unused) 134 { 135 struct papr_scm_priv *p = nd_region_provider_data(nd_region); 136 unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0; 137 long rc; 138 139 dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index); 140 141 do { 142 rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token); 143 token = ret_buf[0]; 144 145 /* Check if we are stalled for some time */ 146 if (H_IS_LONG_BUSY(rc)) { 147 msleep(get_longbusy_msecs(rc)); 148 rc = H_BUSY; 149 } else if (rc == H_BUSY) { 150 cond_resched(); 151 } 152 } while (rc == H_BUSY); 153 154 if (rc) { 155 dev_err(&p->pdev->dev, "flush error: %ld", rc); 156 rc = -EIO; 157 } else { 158 dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index); 159 } 160 161 return rc; 162 } 163 164 static LIST_HEAD(papr_nd_regions); 165 static DEFINE_MUTEX(papr_ndr_lock); 166 167 static int drc_pmem_bind(struct papr_scm_priv *p) 168 { 169 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 170 uint64_t saved = 0; 171 uint64_t token; 172 int64_t rc; 173 174 /* 175 * When the hypervisor cannot map all the requested memory in a single 176 * hcall it returns H_BUSY and we call again with the token until 177 * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS 178 * leave the system in an undefined state, so we wait. 179 */ 180 token = 0; 181 182 do { 183 rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0, 184 p->blocks, BIND_ANY_ADDR, token); 185 token = ret[0]; 186 if (!saved) 187 saved = ret[1]; 188 cond_resched(); 189 } while (rc == H_BUSY); 190 191 if (rc) 192 return rc; 193 194 p->bound_addr = saved; 195 dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", 196 p->drc_index, (unsigned long)saved); 197 return rc; 198 } 199 200 static void drc_pmem_unbind(struct papr_scm_priv *p) 201 { 202 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 203 uint64_t token = 0; 204 int64_t rc; 205 206 dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index); 207 208 /* NB: unbind has the same retry requirements as drc_pmem_bind() */ 209 do { 210 211 /* Unbind of all SCM resources associated with drcIndex */ 212 rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC, 213 p->drc_index, token); 214 token = ret[0]; 215 216 /* Check if we are stalled for some time */ 217 if (H_IS_LONG_BUSY(rc)) { 218 msleep(get_longbusy_msecs(rc)); 219 rc = H_BUSY; 220 } else if (rc == H_BUSY) { 221 cond_resched(); 222 } 223 224 } while (rc == H_BUSY); 225 226 if (rc) 227 dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); 228 else 229 dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n", 230 p->drc_index); 231 232 return; 233 } 234 235 static int drc_pmem_query_n_bind(struct papr_scm_priv *p) 236 { 237 unsigned long start_addr; 238 unsigned long end_addr; 239 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 240 int64_t rc; 241 242 243 rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret, 244 p->drc_index, 0); 245 if (rc) 246 goto err_out; 247 start_addr = ret[0]; 248 249 /* Make sure the full region is bound. */ 250 rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret, 251 p->drc_index, p->blocks - 1); 252 if (rc) 253 goto err_out; 254 end_addr = ret[0]; 255 256 if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size)) 257 goto err_out; 258 259 p->bound_addr = start_addr; 260 dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", p->drc_index, start_addr); 261 return rc; 262 263 err_out: 264 dev_info(&p->pdev->dev, 265 "Failed to query, trying an unbind followed by bind"); 266 drc_pmem_unbind(p); 267 return drc_pmem_bind(p); 268 } 269 270 /* 271 * Query the Dimm performance stats from PHYP and copy them (if returned) to 272 * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast 273 * (num_stats + header) bytes. 274 * - If buff_stats == NULL the return value is the size in bytes of the buffer 275 * needed to hold all supported performance-statistics. 276 * - If buff_stats != NULL and num_stats == 0 then we copy all known 277 * performance-statistics to 'buff_stat' and expect to be large enough to 278 * hold them. 279 * - if buff_stats != NULL and num_stats > 0 then copy the requested 280 * performance-statistics to buff_stats. 281 */ 282 static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, 283 struct papr_scm_perf_stats *buff_stats, 284 unsigned int num_stats) 285 { 286 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 287 size_t size; 288 s64 rc; 289 290 /* Setup the out buffer */ 291 if (buff_stats) { 292 memcpy(buff_stats->eye_catcher, 293 PAPR_SCM_PERF_STATS_EYECATCHER, 8); 294 buff_stats->stats_version = 295 cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION); 296 buff_stats->num_statistics = 297 cpu_to_be32(num_stats); 298 299 /* 300 * Calculate the buffer size based on num-stats provided 301 * or use the prefetched max buffer length 302 */ 303 if (num_stats) 304 /* Calculate size from the num_stats */ 305 size = sizeof(struct papr_scm_perf_stats) + 306 num_stats * sizeof(struct papr_scm_perf_stat); 307 else 308 size = p->stat_buffer_len; 309 } else { 310 /* In case of no out buffer ignore the size */ 311 size = 0; 312 } 313 314 /* Do the HCALL asking PHYP for info */ 315 rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index, 316 buff_stats ? virt_to_phys(buff_stats) : 0, 317 size); 318 319 /* Check if the error was due to an unknown stat-id */ 320 if (rc == H_PARTIAL) { 321 dev_err(&p->pdev->dev, 322 "Unknown performance stats, Err:0x%016lX\n", ret[0]); 323 return -ENOENT; 324 } else if (rc == H_AUTHORITY) { 325 dev_info(&p->pdev->dev, 326 "Permission denied while accessing performance stats"); 327 return -EPERM; 328 } else if (rc == H_UNSUPPORTED) { 329 dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); 330 return -EOPNOTSUPP; 331 } else if (rc != H_SUCCESS) { 332 dev_err(&p->pdev->dev, 333 "Failed to query performance stats, Err:%lld\n", rc); 334 return -EIO; 335 336 } else if (!size) { 337 /* Handle case where stat buffer size was requested */ 338 dev_dbg(&p->pdev->dev, 339 "Performance stats size %ld\n", ret[0]); 340 return ret[0]; 341 } 342 343 /* Successfully fetched the requested stats from phyp */ 344 dev_dbg(&p->pdev->dev, 345 "Performance stats returned %d stats\n", 346 be32_to_cpu(buff_stats->num_statistics)); 347 return 0; 348 } 349 350 #ifdef CONFIG_PERF_EVENTS 351 #define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu) 352 353 static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count) 354 { 355 struct papr_scm_perf_stat *stat; 356 struct papr_scm_perf_stats *stats; 357 struct papr_scm_priv *p = (struct papr_scm_priv *)dev->driver_data; 358 int rc, size; 359 360 /* Allocate request buffer enough to hold single performance stat */ 361 size = sizeof(struct papr_scm_perf_stats) + 362 sizeof(struct papr_scm_perf_stat); 363 364 if (!p || !p->nvdimm_events_map) 365 return -EINVAL; 366 367 stats = kzalloc(size, GFP_KERNEL); 368 if (!stats) 369 return -ENOMEM; 370 371 stat = &stats->scm_statistic[0]; 372 memcpy(&stat->stat_id, 373 p->nvdimm_events_map[event->attr.config], 374 sizeof(stat->stat_id)); 375 stat->stat_val = 0; 376 377 rc = drc_pmem_query_stats(p, stats, 1); 378 if (rc < 0) { 379 kfree(stats); 380 return rc; 381 } 382 383 *count = be64_to_cpu(stat->stat_val); 384 kfree(stats); 385 return 0; 386 } 387 388 static int papr_scm_pmu_event_init(struct perf_event *event) 389 { 390 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 391 struct papr_scm_priv *p; 392 393 if (!nd_pmu) 394 return -EINVAL; 395 396 /* test the event attr type for PMU enumeration */ 397 if (event->attr.type != event->pmu->type) 398 return -ENOENT; 399 400 /* it does not support event sampling mode */ 401 if (is_sampling_event(event)) 402 return -EOPNOTSUPP; 403 404 /* no branch sampling */ 405 if (has_branch_stack(event)) 406 return -EOPNOTSUPP; 407 408 p = (struct papr_scm_priv *)nd_pmu->dev->driver_data; 409 if (!p) 410 return -EINVAL; 411 412 /* Invalid eventcode */ 413 if (event->attr.config == 0 || event->attr.config > 16) 414 return -EINVAL; 415 416 return 0; 417 } 418 419 static int papr_scm_pmu_add(struct perf_event *event, int flags) 420 { 421 u64 count; 422 int rc; 423 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 424 425 if (!nd_pmu) 426 return -EINVAL; 427 428 if (flags & PERF_EF_START) { 429 rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &count); 430 if (rc) 431 return rc; 432 433 local64_set(&event->hw.prev_count, count); 434 } 435 436 return 0; 437 } 438 439 static void papr_scm_pmu_read(struct perf_event *event) 440 { 441 u64 prev, now; 442 int rc; 443 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 444 445 if (!nd_pmu) 446 return; 447 448 rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &now); 449 if (rc) 450 return; 451 452 prev = local64_xchg(&event->hw.prev_count, now); 453 local64_add(now - prev, &event->count); 454 } 455 456 static void papr_scm_pmu_del(struct perf_event *event, int flags) 457 { 458 papr_scm_pmu_read(event); 459 } 460 461 static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu) 462 { 463 struct papr_scm_perf_stat *stat; 464 struct papr_scm_perf_stats *stats; 465 char *statid; 466 int index, rc, count; 467 u32 available_events; 468 469 if (!p->stat_buffer_len) 470 return -ENOENT; 471 472 available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats)) 473 / sizeof(struct papr_scm_perf_stat); 474 475 /* Allocate the buffer for phyp where stats are written */ 476 stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); 477 if (!stats) { 478 rc = -ENOMEM; 479 return rc; 480 } 481 482 /* Allocate memory to nvdimm_event_map */ 483 p->nvdimm_events_map = kcalloc(available_events, sizeof(char *), GFP_KERNEL); 484 if (!p->nvdimm_events_map) { 485 rc = -ENOMEM; 486 goto out_stats; 487 } 488 489 /* Called to get list of events supported */ 490 rc = drc_pmem_query_stats(p, stats, 0); 491 if (rc) 492 goto out_nvdimm_events_map; 493 494 for (index = 0, stat = stats->scm_statistic, count = 0; 495 index < available_events; index++, ++stat) { 496 statid = kzalloc(strlen(stat->stat_id) + 1, GFP_KERNEL); 497 if (!statid) { 498 rc = -ENOMEM; 499 goto out_nvdimm_events_map; 500 } 501 502 strcpy(statid, stat->stat_id); 503 p->nvdimm_events_map[count] = statid; 504 count++; 505 } 506 p->nvdimm_events_map[count] = NULL; 507 kfree(stats); 508 return 0; 509 510 out_nvdimm_events_map: 511 kfree(p->nvdimm_events_map); 512 out_stats: 513 kfree(stats); 514 return rc; 515 } 516 517 static void papr_scm_pmu_register(struct papr_scm_priv *p) 518 { 519 struct nvdimm_pmu *nd_pmu; 520 int rc, nodeid; 521 522 nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL); 523 if (!nd_pmu) { 524 rc = -ENOMEM; 525 goto pmu_err_print; 526 } 527 528 rc = papr_scm_pmu_check_events(p, nd_pmu); 529 if (rc) 530 goto pmu_check_events_err; 531 532 nd_pmu->pmu.task_ctx_nr = perf_invalid_context; 533 nd_pmu->pmu.name = nvdimm_name(p->nvdimm); 534 nd_pmu->pmu.event_init = papr_scm_pmu_event_init; 535 nd_pmu->pmu.read = papr_scm_pmu_read; 536 nd_pmu->pmu.add = papr_scm_pmu_add; 537 nd_pmu->pmu.del = papr_scm_pmu_del; 538 539 nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT | 540 PERF_PMU_CAP_NO_EXCLUDE; 541 542 /*updating the cpumask variable */ 543 nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev)); 544 nd_pmu->arch_cpumask = *cpumask_of_node(nodeid); 545 546 rc = register_nvdimm_pmu(nd_pmu, p->pdev); 547 if (rc) 548 goto pmu_register_err; 549 550 /* 551 * Set archdata.priv value to nvdimm_pmu structure, to handle the 552 * unregistering of pmu device. 553 */ 554 p->pdev->archdata.priv = nd_pmu; 555 return; 556 557 pmu_register_err: 558 kfree(p->nvdimm_events_map); 559 pmu_check_events_err: 560 kfree(nd_pmu); 561 pmu_err_print: 562 dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc); 563 } 564 565 #else 566 static void papr_scm_pmu_register(struct papr_scm_priv *p) { } 567 #endif 568 569 /* 570 * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the 571 * health information. 572 */ 573 static int __drc_pmem_query_health(struct papr_scm_priv *p) 574 { 575 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 576 u64 bitmap = 0; 577 long rc; 578 579 /* issue the hcall */ 580 rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); 581 if (rc == H_SUCCESS) 582 bitmap = ret[0] & ret[1]; 583 else if (rc == H_FUNCTION) 584 dev_info_once(&p->pdev->dev, 585 "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap"); 586 else { 587 588 dev_err(&p->pdev->dev, 589 "Failed to query health information, Err:%ld\n", rc); 590 return -ENXIO; 591 } 592 593 p->lasthealth_jiffies = jiffies; 594 /* Allow injecting specific health bits via inject mask. */ 595 if (p->health_bitmap_inject_mask) 596 bitmap = (bitmap & ~p->health_bitmap_inject_mask) | 597 p->health_bitmap_inject_mask; 598 WRITE_ONCE(p->health_bitmap, bitmap); 599 dev_dbg(&p->pdev->dev, 600 "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", 601 ret[0], ret[1]); 602 603 return 0; 604 } 605 606 /* Min interval in seconds for assuming stable dimm health */ 607 #define MIN_HEALTH_QUERY_INTERVAL 60 608 609 /* Query cached health info and if needed call drc_pmem_query_health */ 610 static int drc_pmem_query_health(struct papr_scm_priv *p) 611 { 612 unsigned long cache_timeout; 613 int rc; 614 615 /* Protect concurrent modifications to papr_scm_priv */ 616 rc = mutex_lock_interruptible(&p->health_mutex); 617 if (rc) 618 return rc; 619 620 /* Jiffies offset for which the health data is assumed to be same */ 621 cache_timeout = p->lasthealth_jiffies + 622 msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000); 623 624 /* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */ 625 if (time_after(jiffies, cache_timeout)) 626 rc = __drc_pmem_query_health(p); 627 else 628 /* Assume cached health data is valid */ 629 rc = 0; 630 631 mutex_unlock(&p->health_mutex); 632 return rc; 633 } 634 635 static int papr_scm_meta_get(struct papr_scm_priv *p, 636 struct nd_cmd_get_config_data_hdr *hdr) 637 { 638 unsigned long data[PLPAR_HCALL_BUFSIZE]; 639 unsigned long offset, data_offset; 640 int len, read; 641 int64_t ret; 642 643 if ((hdr->in_offset + hdr->in_length) > p->metadata_size) 644 return -EINVAL; 645 646 for (len = hdr->in_length; len; len -= read) { 647 648 data_offset = hdr->in_length - len; 649 offset = hdr->in_offset + data_offset; 650 651 if (len >= 8) 652 read = 8; 653 else if (len >= 4) 654 read = 4; 655 else if (len >= 2) 656 read = 2; 657 else 658 read = 1; 659 660 ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index, 661 offset, read); 662 663 if (ret == H_PARAMETER) /* bad DRC index */ 664 return -ENODEV; 665 if (ret) 666 return -EINVAL; /* other invalid parameter */ 667 668 switch (read) { 669 case 8: 670 *(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]); 671 break; 672 case 4: 673 *(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff); 674 break; 675 676 case 2: 677 *(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff); 678 break; 679 680 case 1: 681 *(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff); 682 break; 683 } 684 } 685 return 0; 686 } 687 688 static int papr_scm_meta_set(struct papr_scm_priv *p, 689 struct nd_cmd_set_config_hdr *hdr) 690 { 691 unsigned long offset, data_offset; 692 int len, wrote; 693 unsigned long data; 694 __be64 data_be; 695 int64_t ret; 696 697 if ((hdr->in_offset + hdr->in_length) > p->metadata_size) 698 return -EINVAL; 699 700 for (len = hdr->in_length; len; len -= wrote) { 701 702 data_offset = hdr->in_length - len; 703 offset = hdr->in_offset + data_offset; 704 705 if (len >= 8) { 706 data = *(uint64_t *)(hdr->in_buf + data_offset); 707 data_be = cpu_to_be64(data); 708 wrote = 8; 709 } else if (len >= 4) { 710 data = *(uint32_t *)(hdr->in_buf + data_offset); 711 data &= 0xffffffff; 712 data_be = cpu_to_be32(data); 713 wrote = 4; 714 } else if (len >= 2) { 715 data = *(uint16_t *)(hdr->in_buf + data_offset); 716 data &= 0xffff; 717 data_be = cpu_to_be16(data); 718 wrote = 2; 719 } else { 720 data_be = *(uint8_t *)(hdr->in_buf + data_offset); 721 data_be &= 0xff; 722 wrote = 1; 723 } 724 725 ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index, 726 offset, data_be, wrote); 727 if (ret == H_PARAMETER) /* bad DRC index */ 728 return -ENODEV; 729 if (ret) 730 return -EINVAL; /* other invalid parameter */ 731 } 732 733 return 0; 734 } 735 736 /* 737 * Do a sanity checks on the inputs args to dimm-control function and return 738 * '0' if valid. Validation of PDSM payloads happens later in 739 * papr_scm_service_pdsm. 740 */ 741 static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, 742 unsigned int buf_len) 743 { 744 unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; 745 struct nd_cmd_pkg *nd_cmd; 746 struct papr_scm_priv *p; 747 enum papr_pdsm pdsm; 748 749 /* Only dimm-specific calls are supported atm */ 750 if (!nvdimm) 751 return -EINVAL; 752 753 /* get the provider data from struct nvdimm */ 754 p = nvdimm_provider_data(nvdimm); 755 756 if (!test_bit(cmd, &cmd_mask)) { 757 dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); 758 return -EINVAL; 759 } 760 761 /* For CMD_CALL verify pdsm request */ 762 if (cmd == ND_CMD_CALL) { 763 /* Verify the envelope and envelop size */ 764 if (!buf || 765 buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { 766 dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", 767 buf_len); 768 return -EINVAL; 769 } 770 771 /* Verify that the nd_cmd_pkg.nd_family is correct */ 772 nd_cmd = (struct nd_cmd_pkg *)buf; 773 774 if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { 775 dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", 776 nd_cmd->nd_family); 777 return -EINVAL; 778 } 779 780 pdsm = (enum papr_pdsm)nd_cmd->nd_command; 781 782 /* Verify if the pdsm command is valid */ 783 if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { 784 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", 785 pdsm); 786 return -EINVAL; 787 } 788 789 /* Have enough space to hold returned 'nd_pkg_pdsm' header */ 790 if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { 791 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", 792 pdsm); 793 return -EINVAL; 794 } 795 } 796 797 /* Let the command be further processed */ 798 return 0; 799 } 800 801 static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p, 802 union nd_pdsm_payload *payload) 803 { 804 int rc, size; 805 u64 statval; 806 struct papr_scm_perf_stat *stat; 807 struct papr_scm_perf_stats *stats; 808 809 /* Silently fail if fetching performance metrics isn't supported */ 810 if (!p->stat_buffer_len) 811 return 0; 812 813 /* Allocate request buffer enough to hold single performance stat */ 814 size = sizeof(struct papr_scm_perf_stats) + 815 sizeof(struct papr_scm_perf_stat); 816 817 stats = kzalloc(size, GFP_KERNEL); 818 if (!stats) 819 return -ENOMEM; 820 821 stat = &stats->scm_statistic[0]; 822 memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id)); 823 stat->stat_val = 0; 824 825 /* Fetch the fuel gauge and populate it in payload */ 826 rc = drc_pmem_query_stats(p, stats, 1); 827 if (rc < 0) { 828 dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc); 829 goto free_stats; 830 } 831 832 statval = be64_to_cpu(stat->stat_val); 833 dev_dbg(&p->pdev->dev, 834 "Fetched fuel-gauge %llu", statval); 835 payload->health.extension_flags |= 836 PDSM_DIMM_HEALTH_RUN_GAUGE_VALID; 837 payload->health.dimm_fuel_gauge = statval; 838 839 rc = sizeof(struct nd_papr_pdsm_health); 840 841 free_stats: 842 kfree(stats); 843 return rc; 844 } 845 846 /* Add the dirty-shutdown-counter value to the pdsm */ 847 static int papr_pdsm_dsc(struct papr_scm_priv *p, 848 union nd_pdsm_payload *payload) 849 { 850 payload->health.extension_flags |= PDSM_DIMM_DSC_VALID; 851 payload->health.dimm_dsc = p->dirty_shutdown_counter; 852 853 return sizeof(struct nd_papr_pdsm_health); 854 } 855 856 /* Fetch the DIMM health info and populate it in provided package. */ 857 static int papr_pdsm_health(struct papr_scm_priv *p, 858 union nd_pdsm_payload *payload) 859 { 860 int rc; 861 862 /* Ensure dimm health mutex is taken preventing concurrent access */ 863 rc = mutex_lock_interruptible(&p->health_mutex); 864 if (rc) 865 goto out; 866 867 /* Always fetch upto date dimm health data ignoring cached values */ 868 rc = __drc_pmem_query_health(p); 869 if (rc) { 870 mutex_unlock(&p->health_mutex); 871 goto out; 872 } 873 874 /* update health struct with various flags derived from health bitmap */ 875 payload->health = (struct nd_papr_pdsm_health) { 876 .extension_flags = 0, 877 .dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK), 878 .dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK), 879 .dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK), 880 .dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), 881 .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), 882 .dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED), 883 .dimm_health = PAPR_PDSM_DIMM_HEALTHY, 884 }; 885 886 /* Update field dimm_health based on health_bitmap flags */ 887 if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL) 888 payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; 889 else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL) 890 payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; 891 else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY) 892 payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY; 893 894 /* struct populated hence can release the mutex now */ 895 mutex_unlock(&p->health_mutex); 896 897 /* Populate the fuel gauge meter in the payload */ 898 papr_pdsm_fuel_gauge(p, payload); 899 /* Populate the dirty-shutdown-counter field */ 900 papr_pdsm_dsc(p, payload); 901 902 rc = sizeof(struct nd_papr_pdsm_health); 903 904 out: 905 return rc; 906 } 907 908 /* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */ 909 static int papr_pdsm_smart_inject(struct papr_scm_priv *p, 910 union nd_pdsm_payload *payload) 911 { 912 int rc; 913 u32 supported_flags = 0; 914 u64 inject_mask = 0, clear_mask = 0; 915 u64 mask; 916 917 /* Check for individual smart error flags and update inject/clear masks */ 918 if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) { 919 supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; 920 if (payload->smart_inject.fatal_enable) 921 inject_mask |= PAPR_PMEM_HEALTH_FATAL; 922 else 923 clear_mask |= PAPR_PMEM_HEALTH_FATAL; 924 } 925 926 if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) { 927 supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; 928 if (payload->smart_inject.unsafe_shutdown_enable) 929 inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; 930 else 931 clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; 932 } 933 934 dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n", 935 inject_mask, clear_mask); 936 937 /* Prevent concurrent access to dimm health bitmap related members */ 938 rc = mutex_lock_interruptible(&p->health_mutex); 939 if (rc) 940 return rc; 941 942 /* Use inject/clear masks to set health_bitmap_inject_mask */ 943 mask = READ_ONCE(p->health_bitmap_inject_mask); 944 mask = (mask & ~clear_mask) | inject_mask; 945 WRITE_ONCE(p->health_bitmap_inject_mask, mask); 946 947 /* Invalidate cached health bitmap */ 948 p->lasthealth_jiffies = 0; 949 950 mutex_unlock(&p->health_mutex); 951 952 /* Return the supported flags back to userspace */ 953 payload->smart_inject.flags = supported_flags; 954 955 return sizeof(struct nd_papr_pdsm_health); 956 } 957 958 /* 959 * 'struct pdsm_cmd_desc' 960 * Identifies supported PDSMs' expected length of in/out payloads 961 * and pdsm service function. 962 * 963 * size_in : Size of input payload if any in the PDSM request. 964 * size_out : Size of output payload if any in the PDSM request. 965 * service : Service function for the PDSM request. Return semantics: 966 * rc < 0 : Error servicing PDSM and rc indicates the error. 967 * rc >=0 : Serviced successfully and 'rc' indicate number of 968 * bytes written to payload. 969 */ 970 struct pdsm_cmd_desc { 971 u32 size_in; 972 u32 size_out; 973 int (*service)(struct papr_scm_priv *dimm, 974 union nd_pdsm_payload *payload); 975 }; 976 977 /* Holds all supported PDSMs' command descriptors */ 978 static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { 979 [PAPR_PDSM_MIN] = { 980 .size_in = 0, 981 .size_out = 0, 982 .service = NULL, 983 }, 984 /* New PDSM command descriptors to be added below */ 985 986 [PAPR_PDSM_HEALTH] = { 987 .size_in = 0, 988 .size_out = sizeof(struct nd_papr_pdsm_health), 989 .service = papr_pdsm_health, 990 }, 991 992 [PAPR_PDSM_SMART_INJECT] = { 993 .size_in = sizeof(struct nd_papr_pdsm_smart_inject), 994 .size_out = sizeof(struct nd_papr_pdsm_smart_inject), 995 .service = papr_pdsm_smart_inject, 996 }, 997 /* Empty */ 998 [PAPR_PDSM_MAX] = { 999 .size_in = 0, 1000 .size_out = 0, 1001 .service = NULL, 1002 }, 1003 }; 1004 1005 /* Given a valid pdsm cmd return its command descriptor else return NULL */ 1006 static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) 1007 { 1008 if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) 1009 return &__pdsm_cmd_descriptors[cmd]; 1010 1011 return NULL; 1012 } 1013 1014 /* 1015 * For a given pdsm request call an appropriate service function. 1016 * Returns errors if any while handling the pdsm command package. 1017 */ 1018 static int papr_scm_service_pdsm(struct papr_scm_priv *p, 1019 struct nd_cmd_pkg *pkg) 1020 { 1021 /* Get the PDSM header and PDSM command */ 1022 struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; 1023 enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; 1024 const struct pdsm_cmd_desc *pdsc; 1025 int rc; 1026 1027 /* Fetch corresponding pdsm descriptor for validation and servicing */ 1028 pdsc = pdsm_cmd_desc(pdsm); 1029 1030 /* Validate pdsm descriptor */ 1031 /* Ensure that reserved fields are 0 */ 1032 if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { 1033 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", 1034 pdsm); 1035 return -EINVAL; 1036 } 1037 1038 /* If pdsm expects some input, then ensure that the size_in matches */ 1039 if (pdsc->size_in && 1040 pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { 1041 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", 1042 pdsm, pkg->nd_size_in); 1043 return -EINVAL; 1044 } 1045 1046 /* If pdsm wants to return data, then ensure that size_out matches */ 1047 if (pdsc->size_out && 1048 pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { 1049 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", 1050 pdsm, pkg->nd_size_out); 1051 return -EINVAL; 1052 } 1053 1054 /* Service the pdsm */ 1055 if (pdsc->service) { 1056 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); 1057 1058 rc = pdsc->service(p, &pdsm_pkg->payload); 1059 1060 if (rc < 0) { 1061 /* error encountered while servicing pdsm */ 1062 pdsm_pkg->cmd_status = rc; 1063 pkg->nd_fw_size = ND_PDSM_HDR_SIZE; 1064 } else { 1065 /* pdsm serviced and 'rc' bytes written to payload */ 1066 pdsm_pkg->cmd_status = 0; 1067 pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; 1068 } 1069 } else { 1070 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", 1071 pdsm); 1072 pdsm_pkg->cmd_status = -ENOENT; 1073 pkg->nd_fw_size = ND_PDSM_HDR_SIZE; 1074 } 1075 1076 return pdsm_pkg->cmd_status; 1077 } 1078 1079 static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, 1080 struct nvdimm *nvdimm, unsigned int cmd, void *buf, 1081 unsigned int buf_len, int *cmd_rc) 1082 { 1083 struct nd_cmd_get_config_size *get_size_hdr; 1084 struct nd_cmd_pkg *call_pkg = NULL; 1085 struct papr_scm_priv *p; 1086 int rc; 1087 1088 rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); 1089 if (rc) { 1090 pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc); 1091 return rc; 1092 } 1093 1094 /* Use a local variable in case cmd_rc pointer is NULL */ 1095 if (!cmd_rc) 1096 cmd_rc = &rc; 1097 1098 p = nvdimm_provider_data(nvdimm); 1099 1100 switch (cmd) { 1101 case ND_CMD_GET_CONFIG_SIZE: 1102 get_size_hdr = buf; 1103 1104 get_size_hdr->status = 0; 1105 get_size_hdr->max_xfer = 8; 1106 get_size_hdr->config_size = p->metadata_size; 1107 *cmd_rc = 0; 1108 break; 1109 1110 case ND_CMD_GET_CONFIG_DATA: 1111 *cmd_rc = papr_scm_meta_get(p, buf); 1112 break; 1113 1114 case ND_CMD_SET_CONFIG_DATA: 1115 *cmd_rc = papr_scm_meta_set(p, buf); 1116 break; 1117 1118 case ND_CMD_CALL: 1119 call_pkg = (struct nd_cmd_pkg *)buf; 1120 *cmd_rc = papr_scm_service_pdsm(p, call_pkg); 1121 break; 1122 1123 default: 1124 dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); 1125 return -EINVAL; 1126 } 1127 1128 dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc); 1129 1130 return 0; 1131 } 1132 1133 static ssize_t health_bitmap_inject_show(struct device *dev, 1134 struct device_attribute *attr, 1135 char *buf) 1136 { 1137 struct nvdimm *dimm = to_nvdimm(dev); 1138 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1139 1140 return sprintf(buf, "%#llx\n", 1141 READ_ONCE(p->health_bitmap_inject_mask)); 1142 } 1143 1144 static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject); 1145 1146 static ssize_t perf_stats_show(struct device *dev, 1147 struct device_attribute *attr, char *buf) 1148 { 1149 int index; 1150 ssize_t rc; 1151 struct seq_buf s; 1152 struct papr_scm_perf_stat *stat; 1153 struct papr_scm_perf_stats *stats; 1154 struct nvdimm *dimm = to_nvdimm(dev); 1155 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1156 1157 if (!p->stat_buffer_len) 1158 return -ENOENT; 1159 1160 /* Allocate the buffer for phyp where stats are written */ 1161 stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); 1162 if (!stats) 1163 return -ENOMEM; 1164 1165 /* Ask phyp to return all dimm perf stats */ 1166 rc = drc_pmem_query_stats(p, stats, 0); 1167 if (rc) 1168 goto free_stats; 1169 /* 1170 * Go through the returned output buffer and print stats and 1171 * values. Since stat_id is essentially a char string of 1172 * 8 bytes, simply use the string format specifier to print it. 1173 */ 1174 seq_buf_init(&s, buf, PAGE_SIZE); 1175 for (index = 0, stat = stats->scm_statistic; 1176 index < be32_to_cpu(stats->num_statistics); 1177 ++index, ++stat) { 1178 seq_buf_printf(&s, "%.8s = 0x%016llX\n", 1179 stat->stat_id, 1180 be64_to_cpu(stat->stat_val)); 1181 } 1182 1183 free_stats: 1184 kfree(stats); 1185 return rc ? rc : (ssize_t)seq_buf_used(&s); 1186 } 1187 static DEVICE_ATTR_ADMIN_RO(perf_stats); 1188 1189 static ssize_t flags_show(struct device *dev, 1190 struct device_attribute *attr, char *buf) 1191 { 1192 struct nvdimm *dimm = to_nvdimm(dev); 1193 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1194 struct seq_buf s; 1195 u64 health; 1196 int rc; 1197 1198 rc = drc_pmem_query_health(p); 1199 if (rc) 1200 return rc; 1201 1202 /* Copy health_bitmap locally, check masks & update out buffer */ 1203 health = READ_ONCE(p->health_bitmap); 1204 1205 seq_buf_init(&s, buf, PAGE_SIZE); 1206 if (health & PAPR_PMEM_UNARMED_MASK) 1207 seq_buf_printf(&s, "not_armed "); 1208 1209 if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK) 1210 seq_buf_printf(&s, "flush_fail "); 1211 1212 if (health & PAPR_PMEM_BAD_RESTORE_MASK) 1213 seq_buf_printf(&s, "restore_fail "); 1214 1215 if (health & PAPR_PMEM_ENCRYPTED) 1216 seq_buf_printf(&s, "encrypted "); 1217 1218 if (health & PAPR_PMEM_SMART_EVENT_MASK) 1219 seq_buf_printf(&s, "smart_notify "); 1220 1221 if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED) 1222 seq_buf_printf(&s, "scrubbed locked "); 1223 1224 if (seq_buf_used(&s)) 1225 seq_buf_printf(&s, "\n"); 1226 1227 return seq_buf_used(&s); 1228 } 1229 DEVICE_ATTR_RO(flags); 1230 1231 static ssize_t dirty_shutdown_show(struct device *dev, 1232 struct device_attribute *attr, char *buf) 1233 { 1234 struct nvdimm *dimm = to_nvdimm(dev); 1235 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1236 1237 return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter); 1238 } 1239 DEVICE_ATTR_RO(dirty_shutdown); 1240 1241 static umode_t papr_nd_attribute_visible(struct kobject *kobj, 1242 struct attribute *attr, int n) 1243 { 1244 struct device *dev = kobj_to_dev(kobj); 1245 struct nvdimm *nvdimm = to_nvdimm(dev); 1246 struct papr_scm_priv *p = nvdimm_provider_data(nvdimm); 1247 1248 /* For if perf-stats not available remove perf_stats sysfs */ 1249 if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0) 1250 return 0; 1251 1252 return attr->mode; 1253 } 1254 1255 /* papr_scm specific dimm attributes */ 1256 static struct attribute *papr_nd_attributes[] = { 1257 &dev_attr_flags.attr, 1258 &dev_attr_perf_stats.attr, 1259 &dev_attr_dirty_shutdown.attr, 1260 &dev_attr_health_bitmap_inject.attr, 1261 NULL, 1262 }; 1263 1264 static const struct attribute_group papr_nd_attribute_group = { 1265 .name = "papr", 1266 .is_visible = papr_nd_attribute_visible, 1267 .attrs = papr_nd_attributes, 1268 }; 1269 1270 static const struct attribute_group *papr_nd_attr_groups[] = { 1271 &papr_nd_attribute_group, 1272 NULL, 1273 }; 1274 1275 static int papr_scm_nvdimm_init(struct papr_scm_priv *p) 1276 { 1277 struct device *dev = &p->pdev->dev; 1278 struct nd_mapping_desc mapping; 1279 struct nd_region_desc ndr_desc; 1280 unsigned long dimm_flags; 1281 int target_nid, online_nid; 1282 1283 p->bus_desc.ndctl = papr_scm_ndctl; 1284 p->bus_desc.module = THIS_MODULE; 1285 p->bus_desc.of_node = p->pdev->dev.of_node; 1286 p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); 1287 1288 /* Set the dimm command family mask to accept PDSMs */ 1289 set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask); 1290 1291 if (!p->bus_desc.provider_name) 1292 return -ENOMEM; 1293 1294 p->bus = nvdimm_bus_register(NULL, &p->bus_desc); 1295 if (!p->bus) { 1296 dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); 1297 kfree(p->bus_desc.provider_name); 1298 return -ENXIO; 1299 } 1300 1301 dimm_flags = 0; 1302 set_bit(NDD_LABELING, &dimm_flags); 1303 1304 /* 1305 * Check if the nvdimm is unarmed. No locking needed as we are still 1306 * initializing. Ignore error encountered if any. 1307 */ 1308 __drc_pmem_query_health(p); 1309 1310 if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK) 1311 set_bit(NDD_UNARMED, &dimm_flags); 1312 1313 p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, 1314 dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); 1315 if (!p->nvdimm) { 1316 dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); 1317 goto err; 1318 } 1319 1320 if (nvdimm_bus_check_dimm_count(p->bus, 1)) 1321 goto err; 1322 1323 /* now add the region */ 1324 1325 memset(&mapping, 0, sizeof(mapping)); 1326 mapping.nvdimm = p->nvdimm; 1327 mapping.start = 0; 1328 mapping.size = p->blocks * p->block_size; // XXX: potential overflow? 1329 1330 memset(&ndr_desc, 0, sizeof(ndr_desc)); 1331 target_nid = dev_to_node(&p->pdev->dev); 1332 online_nid = numa_map_to_online_node(target_nid); 1333 ndr_desc.numa_node = online_nid; 1334 ndr_desc.target_node = target_nid; 1335 ndr_desc.res = &p->res; 1336 ndr_desc.of_node = p->dn; 1337 ndr_desc.provider_data = p; 1338 ndr_desc.mapping = &mapping; 1339 ndr_desc.num_mappings = 1; 1340 ndr_desc.nd_set = &p->nd_set; 1341 1342 if (p->hcall_flush_required) { 1343 set_bit(ND_REGION_ASYNC, &ndr_desc.flags); 1344 ndr_desc.flush = papr_scm_pmem_flush; 1345 } 1346 1347 if (p->is_volatile) 1348 p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); 1349 else { 1350 set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags); 1351 p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); 1352 } 1353 if (!p->region) { 1354 dev_err(dev, "Error registering region %pR from %pOF\n", 1355 ndr_desc.res, p->dn); 1356 goto err; 1357 } 1358 if (target_nid != online_nid) 1359 dev_info(dev, "Region registered with target node %d and online node %d", 1360 target_nid, online_nid); 1361 1362 mutex_lock(&papr_ndr_lock); 1363 list_add_tail(&p->region_list, &papr_nd_regions); 1364 mutex_unlock(&papr_ndr_lock); 1365 1366 return 0; 1367 1368 err: nvdimm_bus_unregister(p->bus); 1369 kfree(p->bus_desc.provider_name); 1370 return -ENXIO; 1371 } 1372 1373 static void papr_scm_add_badblock(struct nd_region *region, 1374 struct nvdimm_bus *bus, u64 phys_addr) 1375 { 1376 u64 aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); 1377 1378 if (nvdimm_bus_add_badrange(bus, aligned_addr, L1_CACHE_BYTES)) { 1379 pr_err("Bad block registration for 0x%llx failed\n", phys_addr); 1380 return; 1381 } 1382 1383 pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n", 1384 aligned_addr, aligned_addr + L1_CACHE_BYTES); 1385 1386 nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON); 1387 } 1388 1389 static int handle_mce_ue(struct notifier_block *nb, unsigned long val, 1390 void *data) 1391 { 1392 struct machine_check_event *evt = data; 1393 struct papr_scm_priv *p; 1394 u64 phys_addr; 1395 bool found = false; 1396 1397 if (evt->error_type != MCE_ERROR_TYPE_UE) 1398 return NOTIFY_DONE; 1399 1400 if (list_empty(&papr_nd_regions)) 1401 return NOTIFY_DONE; 1402 1403 /* 1404 * The physical address obtained here is PAGE_SIZE aligned, so get the 1405 * exact address from the effective address 1406 */ 1407 phys_addr = evt->u.ue_error.physical_address + 1408 (evt->u.ue_error.effective_address & ~PAGE_MASK); 1409 1410 if (!evt->u.ue_error.physical_address_provided || 1411 !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) 1412 return NOTIFY_DONE; 1413 1414 /* mce notifier is called from a process context, so mutex is safe */ 1415 mutex_lock(&papr_ndr_lock); 1416 list_for_each_entry(p, &papr_nd_regions, region_list) { 1417 if (phys_addr >= p->res.start && phys_addr <= p->res.end) { 1418 found = true; 1419 break; 1420 } 1421 } 1422 1423 if (found) 1424 papr_scm_add_badblock(p->region, p->bus, phys_addr); 1425 1426 mutex_unlock(&papr_ndr_lock); 1427 1428 return found ? NOTIFY_OK : NOTIFY_DONE; 1429 } 1430 1431 static struct notifier_block mce_ue_nb = { 1432 .notifier_call = handle_mce_ue 1433 }; 1434 1435 static int papr_scm_probe(struct platform_device *pdev) 1436 { 1437 struct device_node *dn = pdev->dev.of_node; 1438 u32 drc_index, metadata_size; 1439 u64 blocks, block_size; 1440 struct papr_scm_priv *p; 1441 u8 uuid_raw[UUID_SIZE]; 1442 const char *uuid_str; 1443 ssize_t stat_size; 1444 uuid_t uuid; 1445 int rc; 1446 1447 /* check we have all the required DT properties */ 1448 if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) { 1449 dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn); 1450 return -ENODEV; 1451 } 1452 1453 if (of_property_read_u64(dn, "ibm,block-size", &block_size)) { 1454 dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn); 1455 return -ENODEV; 1456 } 1457 1458 if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) { 1459 dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn); 1460 return -ENODEV; 1461 } 1462 1463 if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) { 1464 dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn); 1465 return -ENODEV; 1466 } 1467 1468 1469 p = kzalloc(sizeof(*p), GFP_KERNEL); 1470 if (!p) 1471 return -ENOMEM; 1472 1473 /* Initialize the dimm mutex */ 1474 mutex_init(&p->health_mutex); 1475 1476 /* optional DT properties */ 1477 of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); 1478 1479 p->dn = dn; 1480 p->drc_index = drc_index; 1481 p->block_size = block_size; 1482 p->blocks = blocks; 1483 p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); 1484 p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); 1485 1486 if (of_property_read_u64(dn, "ibm,persistence-failed-count", 1487 &p->dirty_shutdown_counter)) 1488 p->dirty_shutdown_counter = 0; 1489 1490 /* We just need to ensure that set cookies are unique across */ 1491 uuid_parse(uuid_str, &uuid); 1492 1493 /* 1494 * The cookie1 and cookie2 are not really little endian. 1495 * We store a raw buffer representation of the 1496 * uuid string so that we can compare this with the label 1497 * area cookie irrespective of the endian configuration 1498 * with which the kernel is built. 1499 * 1500 * Historically we stored the cookie in the below format. 1501 * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa 1502 * cookie1 was 0xfd423b0b671b5172 1503 * cookie2 was 0xaabce8cae35b1d8d 1504 */ 1505 export_uuid(uuid_raw, &uuid); 1506 p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]); 1507 p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]); 1508 1509 /* might be zero */ 1510 p->metadata_size = metadata_size; 1511 p->pdev = pdev; 1512 1513 /* request the hypervisor to bind this region to somewhere in memory */ 1514 rc = drc_pmem_bind(p); 1515 1516 /* If phyp says drc memory still bound then force unbound and retry */ 1517 if (rc == H_OVERLAP) 1518 rc = drc_pmem_query_n_bind(p); 1519 1520 if (rc != H_SUCCESS) { 1521 dev_err(&p->pdev->dev, "bind err: %d\n", rc); 1522 rc = -ENXIO; 1523 goto err; 1524 } 1525 1526 /* setup the resource for the newly bound range */ 1527 p->res.start = p->bound_addr; 1528 p->res.end = p->bound_addr + p->blocks * p->block_size - 1; 1529 p->res.name = pdev->name; 1530 p->res.flags = IORESOURCE_MEM; 1531 1532 /* Try retrieving the stat buffer and see if its supported */ 1533 stat_size = drc_pmem_query_stats(p, NULL, 0); 1534 if (stat_size > 0) { 1535 p->stat_buffer_len = stat_size; 1536 dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", 1537 p->stat_buffer_len); 1538 } 1539 1540 rc = papr_scm_nvdimm_init(p); 1541 if (rc) 1542 goto err2; 1543 1544 platform_set_drvdata(pdev, p); 1545 papr_scm_pmu_register(p); 1546 1547 return 0; 1548 1549 err2: drc_pmem_unbind(p); 1550 err: kfree(p); 1551 return rc; 1552 } 1553 1554 static int papr_scm_remove(struct platform_device *pdev) 1555 { 1556 struct papr_scm_priv *p = platform_get_drvdata(pdev); 1557 1558 mutex_lock(&papr_ndr_lock); 1559 list_del(&p->region_list); 1560 mutex_unlock(&papr_ndr_lock); 1561 1562 nvdimm_bus_unregister(p->bus); 1563 drc_pmem_unbind(p); 1564 1565 if (pdev->archdata.priv) 1566 unregister_nvdimm_pmu(pdev->archdata.priv); 1567 1568 pdev->archdata.priv = NULL; 1569 kfree(p->nvdimm_events_map); 1570 kfree(p->bus_desc.provider_name); 1571 kfree(p); 1572 1573 return 0; 1574 } 1575 1576 static const struct of_device_id papr_scm_match[] = { 1577 { .compatible = "ibm,pmemory" }, 1578 { .compatible = "ibm,pmemory-v2" }, 1579 { }, 1580 }; 1581 1582 static struct platform_driver papr_scm_driver = { 1583 .probe = papr_scm_probe, 1584 .remove = papr_scm_remove, 1585 .driver = { 1586 .name = "papr_scm", 1587 .of_match_table = papr_scm_match, 1588 }, 1589 }; 1590 1591 static int __init papr_scm_init(void) 1592 { 1593 int ret; 1594 1595 ret = platform_driver_register(&papr_scm_driver); 1596 if (!ret) 1597 mce_register_notifier(&mce_ue_nb); 1598 1599 return ret; 1600 } 1601 module_init(papr_scm_init); 1602 1603 static void __exit papr_scm_exit(void) 1604 { 1605 mce_unregister_notifier(&mce_ue_nb); 1606 platform_driver_unregister(&papr_scm_driver); 1607 } 1608 module_exit(papr_scm_exit); 1609 1610 MODULE_DEVICE_TABLE(of, papr_scm_match); 1611 MODULE_LICENSE("GPL"); 1612 MODULE_AUTHOR("IBM Corporation"); 1613