// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
 */
#include <linux/memremap.h>
#include <linux/blkdev.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include "nd-core.h"
#include "pfn.h"
#include "nd.h"

static void nd_pfn_release(struct device *dev)
{
	struct nd_region *nd_region = to_nd_region(dev->parent);
	struct nd_pfn *nd_pfn = to_nd_pfn(dev);

	dev_dbg(dev, "trace\n");
	nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns);
	ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id);
	kfree(nd_pfn->uuid);
	kfree(nd_pfn);
}

static struct device_type nd_pfn_device_type = {
	.name = "nd_pfn",
	.release = nd_pfn_release,
};

bool is_nd_pfn(struct device *dev)
{
	return dev ? dev->type == &nd_pfn_device_type : false;
}
EXPORT_SYMBOL(is_nd_pfn);

struct nd_pfn *to_nd_pfn(struct device *dev)
{
	struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev);

	WARN_ON(!is_nd_pfn(dev));
	return nd_pfn;
}
EXPORT_SYMBOL(to_nd_pfn);

static ssize_t mode_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);

	switch (nd_pfn->mode) {
	case PFN_MODE_RAM:
		return sprintf(buf, "ram\n");
	case PFN_MODE_PMEM:
		return sprintf(buf, "pmem\n");
	default:
		return sprintf(buf, "none\n");
	}
}

static ssize_t mode_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc = 0;

	device_lock(dev);
	nvdimm_bus_lock(dev);
	if (dev->driver)
		rc = -EBUSY;
	else {
		size_t n = len - 1;

		if (strncmp(buf, "pmem\n", n) == 0
				|| strncmp(buf, "pmem", n) == 0) {
			nd_pfn->mode = PFN_MODE_PMEM;
		} else if (strncmp(buf, "ram\n", n) == 0
				|| strncmp(buf, "ram", n) == 0)
			nd_pfn->mode = PFN_MODE_RAM;
		else if (strncmp(buf, "none\n", n) == 0
				|| strncmp(buf, "none", n) == 0)
			nd_pfn->mode = PFN_MODE_NONE;
		else
			rc = -EINVAL;
	}
	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
			buf[len - 1] == '\n' ? "" : "\n");
	nvdimm_bus_unlock(dev);
	device_unlock(dev);

	return rc ? rc : len;
}
static DEVICE_ATTR_RW(mode);

static ssize_t align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);

	return sprintf(buf, "%ld\n", nd_pfn->align);
}

static const unsigned long *nd_pfn_supported_alignments(void)
{
	/*
	 * This needs to be a non-static variable because the *_SIZE
	 * macros aren't always constants.
	 */
	const unsigned long supported_alignments[] = {
		PAGE_SIZE,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
		HPAGE_PMD_SIZE,
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
		HPAGE_PUD_SIZE,
#endif
#endif
		0,
	};
	static unsigned long data[ARRAY_SIZE(supported_alignments)];

	memcpy(data, supported_alignments, sizeof(data));

	return data;
}

static ssize_t align_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc;

	device_lock(dev);
	nvdimm_bus_lock(dev);
	rc = nd_size_select_store(dev, buf, &nd_pfn->align,
			nd_pfn_supported_alignments());
	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
			buf[len - 1] == '\n' ? "" : "\n");
	nvdimm_bus_unlock(dev);
	device_unlock(dev);

	return rc ? rc : len;
}
static DEVICE_ATTR_RW(align);

static ssize_t uuid_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);

	if (nd_pfn->uuid)
		return sprintf(buf, "%pUb\n", nd_pfn->uuid);
	return sprintf(buf, "\n");
}

static ssize_t uuid_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc;

	device_lock(dev);
	rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
			buf[len - 1] == '\n' ? "" : "\n");
	device_unlock(dev);

	return rc ? rc : len;
}
static DEVICE_ATTR_RW(uuid);

static ssize_t namespace_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	rc = sprintf(buf, "%s\n", nd_pfn->ndns
			? dev_name(&nd_pfn->ndns->dev) : "");
	nvdimm_bus_unlock(dev);
	return rc;
}

static ssize_t namespace_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc;

	device_lock(dev);
	nvdimm_bus_lock(dev);
	rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
			buf[len - 1] == '\n' ? "" : "\n");
	nvdimm_bus_unlock(dev);
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RW(namespace);

static ssize_t resource_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc;

	device_lock(dev);
	if (dev->driver) {
		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
		u64 offset = __le64_to_cpu(pfn_sb->dataoff);
		struct nd_namespace_common *ndns = nd_pfn->ndns;
		u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
		struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);

		rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start
				+ start_pad + offset);
	} else {
		/* no address to convey if the pfn instance is disabled */
		rc = -ENXIO;
	}
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(resource);

static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc;

	device_lock(dev);
	if (dev->driver) {
		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
		u64 offset = __le64_to_cpu(pfn_sb->dataoff);
		struct nd_namespace_common *ndns = nd_pfn->ndns;
		u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
		u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
		struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);

		rc = sprintf(buf, "%llu\n", (unsigned long long)
				resource_size(&nsio->res) - start_pad
				- end_trunc - offset);
	} else {
		/* no size to convey if the pfn instance is disabled */
		rc = -ENXIO;
	}
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(size);

static ssize_t supported_alignments_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return nd_size_select_show(0, nd_pfn_supported_alignments(), buf);
}
static DEVICE_ATTR_RO(supported_alignments);

static struct attribute *nd_pfn_attributes[] = {
	&dev_attr_mode.attr,
	&dev_attr_namespace.attr,
	&dev_attr_uuid.attr,
	&dev_attr_align.attr,
	&dev_attr_resource.attr,
	&dev_attr_size.attr,
	&dev_attr_supported_alignments.attr,
	NULL,
};

static umode_t pfn_visible(struct kobject *kobj, struct attribute *a, int n)
{
	if (a == &dev_attr_resource.attr)
		return 0400;
	return a->mode;
}

struct attribute_group nd_pfn_attribute_group = {
	.attrs = nd_pfn_attributes,
	.is_visible = pfn_visible,
};

static const struct attribute_group *nd_pfn_attribute_groups[] = {
	&nd_pfn_attribute_group,
	&nd_device_attribute_group,
	&nd_numa_attribute_group,
	NULL,
};

struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
		struct nd_namespace_common *ndns)
{
	struct device *dev;

	if (!nd_pfn)
		return NULL;

	nd_pfn->mode = PFN_MODE_NONE;
	nd_pfn->align = PFN_DEFAULT_ALIGNMENT;
	dev = &nd_pfn->dev;
	device_initialize(&nd_pfn->dev);
	if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
		dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
				dev_name(ndns->claim));
		put_device(dev);
		return NULL;
	}
	return dev;
}

static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region)
{
	struct nd_pfn *nd_pfn;
	struct device *dev;

	nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL);
	if (!nd_pfn)
		return NULL;

	nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL);
	if (nd_pfn->id < 0) {
		kfree(nd_pfn);
		return NULL;
	}

	dev = &nd_pfn->dev;
	dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id);
	dev->groups = nd_pfn_attribute_groups;
	dev->type = &nd_pfn_device_type;
	dev->parent = &nd_region->dev;

	return nd_pfn;
}

struct device *nd_pfn_create(struct nd_region *nd_region)
{
	struct nd_pfn *nd_pfn;
	struct device *dev;

	if (!is_memory(&nd_region->dev))
		return NULL;

	nd_pfn = nd_pfn_alloc(nd_region);
	dev = nd_pfn_devinit(nd_pfn, NULL);

	__nd_device_register(dev);
	return dev;
}

/*
 * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap
 * space associated with the namespace. If the memmap is set to DRAM, then
 * this is a no-op. Since the memmap area is freshly initialized during
 * probe, we have an opportunity to clear any badblocks in this area.
 */
static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
{
	struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent);
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	void *zero_page = page_address(ZERO_PAGE(0));
	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
	int num_bad, meta_num, rc, bb_present;
	sector_t first_bad, meta_start;
	struct nd_namespace_io *nsio;

	if (nd_pfn->mode != PFN_MODE_PMEM)
		return 0;

	nsio = to_nd_namespace_io(&ndns->dev);
	meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9;
	meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start;

	do {
		unsigned long zero_len;
		u64 nsoff;

		bb_present = badblocks_check(&nd_region->bb, meta_start,
				meta_num, &first_bad, &num_bad);
		if (bb_present) {
			dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %llx\n",
					num_bad, first_bad);
			nsoff = ALIGN_DOWN((nd_region->ndr_start
					+ (first_bad << 9)) - nsio->res.start,
					PAGE_SIZE);
			zero_len = ALIGN(num_bad << 9, PAGE_SIZE);
			while (zero_len) {
				unsigned long chunk = min(zero_len, PAGE_SIZE);

				rc = nvdimm_write_bytes(ndns, nsoff, zero_page,
							chunk, 0);
				if (rc)
					break;

				zero_len -= chunk;
				nsoff += chunk;
			}
			if (rc) {
				dev_err(&nd_pfn->dev,
					"error clearing %x badblocks at %llx\n",
					num_bad, first_bad);
				return rc;
			}
		}
	} while (bb_present);

	return 0;
}

int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{
	u64 checksum, offset;
	enum nd_pfn_mode mode;
	struct nd_namespace_io *nsio;
	unsigned long align, start_pad;
	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);

	if (!pfn_sb || !ndns)
		return -ENODEV;

	if (!is_memory(nd_pfn->dev.parent))
		return -ENODEV;

	if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0))
		return -ENXIO;

	if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0)
		return -ENODEV;

	checksum = le64_to_cpu(pfn_sb->checksum);
	pfn_sb->checksum = 0;
	if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb))
		return -ENODEV;
	pfn_sb->checksum = cpu_to_le64(checksum);

	if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0)
		return -ENODEV;

	if (__le16_to_cpu(pfn_sb->version_minor) < 1) {
		pfn_sb->start_pad = 0;
		pfn_sb->end_trunc = 0;
	}

	if (__le16_to_cpu(pfn_sb->version_minor) < 2)
		pfn_sb->align = 0;

	switch (le32_to_cpu(pfn_sb->mode)) {
	case PFN_MODE_RAM:
	case PFN_MODE_PMEM:
		break;
	default:
		return -ENXIO;
	}

	align = le32_to_cpu(pfn_sb->align);
	offset = le64_to_cpu(pfn_sb->dataoff);
	start_pad = le32_to_cpu(pfn_sb->start_pad);
	if (align == 0)
		align = 1UL << ilog2(offset);
	mode = le32_to_cpu(pfn_sb->mode);

	if (!nd_pfn->uuid) {
		/*
		 * When probing a namespace via nd_pfn_probe() the uuid
		 * is NULL (see: nd_pfn_devinit()), so initialize the
		 * settings from the pfn_sb.
		 */
		nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
		if (!nd_pfn->uuid)
			return -ENOMEM;
		nd_pfn->align = align;
		nd_pfn->mode = mode;
	} else {
		/*
		 * When probing a pfn / dax instance we validate the
		 * live settings against the pfn_sb.
		 */
		if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
			return -ENODEV;

		/*
		 * If the uuid validates, but other settings mismatch,
		 * return EINVAL because userspace has managed to change
		 * the configuration without specifying new
		 * identification.
		 */
		if (nd_pfn->align != align || nd_pfn->mode != mode) {
			dev_err(&nd_pfn->dev,
					"init failed, settings mismatch\n");
			dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
					nd_pfn->align, align, nd_pfn->mode,
					mode);
			return -EINVAL;
		}
	}

	if (align > nvdimm_namespace_capacity(ndns)) {
		dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
				align, nvdimm_namespace_capacity(ndns));
		return -EINVAL;
	}

	/*
	 * These warnings are verbose because they can only trigger in
	 * the case where the physical address alignment of the
	 * namespace has changed since the pfn superblock was
	 * established.
	 */
	nsio = to_nd_namespace_io(&ndns->dev);
	if (offset >= resource_size(&nsio->res)) {
		dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
				dev_name(&ndns->dev));
		return -EBUSY;
	}

	if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
			|| !IS_ALIGNED(offset, PAGE_SIZE)) {
		dev_err(&nd_pfn->dev,
				"bad offset: %#llx dax disabled align: %#lx\n",
				offset, align);
		return -ENXIO;
	}

	return nd_pfn_clear_memmap_errors(nd_pfn);
}
EXPORT_SYMBOL(nd_pfn_validate);

int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
{
	int rc;
	struct nd_pfn *nd_pfn;
	struct device *pfn_dev;
	struct nd_pfn_sb *pfn_sb;
	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);

	if (ndns->force_raw)
		return -ENODEV;

	switch (ndns->claim_class) {
	case NVDIMM_CCLASS_NONE:
	case NVDIMM_CCLASS_PFN:
		break;
	default:
		return -ENODEV;
	}

	nvdimm_bus_lock(&ndns->dev);
	nd_pfn = nd_pfn_alloc(nd_region);
	pfn_dev = nd_pfn_devinit(nd_pfn, ndns);
	nvdimm_bus_unlock(&ndns->dev);
	if (!pfn_dev)
		return -ENOMEM;
	pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
	nd_pfn = to_nd_pfn(pfn_dev);
	nd_pfn->pfn_sb = pfn_sb;
	rc = nd_pfn_validate(nd_pfn, PFN_SIG);
	dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>");
	if (rc < 0) {
		nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
		put_device(pfn_dev);
	} else
		__nd_device_register(pfn_dev);

	return rc;
}
EXPORT_SYMBOL(nd_pfn_probe);

static u32 info_block_reserve(void)
{
	return ALIGN(SZ_8K, PAGE_SIZE);
}

/*
 * We hotplug memory at section granularity; pad the reserved area from
 * the previous section base to the namespace base address.
 */
static unsigned long init_altmap_base(resource_size_t base)
{
	unsigned long base_pfn = PHYS_PFN(base);

	return PFN_SECTION_ALIGN_DOWN(base_pfn);
}

static unsigned long init_altmap_reserve(resource_size_t base)
{
	unsigned long reserve = info_block_reserve() >> PAGE_SHIFT;
	unsigned long base_pfn = PHYS_PFN(base);

	reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
	return reserve;
}

static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
	struct resource *res = &pgmap->res;
	struct vmem_altmap *altmap = &pgmap->altmap;
	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
	u64 offset = le64_to_cpu(pfn_sb->dataoff);
	u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
	u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
	u32 reserve = info_block_reserve();
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	resource_size_t base = nsio->res.start + start_pad;
	struct vmem_altmap __altmap = {
		.base_pfn = init_altmap_base(base),
		.reserve = init_altmap_reserve(base),
	};

	memcpy(res, &nsio->res, sizeof(*res));
	res->start += start_pad;
	res->end -= end_trunc;

	if (nd_pfn->mode == PFN_MODE_RAM) {
		if (offset < reserve)
			return -EINVAL;
		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
	} else if (nd_pfn->mode == PFN_MODE_PMEM) {
		nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
					- offset) / PAGE_SIZE);
		if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
			dev_info(&nd_pfn->dev,
					"number of pfns truncated from %lld to %ld\n",
					le64_to_cpu(nd_pfn->pfn_sb->npfns),
					nd_pfn->npfns);
		memcpy(altmap, &__altmap, sizeof(*altmap));
		altmap->free = PHYS_PFN(offset - reserve);
		altmap->alloc = 0;
		pgmap->flags |= PGMAP_ALTMAP_VALID;
	} else
		return -ENXIO;

	return 0;
}

static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
{
	return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
			ALIGN_DOWN(phys, nd_pfn->align));
}

/*
 * Check if pmem collides with 'System RAM', or other regions when
 * section aligned. Trim it accordingly.
 */
static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc)
{
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent);
	const resource_size_t start = nsio->res.start;
	const resource_size_t end = start + resource_size(&nsio->res);
	resource_size_t adjust, size;

	*start_pad = 0;
	*end_trunc = 0;

	adjust = start - PHYS_SECTION_ALIGN_DOWN(start);
	size = resource_size(&nsio->res) + adjust;
	if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM,
				IORES_DESC_NONE) == REGION_MIXED
			|| nd_region_conflict(nd_region, start - adjust, size))
		*start_pad = PHYS_SECTION_ALIGN_UP(start) - start;

	/* Now check that end of the range does not collide. */
	adjust = PHYS_SECTION_ALIGN_UP(end) - end;
	size = resource_size(&nsio->res) + adjust;
	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
				IORES_DESC_NONE) == REGION_MIXED
			|| !IS_ALIGNED(end, nd_pfn->align)
			|| nd_region_conflict(nd_region, start, size))
		*end_trunc = end - phys_pmem_align_down(nd_pfn, end);
}

static int nd_pfn_init(struct nd_pfn *nd_pfn)
{
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	u32 start_pad, end_trunc, reserve = info_block_reserve();
	resource_size_t start, size;
	struct nd_region *nd_region;
	struct nd_pfn_sb *pfn_sb;
	unsigned long npfns;
	phys_addr_t offset;
	const char *sig;
	u64 checksum;
	int rc;

	pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
	if (!pfn_sb)
		return -ENOMEM;

	nd_pfn->pfn_sb = pfn_sb;
	if (is_nd_dax(&nd_pfn->dev))
		sig = DAX_SIG;
	else
		sig = PFN_SIG;
	rc = nd_pfn_validate(nd_pfn, sig);
	if (rc != -ENODEV)
		return rc;

	/* no info block, do init */
	nd_region = to_nd_region(nd_pfn->dev.parent);
	if (nd_region->ro) {
		dev_info(&nd_pfn->dev,
				"%s is read-only, unable to init metadata\n",
				dev_name(&nd_region->dev));
		return -ENXIO;
	}

	memset(pfn_sb, 0, sizeof(*pfn_sb));

	trim_pfn_device(nd_pfn, &start_pad, &end_trunc);
	if (start_pad + end_trunc)
		dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
				dev_name(&ndns->dev), start_pad + end_trunc);

	/*
	 * Note, we use 64 here for the standard size of struct page;
	 * debugging options may cause it to be larger, in which case the
	 * implementation will limit the pfns advertised through
	 * ->direct_access() to those that are included in the memmap.
	 */
	start = nsio->res.start + start_pad;
	size = resource_size(&nsio->res);
	npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - reserve)
			/ PAGE_SIZE);
	if (nd_pfn->mode == PFN_MODE_PMEM) {
		/*
		 * The altmap should be padded out to the block size used
		 * when populating the vmemmap. This *should* be equal to
		 * PMD_SIZE for most architectures.
		 */
		offset = ALIGN(start + reserve + 64 * npfns,
				max(nd_pfn->align, PMD_SIZE)) - start;
	} else if (nd_pfn->mode == PFN_MODE_RAM)
		offset = ALIGN(start + reserve, nd_pfn->align) - start;
	else
		return -ENXIO;

	if (offset + start_pad + end_trunc >= size) {
		dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
				dev_name(&ndns->dev));
		return -ENXIO;
	}

	npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
	pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
	pfn_sb->dataoff = cpu_to_le64(offset);
	pfn_sb->npfns = cpu_to_le64(npfns);
	memcpy(pfn_sb->signature, sig, PFN_SIG_LEN);
	memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
	memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
	pfn_sb->version_major = cpu_to_le16(1);
	pfn_sb->version_minor = cpu_to_le16(2);
	pfn_sb->start_pad = cpu_to_le32(start_pad);
	pfn_sb->end_trunc = cpu_to_le32(end_trunc);
	pfn_sb->align = cpu_to_le32(nd_pfn->align);
	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
	pfn_sb->checksum = cpu_to_le64(checksum);

	return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0);
}

/*
 * Determine the effective resource range and vmem_altmap from an nd_pfn
 * instance.
 */
int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
	int rc;

	if (!nd_pfn->uuid || !nd_pfn->ndns)
		return -ENODEV;

	rc = nd_pfn_init(nd_pfn);
	if (rc)
		return rc;

	/* we need a valid pfn_sb before we can init a dev_pagemap */
	return __nvdimm_setup_pfn(nd_pfn, pgmap);
}
EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);