1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 4 */ 5 #include <linux/memremap.h> 6 #include <linux/blkdev.h> 7 #include <linux/device.h> 8 #include <linux/genhd.h> 9 #include <linux/sizes.h> 10 #include <linux/slab.h> 11 #include <linux/fs.h> 12 #include <linux/mm.h> 13 #include "nd-core.h" 14 #include "pfn.h" 15 #include "nd.h" 16 17 static void nd_pfn_release(struct device *dev) 18 { 19 struct nd_region *nd_region = to_nd_region(dev->parent); 20 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 21 22 dev_dbg(dev, "trace\n"); 23 nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); 24 ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id); 25 kfree(nd_pfn->uuid); 26 kfree(nd_pfn); 27 } 28 29 struct nd_pfn *to_nd_pfn(struct device *dev) 30 { 31 struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev); 32 33 WARN_ON(!is_nd_pfn(dev)); 34 return nd_pfn; 35 } 36 EXPORT_SYMBOL(to_nd_pfn); 37 38 static ssize_t mode_show(struct device *dev, 39 struct device_attribute *attr, char *buf) 40 { 41 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 42 43 switch (nd_pfn->mode) { 44 case PFN_MODE_RAM: 45 return sprintf(buf, "ram\n"); 46 case PFN_MODE_PMEM: 47 return sprintf(buf, "pmem\n"); 48 default: 49 return sprintf(buf, "none\n"); 50 } 51 } 52 53 static ssize_t mode_store(struct device *dev, 54 struct device_attribute *attr, const char *buf, size_t len) 55 { 56 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 57 ssize_t rc = 0; 58 59 nd_device_lock(dev); 60 nvdimm_bus_lock(dev); 61 if (dev->driver) 62 rc = -EBUSY; 63 else { 64 size_t n = len - 1; 65 66 if (strncmp(buf, "pmem\n", n) == 0 67 || strncmp(buf, "pmem", n) == 0) { 68 nd_pfn->mode = PFN_MODE_PMEM; 69 } else if (strncmp(buf, "ram\n", n) == 0 70 || strncmp(buf, "ram", n) == 0) 71 nd_pfn->mode = PFN_MODE_RAM; 72 else if (strncmp(buf, "none\n", n) == 0 73 || strncmp(buf, "none", n) == 0) 74 nd_pfn->mode = PFN_MODE_NONE; 75 else 76 rc = -EINVAL; 77 } 78 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 79 buf[len - 1] == '\n' ? "" : "\n"); 80 nvdimm_bus_unlock(dev); 81 nd_device_unlock(dev); 82 83 return rc ? rc : len; 84 } 85 static DEVICE_ATTR_RW(mode); 86 87 static ssize_t align_show(struct device *dev, 88 struct device_attribute *attr, char *buf) 89 { 90 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 91 92 return sprintf(buf, "%ld\n", nd_pfn->align); 93 } 94 95 static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments) 96 { 97 98 alignments[0] = PAGE_SIZE; 99 100 if (has_transparent_hugepage()) { 101 alignments[1] = HPAGE_PMD_SIZE; 102 if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) 103 alignments[2] = HPAGE_PUD_SIZE; 104 } 105 106 return alignments; 107 } 108 109 /* 110 * Use pmd mapping if supported as default alignment 111 */ 112 static unsigned long nd_pfn_default_alignment(void) 113 { 114 115 if (has_transparent_hugepage()) 116 return HPAGE_PMD_SIZE; 117 return PAGE_SIZE; 118 } 119 120 static ssize_t align_store(struct device *dev, 121 struct device_attribute *attr, const char *buf, size_t len) 122 { 123 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 124 unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 125 ssize_t rc; 126 127 nd_device_lock(dev); 128 nvdimm_bus_lock(dev); 129 rc = nd_size_select_store(dev, buf, &nd_pfn->align, 130 nd_pfn_supported_alignments(aligns)); 131 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 132 buf[len - 1] == '\n' ? "" : "\n"); 133 nvdimm_bus_unlock(dev); 134 nd_device_unlock(dev); 135 136 return rc ? rc : len; 137 } 138 static DEVICE_ATTR_RW(align); 139 140 static ssize_t uuid_show(struct device *dev, 141 struct device_attribute *attr, char *buf) 142 { 143 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 144 145 if (nd_pfn->uuid) 146 return sprintf(buf, "%pUb\n", nd_pfn->uuid); 147 return sprintf(buf, "\n"); 148 } 149 150 static ssize_t uuid_store(struct device *dev, 151 struct device_attribute *attr, const char *buf, size_t len) 152 { 153 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 154 ssize_t rc; 155 156 nd_device_lock(dev); 157 rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); 158 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 159 buf[len - 1] == '\n' ? "" : "\n"); 160 nd_device_unlock(dev); 161 162 return rc ? rc : len; 163 } 164 static DEVICE_ATTR_RW(uuid); 165 166 static ssize_t namespace_show(struct device *dev, 167 struct device_attribute *attr, char *buf) 168 { 169 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 170 ssize_t rc; 171 172 nvdimm_bus_lock(dev); 173 rc = sprintf(buf, "%s\n", nd_pfn->ndns 174 ? dev_name(&nd_pfn->ndns->dev) : ""); 175 nvdimm_bus_unlock(dev); 176 return rc; 177 } 178 179 static ssize_t namespace_store(struct device *dev, 180 struct device_attribute *attr, const char *buf, size_t len) 181 { 182 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 183 ssize_t rc; 184 185 nd_device_lock(dev); 186 nvdimm_bus_lock(dev); 187 rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); 188 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 189 buf[len - 1] == '\n' ? "" : "\n"); 190 nvdimm_bus_unlock(dev); 191 nd_device_unlock(dev); 192 193 return rc; 194 } 195 static DEVICE_ATTR_RW(namespace); 196 197 static ssize_t resource_show(struct device *dev, 198 struct device_attribute *attr, char *buf) 199 { 200 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 201 ssize_t rc; 202 203 nd_device_lock(dev); 204 if (dev->driver) { 205 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 206 u64 offset = __le64_to_cpu(pfn_sb->dataoff); 207 struct nd_namespace_common *ndns = nd_pfn->ndns; 208 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 209 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 210 211 rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start 212 + start_pad + offset); 213 } else { 214 /* no address to convey if the pfn instance is disabled */ 215 rc = -ENXIO; 216 } 217 nd_device_unlock(dev); 218 219 return rc; 220 } 221 static DEVICE_ATTR(resource, 0400, resource_show, NULL); 222 223 static ssize_t size_show(struct device *dev, 224 struct device_attribute *attr, char *buf) 225 { 226 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 227 ssize_t rc; 228 229 nd_device_lock(dev); 230 if (dev->driver) { 231 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 232 u64 offset = __le64_to_cpu(pfn_sb->dataoff); 233 struct nd_namespace_common *ndns = nd_pfn->ndns; 234 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 235 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); 236 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 237 238 rc = sprintf(buf, "%llu\n", (unsigned long long) 239 resource_size(&nsio->res) - start_pad 240 - end_trunc - offset); 241 } else { 242 /* no size to convey if the pfn instance is disabled */ 243 rc = -ENXIO; 244 } 245 nd_device_unlock(dev); 246 247 return rc; 248 } 249 static DEVICE_ATTR_RO(size); 250 251 static ssize_t supported_alignments_show(struct device *dev, 252 struct device_attribute *attr, char *buf) 253 { 254 unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 255 256 return nd_size_select_show(0, 257 nd_pfn_supported_alignments(aligns), buf); 258 } 259 static DEVICE_ATTR_RO(supported_alignments); 260 261 static struct attribute *nd_pfn_attributes[] = { 262 &dev_attr_mode.attr, 263 &dev_attr_namespace.attr, 264 &dev_attr_uuid.attr, 265 &dev_attr_align.attr, 266 &dev_attr_resource.attr, 267 &dev_attr_size.attr, 268 &dev_attr_supported_alignments.attr, 269 NULL, 270 }; 271 272 static struct attribute_group nd_pfn_attribute_group = { 273 .attrs = nd_pfn_attributes, 274 }; 275 276 const struct attribute_group *nd_pfn_attribute_groups[] = { 277 &nd_pfn_attribute_group, 278 &nd_device_attribute_group, 279 &nd_numa_attribute_group, 280 NULL, 281 }; 282 283 static const struct device_type nd_pfn_device_type = { 284 .name = "nd_pfn", 285 .release = nd_pfn_release, 286 .groups = nd_pfn_attribute_groups, 287 }; 288 289 bool is_nd_pfn(struct device *dev) 290 { 291 return dev ? dev->type == &nd_pfn_device_type : false; 292 } 293 EXPORT_SYMBOL(is_nd_pfn); 294 295 struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, 296 struct nd_namespace_common *ndns) 297 { 298 struct device *dev; 299 300 if (!nd_pfn) 301 return NULL; 302 303 nd_pfn->mode = PFN_MODE_NONE; 304 nd_pfn->align = nd_pfn_default_alignment(); 305 dev = &nd_pfn->dev; 306 device_initialize(&nd_pfn->dev); 307 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { 308 dev_dbg(&ndns->dev, "failed, already claimed by %s\n", 309 dev_name(ndns->claim)); 310 put_device(dev); 311 return NULL; 312 } 313 return dev; 314 } 315 316 static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region) 317 { 318 struct nd_pfn *nd_pfn; 319 struct device *dev; 320 321 nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); 322 if (!nd_pfn) 323 return NULL; 324 325 nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL); 326 if (nd_pfn->id < 0) { 327 kfree(nd_pfn); 328 return NULL; 329 } 330 331 dev = &nd_pfn->dev; 332 dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); 333 dev->type = &nd_pfn_device_type; 334 dev->parent = &nd_region->dev; 335 336 return nd_pfn; 337 } 338 339 struct device *nd_pfn_create(struct nd_region *nd_region) 340 { 341 struct nd_pfn *nd_pfn; 342 struct device *dev; 343 344 if (!is_memory(&nd_region->dev)) 345 return NULL; 346 347 nd_pfn = nd_pfn_alloc(nd_region); 348 dev = nd_pfn_devinit(nd_pfn, NULL); 349 350 __nd_device_register(dev); 351 return dev; 352 } 353 354 /* 355 * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap 356 * space associated with the namespace. If the memmap is set to DRAM, then 357 * this is a no-op. Since the memmap area is freshly initialized during 358 * probe, we have an opportunity to clear any badblocks in this area. 359 */ 360 static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) 361 { 362 struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); 363 struct nd_namespace_common *ndns = nd_pfn->ndns; 364 void *zero_page = page_address(ZERO_PAGE(0)); 365 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 366 int num_bad, meta_num, rc, bb_present; 367 sector_t first_bad, meta_start; 368 struct nd_namespace_io *nsio; 369 370 if (nd_pfn->mode != PFN_MODE_PMEM) 371 return 0; 372 373 nsio = to_nd_namespace_io(&ndns->dev); 374 meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9; 375 meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start; 376 377 /* 378 * re-enable the namespace with correct size so that we can access 379 * the device memmap area. 380 */ 381 devm_namespace_disable(&nd_pfn->dev, ndns); 382 rc = devm_namespace_enable(&nd_pfn->dev, ndns, le64_to_cpu(pfn_sb->dataoff)); 383 if (rc) 384 return rc; 385 386 do { 387 unsigned long zero_len; 388 u64 nsoff; 389 390 bb_present = badblocks_check(&nd_region->bb, meta_start, 391 meta_num, &first_bad, &num_bad); 392 if (bb_present) { 393 dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %llx\n", 394 num_bad, first_bad); 395 nsoff = ALIGN_DOWN((nd_region->ndr_start 396 + (first_bad << 9)) - nsio->res.start, 397 PAGE_SIZE); 398 zero_len = ALIGN(num_bad << 9, PAGE_SIZE); 399 while (zero_len) { 400 unsigned long chunk = min(zero_len, PAGE_SIZE); 401 402 rc = nvdimm_write_bytes(ndns, nsoff, zero_page, 403 chunk, 0); 404 if (rc) 405 break; 406 407 zero_len -= chunk; 408 nsoff += chunk; 409 } 410 if (rc) { 411 dev_err(&nd_pfn->dev, 412 "error clearing %x badblocks at %llx\n", 413 num_bad, first_bad); 414 return rc; 415 } 416 } 417 } while (bb_present); 418 419 return 0; 420 } 421 422 static bool nd_supported_alignment(unsigned long align) 423 { 424 int i; 425 unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 426 427 if (align == 0) 428 return false; 429 430 nd_pfn_supported_alignments(supported); 431 for (i = 0; supported[i]; i++) 432 if (align == supported[i]) 433 return true; 434 return false; 435 } 436 437 /** 438 * nd_pfn_validate - read and validate info-block 439 * @nd_pfn: fsdax namespace runtime state / properties 440 * @sig: 'devdax' or 'fsdax' signature 441 * 442 * Upon return the info-block buffer contents (->pfn_sb) are 443 * indeterminate when validation fails, and a coherent info-block 444 * otherwise. 445 */ 446 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) 447 { 448 u64 checksum, offset; 449 enum nd_pfn_mode mode; 450 struct nd_namespace_io *nsio; 451 unsigned long align, start_pad; 452 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 453 struct nd_namespace_common *ndns = nd_pfn->ndns; 454 const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); 455 456 if (!pfn_sb || !ndns) 457 return -ENODEV; 458 459 if (!is_memory(nd_pfn->dev.parent)) 460 return -ENODEV; 461 462 if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0)) 463 return -ENXIO; 464 465 if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0) 466 return -ENODEV; 467 468 checksum = le64_to_cpu(pfn_sb->checksum); 469 pfn_sb->checksum = 0; 470 if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb)) 471 return -ENODEV; 472 pfn_sb->checksum = cpu_to_le64(checksum); 473 474 if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0) 475 return -ENODEV; 476 477 if (__le16_to_cpu(pfn_sb->version_minor) < 1) { 478 pfn_sb->start_pad = 0; 479 pfn_sb->end_trunc = 0; 480 } 481 482 if (__le16_to_cpu(pfn_sb->version_minor) < 2) 483 pfn_sb->align = 0; 484 485 if (__le16_to_cpu(pfn_sb->version_minor) < 4) { 486 pfn_sb->page_struct_size = cpu_to_le16(64); 487 pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); 488 } 489 490 switch (le32_to_cpu(pfn_sb->mode)) { 491 case PFN_MODE_RAM: 492 case PFN_MODE_PMEM: 493 break; 494 default: 495 return -ENXIO; 496 } 497 498 align = le32_to_cpu(pfn_sb->align); 499 offset = le64_to_cpu(pfn_sb->dataoff); 500 start_pad = le32_to_cpu(pfn_sb->start_pad); 501 if (align == 0) 502 align = 1UL << ilog2(offset); 503 mode = le32_to_cpu(pfn_sb->mode); 504 505 if ((le32_to_cpu(pfn_sb->page_size) > PAGE_SIZE) && 506 (mode == PFN_MODE_PMEM)) { 507 dev_err(&nd_pfn->dev, 508 "init failed, page size mismatch %d\n", 509 le32_to_cpu(pfn_sb->page_size)); 510 return -EOPNOTSUPP; 511 } 512 513 if ((le16_to_cpu(pfn_sb->page_struct_size) < sizeof(struct page)) && 514 (mode == PFN_MODE_PMEM)) { 515 dev_err(&nd_pfn->dev, 516 "init failed, struct page size mismatch %d\n", 517 le16_to_cpu(pfn_sb->page_struct_size)); 518 return -EOPNOTSUPP; 519 } 520 521 /* 522 * Check whether the we support the alignment. For Dax if the 523 * superblock alignment is not matching, we won't initialize 524 * the device. 525 */ 526 if (!nd_supported_alignment(align) && 527 !memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) { 528 dev_err(&nd_pfn->dev, "init failed, alignment mismatch: " 529 "%ld:%ld\n", nd_pfn->align, align); 530 return -EOPNOTSUPP; 531 } 532 533 if (!nd_pfn->uuid) { 534 /* 535 * When probing a namepace via nd_pfn_probe() the uuid 536 * is NULL (see: nd_pfn_devinit()) we init settings from 537 * pfn_sb 538 */ 539 nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); 540 if (!nd_pfn->uuid) 541 return -ENOMEM; 542 nd_pfn->align = align; 543 nd_pfn->mode = mode; 544 } else { 545 /* 546 * When probing a pfn / dax instance we validate the 547 * live settings against the pfn_sb 548 */ 549 if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) 550 return -ENODEV; 551 552 /* 553 * If the uuid validates, but other settings mismatch 554 * return EINVAL because userspace has managed to change 555 * the configuration without specifying new 556 * identification. 557 */ 558 if (nd_pfn->align != align || nd_pfn->mode != mode) { 559 dev_err(&nd_pfn->dev, 560 "init failed, settings mismatch\n"); 561 dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n", 562 nd_pfn->align, align, nd_pfn->mode, 563 mode); 564 return -EINVAL; 565 } 566 } 567 568 if (align > nvdimm_namespace_capacity(ndns)) { 569 dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", 570 align, nvdimm_namespace_capacity(ndns)); 571 return -EINVAL; 572 } 573 574 /* 575 * These warnings are verbose because they can only trigger in 576 * the case where the physical address alignment of the 577 * namespace has changed since the pfn superblock was 578 * established. 579 */ 580 nsio = to_nd_namespace_io(&ndns->dev); 581 if (offset >= resource_size(&nsio->res)) { 582 dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", 583 dev_name(&ndns->dev)); 584 return -EBUSY; 585 } 586 587 if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) 588 || !IS_ALIGNED(offset, PAGE_SIZE)) { 589 dev_err(&nd_pfn->dev, 590 "bad offset: %#llx dax disabled align: %#lx\n", 591 offset, align); 592 return -ENXIO; 593 } 594 595 return 0; 596 } 597 EXPORT_SYMBOL(nd_pfn_validate); 598 599 int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) 600 { 601 int rc; 602 struct nd_pfn *nd_pfn; 603 struct device *pfn_dev; 604 struct nd_pfn_sb *pfn_sb; 605 struct nd_region *nd_region = to_nd_region(ndns->dev.parent); 606 607 if (ndns->force_raw) 608 return -ENODEV; 609 610 switch (ndns->claim_class) { 611 case NVDIMM_CCLASS_NONE: 612 case NVDIMM_CCLASS_PFN: 613 break; 614 default: 615 return -ENODEV; 616 } 617 618 nvdimm_bus_lock(&ndns->dev); 619 nd_pfn = nd_pfn_alloc(nd_region); 620 pfn_dev = nd_pfn_devinit(nd_pfn, ndns); 621 nvdimm_bus_unlock(&ndns->dev); 622 if (!pfn_dev) 623 return -ENOMEM; 624 pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); 625 nd_pfn = to_nd_pfn(pfn_dev); 626 nd_pfn->pfn_sb = pfn_sb; 627 rc = nd_pfn_validate(nd_pfn, PFN_SIG); 628 dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>"); 629 if (rc < 0) { 630 nd_detach_ndns(pfn_dev, &nd_pfn->ndns); 631 put_device(pfn_dev); 632 } else 633 __nd_device_register(pfn_dev); 634 635 return rc; 636 } 637 EXPORT_SYMBOL(nd_pfn_probe); 638 639 /* 640 * We hotplug memory at sub-section granularity, pad the reserved area 641 * from the previous section base to the namespace base address. 642 */ 643 static unsigned long init_altmap_base(resource_size_t base) 644 { 645 unsigned long base_pfn = PHYS_PFN(base); 646 647 return SUBSECTION_ALIGN_DOWN(base_pfn); 648 } 649 650 static unsigned long init_altmap_reserve(resource_size_t base) 651 { 652 unsigned long reserve = nd_info_block_reserve() >> PAGE_SHIFT; 653 unsigned long base_pfn = PHYS_PFN(base); 654 655 reserve += base_pfn - SUBSECTION_ALIGN_DOWN(base_pfn); 656 return reserve; 657 } 658 659 static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 660 { 661 struct resource *res = &pgmap->res; 662 struct vmem_altmap *altmap = &pgmap->altmap; 663 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 664 u64 offset = le64_to_cpu(pfn_sb->dataoff); 665 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 666 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); 667 u32 reserve = nd_info_block_reserve(); 668 struct nd_namespace_common *ndns = nd_pfn->ndns; 669 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 670 resource_size_t base = nsio->res.start + start_pad; 671 resource_size_t end = nsio->res.end - end_trunc; 672 struct vmem_altmap __altmap = { 673 .base_pfn = init_altmap_base(base), 674 .reserve = init_altmap_reserve(base), 675 .end_pfn = PHYS_PFN(end), 676 }; 677 678 memcpy(res, &nsio->res, sizeof(*res)); 679 res->start += start_pad; 680 res->end -= end_trunc; 681 682 if (nd_pfn->mode == PFN_MODE_RAM) { 683 if (offset < reserve) 684 return -EINVAL; 685 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); 686 } else if (nd_pfn->mode == PFN_MODE_PMEM) { 687 nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset)); 688 if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) 689 dev_info(&nd_pfn->dev, 690 "number of pfns truncated from %lld to %ld\n", 691 le64_to_cpu(nd_pfn->pfn_sb->npfns), 692 nd_pfn->npfns); 693 memcpy(altmap, &__altmap, sizeof(*altmap)); 694 altmap->free = PHYS_PFN(offset - reserve); 695 altmap->alloc = 0; 696 pgmap->flags |= PGMAP_ALTMAP_VALID; 697 } else 698 return -ENXIO; 699 700 return 0; 701 } 702 703 static int nd_pfn_init(struct nd_pfn *nd_pfn) 704 { 705 struct nd_namespace_common *ndns = nd_pfn->ndns; 706 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 707 resource_size_t start, size; 708 struct nd_region *nd_region; 709 unsigned long npfns, align; 710 u32 end_trunc; 711 struct nd_pfn_sb *pfn_sb; 712 phys_addr_t offset; 713 const char *sig; 714 u64 checksum; 715 int rc; 716 717 pfn_sb = devm_kmalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL); 718 if (!pfn_sb) 719 return -ENOMEM; 720 721 nd_pfn->pfn_sb = pfn_sb; 722 if (is_nd_dax(&nd_pfn->dev)) 723 sig = DAX_SIG; 724 else 725 sig = PFN_SIG; 726 727 rc = nd_pfn_validate(nd_pfn, sig); 728 if (rc == 0) 729 return nd_pfn_clear_memmap_errors(nd_pfn); 730 if (rc != -ENODEV) 731 return rc; 732 733 /* no info block, do init */; 734 memset(pfn_sb, 0, sizeof(*pfn_sb)); 735 736 nd_region = to_nd_region(nd_pfn->dev.parent); 737 if (nd_region->ro) { 738 dev_info(&nd_pfn->dev, 739 "%s is read-only, unable to init metadata\n", 740 dev_name(&nd_region->dev)); 741 return -ENXIO; 742 } 743 744 /* 745 * Note, we use 64 here for the standard size of struct page, 746 * debugging options may cause it to be larger in which case the 747 * implementation will limit the pfns advertised through 748 * ->direct_access() to those that are included in the memmap. 749 */ 750 start = nsio->res.start; 751 size = resource_size(&nsio->res); 752 npfns = PHYS_PFN(size - SZ_8K); 753 align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT)); 754 end_trunc = start + size - ALIGN_DOWN(start + size, align); 755 if (nd_pfn->mode == PFN_MODE_PMEM) { 756 /* 757 * The altmap should be padded out to the block size used 758 * when populating the vmemmap. This *should* be equal to 759 * PMD_SIZE for most architectures. 760 * 761 * Also make sure size of struct page is less than 64. We 762 * want to make sure we use large enough size here so that 763 * we don't have a dynamic reserve space depending on 764 * struct page size. But we also want to make sure we notice 765 * when we end up adding new elements to struct page. 766 */ 767 BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE); 768 offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align) 769 - start; 770 } else if (nd_pfn->mode == PFN_MODE_RAM) 771 offset = ALIGN(start + SZ_8K, align) - start; 772 else 773 return -ENXIO; 774 775 if (offset >= size) { 776 dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", 777 dev_name(&ndns->dev)); 778 return -ENXIO; 779 } 780 781 npfns = PHYS_PFN(size - offset - end_trunc); 782 pfn_sb->mode = cpu_to_le32(nd_pfn->mode); 783 pfn_sb->dataoff = cpu_to_le64(offset); 784 pfn_sb->npfns = cpu_to_le64(npfns); 785 memcpy(pfn_sb->signature, sig, PFN_SIG_LEN); 786 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); 787 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); 788 pfn_sb->version_major = cpu_to_le16(1); 789 pfn_sb->version_minor = cpu_to_le16(4); 790 pfn_sb->end_trunc = cpu_to_le32(end_trunc); 791 pfn_sb->align = cpu_to_le32(nd_pfn->align); 792 pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE); 793 pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); 794 checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); 795 pfn_sb->checksum = cpu_to_le64(checksum); 796 797 rc = nd_pfn_clear_memmap_errors(nd_pfn); 798 if (rc) 799 return rc; 800 801 return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0); 802 } 803 804 /* 805 * Determine the effective resource range and vmem_altmap from an nd_pfn 806 * instance. 807 */ 808 int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 809 { 810 int rc; 811 812 if (!nd_pfn->uuid || !nd_pfn->ndns) 813 return -ENODEV; 814 815 rc = nd_pfn_init(nd_pfn); 816 if (rc) 817 return rc; 818 819 /* we need a valid pfn_sb before we can init a dev_pagemap */ 820 return __nvdimm_setup_pfn(nd_pfn, pgmap); 821 } 822 EXPORT_SYMBOL_GPL(nvdimm_setup_pfn); 823