1 /* 2 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 */ 13 #include <linux/memremap.h> 14 #include <linux/blkdev.h> 15 #include <linux/device.h> 16 #include <linux/genhd.h> 17 #include <linux/sizes.h> 18 #include <linux/slab.h> 19 #include <linux/fs.h> 20 #include <linux/mm.h> 21 #include "nd-core.h" 22 #include "pfn.h" 23 #include "nd.h" 24 25 static void nd_pfn_release(struct device *dev) 26 { 27 struct nd_region *nd_region = to_nd_region(dev->parent); 28 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 29 30 dev_dbg(dev, "trace\n"); 31 nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); 32 ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id); 33 kfree(nd_pfn->uuid); 34 kfree(nd_pfn); 35 } 36 37 static struct device_type nd_pfn_device_type = { 38 .name = "nd_pfn", 39 .release = nd_pfn_release, 40 }; 41 42 bool is_nd_pfn(struct device *dev) 43 { 44 return dev ? dev->type == &nd_pfn_device_type : false; 45 } 46 EXPORT_SYMBOL(is_nd_pfn); 47 48 struct nd_pfn *to_nd_pfn(struct device *dev) 49 { 50 struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev); 51 52 WARN_ON(!is_nd_pfn(dev)); 53 return nd_pfn; 54 } 55 EXPORT_SYMBOL(to_nd_pfn); 56 57 static ssize_t mode_show(struct device *dev, 58 struct device_attribute *attr, char *buf) 59 { 60 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 61 62 switch (nd_pfn->mode) { 63 case PFN_MODE_RAM: 64 return sprintf(buf, "ram\n"); 65 case PFN_MODE_PMEM: 66 return sprintf(buf, "pmem\n"); 67 default: 68 return sprintf(buf, "none\n"); 69 } 70 } 71 72 static ssize_t mode_store(struct device *dev, 73 struct device_attribute *attr, const char *buf, size_t len) 74 { 75 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 76 ssize_t rc = 0; 77 78 device_lock(dev); 79 nvdimm_bus_lock(dev); 80 if (dev->driver) 81 rc = -EBUSY; 82 else { 83 size_t n = len - 1; 84 85 if (strncmp(buf, "pmem\n", n) == 0 86 || strncmp(buf, "pmem", n) == 0) { 87 nd_pfn->mode = PFN_MODE_PMEM; 88 } else if (strncmp(buf, "ram\n", n) == 0 89 || strncmp(buf, "ram", n) == 0) 90 nd_pfn->mode = PFN_MODE_RAM; 91 else if (strncmp(buf, "none\n", n) == 0 92 || strncmp(buf, "none", n) == 0) 93 nd_pfn->mode = PFN_MODE_NONE; 94 else 95 rc = -EINVAL; 96 } 97 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 98 buf[len - 1] == '\n' ? "" : "\n"); 99 nvdimm_bus_unlock(dev); 100 device_unlock(dev); 101 102 return rc ? rc : len; 103 } 104 static DEVICE_ATTR_RW(mode); 105 106 static ssize_t align_show(struct device *dev, 107 struct device_attribute *attr, char *buf) 108 { 109 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 110 111 return sprintf(buf, "%ld\n", nd_pfn->align); 112 } 113 114 static const unsigned long *nd_pfn_supported_alignments(void) 115 { 116 /* 117 * This needs to be a non-static variable because the *_SIZE 118 * macros aren't always constants. 119 */ 120 const unsigned long supported_alignments[] = { 121 PAGE_SIZE, 122 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 123 HPAGE_PMD_SIZE, 124 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 125 HPAGE_PUD_SIZE, 126 #endif 127 #endif 128 0, 129 }; 130 static unsigned long data[ARRAY_SIZE(supported_alignments)]; 131 132 memcpy(data, supported_alignments, sizeof(data)); 133 134 return data; 135 } 136 137 static ssize_t align_store(struct device *dev, 138 struct device_attribute *attr, const char *buf, size_t len) 139 { 140 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 141 ssize_t rc; 142 143 device_lock(dev); 144 nvdimm_bus_lock(dev); 145 rc = nd_size_select_store(dev, buf, &nd_pfn->align, 146 nd_pfn_supported_alignments()); 147 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 148 buf[len - 1] == '\n' ? "" : "\n"); 149 nvdimm_bus_unlock(dev); 150 device_unlock(dev); 151 152 return rc ? rc : len; 153 } 154 static DEVICE_ATTR_RW(align); 155 156 static ssize_t uuid_show(struct device *dev, 157 struct device_attribute *attr, char *buf) 158 { 159 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 160 161 if (nd_pfn->uuid) 162 return sprintf(buf, "%pUb\n", nd_pfn->uuid); 163 return sprintf(buf, "\n"); 164 } 165 166 static ssize_t uuid_store(struct device *dev, 167 struct device_attribute *attr, const char *buf, size_t len) 168 { 169 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 170 ssize_t rc; 171 172 device_lock(dev); 173 rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); 174 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 175 buf[len - 1] == '\n' ? "" : "\n"); 176 device_unlock(dev); 177 178 return rc ? rc : len; 179 } 180 static DEVICE_ATTR_RW(uuid); 181 182 static ssize_t namespace_show(struct device *dev, 183 struct device_attribute *attr, char *buf) 184 { 185 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 186 ssize_t rc; 187 188 nvdimm_bus_lock(dev); 189 rc = sprintf(buf, "%s\n", nd_pfn->ndns 190 ? dev_name(&nd_pfn->ndns->dev) : ""); 191 nvdimm_bus_unlock(dev); 192 return rc; 193 } 194 195 static ssize_t namespace_store(struct device *dev, 196 struct device_attribute *attr, const char *buf, size_t len) 197 { 198 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 199 ssize_t rc; 200 201 device_lock(dev); 202 nvdimm_bus_lock(dev); 203 rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); 204 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 205 buf[len - 1] == '\n' ? "" : "\n"); 206 nvdimm_bus_unlock(dev); 207 device_unlock(dev); 208 209 return rc; 210 } 211 static DEVICE_ATTR_RW(namespace); 212 213 static ssize_t resource_show(struct device *dev, 214 struct device_attribute *attr, char *buf) 215 { 216 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 217 ssize_t rc; 218 219 device_lock(dev); 220 if (dev->driver) { 221 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 222 u64 offset = __le64_to_cpu(pfn_sb->dataoff); 223 struct nd_namespace_common *ndns = nd_pfn->ndns; 224 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 225 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 226 227 rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start 228 + start_pad + offset); 229 } else { 230 /* no address to convey if the pfn instance is disabled */ 231 rc = -ENXIO; 232 } 233 device_unlock(dev); 234 235 return rc; 236 } 237 static DEVICE_ATTR_RO(resource); 238 239 static ssize_t size_show(struct device *dev, 240 struct device_attribute *attr, char *buf) 241 { 242 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 243 ssize_t rc; 244 245 device_lock(dev); 246 if (dev->driver) { 247 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 248 u64 offset = __le64_to_cpu(pfn_sb->dataoff); 249 struct nd_namespace_common *ndns = nd_pfn->ndns; 250 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 251 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); 252 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 253 254 rc = sprintf(buf, "%llu\n", (unsigned long long) 255 resource_size(&nsio->res) - start_pad 256 - end_trunc - offset); 257 } else { 258 /* no size to convey if the pfn instance is disabled */ 259 rc = -ENXIO; 260 } 261 device_unlock(dev); 262 263 return rc; 264 } 265 static DEVICE_ATTR_RO(size); 266 267 static ssize_t supported_alignments_show(struct device *dev, 268 struct device_attribute *attr, char *buf) 269 { 270 return nd_size_select_show(0, nd_pfn_supported_alignments(), buf); 271 } 272 static DEVICE_ATTR_RO(supported_alignments); 273 274 static struct attribute *nd_pfn_attributes[] = { 275 &dev_attr_mode.attr, 276 &dev_attr_namespace.attr, 277 &dev_attr_uuid.attr, 278 &dev_attr_align.attr, 279 &dev_attr_resource.attr, 280 &dev_attr_size.attr, 281 &dev_attr_supported_alignments.attr, 282 NULL, 283 }; 284 285 static umode_t pfn_visible(struct kobject *kobj, struct attribute *a, int n) 286 { 287 if (a == &dev_attr_resource.attr) 288 return 0400; 289 return a->mode; 290 } 291 292 struct attribute_group nd_pfn_attribute_group = { 293 .attrs = nd_pfn_attributes, 294 .is_visible = pfn_visible, 295 }; 296 297 static const struct attribute_group *nd_pfn_attribute_groups[] = { 298 &nd_pfn_attribute_group, 299 &nd_device_attribute_group, 300 &nd_numa_attribute_group, 301 NULL, 302 }; 303 304 struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, 305 struct nd_namespace_common *ndns) 306 { 307 struct device *dev; 308 309 if (!nd_pfn) 310 return NULL; 311 312 nd_pfn->mode = PFN_MODE_NONE; 313 nd_pfn->align = PFN_DEFAULT_ALIGNMENT; 314 dev = &nd_pfn->dev; 315 device_initialize(&nd_pfn->dev); 316 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { 317 dev_dbg(&ndns->dev, "failed, already claimed by %s\n", 318 dev_name(ndns->claim)); 319 put_device(dev); 320 return NULL; 321 } 322 return dev; 323 } 324 325 static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region) 326 { 327 struct nd_pfn *nd_pfn; 328 struct device *dev; 329 330 nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); 331 if (!nd_pfn) 332 return NULL; 333 334 nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL); 335 if (nd_pfn->id < 0) { 336 kfree(nd_pfn); 337 return NULL; 338 } 339 340 dev = &nd_pfn->dev; 341 dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); 342 dev->groups = nd_pfn_attribute_groups; 343 dev->type = &nd_pfn_device_type; 344 dev->parent = &nd_region->dev; 345 346 return nd_pfn; 347 } 348 349 struct device *nd_pfn_create(struct nd_region *nd_region) 350 { 351 struct nd_pfn *nd_pfn; 352 struct device *dev; 353 354 if (!is_memory(&nd_region->dev)) 355 return NULL; 356 357 nd_pfn = nd_pfn_alloc(nd_region); 358 dev = nd_pfn_devinit(nd_pfn, NULL); 359 360 __nd_device_register(dev); 361 return dev; 362 } 363 364 /* 365 * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap 366 * space associated with the namespace. If the memmap is set to DRAM, then 367 * this is a no-op. Since the memmap area is freshly initialized during 368 * probe, we have an opportunity to clear any badblocks in this area. 369 */ 370 static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) 371 { 372 struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); 373 struct nd_namespace_common *ndns = nd_pfn->ndns; 374 void *zero_page = page_address(ZERO_PAGE(0)); 375 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 376 int num_bad, meta_num, rc, bb_present; 377 sector_t first_bad, meta_start; 378 struct nd_namespace_io *nsio; 379 380 if (nd_pfn->mode != PFN_MODE_PMEM) 381 return 0; 382 383 nsio = to_nd_namespace_io(&ndns->dev); 384 meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9; 385 meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start; 386 387 do { 388 unsigned long zero_len; 389 u64 nsoff; 390 391 bb_present = badblocks_check(&nd_region->bb, meta_start, 392 meta_num, &first_bad, &num_bad); 393 if (bb_present) { 394 dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %llx\n", 395 num_bad, first_bad); 396 nsoff = ALIGN_DOWN((nd_region->ndr_start 397 + (first_bad << 9)) - nsio->res.start, 398 PAGE_SIZE); 399 zero_len = ALIGN(num_bad << 9, PAGE_SIZE); 400 while (zero_len) { 401 unsigned long chunk = min(zero_len, PAGE_SIZE); 402 403 rc = nvdimm_write_bytes(ndns, nsoff, zero_page, 404 chunk, 0); 405 if (rc) 406 break; 407 408 zero_len -= chunk; 409 nsoff += chunk; 410 } 411 if (rc) { 412 dev_err(&nd_pfn->dev, 413 "error clearing %x badblocks at %llx\n", 414 num_bad, first_bad); 415 return rc; 416 } 417 } 418 } while (bb_present); 419 420 return 0; 421 } 422 423 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) 424 { 425 u64 checksum, offset; 426 enum nd_pfn_mode mode; 427 struct nd_namespace_io *nsio; 428 unsigned long align, start_pad; 429 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 430 struct nd_namespace_common *ndns = nd_pfn->ndns; 431 const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); 432 433 if (!pfn_sb || !ndns) 434 return -ENODEV; 435 436 if (!is_memory(nd_pfn->dev.parent)) 437 return -ENODEV; 438 439 if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0)) 440 return -ENXIO; 441 442 if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0) 443 return -ENODEV; 444 445 checksum = le64_to_cpu(pfn_sb->checksum); 446 pfn_sb->checksum = 0; 447 if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb)) 448 return -ENODEV; 449 pfn_sb->checksum = cpu_to_le64(checksum); 450 451 if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0) 452 return -ENODEV; 453 454 if (__le16_to_cpu(pfn_sb->version_minor) < 1) { 455 pfn_sb->start_pad = 0; 456 pfn_sb->end_trunc = 0; 457 } 458 459 if (__le16_to_cpu(pfn_sb->version_minor) < 2) 460 pfn_sb->align = 0; 461 462 switch (le32_to_cpu(pfn_sb->mode)) { 463 case PFN_MODE_RAM: 464 case PFN_MODE_PMEM: 465 break; 466 default: 467 return -ENXIO; 468 } 469 470 align = le32_to_cpu(pfn_sb->align); 471 offset = le64_to_cpu(pfn_sb->dataoff); 472 start_pad = le32_to_cpu(pfn_sb->start_pad); 473 if (align == 0) 474 align = 1UL << ilog2(offset); 475 mode = le32_to_cpu(pfn_sb->mode); 476 477 if (!nd_pfn->uuid) { 478 /* 479 * When probing a namepace via nd_pfn_probe() the uuid 480 * is NULL (see: nd_pfn_devinit()) we init settings from 481 * pfn_sb 482 */ 483 nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); 484 if (!nd_pfn->uuid) 485 return -ENOMEM; 486 nd_pfn->align = align; 487 nd_pfn->mode = mode; 488 } else { 489 /* 490 * When probing a pfn / dax instance we validate the 491 * live settings against the pfn_sb 492 */ 493 if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) 494 return -ENODEV; 495 496 /* 497 * If the uuid validates, but other settings mismatch 498 * return EINVAL because userspace has managed to change 499 * the configuration without specifying new 500 * identification. 501 */ 502 if (nd_pfn->align != align || nd_pfn->mode != mode) { 503 dev_err(&nd_pfn->dev, 504 "init failed, settings mismatch\n"); 505 dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n", 506 nd_pfn->align, align, nd_pfn->mode, 507 mode); 508 return -EINVAL; 509 } 510 } 511 512 if (align > nvdimm_namespace_capacity(ndns)) { 513 dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", 514 align, nvdimm_namespace_capacity(ndns)); 515 return -EINVAL; 516 } 517 518 /* 519 * These warnings are verbose because they can only trigger in 520 * the case where the physical address alignment of the 521 * namespace has changed since the pfn superblock was 522 * established. 523 */ 524 nsio = to_nd_namespace_io(&ndns->dev); 525 if (offset >= resource_size(&nsio->res)) { 526 dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", 527 dev_name(&ndns->dev)); 528 return -EBUSY; 529 } 530 531 if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) 532 || !IS_ALIGNED(offset, PAGE_SIZE)) { 533 dev_err(&nd_pfn->dev, 534 "bad offset: %#llx dax disabled align: %#lx\n", 535 offset, align); 536 return -ENXIO; 537 } 538 539 return nd_pfn_clear_memmap_errors(nd_pfn); 540 } 541 EXPORT_SYMBOL(nd_pfn_validate); 542 543 int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) 544 { 545 int rc; 546 struct nd_pfn *nd_pfn; 547 struct device *pfn_dev; 548 struct nd_pfn_sb *pfn_sb; 549 struct nd_region *nd_region = to_nd_region(ndns->dev.parent); 550 551 if (ndns->force_raw) 552 return -ENODEV; 553 554 switch (ndns->claim_class) { 555 case NVDIMM_CCLASS_NONE: 556 case NVDIMM_CCLASS_PFN: 557 break; 558 default: 559 return -ENODEV; 560 } 561 562 nvdimm_bus_lock(&ndns->dev); 563 nd_pfn = nd_pfn_alloc(nd_region); 564 pfn_dev = nd_pfn_devinit(nd_pfn, ndns); 565 nvdimm_bus_unlock(&ndns->dev); 566 if (!pfn_dev) 567 return -ENOMEM; 568 pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); 569 nd_pfn = to_nd_pfn(pfn_dev); 570 nd_pfn->pfn_sb = pfn_sb; 571 rc = nd_pfn_validate(nd_pfn, PFN_SIG); 572 dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>"); 573 if (rc < 0) { 574 nd_detach_ndns(pfn_dev, &nd_pfn->ndns); 575 put_device(pfn_dev); 576 } else 577 __nd_device_register(pfn_dev); 578 579 return rc; 580 } 581 EXPORT_SYMBOL(nd_pfn_probe); 582 583 static u32 info_block_reserve(void) 584 { 585 return ALIGN(SZ_8K, PAGE_SIZE); 586 } 587 588 /* 589 * We hotplug memory at section granularity, pad the reserved area from 590 * the previous section base to the namespace base address. 591 */ 592 static unsigned long init_altmap_base(resource_size_t base) 593 { 594 unsigned long base_pfn = PHYS_PFN(base); 595 596 return PFN_SECTION_ALIGN_DOWN(base_pfn); 597 } 598 599 static unsigned long init_altmap_reserve(resource_size_t base) 600 { 601 unsigned long reserve = info_block_reserve() >> PAGE_SHIFT; 602 unsigned long base_pfn = PHYS_PFN(base); 603 604 reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); 605 return reserve; 606 } 607 608 static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 609 { 610 struct resource *res = &pgmap->res; 611 struct vmem_altmap *altmap = &pgmap->altmap; 612 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 613 u64 offset = le64_to_cpu(pfn_sb->dataoff); 614 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 615 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); 616 u32 reserve = info_block_reserve(); 617 struct nd_namespace_common *ndns = nd_pfn->ndns; 618 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 619 resource_size_t base = nsio->res.start + start_pad; 620 struct vmem_altmap __altmap = { 621 .base_pfn = init_altmap_base(base), 622 .reserve = init_altmap_reserve(base), 623 }; 624 625 memcpy(res, &nsio->res, sizeof(*res)); 626 res->start += start_pad; 627 res->end -= end_trunc; 628 629 if (nd_pfn->mode == PFN_MODE_RAM) { 630 if (offset < reserve) 631 return -EINVAL; 632 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); 633 pgmap->altmap_valid = false; 634 } else if (nd_pfn->mode == PFN_MODE_PMEM) { 635 nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) 636 - offset) / PAGE_SIZE); 637 if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) 638 dev_info(&nd_pfn->dev, 639 "number of pfns truncated from %lld to %ld\n", 640 le64_to_cpu(nd_pfn->pfn_sb->npfns), 641 nd_pfn->npfns); 642 memcpy(altmap, &__altmap, sizeof(*altmap)); 643 altmap->free = PHYS_PFN(offset - reserve); 644 altmap->alloc = 0; 645 pgmap->altmap_valid = true; 646 } else 647 return -ENXIO; 648 649 return 0; 650 } 651 652 static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) 653 { 654 return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys), 655 ALIGN_DOWN(phys, nd_pfn->align)); 656 } 657 658 /* 659 * Check if pmem collides with 'System RAM', or other regions when 660 * section aligned. Trim it accordingly. 661 */ 662 static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc) 663 { 664 struct nd_namespace_common *ndns = nd_pfn->ndns; 665 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 666 struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); 667 const resource_size_t start = nsio->res.start; 668 const resource_size_t end = start + resource_size(&nsio->res); 669 resource_size_t adjust, size; 670 671 *start_pad = 0; 672 *end_trunc = 0; 673 674 adjust = start - PHYS_SECTION_ALIGN_DOWN(start); 675 size = resource_size(&nsio->res) + adjust; 676 if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM, 677 IORES_DESC_NONE) == REGION_MIXED 678 || nd_region_conflict(nd_region, start - adjust, size)) 679 *start_pad = PHYS_SECTION_ALIGN_UP(start) - start; 680 681 /* Now check that end of the range does not collide. */ 682 adjust = PHYS_SECTION_ALIGN_UP(end) - end; 683 size = resource_size(&nsio->res) + adjust; 684 if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, 685 IORES_DESC_NONE) == REGION_MIXED 686 || !IS_ALIGNED(end, nd_pfn->align) 687 || nd_region_conflict(nd_region, start, size)) 688 *end_trunc = end - phys_pmem_align_down(nd_pfn, end); 689 } 690 691 static int nd_pfn_init(struct nd_pfn *nd_pfn) 692 { 693 struct nd_namespace_common *ndns = nd_pfn->ndns; 694 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 695 u32 start_pad, end_trunc, reserve = info_block_reserve(); 696 resource_size_t start, size; 697 struct nd_region *nd_region; 698 struct nd_pfn_sb *pfn_sb; 699 unsigned long npfns; 700 phys_addr_t offset; 701 const char *sig; 702 u64 checksum; 703 int rc; 704 705 pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL); 706 if (!pfn_sb) 707 return -ENOMEM; 708 709 nd_pfn->pfn_sb = pfn_sb; 710 if (is_nd_dax(&nd_pfn->dev)) 711 sig = DAX_SIG; 712 else 713 sig = PFN_SIG; 714 rc = nd_pfn_validate(nd_pfn, sig); 715 if (rc != -ENODEV) 716 return rc; 717 718 /* no info block, do init */; 719 nd_region = to_nd_region(nd_pfn->dev.parent); 720 if (nd_region->ro) { 721 dev_info(&nd_pfn->dev, 722 "%s is read-only, unable to init metadata\n", 723 dev_name(&nd_region->dev)); 724 return -ENXIO; 725 } 726 727 memset(pfn_sb, 0, sizeof(*pfn_sb)); 728 729 trim_pfn_device(nd_pfn, &start_pad, &end_trunc); 730 if (start_pad + end_trunc) 731 dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", 732 dev_name(&ndns->dev), start_pad + end_trunc); 733 734 /* 735 * Note, we use 64 here for the standard size of struct page, 736 * debugging options may cause it to be larger in which case the 737 * implementation will limit the pfns advertised through 738 * ->direct_access() to those that are included in the memmap. 739 */ 740 start = nsio->res.start + start_pad; 741 size = resource_size(&nsio->res); 742 npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - reserve) 743 / PAGE_SIZE); 744 if (nd_pfn->mode == PFN_MODE_PMEM) { 745 /* 746 * The altmap should be padded out to the block size used 747 * when populating the vmemmap. This *should* be equal to 748 * PMD_SIZE for most architectures. 749 */ 750 offset = ALIGN(start + reserve + 64 * npfns, 751 max(nd_pfn->align, PMD_SIZE)) - start; 752 } else if (nd_pfn->mode == PFN_MODE_RAM) 753 offset = ALIGN(start + reserve, nd_pfn->align) - start; 754 else 755 return -ENXIO; 756 757 if (offset + start_pad + end_trunc >= size) { 758 dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", 759 dev_name(&ndns->dev)); 760 return -ENXIO; 761 } 762 763 npfns = (size - offset - start_pad - end_trunc) / SZ_4K; 764 pfn_sb->mode = cpu_to_le32(nd_pfn->mode); 765 pfn_sb->dataoff = cpu_to_le64(offset); 766 pfn_sb->npfns = cpu_to_le64(npfns); 767 memcpy(pfn_sb->signature, sig, PFN_SIG_LEN); 768 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); 769 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); 770 pfn_sb->version_major = cpu_to_le16(1); 771 pfn_sb->version_minor = cpu_to_le16(2); 772 pfn_sb->start_pad = cpu_to_le32(start_pad); 773 pfn_sb->end_trunc = cpu_to_le32(end_trunc); 774 pfn_sb->align = cpu_to_le32(nd_pfn->align); 775 checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); 776 pfn_sb->checksum = cpu_to_le64(checksum); 777 778 return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0); 779 } 780 781 /* 782 * Determine the effective resource range and vmem_altmap from an nd_pfn 783 * instance. 784 */ 785 int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 786 { 787 int rc; 788 789 if (!nd_pfn->uuid || !nd_pfn->ndns) 790 return -ENODEV; 791 792 rc = nd_pfn_init(nd_pfn); 793 if (rc) 794 return rc; 795 796 /* we need a valid pfn_sb before we can init a dev_pagemap */ 797 return __nvdimm_setup_pfn(nd_pfn, pgmap); 798 } 799 EXPORT_SYMBOL_GPL(nvdimm_setup_pfn); 800