// SPDX-License-Identifier: GPL-2.0-only
/*
 * Persistent Memory Driver
 *
 * Copyright (c) 2014-2015, Intel Corporation.
 * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
 * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
 */

#include <linux/blkdev.h>
#include <linux/pagemap.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/set_memory.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/badblocks.h>
#include <linux/memremap.h>
#include <linux/vmalloc.h>
#include <linux/blk-mq.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/dax.h>
#include <linux/nd.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include "pmem.h"
#include "btt.h"
#include "pfn.h"
#include "nd.h"

static struct device *to_dev(struct pmem_device *pmem)
{
	/*
	 * nvdimm bus services need a 'dev' parameter, and we record the device
	 * at init in bb.dev.
	 */
	return pmem->bb.dev;
}

static struct nd_region *to_region(struct pmem_device *pmem)
{
	return to_nd_region(to_dev(pmem)->parent);
}

static phys_addr_t to_phys(struct pmem_device *pmem, phys_addr_t offset)
{
	return pmem->phys_addr + offset;
}

static sector_t to_sect(struct pmem_device *pmem, phys_addr_t offset)
{
	return (offset - pmem->data_offset) >> SECTOR_SHIFT;
}

static phys_addr_t to_offset(struct pmem_device *pmem, sector_t sector)
{
	return (sector << SECTOR_SHIFT) + pmem->data_offset;
}

static void pmem_mkpage_present(struct pmem_device *pmem, phys_addr_t offset,
		unsigned int len)
{
	phys_addr_t phys = to_phys(pmem, offset);
	unsigned long pfn_start, pfn_end, pfn;

	/* only pmem in the linear map supports HWPoison */
	if (is_vmalloc_addr(pmem->virt_addr))
		return;

	pfn_start = PHYS_PFN(phys);
	pfn_end = pfn_start + PHYS_PFN(len);
	for (pfn = pfn_start; pfn < pfn_end; pfn++) {
		struct page *page = pfn_to_page(pfn);

		/*
		 * Note, no need to hold a get_dev_pagemap() reference
		 * here since we're in the driver I/O path and
		 * outstanding I/O requests pin the dev_pagemap.
		 */
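		/*
		 * If this page was flagged as HWPoison, drop the flag and
		 * have clear_mce_nospec() make the page accessible again in
		 * the kernel direct map, now that the poison has been
		 * cleared.
		 */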
		if (test_and_clear_pmem_poison(page))
			clear_mce_nospec(pfn);
	}
}

static void pmem_clear_bb(struct pmem_device *pmem, sector_t sector, long blks)
{
	if (blks == 0)
		return;
	badblocks_clear(&pmem->bb, sector, blks);
	if (pmem->bb_state)
		sysfs_notify_dirent(pmem->bb_state);
}

static long __pmem_clear_poison(struct pmem_device *pmem,
		phys_addr_t offset, unsigned int len)
{
	phys_addr_t phys = to_phys(pmem, offset);
	long cleared = nvdimm_clear_poison(to_dev(pmem), phys, len);

	if (cleared > 0) {
		pmem_mkpage_present(pmem, offset, cleared);
		arch_invalidate_pmem(pmem->virt_addr + offset, len);
	}
	return cleared;
}

static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
		phys_addr_t offset, unsigned int len)
{
	long cleared = __pmem_clear_poison(pmem, offset, len);

	if (cleared < 0)
		return BLK_STS_IOERR;

	pmem_clear_bb(pmem, to_sect(pmem, offset), cleared >> SECTOR_SHIFT);
	if (cleared < len)
		return BLK_STS_IOERR;
	return BLK_STS_OK;
}

static void write_pmem(void *pmem_addr, struct page *page,
		unsigned int off, unsigned int len)
{
	unsigned int chunk;
	void *mem;

	while (len) {
		mem = kmap_atomic(page);
		chunk = min_t(unsigned int, len, PAGE_SIZE - off);
		memcpy_flushcache(pmem_addr, mem + off, chunk);
		kunmap_atomic(mem);
		len -= chunk;
		off = 0;
		page++;
		pmem_addr += chunk;
	}
}

static blk_status_t read_pmem(struct page *page, unsigned int off,
		void *pmem_addr, unsigned int len)
{
	unsigned int chunk;
	unsigned long rem;
	void *mem;

	while (len) {
		mem = kmap_atomic(page);
		chunk = min_t(unsigned int, len, PAGE_SIZE - off);
		rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
		kunmap_atomic(mem);
		if (rem)
			return BLK_STS_IOERR;
		len -= chunk;
		off = 0;
		page++;
		pmem_addr += chunk;
	}
	return BLK_STS_OK;
}

static blk_status_t pmem_do_read(struct pmem_device *pmem,
			struct page *page, unsigned int page_off,
			sector_t sector, unsigned int len)
{
	blk_status_t rc;
	phys_addr_t pmem_off = to_offset(pmem, sector);
	void *pmem_addr = pmem->virt_addr + pmem_off;

	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
		return BLK_STS_IOERR;

	rc = read_pmem(page, page_off, pmem_addr, len);
	flush_dcache_page(page);
	return rc;
}

static blk_status_t pmem_do_write(struct pmem_device *pmem,
			struct page *page, unsigned int page_off,
			sector_t sector, unsigned int len)
{
	phys_addr_t pmem_off = to_offset(pmem, sector);
	void *pmem_addr = pmem->virt_addr + pmem_off;

	if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) {
		blk_status_t rc = pmem_clear_poison(pmem, pmem_off, len);

		if (rc != BLK_STS_OK)
			return rc;
	}

	flush_dcache_page(page);
	write_pmem(pmem_addr, page, page_off, len);

	return BLK_STS_OK;
}

static void pmem_submit_bio(struct bio *bio)
{
	int ret = 0;
	blk_status_t rc = 0;
	bool do_acct;
	unsigned long start;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
	struct nd_region *nd_region = to_region(pmem);

	if (bio->bi_opf & REQ_PREFLUSH)
		ret = nvdimm_flush(nd_region, bio);

	do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
	if (do_acct)
		start = bio_start_io_acct(bio);
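	/*
	 * pmem is byte-addressable, so each bio segment is serviced
	 * synchronously right here: writes go through memcpy_flushcache()
	 * and reads through a machine-check-safe copy. The first failing
	 * segment records its status in bi_status and ends the loop.
	 */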
	bio_for_each_segment(bvec, bio, iter) {
		if (op_is_write(bio_op(bio)))
			rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
					iter.bi_sector, bvec.bv_len);
		else
			rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
					iter.bi_sector, bvec.bv_len);
		if (rc) {
			bio->bi_status = rc;
			break;
		}
	}
	if (do_acct)
		bio_end_io_acct(bio, start);

	if (bio->bi_opf & REQ_FUA)
		ret = nvdimm_flush(nd_region, bio);

	if (ret)
		bio->bi_status = errno_to_blk_status(ret);

	bio_endio(bio);
}

static int pmem_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, unsigned int op)
{
	struct pmem_device *pmem = bdev->bd_disk->private_data;
	blk_status_t rc;

	if (op_is_write(op))
		rc = pmem_do_write(pmem, page, 0, sector, thp_size(page));
	else
		rc = pmem_do_read(pmem, page, 0, sector, thp_size(page));
	/*
	 * The ->rw_page interface is subtle and tricky. The core
	 * retries on any error, so we can only invoke page_endio() in
	 * the successful completion case. Otherwise, we'll see crashes
	 * caused by double completion.
	 */
	if (rc == 0)
		page_endio(page, op_is_write(op), 0);

	return blk_status_to_errno(rc);
}

/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
__weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
		long nr_pages, enum dax_access_mode mode, void **kaddr,
		pfn_t *pfn)
{
	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
	sector_t sector = PFN_PHYS(pgoff) >> SECTOR_SHIFT;
	unsigned int num = PFN_PHYS(nr_pages) >> SECTOR_SHIFT;
	struct badblocks *bb = &pmem->bb;
	sector_t first_bad;
	int num_bad;

	if (kaddr)
		*kaddr = pmem->virt_addr + offset;
	if (pfn)
		*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);

	if (bb->count &&
	    badblocks_check(bb, sector, num, &first_bad, &num_bad)) {
		long actual_nr;

		if (mode != DAX_RECOVERY_WRITE)
			return -EIO;

		/*
		 * The recovery stride is the kernel page size because the
		 * underlying driver and firmware clear poison functions
		 * don't appear to handle large chunks (such as 2MiB)
		 * reliably.
		 */
		actual_nr = PHYS_PFN(
			PAGE_ALIGN((first_bad - sector) << SECTOR_SHIFT));
		dev_dbg(pmem->bb.dev, "start sector(%llu), nr_pages(%ld), first_bad(%llu), actual_nr(%ld)\n",
				sector, nr_pages, first_bad, actual_nr);
		if (actual_nr)
			return actual_nr;
		return 1;
	}

	/*
	 * If badblocks are present but not in the range, limit known good
	 * range to the requested range.
	 */
	if (bb->count)
		return nr_pages;
	return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
}

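/*
 * pmem is a bio-based driver: there is no blk-mq request handling or
 * interrupt-driven completion. I/O is serviced synchronously via
 * ->submit_bio(), and single-page I/O via ->rw_page(), both ending in
 * direct copies to or from the persistent memory mapping.
 */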
static const struct block_device_operations pmem_fops = {
	.owner =		THIS_MODULE,
	.submit_bio =		pmem_submit_bio,
	.rw_page =		pmem_rw_page,
};

static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
				    size_t nr_pages)
{
	struct pmem_device *pmem = dax_get_private(dax_dev);

	return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0,
				   PFN_PHYS(pgoff) >> SECTOR_SHIFT,
				   PAGE_SIZE));
}

static long pmem_dax_direct_access(struct dax_device *dax_dev,
		pgoff_t pgoff, long nr_pages, enum dax_access_mode mode,
		void **kaddr, pfn_t *pfn)
{
	struct pmem_device *pmem = dax_get_private(dax_dev);

	return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn);
}

/*
 * The recovery write thread starts out as a normal pwrite thread; once
 * the filesystem learns of a potential media error in the range, it
 * turns the normal pwrite into a dax_recovery_write.
 *
 * The recovery write consists of clearing media poison, clearing the
 * page HWPoison bit, re-enabling page-wide read-write permission,
 * flushing the caches, and finally performing the write. A competing
 * pread thread is held off during recovery, since the data read back
 * might not be valid; this is achieved by clearing the badblock
 * records only after the recovery write completes. Competing recovery
 * write threads are already serialized by the writer lock held by
 * dax_iomap_rw().
 */
static size_t pmem_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct pmem_device *pmem = dax_get_private(dax_dev);
	size_t olen, len, off;
	phys_addr_t pmem_off;
	struct device *dev = pmem->bb.dev;
	long cleared;

	off = offset_in_page(addr);
	len = PFN_PHYS(PFN_UP(off + bytes));
	if (!is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) >> SECTOR_SHIFT, len))
		return _copy_from_iter_flushcache(addr, bytes, i);

	/*
	 * A range that is not page-aligned cannot be recovered. This should
	 * not happen unless something else went wrong.
	 */
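	/* Recovery works in whole-page units: HWPoison and the direct-map
	 * repair are page granularity. */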
	if (off || !PAGE_ALIGNED(bytes)) {
		dev_dbg(dev, "Found poison, but addr(%p) or bytes(%#zx) not page aligned\n",
				addr, bytes);
		return 0;
	}

	pmem_off = PFN_PHYS(pgoff) + pmem->data_offset;
	cleared = __pmem_clear_poison(pmem, pmem_off, len);
	if (cleared > 0 && cleared < len) {
		dev_dbg(dev, "poison cleared only %ld out of %zu bytes\n",
				cleared, len);
		return 0;
	}
	if (cleared < 0) {
		dev_dbg(dev, "poison clear failed: %ld\n", cleared);
		return 0;
	}

	olen = _copy_from_iter_flushcache(addr, bytes, i);
	pmem_clear_bb(pmem, to_sect(pmem, pmem_off), cleared >> SECTOR_SHIFT);

	return olen;
}

static const struct dax_operations pmem_dax_ops = {
	.direct_access = pmem_dax_direct_access,
	.zero_page_range = pmem_dax_zero_page_range,
	.recovery_write = pmem_recovery_write,
};

static ssize_t write_cache_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct pmem_device *pmem = dev_to_disk(dev)->private_data;

	return sprintf(buf, "%d\n", !!dax_write_cache_enabled(pmem->dax_dev));
}

static ssize_t write_cache_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct pmem_device *pmem = dev_to_disk(dev)->private_data;
	bool write_cache;
	int rc;

	rc = strtobool(buf, &write_cache);
	if (rc)
		return rc;
	dax_write_cache(pmem->dax_dev, write_cache);
	return len;
}
static DEVICE_ATTR_RW(write_cache);

static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
#ifndef CONFIG_ARCH_HAS_PMEM_API
	if (a == &dev_attr_write_cache.attr)
		return 0;
#endif
	return a->mode;
}

static struct attribute *dax_attributes[] = {
	&dev_attr_write_cache.attr,
	NULL,
};

static const struct attribute_group dax_attribute_group = {
	.name = "dax",
	.attrs = dax_attributes,
	.is_visible = dax_visible,
};

static const struct attribute_group *pmem_attribute_groups[] = {
	&dax_attribute_group,
	NULL,
};

static void pmem_release_disk(void *__pmem)
{
	struct pmem_device *pmem = __pmem;

	dax_remove_host(pmem->disk);
	kill_dax(pmem->dax_dev);
	put_dax(pmem->dax_dev);
	del_gendisk(pmem->disk);

	blk_cleanup_disk(pmem->disk);
}

static int pmem_attach_disk(struct device *dev,
		struct nd_namespace_common *ndns)
{
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	struct nd_region *nd_region = to_nd_region(dev->parent);
	int nid = dev_to_node(dev), fua;
	struct resource *res = &nsio->res;
	struct range bb_range;
	struct nd_pfn *nd_pfn = NULL;
	struct dax_device *dax_dev;
	struct nd_pfn_sb *pfn_sb;
	struct pmem_device *pmem;
	struct request_queue *q;
	struct gendisk *disk;
	void *addr;
	int rc;

	pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
	if (!pmem)
		return -ENOMEM;

	rc = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
	if (rc)
		return rc;

	/* while nsio_rw_bytes is active, parse a pfn info block if present */
	if (is_nd_pfn(dev)) {
		nd_pfn = to_nd_pfn(dev);
		rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap);
		if (rc)
			return rc;
	}

	/* we're attaching a block device, disable raw namespace access */
	devm_namespace_disable(dev, ndns);

	dev_set_drvdata(dev, pmem);
	pmem->phys_addr = res->start;
	pmem->size = resource_size(res);
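	/*
	 * Writes are only guaranteed durable when the arch can flush them
	 * out of the CPU cache (ARCH_HAS_UACCESS_FLUSHCACHE) and the
	 * platform reports how pending writes reach the media. A negative
	 * nvdimm_has_flush() means the latter is unknown, so warn and
	 * advertise the write cache without FUA support.
	 */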
	fua = nvdimm_has_flush(nd_region);
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) || fua < 0) {
		dev_warn(dev, "unable to guarantee persistence of writes\n");
		fua = 0;
	}

	if (!devm_request_mem_region(dev, res->start, resource_size(res),
				dev_name(&ndns->dev))) {
		dev_warn(dev, "could not reserve region %pR\n", res);
		return -EBUSY;
	}

	disk = blk_alloc_disk(nid);
	if (!disk)
		return -ENOMEM;
	q = disk->queue;

	pmem->disk = disk;
	pmem->pgmap.owner = pmem;
	pmem->pfn_flags = PFN_DEV;
	if (is_nd_pfn(dev)) {
		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
		addr = devm_memremap_pages(dev, &pmem->pgmap);
		pfn_sb = nd_pfn->pfn_sb;
		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
		pmem->pfn_pad = resource_size(res) -
			range_len(&pmem->pgmap.range);
		pmem->pfn_flags |= PFN_MAP;
		bb_range = pmem->pgmap.range;
		bb_range.start += pmem->data_offset;
	} else if (pmem_should_map_pages(dev)) {
		pmem->pgmap.range.start = res->start;
		pmem->pgmap.range.end = res->end;
		pmem->pgmap.nr_range = 1;
		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
		addr = devm_memremap_pages(dev, &pmem->pgmap);
		pmem->pfn_flags |= PFN_MAP;
		bb_range = pmem->pgmap.range;
	} else {
		addr = devm_memremap(dev, pmem->phys_addr,
				pmem->size, ARCH_MEMREMAP_PMEM);
		bb_range.start = res->start;
		bb_range.end = res->end;
	}

	if (IS_ERR(addr)) {
		rc = PTR_ERR(addr);
		goto out;
	}
	pmem->virt_addr = addr;

	blk_queue_write_cache(q, true, fua);
	blk_queue_physical_block_size(q, PAGE_SIZE);
	blk_queue_logical_block_size(q, pmem_sector_size(ndns));
	blk_queue_max_hw_sectors(q, UINT_MAX);
	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
	if (pmem->pfn_flags & PFN_MAP)
		blk_queue_flag_set(QUEUE_FLAG_DAX, q);

	disk->fops = &pmem_fops;
	disk->private_data = pmem;
	nvdimm_namespace_disk_name(ndns, disk->disk_name);
	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
			/ 512);
	if (devm_init_badblocks(dev, &pmem->bb))
		return -ENOMEM;
	nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
	disk->bb = &pmem->bb;

	dax_dev = alloc_dax(pmem, &pmem_dax_ops);
	if (IS_ERR(dax_dev)) {
		rc = PTR_ERR(dax_dev);
		goto out;
	}
	set_dax_nocache(dax_dev);
	set_dax_nomc(dax_dev);
	if (is_nvdimm_sync(nd_region))
		set_dax_synchronous(dax_dev);
	rc = dax_add_host(dax_dev, disk);
	if (rc)
		goto out_cleanup_dax;
	dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
	pmem->dax_dev = dax_dev;

	rc = device_add_disk(dev, disk, pmem_attribute_groups);
	if (rc)
		goto out_remove_host;
	if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
		return -ENOMEM;

	nvdimm_check_and_set_ro(disk);

	pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
					  "badblocks");
	if (!pmem->bb_state)
		dev_warn(dev, "'badblocks' notification disabled\n");
	return 0;

out_remove_host:
	dax_remove_host(pmem->disk);
out_cleanup_dax:
	kill_dax(pmem->dax_dev);
	put_dax(pmem->dax_dev);
out:
	blk_cleanup_disk(pmem->disk);
	return rc;
}

static int nd_pmem_probe(struct device *dev)
{
	int ret;
	struct nd_namespace_common *ndns;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);

	if (is_nd_btt(dev))
		return nvdimm_namespace_attach_btt(ndns);

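	/*
	 * A pfn device being probed is already claimed by the pfn
	 * personality, so attach the disk directly; pmem_attach_disk()
	 * parses its info block via nvdimm_setup_pfn(). A plain namespace,
	 * below, first has to be probed for btt/pfn/dax info blocks before
	 * falling back to a raw pmem disk.
	 */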
	if (is_nd_pfn(dev))
		return pmem_attach_disk(dev, ndns);

	ret = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
	if (ret)
		return ret;

	ret = nd_btt_probe(dev, ndns);
	if (ret == 0)
		return -ENXIO;

	/*
	 * We have two failure conditions here: there is no info reserve
	 * block, or we found a valid info reserve block but failed to
	 * initialize the pfn superblock.
	 *
	 * For the first case, consider the namespace as a raw pmem namespace
	 * and attach a disk.
	 *
	 * For the latter, consider this a success and advance the namespace
	 * seed.
	 */
	ret = nd_pfn_probe(dev, ndns);
	if (ret == 0)
		return -ENXIO;
	else if (ret == -EOPNOTSUPP)
		return ret;

	ret = nd_dax_probe(dev, ndns);
	if (ret == 0)
		return -ENXIO;
	else if (ret == -EOPNOTSUPP)
		return ret;

	/* probe complete, attach handles namespace enabling */
	devm_namespace_disable(dev, ndns);

	return pmem_attach_disk(dev, ndns);
}

static void nd_pmem_remove(struct device *dev)
{
	struct pmem_device *pmem = dev_get_drvdata(dev);

	if (is_nd_btt(dev))
		nvdimm_namespace_detach_btt(to_nd_btt(dev));
	else {
		/*
		 * Note, this assumes device_lock() context to not
		 * race nd_pmem_notify()
		 */
		sysfs_put(pmem->bb_state);
		pmem->bb_state = NULL;
	}
	nvdimm_flush(to_nd_region(dev->parent), NULL);
}

static void nd_pmem_shutdown(struct device *dev)
{
	nvdimm_flush(to_nd_region(dev->parent), NULL);
}

static void pmem_revalidate_poison(struct device *dev)
{
	struct nd_region *nd_region;
	resource_size_t offset = 0, end_trunc = 0;
	struct nd_namespace_common *ndns;
	struct nd_namespace_io *nsio;
	struct badblocks *bb;
	struct range range;
	struct kernfs_node *bb_state;

	if (is_nd_btt(dev)) {
		struct nd_btt *nd_btt = to_nd_btt(dev);

		ndns = nd_btt->ndns;
		nd_region = to_nd_region(ndns->dev.parent);
		nsio = to_nd_namespace_io(&ndns->dev);
		bb = &nsio->bb;
		bb_state = NULL;
	} else {
		struct pmem_device *pmem = dev_get_drvdata(dev);

		nd_region = to_region(pmem);
		bb = &pmem->bb;
		bb_state = pmem->bb_state;

		if (is_nd_pfn(dev)) {
			struct nd_pfn *nd_pfn = to_nd_pfn(dev);
			struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;

			ndns = nd_pfn->ndns;
			offset = pmem->data_offset +
					__le32_to_cpu(pfn_sb->start_pad);
			end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
		} else {
			ndns = to_ndns(dev);
		}

		nsio = to_nd_namespace_io(&ndns->dev);
	}

	range.start = nsio->res.start + offset;
	range.end = nsio->res.end - end_trunc;
	nvdimm_badblocks_populate(nd_region, bb, &range);
	if (bb_state)
		sysfs_notify_dirent(bb_state);
}

static void pmem_revalidate_region(struct device *dev)
{
	struct pmem_device *pmem;

	if (is_nd_btt(dev)) {
		struct nd_btt *nd_btt = to_nd_btt(dev);
		struct btt *btt = nd_btt->btt;

		nvdimm_check_and_set_ro(btt->btt_disk);
		return;
	}

	pmem = dev_get_drvdata(dev);
	nvdimm_check_and_set_ro(pmem->disk);
}

static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
{
	switch (event) {
	case NVDIMM_REVALIDATE_POISON:
		pmem_revalidate_poison(dev);
		break;
	case NVDIMM_REVALIDATE_REGION:
		pmem_revalidate_region(dev);
		break;
	default:
		dev_WARN_ONCE(dev, 1, "notify: unknown event: %d\n", event);
		break;
	}
}

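/*
 * The driver binds to both I/O and byte-addressable PMEM namespace types on
 * the nvdimm bus; nd_pmem_probe() then dispatches to the btt, pfn, dax or
 * raw pmem personality as appropriate.
 */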
MODULE_ALIAS("pmem");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
static struct nd_device_driver nd_pmem_driver = {
	.probe = nd_pmem_probe,
	.remove = nd_pmem_remove,
	.notify = nd_pmem_notify,
	.shutdown = nd_pmem_shutdown,
	.drv = {
		.name = "nd_pmem",
	},
	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
};

module_nd_driver(nd_pmem_driver);

MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");