// SPDX-License-Identifier: GPL-2.0-only
/*
 * Persistent Memory Driver
 *
 * Copyright (c) 2014-2015, Intel Corporation.
 * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
 * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
 */

#include <linux/blkdev.h>
#include <linux/pagemap.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/set_memory.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/badblocks.h>
#include <linux/memremap.h>
#include <linux/vmalloc.h>
#include <linux/blk-mq.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/dax.h>
#include <linux/nd.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include "pmem.h"
#include "btt.h"
#include "pfn.h"
#include "nd.h"

static struct device *to_dev(struct pmem_device *pmem)
{
	/*
	 * nvdimm bus services need a 'dev' parameter, and we record the device
	 * at init in bb.dev.
	 */
	return pmem->bb.dev;
}

static struct nd_region *to_region(struct pmem_device *pmem)
{
	return to_nd_region(to_dev(pmem)->parent);
}

static phys_addr_t pmem_to_phys(struct pmem_device *pmem, phys_addr_t offset)
{
	return pmem->phys_addr + offset;
}

static sector_t to_sect(struct pmem_device *pmem, phys_addr_t offset)
{
	return (offset - pmem->data_offset) >> SECTOR_SHIFT;
}

static phys_addr_t to_offset(struct pmem_device *pmem, sector_t sector)
{
	return (sector << SECTOR_SHIFT) + pmem->data_offset;
}

static void pmem_mkpage_present(struct pmem_device *pmem, phys_addr_t offset,
		unsigned int len)
{
	phys_addr_t phys = pmem_to_phys(pmem, offset);
	unsigned long pfn_start, pfn_end, pfn;

	/* only pmem in the linear map supports HWPoison */
	if (is_vmalloc_addr(pmem->virt_addr))
		return;

	pfn_start = PHYS_PFN(phys);
	pfn_end = pfn_start + PHYS_PFN(len);
	for (pfn = pfn_start; pfn < pfn_end; pfn++) {
		struct page *page = pfn_to_page(pfn);

		/*
		 * Note, no need to hold a get_dev_pagemap() reference
		 * here since we're in the driver I/O path and
		 * outstanding I/O requests pin the dev_pagemap.
		 */
		if (test_and_clear_pmem_poison(page))
			clear_mce_nospec(pfn);
	}
}

static void pmem_clear_bb(struct pmem_device *pmem, sector_t sector, long blks)
{
	if (blks == 0)
		return;
	badblocks_clear(&pmem->bb, sector, blks);
	if (pmem->bb_state)
		sysfs_notify_dirent(pmem->bb_state);
}

static long __pmem_clear_poison(struct pmem_device *pmem,
		phys_addr_t offset, unsigned int len)
{
	phys_addr_t phys = pmem_to_phys(pmem, offset);
	long cleared = nvdimm_clear_poison(to_dev(pmem), phys, len);

	if (cleared > 0) {
		pmem_mkpage_present(pmem, offset, cleared);
		arch_invalidate_pmem(pmem->virt_addr + offset, len);
	}
	return cleared;
}

static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
		phys_addr_t offset, unsigned int len)
{
	long cleared = __pmem_clear_poison(pmem, offset, len);

	if (cleared < 0)
		return BLK_STS_IOERR;

	pmem_clear_bb(pmem, to_sect(pmem, offset), cleared >> SECTOR_SHIFT);
	if (cleared < len)
		return BLK_STS_IOERR;
	return BLK_STS_OK;
}

static void write_pmem(void *pmem_addr, struct page *page,
		unsigned int off, unsigned int len)
{
	unsigned int chunk;
	void *mem;

	while (len) {
		mem = kmap_atomic(page);
		chunk = min_t(unsigned int, len, PAGE_SIZE - off);
		memcpy_flushcache(pmem_addr, mem + off, chunk);
		kunmap_atomic(mem);
		len -= chunk;
		off = 0;
		page++;
		pmem_addr += chunk;
	}
}

static blk_status_t read_pmem(struct page *page, unsigned int off,
		void *pmem_addr, unsigned int len)
{
	unsigned int chunk;
	unsigned long rem;
	void *mem;

	while (len) {
		mem = kmap_atomic(page);
		chunk = min_t(unsigned int, len, PAGE_SIZE - off);
		rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
		kunmap_atomic(mem);
		if (rem)
			return BLK_STS_IOERR;
		len -= chunk;
		off = 0;
		page++;
		pmem_addr += chunk;
	}
	return BLK_STS_OK;
}

static blk_status_t pmem_do_read(struct pmem_device *pmem,
			struct page *page, unsigned int page_off,
			sector_t sector, unsigned int len)
{
	blk_status_t rc;
	phys_addr_t pmem_off = to_offset(pmem, sector);
	void *pmem_addr = pmem->virt_addr + pmem_off;

	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
		return BLK_STS_IOERR;

	rc = read_pmem(page, page_off, pmem_addr, len);
	flush_dcache_page(page);
	return rc;
}

static blk_status_t pmem_do_write(struct pmem_device *pmem,
			struct page *page, unsigned int page_off,
			sector_t sector, unsigned int len)
{
	phys_addr_t pmem_off = to_offset(pmem, sector);
	void *pmem_addr = pmem->virt_addr + pmem_off;

	if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) {
		blk_status_t rc = pmem_clear_poison(pmem, pmem_off, len);

		if (rc != BLK_STS_OK)
			return rc;
	}

	flush_dcache_page(page);
	write_pmem(pmem_addr, page, page_off, len);

	return BLK_STS_OK;
}

static void pmem_submit_bio(struct bio *bio)
{
	int ret = 0;
	blk_status_t rc = 0;
	bool do_acct;
	unsigned long start;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
	struct nd_region *nd_region = to_region(pmem);

	if (bio->bi_opf & REQ_PREFLUSH)
		ret = nvdimm_flush(nd_region, bio);

	do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
	if (do_acct)
		start = bio_start_io_acct(bio);
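	/* Carry out each bio segment as a pmem read or write */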
	bio_for_each_segment(bvec, bio, iter) {
		if (op_is_write(bio_op(bio)))
			rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
				iter.bi_sector, bvec.bv_len);
		else
			rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
				iter.bi_sector, bvec.bv_len);
		if (rc) {
			bio->bi_status = rc;
			break;
		}
	}
	if (do_acct)
		bio_end_io_acct(bio, start);

	if (bio->bi_opf & REQ_FUA)
		ret = nvdimm_flush(nd_region, bio);

	if (ret)
		bio->bi_status = errno_to_blk_status(ret);

	bio_endio(bio);
}

static int pmem_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, enum req_op op)
{
	struct pmem_device *pmem = bdev->bd_disk->private_data;
	blk_status_t rc;

	if (op_is_write(op))
		rc = pmem_do_write(pmem, page, 0, sector, thp_size(page));
	else
		rc = pmem_do_read(pmem, page, 0, sector, thp_size(page));
	/*
	 * The ->rw_page interface is subtle and tricky.  The core
	 * retries on any error, so we can only invoke page_endio() in
	 * the successful completion case.  Otherwise, we'll see crashes
	 * caused by double completion.
	 */
	if (rc == 0)
		page_endio(page, op_is_write(op), 0);

	return blk_status_to_errno(rc);
}

/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
__weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
		long nr_pages, enum dax_access_mode mode, void **kaddr,
		pfn_t *pfn)
{
	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
	sector_t sector = PFN_PHYS(pgoff) >> SECTOR_SHIFT;
	unsigned int num = PFN_PHYS(nr_pages) >> SECTOR_SHIFT;
	struct badblocks *bb = &pmem->bb;
	sector_t first_bad;
	int num_bad;

	if (kaddr)
		*kaddr = pmem->virt_addr + offset;
	if (pfn)
		*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);

	if (bb->count &&
	    badblocks_check(bb, sector, num, &first_bad, &num_bad)) {
		long actual_nr;

		if (mode != DAX_RECOVERY_WRITE)
			return -EIO;

		/*
		 * Set the recovery stride to the kernel page size because
		 * the underlying driver and firmware clear-poison functions
		 * don't appear to handle large chunks (such as 2MiB) reliably.
		 */
		actual_nr = PHYS_PFN(
			PAGE_ALIGN((first_bad - sector) << SECTOR_SHIFT));
		dev_dbg(pmem->bb.dev, "start sector(%llu), nr_pages(%ld), first_bad(%llu), actual_nr(%ld)\n",
				sector, nr_pages, first_bad, actual_nr);
		if (actual_nr)
			return actual_nr;
		return 1;
	}

	/*
	 * If badblocks are present but not in the range, limit known good
	 * range to the requested range.
	 */
	if (bb->count)
		return nr_pages;
	return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
}

static const struct block_device_operations pmem_fops = {
	.owner = THIS_MODULE,
	.submit_bio = pmem_submit_bio,
	.rw_page = pmem_rw_page,
};

static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
				    size_t nr_pages)
{
	struct pmem_device *pmem = dax_get_private(dax_dev);

	return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0,
				   PFN_PHYS(pgoff) >> SECTOR_SHIFT,
				   PAGE_SIZE));
}

static long pmem_dax_direct_access(struct dax_device *dax_dev,
		pgoff_t pgoff, long nr_pages, enum dax_access_mode mode,
		void **kaddr, pfn_t *pfn)
{
	struct pmem_device *pmem = dax_get_private(dax_dev);

	return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn);
}

/*
 * The recovery write thread started out as a normal pwrite thread and
 * when the filesystem was told about a potential media error in the
 * range, the filesystem turns the normal pwrite into a dax_recovery_write.
 *
 * The recovery write consists of clearing media poison, clearing the
 * page's HWPoison bit, re-enabling page-wide read-write permission,
 * flushing the caches and finally writing.  A competing pread thread is
 * held off during the recovery process since data read back might not
 * be valid; this is achieved by clearing the badblock records only after
 * the recovery write is complete.  Competing recovery write threads
 * are already serialized by the writer lock held by dax_iomap_rw().
 */
static size_t pmem_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct pmem_device *pmem = dax_get_private(dax_dev);
	size_t olen, len, off;
	phys_addr_t pmem_off;
	struct device *dev = pmem->bb.dev;
	long cleared;

	off = offset_in_page(addr);
	len = PFN_PHYS(PFN_UP(off + bytes));
	if (!is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) >> SECTOR_SHIFT, len))
		return _copy_from_iter_flushcache(addr, bytes, i);

	/*
	 * A range that is not page-aligned cannot be recovered.  This
	 * should not happen unless something else went wrong.
	 */
	if (off || !PAGE_ALIGNED(bytes)) {
		dev_dbg(dev, "Found poison, but addr(%p) or bytes(%#zx) not page aligned\n",
			addr, bytes);
		return 0;
	}

	pmem_off = PFN_PHYS(pgoff) + pmem->data_offset;
	cleared = __pmem_clear_poison(pmem, pmem_off, len);
	if (cleared > 0 && cleared < len) {
		dev_dbg(dev, "poison cleared only %ld out of %zu bytes\n",
			cleared, len);
		return 0;
	}
	if (cleared < 0) {
		dev_dbg(dev, "poison clear failed: %ld\n", cleared);
		return 0;
	}

	olen = _copy_from_iter_flushcache(addr, bytes, i);
	pmem_clear_bb(pmem, to_sect(pmem, pmem_off), cleared >> SECTOR_SHIFT);

	return olen;
}

static const struct dax_operations pmem_dax_ops = {
	.direct_access = pmem_dax_direct_access,
	.zero_page_range = pmem_dax_zero_page_range,
	.recovery_write = pmem_recovery_write,
};

static ssize_t write_cache_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct pmem_device *pmem = dev_to_disk(dev)->private_data;

	return sprintf(buf, "%d\n", !!dax_write_cache_enabled(pmem->dax_dev));
}

static ssize_t write_cache_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct pmem_device *pmem = dev_to_disk(dev)->private_data;
	bool write_cache;
	int rc;

	rc = strtobool(buf, &write_cache);
	if (rc)
		return rc;
	dax_write_cache(pmem->dax_dev, write_cache);
	return len;
}
static DEVICE_ATTR_RW(write_cache);

static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
#ifndef CONFIG_ARCH_HAS_PMEM_API
	if (a == &dev_attr_write_cache.attr)
		return 0;
#endif
	return a->mode;
}

static struct attribute *dax_attributes[] = {
	&dev_attr_write_cache.attr,
	NULL,
};

static const struct attribute_group dax_attribute_group = {
	.name = "dax",
	.attrs = dax_attributes,
	.is_visible = dax_visible,
};

static const struct attribute_group *pmem_attribute_groups[] = {
	&dax_attribute_group,
	NULL,
};

static void pmem_release_disk(void *__pmem)
{
	struct pmem_device *pmem = __pmem;

	dax_remove_host(pmem->disk);
	kill_dax(pmem->dax_dev);
	put_dax(pmem->dax_dev);
	del_gendisk(pmem->disk);

	put_disk(pmem->disk);
}

static int pmem_pagemap_memory_failure(struct dev_pagemap *pgmap,
		unsigned long pfn, unsigned long nr_pages, int mf_flags)
{
	struct pmem_device *pmem =
			container_of(pgmap, struct pmem_device, pgmap);
	u64 offset = PFN_PHYS(pfn) - pmem->phys_addr - pmem->data_offset;
	u64 len = nr_pages << PAGE_SHIFT;

	return dax_holder_notify_failure(pmem->dax_dev, offset, len, mf_flags);
}

static const struct dev_pagemap_ops fsdax_pagemap_ops = {
	.memory_failure = pmem_pagemap_memory_failure,
};

static int pmem_attach_disk(struct device *dev,
		struct nd_namespace_common *ndns)
{
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	struct nd_region *nd_region = to_nd_region(dev->parent);
	int nid = dev_to_node(dev), fua;
	struct resource *res = &nsio->res;
	struct range bb_range;
	struct nd_pfn *nd_pfn = NULL;
	struct dax_device *dax_dev;
	struct nd_pfn_sb *pfn_sb;
	struct pmem_device *pmem;
	struct request_queue *q;
	struct gendisk *disk;
	void *addr;
	int rc;

	pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
	if (!pmem)
		return -ENOMEM;

	rc = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
	if (rc)
		return rc;

	/* while nsio_rw_bytes is active, parse a pfn info block if present */
	if (is_nd_pfn(dev)) {
		nd_pfn = to_nd_pfn(dev);
		rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap);
		if (rc)
			return rc;
	}

	/* we're attaching a block device, disable raw namespace access */
	devm_namespace_disable(dev, ndns);

	dev_set_drvdata(dev, pmem);
	pmem->phys_addr = res->start;
	pmem->size = resource_size(res);
	fua = nvdimm_has_flush(nd_region);
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) || fua < 0) {
		dev_warn(dev, "unable to guarantee persistence of writes\n");
		fua = 0;
	}

	if (!devm_request_mem_region(dev, res->start, resource_size(res),
				dev_name(&ndns->dev))) {
		dev_warn(dev, "could not reserve region %pR\n", res);
		return -EBUSY;
	}

	disk = blk_alloc_disk(nid);
	if (!disk)
		return -ENOMEM;
	q = disk->queue;

	pmem->disk = disk;
	pmem->pgmap.owner = pmem;
	pmem->pfn_flags = PFN_DEV;
	if (is_nd_pfn(dev)) {
		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
		pmem->pgmap.ops = &fsdax_pagemap_ops;
		addr = devm_memremap_pages(dev, &pmem->pgmap);
		pfn_sb = nd_pfn->pfn_sb;
		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
		pmem->pfn_pad = resource_size(res) -
			range_len(&pmem->pgmap.range);
		pmem->pfn_flags |= PFN_MAP;
		bb_range = pmem->pgmap.range;
		bb_range.start += pmem->data_offset;
	} else if (pmem_should_map_pages(dev)) {
		pmem->pgmap.range.start = res->start;
		pmem->pgmap.range.end = res->end;
		pmem->pgmap.nr_range = 1;
		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
		pmem->pgmap.ops = &fsdax_pagemap_ops;
		addr = devm_memremap_pages(dev, &pmem->pgmap);
		pmem->pfn_flags |= PFN_MAP;
		bb_range = pmem->pgmap.range;
	} else {
		addr = devm_memremap(dev, pmem->phys_addr,
				pmem->size, ARCH_MEMREMAP_PMEM);
		bb_range.start = res->start;
		bb_range.end = res->end;
	}

	if (IS_ERR(addr)) {
		rc = PTR_ERR(addr);
		goto out;
	}
	pmem->virt_addr = addr;

	blk_queue_write_cache(q, true, fua);
	blk_queue_physical_block_size(q, PAGE_SIZE);
	blk_queue_logical_block_size(q, pmem_sector_size(ndns));
	blk_queue_max_hw_sectors(q, UINT_MAX);
	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
	if (pmem->pfn_flags & PFN_MAP)
		blk_queue_flag_set(QUEUE_FLAG_DAX, q);

	disk->fops = &pmem_fops;
	disk->private_data = pmem;
	nvdimm_namespace_disk_name(ndns, disk->disk_name);
	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
			/ 512);
	if (devm_init_badblocks(dev, &pmem->bb))
		return -ENOMEM;
	nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
	disk->bb = &pmem->bb;

	dax_dev = alloc_dax(pmem, &pmem_dax_ops);
	if (IS_ERR(dax_dev)) {
		rc = PTR_ERR(dax_dev);
		goto out;
	}
	set_dax_nocache(dax_dev);
	set_dax_nomc(dax_dev);
	if (is_nvdimm_sync(nd_region))
		set_dax_synchronous(dax_dev);
	rc = dax_add_host(dax_dev, disk);
	if (rc)
		goto out_cleanup_dax;
	dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
	pmem->dax_dev = dax_dev;

	rc = device_add_disk(dev, disk, pmem_attribute_groups);
	if (rc)
		goto out_remove_host;
	if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
		return -ENOMEM;

	nvdimm_check_and_set_ro(disk);

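	/*
	 * Cache the 'badblocks' kernfs node so later poison-clearing and
	 * revalidation paths can notify userspace via sysfs_notify_dirent().
	 */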
	pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
					  "badblocks");
	if (!pmem->bb_state)
		dev_warn(dev, "'badblocks' notification disabled\n");
	return 0;

out_remove_host:
	dax_remove_host(pmem->disk);
out_cleanup_dax:
	kill_dax(pmem->dax_dev);
	put_dax(pmem->dax_dev);
out:
	put_disk(pmem->disk);
	return rc;
}

static int nd_pmem_probe(struct device *dev)
{
	int ret;
	struct nd_namespace_common *ndns;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);

	if (is_nd_btt(dev))
		return nvdimm_namespace_attach_btt(ndns);

	if (is_nd_pfn(dev))
		return pmem_attach_disk(dev, ndns);

	ret = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
	if (ret)
		return ret;

	ret = nd_btt_probe(dev, ndns);
	if (ret == 0)
		return -ENXIO;

	/*
	 * We have two failure conditions here: there is no info reserve
	 * block, or we found a valid info reserve block but failed to
	 * initialize the pfn superblock.
	 *
	 * For the first case, consider the namespace as a raw pmem
	 * namespace and attach a disk.
	 *
	 * For the latter, consider this a success and advance the namespace
	 * seed.
	 */
	ret = nd_pfn_probe(dev, ndns);
	if (ret == 0)
		return -ENXIO;
	else if (ret == -EOPNOTSUPP)
		return ret;

	ret = nd_dax_probe(dev, ndns);
	if (ret == 0)
		return -ENXIO;
	else if (ret == -EOPNOTSUPP)
		return ret;

	/* probe complete, attach handles namespace enabling */
	devm_namespace_disable(dev, ndns);

	return pmem_attach_disk(dev, ndns);
}

static void nd_pmem_remove(struct device *dev)
{
	struct pmem_device *pmem = dev_get_drvdata(dev);

	if (is_nd_btt(dev))
		nvdimm_namespace_detach_btt(to_nd_btt(dev));
	else {
		/*
		 * Note, this assumes device_lock() is held so that we do
		 * not race nd_pmem_notify().
		 */
		sysfs_put(pmem->bb_state);
		pmem->bb_state = NULL;
	}
	nvdimm_flush(to_nd_region(dev->parent), NULL);
}

static void nd_pmem_shutdown(struct device *dev)
{
	nvdimm_flush(to_nd_region(dev->parent), NULL);
}

static void pmem_revalidate_poison(struct device *dev)
{
	struct nd_region *nd_region;
	resource_size_t offset = 0, end_trunc = 0;
	struct nd_namespace_common *ndns;
	struct nd_namespace_io *nsio;
	struct badblocks *bb;
	struct range range;
	struct kernfs_node *bb_state;

	if (is_nd_btt(dev)) {
		struct nd_btt *nd_btt = to_nd_btt(dev);

		ndns = nd_btt->ndns;
		nd_region = to_nd_region(ndns->dev.parent);
		nsio = to_nd_namespace_io(&ndns->dev);
		bb = &nsio->bb;
		bb_state = NULL;
	} else {
		struct pmem_device *pmem = dev_get_drvdata(dev);

		nd_region = to_region(pmem);
		bb = &pmem->bb;
		bb_state = pmem->bb_state;

		if (is_nd_pfn(dev)) {
			struct nd_pfn *nd_pfn = to_nd_pfn(dev);
			struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;

			ndns = nd_pfn->ndns;
			offset = pmem->data_offset +
					__le32_to_cpu(pfn_sb->start_pad);
			end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
		} else {
			ndns = to_ndns(dev);
		}

		nsio = to_nd_namespace_io(&ndns->dev);
	}

	range.start = nsio->res.start + offset;
	range.end = nsio->res.end - end_trunc;
	nvdimm_badblocks_populate(nd_region, bb, &range);
	if (bb_state)
		sysfs_notify_dirent(bb_state);
}

static void pmem_revalidate_region(struct device *dev)
{
	struct pmem_device *pmem;

	if (is_nd_btt(dev)) {
		struct nd_btt *nd_btt = to_nd_btt(dev);
		struct btt *btt = nd_btt->btt;

		nvdimm_check_and_set_ro(btt->btt_disk);
		return;
	}

	pmem = dev_get_drvdata(dev);
	nvdimm_check_and_set_ro(pmem->disk);
}

static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
{
	switch (event) {
	case NVDIMM_REVALIDATE_POISON:
		pmem_revalidate_poison(dev);
		break;
	case NVDIMM_REVALIDATE_REGION:
		pmem_revalidate_region(dev);
		break;
	default:
		dev_WARN_ONCE(dev, 1, "notify: unknown event: %d\n", event);
		break;
	}
}

MODULE_ALIAS("pmem");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
static struct nd_device_driver nd_pmem_driver = {
	.probe = nd_pmem_probe,
	.remove = nd_pmem_remove,
	.notify = nd_pmem_notify,
	.shutdown = nd_pmem_shutdown,
	.drv = {
		.name = "nd_pmem",
	},
	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
};

module_nd_driver(nd_pmem_driver);

MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");