/*
 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/libnvdimm.h>
#include <linux/sched/mm.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/fcntl.h>
#include <linux/async.h>
#include <linux/genhd.h>
#include <linux/ndctl.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"
#include "pfn.h"

int nvdimm_major;
static int nvdimm_bus_major;
static struct class *nd_class;
static DEFINE_IDA(nd_ida);

static int to_nd_device_type(struct device *dev)
{
	if (is_nvdimm(dev))
		return ND_DEVICE_DIMM;
	else if (is_memory(dev))
		return ND_DEVICE_REGION_PMEM;
	else if (is_nd_blk(dev))
		return ND_DEVICE_REGION_BLK;
	else if (is_nd_dax(dev))
		return ND_DEVICE_DAX_PMEM;
	else if (is_nd_region(dev->parent))
		return nd_region_to_nstype(to_nd_region(dev->parent));

	return 0;
}

static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
{
	return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
			to_nd_device_type(dev));
}

static struct module *to_bus_provider(struct device *dev)
{
	/* pin bus providers while regions are enabled */
	if (is_nd_region(dev)) {
		struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

		return nvdimm_bus->nd_desc->module;
	}
	return NULL;
}

static void nvdimm_bus_probe_start(struct nvdimm_bus *nvdimm_bus)
{
	nvdimm_bus_lock(&nvdimm_bus->dev);
	nvdimm_bus->probe_active++;
	nvdimm_bus_unlock(&nvdimm_bus->dev);
}

static void nvdimm_bus_probe_end(struct nvdimm_bus *nvdimm_bus)
{
	nvdimm_bus_lock(&nvdimm_bus->dev);
	if (--nvdimm_bus->probe_active == 0)
		wake_up(&nvdimm_bus->probe_wait);
	nvdimm_bus_unlock(&nvdimm_bus->dev);
}

static int nvdimm_bus_probe(struct device *dev)
{
	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
	struct module *provider = to_bus_provider(dev);
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
	int rc;

	if (!try_module_get(provider))
		return -ENXIO;

	dev_dbg(&nvdimm_bus->dev, "START: %s.probe(%s)\n",
			dev->driver->name, dev_name(dev));

	nvdimm_bus_probe_start(nvdimm_bus);
	rc = nd_drv->probe(dev);
	if (rc == 0)
		nd_region_probe_success(nvdimm_bus, dev);
	else
		nd_region_disable(nvdimm_bus, dev);
	nvdimm_bus_probe_end(nvdimm_bus);

	dev_dbg(&nvdimm_bus->dev, "END: %s.probe(%s) = %d\n", dev->driver->name,
			dev_name(dev), rc);

	if (rc != 0)
		module_put(provider);
	return rc;
}
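
/*
 * ->remove() mirrors ->probe(): nvdimm_bus_probe() pinned the bus
 * provider module via to_bus_provider() (dropping the reference itself
 * only on probe failure), so remove is where that reference is finally
 * released.  The probe_active count taken around ->probe() above is
 * what lets wait_nvdimm_bus_probe_idle() below flush in-flight probes
 * before label/config changes are allowed through.
 */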
static int nvdimm_bus_remove(struct device *dev)
{
	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
	struct module *provider = to_bus_provider(dev);
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
	int rc = 0;

	if (nd_drv->remove)
		rc = nd_drv->remove(dev);
	nd_region_disable(nvdimm_bus, dev);

	dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
			dev_name(dev), rc);
	module_put(provider);
	return rc;
}

static void nvdimm_bus_shutdown(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
	struct nd_device_driver *nd_drv = NULL;

	if (dev->driver)
		nd_drv = to_nd_device_driver(dev->driver);

	if (nd_drv && nd_drv->shutdown) {
		nd_drv->shutdown(dev);
		dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n",
				dev->driver->name, dev_name(dev));
	}
}

void nd_device_notify(struct device *dev, enum nvdimm_event event)
{
	device_lock(dev);
	if (dev->driver) {
		struct nd_device_driver *nd_drv;

		nd_drv = to_nd_device_driver(dev->driver);
		if (nd_drv->notify)
			nd_drv->notify(dev, event);
	}
	device_unlock(dev);
}
EXPORT_SYMBOL(nd_device_notify);

void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);

	if (!nvdimm_bus)
		return;

	/* caller is responsible for holding a reference on the device */
	nd_device_notify(&nd_region->dev, event);
}
EXPORT_SYMBOL_GPL(nvdimm_region_notify);

struct clear_badblocks_context {
	resource_size_t phys, cleared;
};

static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
{
	struct clear_badblocks_context *ctx = data;
	struct nd_region *nd_region;
	resource_size_t ndr_end;
	sector_t sector;

	/* make sure device is a region */
	if (!is_nd_pmem(dev))
		return 0;

	nd_region = to_nd_region(dev);
	ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1;

	/* make sure we are in the region */
	if (ctx->phys < nd_region->ndr_start
			|| (ctx->phys + ctx->cleared) > ndr_end)
		return 0;

	sector = (ctx->phys - nd_region->ndr_start) / 512;
	badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512);

	if (nd_region->bb_state)
		sysfs_notify_dirent(nd_region->bb_state);

	return 0;
}

static void nvdimm_clear_badblocks_regions(struct nvdimm_bus *nvdimm_bus,
		phys_addr_t phys, u64 cleared)
{
	struct clear_badblocks_context ctx = {
		.phys = phys,
		.cleared = cleared,
	};

	device_for_each_child(&nvdimm_bus->dev, &ctx,
			nvdimm_clear_badblocks_region);
}

static void nvdimm_account_cleared_poison(struct nvdimm_bus *nvdimm_bus,
		phys_addr_t phys, u64 cleared)
{
	if (cleared > 0)
		badrange_forget(&nvdimm_bus->badrange, phys, cleared);

	if (cleared > 0 && cleared / 512)
		nvdimm_clear_badblocks_regions(nvdimm_bus, phys, cleared);
}
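
/*
 * Clearing poison is a two step handshake with the bus provider: first
 * ND_CMD_ARS_CAP reports the platform's minimum clear-error unit, then,
 * provided @phys and @len are aligned to that unit, ND_CMD_CLEAR_ERROR
 * performs the clear.  On success the cleared range is dropped from the
 * bus's badrange list and from the badblocks of any affected pmem
 * region via nvdimm_account_cleared_poison() above.
 */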
long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
		unsigned int len)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
	struct nvdimm_bus_descriptor *nd_desc;
	struct nd_cmd_clear_error clear_err;
	struct nd_cmd_ars_cap ars_cap;
	u32 clear_err_unit, mask;
	unsigned int noio_flag;
	int cmd_rc, rc;

	if (!nvdimm_bus)
		return -ENXIO;

	nd_desc = nvdimm_bus->nd_desc;
	/*
	 * if ndctl does not exist, it's PMEM_LEGACY and
	 * we want to just pretend everything is handled.
	 */
	if (!nd_desc->ndctl)
		return len;

	memset(&ars_cap, 0, sizeof(ars_cap));
	ars_cap.address = phys;
	ars_cap.length = len;
	noio_flag = memalloc_noio_save();
	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap,
			sizeof(ars_cap), &cmd_rc);
	memalloc_noio_restore(noio_flag);
	if (rc < 0)
		return rc;
	if (cmd_rc < 0)
		return cmd_rc;
	clear_err_unit = ars_cap.clear_err_unit;
	if (!clear_err_unit || !is_power_of_2(clear_err_unit))
		return -ENXIO;

	mask = clear_err_unit - 1;
	if ((phys | len) & mask)
		return -ENXIO;
	memset(&clear_err, 0, sizeof(clear_err));
	clear_err.address = phys;
	clear_err.length = len;
	noio_flag = memalloc_noio_save();
	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err,
			sizeof(clear_err), &cmd_rc);
	memalloc_noio_restore(noio_flag);
	if (rc < 0)
		return rc;
	if (cmd_rc < 0)
		return cmd_rc;

	nvdimm_account_cleared_poison(nvdimm_bus, phys, clear_err.cleared);

	return clear_err.cleared;
}
EXPORT_SYMBOL_GPL(nvdimm_clear_poison);

static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);

static struct bus_type nvdimm_bus_type = {
	.name = "nd",
	.uevent = nvdimm_bus_uevent,
	.match = nvdimm_bus_match,
	.probe = nvdimm_bus_probe,
	.remove = nvdimm_bus_remove,
	.shutdown = nvdimm_bus_shutdown,
};

static void nvdimm_bus_release(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus;

	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
	ida_simple_remove(&nd_ida, nvdimm_bus->id);
	kfree(nvdimm_bus);
}

static bool is_nvdimm_bus(struct device *dev)
{
	return dev->release == nvdimm_bus_release;
}

struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
{
	struct device *dev;

	for (dev = nd_dev; dev; dev = dev->parent)
		if (is_nvdimm_bus(dev))
			break;
	dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
	if (dev)
		return to_nvdimm_bus(dev);
	return NULL;
}

struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus;

	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
	WARN_ON(!is_nvdimm_bus(dev));
	return nvdimm_bus;
}
EXPORT_SYMBOL_GPL(to_nvdimm_bus);

struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm)
{
	return to_nvdimm_bus(nvdimm->dev.parent);
}
EXPORT_SYMBOL_GPL(nvdimm_to_bus);
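
/*
 * Sketch of a provider's registration sequence (illustrative only; the
 * descriptor fields shown are the ones consumed below, while the names
 * prefixed "my_" are hypothetical):
 *
 *	my_desc.attr_groups = my_attr_groups;
 *	my_desc.ndctl = my_ctl;
 *	my_desc.module = THIS_MODULE;
 *	bus = nvdimm_bus_register(parent, &my_desc);
 *	if (!bus)
 *		return -ENOMEM;
 *	...
 *	nvdimm_bus_unregister(bus);
 */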
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
		struct nvdimm_bus_descriptor *nd_desc)
{
	struct nvdimm_bus *nvdimm_bus;
	int rc;

	nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
	if (!nvdimm_bus)
		return NULL;
	INIT_LIST_HEAD(&nvdimm_bus->list);
	INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
	init_waitqueue_head(&nvdimm_bus->probe_wait);
	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
	if (nvdimm_bus->id < 0) {
		kfree(nvdimm_bus);
		return NULL;
	}
	mutex_init(&nvdimm_bus->reconfig_mutex);
	badrange_init(&nvdimm_bus->badrange);
	nvdimm_bus->nd_desc = nd_desc;
	nvdimm_bus->dev.parent = parent;
	nvdimm_bus->dev.release = nvdimm_bus_release;
	nvdimm_bus->dev.groups = nd_desc->attr_groups;
	nvdimm_bus->dev.bus = &nvdimm_bus_type;
	nvdimm_bus->dev.of_node = nd_desc->of_node;
	dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
	rc = device_register(&nvdimm_bus->dev);
	if (rc) {
		dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
		goto err;
	}

	return nvdimm_bus;
 err:
	put_device(&nvdimm_bus->dev);
	return NULL;
}
EXPORT_SYMBOL_GPL(nvdimm_bus_register);

void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
{
	if (!nvdimm_bus)
		return;
	device_unregister(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);

static int child_unregister(struct device *dev, void *data)
{
	/*
	 * the singular ndctl class device per bus needs to be
	 * "device_destroy"ed, so skip it here
	 *
	 * i.e. remove classless children
	 */
	if (dev->class)
		return 0;

	if (is_nvdimm(dev)) {
		struct nvdimm *nvdimm = to_nvdimm(dev);
		bool dev_put = false;

		/* We are shutting down. Make state frozen artificially. */
		nvdimm_bus_lock(dev);
		nvdimm->sec.state = NVDIMM_SECURITY_FROZEN;
		if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags))
			dev_put = true;
		nvdimm_bus_unlock(dev);
		cancel_delayed_work_sync(&nvdimm->dwork);
		if (dev_put)
			put_device(dev);
	}
	nd_device_unregister(dev, ND_SYNC);

	return 0;
}

static void free_badrange_list(struct list_head *badrange_list)
{
	struct badrange_entry *bre, *next;

	list_for_each_entry_safe(bre, next, badrange_list, list) {
		list_del(&bre->list);
		kfree(bre);
	}
	list_del_init(badrange_list);
}

static int nd_bus_remove(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);

	mutex_lock(&nvdimm_bus_list_mutex);
	list_del_init(&nvdimm_bus->list);
	mutex_unlock(&nvdimm_bus_list_mutex);

	nd_synchronize();
	device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);

	spin_lock(&nvdimm_bus->badrange.lock);
	free_badrange_list(&nvdimm_bus->badrange.list);
	spin_unlock(&nvdimm_bus->badrange.lock);

	nvdimm_bus_destroy_ndctl(nvdimm_bus);

	return 0;
}

static int nd_bus_probe(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
	int rc;

	rc = nvdimm_bus_create_ndctl(nvdimm_bus);
	if (rc)
		return rc;

	mutex_lock(&nvdimm_bus_list_mutex);
	list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
	mutex_unlock(&nvdimm_bus_list_mutex);

	/* enable bus provider attributes to look up their local context */
	dev_set_drvdata(dev, nvdimm_bus->nd_desc);

	return 0;
}

static struct nd_device_driver nd_bus_driver = {
	.probe = nd_bus_probe,
	.remove = nd_bus_remove,
	.drv = {
		.name = "nd_bus",
		.suppress_bind_attrs = true,
		.bus = &nvdimm_bus_type,
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
	},
};

static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
{
	struct nd_device_driver *nd_drv = to_nd_device_driver(drv);

	if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver)
		return true;

	return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
}

static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);

void nd_synchronize(void)
{
	async_synchronize_full_domain(&nd_async_domain);
}
EXPORT_SYMBOL_GPL(nd_synchronize);
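
/*
 * Device registration and unregistration are funneled through the
 * exclusive async domain above so that nd_synchronize() can act as a
 * barrier against all in-flight device_add() / device_unregister()
 * work before teardown or label operations proceed.
 */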
static void nd_async_device_register(void *d, async_cookie_t cookie)
{
	struct device *dev = d;

	if (device_add(dev) != 0) {
		dev_err(dev, "%s: failed\n", __func__);
		put_device(dev);
	}
	put_device(dev);
	if (dev->parent)
		put_device(dev->parent);
}

static void nd_async_device_unregister(void *d, async_cookie_t cookie)
{
	struct device *dev = d;

	/* flush bus operations before delete */
	nvdimm_bus_lock(dev);
	nvdimm_bus_unlock(dev);

	device_unregister(dev);
	put_device(dev);
}

void __nd_device_register(struct device *dev)
{
	if (!dev)
		return;

	/*
	 * Ensure that region devices always have their NUMA node set as
	 * early as possible. This way we are able to make certain that
	 * any memory associated with the creation and the creation
	 * itself of the region is associated with the correct node.
	 */
	if (is_nd_region(dev))
		set_dev_node(dev, to_nd_region(dev)->numa_node);

	dev->bus = &nvdimm_bus_type;
	if (dev->parent)
		get_device(dev->parent);
	get_device(dev);
	async_schedule_domain(nd_async_device_register, dev,
			&nd_async_domain);
}

void nd_device_register(struct device *dev)
{
	device_initialize(dev);
	__nd_device_register(dev);
}
EXPORT_SYMBOL(nd_device_register);

void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
{
	switch (mode) {
	case ND_ASYNC:
		get_device(dev);
		async_schedule_domain(nd_async_device_unregister, dev,
				&nd_async_domain);
		break;
	case ND_SYNC:
		nd_synchronize();
		device_unregister(dev);
		break;
	}
}
EXPORT_SYMBOL(nd_device_unregister);

/**
 * __nd_driver_register() - register a region or a namespace driver
 * @nd_drv: driver to register
 * @owner: automatically set by nd_driver_register() macro
 * @mod_name: automatically set by nd_driver_register() macro
 */
int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
		const char *mod_name)
{
	struct device_driver *drv = &nd_drv->drv;

	if (!nd_drv->type) {
		pr_debug("driver type bitmask not set (%pf)\n",
				__builtin_return_address(0));
		return -EINVAL;
	}

	if (!nd_drv->probe) {
		pr_debug("%s ->probe() must be specified\n", mod_name);
		return -EINVAL;
	}

	drv->bus = &nvdimm_bus_type;
	drv->owner = owner;
	drv->mod_name = mod_name;

	return driver_register(drv);
}
EXPORT_SYMBOL(__nd_driver_register);
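
/*
 * Illustrative driver registration (hypothetical "my_" names): a
 * consumer declares which device types it binds to via the ->type
 * bitmask checked above, and typically registers through the
 * nd_driver_register() wrapper mentioned in the kernel-doc:
 *
 *	static struct nd_device_driver my_driver = {
 *		.probe = my_probe,
 *		.remove = my_remove,
 *		.drv = { .name = "my_driver" },
 *		.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
 *	};
 *	nd_driver_register(&my_driver);
 */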
int nvdimm_revalidate_disk(struct gendisk *disk)
{
	struct device *dev = disk_to_dev(disk)->parent;
	struct nd_region *nd_region = to_nd_region(dev->parent);
	int disk_ro = get_disk_ro(disk);

	/*
	 * Upgrade to read-only if the region is read-only; preserve as
	 * read-only if the disk is already read-only.
	 */
	if (disk_ro || nd_region->ro == disk_ro)
		return 0;

	dev_info(dev, "%s read-only, marking %s read-only\n",
			dev_name(&nd_region->dev), disk->disk_name);
	set_disk_ro(disk, 1);

	return 0;
}
EXPORT_SYMBOL(nvdimm_revalidate_disk);

static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n",
			to_nd_device_type(dev));
}
static DEVICE_ATTR_RO(modalias);

static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	return sprintf(buf, "%s\n", dev->type->name);
}
static DEVICE_ATTR_RO(devtype);

static struct attribute *nd_device_attributes[] = {
	&dev_attr_modalias.attr,
	&dev_attr_devtype.attr,
	NULL,
};

/**
 * nd_device_attribute_group - generic attributes for all devices on an nd bus
 */
struct attribute_group nd_device_attribute_group = {
	.attrs = nd_device_attributes,
};
EXPORT_SYMBOL_GPL(nd_device_attribute_group);

static ssize_t numa_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

static struct attribute *nd_numa_attributes[] = {
	&dev_attr_numa_node.attr,
	NULL,
};

static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a,
		int n)
{
	if (!IS_ENABLED(CONFIG_NUMA))
		return 0;

	return a->mode;
}

/**
 * nd_numa_attribute_group - NUMA attributes for all devices on an nd bus
 */
struct attribute_group nd_numa_attribute_group = {
	.attrs = nd_numa_attributes,
	.is_visible = nd_numa_attr_visible,
};
EXPORT_SYMBOL_GPL(nd_numa_attribute_group);

int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
{
	dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id);
	struct device *dev;

	dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
			"ndctl%d", nvdimm_bus->id);

	if (IS_ERR(dev))
		dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
				nvdimm_bus->id, PTR_ERR(dev));
	return PTR_ERR_OR_ZERO(dev);
}

void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
{
	device_destroy(nd_class, MKDEV(nvdimm_bus_major, nvdimm_bus->id));
}

static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = {
	[ND_CMD_IMPLEMENTED] = { },
	[ND_CMD_SMART] = {
		.out_num = 2,
		.out_sizes = { 4, 128, },
	},
	[ND_CMD_SMART_THRESHOLD] = {
		.out_num = 2,
		.out_sizes = { 4, 8, },
	},
	[ND_CMD_DIMM_FLAGS] = {
		.out_num = 2,
		.out_sizes = { 4, 4 },
	},
	[ND_CMD_GET_CONFIG_SIZE] = {
		.out_num = 3,
		.out_sizes = { 4, 4, 4, },
	},
	[ND_CMD_GET_CONFIG_DATA] = {
		.in_num = 2,
		.in_sizes = { 4, 4, },
		.out_num = 2,
		.out_sizes = { 4, UINT_MAX, },
	},
	[ND_CMD_SET_CONFIG_DATA] = {
		.in_num = 3,
		.in_sizes = { 4, 4, UINT_MAX, },
		.out_num = 1,
		.out_sizes = { 4, },
	},
	[ND_CMD_VENDOR] = {
		.in_num = 3,
		.in_sizes = { 4, 4, UINT_MAX, },
		.out_num = 3,
		.out_sizes = { 4, 4, UINT_MAX, },
	},
	[ND_CMD_CALL] = {
		.in_num = 2,
		.in_sizes = { sizeof(struct nd_cmd_pkg), UINT_MAX, },
		.out_num = 1,
		.out_sizes = { UINT_MAX, },
	},
};
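
/*
 * In the descriptor tables above, an in_sizes/out_sizes entry of
 * UINT_MAX marks a variable-length field whose actual size must be
 * computed at runtime by nd_cmd_in_size()/nd_cmd_out_size() from the
 * fixed header fields that precede it.
 */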
const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd)
{
	if (cmd < ARRAY_SIZE(__nd_cmd_dimm_descs))
		return &__nd_cmd_dimm_descs[cmd];
	return NULL;
}
EXPORT_SYMBOL_GPL(nd_cmd_dimm_desc);

static const struct nd_cmd_desc __nd_cmd_bus_descs[] = {
	[ND_CMD_IMPLEMENTED] = { },
	[ND_CMD_ARS_CAP] = {
		.in_num = 2,
		.in_sizes = { 8, 8, },
		.out_num = 4,
		.out_sizes = { 4, 4, 4, 4, },
	},
	[ND_CMD_ARS_START] = {
		.in_num = 5,
		.in_sizes = { 8, 8, 2, 1, 5, },
		.out_num = 2,
		.out_sizes = { 4, 4, },
	},
	[ND_CMD_ARS_STATUS] = {
		.out_num = 3,
		.out_sizes = { 4, 4, UINT_MAX, },
	},
	[ND_CMD_CLEAR_ERROR] = {
		.in_num = 2,
		.in_sizes = { 8, 8, },
		.out_num = 3,
		.out_sizes = { 4, 4, 8, },
	},
	[ND_CMD_CALL] = {
		.in_num = 2,
		.in_sizes = { sizeof(struct nd_cmd_pkg), UINT_MAX, },
		.out_num = 1,
		.out_sizes = { UINT_MAX, },
	},
};

const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd)
{
	if (cmd < ARRAY_SIZE(__nd_cmd_bus_descs))
		return &__nd_cmd_bus_descs[cmd];
	return NULL;
}
EXPORT_SYMBOL_GPL(nd_cmd_bus_desc);

u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
		const struct nd_cmd_desc *desc, int idx, void *buf)
{
	if (idx >= desc->in_num)
		return UINT_MAX;

	if (desc->in_sizes[idx] < UINT_MAX)
		return desc->in_sizes[idx];

	if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA && idx == 2) {
		struct nd_cmd_set_config_hdr *hdr = buf;

		return hdr->in_length;
	} else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) {
		struct nd_cmd_vendor_hdr *hdr = buf;

		return hdr->in_length;
	} else if (cmd == ND_CMD_CALL) {
		struct nd_cmd_pkg *pkg = buf;

		return pkg->nd_size_in;
	}

	return UINT_MAX;
}
EXPORT_SYMBOL_GPL(nd_cmd_in_size);
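
/*
 * Output sizing is trickier than input sizing: a variable output field
 * may be described by an input field (GET_CONFIG_DATA), by an earlier
 * output field (VENDOR, ARS_STATUS), or by the nd_cmd_pkg envelope
 * (ND_CMD_CALL), hence the extra in_field/out_field/remainder
 * parameters below.
 */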
u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
		const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
		const u32 *out_field, unsigned long remainder)
{
	if (idx >= desc->out_num)
		return UINT_MAX;

	if (desc->out_sizes[idx] < UINT_MAX)
		return desc->out_sizes[idx];

	if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1)
		return in_field[1];
	else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
		return out_field[1];
	else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2) {
		/*
		 * Per table 9-276 ARS Data in ACPI 6.1, out_field[1] is
		 * "Size of Output Buffer in bytes, including this
		 * field."
		 */
		if (out_field[1] < 4)
			return 0;
		/*
		 * ACPI 6.1 is ambiguous if 'status' is included in the
		 * output size. If we encounter an output size that
		 * overshoots the remainder by 4 bytes, assume it was
		 * including 'status'.
		 */
		if (out_field[1] - 4 == remainder)
			return remainder;
		return out_field[1] - 8;
	} else if (cmd == ND_CMD_CALL) {
		struct nd_cmd_pkg *pkg = (struct nd_cmd_pkg *) in_field;

		return pkg->nd_size_out;
	}

	return UINT_MAX;
}
EXPORT_SYMBOL_GPL(nd_cmd_out_size);

void wait_nvdimm_bus_probe_idle(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

	do {
		if (nvdimm_bus->probe_active == 0)
			break;
		nvdimm_bus_unlock(&nvdimm_bus->dev);
		wait_event(nvdimm_bus->probe_wait,
				nvdimm_bus->probe_active == 0);
		nvdimm_bus_lock(&nvdimm_bus->dev);
	} while (true);
}

static int nd_pmem_forget_poison_check(struct device *dev, void *data)
{
	struct nd_cmd_clear_error *clear_err =
		(struct nd_cmd_clear_error *)data;
	struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
	struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL;
	struct nd_dax *nd_dax = is_nd_dax(dev) ? to_nd_dax(dev) : NULL;
	struct nd_namespace_common *ndns = NULL;
	struct nd_namespace_io *nsio;
	resource_size_t offset = 0, end_trunc = 0, start, end, pstart, pend;

	if (nd_dax || !dev->driver)
		return 0;

	start = clear_err->address;
	end = clear_err->address + clear_err->cleared - 1;

	if (nd_btt || nd_pfn || nd_dax) {
		if (nd_btt)
			ndns = nd_btt->ndns;
		else if (nd_pfn)
			ndns = nd_pfn->ndns;
		else if (nd_dax)
			ndns = nd_dax->nd_pfn.ndns;

		if (!ndns)
			return 0;
	} else
		ndns = to_ndns(dev);

	nsio = to_nd_namespace_io(&ndns->dev);
	pstart = nsio->res.start + offset;
	pend = nsio->res.end - end_trunc;

	if ((pstart >= start) && (pend <= end))
		return -EBUSY;

	return 0;
}

static int nd_ns_forget_poison_check(struct device *dev, void *data)
{
	return device_for_each_child(dev, data, nd_pmem_forget_poison_check);
}

/* set_config requires an idle interleave set */
static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus,
		struct nvdimm *nvdimm, unsigned int cmd, void *data)
{
	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;

	/* ask the bus provider if it would like to block this request */
	if (nd_desc->clear_to_send) {
		int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd, data);

		if (rc)
			return rc;
	}

	/* require clear error to go through the pmem driver */
	if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR)
		return device_for_each_child(&nvdimm_bus->dev, data,
				nd_ns_forget_poison_check);

	if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA)
		return 0;

	/* prevent label manipulation while the kernel owns label updates */
	wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev);
	if (atomic_read(&nvdimm->busy))
		return -EBUSY;
	return 0;
}
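
/*
 * __nd_ioctl() is the common backend for both the bus ('ndctl') and
 * dimm ('dimmctl') character devices: validate the command against the
 * descriptor table and cmd_mask, size the input and output envelopes,
 * bounce the user buffer through a vmalloc'd copy, and pass it to the
 * provider's ->ndctl() callback under the bus lock.
 */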
static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
		int read_only, unsigned int ioctl_cmd, unsigned long arg)
{
	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
	static char out_env[ND_CMD_MAX_ENVELOPE];
	static char in_env[ND_CMD_MAX_ENVELOPE];
	const struct nd_cmd_desc *desc = NULL;
	unsigned int cmd = _IOC_NR(ioctl_cmd);
	struct device *dev = &nvdimm_bus->dev;
	void __user *p = (void __user *) arg;
	const char *cmd_name, *dimm_name;
	u32 in_len = 0, out_len = 0;
	unsigned int func = cmd;
	unsigned long cmd_mask;
	struct nd_cmd_pkg pkg;
	int rc, i, cmd_rc;
	u64 buf_len = 0;
	void *buf;

	if (nvdimm) {
		desc = nd_cmd_dimm_desc(cmd);
		cmd_name = nvdimm_cmd_name(cmd);
		cmd_mask = nvdimm->cmd_mask;
		dimm_name = dev_name(&nvdimm->dev);
	} else {
		desc = nd_cmd_bus_desc(cmd);
		cmd_name = nvdimm_bus_cmd_name(cmd);
		cmd_mask = nd_desc->cmd_mask;
		dimm_name = "bus";
	}

	if (cmd == ND_CMD_CALL) {
		if (copy_from_user(&pkg, p, sizeof(pkg)))
			return -EFAULT;
	}

	if (!desc || (desc->out_num + desc->in_num == 0) ||
			!test_bit(cmd, &cmd_mask))
		return -ENOTTY;

	/* fail write commands (when read-only) */
	if (read_only)
		switch (cmd) {
		case ND_CMD_VENDOR:
		case ND_CMD_SET_CONFIG_DATA:
		case ND_CMD_ARS_START:
		case ND_CMD_CLEAR_ERROR:
		case ND_CMD_CALL:
			dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
					nvdimm ? nvdimm_cmd_name(cmd)
					: nvdimm_bus_cmd_name(cmd));
			return -EPERM;
		default:
			break;
		}

	/* process an input envelope */
	for (i = 0; i < desc->in_num; i++) {
		u32 in_size, copy;

		in_size = nd_cmd_in_size(nvdimm, cmd, desc, i, in_env);
		if (in_size == UINT_MAX) {
			dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
					__func__, dimm_name, cmd_name, i);
			return -ENXIO;
		}
		if (in_len < sizeof(in_env))
			copy = min_t(u32, sizeof(in_env) - in_len, in_size);
		else
			copy = 0;
		if (copy && copy_from_user(&in_env[in_len], p + in_len, copy))
			return -EFAULT;
		in_len += in_size;
	}

	if (cmd == ND_CMD_CALL) {
		func = pkg.nd_command;
		dev_dbg(dev, "%s, idx: %llu, in: %u, out: %u, len %llu\n",
				dimm_name, pkg.nd_command,
				in_len, out_len, buf_len);
	}

	/* process an output envelope */
	for (i = 0; i < desc->out_num; i++) {
		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
				(u32 *) in_env, (u32 *) out_env, 0);
		u32 copy;

		if (out_size == UINT_MAX) {
			dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
					dimm_name, cmd_name, i);
			return -EFAULT;
		}
		if (out_len < sizeof(out_env))
			copy = min_t(u32, sizeof(out_env) - out_len, out_size);
		else
			copy = 0;
		if (copy && copy_from_user(&out_env[out_len],
					p + in_len + out_len, copy))
			return -EFAULT;
		out_len += out_size;
	}

	buf_len = (u64) out_len + (u64) in_len;
	if (buf_len > ND_IOCTL_MAX_BUFLEN) {
		dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
				cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
		return -EINVAL;
	}

	buf = vmalloc(buf_len);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, p, buf_len)) {
		rc = -EFAULT;
		goto out;
	}

	nvdimm_bus_lock(&nvdimm_bus->dev);
	rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
	if (rc)
		goto out_unlock;

	rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, &cmd_rc);
	if (rc < 0)
		goto out_unlock;

	if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR && cmd_rc >= 0) {
		struct nd_cmd_clear_error *clear_err = buf;

		nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address,
				clear_err->cleared);
	}
	nvdimm_bus_unlock(&nvdimm_bus->dev);

	if (copy_to_user(p, buf, buf_len))
		rc = -EFAULT;

	vfree(buf);
	return rc;

 out_unlock:
	nvdimm_bus_unlock(&nvdimm_bus->dev);
 out:
	vfree(buf);
	return rc;
}
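
/*
 * nd_open() stashes the char-dev minor in file->private_data; for the
 * bus device that minor is the nvdimm_bus id, for the dimm device it
 * is the nvdimm id, which the lookups below match against.
 */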
static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long id = (long) file->private_data;
	int rc = -ENXIO, ro;
	struct nvdimm_bus *nvdimm_bus;

	ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
	mutex_lock(&nvdimm_bus_list_mutex);
	list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
		if (nvdimm_bus->id == id) {
			rc = __nd_ioctl(nvdimm_bus, NULL, ro, cmd, arg);
			break;
		}
	}
	mutex_unlock(&nvdimm_bus_list_mutex);

	return rc;
}

static int match_dimm(struct device *dev, void *data)
{
	long id = (long) data;

	if (is_nvdimm(dev)) {
		struct nvdimm *nvdimm = to_nvdimm(dev);

		return nvdimm->id == id;
	}

	return 0;
}

static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	int rc = -ENXIO, ro;
	struct nvdimm_bus *nvdimm_bus;

	ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
	mutex_lock(&nvdimm_bus_list_mutex);
	list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
		struct device *dev = device_find_child(&nvdimm_bus->dev,
				file->private_data, match_dimm);
		struct nvdimm *nvdimm;

		if (!dev)
			continue;

		nvdimm = to_nvdimm(dev);
		rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
		put_device(dev);
		break;
	}
	mutex_unlock(&nvdimm_bus_list_mutex);

	return rc;
}

static int nd_open(struct inode *inode, struct file *file)
{
	long minor = iminor(inode);

	file->private_data = (void *) minor;
	return 0;
}

static const struct file_operations nvdimm_bus_fops = {
	.owner = THIS_MODULE,
	.open = nd_open,
	.unlocked_ioctl = nd_ioctl,
	.compat_ioctl = nd_ioctl,
	.llseek = noop_llseek,
};

static const struct file_operations nvdimm_fops = {
	.owner = THIS_MODULE,
	.open = nd_open,
	.unlocked_ioctl = nvdimm_ioctl,
	.compat_ioctl = nvdimm_ioctl,
	.llseek = noop_llseek,
};

int __init nvdimm_bus_init(void)
{
	int rc;

	rc = bus_register(&nvdimm_bus_type);
	if (rc)
		return rc;

	rc = register_chrdev(0, "ndctl", &nvdimm_bus_fops);
	if (rc < 0)
		goto err_bus_chrdev;
	nvdimm_bus_major = rc;

	rc = register_chrdev(0, "dimmctl", &nvdimm_fops);
	if (rc < 0)
		goto err_dimm_chrdev;
	nvdimm_major = rc;

	nd_class = class_create(THIS_MODULE, "nd");
	if (IS_ERR(nd_class)) {
		rc = PTR_ERR(nd_class);
		goto err_class;
	}

	rc = driver_register(&nd_bus_driver.drv);
	if (rc)
		goto err_nd_bus;

	return 0;

 err_nd_bus:
	class_destroy(nd_class);
 err_class:
	unregister_chrdev(nvdimm_major, "dimmctl");
 err_dimm_chrdev:
	unregister_chrdev(nvdimm_bus_major, "ndctl");
 err_bus_chrdev:
	bus_unregister(&nvdimm_bus_type);

	return rc;
}

void nvdimm_bus_exit(void)
{
	driver_unregister(&nd_bus_driver.drv);
	class_destroy(nd_class);
	unregister_chrdev(nvdimm_bus_major, "ndctl");
	unregister_chrdev(nvdimm_major, "dimmctl");
	bus_unregister(&nvdimm_bus_type);
	ida_destroy(&nd_ida);
}