1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */ 3 #include <linux/memremap.h> 4 #include <linux/device.h> 5 #include <linux/mutex.h> 6 #include <linux/list.h> 7 #include <linux/slab.h> 8 #include <linux/dax.h> 9 #include <linux/io.h> 10 #include "dax-private.h" 11 #include "bus.h" 12 13 static struct class *dax_class; 14 15 static DEFINE_MUTEX(dax_bus_lock); 16 17 #define DAX_NAME_LEN 30 18 struct dax_id { 19 struct list_head list; 20 char dev_name[DAX_NAME_LEN]; 21 }; 22 23 static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env) 24 { 25 /* 26 * We only ever expect to handle device-dax instances, i.e. the 27 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero 28 */ 29 return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0); 30 } 31 32 static struct dax_device_driver *to_dax_drv(struct device_driver *drv) 33 { 34 return container_of(drv, struct dax_device_driver, drv); 35 } 36 37 static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv, 38 const char *dev_name) 39 { 40 struct dax_id *dax_id; 41 42 lockdep_assert_held(&dax_bus_lock); 43 44 list_for_each_entry(dax_id, &dax_drv->ids, list) 45 if (sysfs_streq(dax_id->dev_name, dev_name)) 46 return dax_id; 47 return NULL; 48 } 49 50 static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev) 51 { 52 int match; 53 54 mutex_lock(&dax_bus_lock); 55 match = !!__dax_match_id(dax_drv, dev_name(dev)); 56 mutex_unlock(&dax_bus_lock); 57 58 return match; 59 } 60 61 enum id_action { 62 ID_REMOVE, 63 ID_ADD, 64 }; 65 66 static ssize_t do_id_store(struct device_driver *drv, const char *buf, 67 size_t count, enum id_action action) 68 { 69 struct dax_device_driver *dax_drv = to_dax_drv(drv); 70 unsigned int region_id, id; 71 char devname[DAX_NAME_LEN]; 72 struct dax_id *dax_id; 73 ssize_t rc = count; 74 int fields; 75 76 fields = sscanf(buf, "dax%d.%d", ®ion_id, &id); 77 if (fields != 2) 78 return -EINVAL; 79 sprintf(devname, "dax%d.%d", region_id, id); 80 if (!sysfs_streq(buf, devname)) 81 return -EINVAL; 82 83 mutex_lock(&dax_bus_lock); 84 dax_id = __dax_match_id(dax_drv, buf); 85 if (!dax_id) { 86 if (action == ID_ADD) { 87 dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL); 88 if (dax_id) { 89 strncpy(dax_id->dev_name, buf, DAX_NAME_LEN); 90 list_add(&dax_id->list, &dax_drv->ids); 91 } else 92 rc = -ENOMEM; 93 } else 94 /* nothing to remove */; 95 } else if (action == ID_REMOVE) { 96 list_del(&dax_id->list); 97 kfree(dax_id); 98 } else 99 /* dax_id already added */; 100 mutex_unlock(&dax_bus_lock); 101 102 if (rc < 0) 103 return rc; 104 if (action == ID_ADD) 105 rc = driver_attach(drv); 106 if (rc) 107 return rc; 108 return count; 109 } 110 111 static ssize_t new_id_store(struct device_driver *drv, const char *buf, 112 size_t count) 113 { 114 return do_id_store(drv, buf, count, ID_ADD); 115 } 116 static DRIVER_ATTR_WO(new_id); 117 118 static ssize_t remove_id_store(struct device_driver *drv, const char *buf, 119 size_t count) 120 { 121 return do_id_store(drv, buf, count, ID_REMOVE); 122 } 123 static DRIVER_ATTR_WO(remove_id); 124 125 static struct attribute *dax_drv_attrs[] = { 126 &driver_attr_new_id.attr, 127 &driver_attr_remove_id.attr, 128 NULL, 129 }; 130 ATTRIBUTE_GROUPS(dax_drv); 131 132 static int dax_bus_match(struct device *dev, struct device_driver *drv); 133 134 static bool is_static(struct dax_region *dax_region) 135 { 136 return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0; 137 } 138 139 static u64 dev_dax_size(struct dev_dax *dev_dax) 140 { 141 u64 size = 0; 142 int i; 143 144 device_lock_assert(&dev_dax->dev); 145 146 for (i = 0; i < dev_dax->nr_range; i++) 147 size += range_len(&dev_dax->ranges[i].range); 148 149 return size; 150 } 151 152 static int dax_bus_probe(struct device *dev) 153 { 154 struct dax_device_driver *dax_drv = to_dax_drv(dev->driver); 155 struct dev_dax *dev_dax = to_dev_dax(dev); 156 struct dax_region *dax_region = dev_dax->region; 157 int rc; 158 159 if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0) 160 return -ENXIO; 161 162 rc = dax_drv->probe(dev_dax); 163 164 if (rc || is_static(dax_region)) 165 return rc; 166 167 /* 168 * Track new seed creation only after successful probe of the 169 * previous seed. 170 */ 171 if (dax_region->seed == dev) 172 dax_region->seed = NULL; 173 174 return 0; 175 } 176 177 static int dax_bus_remove(struct device *dev) 178 { 179 struct dax_device_driver *dax_drv = to_dax_drv(dev->driver); 180 struct dev_dax *dev_dax = to_dev_dax(dev); 181 182 if (dax_drv->remove) 183 dax_drv->remove(dev_dax); 184 185 return 0; 186 } 187 188 static struct bus_type dax_bus_type = { 189 .name = "dax", 190 .uevent = dax_bus_uevent, 191 .match = dax_bus_match, 192 .probe = dax_bus_probe, 193 .remove = dax_bus_remove, 194 .drv_groups = dax_drv_groups, 195 }; 196 197 static int dax_bus_match(struct device *dev, struct device_driver *drv) 198 { 199 struct dax_device_driver *dax_drv = to_dax_drv(drv); 200 201 /* 202 * All but the 'device-dax' driver, which has 'match_always' 203 * set, requires an exact id match. 204 */ 205 if (dax_drv->match_always) 206 return 1; 207 208 return dax_match_id(dax_drv, dev); 209 } 210 211 /* 212 * Rely on the fact that drvdata is set before the attributes are 213 * registered, and that the attributes are unregistered before drvdata 214 * is cleared to assume that drvdata is always valid. 215 */ 216 static ssize_t id_show(struct device *dev, 217 struct device_attribute *attr, char *buf) 218 { 219 struct dax_region *dax_region = dev_get_drvdata(dev); 220 221 return sprintf(buf, "%d\n", dax_region->id); 222 } 223 static DEVICE_ATTR_RO(id); 224 225 static ssize_t region_size_show(struct device *dev, 226 struct device_attribute *attr, char *buf) 227 { 228 struct dax_region *dax_region = dev_get_drvdata(dev); 229 230 return sprintf(buf, "%llu\n", (unsigned long long) 231 resource_size(&dax_region->res)); 232 } 233 static struct device_attribute dev_attr_region_size = __ATTR(size, 0444, 234 region_size_show, NULL); 235 236 static ssize_t region_align_show(struct device *dev, 237 struct device_attribute *attr, char *buf) 238 { 239 struct dax_region *dax_region = dev_get_drvdata(dev); 240 241 return sprintf(buf, "%u\n", dax_region->align); 242 } 243 static struct device_attribute dev_attr_region_align = 244 __ATTR(align, 0400, region_align_show, NULL); 245 246 #define for_each_dax_region_resource(dax_region, res) \ 247 for (res = (dax_region)->res.child; res; res = res->sibling) 248 249 static unsigned long long dax_region_avail_size(struct dax_region *dax_region) 250 { 251 resource_size_t size = resource_size(&dax_region->res); 252 struct resource *res; 253 254 device_lock_assert(dax_region->dev); 255 256 for_each_dax_region_resource(dax_region, res) 257 size -= resource_size(res); 258 return size; 259 } 260 261 static ssize_t available_size_show(struct device *dev, 262 struct device_attribute *attr, char *buf) 263 { 264 struct dax_region *dax_region = dev_get_drvdata(dev); 265 unsigned long long size; 266 267 device_lock(dev); 268 size = dax_region_avail_size(dax_region); 269 device_unlock(dev); 270 271 return sprintf(buf, "%llu\n", size); 272 } 273 static DEVICE_ATTR_RO(available_size); 274 275 static ssize_t seed_show(struct device *dev, 276 struct device_attribute *attr, char *buf) 277 { 278 struct dax_region *dax_region = dev_get_drvdata(dev); 279 struct device *seed; 280 ssize_t rc; 281 282 if (is_static(dax_region)) 283 return -EINVAL; 284 285 device_lock(dev); 286 seed = dax_region->seed; 287 rc = sprintf(buf, "%s\n", seed ? dev_name(seed) : ""); 288 device_unlock(dev); 289 290 return rc; 291 } 292 static DEVICE_ATTR_RO(seed); 293 294 static ssize_t create_show(struct device *dev, 295 struct device_attribute *attr, char *buf) 296 { 297 struct dax_region *dax_region = dev_get_drvdata(dev); 298 struct device *youngest; 299 ssize_t rc; 300 301 if (is_static(dax_region)) 302 return -EINVAL; 303 304 device_lock(dev); 305 youngest = dax_region->youngest; 306 rc = sprintf(buf, "%s\n", youngest ? dev_name(youngest) : ""); 307 device_unlock(dev); 308 309 return rc; 310 } 311 312 static ssize_t create_store(struct device *dev, struct device_attribute *attr, 313 const char *buf, size_t len) 314 { 315 struct dax_region *dax_region = dev_get_drvdata(dev); 316 unsigned long long avail; 317 ssize_t rc; 318 int val; 319 320 if (is_static(dax_region)) 321 return -EINVAL; 322 323 rc = kstrtoint(buf, 0, &val); 324 if (rc) 325 return rc; 326 if (val != 1) 327 return -EINVAL; 328 329 device_lock(dev); 330 avail = dax_region_avail_size(dax_region); 331 if (avail == 0) 332 rc = -ENOSPC; 333 else { 334 struct dev_dax_data data = { 335 .dax_region = dax_region, 336 .size = 0, 337 .id = -1, 338 }; 339 struct dev_dax *dev_dax = devm_create_dev_dax(&data); 340 341 if (IS_ERR(dev_dax)) 342 rc = PTR_ERR(dev_dax); 343 else { 344 /* 345 * In support of crafting multiple new devices 346 * simultaneously multiple seeds can be created, 347 * but only the first one that has not been 348 * successfully bound is tracked as the region 349 * seed. 350 */ 351 if (!dax_region->seed) 352 dax_region->seed = &dev_dax->dev; 353 dax_region->youngest = &dev_dax->dev; 354 rc = len; 355 } 356 } 357 device_unlock(dev); 358 359 return rc; 360 } 361 static DEVICE_ATTR_RW(create); 362 363 void kill_dev_dax(struct dev_dax *dev_dax) 364 { 365 struct dax_device *dax_dev = dev_dax->dax_dev; 366 struct inode *inode = dax_inode(dax_dev); 367 368 kill_dax(dax_dev); 369 unmap_mapping_range(inode->i_mapping, 0, 0, 1); 370 } 371 EXPORT_SYMBOL_GPL(kill_dev_dax); 372 373 static void trim_dev_dax_range(struct dev_dax *dev_dax) 374 { 375 int i = dev_dax->nr_range - 1; 376 struct range *range = &dev_dax->ranges[i].range; 377 struct dax_region *dax_region = dev_dax->region; 378 379 device_lock_assert(dax_region->dev); 380 dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i, 381 (unsigned long long)range->start, 382 (unsigned long long)range->end); 383 384 __release_region(&dax_region->res, range->start, range_len(range)); 385 if (--dev_dax->nr_range == 0) { 386 kfree(dev_dax->ranges); 387 dev_dax->ranges = NULL; 388 } 389 } 390 391 static void free_dev_dax_ranges(struct dev_dax *dev_dax) 392 { 393 while (dev_dax->nr_range) 394 trim_dev_dax_range(dev_dax); 395 } 396 397 static void unregister_dev_dax(void *dev) 398 { 399 struct dev_dax *dev_dax = to_dev_dax(dev); 400 401 dev_dbg(dev, "%s\n", __func__); 402 403 kill_dev_dax(dev_dax); 404 free_dev_dax_ranges(dev_dax); 405 device_del(dev); 406 put_device(dev); 407 } 408 409 /* a return value >= 0 indicates this invocation invalidated the id */ 410 static int __free_dev_dax_id(struct dev_dax *dev_dax) 411 { 412 struct dax_region *dax_region = dev_dax->region; 413 struct device *dev = &dev_dax->dev; 414 int rc = dev_dax->id; 415 416 device_lock_assert(dev); 417 418 if (is_static(dax_region) || dev_dax->id < 0) 419 return -1; 420 ida_free(&dax_region->ida, dev_dax->id); 421 dev_dax->id = -1; 422 return rc; 423 } 424 425 static int free_dev_dax_id(struct dev_dax *dev_dax) 426 { 427 struct device *dev = &dev_dax->dev; 428 int rc; 429 430 device_lock(dev); 431 rc = __free_dev_dax_id(dev_dax); 432 device_unlock(dev); 433 return rc; 434 } 435 436 static ssize_t delete_store(struct device *dev, struct device_attribute *attr, 437 const char *buf, size_t len) 438 { 439 struct dax_region *dax_region = dev_get_drvdata(dev); 440 struct dev_dax *dev_dax; 441 struct device *victim; 442 bool do_del = false; 443 int rc; 444 445 if (is_static(dax_region)) 446 return -EINVAL; 447 448 victim = device_find_child_by_name(dax_region->dev, buf); 449 if (!victim) 450 return -ENXIO; 451 452 device_lock(dev); 453 device_lock(victim); 454 dev_dax = to_dev_dax(victim); 455 if (victim->driver || dev_dax_size(dev_dax)) 456 rc = -EBUSY; 457 else { 458 /* 459 * Invalidate the device so it does not become active 460 * again, but always preserve device-id-0 so that 461 * /sys/bus/dax/ is guaranteed to be populated while any 462 * dax_region is registered. 463 */ 464 if (dev_dax->id > 0) { 465 do_del = __free_dev_dax_id(dev_dax) >= 0; 466 rc = len; 467 if (dax_region->seed == victim) 468 dax_region->seed = NULL; 469 if (dax_region->youngest == victim) 470 dax_region->youngest = NULL; 471 } else 472 rc = -EBUSY; 473 } 474 device_unlock(victim); 475 476 /* won the race to invalidate the device, clean it up */ 477 if (do_del) 478 devm_release_action(dev, unregister_dev_dax, victim); 479 device_unlock(dev); 480 put_device(victim); 481 482 return rc; 483 } 484 static DEVICE_ATTR_WO(delete); 485 486 static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a, 487 int n) 488 { 489 struct device *dev = container_of(kobj, struct device, kobj); 490 struct dax_region *dax_region = dev_get_drvdata(dev); 491 492 if (is_static(dax_region)) 493 if (a == &dev_attr_available_size.attr 494 || a == &dev_attr_create.attr 495 || a == &dev_attr_seed.attr 496 || a == &dev_attr_delete.attr) 497 return 0; 498 return a->mode; 499 } 500 501 static struct attribute *dax_region_attributes[] = { 502 &dev_attr_available_size.attr, 503 &dev_attr_region_size.attr, 504 &dev_attr_region_align.attr, 505 &dev_attr_create.attr, 506 &dev_attr_seed.attr, 507 &dev_attr_delete.attr, 508 &dev_attr_id.attr, 509 NULL, 510 }; 511 512 static const struct attribute_group dax_region_attribute_group = { 513 .name = "dax_region", 514 .attrs = dax_region_attributes, 515 .is_visible = dax_region_visible, 516 }; 517 518 static const struct attribute_group *dax_region_attribute_groups[] = { 519 &dax_region_attribute_group, 520 NULL, 521 }; 522 523 static void dax_region_free(struct kref *kref) 524 { 525 struct dax_region *dax_region; 526 527 dax_region = container_of(kref, struct dax_region, kref); 528 kfree(dax_region); 529 } 530 531 void dax_region_put(struct dax_region *dax_region) 532 { 533 kref_put(&dax_region->kref, dax_region_free); 534 } 535 EXPORT_SYMBOL_GPL(dax_region_put); 536 537 static void dax_region_unregister(void *region) 538 { 539 struct dax_region *dax_region = region; 540 541 sysfs_remove_groups(&dax_region->dev->kobj, 542 dax_region_attribute_groups); 543 dax_region_put(dax_region); 544 } 545 546 struct dax_region *alloc_dax_region(struct device *parent, int region_id, 547 struct range *range, int target_node, unsigned int align, 548 unsigned long flags) 549 { 550 struct dax_region *dax_region; 551 552 /* 553 * The DAX core assumes that it can store its private data in 554 * parent->driver_data. This WARN is a reminder / safeguard for 555 * developers of device-dax drivers. 556 */ 557 if (dev_get_drvdata(parent)) { 558 dev_WARN(parent, "dax core failed to setup private data\n"); 559 return NULL; 560 } 561 562 if (!IS_ALIGNED(range->start, align) 563 || !IS_ALIGNED(range_len(range), align)) 564 return NULL; 565 566 dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); 567 if (!dax_region) 568 return NULL; 569 570 dev_set_drvdata(parent, dax_region); 571 kref_init(&dax_region->kref); 572 dax_region->id = region_id; 573 dax_region->align = align; 574 dax_region->dev = parent; 575 dax_region->target_node = target_node; 576 ida_init(&dax_region->ida); 577 dax_region->res = (struct resource) { 578 .start = range->start, 579 .end = range->end, 580 .flags = IORESOURCE_MEM | flags, 581 }; 582 583 if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { 584 kfree(dax_region); 585 return NULL; 586 } 587 588 kref_get(&dax_region->kref); 589 if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region)) 590 return NULL; 591 return dax_region; 592 } 593 EXPORT_SYMBOL_GPL(alloc_dax_region); 594 595 static void dax_mapping_release(struct device *dev) 596 { 597 struct dax_mapping *mapping = to_dax_mapping(dev); 598 struct dev_dax *dev_dax = to_dev_dax(dev->parent); 599 600 ida_free(&dev_dax->ida, mapping->id); 601 kfree(mapping); 602 } 603 604 static void unregister_dax_mapping(void *data) 605 { 606 struct device *dev = data; 607 struct dax_mapping *mapping = to_dax_mapping(dev); 608 struct dev_dax *dev_dax = to_dev_dax(dev->parent); 609 struct dax_region *dax_region = dev_dax->region; 610 611 dev_dbg(dev, "%s\n", __func__); 612 613 device_lock_assert(dax_region->dev); 614 615 dev_dax->ranges[mapping->range_id].mapping = NULL; 616 mapping->range_id = -1; 617 618 device_del(dev); 619 put_device(dev); 620 } 621 622 static struct dev_dax_range *get_dax_range(struct device *dev) 623 { 624 struct dax_mapping *mapping = to_dax_mapping(dev); 625 struct dev_dax *dev_dax = to_dev_dax(dev->parent); 626 struct dax_region *dax_region = dev_dax->region; 627 628 device_lock(dax_region->dev); 629 if (mapping->range_id < 0) { 630 device_unlock(dax_region->dev); 631 return NULL; 632 } 633 634 return &dev_dax->ranges[mapping->range_id]; 635 } 636 637 static void put_dax_range(struct dev_dax_range *dax_range) 638 { 639 struct dax_mapping *mapping = dax_range->mapping; 640 struct dev_dax *dev_dax = to_dev_dax(mapping->dev.parent); 641 struct dax_region *dax_region = dev_dax->region; 642 643 device_unlock(dax_region->dev); 644 } 645 646 static ssize_t start_show(struct device *dev, 647 struct device_attribute *attr, char *buf) 648 { 649 struct dev_dax_range *dax_range; 650 ssize_t rc; 651 652 dax_range = get_dax_range(dev); 653 if (!dax_range) 654 return -ENXIO; 655 rc = sprintf(buf, "%#llx\n", dax_range->range.start); 656 put_dax_range(dax_range); 657 658 return rc; 659 } 660 static DEVICE_ATTR(start, 0400, start_show, NULL); 661 662 static ssize_t end_show(struct device *dev, 663 struct device_attribute *attr, char *buf) 664 { 665 struct dev_dax_range *dax_range; 666 ssize_t rc; 667 668 dax_range = get_dax_range(dev); 669 if (!dax_range) 670 return -ENXIO; 671 rc = sprintf(buf, "%#llx\n", dax_range->range.end); 672 put_dax_range(dax_range); 673 674 return rc; 675 } 676 static DEVICE_ATTR(end, 0400, end_show, NULL); 677 678 static ssize_t pgoff_show(struct device *dev, 679 struct device_attribute *attr, char *buf) 680 { 681 struct dev_dax_range *dax_range; 682 ssize_t rc; 683 684 dax_range = get_dax_range(dev); 685 if (!dax_range) 686 return -ENXIO; 687 rc = sprintf(buf, "%#lx\n", dax_range->pgoff); 688 put_dax_range(dax_range); 689 690 return rc; 691 } 692 static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL); 693 694 static struct attribute *dax_mapping_attributes[] = { 695 &dev_attr_start.attr, 696 &dev_attr_end.attr, 697 &dev_attr_page_offset.attr, 698 NULL, 699 }; 700 701 static const struct attribute_group dax_mapping_attribute_group = { 702 .attrs = dax_mapping_attributes, 703 }; 704 705 static const struct attribute_group *dax_mapping_attribute_groups[] = { 706 &dax_mapping_attribute_group, 707 NULL, 708 }; 709 710 static struct device_type dax_mapping_type = { 711 .release = dax_mapping_release, 712 .groups = dax_mapping_attribute_groups, 713 }; 714 715 static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id) 716 { 717 struct dax_region *dax_region = dev_dax->region; 718 struct dax_mapping *mapping; 719 struct device *dev; 720 int rc; 721 722 device_lock_assert(dax_region->dev); 723 724 if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver, 725 "region disabled\n")) 726 return -ENXIO; 727 728 mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); 729 if (!mapping) 730 return -ENOMEM; 731 mapping->range_id = range_id; 732 mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL); 733 if (mapping->id < 0) { 734 kfree(mapping); 735 return -ENOMEM; 736 } 737 dev_dax->ranges[range_id].mapping = mapping; 738 dev = &mapping->dev; 739 device_initialize(dev); 740 dev->parent = &dev_dax->dev; 741 dev->type = &dax_mapping_type; 742 dev_set_name(dev, "mapping%d", mapping->id); 743 rc = device_add(dev); 744 if (rc) { 745 put_device(dev); 746 return rc; 747 } 748 749 rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping, 750 dev); 751 if (rc) 752 return rc; 753 return 0; 754 } 755 756 static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, 757 resource_size_t size) 758 { 759 struct dax_region *dax_region = dev_dax->region; 760 struct resource *res = &dax_region->res; 761 struct device *dev = &dev_dax->dev; 762 struct dev_dax_range *ranges; 763 unsigned long pgoff = 0; 764 struct resource *alloc; 765 int i, rc; 766 767 device_lock_assert(dax_region->dev); 768 769 /* handle the seed alloc special case */ 770 if (!size) { 771 if (dev_WARN_ONCE(dev, dev_dax->nr_range, 772 "0-size allocation must be first\n")) 773 return -EBUSY; 774 /* nr_range == 0 is elsewhere special cased as 0-size device */ 775 return 0; 776 } 777 778 alloc = __request_region(res, start, size, dev_name(dev), 0); 779 if (!alloc) 780 return -ENOMEM; 781 782 ranges = krealloc(dev_dax->ranges, sizeof(*ranges) 783 * (dev_dax->nr_range + 1), GFP_KERNEL); 784 if (!ranges) { 785 __release_region(res, alloc->start, resource_size(alloc)); 786 return -ENOMEM; 787 } 788 789 for (i = 0; i < dev_dax->nr_range; i++) 790 pgoff += PHYS_PFN(range_len(&ranges[i].range)); 791 dev_dax->ranges = ranges; 792 ranges[dev_dax->nr_range++] = (struct dev_dax_range) { 793 .pgoff = pgoff, 794 .range = { 795 .start = alloc->start, 796 .end = alloc->end, 797 }, 798 }; 799 800 dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1, 801 &alloc->start, &alloc->end); 802 /* 803 * A dev_dax instance must be registered before mapping device 804 * children can be added. Defer to devm_create_dev_dax() to add 805 * the initial mapping device. 806 */ 807 if (!device_is_registered(&dev_dax->dev)) 808 return 0; 809 810 rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1); 811 if (rc) 812 trim_dev_dax_range(dev_dax); 813 814 return rc; 815 } 816 817 static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size) 818 { 819 int last_range = dev_dax->nr_range - 1; 820 struct dev_dax_range *dax_range = &dev_dax->ranges[last_range]; 821 struct dax_region *dax_region = dev_dax->region; 822 bool is_shrink = resource_size(res) > size; 823 struct range *range = &dax_range->range; 824 struct device *dev = &dev_dax->dev; 825 int rc; 826 827 device_lock_assert(dax_region->dev); 828 829 if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n")) 830 return -EINVAL; 831 832 rc = adjust_resource(res, range->start, size); 833 if (rc) 834 return rc; 835 836 *range = (struct range) { 837 .start = range->start, 838 .end = range->start + size - 1, 839 }; 840 841 dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend", 842 last_range, (unsigned long long) range->start, 843 (unsigned long long) range->end); 844 845 return 0; 846 } 847 848 static ssize_t size_show(struct device *dev, 849 struct device_attribute *attr, char *buf) 850 { 851 struct dev_dax *dev_dax = to_dev_dax(dev); 852 unsigned long long size; 853 854 device_lock(dev); 855 size = dev_dax_size(dev_dax); 856 device_unlock(dev); 857 858 return sprintf(buf, "%llu\n", size); 859 } 860 861 static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size) 862 { 863 /* 864 * The minimum mapping granularity for a device instance is a 865 * single subsection, unless the arch says otherwise. 866 */ 867 return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align())); 868 } 869 870 static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size) 871 { 872 resource_size_t to_shrink = dev_dax_size(dev_dax) - size; 873 struct dax_region *dax_region = dev_dax->region; 874 struct device *dev = &dev_dax->dev; 875 int i; 876 877 for (i = dev_dax->nr_range - 1; i >= 0; i--) { 878 struct range *range = &dev_dax->ranges[i].range; 879 struct dax_mapping *mapping = dev_dax->ranges[i].mapping; 880 struct resource *adjust = NULL, *res; 881 resource_size_t shrink; 882 883 shrink = min_t(u64, to_shrink, range_len(range)); 884 if (shrink >= range_len(range)) { 885 devm_release_action(dax_region->dev, 886 unregister_dax_mapping, &mapping->dev); 887 trim_dev_dax_range(dev_dax); 888 to_shrink -= shrink; 889 if (!to_shrink) 890 break; 891 continue; 892 } 893 894 for_each_dax_region_resource(dax_region, res) 895 if (strcmp(res->name, dev_name(dev)) == 0 896 && res->start == range->start) { 897 adjust = res; 898 break; 899 } 900 901 if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1, 902 "failed to find matching resource\n")) 903 return -ENXIO; 904 return adjust_dev_dax_range(dev_dax, adjust, range_len(range) 905 - shrink); 906 } 907 return 0; 908 } 909 910 /* 911 * Only allow adjustments that preserve the relative pgoff of existing 912 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff. 913 */ 914 static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res) 915 { 916 struct dev_dax_range *last; 917 int i; 918 919 if (dev_dax->nr_range == 0) 920 return false; 921 if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0) 922 return false; 923 last = &dev_dax->ranges[dev_dax->nr_range - 1]; 924 if (last->range.start != res->start || last->range.end != res->end) 925 return false; 926 for (i = 0; i < dev_dax->nr_range - 1; i++) { 927 struct dev_dax_range *dax_range = &dev_dax->ranges[i]; 928 929 if (dax_range->pgoff > last->pgoff) 930 return false; 931 } 932 933 return true; 934 } 935 936 static ssize_t dev_dax_resize(struct dax_region *dax_region, 937 struct dev_dax *dev_dax, resource_size_t size) 938 { 939 resource_size_t avail = dax_region_avail_size(dax_region), to_alloc; 940 resource_size_t dev_size = dev_dax_size(dev_dax); 941 struct resource *region_res = &dax_region->res; 942 struct device *dev = &dev_dax->dev; 943 struct resource *res, *first; 944 resource_size_t alloc = 0; 945 int rc; 946 947 if (dev->driver) 948 return -EBUSY; 949 if (size == dev_size) 950 return 0; 951 if (size > dev_size && size - dev_size > avail) 952 return -ENOSPC; 953 if (size < dev_size) 954 return dev_dax_shrink(dev_dax, size); 955 956 to_alloc = size - dev_size; 957 if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc), 958 "resize of %pa misaligned\n", &to_alloc)) 959 return -ENXIO; 960 961 /* 962 * Expand the device into the unused portion of the region. This 963 * may involve adjusting the end of an existing resource, or 964 * allocating a new resource. 965 */ 966 retry: 967 first = region_res->child; 968 if (!first) 969 return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc); 970 971 rc = -ENOSPC; 972 for (res = first; res; res = res->sibling) { 973 struct resource *next = res->sibling; 974 975 /* space at the beginning of the region */ 976 if (res == first && res->start > dax_region->res.start) { 977 alloc = min(res->start - dax_region->res.start, to_alloc); 978 rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc); 979 break; 980 } 981 982 alloc = 0; 983 /* space between allocations */ 984 if (next && next->start > res->end + 1) 985 alloc = min(next->start - (res->end + 1), to_alloc); 986 987 /* space at the end of the region */ 988 if (!alloc && !next && res->end < region_res->end) 989 alloc = min(region_res->end - res->end, to_alloc); 990 991 if (!alloc) 992 continue; 993 994 if (adjust_ok(dev_dax, res)) { 995 rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc); 996 break; 997 } 998 rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc); 999 break; 1000 } 1001 if (rc) 1002 return rc; 1003 to_alloc -= alloc; 1004 if (to_alloc) 1005 goto retry; 1006 return 0; 1007 } 1008 1009 static ssize_t size_store(struct device *dev, struct device_attribute *attr, 1010 const char *buf, size_t len) 1011 { 1012 ssize_t rc; 1013 unsigned long long val; 1014 struct dev_dax *dev_dax = to_dev_dax(dev); 1015 struct dax_region *dax_region = dev_dax->region; 1016 1017 rc = kstrtoull(buf, 0, &val); 1018 if (rc) 1019 return rc; 1020 1021 if (!alloc_is_aligned(dev_dax, val)) { 1022 dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val); 1023 return -EINVAL; 1024 } 1025 1026 device_lock(dax_region->dev); 1027 if (!dax_region->dev->driver) { 1028 device_unlock(dax_region->dev); 1029 return -ENXIO; 1030 } 1031 device_lock(dev); 1032 rc = dev_dax_resize(dax_region, dev_dax, val); 1033 device_unlock(dev); 1034 device_unlock(dax_region->dev); 1035 1036 return rc == 0 ? len : rc; 1037 } 1038 static DEVICE_ATTR_RW(size); 1039 1040 static ssize_t range_parse(const char *opt, size_t len, struct range *range) 1041 { 1042 unsigned long long addr = 0; 1043 char *start, *end, *str; 1044 ssize_t rc = -EINVAL; 1045 1046 str = kstrdup(opt, GFP_KERNEL); 1047 if (!str) 1048 return rc; 1049 1050 end = str; 1051 start = strsep(&end, "-"); 1052 if (!start || !end) 1053 goto err; 1054 1055 rc = kstrtoull(start, 16, &addr); 1056 if (rc) 1057 goto err; 1058 range->start = addr; 1059 1060 rc = kstrtoull(end, 16, &addr); 1061 if (rc) 1062 goto err; 1063 range->end = addr; 1064 1065 err: 1066 kfree(str); 1067 return rc; 1068 } 1069 1070 static ssize_t mapping_store(struct device *dev, struct device_attribute *attr, 1071 const char *buf, size_t len) 1072 { 1073 struct dev_dax *dev_dax = to_dev_dax(dev); 1074 struct dax_region *dax_region = dev_dax->region; 1075 size_t to_alloc; 1076 struct range r; 1077 ssize_t rc; 1078 1079 rc = range_parse(buf, len, &r); 1080 if (rc) 1081 return rc; 1082 1083 rc = -ENXIO; 1084 device_lock(dax_region->dev); 1085 if (!dax_region->dev->driver) { 1086 device_unlock(dax_region->dev); 1087 return rc; 1088 } 1089 device_lock(dev); 1090 1091 to_alloc = range_len(&r); 1092 if (alloc_is_aligned(dev_dax, to_alloc)) 1093 rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc); 1094 device_unlock(dev); 1095 device_unlock(dax_region->dev); 1096 1097 return rc == 0 ? len : rc; 1098 } 1099 static DEVICE_ATTR_WO(mapping); 1100 1101 static ssize_t align_show(struct device *dev, 1102 struct device_attribute *attr, char *buf) 1103 { 1104 struct dev_dax *dev_dax = to_dev_dax(dev); 1105 1106 return sprintf(buf, "%d\n", dev_dax->align); 1107 } 1108 1109 static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax) 1110 { 1111 struct device *dev = &dev_dax->dev; 1112 int i; 1113 1114 for (i = 0; i < dev_dax->nr_range; i++) { 1115 size_t len = range_len(&dev_dax->ranges[i].range); 1116 1117 if (!alloc_is_aligned(dev_dax, len)) { 1118 dev_dbg(dev, "%s: align %u invalid for range %d\n", 1119 __func__, dev_dax->align, i); 1120 return -EINVAL; 1121 } 1122 } 1123 1124 return 0; 1125 } 1126 1127 static ssize_t align_store(struct device *dev, struct device_attribute *attr, 1128 const char *buf, size_t len) 1129 { 1130 struct dev_dax *dev_dax = to_dev_dax(dev); 1131 struct dax_region *dax_region = dev_dax->region; 1132 unsigned long val, align_save; 1133 ssize_t rc; 1134 1135 rc = kstrtoul(buf, 0, &val); 1136 if (rc) 1137 return -ENXIO; 1138 1139 if (!dax_align_valid(val)) 1140 return -EINVAL; 1141 1142 device_lock(dax_region->dev); 1143 if (!dax_region->dev->driver) { 1144 device_unlock(dax_region->dev); 1145 return -ENXIO; 1146 } 1147 1148 device_lock(dev); 1149 if (dev->driver) { 1150 rc = -EBUSY; 1151 goto out_unlock; 1152 } 1153 1154 align_save = dev_dax->align; 1155 dev_dax->align = val; 1156 rc = dev_dax_validate_align(dev_dax); 1157 if (rc) 1158 dev_dax->align = align_save; 1159 out_unlock: 1160 device_unlock(dev); 1161 device_unlock(dax_region->dev); 1162 return rc == 0 ? len : rc; 1163 } 1164 static DEVICE_ATTR_RW(align); 1165 1166 static int dev_dax_target_node(struct dev_dax *dev_dax) 1167 { 1168 struct dax_region *dax_region = dev_dax->region; 1169 1170 return dax_region->target_node; 1171 } 1172 1173 static ssize_t target_node_show(struct device *dev, 1174 struct device_attribute *attr, char *buf) 1175 { 1176 struct dev_dax *dev_dax = to_dev_dax(dev); 1177 1178 return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax)); 1179 } 1180 static DEVICE_ATTR_RO(target_node); 1181 1182 static ssize_t resource_show(struct device *dev, 1183 struct device_attribute *attr, char *buf) 1184 { 1185 struct dev_dax *dev_dax = to_dev_dax(dev); 1186 struct dax_region *dax_region = dev_dax->region; 1187 unsigned long long start; 1188 1189 if (dev_dax->nr_range < 1) 1190 start = dax_region->res.start; 1191 else 1192 start = dev_dax->ranges[0].range.start; 1193 1194 return sprintf(buf, "%#llx\n", start); 1195 } 1196 static DEVICE_ATTR(resource, 0400, resource_show, NULL); 1197 1198 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, 1199 char *buf) 1200 { 1201 /* 1202 * We only ever expect to handle device-dax instances, i.e. the 1203 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero 1204 */ 1205 return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0); 1206 } 1207 static DEVICE_ATTR_RO(modalias); 1208 1209 static ssize_t numa_node_show(struct device *dev, 1210 struct device_attribute *attr, char *buf) 1211 { 1212 return sprintf(buf, "%d\n", dev_to_node(dev)); 1213 } 1214 static DEVICE_ATTR_RO(numa_node); 1215 1216 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n) 1217 { 1218 struct device *dev = container_of(kobj, struct device, kobj); 1219 struct dev_dax *dev_dax = to_dev_dax(dev); 1220 struct dax_region *dax_region = dev_dax->region; 1221 1222 if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0) 1223 return 0; 1224 if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA)) 1225 return 0; 1226 if (a == &dev_attr_mapping.attr && is_static(dax_region)) 1227 return 0; 1228 if ((a == &dev_attr_align.attr || 1229 a == &dev_attr_size.attr) && is_static(dax_region)) 1230 return 0444; 1231 return a->mode; 1232 } 1233 1234 static struct attribute *dev_dax_attributes[] = { 1235 &dev_attr_modalias.attr, 1236 &dev_attr_size.attr, 1237 &dev_attr_mapping.attr, 1238 &dev_attr_target_node.attr, 1239 &dev_attr_align.attr, 1240 &dev_attr_resource.attr, 1241 &dev_attr_numa_node.attr, 1242 NULL, 1243 }; 1244 1245 static const struct attribute_group dev_dax_attribute_group = { 1246 .attrs = dev_dax_attributes, 1247 .is_visible = dev_dax_visible, 1248 }; 1249 1250 static const struct attribute_group *dax_attribute_groups[] = { 1251 &dev_dax_attribute_group, 1252 NULL, 1253 }; 1254 1255 static void dev_dax_release(struct device *dev) 1256 { 1257 struct dev_dax *dev_dax = to_dev_dax(dev); 1258 struct dax_region *dax_region = dev_dax->region; 1259 struct dax_device *dax_dev = dev_dax->dax_dev; 1260 1261 put_dax(dax_dev); 1262 free_dev_dax_id(dev_dax); 1263 dax_region_put(dax_region); 1264 kfree(dev_dax->pgmap); 1265 kfree(dev_dax); 1266 } 1267 1268 static const struct device_type dev_dax_type = { 1269 .release = dev_dax_release, 1270 .groups = dax_attribute_groups, 1271 }; 1272 1273 struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) 1274 { 1275 struct dax_region *dax_region = data->dax_region; 1276 struct device *parent = dax_region->dev; 1277 struct dax_device *dax_dev; 1278 struct dev_dax *dev_dax; 1279 struct inode *inode; 1280 struct device *dev; 1281 int rc; 1282 1283 dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL); 1284 if (!dev_dax) 1285 return ERR_PTR(-ENOMEM); 1286 1287 if (is_static(dax_region)) { 1288 if (dev_WARN_ONCE(parent, data->id < 0, 1289 "dynamic id specified to static region\n")) { 1290 rc = -EINVAL; 1291 goto err_id; 1292 } 1293 1294 dev_dax->id = data->id; 1295 } else { 1296 if (dev_WARN_ONCE(parent, data->id >= 0, 1297 "static id specified to dynamic region\n")) { 1298 rc = -EINVAL; 1299 goto err_id; 1300 } 1301 1302 rc = ida_alloc(&dax_region->ida, GFP_KERNEL); 1303 if (rc < 0) 1304 goto err_id; 1305 dev_dax->id = rc; 1306 } 1307 1308 dev_dax->region = dax_region; 1309 dev = &dev_dax->dev; 1310 device_initialize(dev); 1311 dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id); 1312 1313 rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size); 1314 if (rc) 1315 goto err_range; 1316 1317 if (data->pgmap) { 1318 dev_WARN_ONCE(parent, !is_static(dax_region), 1319 "custom dev_pagemap requires a static dax_region\n"); 1320 1321 dev_dax->pgmap = kmemdup(data->pgmap, 1322 sizeof(struct dev_pagemap), GFP_KERNEL); 1323 if (!dev_dax->pgmap) { 1324 rc = -ENOMEM; 1325 goto err_pgmap; 1326 } 1327 } 1328 1329 /* 1330 * No 'host' or dax_operations since there is no access to this 1331 * device outside of mmap of the resulting character device. 1332 */ 1333 dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC); 1334 if (IS_ERR(dax_dev)) { 1335 rc = PTR_ERR(dax_dev); 1336 goto err_alloc_dax; 1337 } 1338 1339 /* a device_dax instance is dead while the driver is not attached */ 1340 kill_dax(dax_dev); 1341 1342 dev_dax->dax_dev = dax_dev; 1343 dev_dax->target_node = dax_region->target_node; 1344 dev_dax->align = dax_region->align; 1345 ida_init(&dev_dax->ida); 1346 kref_get(&dax_region->kref); 1347 1348 inode = dax_inode(dax_dev); 1349 dev->devt = inode->i_rdev; 1350 if (data->subsys == DEV_DAX_BUS) 1351 dev->bus = &dax_bus_type; 1352 else 1353 dev->class = dax_class; 1354 dev->parent = parent; 1355 dev->type = &dev_dax_type; 1356 1357 rc = device_add(dev); 1358 if (rc) { 1359 kill_dev_dax(dev_dax); 1360 put_device(dev); 1361 return ERR_PTR(rc); 1362 } 1363 1364 rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev); 1365 if (rc) 1366 return ERR_PTR(rc); 1367 1368 /* register mapping device for the initial allocation range */ 1369 if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) { 1370 rc = devm_register_dax_mapping(dev_dax, 0); 1371 if (rc) 1372 return ERR_PTR(rc); 1373 } 1374 1375 return dev_dax; 1376 1377 err_alloc_dax: 1378 kfree(dev_dax->pgmap); 1379 err_pgmap: 1380 free_dev_dax_ranges(dev_dax); 1381 err_range: 1382 free_dev_dax_id(dev_dax); 1383 err_id: 1384 kfree(dev_dax); 1385 1386 return ERR_PTR(rc); 1387 } 1388 EXPORT_SYMBOL_GPL(devm_create_dev_dax); 1389 1390 static int match_always_count; 1391 1392 int __dax_driver_register(struct dax_device_driver *dax_drv, 1393 struct module *module, const char *mod_name) 1394 { 1395 struct device_driver *drv = &dax_drv->drv; 1396 int rc = 0; 1397 1398 /* 1399 * dax_bus_probe() calls dax_drv->probe() unconditionally. 1400 * So better be safe than sorry and ensure it is provided. 1401 */ 1402 if (!dax_drv->probe) 1403 return -EINVAL; 1404 1405 INIT_LIST_HEAD(&dax_drv->ids); 1406 drv->owner = module; 1407 drv->name = mod_name; 1408 drv->mod_name = mod_name; 1409 drv->bus = &dax_bus_type; 1410 1411 /* there can only be one default driver */ 1412 mutex_lock(&dax_bus_lock); 1413 match_always_count += dax_drv->match_always; 1414 if (match_always_count > 1) { 1415 match_always_count--; 1416 WARN_ON(1); 1417 rc = -EINVAL; 1418 } 1419 mutex_unlock(&dax_bus_lock); 1420 if (rc) 1421 return rc; 1422 1423 rc = driver_register(drv); 1424 if (rc && dax_drv->match_always) { 1425 mutex_lock(&dax_bus_lock); 1426 match_always_count -= dax_drv->match_always; 1427 mutex_unlock(&dax_bus_lock); 1428 } 1429 1430 return rc; 1431 } 1432 EXPORT_SYMBOL_GPL(__dax_driver_register); 1433 1434 void dax_driver_unregister(struct dax_device_driver *dax_drv) 1435 { 1436 struct device_driver *drv = &dax_drv->drv; 1437 struct dax_id *dax_id, *_id; 1438 1439 mutex_lock(&dax_bus_lock); 1440 match_always_count -= dax_drv->match_always; 1441 list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) { 1442 list_del(&dax_id->list); 1443 kfree(dax_id); 1444 } 1445 mutex_unlock(&dax_bus_lock); 1446 driver_unregister(drv); 1447 } 1448 EXPORT_SYMBOL_GPL(dax_driver_unregister); 1449 1450 int __init dax_bus_init(void) 1451 { 1452 int rc; 1453 1454 if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) { 1455 dax_class = class_create(THIS_MODULE, "dax"); 1456 if (IS_ERR(dax_class)) 1457 return PTR_ERR(dax_class); 1458 } 1459 1460 rc = bus_register(&dax_bus_type); 1461 if (rc) 1462 class_destroy(dax_class); 1463 return rc; 1464 } 1465 1466 void __exit dax_bus_exit(void) 1467 { 1468 bus_unregister(&dax_bus_type); 1469 class_destroy(dax_class); 1470 } 1471