1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */ 3 #include <linux/memremap.h> 4 #include <linux/device.h> 5 #include <linux/mutex.h> 6 #include <linux/list.h> 7 #include <linux/slab.h> 8 #include <linux/dax.h> 9 #include <linux/io.h> 10 #include "dax-private.h" 11 #include "bus.h" 12 13 static DEFINE_MUTEX(dax_bus_lock); 14 15 #define DAX_NAME_LEN 30 16 struct dax_id { 17 struct list_head list; 18 char dev_name[DAX_NAME_LEN]; 19 }; 20 21 static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env) 22 { 23 /* 24 * We only ever expect to handle device-dax instances, i.e. the 25 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero 26 */ 27 return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0); 28 } 29 30 static struct dax_device_driver *to_dax_drv(struct device_driver *drv) 31 { 32 return container_of(drv, struct dax_device_driver, drv); 33 } 34 35 static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv, 36 const char *dev_name) 37 { 38 struct dax_id *dax_id; 39 40 lockdep_assert_held(&dax_bus_lock); 41 42 list_for_each_entry(dax_id, &dax_drv->ids, list) 43 if (sysfs_streq(dax_id->dev_name, dev_name)) 44 return dax_id; 45 return NULL; 46 } 47 48 static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev) 49 { 50 int match; 51 52 mutex_lock(&dax_bus_lock); 53 match = !!__dax_match_id(dax_drv, dev_name(dev)); 54 mutex_unlock(&dax_bus_lock); 55 56 return match; 57 } 58 59 enum id_action { 60 ID_REMOVE, 61 ID_ADD, 62 }; 63 64 static ssize_t do_id_store(struct device_driver *drv, const char *buf, 65 size_t count, enum id_action action) 66 { 67 struct dax_device_driver *dax_drv = to_dax_drv(drv); 68 unsigned int region_id, id; 69 char devname[DAX_NAME_LEN]; 70 struct dax_id *dax_id; 71 ssize_t rc = count; 72 int fields; 73 74 fields = sscanf(buf, "dax%d.%d", ®ion_id, &id); 75 if (fields != 2) 76 return -EINVAL; 77 sprintf(devname, "dax%d.%d", region_id, id); 78 if (!sysfs_streq(buf, devname)) 79 return -EINVAL; 80 81 mutex_lock(&dax_bus_lock); 82 dax_id = __dax_match_id(dax_drv, buf); 83 if (!dax_id) { 84 if (action == ID_ADD) { 85 dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL); 86 if (dax_id) { 87 strncpy(dax_id->dev_name, buf, DAX_NAME_LEN); 88 list_add(&dax_id->list, &dax_drv->ids); 89 } else 90 rc = -ENOMEM; 91 } 92 } else if (action == ID_REMOVE) { 93 list_del(&dax_id->list); 94 kfree(dax_id); 95 } 96 mutex_unlock(&dax_bus_lock); 97 98 if (rc < 0) 99 return rc; 100 if (action == ID_ADD) 101 rc = driver_attach(drv); 102 if (rc) 103 return rc; 104 return count; 105 } 106 107 static ssize_t new_id_store(struct device_driver *drv, const char *buf, 108 size_t count) 109 { 110 return do_id_store(drv, buf, count, ID_ADD); 111 } 112 static DRIVER_ATTR_WO(new_id); 113 114 static ssize_t remove_id_store(struct device_driver *drv, const char *buf, 115 size_t count) 116 { 117 return do_id_store(drv, buf, count, ID_REMOVE); 118 } 119 static DRIVER_ATTR_WO(remove_id); 120 121 static struct attribute *dax_drv_attrs[] = { 122 &driver_attr_new_id.attr, 123 &driver_attr_remove_id.attr, 124 NULL, 125 }; 126 ATTRIBUTE_GROUPS(dax_drv); 127 128 static int dax_bus_match(struct device *dev, struct device_driver *drv); 129 130 static bool is_static(struct dax_region *dax_region) 131 { 132 return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0; 133 } 134 135 static u64 dev_dax_size(struct dev_dax *dev_dax) 136 { 137 u64 size = 0; 138 int i; 139 140 device_lock_assert(&dev_dax->dev); 141 142 for (i = 0; i < dev_dax->nr_range; i++) 143 size += range_len(&dev_dax->ranges[i].range); 144 145 return size; 146 } 147 148 static int dax_bus_probe(struct device *dev) 149 { 150 struct dax_device_driver *dax_drv = to_dax_drv(dev->driver); 151 struct dev_dax *dev_dax = to_dev_dax(dev); 152 struct dax_region *dax_region = dev_dax->region; 153 int rc; 154 155 if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0) 156 return -ENXIO; 157 158 rc = dax_drv->probe(dev_dax); 159 160 if (rc || is_static(dax_region)) 161 return rc; 162 163 /* 164 * Track new seed creation only after successful probe of the 165 * previous seed. 166 */ 167 if (dax_region->seed == dev) 168 dax_region->seed = NULL; 169 170 return 0; 171 } 172 173 static void dax_bus_remove(struct device *dev) 174 { 175 struct dax_device_driver *dax_drv = to_dax_drv(dev->driver); 176 struct dev_dax *dev_dax = to_dev_dax(dev); 177 178 if (dax_drv->remove) 179 dax_drv->remove(dev_dax); 180 } 181 182 static struct bus_type dax_bus_type = { 183 .name = "dax", 184 .uevent = dax_bus_uevent, 185 .match = dax_bus_match, 186 .probe = dax_bus_probe, 187 .remove = dax_bus_remove, 188 .drv_groups = dax_drv_groups, 189 }; 190 191 static int dax_bus_match(struct device *dev, struct device_driver *drv) 192 { 193 struct dax_device_driver *dax_drv = to_dax_drv(drv); 194 195 /* 196 * All but the 'device-dax' driver, which has 'match_always' 197 * set, requires an exact id match. 198 */ 199 if (dax_drv->match_always) 200 return 1; 201 202 return dax_match_id(dax_drv, dev); 203 } 204 205 /* 206 * Rely on the fact that drvdata is set before the attributes are 207 * registered, and that the attributes are unregistered before drvdata 208 * is cleared to assume that drvdata is always valid. 209 */ 210 static ssize_t id_show(struct device *dev, 211 struct device_attribute *attr, char *buf) 212 { 213 struct dax_region *dax_region = dev_get_drvdata(dev); 214 215 return sprintf(buf, "%d\n", dax_region->id); 216 } 217 static DEVICE_ATTR_RO(id); 218 219 static ssize_t region_size_show(struct device *dev, 220 struct device_attribute *attr, char *buf) 221 { 222 struct dax_region *dax_region = dev_get_drvdata(dev); 223 224 return sprintf(buf, "%llu\n", (unsigned long long) 225 resource_size(&dax_region->res)); 226 } 227 static struct device_attribute dev_attr_region_size = __ATTR(size, 0444, 228 region_size_show, NULL); 229 230 static ssize_t region_align_show(struct device *dev, 231 struct device_attribute *attr, char *buf) 232 { 233 struct dax_region *dax_region = dev_get_drvdata(dev); 234 235 return sprintf(buf, "%u\n", dax_region->align); 236 } 237 static struct device_attribute dev_attr_region_align = 238 __ATTR(align, 0400, region_align_show, NULL); 239 240 #define for_each_dax_region_resource(dax_region, res) \ 241 for (res = (dax_region)->res.child; res; res = res->sibling) 242 243 static unsigned long long dax_region_avail_size(struct dax_region *dax_region) 244 { 245 resource_size_t size = resource_size(&dax_region->res); 246 struct resource *res; 247 248 device_lock_assert(dax_region->dev); 249 250 for_each_dax_region_resource(dax_region, res) 251 size -= resource_size(res); 252 return size; 253 } 254 255 static ssize_t available_size_show(struct device *dev, 256 struct device_attribute *attr, char *buf) 257 { 258 struct dax_region *dax_region = dev_get_drvdata(dev); 259 unsigned long long size; 260 261 device_lock(dev); 262 size = dax_region_avail_size(dax_region); 263 device_unlock(dev); 264 265 return sprintf(buf, "%llu\n", size); 266 } 267 static DEVICE_ATTR_RO(available_size); 268 269 static ssize_t seed_show(struct device *dev, 270 struct device_attribute *attr, char *buf) 271 { 272 struct dax_region *dax_region = dev_get_drvdata(dev); 273 struct device *seed; 274 ssize_t rc; 275 276 if (is_static(dax_region)) 277 return -EINVAL; 278 279 device_lock(dev); 280 seed = dax_region->seed; 281 rc = sprintf(buf, "%s\n", seed ? dev_name(seed) : ""); 282 device_unlock(dev); 283 284 return rc; 285 } 286 static DEVICE_ATTR_RO(seed); 287 288 static ssize_t create_show(struct device *dev, 289 struct device_attribute *attr, char *buf) 290 { 291 struct dax_region *dax_region = dev_get_drvdata(dev); 292 struct device *youngest; 293 ssize_t rc; 294 295 if (is_static(dax_region)) 296 return -EINVAL; 297 298 device_lock(dev); 299 youngest = dax_region->youngest; 300 rc = sprintf(buf, "%s\n", youngest ? dev_name(youngest) : ""); 301 device_unlock(dev); 302 303 return rc; 304 } 305 306 static ssize_t create_store(struct device *dev, struct device_attribute *attr, 307 const char *buf, size_t len) 308 { 309 struct dax_region *dax_region = dev_get_drvdata(dev); 310 unsigned long long avail; 311 ssize_t rc; 312 int val; 313 314 if (is_static(dax_region)) 315 return -EINVAL; 316 317 rc = kstrtoint(buf, 0, &val); 318 if (rc) 319 return rc; 320 if (val != 1) 321 return -EINVAL; 322 323 device_lock(dev); 324 avail = dax_region_avail_size(dax_region); 325 if (avail == 0) 326 rc = -ENOSPC; 327 else { 328 struct dev_dax_data data = { 329 .dax_region = dax_region, 330 .size = 0, 331 .id = -1, 332 }; 333 struct dev_dax *dev_dax = devm_create_dev_dax(&data); 334 335 if (IS_ERR(dev_dax)) 336 rc = PTR_ERR(dev_dax); 337 else { 338 /* 339 * In support of crafting multiple new devices 340 * simultaneously multiple seeds can be created, 341 * but only the first one that has not been 342 * successfully bound is tracked as the region 343 * seed. 344 */ 345 if (!dax_region->seed) 346 dax_region->seed = &dev_dax->dev; 347 dax_region->youngest = &dev_dax->dev; 348 rc = len; 349 } 350 } 351 device_unlock(dev); 352 353 return rc; 354 } 355 static DEVICE_ATTR_RW(create); 356 357 void kill_dev_dax(struct dev_dax *dev_dax) 358 { 359 struct dax_device *dax_dev = dev_dax->dax_dev; 360 struct inode *inode = dax_inode(dax_dev); 361 362 kill_dax(dax_dev); 363 unmap_mapping_range(inode->i_mapping, 0, 0, 1); 364 } 365 EXPORT_SYMBOL_GPL(kill_dev_dax); 366 367 static void trim_dev_dax_range(struct dev_dax *dev_dax) 368 { 369 int i = dev_dax->nr_range - 1; 370 struct range *range = &dev_dax->ranges[i].range; 371 struct dax_region *dax_region = dev_dax->region; 372 373 device_lock_assert(dax_region->dev); 374 dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i, 375 (unsigned long long)range->start, 376 (unsigned long long)range->end); 377 378 __release_region(&dax_region->res, range->start, range_len(range)); 379 if (--dev_dax->nr_range == 0) { 380 kfree(dev_dax->ranges); 381 dev_dax->ranges = NULL; 382 } 383 } 384 385 static void free_dev_dax_ranges(struct dev_dax *dev_dax) 386 { 387 while (dev_dax->nr_range) 388 trim_dev_dax_range(dev_dax); 389 } 390 391 static void unregister_dev_dax(void *dev) 392 { 393 struct dev_dax *dev_dax = to_dev_dax(dev); 394 395 dev_dbg(dev, "%s\n", __func__); 396 397 kill_dev_dax(dev_dax); 398 free_dev_dax_ranges(dev_dax); 399 device_del(dev); 400 put_device(dev); 401 } 402 403 /* a return value >= 0 indicates this invocation invalidated the id */ 404 static int __free_dev_dax_id(struct dev_dax *dev_dax) 405 { 406 struct dax_region *dax_region = dev_dax->region; 407 struct device *dev = &dev_dax->dev; 408 int rc = dev_dax->id; 409 410 device_lock_assert(dev); 411 412 if (is_static(dax_region) || dev_dax->id < 0) 413 return -1; 414 ida_free(&dax_region->ida, dev_dax->id); 415 dev_dax->id = -1; 416 return rc; 417 } 418 419 static int free_dev_dax_id(struct dev_dax *dev_dax) 420 { 421 struct device *dev = &dev_dax->dev; 422 int rc; 423 424 device_lock(dev); 425 rc = __free_dev_dax_id(dev_dax); 426 device_unlock(dev); 427 return rc; 428 } 429 430 static ssize_t delete_store(struct device *dev, struct device_attribute *attr, 431 const char *buf, size_t len) 432 { 433 struct dax_region *dax_region = dev_get_drvdata(dev); 434 struct dev_dax *dev_dax; 435 struct device *victim; 436 bool do_del = false; 437 int rc; 438 439 if (is_static(dax_region)) 440 return -EINVAL; 441 442 victim = device_find_child_by_name(dax_region->dev, buf); 443 if (!victim) 444 return -ENXIO; 445 446 device_lock(dev); 447 device_lock(victim); 448 dev_dax = to_dev_dax(victim); 449 if (victim->driver || dev_dax_size(dev_dax)) 450 rc = -EBUSY; 451 else { 452 /* 453 * Invalidate the device so it does not become active 454 * again, but always preserve device-id-0 so that 455 * /sys/bus/dax/ is guaranteed to be populated while any 456 * dax_region is registered. 457 */ 458 if (dev_dax->id > 0) { 459 do_del = __free_dev_dax_id(dev_dax) >= 0; 460 rc = len; 461 if (dax_region->seed == victim) 462 dax_region->seed = NULL; 463 if (dax_region->youngest == victim) 464 dax_region->youngest = NULL; 465 } else 466 rc = -EBUSY; 467 } 468 device_unlock(victim); 469 470 /* won the race to invalidate the device, clean it up */ 471 if (do_del) 472 devm_release_action(dev, unregister_dev_dax, victim); 473 device_unlock(dev); 474 put_device(victim); 475 476 return rc; 477 } 478 static DEVICE_ATTR_WO(delete); 479 480 static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a, 481 int n) 482 { 483 struct device *dev = container_of(kobj, struct device, kobj); 484 struct dax_region *dax_region = dev_get_drvdata(dev); 485 486 if (is_static(dax_region)) 487 if (a == &dev_attr_available_size.attr 488 || a == &dev_attr_create.attr 489 || a == &dev_attr_seed.attr 490 || a == &dev_attr_delete.attr) 491 return 0; 492 return a->mode; 493 } 494 495 static struct attribute *dax_region_attributes[] = { 496 &dev_attr_available_size.attr, 497 &dev_attr_region_size.attr, 498 &dev_attr_region_align.attr, 499 &dev_attr_create.attr, 500 &dev_attr_seed.attr, 501 &dev_attr_delete.attr, 502 &dev_attr_id.attr, 503 NULL, 504 }; 505 506 static const struct attribute_group dax_region_attribute_group = { 507 .name = "dax_region", 508 .attrs = dax_region_attributes, 509 .is_visible = dax_region_visible, 510 }; 511 512 static const struct attribute_group *dax_region_attribute_groups[] = { 513 &dax_region_attribute_group, 514 NULL, 515 }; 516 517 static void dax_region_free(struct kref *kref) 518 { 519 struct dax_region *dax_region; 520 521 dax_region = container_of(kref, struct dax_region, kref); 522 kfree(dax_region); 523 } 524 525 void dax_region_put(struct dax_region *dax_region) 526 { 527 kref_put(&dax_region->kref, dax_region_free); 528 } 529 EXPORT_SYMBOL_GPL(dax_region_put); 530 531 static void dax_region_unregister(void *region) 532 { 533 struct dax_region *dax_region = region; 534 535 sysfs_remove_groups(&dax_region->dev->kobj, 536 dax_region_attribute_groups); 537 dax_region_put(dax_region); 538 } 539 540 struct dax_region *alloc_dax_region(struct device *parent, int region_id, 541 struct range *range, int target_node, unsigned int align, 542 unsigned long flags) 543 { 544 struct dax_region *dax_region; 545 546 /* 547 * The DAX core assumes that it can store its private data in 548 * parent->driver_data. This WARN is a reminder / safeguard for 549 * developers of device-dax drivers. 550 */ 551 if (dev_get_drvdata(parent)) { 552 dev_WARN(parent, "dax core failed to setup private data\n"); 553 return NULL; 554 } 555 556 if (!IS_ALIGNED(range->start, align) 557 || !IS_ALIGNED(range_len(range), align)) 558 return NULL; 559 560 dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); 561 if (!dax_region) 562 return NULL; 563 564 dev_set_drvdata(parent, dax_region); 565 kref_init(&dax_region->kref); 566 dax_region->id = region_id; 567 dax_region->align = align; 568 dax_region->dev = parent; 569 dax_region->target_node = target_node; 570 ida_init(&dax_region->ida); 571 dax_region->res = (struct resource) { 572 .start = range->start, 573 .end = range->end, 574 .flags = IORESOURCE_MEM | flags, 575 }; 576 577 if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { 578 kfree(dax_region); 579 return NULL; 580 } 581 582 kref_get(&dax_region->kref); 583 if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region)) 584 return NULL; 585 return dax_region; 586 } 587 EXPORT_SYMBOL_GPL(alloc_dax_region); 588 589 static void dax_mapping_release(struct device *dev) 590 { 591 struct dax_mapping *mapping = to_dax_mapping(dev); 592 struct dev_dax *dev_dax = to_dev_dax(dev->parent); 593 594 ida_free(&dev_dax->ida, mapping->id); 595 kfree(mapping); 596 } 597 598 static void unregister_dax_mapping(void *data) 599 { 600 struct device *dev = data; 601 struct dax_mapping *mapping = to_dax_mapping(dev); 602 struct dev_dax *dev_dax = to_dev_dax(dev->parent); 603 struct dax_region *dax_region = dev_dax->region; 604 605 dev_dbg(dev, "%s\n", __func__); 606 607 device_lock_assert(dax_region->dev); 608 609 dev_dax->ranges[mapping->range_id].mapping = NULL; 610 mapping->range_id = -1; 611 612 device_del(dev); 613 put_device(dev); 614 } 615 616 static struct dev_dax_range *get_dax_range(struct device *dev) 617 { 618 struct dax_mapping *mapping = to_dax_mapping(dev); 619 struct dev_dax *dev_dax = to_dev_dax(dev->parent); 620 struct dax_region *dax_region = dev_dax->region; 621 622 device_lock(dax_region->dev); 623 if (mapping->range_id < 0) { 624 device_unlock(dax_region->dev); 625 return NULL; 626 } 627 628 return &dev_dax->ranges[mapping->range_id]; 629 } 630 631 static void put_dax_range(struct dev_dax_range *dax_range) 632 { 633 struct dax_mapping *mapping = dax_range->mapping; 634 struct dev_dax *dev_dax = to_dev_dax(mapping->dev.parent); 635 struct dax_region *dax_region = dev_dax->region; 636 637 device_unlock(dax_region->dev); 638 } 639 640 static ssize_t start_show(struct device *dev, 641 struct device_attribute *attr, char *buf) 642 { 643 struct dev_dax_range *dax_range; 644 ssize_t rc; 645 646 dax_range = get_dax_range(dev); 647 if (!dax_range) 648 return -ENXIO; 649 rc = sprintf(buf, "%#llx\n", dax_range->range.start); 650 put_dax_range(dax_range); 651 652 return rc; 653 } 654 static DEVICE_ATTR(start, 0400, start_show, NULL); 655 656 static ssize_t end_show(struct device *dev, 657 struct device_attribute *attr, char *buf) 658 { 659 struct dev_dax_range *dax_range; 660 ssize_t rc; 661 662 dax_range = get_dax_range(dev); 663 if (!dax_range) 664 return -ENXIO; 665 rc = sprintf(buf, "%#llx\n", dax_range->range.end); 666 put_dax_range(dax_range); 667 668 return rc; 669 } 670 static DEVICE_ATTR(end, 0400, end_show, NULL); 671 672 static ssize_t pgoff_show(struct device *dev, 673 struct device_attribute *attr, char *buf) 674 { 675 struct dev_dax_range *dax_range; 676 ssize_t rc; 677 678 dax_range = get_dax_range(dev); 679 if (!dax_range) 680 return -ENXIO; 681 rc = sprintf(buf, "%#lx\n", dax_range->pgoff); 682 put_dax_range(dax_range); 683 684 return rc; 685 } 686 static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL); 687 688 static struct attribute *dax_mapping_attributes[] = { 689 &dev_attr_start.attr, 690 &dev_attr_end.attr, 691 &dev_attr_page_offset.attr, 692 NULL, 693 }; 694 695 static const struct attribute_group dax_mapping_attribute_group = { 696 .attrs = dax_mapping_attributes, 697 }; 698 699 static const struct attribute_group *dax_mapping_attribute_groups[] = { 700 &dax_mapping_attribute_group, 701 NULL, 702 }; 703 704 static struct device_type dax_mapping_type = { 705 .release = dax_mapping_release, 706 .groups = dax_mapping_attribute_groups, 707 }; 708 709 static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id) 710 { 711 struct dax_region *dax_region = dev_dax->region; 712 struct dax_mapping *mapping; 713 struct device *dev; 714 int rc; 715 716 device_lock_assert(dax_region->dev); 717 718 if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver, 719 "region disabled\n")) 720 return -ENXIO; 721 722 mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); 723 if (!mapping) 724 return -ENOMEM; 725 mapping->range_id = range_id; 726 mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL); 727 if (mapping->id < 0) { 728 kfree(mapping); 729 return -ENOMEM; 730 } 731 dev_dax->ranges[range_id].mapping = mapping; 732 dev = &mapping->dev; 733 device_initialize(dev); 734 dev->parent = &dev_dax->dev; 735 dev->type = &dax_mapping_type; 736 dev_set_name(dev, "mapping%d", mapping->id); 737 rc = device_add(dev); 738 if (rc) { 739 put_device(dev); 740 return rc; 741 } 742 743 rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping, 744 dev); 745 if (rc) 746 return rc; 747 return 0; 748 } 749 750 static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, 751 resource_size_t size) 752 { 753 struct dax_region *dax_region = dev_dax->region; 754 struct resource *res = &dax_region->res; 755 struct device *dev = &dev_dax->dev; 756 struct dev_dax_range *ranges; 757 unsigned long pgoff = 0; 758 struct resource *alloc; 759 int i, rc; 760 761 device_lock_assert(dax_region->dev); 762 763 /* handle the seed alloc special case */ 764 if (!size) { 765 if (dev_WARN_ONCE(dev, dev_dax->nr_range, 766 "0-size allocation must be first\n")) 767 return -EBUSY; 768 /* nr_range == 0 is elsewhere special cased as 0-size device */ 769 return 0; 770 } 771 772 alloc = __request_region(res, start, size, dev_name(dev), 0); 773 if (!alloc) 774 return -ENOMEM; 775 776 ranges = krealloc(dev_dax->ranges, sizeof(*ranges) 777 * (dev_dax->nr_range + 1), GFP_KERNEL); 778 if (!ranges) { 779 __release_region(res, alloc->start, resource_size(alloc)); 780 return -ENOMEM; 781 } 782 783 for (i = 0; i < dev_dax->nr_range; i++) 784 pgoff += PHYS_PFN(range_len(&ranges[i].range)); 785 dev_dax->ranges = ranges; 786 ranges[dev_dax->nr_range++] = (struct dev_dax_range) { 787 .pgoff = pgoff, 788 .range = { 789 .start = alloc->start, 790 .end = alloc->end, 791 }, 792 }; 793 794 dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1, 795 &alloc->start, &alloc->end); 796 /* 797 * A dev_dax instance must be registered before mapping device 798 * children can be added. Defer to devm_create_dev_dax() to add 799 * the initial mapping device. 800 */ 801 if (!device_is_registered(&dev_dax->dev)) 802 return 0; 803 804 rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1); 805 if (rc) 806 trim_dev_dax_range(dev_dax); 807 808 return rc; 809 } 810 811 static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size) 812 { 813 int last_range = dev_dax->nr_range - 1; 814 struct dev_dax_range *dax_range = &dev_dax->ranges[last_range]; 815 struct dax_region *dax_region = dev_dax->region; 816 bool is_shrink = resource_size(res) > size; 817 struct range *range = &dax_range->range; 818 struct device *dev = &dev_dax->dev; 819 int rc; 820 821 device_lock_assert(dax_region->dev); 822 823 if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n")) 824 return -EINVAL; 825 826 rc = adjust_resource(res, range->start, size); 827 if (rc) 828 return rc; 829 830 *range = (struct range) { 831 .start = range->start, 832 .end = range->start + size - 1, 833 }; 834 835 dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend", 836 last_range, (unsigned long long) range->start, 837 (unsigned long long) range->end); 838 839 return 0; 840 } 841 842 static ssize_t size_show(struct device *dev, 843 struct device_attribute *attr, char *buf) 844 { 845 struct dev_dax *dev_dax = to_dev_dax(dev); 846 unsigned long long size; 847 848 device_lock(dev); 849 size = dev_dax_size(dev_dax); 850 device_unlock(dev); 851 852 return sprintf(buf, "%llu\n", size); 853 } 854 855 static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size) 856 { 857 /* 858 * The minimum mapping granularity for a device instance is a 859 * single subsection, unless the arch says otherwise. 860 */ 861 return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align())); 862 } 863 864 static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size) 865 { 866 resource_size_t to_shrink = dev_dax_size(dev_dax) - size; 867 struct dax_region *dax_region = dev_dax->region; 868 struct device *dev = &dev_dax->dev; 869 int i; 870 871 for (i = dev_dax->nr_range - 1; i >= 0; i--) { 872 struct range *range = &dev_dax->ranges[i].range; 873 struct dax_mapping *mapping = dev_dax->ranges[i].mapping; 874 struct resource *adjust = NULL, *res; 875 resource_size_t shrink; 876 877 shrink = min_t(u64, to_shrink, range_len(range)); 878 if (shrink >= range_len(range)) { 879 devm_release_action(dax_region->dev, 880 unregister_dax_mapping, &mapping->dev); 881 trim_dev_dax_range(dev_dax); 882 to_shrink -= shrink; 883 if (!to_shrink) 884 break; 885 continue; 886 } 887 888 for_each_dax_region_resource(dax_region, res) 889 if (strcmp(res->name, dev_name(dev)) == 0 890 && res->start == range->start) { 891 adjust = res; 892 break; 893 } 894 895 if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1, 896 "failed to find matching resource\n")) 897 return -ENXIO; 898 return adjust_dev_dax_range(dev_dax, adjust, range_len(range) 899 - shrink); 900 } 901 return 0; 902 } 903 904 /* 905 * Only allow adjustments that preserve the relative pgoff of existing 906 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff. 907 */ 908 static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res) 909 { 910 struct dev_dax_range *last; 911 int i; 912 913 if (dev_dax->nr_range == 0) 914 return false; 915 if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0) 916 return false; 917 last = &dev_dax->ranges[dev_dax->nr_range - 1]; 918 if (last->range.start != res->start || last->range.end != res->end) 919 return false; 920 for (i = 0; i < dev_dax->nr_range - 1; i++) { 921 struct dev_dax_range *dax_range = &dev_dax->ranges[i]; 922 923 if (dax_range->pgoff > last->pgoff) 924 return false; 925 } 926 927 return true; 928 } 929 930 static ssize_t dev_dax_resize(struct dax_region *dax_region, 931 struct dev_dax *dev_dax, resource_size_t size) 932 { 933 resource_size_t avail = dax_region_avail_size(dax_region), to_alloc; 934 resource_size_t dev_size = dev_dax_size(dev_dax); 935 struct resource *region_res = &dax_region->res; 936 struct device *dev = &dev_dax->dev; 937 struct resource *res, *first; 938 resource_size_t alloc = 0; 939 int rc; 940 941 if (dev->driver) 942 return -EBUSY; 943 if (size == dev_size) 944 return 0; 945 if (size > dev_size && size - dev_size > avail) 946 return -ENOSPC; 947 if (size < dev_size) 948 return dev_dax_shrink(dev_dax, size); 949 950 to_alloc = size - dev_size; 951 if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc), 952 "resize of %pa misaligned\n", &to_alloc)) 953 return -ENXIO; 954 955 /* 956 * Expand the device into the unused portion of the region. This 957 * may involve adjusting the end of an existing resource, or 958 * allocating a new resource. 959 */ 960 retry: 961 first = region_res->child; 962 if (!first) 963 return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc); 964 965 rc = -ENOSPC; 966 for (res = first; res; res = res->sibling) { 967 struct resource *next = res->sibling; 968 969 /* space at the beginning of the region */ 970 if (res == first && res->start > dax_region->res.start) { 971 alloc = min(res->start - dax_region->res.start, to_alloc); 972 rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc); 973 break; 974 } 975 976 alloc = 0; 977 /* space between allocations */ 978 if (next && next->start > res->end + 1) 979 alloc = min(next->start - (res->end + 1), to_alloc); 980 981 /* space at the end of the region */ 982 if (!alloc && !next && res->end < region_res->end) 983 alloc = min(region_res->end - res->end, to_alloc); 984 985 if (!alloc) 986 continue; 987 988 if (adjust_ok(dev_dax, res)) { 989 rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc); 990 break; 991 } 992 rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc); 993 break; 994 } 995 if (rc) 996 return rc; 997 to_alloc -= alloc; 998 if (to_alloc) 999 goto retry; 1000 return 0; 1001 } 1002 1003 static ssize_t size_store(struct device *dev, struct device_attribute *attr, 1004 const char *buf, size_t len) 1005 { 1006 ssize_t rc; 1007 unsigned long long val; 1008 struct dev_dax *dev_dax = to_dev_dax(dev); 1009 struct dax_region *dax_region = dev_dax->region; 1010 1011 rc = kstrtoull(buf, 0, &val); 1012 if (rc) 1013 return rc; 1014 1015 if (!alloc_is_aligned(dev_dax, val)) { 1016 dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val); 1017 return -EINVAL; 1018 } 1019 1020 device_lock(dax_region->dev); 1021 if (!dax_region->dev->driver) { 1022 device_unlock(dax_region->dev); 1023 return -ENXIO; 1024 } 1025 device_lock(dev); 1026 rc = dev_dax_resize(dax_region, dev_dax, val); 1027 device_unlock(dev); 1028 device_unlock(dax_region->dev); 1029 1030 return rc == 0 ? len : rc; 1031 } 1032 static DEVICE_ATTR_RW(size); 1033 1034 static ssize_t range_parse(const char *opt, size_t len, struct range *range) 1035 { 1036 unsigned long long addr = 0; 1037 char *start, *end, *str; 1038 ssize_t rc = -EINVAL; 1039 1040 str = kstrdup(opt, GFP_KERNEL); 1041 if (!str) 1042 return rc; 1043 1044 end = str; 1045 start = strsep(&end, "-"); 1046 if (!start || !end) 1047 goto err; 1048 1049 rc = kstrtoull(start, 16, &addr); 1050 if (rc) 1051 goto err; 1052 range->start = addr; 1053 1054 rc = kstrtoull(end, 16, &addr); 1055 if (rc) 1056 goto err; 1057 range->end = addr; 1058 1059 err: 1060 kfree(str); 1061 return rc; 1062 } 1063 1064 static ssize_t mapping_store(struct device *dev, struct device_attribute *attr, 1065 const char *buf, size_t len) 1066 { 1067 struct dev_dax *dev_dax = to_dev_dax(dev); 1068 struct dax_region *dax_region = dev_dax->region; 1069 size_t to_alloc; 1070 struct range r; 1071 ssize_t rc; 1072 1073 rc = range_parse(buf, len, &r); 1074 if (rc) 1075 return rc; 1076 1077 rc = -ENXIO; 1078 device_lock(dax_region->dev); 1079 if (!dax_region->dev->driver) { 1080 device_unlock(dax_region->dev); 1081 return rc; 1082 } 1083 device_lock(dev); 1084 1085 to_alloc = range_len(&r); 1086 if (alloc_is_aligned(dev_dax, to_alloc)) 1087 rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc); 1088 device_unlock(dev); 1089 device_unlock(dax_region->dev); 1090 1091 return rc == 0 ? len : rc; 1092 } 1093 static DEVICE_ATTR_WO(mapping); 1094 1095 static ssize_t align_show(struct device *dev, 1096 struct device_attribute *attr, char *buf) 1097 { 1098 struct dev_dax *dev_dax = to_dev_dax(dev); 1099 1100 return sprintf(buf, "%d\n", dev_dax->align); 1101 } 1102 1103 static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax) 1104 { 1105 struct device *dev = &dev_dax->dev; 1106 int i; 1107 1108 for (i = 0; i < dev_dax->nr_range; i++) { 1109 size_t len = range_len(&dev_dax->ranges[i].range); 1110 1111 if (!alloc_is_aligned(dev_dax, len)) { 1112 dev_dbg(dev, "%s: align %u invalid for range %d\n", 1113 __func__, dev_dax->align, i); 1114 return -EINVAL; 1115 } 1116 } 1117 1118 return 0; 1119 } 1120 1121 static ssize_t align_store(struct device *dev, struct device_attribute *attr, 1122 const char *buf, size_t len) 1123 { 1124 struct dev_dax *dev_dax = to_dev_dax(dev); 1125 struct dax_region *dax_region = dev_dax->region; 1126 unsigned long val, align_save; 1127 ssize_t rc; 1128 1129 rc = kstrtoul(buf, 0, &val); 1130 if (rc) 1131 return -ENXIO; 1132 1133 if (!dax_align_valid(val)) 1134 return -EINVAL; 1135 1136 device_lock(dax_region->dev); 1137 if (!dax_region->dev->driver) { 1138 device_unlock(dax_region->dev); 1139 return -ENXIO; 1140 } 1141 1142 device_lock(dev); 1143 if (dev->driver) { 1144 rc = -EBUSY; 1145 goto out_unlock; 1146 } 1147 1148 align_save = dev_dax->align; 1149 dev_dax->align = val; 1150 rc = dev_dax_validate_align(dev_dax); 1151 if (rc) 1152 dev_dax->align = align_save; 1153 out_unlock: 1154 device_unlock(dev); 1155 device_unlock(dax_region->dev); 1156 return rc == 0 ? len : rc; 1157 } 1158 static DEVICE_ATTR_RW(align); 1159 1160 static int dev_dax_target_node(struct dev_dax *dev_dax) 1161 { 1162 struct dax_region *dax_region = dev_dax->region; 1163 1164 return dax_region->target_node; 1165 } 1166 1167 static ssize_t target_node_show(struct device *dev, 1168 struct device_attribute *attr, char *buf) 1169 { 1170 struct dev_dax *dev_dax = to_dev_dax(dev); 1171 1172 return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax)); 1173 } 1174 static DEVICE_ATTR_RO(target_node); 1175 1176 static ssize_t resource_show(struct device *dev, 1177 struct device_attribute *attr, char *buf) 1178 { 1179 struct dev_dax *dev_dax = to_dev_dax(dev); 1180 struct dax_region *dax_region = dev_dax->region; 1181 unsigned long long start; 1182 1183 if (dev_dax->nr_range < 1) 1184 start = dax_region->res.start; 1185 else 1186 start = dev_dax->ranges[0].range.start; 1187 1188 return sprintf(buf, "%#llx\n", start); 1189 } 1190 static DEVICE_ATTR(resource, 0400, resource_show, NULL); 1191 1192 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, 1193 char *buf) 1194 { 1195 /* 1196 * We only ever expect to handle device-dax instances, i.e. the 1197 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero 1198 */ 1199 return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0); 1200 } 1201 static DEVICE_ATTR_RO(modalias); 1202 1203 static ssize_t numa_node_show(struct device *dev, 1204 struct device_attribute *attr, char *buf) 1205 { 1206 return sprintf(buf, "%d\n", dev_to_node(dev)); 1207 } 1208 static DEVICE_ATTR_RO(numa_node); 1209 1210 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n) 1211 { 1212 struct device *dev = container_of(kobj, struct device, kobj); 1213 struct dev_dax *dev_dax = to_dev_dax(dev); 1214 struct dax_region *dax_region = dev_dax->region; 1215 1216 if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0) 1217 return 0; 1218 if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA)) 1219 return 0; 1220 if (a == &dev_attr_mapping.attr && is_static(dax_region)) 1221 return 0; 1222 if ((a == &dev_attr_align.attr || 1223 a == &dev_attr_size.attr) && is_static(dax_region)) 1224 return 0444; 1225 return a->mode; 1226 } 1227 1228 static struct attribute *dev_dax_attributes[] = { 1229 &dev_attr_modalias.attr, 1230 &dev_attr_size.attr, 1231 &dev_attr_mapping.attr, 1232 &dev_attr_target_node.attr, 1233 &dev_attr_align.attr, 1234 &dev_attr_resource.attr, 1235 &dev_attr_numa_node.attr, 1236 NULL, 1237 }; 1238 1239 static const struct attribute_group dev_dax_attribute_group = { 1240 .attrs = dev_dax_attributes, 1241 .is_visible = dev_dax_visible, 1242 }; 1243 1244 static const struct attribute_group *dax_attribute_groups[] = { 1245 &dev_dax_attribute_group, 1246 NULL, 1247 }; 1248 1249 static void dev_dax_release(struct device *dev) 1250 { 1251 struct dev_dax *dev_dax = to_dev_dax(dev); 1252 struct dax_region *dax_region = dev_dax->region; 1253 struct dax_device *dax_dev = dev_dax->dax_dev; 1254 1255 put_dax(dax_dev); 1256 free_dev_dax_id(dev_dax); 1257 dax_region_put(dax_region); 1258 kfree(dev_dax->pgmap); 1259 kfree(dev_dax); 1260 } 1261 1262 static const struct device_type dev_dax_type = { 1263 .release = dev_dax_release, 1264 .groups = dax_attribute_groups, 1265 }; 1266 1267 struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) 1268 { 1269 struct dax_region *dax_region = data->dax_region; 1270 struct device *parent = dax_region->dev; 1271 struct dax_device *dax_dev; 1272 struct dev_dax *dev_dax; 1273 struct inode *inode; 1274 struct device *dev; 1275 int rc; 1276 1277 dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL); 1278 if (!dev_dax) 1279 return ERR_PTR(-ENOMEM); 1280 1281 if (is_static(dax_region)) { 1282 if (dev_WARN_ONCE(parent, data->id < 0, 1283 "dynamic id specified to static region\n")) { 1284 rc = -EINVAL; 1285 goto err_id; 1286 } 1287 1288 dev_dax->id = data->id; 1289 } else { 1290 if (dev_WARN_ONCE(parent, data->id >= 0, 1291 "static id specified to dynamic region\n")) { 1292 rc = -EINVAL; 1293 goto err_id; 1294 } 1295 1296 rc = ida_alloc(&dax_region->ida, GFP_KERNEL); 1297 if (rc < 0) 1298 goto err_id; 1299 dev_dax->id = rc; 1300 } 1301 1302 dev_dax->region = dax_region; 1303 dev = &dev_dax->dev; 1304 device_initialize(dev); 1305 dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id); 1306 1307 rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size); 1308 if (rc) 1309 goto err_range; 1310 1311 if (data->pgmap) { 1312 dev_WARN_ONCE(parent, !is_static(dax_region), 1313 "custom dev_pagemap requires a static dax_region\n"); 1314 1315 dev_dax->pgmap = kmemdup(data->pgmap, 1316 sizeof(struct dev_pagemap), GFP_KERNEL); 1317 if (!dev_dax->pgmap) { 1318 rc = -ENOMEM; 1319 goto err_pgmap; 1320 } 1321 } 1322 1323 /* 1324 * No dax_operations since there is no access to this device outside of 1325 * mmap of the resulting character device. 1326 */ 1327 dax_dev = alloc_dax(dev_dax, NULL, DAXDEV_F_SYNC); 1328 if (IS_ERR(dax_dev)) { 1329 rc = PTR_ERR(dax_dev); 1330 goto err_alloc_dax; 1331 } 1332 1333 /* a device_dax instance is dead while the driver is not attached */ 1334 kill_dax(dax_dev); 1335 1336 dev_dax->dax_dev = dax_dev; 1337 dev_dax->target_node = dax_region->target_node; 1338 dev_dax->align = dax_region->align; 1339 ida_init(&dev_dax->ida); 1340 kref_get(&dax_region->kref); 1341 1342 inode = dax_inode(dax_dev); 1343 dev->devt = inode->i_rdev; 1344 dev->bus = &dax_bus_type; 1345 dev->parent = parent; 1346 dev->type = &dev_dax_type; 1347 1348 rc = device_add(dev); 1349 if (rc) { 1350 kill_dev_dax(dev_dax); 1351 put_device(dev); 1352 return ERR_PTR(rc); 1353 } 1354 1355 rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev); 1356 if (rc) 1357 return ERR_PTR(rc); 1358 1359 /* register mapping device for the initial allocation range */ 1360 if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) { 1361 rc = devm_register_dax_mapping(dev_dax, 0); 1362 if (rc) 1363 return ERR_PTR(rc); 1364 } 1365 1366 return dev_dax; 1367 1368 err_alloc_dax: 1369 kfree(dev_dax->pgmap); 1370 err_pgmap: 1371 free_dev_dax_ranges(dev_dax); 1372 err_range: 1373 free_dev_dax_id(dev_dax); 1374 err_id: 1375 kfree(dev_dax); 1376 1377 return ERR_PTR(rc); 1378 } 1379 EXPORT_SYMBOL_GPL(devm_create_dev_dax); 1380 1381 static int match_always_count; 1382 1383 int __dax_driver_register(struct dax_device_driver *dax_drv, 1384 struct module *module, const char *mod_name) 1385 { 1386 struct device_driver *drv = &dax_drv->drv; 1387 int rc = 0; 1388 1389 /* 1390 * dax_bus_probe() calls dax_drv->probe() unconditionally. 1391 * So better be safe than sorry and ensure it is provided. 1392 */ 1393 if (!dax_drv->probe) 1394 return -EINVAL; 1395 1396 INIT_LIST_HEAD(&dax_drv->ids); 1397 drv->owner = module; 1398 drv->name = mod_name; 1399 drv->mod_name = mod_name; 1400 drv->bus = &dax_bus_type; 1401 1402 /* there can only be one default driver */ 1403 mutex_lock(&dax_bus_lock); 1404 match_always_count += dax_drv->match_always; 1405 if (match_always_count > 1) { 1406 match_always_count--; 1407 WARN_ON(1); 1408 rc = -EINVAL; 1409 } 1410 mutex_unlock(&dax_bus_lock); 1411 if (rc) 1412 return rc; 1413 1414 rc = driver_register(drv); 1415 if (rc && dax_drv->match_always) { 1416 mutex_lock(&dax_bus_lock); 1417 match_always_count -= dax_drv->match_always; 1418 mutex_unlock(&dax_bus_lock); 1419 } 1420 1421 return rc; 1422 } 1423 EXPORT_SYMBOL_GPL(__dax_driver_register); 1424 1425 void dax_driver_unregister(struct dax_device_driver *dax_drv) 1426 { 1427 struct device_driver *drv = &dax_drv->drv; 1428 struct dax_id *dax_id, *_id; 1429 1430 mutex_lock(&dax_bus_lock); 1431 match_always_count -= dax_drv->match_always; 1432 list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) { 1433 list_del(&dax_id->list); 1434 kfree(dax_id); 1435 } 1436 mutex_unlock(&dax_bus_lock); 1437 driver_unregister(drv); 1438 } 1439 EXPORT_SYMBOL_GPL(dax_driver_unregister); 1440 1441 int __init dax_bus_init(void) 1442 { 1443 return bus_register(&dax_bus_type); 1444 } 1445 1446 void __exit dax_bus_exit(void) 1447 { 1448 bus_unregister(&dax_bus_type); 1449 } 1450