1 /* 2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 */ 13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 14 #include <linux/vmalloc.h> 15 #include <linux/device.h> 16 #include <linux/ndctl.h> 17 #include <linux/slab.h> 18 #include <linux/io.h> 19 #include <linux/fs.h> 20 #include <linux/mm.h> 21 #include "nd-core.h" 22 #include "label.h" 23 #include "pmem.h" 24 #include "nd.h" 25 26 static DEFINE_IDA(dimm_ida); 27 28 /* 29 * Retrieve bus and dimm handle and return if this bus supports 30 * get_config_data commands 31 */ 32 int nvdimm_check_config_data(struct device *dev) 33 { 34 struct nvdimm *nvdimm = to_nvdimm(dev); 35 36 if (!nvdimm->cmd_mask || 37 !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) { 38 if (test_bit(NDD_ALIASING, &nvdimm->flags)) 39 return -ENXIO; 40 else 41 return -ENOTTY; 42 } 43 44 return 0; 45 } 46 47 static int validate_dimm(struct nvdimm_drvdata *ndd) 48 { 49 int rc; 50 51 if (!ndd) 52 return -EINVAL; 53 54 rc = nvdimm_check_config_data(ndd->dev); 55 if (rc) 56 dev_dbg(ndd->dev, "%pf: %s error: %d\n", 57 __builtin_return_address(0), __func__, rc); 58 return rc; 59 } 60 61 /** 62 * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area 63 * @nvdimm: dimm to initialize 64 */ 65 int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) 66 { 67 struct nd_cmd_get_config_size *cmd = &ndd->nsarea; 68 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); 69 struct nvdimm_bus_descriptor *nd_desc; 70 int rc = validate_dimm(ndd); 71 int cmd_rc = 0; 72 73 if (rc) 74 return rc; 75 76 if (cmd->config_size) 77 return 0; /* already valid */ 78 79 memset(cmd, 0, sizeof(*cmd)); 80 nd_desc = nvdimm_bus->nd_desc; 81 rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), 82 ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), &cmd_rc); 83 if (rc < 0) 84 return rc; 85 return cmd_rc; 86 } 87 88 int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) 89 { 90 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); 91 struct nd_cmd_get_config_data_hdr *cmd; 92 struct nvdimm_bus_descriptor *nd_desc; 93 int rc = validate_dimm(ndd); 94 u32 max_cmd_size, config_size; 95 size_t offset; 96 97 if (rc) 98 return rc; 99 100 if (ndd->data) 101 return 0; 102 103 if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0 104 || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) { 105 dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n", 106 ndd->nsarea.max_xfer, ndd->nsarea.config_size); 107 return -ENXIO; 108 } 109 110 ndd->data = kvmalloc(ndd->nsarea.config_size, GFP_KERNEL); 111 if (!ndd->data) 112 return -ENOMEM; 113 114 max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer); 115 cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); 116 if (!cmd) 117 return -ENOMEM; 118 119 nd_desc = nvdimm_bus->nd_desc; 120 for (config_size = ndd->nsarea.config_size, offset = 0; 121 config_size; config_size -= cmd->in_length, 122 offset += cmd->in_length) { 123 cmd->in_length = min(config_size, max_cmd_size); 124 cmd->in_offset = offset; 125 rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), 126 ND_CMD_GET_CONFIG_DATA, cmd, 127 cmd->in_length + sizeof(*cmd), NULL); 128 if (rc || cmd->status) { 129 rc = -ENXIO; 130 break; 131 } 132 memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); 133 } 134 dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc); 135 kfree(cmd); 136 137 return rc; 138 } 139 140 int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, 141 void *buf, size_t len) 142 { 143 int rc = validate_dimm(ndd); 144 size_t max_cmd_size, buf_offset; 145 struct nd_cmd_set_config_hdr *cmd; 146 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); 147 struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; 148 149 if (rc) 150 return rc; 151 152 if (!ndd->data) 153 return -ENXIO; 154 155 if (offset + len > ndd->nsarea.config_size) 156 return -ENXIO; 157 158 max_cmd_size = min_t(u32, PAGE_SIZE, len); 159 max_cmd_size = min_t(u32, max_cmd_size, ndd->nsarea.max_xfer); 160 cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL); 161 if (!cmd) 162 return -ENOMEM; 163 164 for (buf_offset = 0; len; len -= cmd->in_length, 165 buf_offset += cmd->in_length) { 166 size_t cmd_size; 167 u32 *status; 168 169 cmd->in_offset = offset + buf_offset; 170 cmd->in_length = min(max_cmd_size, len); 171 memcpy(cmd->in_buf, buf + buf_offset, cmd->in_length); 172 173 /* status is output in the last 4-bytes of the command buffer */ 174 cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32); 175 status = ((void *) cmd) + cmd_size - sizeof(u32); 176 177 rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), 178 ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL); 179 if (rc || *status) { 180 rc = rc ? rc : -ENXIO; 181 break; 182 } 183 } 184 kfree(cmd); 185 186 return rc; 187 } 188 189 void nvdimm_set_aliasing(struct device *dev) 190 { 191 struct nvdimm *nvdimm = to_nvdimm(dev); 192 193 set_bit(NDD_ALIASING, &nvdimm->flags); 194 } 195 196 void nvdimm_set_locked(struct device *dev) 197 { 198 struct nvdimm *nvdimm = to_nvdimm(dev); 199 200 set_bit(NDD_LOCKED, &nvdimm->flags); 201 } 202 203 static void nvdimm_release(struct device *dev) 204 { 205 struct nvdimm *nvdimm = to_nvdimm(dev); 206 207 ida_simple_remove(&dimm_ida, nvdimm->id); 208 kfree(nvdimm); 209 } 210 211 static struct device_type nvdimm_device_type = { 212 .name = "nvdimm", 213 .release = nvdimm_release, 214 }; 215 216 bool is_nvdimm(struct device *dev) 217 { 218 return dev->type == &nvdimm_device_type; 219 } 220 221 struct nvdimm *to_nvdimm(struct device *dev) 222 { 223 struct nvdimm *nvdimm = container_of(dev, struct nvdimm, dev); 224 225 WARN_ON(!is_nvdimm(dev)); 226 return nvdimm; 227 } 228 EXPORT_SYMBOL_GPL(to_nvdimm); 229 230 struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr) 231 { 232 struct nd_region *nd_region = &ndbr->nd_region; 233 struct nd_mapping *nd_mapping = &nd_region->mapping[0]; 234 235 return nd_mapping->nvdimm; 236 } 237 EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm); 238 239 unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr) 240 { 241 /* pmem mapping properties are private to libnvdimm */ 242 return ARCH_MEMREMAP_PMEM; 243 } 244 EXPORT_SYMBOL_GPL(nd_blk_memremap_flags); 245 246 struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping) 247 { 248 struct nvdimm *nvdimm = nd_mapping->nvdimm; 249 250 WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev)); 251 252 return dev_get_drvdata(&nvdimm->dev); 253 } 254 EXPORT_SYMBOL(to_ndd); 255 256 void nvdimm_drvdata_release(struct kref *kref) 257 { 258 struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref); 259 struct device *dev = ndd->dev; 260 struct resource *res, *_r; 261 262 dev_dbg(dev, "%s\n", __func__); 263 264 nvdimm_bus_lock(dev); 265 for_each_dpa_resource_safe(ndd, res, _r) 266 nvdimm_free_dpa(ndd, res); 267 nvdimm_bus_unlock(dev); 268 269 kvfree(ndd->data); 270 kfree(ndd); 271 put_device(dev); 272 } 273 274 void get_ndd(struct nvdimm_drvdata *ndd) 275 { 276 kref_get(&ndd->kref); 277 } 278 279 void put_ndd(struct nvdimm_drvdata *ndd) 280 { 281 if (ndd) 282 kref_put(&ndd->kref, nvdimm_drvdata_release); 283 } 284 285 const char *nvdimm_name(struct nvdimm *nvdimm) 286 { 287 return dev_name(&nvdimm->dev); 288 } 289 EXPORT_SYMBOL_GPL(nvdimm_name); 290 291 struct kobject *nvdimm_kobj(struct nvdimm *nvdimm) 292 { 293 return &nvdimm->dev.kobj; 294 } 295 EXPORT_SYMBOL_GPL(nvdimm_kobj); 296 297 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm) 298 { 299 return nvdimm->cmd_mask; 300 } 301 EXPORT_SYMBOL_GPL(nvdimm_cmd_mask); 302 303 void *nvdimm_provider_data(struct nvdimm *nvdimm) 304 { 305 if (nvdimm) 306 return nvdimm->provider_data; 307 return NULL; 308 } 309 EXPORT_SYMBOL_GPL(nvdimm_provider_data); 310 311 static ssize_t commands_show(struct device *dev, 312 struct device_attribute *attr, char *buf) 313 { 314 struct nvdimm *nvdimm = to_nvdimm(dev); 315 int cmd, len = 0; 316 317 if (!nvdimm->cmd_mask) 318 return sprintf(buf, "\n"); 319 320 for_each_set_bit(cmd, &nvdimm->cmd_mask, BITS_PER_LONG) 321 len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd)); 322 len += sprintf(buf + len, "\n"); 323 return len; 324 } 325 static DEVICE_ATTR_RO(commands); 326 327 static ssize_t state_show(struct device *dev, struct device_attribute *attr, 328 char *buf) 329 { 330 struct nvdimm *nvdimm = to_nvdimm(dev); 331 332 /* 333 * The state may be in the process of changing, userspace should 334 * quiesce probing if it wants a static answer 335 */ 336 nvdimm_bus_lock(dev); 337 nvdimm_bus_unlock(dev); 338 return sprintf(buf, "%s\n", atomic_read(&nvdimm->busy) 339 ? "active" : "idle"); 340 } 341 static DEVICE_ATTR_RO(state); 342 343 static ssize_t available_slots_show(struct device *dev, 344 struct device_attribute *attr, char *buf) 345 { 346 struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); 347 ssize_t rc; 348 u32 nfree; 349 350 if (!ndd) 351 return -ENXIO; 352 353 nvdimm_bus_lock(dev); 354 nfree = nd_label_nfree(ndd); 355 if (nfree - 1 > nfree) { 356 dev_WARN_ONCE(dev, 1, "we ate our last label?\n"); 357 nfree = 0; 358 } else 359 nfree--; 360 rc = sprintf(buf, "%d\n", nfree); 361 nvdimm_bus_unlock(dev); 362 return rc; 363 } 364 static DEVICE_ATTR_RO(available_slots); 365 366 static struct attribute *nvdimm_attributes[] = { 367 &dev_attr_state.attr, 368 &dev_attr_commands.attr, 369 &dev_attr_available_slots.attr, 370 NULL, 371 }; 372 373 struct attribute_group nvdimm_attribute_group = { 374 .attrs = nvdimm_attributes, 375 }; 376 EXPORT_SYMBOL_GPL(nvdimm_attribute_group); 377 378 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, 379 const struct attribute_group **groups, unsigned long flags, 380 unsigned long cmd_mask, int num_flush, 381 struct resource *flush_wpq) 382 { 383 struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); 384 struct device *dev; 385 386 if (!nvdimm) 387 return NULL; 388 389 nvdimm->id = ida_simple_get(&dimm_ida, 0, 0, GFP_KERNEL); 390 if (nvdimm->id < 0) { 391 kfree(nvdimm); 392 return NULL; 393 } 394 nvdimm->provider_data = provider_data; 395 nvdimm->flags = flags; 396 nvdimm->cmd_mask = cmd_mask; 397 nvdimm->num_flush = num_flush; 398 nvdimm->flush_wpq = flush_wpq; 399 atomic_set(&nvdimm->busy, 0); 400 dev = &nvdimm->dev; 401 dev_set_name(dev, "nmem%d", nvdimm->id); 402 dev->parent = &nvdimm_bus->dev; 403 dev->type = &nvdimm_device_type; 404 dev->devt = MKDEV(nvdimm_major, nvdimm->id); 405 dev->groups = groups; 406 nd_device_register(dev); 407 408 return nvdimm; 409 } 410 EXPORT_SYMBOL_GPL(nvdimm_create); 411 412 int alias_dpa_busy(struct device *dev, void *data) 413 { 414 resource_size_t map_end, blk_start, new; 415 struct blk_alloc_info *info = data; 416 struct nd_mapping *nd_mapping; 417 struct nd_region *nd_region; 418 struct nvdimm_drvdata *ndd; 419 struct resource *res; 420 int i; 421 422 if (!is_memory(dev)) 423 return 0; 424 425 nd_region = to_nd_region(dev); 426 for (i = 0; i < nd_region->ndr_mappings; i++) { 427 nd_mapping = &nd_region->mapping[i]; 428 if (nd_mapping->nvdimm == info->nd_mapping->nvdimm) 429 break; 430 } 431 432 if (i >= nd_region->ndr_mappings) 433 return 0; 434 435 ndd = to_ndd(nd_mapping); 436 map_end = nd_mapping->start + nd_mapping->size - 1; 437 blk_start = nd_mapping->start; 438 439 /* 440 * In the allocation case ->res is set to free space that we are 441 * looking to validate against PMEM aliasing collision rules 442 * (i.e. BLK is allocated after all aliased PMEM). 443 */ 444 if (info->res) { 445 if (info->res->start >= nd_mapping->start 446 && info->res->start < map_end) 447 /* pass */; 448 else 449 return 0; 450 } 451 452 retry: 453 /* 454 * Find the free dpa from the end of the last pmem allocation to 455 * the end of the interleave-set mapping. 456 */ 457 for_each_dpa_resource(ndd, res) { 458 if (strncmp(res->name, "pmem", 4) != 0) 459 continue; 460 if ((res->start >= blk_start && res->start < map_end) 461 || (res->end >= blk_start 462 && res->end <= map_end)) { 463 new = max(blk_start, min(map_end + 1, res->end + 1)); 464 if (new != blk_start) { 465 blk_start = new; 466 goto retry; 467 } 468 } 469 } 470 471 /* update the free space range with the probed blk_start */ 472 if (info->res && blk_start > info->res->start) { 473 info->res->start = max(info->res->start, blk_start); 474 if (info->res->start > info->res->end) 475 info->res->end = info->res->start - 1; 476 return 1; 477 } 478 479 info->available -= blk_start - nd_mapping->start; 480 481 return 0; 482 } 483 484 /** 485 * nd_blk_available_dpa - account the unused dpa of BLK region 486 * @nd_mapping: container of dpa-resource-root + labels 487 * 488 * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but 489 * we arrange for them to never start at an lower dpa than the last 490 * PMEM allocation in an aliased region. 491 */ 492 resource_size_t nd_blk_available_dpa(struct nd_region *nd_region) 493 { 494 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); 495 struct nd_mapping *nd_mapping = &nd_region->mapping[0]; 496 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 497 struct blk_alloc_info info = { 498 .nd_mapping = nd_mapping, 499 .available = nd_mapping->size, 500 .res = NULL, 501 }; 502 struct resource *res; 503 504 if (!ndd) 505 return 0; 506 507 device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy); 508 509 /* now account for busy blk allocations in unaliased dpa */ 510 for_each_dpa_resource(ndd, res) { 511 if (strncmp(res->name, "blk", 3) != 0) 512 continue; 513 info.available -= resource_size(res); 514 } 515 516 return info.available; 517 } 518 519 /** 520 * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa 521 * @nd_mapping: container of dpa-resource-root + labels 522 * @nd_region: constrain available space check to this reference region 523 * @overlap: calculate available space assuming this level of overlap 524 * 525 * Validate that a PMEM label, if present, aligns with the start of an 526 * interleave set and truncate the available size at the lowest BLK 527 * overlap point. 528 * 529 * The expectation is that this routine is called multiple times as it 530 * probes for the largest BLK encroachment for any single member DIMM of 531 * the interleave set. Once that value is determined the PMEM-limit for 532 * the set can be established. 533 */ 534 resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, 535 struct nd_mapping *nd_mapping, resource_size_t *overlap) 536 { 537 resource_size_t map_start, map_end, busy = 0, available, blk_start; 538 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 539 struct resource *res; 540 const char *reason; 541 542 if (!ndd) 543 return 0; 544 545 map_start = nd_mapping->start; 546 map_end = map_start + nd_mapping->size - 1; 547 blk_start = max(map_start, map_end + 1 - *overlap); 548 for_each_dpa_resource(ndd, res) { 549 if (res->start >= map_start && res->start < map_end) { 550 if (strncmp(res->name, "blk", 3) == 0) 551 blk_start = min(blk_start, 552 max(map_start, res->start)); 553 else if (res->end > map_end) { 554 reason = "misaligned to iset"; 555 goto err; 556 } else 557 busy += resource_size(res); 558 } else if (res->end >= map_start && res->end <= map_end) { 559 if (strncmp(res->name, "blk", 3) == 0) { 560 /* 561 * If a BLK allocation overlaps the start of 562 * PMEM the entire interleave set may now only 563 * be used for BLK. 564 */ 565 blk_start = map_start; 566 } else 567 busy += resource_size(res); 568 } else if (map_start > res->start && map_start < res->end) { 569 /* total eclipse of the mapping */ 570 busy += nd_mapping->size; 571 blk_start = map_start; 572 } 573 } 574 575 *overlap = map_end + 1 - blk_start; 576 available = blk_start - map_start; 577 if (busy < available) 578 return available - busy; 579 return 0; 580 581 err: 582 nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason); 583 return 0; 584 } 585 586 void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res) 587 { 588 WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); 589 kfree(res->name); 590 __release_region(&ndd->dpa, res->start, resource_size(res)); 591 } 592 593 struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, 594 struct nd_label_id *label_id, resource_size_t start, 595 resource_size_t n) 596 { 597 char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL); 598 struct resource *res; 599 600 if (!name) 601 return NULL; 602 603 WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); 604 res = __request_region(&ndd->dpa, start, n, name, 0); 605 if (!res) 606 kfree(name); 607 return res; 608 } 609 610 /** 611 * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id 612 * @nvdimm: container of dpa-resource-root + labels 613 * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid> 614 */ 615 resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, 616 struct nd_label_id *label_id) 617 { 618 resource_size_t allocated = 0; 619 struct resource *res; 620 621 for_each_dpa_resource(ndd, res) 622 if (strcmp(res->name, label_id->id) == 0) 623 allocated += resource_size(res); 624 625 return allocated; 626 } 627 628 static int count_dimms(struct device *dev, void *c) 629 { 630 int *count = c; 631 632 if (is_nvdimm(dev)) 633 (*count)++; 634 return 0; 635 } 636 637 int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count) 638 { 639 int count = 0; 640 /* Flush any possible dimm registration failures */ 641 nd_synchronize(); 642 643 device_for_each_child(&nvdimm_bus->dev, &count, count_dimms); 644 dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count); 645 if (count != dimm_count) 646 return -ENXIO; 647 return 0; 648 } 649 EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count); 650 651 void __exit nvdimm_devs_exit(void) 652 { 653 ida_destroy(&dimm_ida); 654 } 655