1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright(c) 2020 Intel Corporation. */ 3 4 #include <linux/device.h> 5 #include <linux/slab.h> 6 #include <linux/idr.h> 7 #include <linux/pci.h> 8 #include <cxlmem.h> 9 #include "trace.h" 10 #include "core.h" 11 12 static DECLARE_RWSEM(cxl_memdev_rwsem); 13 14 /* 15 * An entire PCI topology full of devices should be enough for any 16 * config 17 */ 18 #define CXL_MEM_MAX_DEVS 65536 19 20 static int cxl_mem_major; 21 static DEFINE_IDA(cxl_memdev_ida); 22 23 static void cxl_memdev_release(struct device *dev) 24 { 25 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 26 27 ida_free(&cxl_memdev_ida, cxlmd->id); 28 kfree(cxlmd); 29 } 30 31 static char *cxl_memdev_devnode(const struct device *dev, umode_t *mode, kuid_t *uid, 32 kgid_t *gid) 33 { 34 return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev)); 35 } 36 37 static ssize_t firmware_version_show(struct device *dev, 38 struct device_attribute *attr, char *buf) 39 { 40 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 41 struct cxl_dev_state *cxlds = cxlmd->cxlds; 42 43 return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version); 44 } 45 static DEVICE_ATTR_RO(firmware_version); 46 47 static ssize_t payload_max_show(struct device *dev, 48 struct device_attribute *attr, char *buf) 49 { 50 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 51 struct cxl_dev_state *cxlds = cxlmd->cxlds; 52 53 return sysfs_emit(buf, "%zu\n", cxlds->payload_size); 54 } 55 static DEVICE_ATTR_RO(payload_max); 56 57 static ssize_t label_storage_size_show(struct device *dev, 58 struct device_attribute *attr, char *buf) 59 { 60 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 61 struct cxl_dev_state *cxlds = cxlmd->cxlds; 62 63 return sysfs_emit(buf, "%zu\n", cxlds->lsa_size); 64 } 65 static DEVICE_ATTR_RO(label_storage_size); 66 67 static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr, 68 char *buf) 69 { 70 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 71 struct cxl_dev_state *cxlds = cxlmd->cxlds; 72 unsigned long long len = resource_size(&cxlds->ram_res); 73 74 return sysfs_emit(buf, "%#llx\n", len); 75 } 76 77 static struct device_attribute dev_attr_ram_size = 78 __ATTR(size, 0444, ram_size_show, NULL); 79 80 static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr, 81 char *buf) 82 { 83 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 84 struct cxl_dev_state *cxlds = cxlmd->cxlds; 85 unsigned long long len = resource_size(&cxlds->pmem_res); 86 87 return sysfs_emit(buf, "%#llx\n", len); 88 } 89 90 static struct device_attribute dev_attr_pmem_size = 91 __ATTR(size, 0444, pmem_size_show, NULL); 92 93 static ssize_t serial_show(struct device *dev, struct device_attribute *attr, 94 char *buf) 95 { 96 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 97 struct cxl_dev_state *cxlds = cxlmd->cxlds; 98 99 return sysfs_emit(buf, "%#llx\n", cxlds->serial); 100 } 101 static DEVICE_ATTR_RO(serial); 102 103 static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, 104 char *buf) 105 { 106 return sprintf(buf, "%d\n", dev_to_node(dev)); 107 } 108 static DEVICE_ATTR_RO(numa_node); 109 110 static ssize_t security_state_show(struct device *dev, 111 struct device_attribute *attr, 112 char *buf) 113 { 114 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 115 struct cxl_dev_state *cxlds = cxlmd->cxlds; 116 unsigned long state = cxlds->security.state; 117 118 if (!(state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) 119 return sysfs_emit(buf, "disabled\n"); 120 if (state & CXL_PMEM_SEC_STATE_FROZEN || 121 state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT || 122 state & CXL_PMEM_SEC_STATE_USER_PLIMIT) 123 return sysfs_emit(buf, "frozen\n"); 124 if (state & CXL_PMEM_SEC_STATE_LOCKED) 125 return sysfs_emit(buf, "locked\n"); 126 else 127 return sysfs_emit(buf, "unlocked\n"); 128 } 129 static struct device_attribute dev_attr_security_state = 130 __ATTR(state, 0444, security_state_show, NULL); 131 132 static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) 133 { 134 struct cxl_dev_state *cxlds = cxlmd->cxlds; 135 u64 offset, length; 136 int rc = 0; 137 138 /* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */ 139 if (resource_size(&cxlds->pmem_res)) { 140 offset = cxlds->pmem_res.start; 141 length = resource_size(&cxlds->pmem_res); 142 rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); 143 if (rc) 144 return rc; 145 } 146 if (resource_size(&cxlds->ram_res)) { 147 offset = cxlds->ram_res.start; 148 length = resource_size(&cxlds->ram_res); 149 rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); 150 /* 151 * Invalid Physical Address is not an error for 152 * volatile addresses. Device support is optional. 153 */ 154 if (rc == -EFAULT) 155 rc = 0; 156 } 157 return rc; 158 } 159 160 int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) 161 { 162 struct cxl_port *port; 163 int rc; 164 165 port = dev_get_drvdata(&cxlmd->dev); 166 if (!port || !is_cxl_endpoint(port)) 167 return -EINVAL; 168 169 rc = down_read_interruptible(&cxl_dpa_rwsem); 170 if (rc) 171 return rc; 172 173 if (port->commit_end == -1) { 174 /* No regions mapped to this memdev */ 175 rc = cxl_get_poison_by_memdev(cxlmd); 176 } else { 177 /* Regions mapped, collect poison by endpoint */ 178 rc = cxl_get_poison_by_endpoint(port); 179 } 180 up_read(&cxl_dpa_rwsem); 181 182 return rc; 183 } 184 EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL); 185 186 struct cxl_dpa_to_region_context { 187 struct cxl_region *cxlr; 188 u64 dpa; 189 }; 190 191 static int __cxl_dpa_to_region(struct device *dev, void *arg) 192 { 193 struct cxl_dpa_to_region_context *ctx = arg; 194 struct cxl_endpoint_decoder *cxled; 195 u64 dpa = ctx->dpa; 196 197 if (!is_endpoint_decoder(dev)) 198 return 0; 199 200 cxled = to_cxl_endpoint_decoder(dev); 201 if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) 202 return 0; 203 204 if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start) 205 return 0; 206 207 dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa, 208 dev_name(&cxled->cxld.region->dev)); 209 210 ctx->cxlr = cxled->cxld.region; 211 212 return 1; 213 } 214 215 static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa) 216 { 217 struct cxl_dpa_to_region_context ctx; 218 struct cxl_port *port; 219 220 ctx = (struct cxl_dpa_to_region_context) { 221 .dpa = dpa, 222 }; 223 port = dev_get_drvdata(&cxlmd->dev); 224 if (port && is_cxl_endpoint(port) && port->commit_end != -1) 225 device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); 226 227 return ctx.cxlr; 228 } 229 230 static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) 231 { 232 struct cxl_dev_state *cxlds = cxlmd->cxlds; 233 234 if (!IS_ENABLED(CONFIG_DEBUG_FS)) 235 return 0; 236 237 if (!resource_size(&cxlds->dpa_res)) { 238 dev_dbg(cxlds->dev, "device has no dpa resource\n"); 239 return -EINVAL; 240 } 241 if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) { 242 dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n", 243 dpa, &cxlds->dpa_res); 244 return -EINVAL; 245 } 246 if (!IS_ALIGNED(dpa, 64)) { 247 dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa); 248 return -EINVAL; 249 } 250 251 return 0; 252 } 253 254 int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) 255 { 256 struct cxl_dev_state *cxlds = cxlmd->cxlds; 257 struct cxl_mbox_inject_poison inject; 258 struct cxl_poison_record record; 259 struct cxl_mbox_cmd mbox_cmd; 260 struct cxl_region *cxlr; 261 int rc; 262 263 if (!IS_ENABLED(CONFIG_DEBUG_FS)) 264 return 0; 265 266 rc = down_read_interruptible(&cxl_dpa_rwsem); 267 if (rc) 268 return rc; 269 270 rc = cxl_validate_poison_dpa(cxlmd, dpa); 271 if (rc) 272 goto out; 273 274 inject.address = cpu_to_le64(dpa); 275 mbox_cmd = (struct cxl_mbox_cmd) { 276 .opcode = CXL_MBOX_OP_INJECT_POISON, 277 .size_in = sizeof(inject), 278 .payload_in = &inject, 279 }; 280 rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); 281 if (rc) 282 goto out; 283 284 cxlr = cxl_dpa_to_region(cxlmd, dpa); 285 if (cxlr) 286 dev_warn_once(cxlds->dev, 287 "poison inject dpa:%#llx region: %s\n", dpa, 288 dev_name(&cxlr->dev)); 289 290 record = (struct cxl_poison_record) { 291 .address = cpu_to_le64(dpa), 292 .length = cpu_to_le32(1), 293 }; 294 trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT); 295 out: 296 up_read(&cxl_dpa_rwsem); 297 298 return rc; 299 } 300 EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL); 301 302 int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) 303 { 304 struct cxl_dev_state *cxlds = cxlmd->cxlds; 305 struct cxl_mbox_clear_poison clear; 306 struct cxl_poison_record record; 307 struct cxl_mbox_cmd mbox_cmd; 308 struct cxl_region *cxlr; 309 int rc; 310 311 if (!IS_ENABLED(CONFIG_DEBUG_FS)) 312 return 0; 313 314 rc = down_read_interruptible(&cxl_dpa_rwsem); 315 if (rc) 316 return rc; 317 318 rc = cxl_validate_poison_dpa(cxlmd, dpa); 319 if (rc) 320 goto out; 321 322 /* 323 * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command 324 * is defined to accept 64 bytes of write-data, along with the 325 * address to clear. This driver uses zeroes as write-data. 326 */ 327 clear = (struct cxl_mbox_clear_poison) { 328 .address = cpu_to_le64(dpa) 329 }; 330 331 mbox_cmd = (struct cxl_mbox_cmd) { 332 .opcode = CXL_MBOX_OP_CLEAR_POISON, 333 .size_in = sizeof(clear), 334 .payload_in = &clear, 335 }; 336 337 rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); 338 if (rc) 339 goto out; 340 341 cxlr = cxl_dpa_to_region(cxlmd, dpa); 342 if (cxlr) 343 dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n", 344 dpa, dev_name(&cxlr->dev)); 345 346 record = (struct cxl_poison_record) { 347 .address = cpu_to_le64(dpa), 348 .length = cpu_to_le32(1), 349 }; 350 trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR); 351 out: 352 up_read(&cxl_dpa_rwsem); 353 354 return rc; 355 } 356 EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL); 357 358 static struct attribute *cxl_memdev_attributes[] = { 359 &dev_attr_serial.attr, 360 &dev_attr_firmware_version.attr, 361 &dev_attr_payload_max.attr, 362 &dev_attr_label_storage_size.attr, 363 &dev_attr_numa_node.attr, 364 NULL, 365 }; 366 367 static struct attribute *cxl_memdev_pmem_attributes[] = { 368 &dev_attr_pmem_size.attr, 369 NULL, 370 }; 371 372 static struct attribute *cxl_memdev_ram_attributes[] = { 373 &dev_attr_ram_size.attr, 374 NULL, 375 }; 376 377 static struct attribute *cxl_memdev_security_attributes[] = { 378 &dev_attr_security_state.attr, 379 NULL, 380 }; 381 382 static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a, 383 int n) 384 { 385 if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr) 386 return 0; 387 return a->mode; 388 } 389 390 static struct attribute_group cxl_memdev_attribute_group = { 391 .attrs = cxl_memdev_attributes, 392 .is_visible = cxl_memdev_visible, 393 }; 394 395 static struct attribute_group cxl_memdev_ram_attribute_group = { 396 .name = "ram", 397 .attrs = cxl_memdev_ram_attributes, 398 }; 399 400 static struct attribute_group cxl_memdev_pmem_attribute_group = { 401 .name = "pmem", 402 .attrs = cxl_memdev_pmem_attributes, 403 }; 404 405 static struct attribute_group cxl_memdev_security_attribute_group = { 406 .name = "security", 407 .attrs = cxl_memdev_security_attributes, 408 }; 409 410 static const struct attribute_group *cxl_memdev_attribute_groups[] = { 411 &cxl_memdev_attribute_group, 412 &cxl_memdev_ram_attribute_group, 413 &cxl_memdev_pmem_attribute_group, 414 &cxl_memdev_security_attribute_group, 415 NULL, 416 }; 417 418 static const struct device_type cxl_memdev_type = { 419 .name = "cxl_memdev", 420 .release = cxl_memdev_release, 421 .devnode = cxl_memdev_devnode, 422 .groups = cxl_memdev_attribute_groups, 423 }; 424 425 bool is_cxl_memdev(const struct device *dev) 426 { 427 return dev->type == &cxl_memdev_type; 428 } 429 EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL); 430 431 /** 432 * set_exclusive_cxl_commands() - atomically disable user cxl commands 433 * @cxlds: The device state to operate on 434 * @cmds: bitmap of commands to mark exclusive 435 * 436 * Grab the cxl_memdev_rwsem in write mode to flush in-flight 437 * invocations of the ioctl path and then disable future execution of 438 * commands with the command ids set in @cmds. 439 */ 440 void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds) 441 { 442 down_write(&cxl_memdev_rwsem); 443 bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds, 444 CXL_MEM_COMMAND_ID_MAX); 445 up_write(&cxl_memdev_rwsem); 446 } 447 EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL); 448 449 /** 450 * clear_exclusive_cxl_commands() - atomically enable user cxl commands 451 * @cxlds: The device state to modify 452 * @cmds: bitmap of commands to mark available for userspace 453 */ 454 void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds) 455 { 456 down_write(&cxl_memdev_rwsem); 457 bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds, 458 CXL_MEM_COMMAND_ID_MAX); 459 up_write(&cxl_memdev_rwsem); 460 } 461 EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL); 462 463 static void cxl_memdev_security_shutdown(struct device *dev) 464 { 465 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 466 struct cxl_dev_state *cxlds = cxlmd->cxlds; 467 468 if (cxlds->security.poll) 469 cancel_delayed_work_sync(&cxlds->security.poll_dwork); 470 } 471 472 static void cxl_memdev_shutdown(struct device *dev) 473 { 474 struct cxl_memdev *cxlmd = to_cxl_memdev(dev); 475 476 down_write(&cxl_memdev_rwsem); 477 cxl_memdev_security_shutdown(dev); 478 cxlmd->cxlds = NULL; 479 up_write(&cxl_memdev_rwsem); 480 } 481 482 static void cxl_memdev_unregister(void *_cxlmd) 483 { 484 struct cxl_memdev *cxlmd = _cxlmd; 485 struct device *dev = &cxlmd->dev; 486 487 cxl_memdev_shutdown(dev); 488 cdev_device_del(&cxlmd->cdev, dev); 489 put_device(dev); 490 } 491 492 static void detach_memdev(struct work_struct *work) 493 { 494 struct cxl_memdev *cxlmd; 495 496 cxlmd = container_of(work, typeof(*cxlmd), detach_work); 497 device_release_driver(&cxlmd->dev); 498 put_device(&cxlmd->dev); 499 } 500 501 static struct lock_class_key cxl_memdev_key; 502 503 static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, 504 const struct file_operations *fops) 505 { 506 struct cxl_memdev *cxlmd; 507 struct device *dev; 508 struct cdev *cdev; 509 int rc; 510 511 cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); 512 if (!cxlmd) 513 return ERR_PTR(-ENOMEM); 514 515 rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL); 516 if (rc < 0) 517 goto err; 518 cxlmd->id = rc; 519 cxlmd->depth = -1; 520 521 dev = &cxlmd->dev; 522 device_initialize(dev); 523 lockdep_set_class(&dev->mutex, &cxl_memdev_key); 524 dev->parent = cxlds->dev; 525 dev->bus = &cxl_bus_type; 526 dev->devt = MKDEV(cxl_mem_major, cxlmd->id); 527 dev->type = &cxl_memdev_type; 528 device_set_pm_not_required(dev); 529 INIT_WORK(&cxlmd->detach_work, detach_memdev); 530 531 cdev = &cxlmd->cdev; 532 cdev_init(cdev, fops); 533 return cxlmd; 534 535 err: 536 kfree(cxlmd); 537 return ERR_PTR(rc); 538 } 539 540 static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd, 541 unsigned long arg) 542 { 543 switch (cmd) { 544 case CXL_MEM_QUERY_COMMANDS: 545 return cxl_query_cmd(cxlmd, (void __user *)arg); 546 case CXL_MEM_SEND_COMMAND: 547 return cxl_send_cmd(cxlmd, (void __user *)arg); 548 default: 549 return -ENOTTY; 550 } 551 } 552 553 static long cxl_memdev_ioctl(struct file *file, unsigned int cmd, 554 unsigned long arg) 555 { 556 struct cxl_memdev *cxlmd = file->private_data; 557 int rc = -ENXIO; 558 559 down_read(&cxl_memdev_rwsem); 560 if (cxlmd->cxlds) 561 rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); 562 up_read(&cxl_memdev_rwsem); 563 564 return rc; 565 } 566 567 static int cxl_memdev_open(struct inode *inode, struct file *file) 568 { 569 struct cxl_memdev *cxlmd = 570 container_of(inode->i_cdev, typeof(*cxlmd), cdev); 571 572 get_device(&cxlmd->dev); 573 file->private_data = cxlmd; 574 575 return 0; 576 } 577 578 static int cxl_memdev_release_file(struct inode *inode, struct file *file) 579 { 580 struct cxl_memdev *cxlmd = 581 container_of(inode->i_cdev, typeof(*cxlmd), cdev); 582 583 put_device(&cxlmd->dev); 584 585 return 0; 586 } 587 588 static const struct file_operations cxl_memdev_fops = { 589 .owner = THIS_MODULE, 590 .unlocked_ioctl = cxl_memdev_ioctl, 591 .open = cxl_memdev_open, 592 .release = cxl_memdev_release_file, 593 .compat_ioctl = compat_ptr_ioctl, 594 .llseek = noop_llseek, 595 }; 596 597 struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) 598 { 599 struct cxl_memdev *cxlmd; 600 struct device *dev; 601 struct cdev *cdev; 602 int rc; 603 604 cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops); 605 if (IS_ERR(cxlmd)) 606 return cxlmd; 607 608 dev = &cxlmd->dev; 609 rc = dev_set_name(dev, "mem%d", cxlmd->id); 610 if (rc) 611 goto err; 612 613 /* 614 * Activate ioctl operations, no cxl_memdev_rwsem manipulation 615 * needed as this is ordered with cdev_add() publishing the device. 616 */ 617 cxlmd->cxlds = cxlds; 618 cxlds->cxlmd = cxlmd; 619 620 cdev = &cxlmd->cdev; 621 rc = cdev_device_add(cdev, dev); 622 if (rc) 623 goto err; 624 625 rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd); 626 if (rc) 627 return ERR_PTR(rc); 628 return cxlmd; 629 630 err: 631 /* 632 * The cdev was briefly live, shutdown any ioctl operations that 633 * saw that state. 634 */ 635 cxl_memdev_shutdown(dev); 636 put_device(dev); 637 return ERR_PTR(rc); 638 } 639 EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL); 640 641 __init int cxl_memdev_init(void) 642 { 643 dev_t devt; 644 int rc; 645 646 rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl"); 647 if (rc) 648 return rc; 649 650 cxl_mem_major = MAJOR(devt); 651 652 return 0; 653 } 654 655 void cxl_memdev_exit(void) 656 { 657 unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS); 658 } 659