// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. */

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <cxlmem.h>
#include "trace.h"
#include "core.h"

static DECLARE_RWSEM(cxl_memdev_rwsem);

/*
 * An entire PCI topology full of devices should be enough for any
 * config
 */
#define CXL_MEM_MAX_DEVS 65536

static int cxl_mem_major;
static DEFINE_IDA(cxl_memdev_ida);

static void cxl_memdev_release(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	ida_free(&cxl_memdev_ida, cxlmd->id);
	kfree(cxlmd);
}

static char *cxl_memdev_devnode(const struct device *dev, umode_t *mode, kuid_t *uid,
				kgid_t *gid)
{
	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
}

static ssize_t firmware_version_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	return sysfs_emit(buf, "%.16s\n", mds->firmware_version);
}
static DEVICE_ATTR_RO(firmware_version);

static ssize_t payload_max_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	return sysfs_emit(buf, "%zu\n", mds->payload_size);
}
static DEVICE_ATTR_RO(payload_max);

static ssize_t label_storage_size_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	return sysfs_emit(buf, "%zu\n", mds->lsa_size);
}
static DEVICE_ATTR_RO(label_storage_size);

static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = resource_size(&cxlds->ram_res);

	return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_ram_size =
	__ATTR(size, 0444, ram_size_show, NULL);

static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = resource_size(&cxlds->pmem_res);

	return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_pmem_size =
	__ATTR(size, 0444, pmem_size_show, NULL);

static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%#llx\n", cxlds->serial);
}
static DEVICE_ATTR_RO(serial);

static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	return sysfs_emit(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

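/*
 * Read the poison list directly from the device, one Get Poison List
 * request per partition. Used when no regions are mapped to this memdev.
 */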
static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	u64 offset, length;
	int rc = 0;

	/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
	if (resource_size(&cxlds->pmem_res)) {
		offset = cxlds->pmem_res.start;
		length = resource_size(&cxlds->pmem_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc)
			return rc;
	}
	if (resource_size(&cxlds->ram_res)) {
		offset = cxlds->ram_res.start;
		length = resource_size(&cxlds->ram_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		/*
		 * Invalid Physical Address is not an error for
		 * volatile addresses. Device support is optional.
		 */
		if (rc == -EFAULT)
			rc = 0;
	}
	return rc;
}

int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
{
	struct cxl_port *port;
	int rc;

	port = dev_get_drvdata(&cxlmd->dev);
	if (!port || !is_cxl_endpoint(port))
		return -EINVAL;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	if (port->commit_end == -1) {
		/* No regions mapped to this memdev */
		rc = cxl_get_poison_by_memdev(cxlmd);
	} else {
		/* Regions mapped, collect poison by endpoint */
		rc = cxl_get_poison_by_endpoint(port);
	}
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);

struct cxl_dpa_to_region_context {
	struct cxl_region *cxlr;
	u64 dpa;
};

static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
	struct cxl_dpa_to_region_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	u64 dpa = ctx->dpa;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
		return 0;

	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
		return 0;

	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
		dev_name(&cxled->cxld.region->dev));

	ctx->cxlr = cxled->cxld.region;

	return 1;
}

static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dpa_to_region_context ctx;
	struct cxl_port *port;

	ctx = (struct cxl_dpa_to_region_context) {
		.dpa = dpa,
	};
	port = dev_get_drvdata(&cxlmd->dev);
	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);

	return ctx.cxlr;
}

static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	if (!resource_size(&cxlds->dpa_res)) {
		dev_dbg(cxlds->dev, "device has no dpa resource\n");
		return -EINVAL;
	}
	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
			dpa, &cxlds->dpa_res);
		return -EINVAL;
	}
	if (!IS_ALIGNED(dpa, 64)) {
		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
		return -EINVAL;
	}

	return 0;
}

int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
	struct cxl_mbox_inject_poison inject;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

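	/*
	 * Build the Inject Poison mailbox payload; only the 64-byte
	 * aligned DPA is sent, and a poison trace record for that
	 * address is emitted below on success.
	 */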
	inject.address = cpu_to_le64(dpa);
	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_INJECT_POISON,
		.size_in = sizeof(inject),
		.payload_in = &inject,
	};
	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(mds->cxlds.dev,
			      "poison inject dpa:%#llx region: %s\n", dpa,
			      dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);

int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
	struct cxl_mbox_clear_poison clear;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	/*
	 * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
	 * is defined to accept 64 bytes of write-data, along with the
	 * address to clear. This driver uses zeroes as write-data.
	 */
	clear = (struct cxl_mbox_clear_poison) {
		.address = cpu_to_le64(dpa)
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_CLEAR_POISON,
		.size_in = sizeof(clear),
		.payload_in = &clear,
	};

	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(mds->cxlds.dev,
			      "poison clear dpa:%#llx region: %s\n", dpa,
			      dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL);

static struct attribute *cxl_memdev_attributes[] = {
	&dev_attr_serial.attr,
	&dev_attr_firmware_version.attr,
	&dev_attr_payload_max.attr,
	&dev_attr_label_storage_size.attr,
	&dev_attr_numa_node.attr,
	NULL,
};

static struct attribute *cxl_memdev_pmem_attributes[] = {
	&dev_attr_pmem_size.attr,
	NULL,
};

static struct attribute *cxl_memdev_ram_attributes[] = {
	&dev_attr_ram_size.attr,
	NULL,
};

static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
				  int n)
{
	if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
		return 0;
	return a->mode;
}

static struct attribute_group cxl_memdev_attribute_group = {
	.attrs = cxl_memdev_attributes,
	.is_visible = cxl_memdev_visible,
};

static struct attribute_group cxl_memdev_ram_attribute_group = {
	.name = "ram",
	.attrs = cxl_memdev_ram_attributes,
};

static struct attribute_group cxl_memdev_pmem_attribute_group = {
	.name = "pmem",
	.attrs = cxl_memdev_pmem_attributes,
};

static const struct attribute_group *cxl_memdev_attribute_groups[] = {
	&cxl_memdev_attribute_group,
	&cxl_memdev_ram_attribute_group,
	&cxl_memdev_pmem_attribute_group,
	NULL,
};

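/*
 * The memdev device_type ties together the release handler, the
 * /dev/cxl/memN devnode naming, and the sysfs attribute groups above.
 */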
static const struct device_type cxl_memdev_type = {
	.name = "cxl_memdev",
	.release = cxl_memdev_release,
	.devnode = cxl_memdev_devnode,
	.groups = cxl_memdev_attribute_groups,
};

bool is_cxl_memdev(const struct device *dev)
{
	return dev->type == &cxl_memdev_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);

/**
 * set_exclusive_cxl_commands() - atomically disable user cxl commands
 * @mds: The device state to operate on
 * @cmds: bitmap of commands to mark exclusive
 *
 * Grab the cxl_memdev_rwsem in write mode to flush in-flight
 * invocations of the ioctl path and then disable future execution of
 * commands with the command ids set in @cmds.
 */
void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
				unsigned long *cmds)
{
	down_write(&cxl_memdev_rwsem);
	bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
		  CXL_MEM_COMMAND_ID_MAX);
	up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);

/**
 * clear_exclusive_cxl_commands() - atomically enable user cxl commands
 * @mds: The device state to modify
 * @cmds: bitmap of commands to mark available for userspace
 */
void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
				  unsigned long *cmds)
{
	down_write(&cxl_memdev_rwsem);
	bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
		      CXL_MEM_COMMAND_ID_MAX);
	up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);

static void cxl_memdev_shutdown(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	down_write(&cxl_memdev_rwsem);
	cxlmd->cxlds = NULL;
	up_write(&cxl_memdev_rwsem);
}

static void cxl_memdev_unregister(void *_cxlmd)
{
	struct cxl_memdev *cxlmd = _cxlmd;
	struct device *dev = &cxlmd->dev;

	cxl_memdev_shutdown(dev);
	cdev_device_del(&cxlmd->cdev, dev);
	put_device(dev);
}

static void detach_memdev(struct work_struct *work)
{
	struct cxl_memdev *cxlmd;

	cxlmd = container_of(work, typeof(*cxlmd), detach_work);
	device_release_driver(&cxlmd->dev);
	put_device(&cxlmd->dev);
}

static struct lock_class_key cxl_memdev_key;

static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
					   const struct file_operations *fops)
{
	struct cxl_memdev *cxlmd;
	struct device *dev;
	struct cdev *cdev;
	int rc;

	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
	if (!cxlmd)
		return ERR_PTR(-ENOMEM);

	rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
	if (rc < 0)
		goto err;
	cxlmd->id = rc;
	cxlmd->depth = -1;

	dev = &cxlmd->dev;
	device_initialize(dev);
	lockdep_set_class(&dev->mutex, &cxl_memdev_key);
	dev->parent = cxlds->dev;
	dev->bus = &cxl_bus_type;
	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
	dev->type = &cxl_memdev_type;
	device_set_pm_not_required(dev);
	INIT_WORK(&cxlmd->detach_work, detach_memdev);

	cdev = &cxlmd->cdev;
	cdev_init(cdev, fops);
	return cxlmd;

err:
	kfree(cxlmd);
	return ERR_PTR(rc);
}

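/*
 * The ioctl path is only valid while cxlmd->cxlds is live. It is
 * published in devm_cxl_add_memdev() and cleared under cxl_memdev_rwsem
 * by cxl_memdev_shutdown() at unregistration, so cxl_memdev_ioctl()
 * checks it under the same lock before dispatching.
 */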
static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
			       unsigned long arg)
{
	switch (cmd) {
	case CXL_MEM_QUERY_COMMANDS:
		return cxl_query_cmd(cxlmd, (void __user *)arg);
	case CXL_MEM_SEND_COMMAND:
		return cxl_send_cmd(cxlmd, (void __user *)arg);
	default:
		return -ENOTTY;
	}
}

static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
			     unsigned long arg)
{
	struct cxl_memdev *cxlmd = file->private_data;
	int rc = -ENXIO;

	down_read(&cxl_memdev_rwsem);
	if (cxlmd->cxlds)
		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
	up_read(&cxl_memdev_rwsem);

	return rc;
}

static int cxl_memdev_open(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	get_device(&cxlmd->dev);
	file->private_data = cxlmd;

	return 0;
}

static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	put_device(&cxlmd->dev);

	return 0;
}

static const struct file_operations cxl_memdev_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = cxl_memdev_ioctl,
	.open = cxl_memdev_open,
	.release = cxl_memdev_release_file,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
{
	struct cxl_memdev *cxlmd;
	struct device *dev;
	struct cdev *cdev;
	int rc;

	cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
	if (IS_ERR(cxlmd))
		return cxlmd;

	dev = &cxlmd->dev;
	rc = dev_set_name(dev, "mem%d", cxlmd->id);
	if (rc)
		goto err;

	/*
	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
	 * needed as this is ordered with cdev_add() publishing the device.
	 */
	cxlmd->cxlds = cxlds;
	cxlds->cxlmd = cxlmd;

	cdev = &cxlmd->cdev;
	rc = cdev_device_add(cdev, dev);
	if (rc)
		goto err;

	rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
	if (rc)
		return ERR_PTR(rc);
	return cxlmd;

err:
	/*
	 * The cdev was briefly live, shutdown any ioctl operations that
	 * saw that state.
	 */
	cxl_memdev_shutdown(dev);
	put_device(dev);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);

__init int cxl_memdev_init(void)
{
	dev_t devt;
	int rc;

	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
	if (rc)
		return rc;

	cxl_mem_major = MAJOR(devt);

	return 0;
}

void cxl_memdev_exit(void)
{
	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
}