// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/moduleparam.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"

/**
 * DOC: cxl pci
 *
 * This implements the PCI exclusive functionality for a CXL device as it is
 * defined by the Compute Express Link specification. CXL devices may surface
 * certain functionality even if it isn't CXL enabled. While this driver is
 * focused around the PCI specific aspects of a CXL device, it binds to the
 * specific CXL memory device class code, and therefore the implementation of
 * cxl_pci is focused around CXL memory devices.
 *
 * The driver has several responsibilities, mainly:
 *  - Create the memX device and register it on the CXL bus.
 *  - Enumerate the device's register interface and map it.
 *  - Register an nvdimm bridge device with cxl_core.
 *  - Register a CXL mailbox with cxl_core.
 */

#define cxl_doorbell_busy(cxlds)                                          \
	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &            \
	 CXLDEV_MBOX_CTRL_DOORBELL)

/* CXL 2.0 - 8.2.8.4 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)

/*
 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
 * dictate how long to wait for the mailbox to become ready. The new
 * field allows the device to tell software the amount of time to wait
 * before mailbox ready. This field per the spec theoretically allows
 * for up to 255 seconds. 255 seconds is unreasonably long; it's longer
 * than the maximum SATA port link recovery wait. Default to 60 seconds
 * until someone builds a CXL device that needs more time in practice.
 */
static unsigned short mbox_ready_timeout = 60;
module_param(mbox_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");

static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
{
	const unsigned long start = jiffies;
	unsigned long end = start;

	while (cxl_doorbell_busy(cxlds)) {
		end = jiffies;

		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
			/* Check again in case preempted before timeout test */
			if (!cxl_doorbell_busy(cxlds))
				break;
			return -ETIMEDOUT;
		}
		cpu_relax();
	}

	dev_dbg(cxlds->dev, "Doorbell wait took %dms",
		jiffies_to_msecs(end) - jiffies_to_msecs(start));
	return 0;
}

#define cxl_err(dev, status, msg)                                         \
	dev_err_ratelimited(dev, msg ", device state %s%s\n",             \
			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",    \
			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")

#define cxl_cmd_err(dev, cmd, status, msg)                                \
	dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \
			    (cmd)->opcode,                                 \
			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",    \
			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")

struct cxl_dev_id {
	struct cxl_dev_state *cxlds;
};

static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq,
			   irq_handler_t handler, irq_handler_t thread_fn)
{
	struct device *dev = cxlds->dev;
	struct cxl_dev_id *dev_id;

	/* dev_id must be globally unique and must contain the cxlds */
	dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
	if (!dev_id)
		return -ENOMEM;
	dev_id->cxlds = cxlds;

	return devm_request_threaded_irq(dev, irq, handler, thread_fn,
					 IRQF_SHARED | IRQF_ONESHOT,
					 NULL, dev_id);
}

static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
{
	u64 reg;

	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
	return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100;
}

static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
{
	u64 reg;
	u16 opcode;
	struct cxl_dev_id *dev_id = id;
	struct cxl_dev_state *cxlds = dev_id->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	if (!cxl_mbox_background_complete(cxlds))
		return IRQ_NONE;

	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
	if (opcode == CXL_MBOX_OP_SANITIZE) {
		if (mds->security.sanitize_node)
			sysfs_notify_dirent(mds->security.sanitize_node);

		dev_dbg(cxlds->dev, "Sanitization operation ended\n");
	} else {
		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
		rcuwait_wake_up(&mds->mbox_wait);
	}

	return IRQ_HANDLED;
}
/*
 * Sanitization operation polling mode.
 */
static void cxl_mbox_sanitize_work(struct work_struct *work)
{
	struct cxl_memdev_state *mds =
		container_of(work, typeof(*mds), security.poll_dwork.work);
	struct cxl_dev_state *cxlds = &mds->cxlds;

	mutex_lock(&mds->mbox_mutex);
	if (cxl_mbox_background_complete(cxlds)) {
		mds->security.poll_tmo_secs = 0;
		put_device(cxlds->dev);

		if (mds->security.sanitize_node)
			sysfs_notify_dirent(mds->security.sanitize_node);

		dev_dbg(cxlds->dev, "Sanitization operation ended\n");
	} else {
		int timeout = mds->security.poll_tmo_secs + 10;

		mds->security.poll_tmo_secs = min(15 * 60, timeout);
		queue_delayed_work(system_wq, &mds->security.poll_dwork,
				   timeout * HZ);
	}
	mutex_unlock(&mds->mbox_mutex);
}
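/*
 * Note (summary added for clarity, not upstream text): sanitize completion is
 * reported either by the background-command interrupt (cxl_pci_mbox_irq()
 * above) or, when mailbox interrupts are unavailable, by the polling worker
 * above. The poll interval starts at one second (see the sanitize handling in
 * __cxl_pci_mbox_send_cmd() below), is backed off by ten seconds per attempt,
 * and is bounded at roughly 15 minutes. Either path notifies poll(2) waiters
 * through security.sanitize_node.
 */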
/**
 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 * @mds: The memory device driver data
 * @mbox_cmd: Command to send to the memory device.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 *         Caller should check the return code in @mbox_cmd to make sure it
 *         succeeded.
 *
 * This is a generic form of the CXL mailbox send command, thus only using the
 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 * devices, and perhaps other types of CXL devices, may have further information
 * available upon error conditions. Driver facilities wishing to send mailbox
 * commands should use the wrapper command.
 *
 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 * mailbox to be OS controlled and the secondary mailbox to be used by system
 * firmware. This allows the OS and firmware to communicate with the device and
 * not need to coordinate with each other. The driver only uses the primary
 * mailbox.
 */
static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds,
				   struct cxl_mbox_cmd *mbox_cmd)
{
	struct cxl_dev_state *cxlds = &mds->cxlds;
	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
	struct device *dev = cxlds->dev;
	u64 cmd_reg, status_reg;
	size_t out_len;
	int rc;

	lockdep_assert_held(&mds->mbox_mutex);

	/*
	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
	 *   1. Caller reads MB Control Register to verify doorbell is clear
	 *   2. Caller writes Command Register
	 *   3. Caller writes Command Payload Registers if input payload is non-empty
	 *   4. Caller writes MB Control Register to set doorbell
	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
	 *   6. Caller reads MB Status Register to fetch Return code
	 *   7. If command successful, Caller reads Command Register to get Payload Length
	 *   8. If output payload is non-empty, host reads Command Payload Registers
	 *
	 * Hardware is free to do whatever it wants before the doorbell is rung,
	 * and isn't allowed to change anything after it clears the doorbell. As
	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
	 * also happen in any order (though some orders might not make sense).
	 */

	/* #1 */
	if (cxl_doorbell_busy(cxlds)) {
		u64 md_status =
			readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
			    "mailbox queue busy");
		return -EBUSY;
	}

	/*
	 * With sanitize polling, hardware might be done and the poller still
	 * not be in sync. Ensure no new command comes in until the poller has
	 * caught up. Keep the hardware semantics and only allow device health
	 * status.
	 */
	if (mds->security.poll_tmo_secs > 0) {
		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
			return -EBUSY;
	}

	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
			     mbox_cmd->opcode);
	if (mbox_cmd->size_in) {
		if (WARN_ON(!mbox_cmd->payload_in))
			return -EINVAL;

		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
				      mbox_cmd->size_in);
		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
	}

	/* #2, #3 */
	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);

	/* #4 */
	dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode);
	writel(CXLDEV_MBOX_CTRL_DOORBELL,
	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	/* #5 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
	if (rc == -ETIMEDOUT) {
		u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
		return rc;
	}

	/* #6 */
	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	/*
	 * Handle the background command in a synchronous manner.
	 *
	 * All other mailbox commands will serialize/queue on the mbox_mutex,
	 * which we currently hold. Furthermore this also guarantees that
	 * cxl_mbox_background_complete() checks are safe amongst each other,
	 * in that no new bg operation can occur in between.
	 *
	 * Background operations are timesliced in accordance with the nature
	 * of the command. In the event of timeout, the mailbox state is
	 * indeterminate until the next successful command submission and the
	 * driver can get back in sync with the hardware state.
	 */
	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
		u64 bg_status_reg;
		int i, timeout;

		/*
		 * Sanitization is a special case which monopolizes the device
		 * and cannot be timesliced. Handle asynchronously instead,
		 * and allow userspace to poll(2) for completion.
		 */
		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
			if (mds->security.poll_tmo_secs != -1) {
				/* hold the device throughout */
				get_device(cxlds->dev);

				/* give first timeout a second */
				timeout = 1;
				mds->security.poll_tmo_secs = timeout;
				queue_delayed_work(system_wq,
						   &mds->security.poll_dwork,
						   timeout * HZ);
			}

			dev_dbg(dev, "Sanitization operation started\n");
			goto success;
		}

		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
			mbox_cmd->opcode);

		timeout = mbox_cmd->poll_interval_ms;
		for (i = 0; i < mbox_cmd->poll_count; i++) {
			if (rcuwait_wait_event_timeout(&mds->mbox_wait,
				       cxl_mbox_background_complete(cxlds),
				       TASK_UNINTERRUPTIBLE,
				       msecs_to_jiffies(timeout)) > 0)
				break;
		}

		if (!cxl_mbox_background_complete(cxlds)) {
			dev_err(dev, "timeout waiting for background (%d ms)\n",
				timeout * mbox_cmd->poll_count);
			return -ETIMEDOUT;
		}

		bg_status_reg = readq(cxlds->regs.mbox +
				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
		mbox_cmd->return_code =
			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
				  bg_status_reg);
		dev_dbg(dev,
			"Mailbox background operation (0x%04x) completed\n",
			mbox_cmd->opcode);
	}

	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
		dev_dbg(dev, "Mailbox operation had an error: %s\n",
			cxl_mbox_cmd_rc2str(mbox_cmd));
		return 0; /* completed but caller must check return_code */
	}

success:
	/* #7 */
	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);

	/* #8 */
	if (out_len && mbox_cmd->payload_out) {
		/*
		 * Sanitize the copy. If hardware misbehaves, out_len per the
		 * spec can actually be greater than the max allowed size (21
		 * bits available but spec defined 1M max). The caller also may
		 * have requested less data than the hardware supplied even
		 * within spec.
		 */
		size_t n;

		n = min3(mbox_cmd->size_out, mds->payload_size, out_len);
		memcpy_fromio(mbox_cmd->payload_out, payload, n);
		mbox_cmd->size_out = n;
	} else {
		mbox_cmd->size_out = 0;
	}

	return 0;
}

static int cxl_pci_mbox_send(struct cxl_memdev_state *mds,
			     struct cxl_mbox_cmd *cmd)
{
	int rc;

	mutex_lock_io(&mds->mbox_mutex);
	rc = __cxl_pci_mbox_send_cmd(mds, cmd);
	mutex_unlock(&mds->mbox_mutex);

	return rc;
}
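/*
 * Illustrative sketch, not part of the upstream driver: roughly what happens
 * when cxl_core issues a command through the ->mbox_send() callback that
 * cxl_pci_setup_mailbox() registers below. Real consumers go through the
 * cxl_internal_send_cmd() wrapper in cxl_core rather than this path directly;
 * the example function name, the choice of opcode, and the raw output buffer
 * here are placeholders for illustration only.
 */
static int __maybe_unused cxl_pci_mbox_send_example(struct cxl_memdev_state *mds)
{
	u8 out[256]; /* placeholder for a command-specific payload struct */
	struct cxl_mbox_cmd cmd = {
		.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
		.payload_out = out,
		.size_out = sizeof(out),
	};
	int rc;

	/* Serializes on mbox_mutex and runs the doorbell handshake above */
	rc = cxl_pci_mbox_send(mds, &cmd);
	if (rc)
		return rc;

	/* Device status is reported separately from the transport result */
	if (cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS)
		return -ENXIO;

	/* cmd.size_out now holds the number of bytes the device returned */
	return 0;
}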
static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
{
	struct cxl_dev_state *cxlds = &mds->cxlds;
	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
	struct device *dev = cxlds->dev;
	unsigned long timeout;
	u64 md_status;

	timeout = jiffies + mbox_ready_timeout * HZ;
	do {
		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
		if (md_status & CXLMDEV_MBOX_IF_READY)
			break;
		if (msleep_interruptible(100))
			break;
	} while (!time_after(jiffies, timeout));

	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
		cxl_err(dev, md_status, "timeout awaiting mailbox ready");
		return -ETIMEDOUT;
	}

	/*
	 * A command may be in flight from a previous driver instance,
	 * think kexec, do one doorbell wait so that
	 * __cxl_pci_mbox_send_cmd() can assume that it is the only
	 * source for future doorbell busy events.
	 */
	if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
		cxl_err(dev, md_status, "timeout awaiting mailbox idle");
		return -ETIMEDOUT;
	}

	mds->mbox_send = cxl_pci_mbox_send;
	mds->payload_size =
		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);

	/*
	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
	 *
	 * If the size is too small, mandatory commands will not work and so
	 * there's no point in going forward. If the size is too large, there's
	 * no harm in soft limiting it.
	 */
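	/*
	 * Worked example (added for illustration): a Payload Size field of 8
	 * encodes 1 << 8 = 256 bytes, the smallest size accepted below, while
	 * a field of 20 encodes 1 << 20 = 1 MB, the ceiling applied by the
	 * min_t() clamp that follows.
	 */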
	mds->payload_size = min_t(size_t, mds->payload_size, SZ_1M);
	if (mds->payload_size < 256) {
		dev_err(dev, "Mailbox is too small (%zub)",
			mds->payload_size);
		return -ENXIO;
	}

	dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size);

	rcuwait_init(&mds->mbox_wait);

	if (cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) {
		u32 ctrl;
		int irq, msgnum;
		struct pci_dev *pdev = to_pci_dev(cxlds->dev);

		msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
		irq = pci_irq_vector(pdev, msgnum);
		if (irq < 0)
			goto mbox_poll;

		if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq, NULL))
			goto mbox_poll;

		/* enable background command mbox irq support */
		ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
		ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ;
		writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

		return 0;
	}

mbox_poll:
	mds->security.poll = true;
	INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);

	dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported");
	return 0;
}

static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
{
	struct device *dev = &pdev->dev;

	map->base = ioremap(map->resource, map->max_size);
	if (!map->base) {
		dev_err(dev, "failed to map registers\n");
		return -ENOMEM;
	}

	dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource);
	return 0;
}

static void cxl_unmap_regblock(struct pci_dev *pdev,
			       struct cxl_register_map *map)
{
	iounmap(map->base);
	map->base = NULL;
}

static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
{
	struct cxl_component_reg_map *comp_map;
	struct cxl_device_reg_map *dev_map;
	struct device *dev = &pdev->dev;
	void __iomem *base = map->base;

	switch (map->reg_type) {
	case CXL_REGLOC_RBI_COMPONENT:
		comp_map = &map->component_map;
		cxl_probe_component_regs(dev, base, comp_map);
		if (!comp_map->hdm_decoder.valid) {
			dev_err(dev, "HDM decoder registers not found\n");
			return -ENXIO;
		}

		if (!comp_map->ras.valid)
			dev_dbg(dev, "RAS registers not found\n");

		dev_dbg(dev, "Set up component registers\n");
		break;
	case CXL_REGLOC_RBI_MEMDEV:
		dev_map = &map->device_map;
		cxl_probe_device_regs(dev, base, dev_map);
		if (!dev_map->status.valid || !dev_map->mbox.valid ||
		    !dev_map->memdev.valid) {
			dev_err(dev, "registers not found: %s%s%s\n",
				!dev_map->status.valid ? "status " : "",
				!dev_map->mbox.valid ? "mbox " : "",
				!dev_map->memdev.valid ? "memdev " : "");
			return -ENXIO;
		}

		dev_dbg(dev, "Probing device registers...\n");
		break;
	default:
		break;
	}

	return 0;
}

static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
			  struct cxl_register_map *map)
{
	int rc;

	rc = cxl_find_regblock(pdev, type, map);
	if (rc)
		return rc;

	rc = cxl_map_regblock(pdev, map);
	if (rc)
		return rc;

	rc = cxl_probe_regs(pdev, map);
	cxl_unmap_regblock(pdev, map);

	return rc;
}

/*
 * Assume that any RCIEP that emits the CXL memory expander class code
 * is an RCD
 */
static bool is_cxl_restricted(struct pci_dev *pdev)
{
	return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
}

static int cxl_pci_ras_unmask(struct pci_dev *pdev)
{
	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	void __iomem *addr;
	u32 orig_val, val, mask;
	u16 cap;
	int rc;

	if (!cxlds->regs.ras) {
		dev_dbg(&pdev->dev, "No RAS registers.\n");
		return 0;
	}

	/* BIOS has CXL error control */
	if (!host_bridge->native_cxl_error)
		return -ENXIO;

	rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap);
	if (rc)
		return rc;

	if (cap & PCI_EXP_DEVCTL_URRE) {
		addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET;
		orig_val = readl(addr);

		mask = CXL_RAS_UNCORRECTABLE_MASK_MASK |
		       CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
		val = orig_val & ~mask;
		writel(val, addr);
		dev_dbg(&pdev->dev,
			"Uncorrectable RAS Errors Mask: %#x -> %#x\n",
			orig_val, val);
	}

	if (cap & PCI_EXP_DEVCTL_CERE) {
		addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET;
		orig_val = readl(addr);
		val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK;
		writel(val, addr);
		dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n",
			orig_val, val);
	}

	return 0;
}

static void free_event_buf(void *buf)
{
	kvfree(buf);
}

/*
 * There is a single buffer for reading event logs from the mailbox. All logs
 * share this buffer protected by the mds->event_log_lock.
 */
static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
{
	struct cxl_get_event_payload *buf;

	buf = kvmalloc(mds->payload_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	mds->event.buf = buf;

	return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
}
static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
{
	int nvecs;

	/*
	 * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
	 * not generate INTx messages if that function participates in
	 * CXL.cache or CXL.mem.
	 *
	 * Additionally pci_alloc_irq_vectors() handles calling
	 * pci_free_irq_vectors() automatically despite not being called
	 * pcim_*. See pci_setup_msi_context().
	 */
	nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
				      PCI_IRQ_MSIX | PCI_IRQ_MSI);
	if (nvecs < 1) {
		dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
		return -ENXIO;
	}
	return 0;
}

static irqreturn_t cxl_event_thread(int irq, void *id)
{
	struct cxl_dev_id *dev_id = id;
	struct cxl_dev_state *cxlds = dev_id->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
	u32 status;

	do {
		/*
		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
		 * ignore the reserved upper 32 bits
		 */
		status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
		/* Ignore logs unknown to the driver */
		status &= CXLDEV_EVENT_STATUS_ALL;
		if (!status)
			break;
		cxl_mem_get_event_records(mds, status);
		cond_resched();
	} while (status);

	return IRQ_HANDLED;
}

static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int irq;

	if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
		return -ENXIO;

	irq = pci_irq_vector(pdev,
			     FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
	if (irq < 0)
		return irq;

	return cxl_request_irq(cxlds, irq, NULL, cxl_event_thread);
}

static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
				    struct cxl_event_interrupt_policy *policy)
{
	struct cxl_mbox_cmd mbox_cmd = {
		.opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
		.payload_out = policy,
		.size_out = sizeof(*policy),
	};
	int rc;

	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
	if (rc < 0)
		dev_err(mds->cxlds.dev,
			"Failed to get event interrupt policy : %d", rc);

	return rc;
}

static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
				    struct cxl_event_interrupt_policy *policy)
{
	struct cxl_mbox_cmd mbox_cmd;
	int rc;

	*policy = (struct cxl_event_interrupt_policy) {
		.info_settings = CXL_INT_MSI_MSIX,
		.warn_settings = CXL_INT_MSI_MSIX,
		.failure_settings = CXL_INT_MSI_MSIX,
		.fatal_settings = CXL_INT_MSI_MSIX,
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
		.payload_in = policy,
		.size_in = sizeof(*policy),
	};

	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
	if (rc < 0) {
		dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d",
			rc);
		return rc;
	}

	/* Retrieve final interrupt settings */
	return cxl_event_get_int_policy(mds, policy);
}

static int cxl_event_irqsetup(struct cxl_memdev_state *mds)
{
	struct cxl_dev_state *cxlds = &mds->cxlds;
	struct cxl_event_interrupt_policy policy;
	int rc;

	rc = cxl_event_config_msgnums(mds, &policy);
	if (rc)
		return rc;

	rc = cxl_event_req_irq(cxlds, policy.info_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.warn_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.failure_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
		return rc;
	}

	return 0;
}
static bool cxl_event_int_is_fw(u8 setting)
{
	u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);

	return mode == CXL_INT_FW;
}

static int cxl_event_config(struct pci_host_bridge *host_bridge,
			    struct cxl_memdev_state *mds)
{
	struct cxl_event_interrupt_policy policy;
	int rc;

	/*
	 * When BIOS maintains CXL error reporting control, it will process
	 * event records. Only one agent can do so.
	 */
	if (!host_bridge->native_cxl_error)
		return 0;

	rc = cxl_mem_alloc_event_buf(mds);
	if (rc)
		return rc;

	rc = cxl_event_get_int_policy(mds, &policy);
	if (rc)
		return rc;

	if (cxl_event_int_is_fw(policy.info_settings) ||
	    cxl_event_int_is_fw(policy.warn_settings) ||
	    cxl_event_int_is_fw(policy.failure_settings) ||
	    cxl_event_int_is_fw(policy.fatal_settings)) {
		dev_err(mds->cxlds.dev,
			"FW still in control of Event Logs despite _OSC settings\n");
		return -EBUSY;
	}

	rc = cxl_event_irqsetup(mds);
	if (rc)
		return rc;

	cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);

	return 0;
}

static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
	struct cxl_memdev_state *mds;
	struct cxl_dev_state *cxlds;
	struct cxl_register_map map;
	struct cxl_memdev *cxlmd;
	int rc;

	/*
	 * Double check the anonymous union trickery in struct cxl_regs
	 * FIXME switch to struct_group()
	 */
	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
		     offsetof(struct cxl_regs, device_regs.memdev));

	rc = pcim_enable_device(pdev);
	if (rc)
		return rc;
	pci_set_master(pdev);

	mds = cxl_memdev_state_create(&pdev->dev);
	if (IS_ERR(mds))
		return PTR_ERR(mds);
	cxlds = &mds->cxlds;
	pci_set_drvdata(pdev, cxlds);

	cxlds->rcd = is_cxl_restricted(pdev);
	cxlds->serial = pci_get_dsn(pdev);
	cxlds->cxl_dvsec = pci_find_dvsec_capability(
		pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
	if (!cxlds->cxl_dvsec)
		dev_warn(&pdev->dev,
			 "Device DVSEC not present, skip CXL.mem init\n");

	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
	if (rc)
		return rc;

	rc = cxl_map_device_regs(&pdev->dev, &cxlds->regs.device_regs, &map);
	if (rc)
		return rc;

	/*
	 * If the component registers can't be found, the cxl_pci driver may
	 * still be useful for management functions so don't return an error.
	 */
	cxlds->component_reg_phys = CXL_RESOURCE_NONE;
	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
	if (rc)
		dev_warn(&pdev->dev, "No component registers (%d)\n", rc);

	cxlds->component_reg_phys = map.resource;

	rc = cxl_map_component_regs(&pdev->dev, &cxlds->regs.component,
				    &map, BIT(CXL_CM_CAP_CAP_ID_RAS));
	if (rc)
		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");

	rc = cxl_await_media_ready(cxlds);
	if (rc == 0)
		cxlds->media_ready = true;
	else
		dev_warn(&pdev->dev, "Media not active (%d)\n", rc);

	rc = cxl_alloc_irq_vectors(pdev);
	if (rc)
		return rc;

	rc = cxl_pci_setup_mailbox(mds);
	if (rc)
		return rc;

	rc = cxl_enumerate_cmds(mds);
	if (rc)
		return rc;

	rc = cxl_set_timestamp(mds);
	if (rc)
		return rc;

	rc = cxl_poison_state_init(mds);
	if (rc)
		return rc;

	rc = cxl_dev_state_identify(mds);
	if (rc)
		return rc;

	rc = cxl_mem_create_range_info(mds);
	if (rc)
		return rc;

	cxlmd = devm_cxl_add_memdev(cxlds);
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

	rc = cxl_memdev_setup_fw_upload(mds);
	if (rc)
		return rc;

	rc = cxl_event_config(host_bridge, mds);
	if (rc)
		return rc;

	rc = cxl_pci_ras_unmask(pdev);
	if (rc)
		dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");

	pci_save_state(pdev);

	return rc;
}

static const struct pci_device_id cxl_mem_pci_tbl[] = {
	/* PCI class code for CXL.mem Type-3 Devices */
	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
	{ /* terminate list */ },
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);

static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;

	dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
		 dev_name(dev));
	pci_restore_state(pdev);
	if (device_attach(dev) <= 0)
		return PCI_ERS_RESULT_DISCONNECT;
	return PCI_ERS_RESULT_RECOVERED;
}

static void cxl_error_resume(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;

	dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
		 dev->driver ? "successful" : "failed");
}

static const struct pci_error_handlers cxl_error_handlers = {
	.error_detected = cxl_error_detected,
	.slot_reset = cxl_slot_reset,
	.resume = cxl_error_resume,
	.cor_error_detected = cxl_cor_error_detected,
};

static struct pci_driver cxl_pci_driver = {
	.name = KBUILD_MODNAME,
	.id_table = cxl_mem_pci_tbl,
	.probe = cxl_pci_probe,
	.err_handler = &cxl_error_handlers,
	.driver = {
		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
	},
};

MODULE_LICENSE("GPL v2");
module_pci_driver(cxl_pci_driver);
MODULE_IMPORT_NS(CXL);