1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ 3 #include <linux/io-64-nonatomic-lo-hi.h> 4 #include <linux/moduleparam.h> 5 #include <linux/module.h> 6 #include <linux/delay.h> 7 #include <linux/sizes.h> 8 #include <linux/mutex.h> 9 #include <linux/list.h> 10 #include <linux/pci.h> 11 #include <linux/io.h> 12 #include "cxlmem.h" 13 #include "cxlpci.h" 14 #include "cxl.h" 15 16 /** 17 * DOC: cxl pci 18 * 19 * This implements the PCI exclusive functionality for a CXL device as it is 20 * defined by the Compute Express Link specification. CXL devices may surface 21 * certain functionality even if it isn't CXL enabled. While this driver is 22 * focused around the PCI specific aspects of a CXL device, it binds to the 23 * specific CXL memory device class code, and therefore the implementation of 24 * cxl_pci is focused around CXL memory devices. 25 * 26 * The driver has several responsibilities, mainly: 27 * - Create the memX device and register on the CXL bus. 28 * - Enumerate device's register interface and map them. 29 * - Registers nvdimm bridge device with cxl_core. 30 * - Registers a CXL mailbox with cxl_core. 31 */ 32 33 #define cxl_doorbell_busy(cxlds) \ 34 (readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) & \ 35 CXLDEV_MBOX_CTRL_DOORBELL) 36 37 /* CXL 2.0 - 8.2.8.4 */ 38 #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ) 39 40 /* 41 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to 42 * dictate how long to wait for the mailbox to become ready. The new 43 * field allows the device to tell software the amount of time to wait 44 * before mailbox ready. This field per the spec theoretically allows 45 * for up to 255 seconds. 255 seconds is unreasonably long, its longer 46 * than the maximum SATA port link recovery wait. Default to 60 seconds 47 * until someone builds a CXL device that needs more time in practice. 48 */ 49 static unsigned short mbox_ready_timeout = 60; 50 module_param(mbox_ready_timeout, ushort, 0644); 51 MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready"); 52 53 static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) 54 { 55 const unsigned long start = jiffies; 56 unsigned long end = start; 57 58 while (cxl_doorbell_busy(cxlds)) { 59 end = jiffies; 60 61 if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) { 62 /* Check again in case preempted before timeout test */ 63 if (!cxl_doorbell_busy(cxlds)) 64 break; 65 return -ETIMEDOUT; 66 } 67 cpu_relax(); 68 } 69 70 dev_dbg(cxlds->dev, "Doorbell wait took %dms", 71 jiffies_to_msecs(end) - jiffies_to_msecs(start)); 72 return 0; 73 } 74 75 #define cxl_err(dev, status, msg) \ 76 dev_err_ratelimited(dev, msg ", device state %s%s\n", \ 77 status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ 78 status & CXLMDEV_FW_HALT ? " firmware-halt" : "") 79 80 #define cxl_cmd_err(dev, cmd, status, msg) \ 81 dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \ 82 (cmd)->opcode, \ 83 status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ 84 status & CXLMDEV_FW_HALT ? " firmware-halt" : "") 85 86 /** 87 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command 88 * @cxlds: The device state to communicate with. 89 * @mbox_cmd: Command to send to the memory device. 90 * 91 * Context: Any context. Expects mbox_mutex to be held. 92 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success. 93 * Caller should check the return code in @mbox_cmd to make sure it 94 * succeeded. 95 * 96 * This is a generic form of the CXL mailbox send command thus only using the 97 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory 98 * devices, and perhaps other types of CXL devices may have further information 99 * available upon error conditions. Driver facilities wishing to send mailbox 100 * commands should use the wrapper command. 101 * 102 * The CXL spec allows for up to two mailboxes. The intention is for the primary 103 * mailbox to be OS controlled and the secondary mailbox to be used by system 104 * firmware. This allows the OS and firmware to communicate with the device and 105 * not need to coordinate with each other. The driver only uses the primary 106 * mailbox. 107 */ 108 static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, 109 struct cxl_mbox_cmd *mbox_cmd) 110 { 111 void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET; 112 struct device *dev = cxlds->dev; 113 u64 cmd_reg, status_reg; 114 size_t out_len; 115 int rc; 116 117 lockdep_assert_held(&cxlds->mbox_mutex); 118 119 /* 120 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec. 121 * 1. Caller reads MB Control Register to verify doorbell is clear 122 * 2. Caller writes Command Register 123 * 3. Caller writes Command Payload Registers if input payload is non-empty 124 * 4. Caller writes MB Control Register to set doorbell 125 * 5. Caller either polls for doorbell to be clear or waits for interrupt if configured 126 * 6. Caller reads MB Status Register to fetch Return code 127 * 7. If command successful, Caller reads Command Register to get Payload Length 128 * 8. If output payload is non-empty, host reads Command Payload Registers 129 * 130 * Hardware is free to do whatever it wants before the doorbell is rung, 131 * and isn't allowed to change anything after it clears the doorbell. As 132 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can 133 * also happen in any order (though some orders might not make sense). 134 */ 135 136 /* #1 */ 137 if (cxl_doorbell_busy(cxlds)) { 138 u64 md_status = 139 readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); 140 141 cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, 142 "mailbox queue busy"); 143 return -EBUSY; 144 } 145 146 cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK, 147 mbox_cmd->opcode); 148 if (mbox_cmd->size_in) { 149 if (WARN_ON(!mbox_cmd->payload_in)) 150 return -EINVAL; 151 152 cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, 153 mbox_cmd->size_in); 154 memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in); 155 } 156 157 /* #2, #3 */ 158 writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); 159 160 /* #4 */ 161 dev_dbg(dev, "Sending command\n"); 162 writel(CXLDEV_MBOX_CTRL_DOORBELL, 163 cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); 164 165 /* #5 */ 166 rc = cxl_pci_mbox_wait_for_doorbell(cxlds); 167 if (rc == -ETIMEDOUT) { 168 u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); 169 170 cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout"); 171 return rc; 172 } 173 174 /* #6 */ 175 status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET); 176 mbox_cmd->return_code = 177 FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); 178 179 if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { 180 dev_dbg(dev, "Mailbox operation had an error: %s\n", 181 cxl_mbox_cmd_rc2str(mbox_cmd)); 182 return 0; /* completed but caller must check return_code */ 183 } 184 185 /* #7 */ 186 cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); 187 out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg); 188 189 /* #8 */ 190 if (out_len && mbox_cmd->payload_out) { 191 /* 192 * Sanitize the copy. If hardware misbehaves, out_len per the 193 * spec can actually be greater than the max allowed size (21 194 * bits available but spec defined 1M max). The caller also may 195 * have requested less data than the hardware supplied even 196 * within spec. 197 */ 198 size_t n = min3(mbox_cmd->size_out, cxlds->payload_size, out_len); 199 200 memcpy_fromio(mbox_cmd->payload_out, payload, n); 201 mbox_cmd->size_out = n; 202 } else { 203 mbox_cmd->size_out = 0; 204 } 205 206 return 0; 207 } 208 209 static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) 210 { 211 int rc; 212 213 mutex_lock_io(&cxlds->mbox_mutex); 214 rc = __cxl_pci_mbox_send_cmd(cxlds, cmd); 215 mutex_unlock(&cxlds->mbox_mutex); 216 217 return rc; 218 } 219 220 static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) 221 { 222 const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); 223 unsigned long timeout; 224 u64 md_status; 225 226 timeout = jiffies + mbox_ready_timeout * HZ; 227 do { 228 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); 229 if (md_status & CXLMDEV_MBOX_IF_READY) 230 break; 231 if (msleep_interruptible(100)) 232 break; 233 } while (!time_after(jiffies, timeout)); 234 235 if (!(md_status & CXLMDEV_MBOX_IF_READY)) { 236 cxl_err(cxlds->dev, md_status, 237 "timeout awaiting mailbox ready"); 238 return -ETIMEDOUT; 239 } 240 241 /* 242 * A command may be in flight from a previous driver instance, 243 * think kexec, do one doorbell wait so that 244 * __cxl_pci_mbox_send_cmd() can assume that it is the only 245 * source for future doorbell busy events. 246 */ 247 if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { 248 cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle"); 249 return -ETIMEDOUT; 250 } 251 252 cxlds->mbox_send = cxl_pci_mbox_send; 253 cxlds->payload_size = 254 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap); 255 256 /* 257 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register 258 * 259 * If the size is too small, mandatory commands will not work and so 260 * there's no point in going forward. If the size is too large, there's 261 * no harm is soft limiting it. 262 */ 263 cxlds->payload_size = min_t(size_t, cxlds->payload_size, SZ_1M); 264 if (cxlds->payload_size < 256) { 265 dev_err(cxlds->dev, "Mailbox is too small (%zub)", 266 cxlds->payload_size); 267 return -ENXIO; 268 } 269 270 dev_dbg(cxlds->dev, "Mailbox payload sized %zu", 271 cxlds->payload_size); 272 273 return 0; 274 } 275 276 static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map) 277 { 278 void __iomem *addr; 279 int bar = map->barno; 280 struct device *dev = &pdev->dev; 281 resource_size_t offset = map->block_offset; 282 283 /* Basic sanity check that BAR is big enough */ 284 if (pci_resource_len(pdev, bar) < offset) { 285 dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar, 286 &pdev->resource[bar], &offset); 287 return -ENXIO; 288 } 289 290 addr = pci_iomap(pdev, bar, 0); 291 if (!addr) { 292 dev_err(dev, "failed to map registers\n"); 293 return -ENOMEM; 294 } 295 296 dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n", 297 bar, &offset); 298 299 map->base = addr + map->block_offset; 300 return 0; 301 } 302 303 static void cxl_unmap_regblock(struct pci_dev *pdev, 304 struct cxl_register_map *map) 305 { 306 pci_iounmap(pdev, map->base - map->block_offset); 307 map->base = NULL; 308 } 309 310 static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map) 311 { 312 struct cxl_component_reg_map *comp_map; 313 struct cxl_device_reg_map *dev_map; 314 struct device *dev = &pdev->dev; 315 void __iomem *base = map->base; 316 317 switch (map->reg_type) { 318 case CXL_REGLOC_RBI_COMPONENT: 319 comp_map = &map->component_map; 320 cxl_probe_component_regs(dev, base, comp_map); 321 if (!comp_map->hdm_decoder.valid) { 322 dev_err(dev, "HDM decoder registers not found\n"); 323 return -ENXIO; 324 } 325 326 dev_dbg(dev, "Set up component registers\n"); 327 break; 328 case CXL_REGLOC_RBI_MEMDEV: 329 dev_map = &map->device_map; 330 cxl_probe_device_regs(dev, base, dev_map); 331 if (!dev_map->status.valid || !dev_map->mbox.valid || 332 !dev_map->memdev.valid) { 333 dev_err(dev, "registers not found: %s%s%s\n", 334 !dev_map->status.valid ? "status " : "", 335 !dev_map->mbox.valid ? "mbox " : "", 336 !dev_map->memdev.valid ? "memdev " : ""); 337 return -ENXIO; 338 } 339 340 dev_dbg(dev, "Probing device registers...\n"); 341 break; 342 default: 343 break; 344 } 345 346 return 0; 347 } 348 349 static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *map) 350 { 351 struct device *dev = cxlds->dev; 352 struct pci_dev *pdev = to_pci_dev(dev); 353 354 switch (map->reg_type) { 355 case CXL_REGLOC_RBI_COMPONENT: 356 cxl_map_component_regs(pdev, &cxlds->regs.component, map); 357 dev_dbg(dev, "Mapping component registers...\n"); 358 break; 359 case CXL_REGLOC_RBI_MEMDEV: 360 cxl_map_device_regs(pdev, &cxlds->regs.device_regs, map); 361 dev_dbg(dev, "Probing device registers...\n"); 362 break; 363 default: 364 break; 365 } 366 367 return 0; 368 } 369 370 static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, 371 struct cxl_register_map *map) 372 { 373 int rc; 374 375 rc = cxl_find_regblock(pdev, type, map); 376 if (rc) 377 return rc; 378 379 rc = cxl_map_regblock(pdev, map); 380 if (rc) 381 return rc; 382 383 rc = cxl_probe_regs(pdev, map); 384 cxl_unmap_regblock(pdev, map); 385 386 return rc; 387 } 388 389 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 390 { 391 struct cxl_register_map map; 392 struct cxl_memdev *cxlmd; 393 struct cxl_dev_state *cxlds; 394 int rc; 395 396 /* 397 * Double check the anonymous union trickery in struct cxl_regs 398 * FIXME switch to struct_group() 399 */ 400 BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) != 401 offsetof(struct cxl_regs, device_regs.memdev)); 402 403 rc = pcim_enable_device(pdev); 404 if (rc) 405 return rc; 406 407 cxlds = cxl_dev_state_create(&pdev->dev); 408 if (IS_ERR(cxlds)) 409 return PTR_ERR(cxlds); 410 411 cxlds->serial = pci_get_dsn(pdev); 412 cxlds->cxl_dvsec = pci_find_dvsec_capability( 413 pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); 414 if (!cxlds->cxl_dvsec) 415 dev_warn(&pdev->dev, 416 "Device DVSEC not present, skip CXL.mem init\n"); 417 418 rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); 419 if (rc) 420 return rc; 421 422 rc = cxl_map_regs(cxlds, &map); 423 if (rc) 424 return rc; 425 426 /* 427 * If the component registers can't be found, the cxl_pci driver may 428 * still be useful for management functions so don't return an error. 429 */ 430 cxlds->component_reg_phys = CXL_RESOURCE_NONE; 431 rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map); 432 if (rc) 433 dev_warn(&pdev->dev, "No component registers (%d)\n", rc); 434 435 cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map); 436 437 rc = cxl_pci_setup_mailbox(cxlds); 438 if (rc) 439 return rc; 440 441 rc = cxl_enumerate_cmds(cxlds); 442 if (rc) 443 return rc; 444 445 rc = cxl_dev_state_identify(cxlds); 446 if (rc) 447 return rc; 448 449 rc = cxl_mem_create_range_info(cxlds); 450 if (rc) 451 return rc; 452 453 cxlmd = devm_cxl_add_memdev(cxlds); 454 if (IS_ERR(cxlmd)) 455 return PTR_ERR(cxlmd); 456 457 if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM)) 458 rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd); 459 460 return rc; 461 } 462 463 static const struct pci_device_id cxl_mem_pci_tbl[] = { 464 /* PCI class code for CXL.mem Type-3 Devices */ 465 { PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)}, 466 { /* terminate list */ }, 467 }; 468 MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); 469 470 static struct pci_driver cxl_pci_driver = { 471 .name = KBUILD_MODNAME, 472 .id_table = cxl_mem_pci_tbl, 473 .probe = cxl_pci_probe, 474 .driver = { 475 .probe_type = PROBE_PREFER_ASYNCHRONOUS, 476 }, 477 }; 478 479 MODULE_LICENSE("GPL v2"); 480 module_pci_driver(cxl_pci_driver); 481 MODULE_IMPORT_NS(CXL); 482