// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <uapi/linux/cxl_mem.h>
#include <linux/security.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include "cxlmem.h"
#include "pci.h"
#include "cxl.h"

/**
 * DOC: cxl pci
 *
 * This implements the PCI exclusive functionality for a CXL device as it is
 * defined by the Compute Express Link specification. CXL devices may surface
 * certain functionality even if it isn't CXL enabled.
 *
 * The driver has several responsibilities, mainly:
 *  - Create the memX device and register on the CXL bus.
 *  - Enumerate device's register interface and map them.
 *  - Probe the device attributes to establish sysfs interface.
 *  - Provide an IOCTL interface to userspace to communicate with the device for
 *    things like firmware update.
 */

#define cxl_doorbell_busy(cxlm)                                                \
	(readl((cxlm)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
	 CXLDEV_MBOX_CTRL_DOORBELL)

/* CXL 2.0 - 8.2.8.4; value is in jiffies (2 seconds) despite the _MS suffix */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)

enum opcode {
	CXL_MBOX_OP_INVALID = 0x0000,
	CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID,
	CXL_MBOX_OP_GET_FW_INFO = 0x0200,
	CXL_MBOX_OP_ACTIVATE_FW = 0x0202,
	CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400,
	CXL_MBOX_OP_GET_LOG = 0x0401,
	CXL_MBOX_OP_IDENTIFY = 0x4000,
	CXL_MBOX_OP_GET_PARTITION_INFO = 0x4100,
	CXL_MBOX_OP_SET_PARTITION_INFO = 0x4101,
	CXL_MBOX_OP_GET_LSA = 0x4102,
	CXL_MBOX_OP_SET_LSA = 0x4103,
	CXL_MBOX_OP_GET_HEALTH_INFO = 0x4200,
	CXL_MBOX_OP_GET_ALERT_CONFIG = 0x4201,
	CXL_MBOX_OP_SET_ALERT_CONFIG = 0x4202,
	CXL_MBOX_OP_GET_SHUTDOWN_STATE = 0x4203,
	CXL_MBOX_OP_SET_SHUTDOWN_STATE = 0x4204,
	CXL_MBOX_OP_GET_POISON = 0x4300,
	CXL_MBOX_OP_INJECT_POISON = 0x4301,
	CXL_MBOX_OP_CLEAR_POISON = 0x4302,
	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS = 0x4303,
	CXL_MBOX_OP_SCAN_MEDIA = 0x4304,
	CXL_MBOX_OP_GET_SCAN_MEDIA = 0x4305,
	CXL_MBOX_OP_MAX = 0x10000
};

/*
 * CXL 2.0 - Memory capacity multiplier
 * See Section 8.2.9.5
 *
 * Volatile, Persistent, and Partition capacities are specified to be in
 * multiples of 256MB - define a multiplier to convert to/from bytes.
 */
#define CXL_CAPACITY_MULTIPLIER SZ_256M

/**
 * struct mbox_cmd - A command to be submitted to hardware.
 * @opcode: (input) The command set and command submitted to hardware.
 * @payload_in: (input) Pointer to the input payload.
 * @payload_out: (output) Pointer to the output payload. Must be allocated by
 *		 the caller.
 * @size_in: (input) Number of bytes to load from @payload_in.
 * @size_out: (input) Max number of bytes loaded into @payload_out.
 *	      (output) Number of bytes generated by the device. For fixed size
 *	      output commands this is always expected to be deterministic. For
 *	      variable sized output commands, it tells the exact number of bytes
 *	      written.
 * @return_code: (output) Error code returned from hardware.
 *
 * This is the primary mechanism used to send commands to the hardware.
 * All the fields except @payload_* correspond exactly to the fields described
 * in the Command Register section of CXL 2.0 8.2.8.4.5. @payload_in and
 * @payload_out are written to, and read from the Command Payload Registers
 * defined in CXL 2.0 8.2.8.4.8.
 */
struct mbox_cmd {
	u16 opcode;
	void *payload_in;
	void *payload_out;
	size_t size_in;
	size_t size_out;
	u16 return_code;
#define CXL_MBOX_SUCCESS 0
};
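/*
 * Illustrative sketch only: a fixed-size query such as GET_HEALTH_INFO
 * (no input payload, 0x12 output bytes per the command table below) would
 * be described roughly as:
 *
 *	u8 health[0x12];
 *	struct mbox_cmd cmd = {
 *		.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
 *		.payload_out = health,
 *		.size_out = sizeof(health),
 *	};
 *
 * In-kernel callers do not build an mbox_cmd by hand; they go through
 * cxl_mem_mbox_send_cmd(), which constructs one internally.
 */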
static DECLARE_RWSEM(cxl_memdev_rwsem);
static struct dentry *cxl_debugfs;
static bool cxl_raw_allow_all;

enum {
	CEL_UUID,
	VENDOR_DEBUG_UUID,
};

/* See CXL 2.0 Table 170. Get Log Input Payload */
static const uuid_t log_uuid[] = {
	[CEL_UUID] = UUID_INIT(0xda9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, 0x96,
			       0xb1, 0x62, 0x3b, 0x3f, 0x17),
	[VENDOR_DEBUG_UUID] = UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f,
					0xd6, 0x07, 0x19, 0x40, 0x3d, 0x86),
};

/**
 * struct cxl_mem_command - Driver representation of a memory device command
 * @info: Command information as it exists for the UAPI
 * @opcode: The actual bits used for the mailbox protocol
 * @flags: Set of flags affecting driver behavior.
 *
 *  * %CXL_CMD_FLAG_FORCE_ENABLE: In cases of error, commands with this flag
 *    will be enabled by the driver regardless of what hardware may have
 *    advertised.
 *
 * The cxl_mem_command is the driver's internal representation of commands that
 * are supported by the driver. Some of these commands may not be supported by
 * the hardware. The driver will use @info to validate the fields passed in by
 * the user and then submit the @opcode to the hardware.
 *
 * See struct cxl_command_info.
 */
struct cxl_mem_command {
	struct cxl_command_info info;
	enum opcode opcode;
	u32 flags;
#define CXL_CMD_FLAG_NONE 0
#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
};

#define CXL_CMD(_id, sin, sout, _flags)                                        \
	[CXL_MEM_COMMAND_ID_##_id] = {                                         \
		.info = {                                                      \
			.id = CXL_MEM_COMMAND_ID_##_id,                        \
			.size_in = sin,                                        \
			.size_out = sout,                                      \
		},                                                             \
		.opcode = CXL_MBOX_OP_##_id,                                   \
		.flags = _flags,                                               \
	}

/*
 * This table defines the supported mailbox commands for the driver. This table
 * is made up of a UAPI structure. Non-negative values as parameters in the
 * table will be validated against the user's input. For example, if size_in is
 * 0, and the user passed in 1, it is an error.
 */
static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
	CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
	CXL_CMD(RAW, ~0, ~0, 0),
#endif
	CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
	CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
	CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
	CXL_CMD(GET_LSA, 0x8, ~0, 0),
	CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
	CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
	CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0),
	CXL_CMD(SET_LSA, ~0, 0, 0),
	CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0),
	CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
	CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
	CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
	CXL_CMD(GET_POISON, 0x10, ~0, 0),
	CXL_CMD(INJECT_POISON, 0x8, 0, 0),
	CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
	CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
	CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
	CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0),
};
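/*
 * For reference, each table entry above is generated by CXL_CMD(). E.g.
 * CXL_CMD(GET_LSA, 0x8, ~0, 0) expands to:
 *
 *	[CXL_MEM_COMMAND_ID_GET_LSA] = {
 *		.info = {
 *			.id = CXL_MEM_COMMAND_ID_GET_LSA,
 *			.size_in = 0x8,
 *			.size_out = ~0,
 *		},
 *		.opcode = CXL_MBOX_OP_GET_LSA,
 *		.flags = 0,
 *	},
 *
 * i.e. a fixed 8-byte input payload and a variable-sized (~0) output payload.
 */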
/*
 * Commands that RAW doesn't permit. The rationale for each:
 *
 * CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
 * coordination of transaction timeout values at the root bridge level.
 *
 * CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
 * and needs to be coordinated with HDM updates.
 *
 * CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
 * driver and any writes from userspace invalidate those contents.
 *
 * CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
 * to the device after it is marked clean, userspace cannot make that
 * assertion.
 *
 * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
 * is kept up to date with patrol notifications and error management.
 */
static u16 cxl_disabled_raw_commands[] = {
	CXL_MBOX_OP_ACTIVATE_FW,
	CXL_MBOX_OP_SET_PARTITION_INFO,
	CXL_MBOX_OP_SET_LSA,
	CXL_MBOX_OP_SET_SHUTDOWN_STATE,
	CXL_MBOX_OP_SCAN_MEDIA,
	CXL_MBOX_OP_GET_SCAN_MEDIA,
};

/*
 * Command sets that RAW doesn't permit. All opcodes in this set are
 * disabled because they pass plain text security payloads over the
 * user/kernel boundary. This functionality is intended to be wrapped
 * behind the keys ABI which allows for encrypted payloads in the UAPI.
 */
static u8 security_command_sets[] = {
	0x44, /* Sanitize */
	0x45, /* Persistent Memory Data-at-rest Security */
	0x46, /* Security Passthrough */
};

#define cxl_for_each_cmd(cmd)                                                  \
	for ((cmd) = &mem_commands[0];                                         \
	     ((cmd) - mem_commands) < ARRAY_SIZE(mem_commands); (cmd)++)

#define cxl_cmd_count ARRAY_SIZE(mem_commands)

static int cxl_mem_wait_for_doorbell(struct cxl_mem *cxlm)
{
	const unsigned long start = jiffies;
	unsigned long end = start;

	while (cxl_doorbell_busy(cxlm)) {
		end = jiffies;

		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
			/* Check again in case preempted before timeout test */
			if (!cxl_doorbell_busy(cxlm))
				break;
			return -ETIMEDOUT;
		}
		cpu_relax();
	}

	dev_dbg(&cxlm->pdev->dev, "Doorbell wait took %dms",
		jiffies_to_msecs(end) - jiffies_to_msecs(start));
	return 0;
}

static bool cxl_is_security_command(u16 opcode)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
		if (security_command_sets[i] == (opcode >> 8))
			return true;
	return false;
}
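/*
 * Example: any opcode whose upper byte is 0x44, 0x45 or 0x46 (e.g. the
 * 0x44XX Sanitize set) is treated as a security command by
 * cxl_is_security_command() and, absent the raw_allow_all override, is
 * refused on the RAW path regardless of cxl_disabled_raw_commands[].
 */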
static void cxl_mem_mbox_timeout(struct cxl_mem *cxlm,
				 struct mbox_cmd *mbox_cmd)
{
	struct device *dev = &cxlm->pdev->dev;

	dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
		mbox_cmd->opcode, mbox_cmd->size_in);
}

/**
 * __cxl_mem_mbox_send_cmd() - Execute a mailbox command
 * @cxlm: The CXL memory device to communicate with.
 * @mbox_cmd: Command to send to the memory device.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 *         Caller should check the return code in @mbox_cmd to make sure it
 *         succeeded.
 *
 * This is a generic form of the CXL mailbox send command thus only using the
 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 * devices, and perhaps other types of CXL devices may have further information
 * available upon error conditions. Driver facilities wishing to send mailbox
 * commands should use the wrapper command.
 *
 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 * mailbox to be OS controlled and the secondary mailbox to be used by system
 * firmware. This allows the OS and firmware to communicate with the device and
 * not need to coordinate with each other. The driver only uses the primary
 * mailbox.
 */
static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
				   struct mbox_cmd *mbox_cmd)
{
	void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
	u64 cmd_reg, status_reg;
	size_t out_len;
	int rc;

	lockdep_assert_held(&cxlm->mbox_mutex);

	/*
	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
	 *   1. Caller reads MB Control Register to verify doorbell is clear
	 *   2. Caller writes Command Register
	 *   3. Caller writes Command Payload Registers if input payload is non-empty
	 *   4. Caller writes MB Control Register to set doorbell
	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
	 *   6. Caller reads MB Status Register to fetch Return code
	 *   7. If command successful, Caller reads Command Register to get Payload Length
	 *   8. If output payload is non-empty, host reads Command Payload Registers
	 *
	 * Hardware is free to do whatever it wants before the doorbell is rung,
	 * and isn't allowed to change anything after it clears the doorbell. As
	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
	 * also happen in any order (though some orders might not make sense).
	 */

	/* #1 */
	if (cxl_doorbell_busy(cxlm)) {
		dev_err_ratelimited(&cxlm->pdev->dev,
				    "Mailbox re-busy after acquiring\n");
		return -EBUSY;
	}

	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
			     mbox_cmd->opcode);
	if (mbox_cmd->size_in) {
		if (WARN_ON(!mbox_cmd->payload_in))
			return -EINVAL;

		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
				      mbox_cmd->size_in);
		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
	}

	/* #2, #3 */
	writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);

	/* #4 */
	dev_dbg(&cxlm->pdev->dev, "Sending command\n");
	writel(CXLDEV_MBOX_CTRL_DOORBELL,
	       cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	/* #5 */
	rc = cxl_mem_wait_for_doorbell(cxlm);
	if (rc == -ETIMEDOUT) {
		cxl_mem_mbox_timeout(cxlm, mbox_cmd);
		return rc;
	}

	/* #6 */
	status_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	if (mbox_cmd->return_code != 0) {
		dev_dbg(&cxlm->pdev->dev, "Mailbox operation had an error\n");
		return 0;
	}

	/* #7 */
	cmd_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);

	/* #8 */
	if (out_len && mbox_cmd->payload_out) {
		/*
		 * Sanitize the copy. If hardware misbehaves, out_len per the
		 * spec can actually be greater than the max allowed size (21
		 * bits available but spec defined 1M max). The caller also may
		 * have requested less data than the hardware supplied even
		 * within spec.
		 */
		size_t n = min3(mbox_cmd->size_out, cxlm->payload_size, out_len);

		memcpy_fromio(mbox_cmd->payload_out, payload, n);
		mbox_cmd->size_out = n;
	} else {
		mbox_cmd->size_out = 0;
	}

	return 0;
}
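/*
 * Worked example (illustrative): for GET_LOG (opcode 0x0401) with a
 * 0x18-byte input payload, cmd_reg carries 0x0401 in the opcode field and
 * 0x18 in the payload length field before the doorbell is rung. On
 * completion, if the device reports out_len = 0x1000 but the caller only
 * sized @payload_out for 0x800 bytes, min3() above limits the copy to
 * 0x800 and size_out reflects that.
 */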
/**
 * cxl_mem_mbox_get() - Acquire exclusive access to the mailbox.
 * @cxlm: The memory device to gain access to.
 *
 * Context: Any context. Takes the mbox_mutex.
 * Return: 0 if exclusive access was acquired.
 */
static int cxl_mem_mbox_get(struct cxl_mem *cxlm)
{
	struct device *dev = &cxlm->pdev->dev;
	u64 md_status;
	int rc;

	mutex_lock_io(&cxlm->mbox_mutex);

	/*
	 * XXX: There is some amount of ambiguity in the 2.0 version of the spec
	 * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the
	 * bit is to allow firmware running on the device to notify the driver
	 * that it's ready to receive commands. It is unclear if the bit needs
	 * to be read before each mailbox transaction, i.e. whether the firmware
	 * can switch it on and off as needed. Second, there is no defined
	 * timeout for mailbox ready, like there is for the doorbell interface.
	 *
	 * Assumptions:
	 * 1. The firmware might toggle the Mailbox Interface Ready bit, check
	 *    it for every command.
	 *
	 * 2. If the doorbell is clear, the firmware should have first set the
	 *    Mailbox Interface Ready bit. Therefore, waiting for the doorbell
	 *    to be ready is sufficient.
	 */
	rc = cxl_mem_wait_for_doorbell(cxlm);
	if (rc) {
		dev_warn(dev, "Mailbox interface not ready\n");
		goto out;
	}

	md_status = readq(cxlm->regs.memdev + CXLMDEV_STATUS_OFFSET);
	if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) {
		dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
		rc = -EBUSY;
		goto out;
	}

	/*
	 * Hardware shouldn't allow a ready status but also have failure bits
	 * set. Spit out an error, this should be a bug report.
	 */
	rc = -EFAULT;
	if (md_status & CXLMDEV_DEV_FATAL) {
		dev_err(dev, "mbox: reported ready, but fatal\n");
		goto out;
	}
	if (md_status & CXLMDEV_FW_HALT) {
		dev_err(dev, "mbox: reported ready, but halted\n");
		goto out;
	}
	if (CXLMDEV_RESET_NEEDED(md_status)) {
		dev_err(dev, "mbox: reported ready, but reset needed\n");
		goto out;
	}

	/* with lock held */
	return 0;

out:
	mutex_unlock(&cxlm->mbox_mutex);
	return rc;
}

/**
 * cxl_mem_mbox_put() - Release exclusive access to the mailbox.
 * @cxlm: The CXL memory device to communicate with.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 */
static void cxl_mem_mbox_put(struct cxl_mem *cxlm)
{
	mutex_unlock(&cxlm->mbox_mutex);
}
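/*
 * Usage pattern (sketch) for the helpers above; both
 * handle_mailbox_cmd_from_user() and cxl_mem_mbox_send_cmd() below follow
 * this sequence:
 *
 *	rc = cxl_mem_mbox_get(cxlm);
 *	if (rc)
 *		return rc;
 *	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
 *	cxl_mem_mbox_put(cxlm);
 */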
/**
 * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
 * @cxlm: The CXL memory device to communicate with.
 * @cmd: The validated command.
 * @in_payload: Pointer to userspace's input payload.
 * @out_payload: Pointer to userspace's output payload.
 * @size_out: (Input) Max payload size to copy out.
 *            (Output) Payload size hardware generated.
 * @retval: Hardware generated return code from the operation.
 *
 * Return:
 *  * %0	- Mailbox transaction succeeded. This implies the mailbox
 *		  protocol completed successfully, not that the operation itself
 *		  was successful.
 *  * %-ENOMEM	- Couldn't allocate a bounce buffer.
 *  * %-EFAULT	- Something happened with copy_to/from_user.
 *  * %-EINTR	- Mailbox acquisition interrupted.
 *  * %-EXXX	- Transaction level failures.
 *
 * Creates the appropriate mailbox command and dispatches it on behalf of a
 * userspace request. The input and output payloads are copied to and from
 * userspace.
 *
 * See cxl_send_cmd().
 */
static int handle_mailbox_cmd_from_user(struct cxl_mem *cxlm,
					const struct cxl_mem_command *cmd,
					u64 in_payload, u64 out_payload,
					s32 *size_out, u32 *retval)
{
	struct device *dev = &cxlm->pdev->dev;
	struct mbox_cmd mbox_cmd = {
		.opcode = cmd->opcode,
		.size_in = cmd->info.size_in,
		.size_out = cmd->info.size_out,
	};
	int rc;

	if (cmd->info.size_out) {
		mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
		if (!mbox_cmd.payload_out)
			return -ENOMEM;
	}

	if (cmd->info.size_in) {
		mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
						   cmd->info.size_in);
		if (IS_ERR(mbox_cmd.payload_in)) {
			kvfree(mbox_cmd.payload_out);
			return PTR_ERR(mbox_cmd.payload_in);
		}
	}

	rc = cxl_mem_mbox_get(cxlm);
	if (rc)
		goto out;

	dev_dbg(dev,
		"Submitting %s command for user\n"
		"\topcode: %x\n"
		"\tsize: %ub\n",
		cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
		cmd->info.size_in);

	dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
		      "raw command path used\n");

	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
	cxl_mem_mbox_put(cxlm);
	if (rc)
		goto out;

	/*
	 * @size_out contains the max size that's allowed to be written back out
	 * to userspace. While the device may have written more output than
	 * this, anything beyond that limit has to be ignored.
	 */
	if (mbox_cmd.size_out) {
		dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out,
			      "Invalid return size\n");
		if (copy_to_user(u64_to_user_ptr(out_payload),
				 mbox_cmd.payload_out, mbox_cmd.size_out)) {
			rc = -EFAULT;
			goto out;
		}
	}

	*size_out = mbox_cmd.size_out;
	*retval = mbox_cmd.return_code;

out:
	kvfree(mbox_cmd.payload_in);
	kvfree(mbox_cmd.payload_out);
	return rc;
}

static bool cxl_mem_raw_command_allowed(u16 opcode)
{
	int i;

	if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
		return false;

	if (security_locked_down(LOCKDOWN_PCI_ACCESS))
		return false;

	if (cxl_raw_allow_all)
		return true;

	if (cxl_is_security_command(opcode))
		return false;

	for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
		if (cxl_disabled_raw_commands[i] == opcode)
			return false;

	return true;
}
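/*
 * Note: even with CONFIG_CXL_MEM_RAW_COMMANDS=y, the opcodes in
 * cxl_disabled_raw_commands[] and the security command sets stay blocked
 * unless the debugfs override created in cxl_mem_init() is set, e.g.
 * (assuming debugfs is mounted at the usual location):
 *
 *	echo 1 > /sys/kernel/debug/cxl/mbox/raw_allow_all
 *
 * Kernel lockdown still takes precedence over that override.
 */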
/**
 * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
 * @cxlm: &struct cxl_mem device whose mailbox will be used.
 * @send_cmd: &struct cxl_send_command copied in from userspace.
 * @out_cmd: Sanitized and populated &struct cxl_mem_command.
 *
 * Return:
 *  * %0	- @out_cmd is ready to send.
 *  * %-ENOTTY	- Invalid command specified.
 *  * %-EINVAL	- Reserved fields or invalid values were used.
 *  * %-ENOMEM	- Input or output buffer wasn't sized properly.
 *  * %-EPERM	- Attempted to use a protected command.
 *
 * The result of this command is a fully validated command in @out_cmd that is
 * safe to send to the hardware.
 *
 * See handle_mailbox_cmd_from_user()
 */
static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
				      const struct cxl_send_command *send_cmd,
				      struct cxl_mem_command *out_cmd)
{
	const struct cxl_command_info *info;
	struct cxl_mem_command *c;

	if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
		return -ENOTTY;

	/*
	 * The user can never specify an input payload larger than what hardware
	 * supports, but output can be arbitrarily large (simply write out as
	 * much data as the hardware provides).
	 */
	if (send_cmd->in.size > cxlm->payload_size)
		return -EINVAL;

	/*
	 * Checks are bypassed for raw commands but a WARN/taint will occur
	 * later in the callchain
	 */
	if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) {
		const struct cxl_mem_command temp = {
			.info = {
				.id = CXL_MEM_COMMAND_ID_RAW,
				.flags = 0,
				.size_in = send_cmd->in.size,
				.size_out = send_cmd->out.size,
			},
			.opcode = send_cmd->raw.opcode
		};

		if (send_cmd->raw.rsvd)
			return -EINVAL;

		/*
		 * Unlike supported commands, the output size of RAW commands
		 * gets passed along without further checking, so it must be
		 * validated here.
		 */
		if (send_cmd->out.size > cxlm->payload_size)
			return -EINVAL;

		if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
			return -EPERM;

		memcpy(out_cmd, &temp, sizeof(temp));

		return 0;
	}

	if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
		return -EINVAL;

	if (send_cmd->rsvd)
		return -EINVAL;

	if (send_cmd->in.rsvd || send_cmd->out.rsvd)
		return -EINVAL;

	/* Convert user's command into the internal representation */
	c = &mem_commands[send_cmd->id];
	info = &c->info;

	/* Check that the command is enabled for hardware */
	if (!test_bit(info->id, cxlm->enabled_cmds))
		return -ENOTTY;

	/* Check the input buffer is the expected size */
	if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
		return -ENOMEM;

	/* Check the output buffer is at least large enough */
	if (info->size_out >= 0 && send_cmd->out.size < info->size_out)
		return -ENOMEM;

	memcpy(out_cmd, c, sizeof(*c));
	out_cmd->info.size_in = send_cmd->in.size;
	/*
	 * XXX: out_cmd->info.size_out will be controlled by the driver, and the
	 * specified number of bytes @send_cmd->out.size will be copied back out
	 * to userspace.
	 */

	return 0;
}
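/*
 * Example of the fixed-size checks above: GET_LSA is declared with
 * size_in = 0x8 in mem_commands[], so a CXL_MEM_SEND_COMMAND request for
 * it must pass in.size == 8 exactly, while out.size only needs to be at
 * least as large as a fixed output size (or anything at all for
 * variable-sized outputs declared as ~0).
 */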
static int cxl_query_cmd(struct cxl_memdev *cxlmd,
			 struct cxl_mem_query_commands __user *q)
{
	struct device *dev = &cxlmd->dev;
	struct cxl_mem_command *cmd;
	u32 n_commands;
	int j = 0;

	dev_dbg(dev, "Query IOCTL\n");

	if (get_user(n_commands, &q->n_commands))
		return -EFAULT;

	/* returns the total number if 0 elements are requested. */
	if (n_commands == 0)
		return put_user(cxl_cmd_count, &q->n_commands);

	/*
	 * otherwise, return min(n_commands, total commands) cxl_command_info
	 * structures.
	 */
	cxl_for_each_cmd(cmd) {
		const struct cxl_command_info *info = &cmd->info;

		if (copy_to_user(&q->commands[j++], info, sizeof(*info)))
			return -EFAULT;

		if (j == n_commands)
			break;
	}

	return 0;
}

static int cxl_send_cmd(struct cxl_memdev *cxlmd,
			struct cxl_send_command __user *s)
{
	struct cxl_mem *cxlm = cxlmd->cxlm;
	struct device *dev = &cxlmd->dev;
	struct cxl_send_command send;
	struct cxl_mem_command c;
	int rc;

	dev_dbg(dev, "Send IOCTL\n");

	if (copy_from_user(&send, s, sizeof(send)))
		return -EFAULT;

	rc = cxl_validate_cmd_from_user(cxlmd->cxlm, &send, &c);
	if (rc)
		return rc;

	/* Prepare to handle a full payload for variable sized output */
	if (c.info.size_out < 0)
		c.info.size_out = cxlm->payload_size;

	rc = handle_mailbox_cmd_from_user(cxlm, &c, send.in.payload,
					  send.out.payload, &send.out.size,
					  &send.retval);
	if (rc)
		return rc;

	if (copy_to_user(s, &send, sizeof(send)))
		return -EFAULT;

	return 0;
}
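/*
 * Userspace sketch (illustrative only, error handling omitted) of the two
 * ioctls handled above, issued against an open CXL memdev character
 * device (e.g. /dev/cxl/mem0):
 *
 *	struct cxl_mem_query_commands *q;
 *	struct cxl_send_command send = { 0 };
 *	char buf[0x43];
 *	__u32 n;
 *
 *	q = calloc(1, sizeof(*q));
 *	ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);	(n_commands == 0: ask for count)
 *	n = q->n_commands;
 *	free(q);
 *	q = calloc(1, sizeof(*q) + n * sizeof(q->commands[0]));
 *	q->n_commands = n;
 *	ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);	(fills q->commands[])
 *
 *	send.id = CXL_MEM_COMMAND_ID_IDENTIFY;
 *	send.out.payload = (__u64)(uintptr_t)buf;
 *	send.out.size = sizeof(buf);
 *	ioctl(fd, CXL_MEM_SEND_COMMAND, &send);
 *
 * The structure layouts come from uapi/linux/cxl_mem.h.
 */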
static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
			       unsigned long arg)
{
	switch (cmd) {
	case CXL_MEM_QUERY_COMMANDS:
		return cxl_query_cmd(cxlmd, (void __user *)arg);
	case CXL_MEM_SEND_COMMAND:
		return cxl_send_cmd(cxlmd, (void __user *)arg);
	default:
		return -ENOTTY;
	}
}

static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
			     unsigned long arg)
{
	struct cxl_memdev *cxlmd = file->private_data;
	int rc = -ENXIO;

	down_read(&cxl_memdev_rwsem);
	if (cxlmd->cxlm)
		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
	up_read(&cxl_memdev_rwsem);

	return rc;
}

static int cxl_memdev_open(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	get_device(&cxlmd->dev);
	file->private_data = cxlmd;

	return 0;
}

static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	put_device(&cxlmd->dev);

	return 0;
}

static void cxl_memdev_shutdown(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	down_write(&cxl_memdev_rwsem);
	cxlmd->cxlm = NULL;
	up_write(&cxl_memdev_rwsem);
}

static const struct cdevm_file_operations cxl_memdev_fops = {
	.fops = {
		.owner = THIS_MODULE,
		.unlocked_ioctl = cxl_memdev_ioctl,
		.open = cxl_memdev_open,
		.release = cxl_memdev_release_file,
		.compat_ioctl = compat_ptr_ioctl,
		.llseek = noop_llseek,
	},
	.shutdown = cxl_memdev_shutdown,
};

static inline struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
{
	struct cxl_mem_command *c;

	cxl_for_each_cmd(c)
		if (c->opcode == opcode)
			return c;

	return NULL;
}

/**
 * cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device.
 * @cxlm: The CXL memory device to communicate with.
 * @opcode: Opcode for the mailbox command.
 * @in: The input payload for the mailbox command.
 * @in_size: The length of the input payload
 * @out: Caller allocated buffer for the output.
 * @out_size: Expected size of output.
 *
 * Context: Any context. Will acquire and release mbox_mutex.
 * Return:
 *  * %0	- Success.
 *  * %-E2BIG	- Payload is too large for hardware.
 *  * %-EBUSY	- Couldn't acquire exclusive mailbox access.
 *  * %-EFAULT	- Hardware error occurred.
 *  * %-ENXIO	- Command completed, but device reported an error.
 *  * %-EIO	- Unexpected output size.
 *
 * Mailbox commands may execute successfully yet the device itself reported an
 * error. While this distinction can be useful for commands from userspace, the
 * kernel will only be able to use results when both are successful.
 *
 * See __cxl_mem_mbox_send_cmd()
 */
static int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode,
				 void *in, size_t in_size,
				 void *out, size_t out_size)
{
	const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
	struct mbox_cmd mbox_cmd = {
		.opcode = opcode,
		.payload_in = in,
		.size_in = in_size,
		.size_out = out_size,
		.payload_out = out,
	};
	int rc;

	if (out_size > cxlm->payload_size)
		return -E2BIG;

	rc = cxl_mem_mbox_get(cxlm);
	if (rc)
		return rc;

	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
	cxl_mem_mbox_put(cxlm);
	if (rc)
		return rc;

	/* TODO: Map return code to proper kernel style errno */
	if (mbox_cmd.return_code != CXL_MBOX_SUCCESS)
		return -ENXIO;

	/*
	 * Variable sized commands can't be validated and so it's up to the
	 * caller to do that if they wish.
	 */
	if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size)
		return -EIO;

	return 0;
}

static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
{
	const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);

	cxlm->payload_size =
		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);

	/*
	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
	 *
	 * If the size is too small, mandatory commands will not work and so
	 * there's no point in going forward. If the size is too large, there's
	 * no harm in soft limiting it.
	 */
	cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
	if (cxlm->payload_size < 256) {
		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
			cxlm->payload_size);
		return -ENXIO;
	}

	dev_dbg(&cxlm->pdev->dev, "Mailbox payload sized %zu",
		cxlm->payload_size);

	return 0;
}
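/*
 * For reference: the Payload Size field is an exponent, so a value of 8
 * yields 1 << 8 = 256 bytes (the minimum enforced above) and a value of 20
 * yields 1 MiB, which is where the soft limit caps it.
 */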
static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev)
{
	struct device *dev = &pdev->dev;
	struct cxl_mem *cxlm;

	cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
	if (!cxlm) {
		dev_err(dev, "No memory available\n");
		return ERR_PTR(-ENOMEM);
	}

	mutex_init(&cxlm->mbox_mutex);
	cxlm->pdev = pdev;
	cxlm->enabled_cmds =
		devm_kmalloc_array(dev, BITS_TO_LONGS(cxl_cmd_count),
				   sizeof(unsigned long),
				   GFP_KERNEL | __GFP_ZERO);
	if (!cxlm->enabled_cmds) {
		dev_err(dev, "No memory available for bitmap\n");
		return ERR_PTR(-ENOMEM);
	}

	return cxlm;
}

static void __iomem *cxl_mem_map_regblock(struct cxl_mem *cxlm,
					  u8 bar, u64 offset)
{
	struct pci_dev *pdev = cxlm->pdev;
	struct device *dev = &pdev->dev;
	void __iomem *addr;

	/* Basic sanity check that BAR is big enough */
	if (pci_resource_len(pdev, bar) < offset) {
		dev_err(dev, "BAR%d: %pr: too small (offset: %#llx)\n", bar,
			&pdev->resource[bar], (unsigned long long)offset);
		return IOMEM_ERR_PTR(-ENXIO);
	}

	addr = pci_iomap(pdev, bar, 0);
	if (!addr) {
		dev_err(dev, "failed to map registers\n");
		return addr;
	}

	dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %#llx\n",
		bar, offset);

	return addr;
}

static void cxl_mem_unmap_regblock(struct cxl_mem *cxlm, void __iomem *base)
{
	pci_iounmap(cxlm->pdev, base);
}

static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
{
	int pos;

	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
	if (!pos)
		return 0;

	while (pos) {
		u16 vendor, id;

		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
		if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
			return pos;

		pos = pci_find_next_ext_capability(pdev, pos,
						   PCI_EXT_CAP_ID_DVSEC);
	}

	return 0;
}

static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
			  struct cxl_register_map *map)
{
	struct pci_dev *pdev = cxlm->pdev;
	struct device *dev = &pdev->dev;
	struct cxl_component_reg_map *comp_map;
	struct cxl_device_reg_map *dev_map;

	switch (map->reg_type) {
	case CXL_REGLOC_RBI_COMPONENT:
		comp_map = &map->component_map;
		cxl_probe_component_regs(dev, base, comp_map);
		if (!comp_map->hdm_decoder.valid) {
			dev_err(dev, "HDM decoder registers not found\n");
			return -ENXIO;
		}

		dev_dbg(dev, "Set up component registers\n");
		break;
	case CXL_REGLOC_RBI_MEMDEV:
		dev_map = &map->device_map;
		cxl_probe_device_regs(dev, base, dev_map);
		if (!dev_map->status.valid || !dev_map->mbox.valid ||
		    !dev_map->memdev.valid) {
			dev_err(dev, "registers not found: %s%s%s\n",
				!dev_map->status.valid ? "status " : "",
				!dev_map->mbox.valid ? "mbox " : "",
				!dev_map->memdev.valid ? "memdev " : "");
			return -ENXIO;
		}

		dev_dbg(dev, "Probing device registers...\n");
		break;
	default:
		break;
	}

	return 0;
}
"memdev " : ""); 1046 return -ENXIO; 1047 } 1048 1049 dev_dbg(dev, "Probing device registers...\n"); 1050 break; 1051 default: 1052 break; 1053 } 1054 1055 return 0; 1056 } 1057 1058 static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map) 1059 { 1060 struct pci_dev *pdev = cxlm->pdev; 1061 struct device *dev = &pdev->dev; 1062 1063 switch (map->reg_type) { 1064 case CXL_REGLOC_RBI_COMPONENT: 1065 cxl_map_component_regs(pdev, &cxlm->regs.component, map); 1066 dev_dbg(dev, "Mapping component registers...\n"); 1067 break; 1068 case CXL_REGLOC_RBI_MEMDEV: 1069 cxl_map_device_regs(pdev, &cxlm->regs.device_regs, map); 1070 dev_dbg(dev, "Probing device registers...\n"); 1071 break; 1072 default: 1073 break; 1074 } 1075 1076 return 0; 1077 } 1078 1079 static void cxl_decode_register_block(u32 reg_lo, u32 reg_hi, 1080 u8 *bar, u64 *offset, u8 *reg_type) 1081 { 1082 *offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK); 1083 *bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo); 1084 *reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo); 1085 } 1086 1087 /** 1088 * cxl_mem_setup_regs() - Setup necessary MMIO. 1089 * @cxlm: The CXL memory device to communicate with. 1090 * 1091 * Return: 0 if all necessary registers mapped. 1092 * 1093 * A memory device is required by spec to implement a certain set of MMIO 1094 * regions. The purpose of this function is to enumerate and map those 1095 * registers. 1096 */ 1097 static int cxl_mem_setup_regs(struct cxl_mem *cxlm) 1098 { 1099 struct pci_dev *pdev = cxlm->pdev; 1100 struct device *dev = &pdev->dev; 1101 u32 regloc_size, regblocks; 1102 void __iomem *base; 1103 int regloc, i, n_maps; 1104 struct cxl_register_map *map, maps[CXL_REGLOC_RBI_TYPES]; 1105 int ret = 0; 1106 1107 regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID); 1108 if (!regloc) { 1109 dev_err(dev, "register location dvsec not found\n"); 1110 return -ENXIO; 1111 } 1112 1113 if (pci_request_mem_regions(pdev, pci_name(pdev))) 1114 return -ENODEV; 1115 1116 /* Get the size of the Register Locator DVSEC */ 1117 pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); 1118 regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); 1119 1120 regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET; 1121 regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8; 1122 1123 for (i = 0, n_maps = 0; i < regblocks; i++, regloc += 8) { 1124 u32 reg_lo, reg_hi; 1125 u8 reg_type; 1126 u64 offset; 1127 u8 bar; 1128 1129 pci_read_config_dword(pdev, regloc, ®_lo); 1130 pci_read_config_dword(pdev, regloc + 4, ®_hi); 1131 1132 cxl_decode_register_block(reg_lo, reg_hi, &bar, &offset, 1133 ®_type); 1134 1135 dev_dbg(dev, "Found register block in bar %u @ 0x%llx of type %u\n", 1136 bar, offset, reg_type); 1137 1138 /* Ignore unknown register block types */ 1139 if (reg_type > CXL_REGLOC_RBI_MEMDEV) 1140 continue; 1141 1142 base = cxl_mem_map_regblock(cxlm, bar, offset); 1143 if (!base) 1144 return -ENOMEM; 1145 1146 map = &maps[n_maps]; 1147 map->barno = bar; 1148 map->block_offset = offset; 1149 map->reg_type = reg_type; 1150 1151 ret = cxl_probe_regs(cxlm, base + offset, map); 1152 1153 /* Always unmap the regblock regardless of probe success */ 1154 cxl_mem_unmap_regblock(cxlm, base); 1155 1156 if (ret) 1157 return ret; 1158 1159 n_maps++; 1160 } 1161 1162 pci_release_mem_regions(pdev); 1163 1164 for (i = 0; i < n_maps; i++) { 1165 ret = cxl_map_regs(cxlm, &maps[i]); 1166 if (ret) 1167 break; 1168 } 1169 1170 return ret; 1171 } 1172 1173 static int 
static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
{
	u32 remaining = size;
	u32 offset = 0;

	while (remaining) {
		u32 xfer_size = min_t(u32, remaining, cxlm->payload_size);
		struct cxl_mbox_get_log {
			uuid_t uuid;
			__le32 offset;
			__le32 length;
		} __packed log = {
			.uuid = *uuid,
			.offset = cpu_to_le32(offset),
			.length = cpu_to_le32(xfer_size)
		};
		int rc;

		rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_LOG, &log,
					   sizeof(log), out, xfer_size);
		if (rc < 0)
			return rc;

		out += xfer_size;
		remaining -= xfer_size;
		offset += xfer_size;
	}

	return 0;
}
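/*
 * Example: a 64 KiB log fetched through a 1 KiB mailbox payload results in
 * 64 GET_LOG transactions, each advancing @offset by the 1 KiB xfer_size
 * until @remaining reaches zero.
 */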
/**
 * cxl_walk_cel() - Walk through the Command Effects Log.
 * @cxlm: Device.
 * @size: Length of the Command Effects Log.
 * @cel: CEL
 *
 * Iterate over each entry in the CEL and determine if the driver supports the
 * command. If so, the command is enabled for the device and can be used later.
 */
static void cxl_walk_cel(struct cxl_mem *cxlm, size_t size, u8 *cel)
{
	struct cel_entry {
		__le16 opcode;
		__le16 effect;
	} __packed * cel_entry;
	const int cel_entries = size / sizeof(*cel_entry);
	int i;

	cel_entry = (struct cel_entry *)cel;

	for (i = 0; i < cel_entries; i++) {
		u16 opcode = le16_to_cpu(cel_entry[i].opcode);
		struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);

		if (!cmd) {
			dev_dbg(&cxlm->pdev->dev,
				"Opcode 0x%04x unsupported by driver", opcode);
			continue;
		}

		set_bit(cmd->info.id, cxlm->enabled_cmds);
	}
}

struct cxl_mbox_get_supported_logs {
	__le16 entries;
	u8 rsvd[6];
	struct gsl_entry {
		uuid_t uuid;
		__le32 size;
	} __packed entry[];
} __packed;

static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_mem *cxlm)
{
	struct cxl_mbox_get_supported_logs *ret;
	int rc;

	ret = kvmalloc(cxlm->payload_size, GFP_KERNEL);
	if (!ret)
		return ERR_PTR(-ENOMEM);

	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL,
				   0, ret, cxlm->payload_size);
	if (rc < 0) {
		kvfree(ret);
		return ERR_PTR(rc);
	}

	return ret;
}

/**
 * cxl_mem_get_partition_info - Get partition info
 * @cxlm: The device to act on
 * @active_volatile_bytes: returned active volatile capacity
 * @active_persistent_bytes: returned active persistent capacity
 * @next_volatile_bytes: returned next volatile capacity
 * @next_persistent_bytes: returned next persistent capacity
 *
 * Retrieve the current partition info for the device specified. If not 0, the
 * 'next' values are pending and take effect on the next cold reset.
 *
 * Return: 0 if no error; otherwise the result of the mailbox command.
 *
 * See CXL 2.0 8.2.9.5.2.1 Get Partition Info
 */
static int cxl_mem_get_partition_info(struct cxl_mem *cxlm,
				      u64 *active_volatile_bytes,
				      u64 *active_persistent_bytes,
				      u64 *next_volatile_bytes,
				      u64 *next_persistent_bytes)
{
	struct cxl_mbox_get_partition_info {
		__le64 active_volatile_cap;
		__le64 active_persistent_cap;
		__le64 next_volatile_cap;
		__le64 next_persistent_cap;
	} __packed pi;
	int rc;

	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_PARTITION_INFO,
				   NULL, 0, &pi, sizeof(pi));
	if (rc)
		return rc;

	*active_volatile_bytes = le64_to_cpu(pi.active_volatile_cap);
	*active_persistent_bytes = le64_to_cpu(pi.active_persistent_cap);
	*next_volatile_bytes = le64_to_cpu(pi.next_volatile_cap);
	*next_persistent_bytes = le64_to_cpu(pi.next_persistent_cap);

	*active_volatile_bytes *= CXL_CAPACITY_MULTIPLIER;
	*active_persistent_bytes *= CXL_CAPACITY_MULTIPLIER;
	*next_volatile_bytes *= CXL_CAPACITY_MULTIPLIER;
	*next_persistent_bytes *= CXL_CAPACITY_MULTIPLIER;

	return 0;
}
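/*
 * Example: capacities are reported in 256 MB units, so an
 * active_volatile_cap of 0x40 from the device corresponds to
 * 0x40 * CXL_CAPACITY_MULTIPLIER = 16 GB of active volatile capacity.
 */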
/**
 * cxl_mem_enumerate_cmds() - Enumerate commands for a device.
 * @cxlm: The device.
 *
 * Returns 0 if enumerate completed successfully.
 *
 * CXL devices have optional support for certain commands. This function will
 * determine the set of supported commands for the hardware and update the
 * enabled_cmds bitmap in the @cxlm.
 */
static int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm)
{
	struct cxl_mbox_get_supported_logs *gsl;
	struct device *dev = &cxlm->pdev->dev;
	struct cxl_mem_command *cmd;
	int i, rc;

	gsl = cxl_get_gsl(cxlm);
	if (IS_ERR(gsl))
		return PTR_ERR(gsl);

	rc = -ENOENT;
	for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
		u32 size = le32_to_cpu(gsl->entry[i].size);
		uuid_t uuid = gsl->entry[i].uuid;
		u8 *log;

		dev_dbg(dev, "Found LOG type %pU of size %d", &uuid, size);

		if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
			continue;

		log = kvmalloc(size, GFP_KERNEL);
		if (!log) {
			rc = -ENOMEM;
			goto out;
		}

		rc = cxl_xfer_log(cxlm, &uuid, size, log);
		if (rc) {
			kvfree(log);
			goto out;
		}

		cxl_walk_cel(cxlm, size, log);
		kvfree(log);

		/* In case CEL was bogus, enable some default commands. */
		cxl_for_each_cmd(cmd)
			if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
				set_bit(cmd->info.id, cxlm->enabled_cmds);

		/* Found the required CEL */
		rc = 0;
	}

out:
	kvfree(gsl);
	return rc;
}

/**
 * cxl_mem_identify() - Send the IDENTIFY command to the device.
 * @cxlm: The device to identify.
 *
 * Return: 0 if identify was executed successfully.
 *
 * This will dispatch the identify command to the device and on success populate
 * structures to be exported to sysfs.
 */
static int cxl_mem_identify(struct cxl_mem *cxlm)
{
	/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
	struct cxl_mbox_identify {
		char fw_revision[0x10];
		__le64 total_capacity;
		__le64 volatile_capacity;
		__le64 persistent_capacity;
		__le64 partition_align;
		__le16 info_event_log_size;
		__le16 warning_event_log_size;
		__le16 failure_event_log_size;
		__le16 fatal_event_log_size;
		__le32 lsa_size;
		u8 poison_list_max_mer[3];
		__le16 inject_poison_limit;
		u8 poison_caps;
		u8 qos_telemetry_caps;
	} __packed id;
	int rc;

	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
				   sizeof(id));
	if (rc < 0)
		return rc;

	cxlm->total_bytes = le64_to_cpu(id.total_capacity);
	cxlm->total_bytes *= CXL_CAPACITY_MULTIPLIER;

	cxlm->volatile_only_bytes = le64_to_cpu(id.volatile_capacity);
	cxlm->volatile_only_bytes *= CXL_CAPACITY_MULTIPLIER;

	cxlm->persistent_only_bytes = le64_to_cpu(id.persistent_capacity);
	cxlm->persistent_only_bytes *= CXL_CAPACITY_MULTIPLIER;

	cxlm->partition_align_bytes = le64_to_cpu(id.partition_align);
	cxlm->partition_align_bytes *= CXL_CAPACITY_MULTIPLIER;

	dev_dbg(&cxlm->pdev->dev, "Identify Memory Device\n"
		"     total_bytes = %#llx\n"
		"     volatile_only_bytes = %#llx\n"
		"     persistent_only_bytes = %#llx\n"
		"     partition_align_bytes = %#llx\n",
		cxlm->total_bytes,
		cxlm->volatile_only_bytes,
		cxlm->persistent_only_bytes,
		cxlm->partition_align_bytes);

	cxlm->lsa_size = le32_to_cpu(id.lsa_size);
	memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));

	return 0;
}

static int cxl_mem_create_range_info(struct cxl_mem *cxlm)
{
	int rc;

	if (cxlm->partition_align_bytes == 0) {
		cxlm->ram_range.start = 0;
		cxlm->ram_range.end = cxlm->volatile_only_bytes - 1;
		cxlm->pmem_range.start = cxlm->volatile_only_bytes;
		cxlm->pmem_range.end = cxlm->volatile_only_bytes +
				       cxlm->persistent_only_bytes - 1;
		return 0;
	}

	rc = cxl_mem_get_partition_info(cxlm,
					&cxlm->active_volatile_bytes,
					&cxlm->active_persistent_bytes,
					&cxlm->next_volatile_bytes,
					&cxlm->next_persistent_bytes);
	if (rc < 0) {
		dev_err(&cxlm->pdev->dev, "Failed to query partition information\n");
		return rc;
	}

	dev_dbg(&cxlm->pdev->dev, "Get Partition Info\n"
		"     active_volatile_bytes = %#llx\n"
		"     active_persistent_bytes = %#llx\n"
		"     next_volatile_bytes = %#llx\n"
		"     next_persistent_bytes = %#llx\n",
		cxlm->active_volatile_bytes,
		cxlm->active_persistent_bytes,
		cxlm->next_volatile_bytes,
		cxlm->next_persistent_bytes);

	cxlm->ram_range.start = 0;
	cxlm->ram_range.end = cxlm->active_volatile_bytes - 1;

	cxlm->pmem_range.start = cxlm->active_volatile_bytes;
	cxlm->pmem_range.end = cxlm->active_volatile_bytes +
			       cxlm->active_persistent_bytes - 1;

	return 0;
}
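/*
 * Example of the resulting layout: with 16 GB of active volatile and 16 GB
 * of active persistent capacity, ram_range covers device addresses
 * [0, 16 GB - 1] and pmem_range covers [16 GB, 32 GB - 1]; volatile
 * capacity always comes first.
 */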
static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct cxl_memdev *cxlmd;
	struct cxl_mem *cxlm;
	int rc;

	rc = pcim_enable_device(pdev);
	if (rc)
		return rc;

	cxlm = cxl_mem_create(pdev);
	if (IS_ERR(cxlm))
		return PTR_ERR(cxlm);

	rc = cxl_mem_setup_regs(cxlm);
	if (rc)
		return rc;

	rc = cxl_mem_setup_mailbox(cxlm);
	if (rc)
		return rc;

	rc = cxl_mem_enumerate_cmds(cxlm);
	if (rc)
		return rc;

	rc = cxl_mem_identify(cxlm);
	if (rc)
		return rc;

	rc = cxl_mem_create_range_info(cxlm);
	if (rc)
		return rc;

	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlm, &cxl_memdev_fops);
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

	if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);

	return rc;
}

static const struct pci_device_id cxl_mem_pci_tbl[] = {
	/* PCI class code for CXL.mem Type-3 Devices */
	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
	{ /* terminate list */ },
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);

static struct pci_driver cxl_mem_driver = {
	.name = KBUILD_MODNAME,
	.id_table = cxl_mem_pci_tbl,
	.probe = cxl_mem_probe,
	.driver = {
		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
	},
};

static __init int cxl_mem_init(void)
{
	struct dentry *mbox_debugfs;
	int rc;

	/* Double check the anonymous union trickery in struct cxl_regs */
	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
		     offsetof(struct cxl_regs, device_regs.memdev));

	rc = pci_register_driver(&cxl_mem_driver);
	if (rc)
		return rc;

	cxl_debugfs = debugfs_create_dir("cxl", NULL);
	mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
	debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
			    &cxl_raw_allow_all);

	return 0;
}

static __exit void cxl_mem_exit(void)
{
	debugfs_remove_recursive(cxl_debugfs);
	pci_unregister_driver(&cxl_mem_driver);
}

MODULE_LICENSE("GPL v2");
module_init(cxl_mem_init);
module_exit(cxl_mem_exit);
MODULE_IMPORT_NS(CXL);