// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. */

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <cxlmem.h>
#include "trace.h"
#include "core.h"

static DECLARE_RWSEM(cxl_memdev_rwsem);

/*
 * An entire PCI topology full of devices should be enough for any
 * config
 */
#define CXL_MEM_MAX_DEVS 65536

static int cxl_mem_major;
static DEFINE_IDA(cxl_memdev_ida);

static void cxl_memdev_release(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	ida_free(&cxl_memdev_ida, cxlmd->id);
	kfree(cxlmd);
}

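/* Place the character device node in a "cxl/" subdirectory, i.e. /dev/cxl/memN */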
static char *cxl_memdev_devnode(const struct device *dev, umode_t *mode, kuid_t *uid,
				kgid_t *gid)
{
	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
}

static ssize_t firmware_version_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	return sysfs_emit(buf, "%.16s\n", mds->firmware_version);
}
static DEVICE_ATTR_RO(firmware_version);

static ssize_t payload_max_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	return sysfs_emit(buf, "%zu\n", mds->payload_size);
}
static DEVICE_ATTR_RO(payload_max);

static ssize_t label_storage_size_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	return sysfs_emit(buf, "%zu\n", mds->lsa_size);
}
static DEVICE_ATTR_RO(label_storage_size);

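/*
 * The ram and pmem capacities are both published as a "size" attribute,
 * distinguished by the named "ram" and "pmem" attribute groups defined
 * further below, e.g. /sys/bus/cxl/devices/memN/ram/size.
 */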
static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = resource_size(&cxlds->ram_res);

	return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_ram_size =
	__ATTR(size, 0444, ram_size_show, NULL);

static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = resource_size(&cxlds->pmem_res);

	return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_pmem_size =
	__ATTR(size, 0444, pmem_size_show, NULL);

static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%#llx\n", cxlds->serial);
}
static DEVICE_ATTR_RO(serial);

static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	return sysfs_emit(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	u64 offset, length;
	int rc = 0;

	/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
	if (resource_size(&cxlds->pmem_res)) {
		offset = cxlds->pmem_res.start;
		length = resource_size(&cxlds->pmem_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc)
			return rc;
	}
	if (resource_size(&cxlds->ram_res)) {
		offset = cxlds->ram_res.start;
		length = resource_size(&cxlds->ram_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		/*
		 * Invalid Physical Address is not an error for
		 * volatile addresses. Device support is optional.
		 */
		if (rc == -EFAULT)
			rc = 0;
	}
	return rc;
}

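/*
 * cxl_trigger_poison_list() - read the poison list for a memdev
 * @cxlmd: the memory device to query
 *
 * With no regions committed on the endpoint, poison is read for the
 * whole device (pmem then ram partitions). Otherwise it is gathered per
 * endpoint so results can be attributed to regions. The read side of
 * cxl_dpa_rwsem keeps the DPA layout stable while querying.
 */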
int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
{
	struct cxl_port *port;
	int rc;

	port = dev_get_drvdata(&cxlmd->dev);
	if (!port || !is_cxl_endpoint(port))
		return -EINVAL;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	if (port->commit_end == -1) {
		/* No regions mapped to this memdev */
		rc = cxl_get_poison_by_memdev(cxlmd);
	} else {
		/* Regions mapped, collect poison by endpoint */
		rc = cxl_get_poison_by_endpoint(port);
	}
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);

struct cxl_dpa_to_region_context {
	struct cxl_region *cxlr;
	u64 dpa;
};

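/*
 * device_for_each_child() callback: find the endpoint decoder whose DPA
 * resource contains @ctx->dpa and record its region. Returning 1 stops
 * the walk on the first match.
 */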
static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
	struct cxl_dpa_to_region_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	u64 dpa = ctx->dpa;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
		return 0;

	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
		return 0;

	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
		dev_name(&cxled->cxld.region->dev));

	ctx->cxlr = cxled->cxld.region;

	return 1;
}

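/*
 * Return the region, if any, currently decoding @dpa, or NULL when the
 * memdev has no endpoint port, no committed decoders, or the address is
 * not mapped by any endpoint decoder.
 */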
static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dpa_to_region_context ctx;
	struct cxl_port *port;

	ctx = (struct cxl_dpa_to_region_context) {
		.dpa = dpa,
	};
	port = dev_get_drvdata(&cxlmd->dev);
	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);

	return ctx.cxlr;
}

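/*
 * Sanity check a debugfs-provided DPA for poison inject/clear: it must
 * fall within the device's DPA resource and be 64-byte aligned.
 */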
static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	if (!resource_size(&cxlds->dpa_res)) {
		dev_dbg(cxlds->dev, "device has no dpa resource\n");
		return -EINVAL;
	}
	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
			dpa, &cxlds->dpa_res);
		return -EINVAL;
	}
	if (!IS_ALIGNED(dpa, 64)) {
		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
		return -EINVAL;
	}

	return 0;
}

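/*
 * Inject poison at @dpa via the Inject Poison mailbox command. If the
 * address is mapped by a committed region, warn once with the region
 * name, then emit a cxl_poison trace record for the injection.
 */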
int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
	struct cxl_mbox_inject_poison inject;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	inject.address = cpu_to_le64(dpa);
	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_INJECT_POISON,
		.size_in = sizeof(inject),
		.payload_in = &inject,
	};
	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(mds->cxlds.dev,
			      "poison inject dpa:%#llx region: %s\n", dpa,
			      dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);

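/*
 * Clear poison at @dpa via the Clear Poison mailbox command, which also
 * writes fresh data (zeroes here) to the cleared location. As with
 * injection, warn once if the address is mapped by a committed region
 * and emit a cxl_poison trace record for the clear.
 */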
int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
	struct cxl_mbox_clear_poison clear;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	/*
	 * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
	 * is defined to accept 64 bytes of write-data, along with the
	 * address to clear. This driver uses zeroes as write-data.
	 */
	clear = (struct cxl_mbox_clear_poison) {
		.address = cpu_to_le64(dpa)
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_CLEAR_POISON,
		.size_in = sizeof(clear),
		.payload_in = &clear,
	};

	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(mds->cxlds.dev,
			      "poison clear dpa:%#llx region: %s\n", dpa,
			      dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL);

static struct attribute *cxl_memdev_attributes[] = {
	&dev_attr_serial.attr,
	&dev_attr_firmware_version.attr,
	&dev_attr_payload_max.attr,
	&dev_attr_label_storage_size.attr,
	&dev_attr_numa_node.attr,
	NULL,
};

static struct attribute *cxl_memdev_pmem_attributes[] = {
	&dev_attr_pmem_size.attr,
	NULL,
};

static struct attribute *cxl_memdev_ram_attributes[] = {
	&dev_attr_ram_size.attr,
	NULL,
};

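/* Hide the numa_node attribute when NUMA support is not compiled in */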
static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
				  int n)
{
	if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
		return 0;
	return a->mode;
}

static struct attribute_group cxl_memdev_attribute_group = {
	.attrs = cxl_memdev_attributes,
	.is_visible = cxl_memdev_visible,
};

static struct attribute_group cxl_memdev_ram_attribute_group = {
	.name = "ram",
	.attrs = cxl_memdev_ram_attributes,
};

static struct attribute_group cxl_memdev_pmem_attribute_group = {
	.name = "pmem",
	.attrs = cxl_memdev_pmem_attributes,
};

static const struct attribute_group *cxl_memdev_attribute_groups[] = {
	&cxl_memdev_attribute_group,
	&cxl_memdev_ram_attribute_group,
	&cxl_memdev_pmem_attribute_group,
	NULL,
};

static const struct device_type cxl_memdev_type = {
	.name = "cxl_memdev",
	.release = cxl_memdev_release,
	.devnode = cxl_memdev_devnode,
	.groups = cxl_memdev_attribute_groups,
};

bool is_cxl_memdev(const struct device *dev)
{
	return dev->type == &cxl_memdev_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);

/**
 * set_exclusive_cxl_commands() - atomically disable user cxl commands
 * @mds: The device state to operate on
 * @cmds: bitmap of commands to mark exclusive
 *
 * Grab the cxl_memdev_rwsem in write mode to flush in-flight
 * invocations of the ioctl path and then disable future execution of
 * commands with the command ids set in @cmds.
 */
void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
				unsigned long *cmds)
{
	down_write(&cxl_memdev_rwsem);
	bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
		  CXL_MEM_COMMAND_ID_MAX);
	up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);

/**
 * clear_exclusive_cxl_commands() - atomically enable user cxl commands
 * @mds: The device state to modify
 * @cmds: bitmap of commands to mark available for userspace
 */
void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
				  unsigned long *cmds)
{
	down_write(&cxl_memdev_rwsem);
	bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
		      CXL_MEM_COMMAND_ID_MAX);
	up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);

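/*
 * Sever the ioctl path's access to the driver state. Taking
 * cxl_memdev_rwsem for write drains in-flight ioctls before ->cxlds is
 * cleared, after which cxl_memdev_ioctl() fails with -ENXIO.
 */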
static void cxl_memdev_shutdown(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	down_write(&cxl_memdev_rwsem);
	cxlmd->cxlds = NULL;
	up_write(&cxl_memdev_rwsem);
}

static void cxl_memdev_unregister(void *_cxlmd)
{
	struct cxl_memdev *cxlmd = _cxlmd;
	struct device *dev = &cxlmd->dev;

	cxl_memdev_shutdown(dev);
	cdev_device_del(&cxlmd->cdev, dev);
	put_device(dev);
}

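/*
 * Workqueue callback that unbinds the memdev driver; the put_device()
 * balances a reference assumed to be held by whoever scheduled the work.
 */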
static void detach_memdev(struct work_struct *work)
{
	struct cxl_memdev *cxlmd;

	cxlmd = container_of(work, typeof(*cxlmd), detach_work);
	device_release_driver(&cxlmd->dev);
	put_device(&cxlmd->dev);
}

static struct lock_class_key cxl_memdev_key;

static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
					   const struct file_operations *fops)
{
	struct cxl_memdev *cxlmd;
	struct device *dev;
	struct cdev *cdev;
	int rc;

	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
	if (!cxlmd)
		return ERR_PTR(-ENOMEM);

	rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
	if (rc < 0)
		goto err;
	cxlmd->id = rc;
	cxlmd->depth = -1;

	dev = &cxlmd->dev;
	device_initialize(dev);
	lockdep_set_class(&dev->mutex, &cxl_memdev_key);
	dev->parent = cxlds->dev;
	dev->bus = &cxl_bus_type;
	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
	dev->type = &cxl_memdev_type;
	device_set_pm_not_required(dev);
	INIT_WORK(&cxlmd->detach_work, detach_memdev);

	cdev = &cxlmd->cdev;
	cdev_init(cdev, fops);
	return cxlmd;

err:
	kfree(cxlmd);
	return ERR_PTR(rc);
}

static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
			       unsigned long arg)
{
	switch (cmd) {
	case CXL_MEM_QUERY_COMMANDS:
		return cxl_query_cmd(cxlmd, (void __user *)arg);
	case CXL_MEM_SEND_COMMAND:
		return cxl_send_cmd(cxlmd, (void __user *)arg);
	default:
		return -ENOTTY;
	}
}

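/*
 * Holding cxl_memdev_rwsem for read keeps ->cxlds valid for the duration
 * of the ioctl; once cxl_memdev_shutdown() has run, ->cxlds is NULL and
 * -ENXIO is returned instead.
 */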
static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
			     unsigned long arg)
{
	struct cxl_memdev *cxlmd = file->private_data;
	int rc = -ENXIO;

	down_read(&cxl_memdev_rwsem);
	if (cxlmd->cxlds)
		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
	up_read(&cxl_memdev_rwsem);

	return rc;
}

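/*
 * An open file holds a reference on the memdev so its backing memory
 * stays live until the last file descriptor is released.
 */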
static int cxl_memdev_open(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	get_device(&cxlmd->dev);
	file->private_data = cxlmd;

	return 0;
}

static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	put_device(&cxlmd->dev);

	return 0;
}

static const struct file_operations cxl_memdev_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = cxl_memdev_ioctl,
	.open = cxl_memdev_open,
	.release = cxl_memdev_release_file,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

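/*
 * devm_cxl_add_memdev() - allocate and register a memN device for @cxlds
 * @cxlds: the device state to publish
 *
 * Registers the memdev on the CXL bus along with its /dev/cxl/memN
 * character device. Teardown is tied to the lifetime of @cxlds->dev via
 * devm_add_action_or_reset().
 */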
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
{
	struct cxl_memdev *cxlmd;
	struct device *dev;
	struct cdev *cdev;
	int rc;

	cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
	if (IS_ERR(cxlmd))
		return cxlmd;

	dev = &cxlmd->dev;
	rc = dev_set_name(dev, "mem%d", cxlmd->id);
	if (rc)
		goto err;

	/*
	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
	 * needed as this is ordered with cdev_add() publishing the device.
	 */
	cxlmd->cxlds = cxlds;
	cxlds->cxlmd = cxlmd;

	cdev = &cxlmd->cdev;
	rc = cdev_device_add(cdev, dev);
	if (rc)
		goto err;

	rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
	if (rc)
		return ERR_PTR(rc);
	return cxlmd;

err:
	/*
	 * The cdev was briefly live, shutdown any ioctl operations that
	 * saw that state.
	 */
	cxl_memdev_shutdown(dev);
	put_device(dev);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);

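/*
 * Reserve a dynamic char device major with CXL_MEM_MAX_DEVS minors for
 * memdev nodes; cxl_memdev_exit() releases it.
 */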
__init int cxl_memdev_init(void)
{
	dev_t devt;
	int rc;

	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
	if (rc)
		return rc;

	cxl_mem_major = MAJOR(devt);

	return 0;
}

void cxl_memdev_exit(void)
{
	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
}