xref: /openbmc/linux/drivers/cxl/core/memdev.c (revision 5aa39a9165cfc80d37f1db8ba8fee798a3ecf74f)
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. */

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <cxlmem.h>
#include "trace.h"
#include "core.h"

static DECLARE_RWSEM(cxl_memdev_rwsem);

/*
 * An entire PCI topology full of devices should be enough for any
 * config
 */
#define CXL_MEM_MAX_DEVS 65536

static int cxl_mem_major;
static DEFINE_IDA(cxl_memdev_ida);

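/* Final put_device() on the memdev: return the id and free the allocation */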
static void cxl_memdev_release(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	ida_free(&cxl_memdev_ida, cxlmd->id);
	kfree(cxlmd);
}

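/* Place memdev character device nodes in a "cxl/" subdirectory of /dev */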
static char *cxl_memdev_devnode(const struct device *dev, umode_t *mode, kuid_t *uid,
				kgid_t *gid)
{
	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
}

static ssize_t firmware_version_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	if (!mds)
		return sysfs_emit(buf, "\n");
	return sysfs_emit(buf, "%.16s\n", mds->firmware_version);
}
static DEVICE_ATTR_RO(firmware_version);

static ssize_t payload_max_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	if (!mds)
		return sysfs_emit(buf, "\n");
	return sysfs_emit(buf, "%zu\n", mds->payload_size);
}
static DEVICE_ATTR_RO(payload_max);

static ssize_t label_storage_size_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	if (!mds)
		return sysfs_emit(buf, "\n");
	return sysfs_emit(buf, "%zu\n", mds->lsa_size);
}
static DEVICE_ATTR_RO(label_storage_size);

static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = resource_size(&cxlds->ram_res);

	return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_ram_size =
	__ATTR(size, 0444, ram_size_show, NULL);

static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = resource_size(&cxlds->pmem_res);

	return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_pmem_size =
	__ATTR(size, 0444, pmem_size_show, NULL);

static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%#llx\n", cxlds->serial);
}
static DEVICE_ATTR_RO(serial);

static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	return sysfs_emit(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

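/* Walk the device's pmem and ram DPA ranges and retrieve the poison list for each */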
static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	u64 offset, length;
	int rc = 0;

	/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
	if (resource_size(&cxlds->pmem_res)) {
		offset = cxlds->pmem_res.start;
		length = resource_size(&cxlds->pmem_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc)
			return rc;
	}
	if (resource_size(&cxlds->ram_res)) {
		offset = cxlds->ram_res.start;
		length = resource_size(&cxlds->ram_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		/*
		 * Invalid Physical Address is not an error for
		 * volatile addresses. Device support is optional.
		 */
		if (rc == -EFAULT)
			rc = 0;
	}
	return rc;
}

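/**
 * cxl_trigger_poison_list() - Read the poison list for a memdev
 * @cxlmd: the memory device whose poison list to retrieve
 *
 * When the endpoint has committed region decoders, poison is collected
 * per region via the endpoint port; otherwise the entire device DPA
 * range is read directly.
 */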
int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
{
	struct cxl_port *port;
	int rc;

	port = dev_get_drvdata(&cxlmd->dev);
	if (!port || !is_cxl_endpoint(port))
		return -EINVAL;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	if (port->commit_end == -1) {
		/* No regions mapped to this memdev */
		rc = cxl_get_poison_by_memdev(cxlmd);
	} else {
		/* Regions mapped, collect poison by endpoint */
		rc = cxl_get_poison_by_endpoint(port);
	}
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);

struct cxl_dpa_to_region_context {
	struct cxl_region *cxlr;
	u64 dpa;
};

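/*
 * device_for_each_child() callback: match the context DPA against an
 * endpoint decoder's DPA range and record the decoder's region.
 * Returns 1 to stop the walk on the first match.
 */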
static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
	struct cxl_dpa_to_region_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	u64 dpa = ctx->dpa;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
		return 0;

	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
		return 0;

	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
		dev_name(&cxled->cxld.region->dev));

	ctx->cxlr = cxled->cxld.region;

	return 1;
}

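/*
 * Find the region, if any, that currently maps @dpa by walking the
 * endpoint decoders under this memdev's endpoint port. Returns NULL
 * when the DPA is not mapped by any region.
 */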
static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dpa_to_region_context ctx;
	struct cxl_port *port;

	ctx = (struct cxl_dpa_to_region_context) {
		.dpa = dpa,
	};
	port = dev_get_drvdata(&cxlmd->dev);
	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);

	return ctx.cxlr;
}

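/*
 * Sanity check a user-provided DPA for the poison inject/clear paths:
 * it must fall within the device's DPA resource and be 64-byte aligned.
 */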
static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	if (!resource_size(&cxlds->dpa_res)) {
		dev_dbg(cxlds->dev, "device has no dpa resource\n");
		return -EINVAL;
	}
	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
			dpa, &cxlds->dpa_res);
		return -EINVAL;
	}
	if (!IS_ALIGNED(dpa, 64)) {
		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
		return -EINVAL;
	}

	return 0;
}

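/**
 * cxl_inject_poison() - Inject poison at a device physical address
 * @cxlmd: the memory device to operate on
 * @dpa: the 64-byte aligned device physical address to poison
 *
 * Sends the Inject Poison mailbox command, warns once when the address
 * is mapped by an active region, and emits a cxl_poison trace event.
 * Short-circuits to success when CONFIG_DEBUG_FS is not enabled.
 */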
int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
	struct cxl_mbox_inject_poison inject;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	inject.address = cpu_to_le64(dpa);
	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_INJECT_POISON,
		.size_in = sizeof(inject),
		.payload_in = &inject,
	};
	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(mds->cxlds.dev,
			      "poison inject dpa:%#llx region: %s\n", dpa,
			      dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);

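/**
 * cxl_clear_poison() - Clear poison at a device physical address
 * @cxlmd: the memory device to operate on
 * @dpa: the 64-byte aligned device physical address to clear
 *
 * Sends the Clear Poison mailbox command with zeroed write-data, warns
 * once when the address is mapped by an active region, and emits a
 * cxl_poison trace event. Short-circuits to success when
 * CONFIG_DEBUG_FS is not enabled.
 */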
int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
	struct cxl_mbox_clear_poison clear;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	/*
	 * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
	 * is defined to accept 64 bytes of write-data, along with the
	 * address to clear. This driver uses zeroes as write-data.
	 */
	clear = (struct cxl_mbox_clear_poison) {
		.address = cpu_to_le64(dpa)
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_CLEAR_POISON,
		.size_in = sizeof(clear),
		.payload_in = &clear,
	};

	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(mds->cxlds.dev,
			      "poison clear dpa:%#llx region: %s\n", dpa,
			      dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL);

static struct attribute *cxl_memdev_attributes[] = {
	&dev_attr_serial.attr,
	&dev_attr_firmware_version.attr,
	&dev_attr_payload_max.attr,
	&dev_attr_label_storage_size.attr,
	&dev_attr_numa_node.attr,
	NULL,
};

static struct attribute *cxl_memdev_pmem_attributes[] = {
	&dev_attr_pmem_size.attr,
	NULL,
};

static struct attribute *cxl_memdev_ram_attributes[] = {
	&dev_attr_ram_size.attr,
	NULL,
};

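/* Hide the numa_node attribute when NUMA support is not compiled in */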
static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
				  int n)
{
	if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
		return 0;
	return a->mode;
}

static struct attribute_group cxl_memdev_attribute_group = {
	.attrs = cxl_memdev_attributes,
	.is_visible = cxl_memdev_visible,
};

static struct attribute_group cxl_memdev_ram_attribute_group = {
	.name = "ram",
	.attrs = cxl_memdev_ram_attributes,
};

static struct attribute_group cxl_memdev_pmem_attribute_group = {
	.name = "pmem",
	.attrs = cxl_memdev_pmem_attributes,
};

static const struct attribute_group *cxl_memdev_attribute_groups[] = {
	&cxl_memdev_attribute_group,
	&cxl_memdev_ram_attribute_group,
	&cxl_memdev_pmem_attribute_group,
	NULL,
};

static const struct device_type cxl_memdev_type = {
	.name = "cxl_memdev",
	.release = cxl_memdev_release,
	.devnode = cxl_memdev_devnode,
	.groups = cxl_memdev_attribute_groups,
};

bool is_cxl_memdev(const struct device *dev)
{
	return dev->type == &cxl_memdev_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);

/**
 * set_exclusive_cxl_commands() - atomically disable user cxl commands
 * @mds: The device state to operate on
 * @cmds: bitmap of commands to mark exclusive
 *
 * Grab the cxl_memdev_rwsem in write mode to flush in-flight
 * invocations of the ioctl path and then disable future execution of
 * commands with the command ids set in @cmds.
 */
void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
				unsigned long *cmds)
{
	down_write(&cxl_memdev_rwsem);
	bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
		  CXL_MEM_COMMAND_ID_MAX);
	up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);

/**
 * clear_exclusive_cxl_commands() - atomically enable user cxl commands
 * @mds: The device state to modify
 * @cmds: bitmap of commands to mark available for userspace
 */
void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
				  unsigned long *cmds)
{
	down_write(&cxl_memdev_rwsem);
	bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
		      CXL_MEM_COMMAND_ID_MAX);
	up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);

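/*
 * Sever the link from the memdev to its driver state. Taking
 * cxl_memdev_rwsem for write flushes in-flight ioctls; subsequent
 * ioctls observe a NULL cxlds and fail with -ENXIO.
 */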
static void cxl_memdev_shutdown(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	down_write(&cxl_memdev_rwsem);
	cxlmd->cxlds = NULL;
	up_write(&cxl_memdev_rwsem);
}

static void cxl_memdev_unregister(void *_cxlmd)
{
	struct cxl_memdev *cxlmd = _cxlmd;
	struct device *dev = &cxlmd->dev;

	cxl_memdev_shutdown(dev);
	cdev_device_del(&cxlmd->cdev, dev);
	put_device(dev);
}

static void detach_memdev(struct work_struct *work)
{
	struct cxl_memdev *cxlmd;

	cxlmd = container_of(work, typeof(*cxlmd), detach_work);
	device_release_driver(&cxlmd->dev);
	put_device(&cxlmd->dev);
}

static struct lock_class_key cxl_memdev_key;

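/*
 * Allocate and initialize a memdev: reserve an id, initialize the
 * device and its cdev, but leave publishing (cdev_device_add()) to the
 * caller.
 */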
static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
					   const struct file_operations *fops)
{
	struct cxl_memdev *cxlmd;
	struct device *dev;
	struct cdev *cdev;
	int rc;

	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
	if (!cxlmd)
		return ERR_PTR(-ENOMEM);

	rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
	if (rc < 0)
		goto err;
	cxlmd->id = rc;
	cxlmd->depth = -1;

	dev = &cxlmd->dev;
	device_initialize(dev);
	lockdep_set_class(&dev->mutex, &cxl_memdev_key);
	dev->parent = cxlds->dev;
	dev->bus = &cxl_bus_type;
	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
	dev->type = &cxl_memdev_type;
	device_set_pm_not_required(dev);
	INIT_WORK(&cxlmd->detach_work, detach_memdev);

	cdev = &cxlmd->cdev;
	cdev_init(cdev, fops);
	return cxlmd;

err:
	kfree(cxlmd);
	return ERR_PTR(rc);
}

static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
			       unsigned long arg)
{
	switch (cmd) {
	case CXL_MEM_QUERY_COMMANDS:
		return cxl_query_cmd(cxlmd, (void __user *)arg);
	case CXL_MEM_SEND_COMMAND:
		return cxl_send_cmd(cxlmd, (void __user *)arg);
	default:
		return -ENOTTY;
	}
}

static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
			     unsigned long arg)
{
	struct cxl_memdev *cxlmd = file->private_data;
	struct cxl_dev_state *cxlds;
	int rc = -ENXIO;

	down_read(&cxl_memdev_rwsem);
	cxlds = cxlmd->cxlds;
	if (cxlds && cxlds->type == CXL_DEVTYPE_CLASSMEM)
		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
	up_read(&cxl_memdev_rwsem);

	return rc;
}

static int cxl_memdev_open(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	get_device(&cxlmd->dev);
	file->private_data = cxlmd;

	return 0;
}

static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	put_device(&cxlmd->dev);

	return 0;
}

static const struct file_operations cxl_memdev_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = cxl_memdev_ioctl,
	.open = cxl_memdev_open,
	.release = cxl_memdev_release_file,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

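/**
 * devm_cxl_add_memdev() - Register a CXL memory device
 * @cxlds: the driver state to bind to the new memdev
 *
 * Allocates a memdev, publishes its character device, and arranges
 * devm-based teardown (shutdown, cdev removal, final put) when the
 * parent driver unbinds.
 */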
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
{
	struct cxl_memdev *cxlmd;
	struct device *dev;
	struct cdev *cdev;
	int rc;

	cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
	if (IS_ERR(cxlmd))
		return cxlmd;

	dev = &cxlmd->dev;
	rc = dev_set_name(dev, "mem%d", cxlmd->id);
	if (rc)
		goto err;

	/*
	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
	 * needed as this is ordered with cdev_add() publishing the device.
	 */
	cxlmd->cxlds = cxlds;
	cxlds->cxlmd = cxlmd;

	cdev = &cxlmd->cdev;
	rc = cdev_device_add(cdev, dev);
	if (rc)
		goto err;

	rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
	if (rc)
		return ERR_PTR(rc);
	return cxlmd;

err:
	/*
	 * The cdev was briefly live, shutdown any ioctl operations that
	 * saw that state.
	 */
	cxl_memdev_shutdown(dev);
	put_device(dev);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);

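/* Reserve the char device major/minor space for all possible memdevs */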
__init int cxl_memdev_init(void)
{
	dev_t devt;
	int rc;

	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
	if (rc)
		return rc;

	cxl_mem_major = MAJOR(devt);

	return 0;
}

void cxl_memdev_exit(void)
{
	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
}