xref: /openbmc/linux/drivers/cxl/core/port.c (revision 2a12187d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
3 #include <linux/io-64-nonatomic-lo-hi.h>
4 #include <linux/memregion.h>
5 #include <linux/workqueue.h>
6 #include <linux/debugfs.h>
7 #include <linux/device.h>
8 #include <linux/module.h>
9 #include <linux/pci.h>
10 #include <linux/slab.h>
11 #include <linux/idr.h>
12 #include <cxlmem.h>
13 #include <cxlpci.h>
14 #include <cxl.h>
15 #include "core.h"
16 
17 /**
18  * DOC: cxl core
19  *
20  * The CXL core provides a set of interfaces that can be consumed by CXL aware
21  * drivers. The interfaces allow for creation, modification, and destruction of
22  * regions, memory devices, ports, and decoders. CXL aware drivers must register
23  * with the CXL core via these interfaces in order to be able to participate in
24  * cross-device interleave coordination. The CXL core also establishes and
25  * maintains the bridge to the nvdimm subsystem.
26  *
27  * CXL core introduces sysfs hierarchy to control the devices that are
28  * instantiated by the core.
29  */
30 
/* Maps a non-PCI uport device pointer to the pci_bus registered for it */
static DEFINE_XARRAY(cxl_root_buses);
/* Source of the unique port->id values handed out by cxl_port_alloc() */
static DEFINE_IDA(cxl_port_ida);
33 
34 static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
35 			    char *buf)
36 {
37 	return sysfs_emit(buf, "%s\n", dev->type->name);
38 }
39 static DEVICE_ATTR_RO(devtype);
40 
41 static int cxl_device_id(struct device *dev)
42 {
43 	if (dev->type == &cxl_nvdimm_bridge_type)
44 		return CXL_DEVICE_NVDIMM_BRIDGE;
45 	if (dev->type == &cxl_nvdimm_type)
46 		return CXL_DEVICE_NVDIMM;
47 	if (dev->type == CXL_PMEM_REGION_TYPE())
48 		return CXL_DEVICE_PMEM_REGION;
49 	if (is_cxl_port(dev)) {
50 		if (is_cxl_root(to_cxl_port(dev)))
51 			return CXL_DEVICE_ROOT;
52 		return CXL_DEVICE_PORT;
53 	}
54 	if (is_cxl_memdev(dev))
55 		return CXL_DEVICE_MEMORY_EXPANDER;
56 	if (dev->type == CXL_REGION_TYPE())
57 		return CXL_DEVICE_REGION;
58 	return 0;
59 }
60 
61 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
62 			     char *buf)
63 {
64 	return sysfs_emit(buf, CXL_MODALIAS_FMT "\n", cxl_device_id(dev));
65 }
66 static DEVICE_ATTR_RO(modalias);
67 
68 static struct attribute *cxl_base_attributes[] = {
69 	&dev_attr_devtype.attr,
70 	&dev_attr_modalias.attr,
71 	NULL,
72 };
73 
74 struct attribute_group cxl_base_attribute_group = {
75 	.attrs = cxl_base_attributes,
76 };
77 
78 static ssize_t start_show(struct device *dev, struct device_attribute *attr,
79 			  char *buf)
80 {
81 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
82 
83 	return sysfs_emit(buf, "%#llx\n", cxld->hpa_range.start);
84 }
85 static DEVICE_ATTR_ADMIN_RO(start);
86 
87 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
88 			char *buf)
89 {
90 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
91 
92 	return sysfs_emit(buf, "%#llx\n", range_len(&cxld->hpa_range));
93 }
94 static DEVICE_ATTR_RO(size);
95 
96 #define CXL_DECODER_FLAG_ATTR(name, flag)                            \
97 static ssize_t name##_show(struct device *dev,                       \
98 			   struct device_attribute *attr, char *buf) \
99 {                                                                    \
100 	struct cxl_decoder *cxld = to_cxl_decoder(dev);              \
101                                                                      \
102 	return sysfs_emit(buf, "%s\n",                               \
103 			  (cxld->flags & (flag)) ? "1" : "0");       \
104 }                                                                    \
105 static DEVICE_ATTR_RO(name)
106 
107 CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM);
108 CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM);
109 CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2);
110 CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3);
111 CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK);
112 
113 static ssize_t target_type_show(struct device *dev,
114 				struct device_attribute *attr, char *buf)
115 {
116 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
117 
118 	switch (cxld->target_type) {
119 	case CXL_DECODER_ACCELERATOR:
120 		return sysfs_emit(buf, "accelerator\n");
121 	case CXL_DECODER_EXPANDER:
122 		return sysfs_emit(buf, "expander\n");
123 	}
124 	return -ENXIO;
125 }
126 static DEVICE_ATTR_RO(target_type);
127 
128 static ssize_t emit_target_list(struct cxl_switch_decoder *cxlsd, char *buf)
129 {
130 	struct cxl_decoder *cxld = &cxlsd->cxld;
131 	ssize_t offset = 0;
132 	int i, rc = 0;
133 
134 	for (i = 0; i < cxld->interleave_ways; i++) {
135 		struct cxl_dport *dport = cxlsd->target[i];
136 		struct cxl_dport *next = NULL;
137 
138 		if (!dport)
139 			break;
140 
141 		if (i + 1 < cxld->interleave_ways)
142 			next = cxlsd->target[i + 1];
143 		rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id,
144 				   next ? "," : "");
145 		if (rc < 0)
146 			return rc;
147 		offset += rc;
148 	}
149 
150 	return offset;
151 }
152 
153 static ssize_t target_list_show(struct device *dev,
154 				struct device_attribute *attr, char *buf)
155 {
156 	struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev);
157 	ssize_t offset;
158 	unsigned int seq;
159 	int rc;
160 
161 	do {
162 		seq = read_seqbegin(&cxlsd->target_lock);
163 		rc = emit_target_list(cxlsd, buf);
164 	} while (read_seqretry(&cxlsd->target_lock, seq));
165 
166 	if (rc < 0)
167 		return rc;
168 	offset = rc;
169 
170 	rc = sysfs_emit_at(buf, offset, "\n");
171 	if (rc < 0)
172 		return rc;
173 
174 	return offset + rc;
175 }
176 static DEVICE_ATTR_RO(target_list);
177 
178 static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
179 			 char *buf)
180 {
181 	struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
182 
183 	switch (cxled->mode) {
184 	case CXL_DECODER_RAM:
185 		return sysfs_emit(buf, "ram\n");
186 	case CXL_DECODER_PMEM:
187 		return sysfs_emit(buf, "pmem\n");
188 	case CXL_DECODER_NONE:
189 		return sysfs_emit(buf, "none\n");
190 	case CXL_DECODER_MIXED:
191 	default:
192 		return sysfs_emit(buf, "mixed\n");
193 	}
194 }
195 
196 static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
197 			  const char *buf, size_t len)
198 {
199 	struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
200 	enum cxl_decoder_mode mode;
201 	ssize_t rc;
202 
203 	if (sysfs_streq(buf, "pmem"))
204 		mode = CXL_DECODER_PMEM;
205 	else if (sysfs_streq(buf, "ram"))
206 		mode = CXL_DECODER_RAM;
207 	else
208 		return -EINVAL;
209 
210 	rc = cxl_dpa_set_mode(cxled, mode);
211 	if (rc)
212 		return rc;
213 
214 	return len;
215 }
216 static DEVICE_ATTR_RW(mode);
217 
218 static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *attr,
219 			    char *buf)
220 {
221 	struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
222 	u64 base = cxl_dpa_resource_start(cxled);
223 
224 	return sysfs_emit(buf, "%#llx\n", base);
225 }
226 static DEVICE_ATTR_RO(dpa_resource);
227 
228 static ssize_t dpa_size_show(struct device *dev, struct device_attribute *attr,
229 			     char *buf)
230 {
231 	struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
232 	resource_size_t size = cxl_dpa_size(cxled);
233 
234 	return sysfs_emit(buf, "%pa\n", &size);
235 }
236 
237 static ssize_t dpa_size_store(struct device *dev, struct device_attribute *attr,
238 			      const char *buf, size_t len)
239 {
240 	struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
241 	unsigned long long size;
242 	ssize_t rc;
243 
244 	rc = kstrtoull(buf, 0, &size);
245 	if (rc)
246 		return rc;
247 
248 	if (!IS_ALIGNED(size, SZ_256M))
249 		return -EINVAL;
250 
251 	rc = cxl_dpa_free(cxled);
252 	if (rc)
253 		return rc;
254 
255 	if (size == 0)
256 		return len;
257 
258 	rc = cxl_dpa_alloc(cxled, size);
259 	if (rc)
260 		return rc;
261 
262 	return len;
263 }
264 static DEVICE_ATTR_RW(dpa_size);
265 
266 static ssize_t interleave_granularity_show(struct device *dev,
267 					   struct device_attribute *attr,
268 					   char *buf)
269 {
270 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
271 
272 	return sysfs_emit(buf, "%d\n", cxld->interleave_granularity);
273 }
274 
275 static DEVICE_ATTR_RO(interleave_granularity);
276 
277 static ssize_t interleave_ways_show(struct device *dev,
278 				    struct device_attribute *attr, char *buf)
279 {
280 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
281 
282 	return sysfs_emit(buf, "%d\n", cxld->interleave_ways);
283 }
284 
285 static DEVICE_ATTR_RO(interleave_ways);
286 
287 static struct attribute *cxl_decoder_base_attrs[] = {
288 	&dev_attr_start.attr,
289 	&dev_attr_size.attr,
290 	&dev_attr_locked.attr,
291 	&dev_attr_interleave_granularity.attr,
292 	&dev_attr_interleave_ways.attr,
293 	NULL,
294 };
295 
296 static struct attribute_group cxl_decoder_base_attribute_group = {
297 	.attrs = cxl_decoder_base_attrs,
298 };
299 
300 static struct attribute *cxl_decoder_root_attrs[] = {
301 	&dev_attr_cap_pmem.attr,
302 	&dev_attr_cap_ram.attr,
303 	&dev_attr_cap_type2.attr,
304 	&dev_attr_cap_type3.attr,
305 	&dev_attr_target_list.attr,
306 	SET_CXL_REGION_ATTR(create_pmem_region)
307 	SET_CXL_REGION_ATTR(delete_region)
308 	NULL,
309 };
310 
311 static bool can_create_pmem(struct cxl_root_decoder *cxlrd)
312 {
313 	unsigned long flags = CXL_DECODER_F_TYPE3 | CXL_DECODER_F_PMEM;
314 
315 	return (cxlrd->cxlsd.cxld.flags & flags) == flags;
316 }
317 
318 static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *a, int n)
319 {
320 	struct device *dev = kobj_to_dev(kobj);
321 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
322 
323 	if (a == CXL_REGION_ATTR(create_pmem_region) && !can_create_pmem(cxlrd))
324 		return 0;
325 
326 	if (a == CXL_REGION_ATTR(delete_region) && !can_create_pmem(cxlrd))
327 		return 0;
328 
329 	return a->mode;
330 }
331 
332 static struct attribute_group cxl_decoder_root_attribute_group = {
333 	.attrs = cxl_decoder_root_attrs,
334 	.is_visible = cxl_root_decoder_visible,
335 };
336 
337 static const struct attribute_group *cxl_decoder_root_attribute_groups[] = {
338 	&cxl_decoder_root_attribute_group,
339 	&cxl_decoder_base_attribute_group,
340 	&cxl_base_attribute_group,
341 	NULL,
342 };
343 
344 static struct attribute *cxl_decoder_switch_attrs[] = {
345 	&dev_attr_target_type.attr,
346 	&dev_attr_target_list.attr,
347 	SET_CXL_REGION_ATTR(region)
348 	NULL,
349 };
350 
351 static struct attribute_group cxl_decoder_switch_attribute_group = {
352 	.attrs = cxl_decoder_switch_attrs,
353 };
354 
355 static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = {
356 	&cxl_decoder_switch_attribute_group,
357 	&cxl_decoder_base_attribute_group,
358 	&cxl_base_attribute_group,
359 	NULL,
360 };
361 
362 static struct attribute *cxl_decoder_endpoint_attrs[] = {
363 	&dev_attr_target_type.attr,
364 	&dev_attr_mode.attr,
365 	&dev_attr_dpa_size.attr,
366 	&dev_attr_dpa_resource.attr,
367 	SET_CXL_REGION_ATTR(region)
368 	NULL,
369 };
370 
371 static struct attribute_group cxl_decoder_endpoint_attribute_group = {
372 	.attrs = cxl_decoder_endpoint_attrs,
373 };
374 
375 static const struct attribute_group *cxl_decoder_endpoint_attribute_groups[] = {
376 	&cxl_decoder_base_attribute_group,
377 	&cxl_decoder_endpoint_attribute_group,
378 	&cxl_base_attribute_group,
379 	NULL,
380 };
381 
382 static void __cxl_decoder_release(struct cxl_decoder *cxld)
383 {
384 	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
385 
386 	ida_free(&port->decoder_ida, cxld->id);
387 	put_device(&port->dev);
388 }
389 
390 static void cxl_endpoint_decoder_release(struct device *dev)
391 {
392 	struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
393 
394 	__cxl_decoder_release(&cxled->cxld);
395 	kfree(cxled);
396 }
397 
398 static void cxl_switch_decoder_release(struct device *dev)
399 {
400 	struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev);
401 
402 	__cxl_decoder_release(&cxlsd->cxld);
403 	kfree(cxlsd);
404 }
405 
406 struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev)
407 {
408 	if (dev_WARN_ONCE(dev, !is_root_decoder(dev),
409 			  "not a cxl_root_decoder device\n"))
410 		return NULL;
411 	return container_of(dev, struct cxl_root_decoder, cxlsd.cxld.dev);
412 }
413 EXPORT_SYMBOL_NS_GPL(to_cxl_root_decoder, CXL);
414 
415 static void cxl_root_decoder_release(struct device *dev)
416 {
417 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
418 
419 	if (atomic_read(&cxlrd->region_id) >= 0)
420 		memregion_free(atomic_read(&cxlrd->region_id));
421 	__cxl_decoder_release(&cxlrd->cxlsd.cxld);
422 	kfree(cxlrd);
423 }
424 
425 static const struct device_type cxl_decoder_endpoint_type = {
426 	.name = "cxl_decoder_endpoint",
427 	.release = cxl_endpoint_decoder_release,
428 	.groups = cxl_decoder_endpoint_attribute_groups,
429 };
430 
431 static const struct device_type cxl_decoder_switch_type = {
432 	.name = "cxl_decoder_switch",
433 	.release = cxl_switch_decoder_release,
434 	.groups = cxl_decoder_switch_attribute_groups,
435 };
436 
437 static const struct device_type cxl_decoder_root_type = {
438 	.name = "cxl_decoder_root",
439 	.release = cxl_root_decoder_release,
440 	.groups = cxl_decoder_root_attribute_groups,
441 };
442 
443 bool is_endpoint_decoder(struct device *dev)
444 {
445 	return dev->type == &cxl_decoder_endpoint_type;
446 }
447 
448 bool is_root_decoder(struct device *dev)
449 {
450 	return dev->type == &cxl_decoder_root_type;
451 }
452 EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL);
453 
454 bool is_switch_decoder(struct device *dev)
455 {
456 	return is_root_decoder(dev) || dev->type == &cxl_decoder_switch_type;
457 }
458 
459 struct cxl_decoder *to_cxl_decoder(struct device *dev)
460 {
461 	if (dev_WARN_ONCE(dev,
462 			  !is_switch_decoder(dev) && !is_endpoint_decoder(dev),
463 			  "not a cxl_decoder device\n"))
464 		return NULL;
465 	return container_of(dev, struct cxl_decoder, dev);
466 }
467 EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL);
468 
469 struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev)
470 {
471 	if (dev_WARN_ONCE(dev, !is_endpoint_decoder(dev),
472 			  "not a cxl_endpoint_decoder device\n"))
473 		return NULL;
474 	return container_of(dev, struct cxl_endpoint_decoder, cxld.dev);
475 }
476 EXPORT_SYMBOL_NS_GPL(to_cxl_endpoint_decoder, CXL);
477 
478 struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev)
479 {
480 	if (dev_WARN_ONCE(dev, !is_switch_decoder(dev),
481 			  "not a cxl_switch_decoder device\n"))
482 		return NULL;
483 	return container_of(dev, struct cxl_switch_decoder, cxld.dev);
484 }
485 
486 static void cxl_ep_release(struct cxl_ep *ep)
487 {
488 	put_device(ep->ep);
489 	kfree(ep);
490 }
491 
492 static void cxl_ep_remove(struct cxl_port *port, struct cxl_ep *ep)
493 {
494 	if (!ep)
495 		return;
496 	xa_erase(&port->endpoints, (unsigned long) ep->ep);
497 	cxl_ep_release(ep);
498 }
499 
500 static void cxl_port_release(struct device *dev)
501 {
502 	struct cxl_port *port = to_cxl_port(dev);
503 	unsigned long index;
504 	struct cxl_ep *ep;
505 
506 	xa_for_each(&port->endpoints, index, ep)
507 		cxl_ep_remove(port, ep);
508 	xa_destroy(&port->endpoints);
509 	xa_destroy(&port->dports);
510 	xa_destroy(&port->regions);
511 	ida_free(&cxl_port_ida, port->id);
512 	kfree(port);
513 }
514 
515 static const struct attribute_group *cxl_port_attribute_groups[] = {
516 	&cxl_base_attribute_group,
517 	NULL,
518 };
519 
520 static const struct device_type cxl_port_type = {
521 	.name = "cxl_port",
522 	.release = cxl_port_release,
523 	.groups = cxl_port_attribute_groups,
524 };
525 
526 bool is_cxl_port(struct device *dev)
527 {
528 	return dev->type == &cxl_port_type;
529 }
530 EXPORT_SYMBOL_NS_GPL(is_cxl_port, CXL);
531 
532 struct cxl_port *to_cxl_port(struct device *dev)
533 {
534 	if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
535 			  "not a cxl_port device\n"))
536 		return NULL;
537 	return container_of(dev, struct cxl_port, dev);
538 }
539 EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL);
540 
541 static void unregister_port(void *_port)
542 {
543 	struct cxl_port *port = _port;
544 	struct cxl_port *parent;
545 	struct device *lock_dev;
546 
547 	if (is_cxl_root(port))
548 		parent = NULL;
549 	else
550 		parent = to_cxl_port(port->dev.parent);
551 
552 	/*
553 	 * CXL root port's and the first level of ports are unregistered
554 	 * under the platform firmware device lock, all other ports are
555 	 * unregistered while holding their parent port lock.
556 	 */
557 	if (!parent)
558 		lock_dev = port->uport;
559 	else if (is_cxl_root(parent))
560 		lock_dev = parent->uport;
561 	else
562 		lock_dev = &parent->dev;
563 
564 	device_lock_assert(lock_dev);
565 	port->dead = true;
566 	device_unregister(&port->dev);
567 }
568 
569 static void cxl_unlink_uport(void *_port)
570 {
571 	struct cxl_port *port = _port;
572 
573 	sysfs_remove_link(&port->dev.kobj, "uport");
574 }
575 
576 static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
577 {
578 	int rc;
579 
580 	rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport");
581 	if (rc)
582 		return rc;
583 	return devm_add_action_or_reset(host, cxl_unlink_uport, port);
584 }
585 
586 static struct lock_class_key cxl_port_key;
587 
588 static struct cxl_port *cxl_port_alloc(struct device *uport,
589 				       resource_size_t component_reg_phys,
590 				       struct cxl_dport *parent_dport)
591 {
592 	struct cxl_port *port;
593 	struct device *dev;
594 	int rc;
595 
596 	port = kzalloc(sizeof(*port), GFP_KERNEL);
597 	if (!port)
598 		return ERR_PTR(-ENOMEM);
599 
600 	rc = ida_alloc(&cxl_port_ida, GFP_KERNEL);
601 	if (rc < 0)
602 		goto err;
603 	port->id = rc;
604 	port->uport = uport;
605 
606 	/*
607 	 * The top-level cxl_port "cxl_root" does not have a cxl_port as
608 	 * its parent and it does not have any corresponding component
609 	 * registers as its decode is described by a fixed platform
610 	 * description.
611 	 */
612 	dev = &port->dev;
613 	if (parent_dport) {
614 		struct cxl_port *parent_port = parent_dport->port;
615 		struct cxl_port *iter;
616 
617 		dev->parent = &parent_port->dev;
618 		port->depth = parent_port->depth + 1;
619 		port->parent_dport = parent_dport;
620 
621 		/*
622 		 * walk to the host bridge, or the first ancestor that knows
623 		 * the host bridge
624 		 */
625 		iter = port;
626 		while (!iter->host_bridge &&
627 		       !is_cxl_root(to_cxl_port(iter->dev.parent)))
628 			iter = to_cxl_port(iter->dev.parent);
629 		if (iter->host_bridge)
630 			port->host_bridge = iter->host_bridge;
631 		else if (parent_dport->rch)
632 			port->host_bridge = parent_dport->dport;
633 		else
634 			port->host_bridge = iter->uport;
635 		dev_dbg(uport, "host-bridge: %s\n", dev_name(port->host_bridge));
636 	} else
637 		dev->parent = uport;
638 
639 	port->component_reg_phys = component_reg_phys;
640 	ida_init(&port->decoder_ida);
641 	port->hdm_end = -1;
642 	port->commit_end = -1;
643 	xa_init(&port->dports);
644 	xa_init(&port->endpoints);
645 	xa_init(&port->regions);
646 
647 	device_initialize(dev);
648 	lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth);
649 	device_set_pm_not_required(dev);
650 	dev->bus = &cxl_bus_type;
651 	dev->type = &cxl_port_type;
652 
653 	return port;
654 
655 err:
656 	kfree(port);
657 	return ERR_PTR(rc);
658 }
659 
660 static struct cxl_port *__devm_cxl_add_port(struct device *host,
661 					    struct device *uport,
662 					    resource_size_t component_reg_phys,
663 					    struct cxl_dport *parent_dport)
664 {
665 	struct cxl_port *port;
666 	struct device *dev;
667 	int rc;
668 
669 	port = cxl_port_alloc(uport, component_reg_phys, parent_dport);
670 	if (IS_ERR(port))
671 		return port;
672 
673 	dev = &port->dev;
674 	if (is_cxl_memdev(uport))
675 		rc = dev_set_name(dev, "endpoint%d", port->id);
676 	else if (parent_dport)
677 		rc = dev_set_name(dev, "port%d", port->id);
678 	else
679 		rc = dev_set_name(dev, "root%d", port->id);
680 	if (rc)
681 		goto err;
682 
683 	rc = device_add(dev);
684 	if (rc)
685 		goto err;
686 
687 	rc = devm_add_action_or_reset(host, unregister_port, port);
688 	if (rc)
689 		return ERR_PTR(rc);
690 
691 	rc = devm_cxl_link_uport(host, port);
692 	if (rc)
693 		return ERR_PTR(rc);
694 
695 	return port;
696 
697 err:
698 	put_device(dev);
699 	return ERR_PTR(rc);
700 }
701 
702 /**
703  * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
704  * @host: host device for devm operations
705  * @uport: "physical" device implementing this upstream port
706  * @component_reg_phys: (optional) for configurable cxl_port instances
707  * @parent_dport: next hop up in the CXL memory decode hierarchy
708  */
709 struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
710 				   resource_size_t component_reg_phys,
711 				   struct cxl_dport *parent_dport)
712 {
713 	struct cxl_port *port, *parent_port;
714 
715 	port = __devm_cxl_add_port(host, uport, component_reg_phys,
716 				   parent_dport);
717 
718 	parent_port = parent_dport ? parent_dport->port : NULL;
719 	if (IS_ERR(port)) {
720 		dev_dbg(uport, "Failed to add %s%s%s%s: %ld\n",
721 			dev_name(&port->dev),
722 			parent_port ? " to " : "",
723 			parent_port ? dev_name(&parent_port->dev) : "",
724 			parent_port ? "" : " (root port)",
725 			PTR_ERR(port));
726 	} else {
727 		dev_dbg(uport, "%s added%s%s%s\n",
728 			dev_name(&port->dev),
729 			parent_port ? " to " : "",
730 			parent_port ? dev_name(&parent_port->dev) : "",
731 			parent_port ? "" : " (root port)");
732 	}
733 
734 	return port;
735 }
736 EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL);
737 
738 struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port)
739 {
740 	/* There is no pci_bus associated with a CXL platform-root port */
741 	if (is_cxl_root(port))
742 		return NULL;
743 
744 	if (dev_is_pci(port->uport)) {
745 		struct pci_dev *pdev = to_pci_dev(port->uport);
746 
747 		return pdev->subordinate;
748 	}
749 
750 	return xa_load(&cxl_root_buses, (unsigned long)port->uport);
751 }
752 EXPORT_SYMBOL_NS_GPL(cxl_port_to_pci_bus, CXL);
753 
754 static void unregister_pci_bus(void *uport)
755 {
756 	xa_erase(&cxl_root_buses, (unsigned long)uport);
757 }
758 
759 int devm_cxl_register_pci_bus(struct device *host, struct device *uport,
760 			      struct pci_bus *bus)
761 {
762 	int rc;
763 
764 	if (dev_is_pci(uport))
765 		return -EINVAL;
766 
767 	rc = xa_insert(&cxl_root_buses, (unsigned long)uport, bus, GFP_KERNEL);
768 	if (rc)
769 		return rc;
770 	return devm_add_action_or_reset(host, unregister_pci_bus, uport);
771 }
772 EXPORT_SYMBOL_NS_GPL(devm_cxl_register_pci_bus, CXL);
773 
774 static bool dev_is_cxl_root_child(struct device *dev)
775 {
776 	struct cxl_port *port, *parent;
777 
778 	if (!is_cxl_port(dev))
779 		return false;
780 
781 	port = to_cxl_port(dev);
782 	if (is_cxl_root(port))
783 		return false;
784 
785 	parent = to_cxl_port(port->dev.parent);
786 	if (is_cxl_root(parent))
787 		return true;
788 
789 	return false;
790 }
791 
792 /* Find a 2nd level CXL port that has a dport that is an ancestor of @match */
793 static int match_root_child(struct device *dev, const void *match)
794 {
795 	const struct device *iter = NULL;
796 	struct cxl_dport *dport;
797 	struct cxl_port *port;
798 
799 	if (!dev_is_cxl_root_child(dev))
800 		return 0;
801 
802 	port = to_cxl_port(dev);
803 	iter = match;
804 	while (iter) {
805 		dport = cxl_find_dport_by_dev(port, iter);
806 		if (dport)
807 			break;
808 		iter = iter->parent;
809 	}
810 
811 	return !!iter;
812 }
813 
814 struct cxl_port *find_cxl_root(struct device *dev)
815 {
816 	struct device *port_dev;
817 	struct cxl_port *root;
818 
819 	port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child);
820 	if (!port_dev)
821 		return NULL;
822 
823 	root = to_cxl_port(port_dev->parent);
824 	get_device(&root->dev);
825 	put_device(port_dev);
826 	return root;
827 }
828 EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL);
829 
830 static struct cxl_dport *find_dport(struct cxl_port *port, int id)
831 {
832 	struct cxl_dport *dport;
833 	unsigned long index;
834 
835 	device_lock_assert(&port->dev);
836 	xa_for_each(&port->dports, index, dport)
837 		if (dport->port_id == id)
838 			return dport;
839 	return NULL;
840 }
841 
842 static int add_dport(struct cxl_port *port, struct cxl_dport *new)
843 {
844 	struct cxl_dport *dup;
845 	int rc;
846 
847 	device_lock_assert(&port->dev);
848 	dup = find_dport(port, new->port_id);
849 	if (dup) {
850 		dev_err(&port->dev,
851 			"unable to add dport%d-%s non-unique port id (%s)\n",
852 			new->port_id, dev_name(new->dport),
853 			dev_name(dup->dport));
854 		return -EBUSY;
855 	}
856 
857 	rc = xa_insert(&port->dports, (unsigned long)new->dport, new,
858 		       GFP_KERNEL);
859 	if (rc)
860 		return rc;
861 
862 	port->nr_dports++;
863 	return 0;
864 }
865 
866 /*
867  * Since root-level CXL dports cannot be enumerated by PCI they are not
868  * enumerated by the common port driver that acquires the port lock over
869  * dport add/remove. Instead, root dports are manually added by a
870  * platform driver and cond_cxl_root_lock() is used to take the missing
871  * port lock in that case.
872  */
873 static void cond_cxl_root_lock(struct cxl_port *port)
874 {
875 	if (is_cxl_root(port))
876 		device_lock(&port->dev);
877 }
878 
879 static void cond_cxl_root_unlock(struct cxl_port *port)
880 {
881 	if (is_cxl_root(port))
882 		device_unlock(&port->dev);
883 }
884 
885 static void cxl_dport_remove(void *data)
886 {
887 	struct cxl_dport *dport = data;
888 	struct cxl_port *port = dport->port;
889 
890 	xa_erase(&port->dports, (unsigned long) dport->dport);
891 	put_device(dport->dport);
892 }
893 
894 static void cxl_dport_unlink(void *data)
895 {
896 	struct cxl_dport *dport = data;
897 	struct cxl_port *port = dport->port;
898 	char link_name[CXL_TARGET_STRLEN];
899 
900 	sprintf(link_name, "dport%d", dport->port_id);
901 	sysfs_remove_link(&port->dev.kobj, link_name);
902 }
903 
904 static struct cxl_dport *
905 __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
906 		     int port_id, resource_size_t component_reg_phys,
907 		     resource_size_t rcrb)
908 {
909 	char link_name[CXL_TARGET_STRLEN];
910 	struct cxl_dport *dport;
911 	struct device *host;
912 	int rc;
913 
914 	if (is_cxl_root(port))
915 		host = port->uport;
916 	else
917 		host = &port->dev;
918 
919 	if (!host->driver) {
920 		dev_WARN_ONCE(&port->dev, 1, "dport:%s bad devm context\n",
921 			      dev_name(dport_dev));
922 		return ERR_PTR(-ENXIO);
923 	}
924 
925 	if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >=
926 	    CXL_TARGET_STRLEN)
927 		return ERR_PTR(-EINVAL);
928 
929 	dport = devm_kzalloc(host, sizeof(*dport), GFP_KERNEL);
930 	if (!dport)
931 		return ERR_PTR(-ENOMEM);
932 
933 	dport->dport = dport_dev;
934 	dport->port_id = port_id;
935 	dport->component_reg_phys = component_reg_phys;
936 	dport->port = port;
937 	if (rcrb != CXL_RESOURCE_NONE)
938 		dport->rch = true;
939 	dport->rcrb = rcrb;
940 
941 	cond_cxl_root_lock(port);
942 	rc = add_dport(port, dport);
943 	cond_cxl_root_unlock(port);
944 	if (rc)
945 		return ERR_PTR(rc);
946 
947 	get_device(dport_dev);
948 	rc = devm_add_action_or_reset(host, cxl_dport_remove, dport);
949 	if (rc)
950 		return ERR_PTR(rc);
951 
952 	rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name);
953 	if (rc)
954 		return ERR_PTR(rc);
955 
956 	rc = devm_add_action_or_reset(host, cxl_dport_unlink, dport);
957 	if (rc)
958 		return ERR_PTR(rc);
959 
960 	return dport;
961 }
962 
963 /**
964  * devm_cxl_add_dport - append VH downstream port data to a cxl_port
965  * @port: the cxl_port that references this dport
966  * @dport_dev: firmware or PCI device representing the dport
967  * @port_id: identifier for this dport in a decoder's target list
968  * @component_reg_phys: optional location of CXL component registers
969  *
970  * Note that dports are appended to the devm release action's of the
971  * either the port's host (for root ports), or the port itself (for
972  * switch ports)
973  */
974 struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
975 				     struct device *dport_dev, int port_id,
976 				     resource_size_t component_reg_phys)
977 {
978 	struct cxl_dport *dport;
979 
980 	dport = __devm_cxl_add_dport(port, dport_dev, port_id,
981 				     component_reg_phys, CXL_RESOURCE_NONE);
982 	if (IS_ERR(dport)) {
983 		dev_dbg(dport_dev, "failed to add dport to %s: %ld\n",
984 			dev_name(&port->dev), PTR_ERR(dport));
985 	} else {
986 		dev_dbg(dport_dev, "dport added to %s\n",
987 			dev_name(&port->dev));
988 	}
989 
990 	return dport;
991 }
992 EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL);
993 
994 /**
995  * devm_cxl_add_rch_dport - append RCH downstream port data to a cxl_port
996  * @port: the cxl_port that references this dport
997  * @dport_dev: firmware or PCI device representing the dport
998  * @port_id: identifier for this dport in a decoder's target list
999  * @component_reg_phys: optional location of CXL component registers
1000  * @rcrb: mandatory location of a Root Complex Register Block
1001  *
1002  * See CXL 3.0 9.11.8 CXL Devices Attached to an RCH
1003  */
1004 struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
1005 					 struct device *dport_dev, int port_id,
1006 					 resource_size_t component_reg_phys,
1007 					 resource_size_t rcrb)
1008 {
1009 	struct cxl_dport *dport;
1010 
1011 	if (rcrb == CXL_RESOURCE_NONE) {
1012 		dev_dbg(&port->dev, "failed to add RCH dport, missing RCRB\n");
1013 		return ERR_PTR(-EINVAL);
1014 	}
1015 
1016 	dport = __devm_cxl_add_dport(port, dport_dev, port_id,
1017 				     component_reg_phys, rcrb);
1018 	if (IS_ERR(dport)) {
1019 		dev_dbg(dport_dev, "failed to add RCH dport to %s: %ld\n",
1020 			dev_name(&port->dev), PTR_ERR(dport));
1021 	} else {
1022 		dev_dbg(dport_dev, "RCH dport added to %s\n",
1023 			dev_name(&port->dev));
1024 	}
1025 
1026 	return dport;
1027 }
1028 EXPORT_SYMBOL_NS_GPL(devm_cxl_add_rch_dport, CXL);
1029 
1030 static int add_ep(struct cxl_ep *new)
1031 {
1032 	struct cxl_port *port = new->dport->port;
1033 	int rc;
1034 
1035 	device_lock(&port->dev);
1036 	if (port->dead) {
1037 		device_unlock(&port->dev);
1038 		return -ENXIO;
1039 	}
1040 	rc = xa_insert(&port->endpoints, (unsigned long)new->ep, new,
1041 		       GFP_KERNEL);
1042 	device_unlock(&port->dev);
1043 
1044 	return rc;
1045 }
1046 
1047 /**
1048  * cxl_add_ep - register an endpoint's interest in a port
1049  * @dport: the dport that routes to @ep_dev
1050  * @ep_dev: device representing the endpoint
1051  *
1052  * Intermediate CXL ports are scanned based on the arrival of endpoints.
1053  * When those endpoints depart the port can be destroyed once all
1054  * endpoints that care about that port have been removed.
1055  */
1056 static int cxl_add_ep(struct cxl_dport *dport, struct device *ep_dev)
1057 {
1058 	struct cxl_ep *ep;
1059 	int rc;
1060 
1061 	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
1062 	if (!ep)
1063 		return -ENOMEM;
1064 
1065 	ep->ep = get_device(ep_dev);
1066 	ep->dport = dport;
1067 
1068 	rc = add_ep(ep);
1069 	if (rc)
1070 		cxl_ep_release(ep);
1071 	return rc;
1072 }
1073 
/* Search parameters handed to match_port_by_dport() via bus_find_device() */
struct cxl_find_port_ctx {
	/* device to look up among each candidate port's dports */
	const struct device *dport_dev;
	/* when set, only consider ports whose parent is this port */
	const struct cxl_port *parent_port;
	/* when set, receives the dport lookup result for each port tested */
	struct cxl_dport **dport;
};
1079 
1080 static int match_port_by_dport(struct device *dev, const void *data)
1081 {
1082 	const struct cxl_find_port_ctx *ctx = data;
1083 	struct cxl_dport *dport;
1084 	struct cxl_port *port;
1085 
1086 	if (!is_cxl_port(dev))
1087 		return 0;
1088 	if (ctx->parent_port && dev->parent != &ctx->parent_port->dev)
1089 		return 0;
1090 
1091 	port = to_cxl_port(dev);
1092 	dport = cxl_find_dport_by_dev(port, ctx->dport_dev);
1093 	if (ctx->dport)
1094 		*ctx->dport = dport;
1095 	return dport != NULL;
1096 }
1097 
1098 static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx)
1099 {
1100 	struct device *dev;
1101 
1102 	if (!ctx->dport_dev)
1103 		return NULL;
1104 
1105 	dev = bus_find_device(&cxl_bus_type, NULL, ctx, match_port_by_dport);
1106 	if (dev)
1107 		return to_cxl_port(dev);
1108 	return NULL;
1109 }
1110 
1111 static struct cxl_port *find_cxl_port(struct device *dport_dev,
1112 				      struct cxl_dport **dport)
1113 {
1114 	struct cxl_find_port_ctx ctx = {
1115 		.dport_dev = dport_dev,
1116 		.dport = dport,
1117 	};
1118 	struct cxl_port *port;
1119 
1120 	port = __find_cxl_port(&ctx);
1121 	return port;
1122 }
1123 
1124 static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port,
1125 					 struct device *dport_dev,
1126 					 struct cxl_dport **dport)
1127 {
1128 	struct cxl_find_port_ctx ctx = {
1129 		.dport_dev = dport_dev,
1130 		.parent_port = parent_port,
1131 		.dport = dport,
1132 	};
1133 	struct cxl_port *port;
1134 
1135 	port = __find_cxl_port(&ctx);
1136 	return port;
1137 }
1138 
1139 /*
1140  * All users of grandparent() are using it to walk PCIe-like swich port
1141  * hierarchy. A PCIe switch is comprised of a bridge device representing the
1142  * upstream switch port and N bridges representing downstream switch ports. When
1143  * bridges stack the grand-parent of a downstream switch port is another
1144  * downstream switch port in the immediate ancestor switch.
1145  */
1146 static struct device *grandparent(struct device *dev)
1147 {
1148 	if (dev && dev->parent)
1149 		return dev->parent->parent;
1150 	return NULL;
1151 }
1152 
1153 static void delete_endpoint(void *data)
1154 {
1155 	struct cxl_memdev *cxlmd = data;
1156 	struct cxl_port *endpoint = dev_get_drvdata(&cxlmd->dev);
1157 	struct cxl_port *parent_port;
1158 	struct device *parent;
1159 
1160 	parent_port = cxl_mem_find_port(cxlmd, NULL);
1161 	if (!parent_port)
1162 		goto out;
1163 	parent = &parent_port->dev;
1164 
1165 	device_lock(parent);
1166 	if (parent->driver && !endpoint->dead) {
1167 		devm_release_action(parent, cxl_unlink_uport, endpoint);
1168 		devm_release_action(parent, unregister_port, endpoint);
1169 	}
1170 	device_unlock(parent);
1171 	put_device(parent);
1172 out:
1173 	put_device(&endpoint->dev);
1174 }
1175 
1176 int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
1177 {
1178 	struct device *dev = &cxlmd->dev;
1179 
1180 	get_device(&endpoint->dev);
1181 	dev_set_drvdata(dev, endpoint);
1182 	return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
1183 }
1184 EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL);
1185 
1186 /*
1187  * The natural end of life of a non-root 'cxl_port' is when its parent port goes
1188  * through a ->remove() event ("top-down" unregistration). The unnatural trigger
1189  * for a port to be unregistered is when all memdevs beneath that port have gone
1190  * through ->remove(). This "bottom-up" removal selectively removes individual
1191  * child ports manually. This depends on devm_cxl_add_port() to not change is
1192  * devm action registration order, and for dports to have already been
1193  * destroyed by reap_dports().
1194  */
1195 static void delete_switch_port(struct cxl_port *port)
1196 {
1197 	devm_release_action(port->dev.parent, cxl_unlink_uport, port);
1198 	devm_release_action(port->dev.parent, unregister_port, port);
1199 }
1200 
1201 static void reap_dports(struct cxl_port *port)
1202 {
1203 	struct cxl_dport *dport;
1204 	unsigned long index;
1205 
1206 	device_lock_assert(&port->dev);
1207 
1208 	xa_for_each(&port->dports, index, dport) {
1209 		devm_release_action(&port->dev, cxl_dport_unlink, dport);
1210 		devm_release_action(&port->dev, cxl_dport_remove, dport);
1211 		devm_kfree(&port->dev, dport);
1212 	}
1213 }
1214 
1215 static void cxl_detach_ep(void *data)
1216 {
1217 	struct cxl_memdev *cxlmd = data;
1218 	struct device *iter;
1219 
1220 	for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) {
1221 		struct device *dport_dev = grandparent(iter);
1222 		struct cxl_port *port, *parent_port;
1223 		struct cxl_ep *ep;
1224 		bool died = false;
1225 
1226 		if (!dport_dev)
1227 			break;
1228 
1229 		port = find_cxl_port(dport_dev, NULL);
1230 		if (!port)
1231 			continue;
1232 
1233 		if (is_cxl_root(port)) {
1234 			put_device(&port->dev);
1235 			continue;
1236 		}
1237 
1238 		parent_port = to_cxl_port(port->dev.parent);
1239 		device_lock(&parent_port->dev);
1240 		if (!parent_port->dev.driver) {
1241 			/*
1242 			 * The bottom-up race to delete the port lost to a
1243 			 * top-down port disable, give up here, because the
1244 			 * parent_port ->remove() will have cleaned up all
1245 			 * descendants.
1246 			 */
1247 			device_unlock(&parent_port->dev);
1248 			put_device(&port->dev);
1249 			continue;
1250 		}
1251 
1252 		device_lock(&port->dev);
1253 		ep = cxl_ep_load(port, cxlmd);
1254 		dev_dbg(&cxlmd->dev, "disconnect %s from %s\n",
1255 			ep ? dev_name(ep->ep) : "", dev_name(&port->dev));
1256 		cxl_ep_remove(port, ep);
1257 		if (ep && !port->dead && xa_empty(&port->endpoints) &&
1258 		    !is_cxl_root(parent_port)) {
1259 			/*
1260 			 * This was the last ep attached to a dynamically
1261 			 * enumerated port. Block new cxl_add_ep() and garbage
1262 			 * collect the port.
1263 			 */
1264 			died = true;
1265 			port->dead = true;
1266 			reap_dports(port);
1267 		}
1268 		device_unlock(&port->dev);
1269 
1270 		if (died) {
1271 			dev_dbg(&cxlmd->dev, "delete %s\n",
1272 				dev_name(&port->dev));
1273 			delete_switch_port(port);
1274 		}
1275 		put_device(&port->dev);
1276 		device_unlock(&parent_port->dev);
1277 	}
1278 }
1279 
1280 static resource_size_t find_component_registers(struct device *dev)
1281 {
1282 	struct cxl_register_map map;
1283 	struct pci_dev *pdev;
1284 
1285 	/*
1286 	 * Theoretically, CXL component registers can be hosted on a
1287 	 * non-PCI device, in practice, only cxl_test hits this case.
1288 	 */
1289 	if (!dev_is_pci(dev))
1290 		return CXL_RESOURCE_NONE;
1291 
1292 	pdev = to_pci_dev(dev);
1293 
1294 	cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
1295 	return map.resource;
1296 }
1297 
1298 static int add_port_attach_ep(struct cxl_memdev *cxlmd,
1299 			      struct device *uport_dev,
1300 			      struct device *dport_dev)
1301 {
1302 	struct device *dparent = grandparent(dport_dev);
1303 	struct cxl_port *port, *parent_port = NULL;
1304 	struct cxl_dport *dport, *parent_dport;
1305 	resource_size_t component_reg_phys;
1306 	int rc;
1307 
1308 	if (!dparent) {
1309 		/*
1310 		 * The iteration reached the topology root without finding the
1311 		 * CXL-root 'cxl_port' on a previous iteration, fail for now to
1312 		 * be re-probed after platform driver attaches.
1313 		 */
1314 		dev_dbg(&cxlmd->dev, "%s is a root dport\n",
1315 			dev_name(dport_dev));
1316 		return -ENXIO;
1317 	}
1318 
1319 	parent_port = find_cxl_port(dparent, &parent_dport);
1320 	if (!parent_port) {
1321 		/* iterate to create this parent_port */
1322 		return -EAGAIN;
1323 	}
1324 
1325 	device_lock(&parent_port->dev);
1326 	if (!parent_port->dev.driver) {
1327 		dev_warn(&cxlmd->dev,
1328 			 "port %s:%s disabled, failed to enumerate CXL.mem\n",
1329 			 dev_name(&parent_port->dev), dev_name(uport_dev));
1330 		port = ERR_PTR(-ENXIO);
1331 		goto out;
1332 	}
1333 
1334 	port = find_cxl_port_at(parent_port, dport_dev, &dport);
1335 	if (!port) {
1336 		component_reg_phys = find_component_registers(uport_dev);
1337 		port = devm_cxl_add_port(&parent_port->dev, uport_dev,
1338 					 component_reg_phys, parent_dport);
1339 		/* retry find to pick up the new dport information */
1340 		if (!IS_ERR(port))
1341 			port = find_cxl_port_at(parent_port, dport_dev, &dport);
1342 	}
1343 out:
1344 	device_unlock(&parent_port->dev);
1345 
1346 	if (IS_ERR(port))
1347 		rc = PTR_ERR(port);
1348 	else {
1349 		dev_dbg(&cxlmd->dev, "add to new port %s:%s\n",
1350 			dev_name(&port->dev), dev_name(port->uport));
1351 		rc = cxl_add_ep(dport, &cxlmd->dev);
1352 		if (rc == -EBUSY) {
1353 			/*
1354 			 * "can't" happen, but this error code means
1355 			 * something to the caller, so translate it.
1356 			 */
1357 			rc = -ENXIO;
1358 		}
1359 		put_device(&port->dev);
1360 	}
1361 
1362 	put_device(&parent_port->dev);
1363 	return rc;
1364 }
1365 
1366 int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
1367 {
1368 	struct device *dev = &cxlmd->dev;
1369 	struct device *iter;
1370 	int rc;
1371 
1372 	/*
1373 	 * Skip intermediate port enumeration in the RCH case, there
1374 	 * are no ports in between a host bridge and an endpoint.
1375 	 */
1376 	if (cxlmd->cxlds->rcd)
1377 		return 0;
1378 
1379 	rc = devm_add_action_or_reset(&cxlmd->dev, cxl_detach_ep, cxlmd);
1380 	if (rc)
1381 		return rc;
1382 
1383 	/*
1384 	 * Scan for and add all cxl_ports in this device's ancestry.
1385 	 * Repeat until no more ports are added. Abort if a port add
1386 	 * attempt fails.
1387 	 */
1388 retry:
1389 	for (iter = dev; iter; iter = grandparent(iter)) {
1390 		struct device *dport_dev = grandparent(iter);
1391 		struct device *uport_dev;
1392 		struct cxl_dport *dport;
1393 		struct cxl_port *port;
1394 
1395 		if (!dport_dev)
1396 			return 0;
1397 
1398 		uport_dev = dport_dev->parent;
1399 		if (!uport_dev) {
1400 			dev_warn(dev, "at %s no parent for dport: %s\n",
1401 				 dev_name(iter), dev_name(dport_dev));
1402 			return -ENXIO;
1403 		}
1404 
1405 		dev_dbg(dev, "scan: iter: %s dport_dev: %s parent: %s\n",
1406 			dev_name(iter), dev_name(dport_dev),
1407 			dev_name(uport_dev));
1408 		port = find_cxl_port(dport_dev, &dport);
1409 		if (port) {
1410 			dev_dbg(&cxlmd->dev,
1411 				"found already registered port %s:%s\n",
1412 				dev_name(&port->dev), dev_name(port->uport));
1413 			rc = cxl_add_ep(dport, &cxlmd->dev);
1414 
1415 			/*
1416 			 * If the endpoint already exists in the port's list,
1417 			 * that's ok, it was added on a previous pass.
1418 			 * Otherwise, retry in add_port_attach_ep() after taking
1419 			 * the parent_port lock as the current port may be being
1420 			 * reaped.
1421 			 */
1422 			if (rc && rc != -EBUSY) {
1423 				put_device(&port->dev);
1424 				return rc;
1425 			}
1426 
1427 			/* Any more ports to add between this one and the root? */
1428 			if (!dev_is_cxl_root_child(&port->dev)) {
1429 				put_device(&port->dev);
1430 				continue;
1431 			}
1432 
1433 			put_device(&port->dev);
1434 			return 0;
1435 		}
1436 
1437 		rc = add_port_attach_ep(cxlmd, uport_dev, dport_dev);
1438 		/* port missing, try to add parent */
1439 		if (rc == -EAGAIN)
1440 			continue;
1441 		/* failed to add ep or port */
1442 		if (rc)
1443 			return rc;
1444 		/* port added, new descendants possible, start over */
1445 		goto retry;
1446 	}
1447 
1448 	return 0;
1449 }
1450 EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_ports, CXL);
1451 
1452 struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd,
1453 				   struct cxl_dport **dport)
1454 {
1455 	return find_cxl_port(grandparent(&cxlmd->dev), dport);
1456 }
1457 EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, CXL);
1458 
1459 static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd,
1460 				    struct cxl_port *port, int *target_map)
1461 {
1462 	int i, rc = 0;
1463 
1464 	if (!target_map)
1465 		return 0;
1466 
1467 	device_lock_assert(&port->dev);
1468 
1469 	if (xa_empty(&port->dports))
1470 		return -EINVAL;
1471 
1472 	write_seqlock(&cxlsd->target_lock);
1473 	for (i = 0; i < cxlsd->nr_targets; i++) {
1474 		struct cxl_dport *dport = find_dport(port, target_map[i]);
1475 
1476 		if (!dport) {
1477 			rc = -ENXIO;
1478 			break;
1479 		}
1480 		cxlsd->target[i] = dport;
1481 	}
1482 	write_sequnlock(&cxlsd->target_lock);
1483 
1484 	return rc;
1485 }
1486 
1487 struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
1488 {
1489 	struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
1490 	struct cxl_decoder *cxld = &cxlsd->cxld;
1491 	int iw;
1492 
1493 	iw = cxld->interleave_ways;
1494 	if (dev_WARN_ONCE(&cxld->dev, iw != cxlsd->nr_targets,
1495 			  "misconfigured root decoder\n"))
1496 		return NULL;
1497 
1498 	return cxlrd->cxlsd.target[pos % iw];
1499 }
1500 EXPORT_SYMBOL_NS_GPL(cxl_hb_modulo, CXL);
1501 
1502 static struct lock_class_key cxl_decoder_key;
1503 
1504 /**
1505  * cxl_decoder_init - Common decoder setup / initialization
1506  * @port: owning port of this decoder
1507  * @cxld: common decoder properties to initialize
1508  *
1509  * A port may contain one or more decoders. Each of those decoders
1510  * enable some address space for CXL.mem utilization. A decoder is
1511  * expected to be configured by the caller before registering via
1512  * cxl_decoder_add()
1513  */
1514 static int cxl_decoder_init(struct cxl_port *port, struct cxl_decoder *cxld)
1515 {
1516 	struct device *dev;
1517 	int rc;
1518 
1519 	rc = ida_alloc(&port->decoder_ida, GFP_KERNEL);
1520 	if (rc < 0)
1521 		return rc;
1522 
1523 	/* need parent to stick around to release the id */
1524 	get_device(&port->dev);
1525 	cxld->id = rc;
1526 
1527 	dev = &cxld->dev;
1528 	device_initialize(dev);
1529 	lockdep_set_class(&dev->mutex, &cxl_decoder_key);
1530 	device_set_pm_not_required(dev);
1531 	dev->parent = &port->dev;
1532 	dev->bus = &cxl_bus_type;
1533 
1534 	/* Pre initialize an "empty" decoder */
1535 	cxld->interleave_ways = 1;
1536 	cxld->interleave_granularity = PAGE_SIZE;
1537 	cxld->target_type = CXL_DECODER_EXPANDER;
1538 	cxld->hpa_range = (struct range) {
1539 		.start = 0,
1540 		.end = -1,
1541 	};
1542 
1543 	return 0;
1544 }
1545 
1546 static int cxl_switch_decoder_init(struct cxl_port *port,
1547 				   struct cxl_switch_decoder *cxlsd,
1548 				   int nr_targets)
1549 {
1550 	if (nr_targets > CXL_DECODER_MAX_INTERLEAVE)
1551 		return -EINVAL;
1552 
1553 	cxlsd->nr_targets = nr_targets;
1554 	seqlock_init(&cxlsd->target_lock);
1555 	return cxl_decoder_init(port, &cxlsd->cxld);
1556 }
1557 
1558 /**
1559  * cxl_root_decoder_alloc - Allocate a root level decoder
1560  * @port: owning CXL root of this decoder
1561  * @nr_targets: static number of downstream targets
1562  * @calc_hb: which host bridge covers the n'th position by granularity
1563  *
1564  * Return: A new cxl decoder to be registered by cxl_decoder_add(). A
1565  * 'CXL root' decoder is one that decodes from a top-level / static platform
1566  * firmware description of CXL resources into a CXL standard decode
1567  * topology.
1568  */
1569 struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
1570 						unsigned int nr_targets,
1571 						cxl_calc_hb_fn calc_hb)
1572 {
1573 	struct cxl_root_decoder *cxlrd;
1574 	struct cxl_switch_decoder *cxlsd;
1575 	struct cxl_decoder *cxld;
1576 	int rc;
1577 
1578 	if (!is_cxl_root(port))
1579 		return ERR_PTR(-EINVAL);
1580 
1581 	cxlrd = kzalloc(struct_size(cxlrd, cxlsd.target, nr_targets),
1582 			GFP_KERNEL);
1583 	if (!cxlrd)
1584 		return ERR_PTR(-ENOMEM);
1585 
1586 	cxlsd = &cxlrd->cxlsd;
1587 	rc = cxl_switch_decoder_init(port, cxlsd, nr_targets);
1588 	if (rc) {
1589 		kfree(cxlrd);
1590 		return ERR_PTR(rc);
1591 	}
1592 
1593 	cxlrd->calc_hb = calc_hb;
1594 
1595 	cxld = &cxlsd->cxld;
1596 	cxld->dev.type = &cxl_decoder_root_type;
1597 	/*
1598 	 * cxl_root_decoder_release() special cases negative ids to
1599 	 * detect memregion_alloc() failures.
1600 	 */
1601 	atomic_set(&cxlrd->region_id, -1);
1602 	rc = memregion_alloc(GFP_KERNEL);
1603 	if (rc < 0) {
1604 		put_device(&cxld->dev);
1605 		return ERR_PTR(rc);
1606 	}
1607 
1608 	atomic_set(&cxlrd->region_id, rc);
1609 	return cxlrd;
1610 }
1611 EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL);
1612 
1613 /**
1614  * cxl_switch_decoder_alloc - Allocate a switch level decoder
1615  * @port: owning CXL switch port of this decoder
1616  * @nr_targets: max number of dynamically addressable downstream targets
1617  *
1618  * Return: A new cxl decoder to be registered by cxl_decoder_add(). A
1619  * 'switch' decoder is any decoder that can be enumerated by PCIe
1620  * topology and the HDM Decoder Capability. This includes the decoders
1621  * that sit between Switch Upstream Ports / Switch Downstream Ports and
1622  * Host Bridges / Root Ports.
1623  */
1624 struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
1625 						    unsigned int nr_targets)
1626 {
1627 	struct cxl_switch_decoder *cxlsd;
1628 	struct cxl_decoder *cxld;
1629 	int rc;
1630 
1631 	if (is_cxl_root(port) || is_cxl_endpoint(port))
1632 		return ERR_PTR(-EINVAL);
1633 
1634 	cxlsd = kzalloc(struct_size(cxlsd, target, nr_targets), GFP_KERNEL);
1635 	if (!cxlsd)
1636 		return ERR_PTR(-ENOMEM);
1637 
1638 	rc = cxl_switch_decoder_init(port, cxlsd, nr_targets);
1639 	if (rc) {
1640 		kfree(cxlsd);
1641 		return ERR_PTR(rc);
1642 	}
1643 
1644 	cxld = &cxlsd->cxld;
1645 	cxld->dev.type = &cxl_decoder_switch_type;
1646 	return cxlsd;
1647 }
1648 EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL);
1649 
1650 /**
1651  * cxl_endpoint_decoder_alloc - Allocate an endpoint decoder
1652  * @port: owning port of this decoder
1653  *
1654  * Return: A new cxl decoder to be registered by cxl_decoder_add()
1655  */
1656 struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port)
1657 {
1658 	struct cxl_endpoint_decoder *cxled;
1659 	struct cxl_decoder *cxld;
1660 	int rc;
1661 
1662 	if (!is_cxl_endpoint(port))
1663 		return ERR_PTR(-EINVAL);
1664 
1665 	cxled = kzalloc(sizeof(*cxled), GFP_KERNEL);
1666 	if (!cxled)
1667 		return ERR_PTR(-ENOMEM);
1668 
1669 	cxled->pos = -1;
1670 	cxld = &cxled->cxld;
1671 	rc = cxl_decoder_init(port, cxld);
1672 	if (rc)	 {
1673 		kfree(cxled);
1674 		return ERR_PTR(rc);
1675 	}
1676 
1677 	cxld->dev.type = &cxl_decoder_endpoint_type;
1678 	return cxled;
1679 }
1680 EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, CXL);
1681 
1682 /**
1683  * cxl_decoder_add_locked - Add a decoder with targets
1684  * @cxld: The cxl decoder allocated by cxl_<type>_decoder_alloc()
1685  * @target_map: A list of downstream ports that this decoder can direct memory
1686  *              traffic to. These numbers should correspond with the port number
1687  *              in the PCIe Link Capabilities structure.
1688  *
1689  * Certain types of decoders may not have any targets. The main example of this
1690  * is an endpoint device. A more awkward example is a hostbridge whose root
1691  * ports get hot added (technically possible, though unlikely).
1692  *
1693  * This is the locked variant of cxl_decoder_add().
1694  *
1695  * Context: Process context. Expects the device lock of the port that owns the
1696  *	    @cxld to be held.
1697  *
1698  * Return: Negative error code if the decoder wasn't properly configured; else
1699  *	   returns 0.
1700  */
1701 int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map)
1702 {
1703 	struct cxl_port *port;
1704 	struct device *dev;
1705 	int rc;
1706 
1707 	if (WARN_ON_ONCE(!cxld))
1708 		return -EINVAL;
1709 
1710 	if (WARN_ON_ONCE(IS_ERR(cxld)))
1711 		return PTR_ERR(cxld);
1712 
1713 	if (cxld->interleave_ways < 1)
1714 		return -EINVAL;
1715 
1716 	dev = &cxld->dev;
1717 
1718 	port = to_cxl_port(cxld->dev.parent);
1719 	if (!is_endpoint_decoder(dev)) {
1720 		struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev);
1721 
1722 		rc = decoder_populate_targets(cxlsd, port, target_map);
1723 		if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) {
1724 			dev_err(&port->dev,
1725 				"Failed to populate active decoder targets\n");
1726 			return rc;
1727 		}
1728 	}
1729 
1730 	rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id);
1731 	if (rc)
1732 		return rc;
1733 
1734 	return device_add(dev);
1735 }
1736 EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, CXL);
1737 
1738 /**
1739  * cxl_decoder_add - Add a decoder with targets
1740  * @cxld: The cxl decoder allocated by cxl_<type>_decoder_alloc()
1741  * @target_map: A list of downstream ports that this decoder can direct memory
1742  *              traffic to. These numbers should correspond with the port number
1743  *              in the PCIe Link Capabilities structure.
1744  *
1745  * This is the unlocked variant of cxl_decoder_add_locked().
1746  * See cxl_decoder_add_locked().
1747  *
1748  * Context: Process context. Takes and releases the device lock of the port that
1749  *	    owns the @cxld.
1750  */
1751 int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
1752 {
1753 	struct cxl_port *port;
1754 	int rc;
1755 
1756 	if (WARN_ON_ONCE(!cxld))
1757 		return -EINVAL;
1758 
1759 	if (WARN_ON_ONCE(IS_ERR(cxld)))
1760 		return PTR_ERR(cxld);
1761 
1762 	port = to_cxl_port(cxld->dev.parent);
1763 
1764 	device_lock(&port->dev);
1765 	rc = cxl_decoder_add_locked(cxld, target_map);
1766 	device_unlock(&port->dev);
1767 
1768 	return rc;
1769 }
1770 EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL);
1771 
/*
 * devm action installed by cxl_decoder_autoremove(). @dev is the decoder's
 * struct device. For an endpoint decoder cxl_decoder_kill_region() is
 * called first, then the device is unregistered.
 */
static void cxld_unregister(void *dev)
{
	if (is_endpoint_decoder(dev))
		cxl_decoder_kill_region(to_cxl_endpoint_decoder(dev));

	device_unregister(dev);
}
1783 
1784 int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
1785 {
1786 	return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev);
1787 }
1788 EXPORT_SYMBOL_NS_GPL(cxl_decoder_autoremove, CXL);
1789 
1790 /**
1791  * __cxl_driver_register - register a driver for the cxl bus
1792  * @cxl_drv: cxl driver structure to attach
1793  * @owner: owning module/driver
1794  * @modname: KBUILD_MODNAME for parent driver
1795  */
1796 int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
1797 			  const char *modname)
1798 {
1799 	if (!cxl_drv->probe) {
1800 		pr_debug("%s ->probe() must be specified\n", modname);
1801 		return -EINVAL;
1802 	}
1803 
1804 	if (!cxl_drv->name) {
1805 		pr_debug("%s ->name must be specified\n", modname);
1806 		return -EINVAL;
1807 	}
1808 
1809 	if (!cxl_drv->id) {
1810 		pr_debug("%s ->id must be specified\n", modname);
1811 		return -EINVAL;
1812 	}
1813 
1814 	cxl_drv->drv.bus = &cxl_bus_type;
1815 	cxl_drv->drv.owner = owner;
1816 	cxl_drv->drv.mod_name = modname;
1817 	cxl_drv->drv.name = cxl_drv->name;
1818 
1819 	return driver_register(&cxl_drv->drv);
1820 }
1821 EXPORT_SYMBOL_NS_GPL(__cxl_driver_register, CXL);
1822 
1823 void cxl_driver_unregister(struct cxl_driver *cxl_drv)
1824 {
1825 	driver_unregister(&cxl_drv->drv);
1826 }
1827 EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL);
1828 
1829 static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
1830 {
1831 	return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
1832 			      cxl_device_id(dev));
1833 }
1834 
1835 static int cxl_bus_match(struct device *dev, struct device_driver *drv)
1836 {
1837 	return cxl_device_id(dev) == to_cxl_drv(drv)->id;
1838 }
1839 
1840 static int cxl_bus_probe(struct device *dev)
1841 {
1842 	int rc;
1843 
1844 	rc = to_cxl_drv(dev->driver)->probe(dev);
1845 	dev_dbg(dev, "probe: %d\n", rc);
1846 	return rc;
1847 }
1848 
1849 static void cxl_bus_remove(struct device *dev)
1850 {
1851 	struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
1852 
1853 	if (cxl_drv->remove)
1854 		cxl_drv->remove(dev);
1855 }
1856 
1857 static struct workqueue_struct *cxl_bus_wq;
1858 
1859 static void cxl_bus_rescan_queue(struct work_struct *w)
1860 {
1861 	int rc = bus_rescan_devices(&cxl_bus_type);
1862 
1863 	pr_debug("CXL bus rescan result: %d\n", rc);
1864 }
1865 
1866 void cxl_bus_rescan(void)
1867 {
1868 	static DECLARE_WORK(rescan_work, cxl_bus_rescan_queue);
1869 
1870 	queue_work(cxl_bus_wq, &rescan_work);
1871 }
1872 EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL);
1873 
1874 void cxl_bus_drain(void)
1875 {
1876 	drain_workqueue(cxl_bus_wq);
1877 }
1878 EXPORT_SYMBOL_NS_GPL(cxl_bus_drain, CXL);
1879 
1880 bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
1881 {
1882 	return queue_work(cxl_bus_wq, &cxlmd->detach_work);
1883 }
1884 EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
1885 
1886 /* for user tooling to ensure port disable work has completed */
1887 static ssize_t flush_store(struct bus_type *bus, const char *buf, size_t count)
1888 {
1889 	if (sysfs_streq(buf, "1")) {
1890 		flush_workqueue(cxl_bus_wq);
1891 		return count;
1892 	}
1893 
1894 	return -EINVAL;
1895 }
1896 
1897 static BUS_ATTR_WO(flush);
1898 
1899 static struct attribute *cxl_bus_attributes[] = {
1900 	&bus_attr_flush.attr,
1901 	NULL,
1902 };
1903 
1904 static struct attribute_group cxl_bus_attribute_group = {
1905 	.attrs = cxl_bus_attributes,
1906 };
1907 
1908 static const struct attribute_group *cxl_bus_attribute_groups[] = {
1909 	&cxl_bus_attribute_group,
1910 	NULL,
1911 };
1912 
1913 struct bus_type cxl_bus_type = {
1914 	.name = "cxl",
1915 	.uevent = cxl_bus_uevent,
1916 	.match = cxl_bus_match,
1917 	.probe = cxl_bus_probe,
1918 	.remove = cxl_bus_remove,
1919 	.bus_groups = cxl_bus_attribute_groups,
1920 };
1921 EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL);
1922 
/* top-level "cxl" debugfs directory, created in cxl_core_init() */
static struct dentry *cxl_debugfs;

/* Create debugfs directory @dir beneath the top-level cxl directory */
struct dentry *cxl_debugfs_create_dir(const char *dir)
{
	struct dentry *parent = cxl_debugfs;

	return debugfs_create_dir(dir, parent);
}
EXPORT_SYMBOL_NS_GPL(cxl_debugfs_create_dir, CXL);
1930 
1931 static __init int cxl_core_init(void)
1932 {
1933 	int rc;
1934 
1935 	cxl_debugfs = debugfs_create_dir("cxl", NULL);
1936 
1937 	cxl_mbox_init();
1938 
1939 	rc = cxl_memdev_init();
1940 	if (rc)
1941 		return rc;
1942 
1943 	cxl_bus_wq = alloc_ordered_workqueue("cxl_port", 0);
1944 	if (!cxl_bus_wq) {
1945 		rc = -ENOMEM;
1946 		goto err_wq;
1947 	}
1948 
1949 	rc = bus_register(&cxl_bus_type);
1950 	if (rc)
1951 		goto err_bus;
1952 
1953 	rc = cxl_region_init();
1954 	if (rc)
1955 		goto err_region;
1956 
1957 	return 0;
1958 
1959 err_region:
1960 	bus_unregister(&cxl_bus_type);
1961 err_bus:
1962 	destroy_workqueue(cxl_bus_wq);
1963 err_wq:
1964 	cxl_memdev_exit();
1965 	return rc;
1966 }
1967 
1968 static void cxl_core_exit(void)
1969 {
1970 	cxl_region_exit();
1971 	bus_unregister(&cxl_bus_type);
1972 	destroy_workqueue(cxl_bus_wq);
1973 	cxl_memdev_exit();
1974 	debugfs_remove_recursive(cxl_debugfs);
1975 }
1976 
1977 module_init(cxl_core_init);
1978 module_exit(cxl_core_exit);
1979 MODULE_LICENSE("GPL v2");
1980