xref: /openbmc/linux/drivers/cxl/core/region.c (revision 966a9b49033b472dcfb453abdc34bca7df17adce)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
3 #include <linux/memregion.h>
4 #include <linux/genalloc.h>
5 #include <linux/device.h>
6 #include <linux/module.h>
7 #include <linux/slab.h>
8 #include <linux/uuid.h>
9 #include <linux/idr.h>
10 #include <cxlmem.h>
11 #include <cxl.h>
12 #include "core.h"
13 
14 /**
15  * DOC: cxl core region
16  *
17  * CXL Regions represent mapped memory capacity in system physical address
18  * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
19  * Memory ranges, Regions represent the active mapped capacity by the HDM
20  * Decoder Capability structures throughout the Host Bridges, Switches, and
21  * Endpoints in the topology.
22  *
23  * Region configuration has ordering constraints. UUID may be set at any time
24  * but is only visible for persistent regions.
25  * 1. Interleave granularity
26  * 2. Interleave size
27  * 3. Decoder targets
28  */
29 
30 /*
31  * All changes to the interleave configuration occur with this lock held
32  * for write.
33  */
34 static DECLARE_RWSEM(cxl_region_rwsem);
35 
36 static struct cxl_region *to_cxl_region(struct device *dev);
37 
38 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
39 			 char *buf)
40 {
41 	struct cxl_region *cxlr = to_cxl_region(dev);
42 	struct cxl_region_params *p = &cxlr->params;
43 	ssize_t rc;
44 
45 	rc = down_read_interruptible(&cxl_region_rwsem);
46 	if (rc)
47 		return rc;
48 	rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
49 	up_read(&cxl_region_rwsem);
50 
51 	return rc;
52 }
53 
54 static int is_dup(struct device *match, void *data)
55 {
56 	struct cxl_region_params *p;
57 	struct cxl_region *cxlr;
58 	uuid_t *uuid = data;
59 
60 	if (!is_cxl_region(match))
61 		return 0;
62 
63 	lockdep_assert_held(&cxl_region_rwsem);
64 	cxlr = to_cxl_region(match);
65 	p = &cxlr->params;
66 
67 	if (uuid_equal(&p->uuid, uuid)) {
68 		dev_dbg(match, "already has uuid: %pUb\n", uuid);
69 		return -EBUSY;
70 	}
71 
72 	return 0;
73 }
74 
75 static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
76 			  const char *buf, size_t len)
77 {
78 	struct cxl_region *cxlr = to_cxl_region(dev);
79 	struct cxl_region_params *p = &cxlr->params;
80 	uuid_t temp;
81 	ssize_t rc;
82 
83 	if (len != UUID_STRING_LEN + 1)
84 		return -EINVAL;
85 
86 	rc = uuid_parse(buf, &temp);
87 	if (rc)
88 		return rc;
89 
90 	if (uuid_is_null(&temp))
91 		return -EINVAL;
92 
93 	rc = down_write_killable(&cxl_region_rwsem);
94 	if (rc)
95 		return rc;
96 
97 	if (uuid_equal(&p->uuid, &temp))
98 		goto out;
99 
100 	rc = -EBUSY;
101 	if (p->state >= CXL_CONFIG_ACTIVE)
102 		goto out;
103 
104 	rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
105 	if (rc < 0)
106 		goto out;
107 
108 	uuid_copy(&p->uuid, &temp);
109 out:
110 	up_write(&cxl_region_rwsem);
111 
112 	if (rc)
113 		return rc;
114 	return len;
115 }
116 static DEVICE_ATTR_RW(uuid);
117 
118 static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
119 					  struct cxl_region *cxlr)
120 {
121 	return xa_load(&port->regions, (unsigned long)cxlr);
122 }
123 
124 static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
125 {
126 	struct cxl_region_params *p = &cxlr->params;
127 	int i;
128 
129 	for (i = count - 1; i >= 0; i--) {
130 		struct cxl_endpoint_decoder *cxled = p->targets[i];
131 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
132 		struct cxl_port *iter = cxled_to_port(cxled);
133 		struct cxl_ep *ep;
134 		int rc;
135 
136 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
137 			iter = to_cxl_port(iter->dev.parent);
138 
139 		for (ep = cxl_ep_load(iter, cxlmd); iter;
140 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
141 			struct cxl_region_ref *cxl_rr;
142 			struct cxl_decoder *cxld;
143 
144 			cxl_rr = cxl_rr_load(iter, cxlr);
145 			cxld = cxl_rr->decoder;
146 			rc = cxld->reset(cxld);
147 			if (rc)
148 				return rc;
149 		}
150 
151 		rc = cxled->cxld.reset(&cxled->cxld);
152 		if (rc)
153 			return rc;
154 	}
155 
156 	return 0;
157 }
158 
159 static int cxl_region_decode_commit(struct cxl_region *cxlr)
160 {
161 	struct cxl_region_params *p = &cxlr->params;
162 	int i, rc = 0;
163 
164 	for (i = 0; i < p->nr_targets; i++) {
165 		struct cxl_endpoint_decoder *cxled = p->targets[i];
166 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
167 		struct cxl_region_ref *cxl_rr;
168 		struct cxl_decoder *cxld;
169 		struct cxl_port *iter;
170 		struct cxl_ep *ep;
171 
172 		/* commit bottom up */
173 		for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
174 		     iter = to_cxl_port(iter->dev.parent)) {
175 			cxl_rr = cxl_rr_load(iter, cxlr);
176 			cxld = cxl_rr->decoder;
177 			if (cxld->commit)
178 				rc = cxld->commit(cxld);
179 			if (rc)
180 				break;
181 		}
182 
183 		if (rc) {
184 			/* programming @iter failed, teardown */
185 			for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
186 			     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
187 				cxl_rr = cxl_rr_load(iter, cxlr);
188 				cxld = cxl_rr->decoder;
189 				cxld->reset(cxld);
190 			}
191 
192 			cxled->cxld.reset(&cxled->cxld);
193 			goto err;
194 		}
195 	}
196 
197 	return 0;
198 
199 err:
200 	/* undo the targets that were successfully committed */
201 	cxl_region_decode_reset(cxlr, i);
202 	return rc;
203 }
204 
205 static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
206 			    const char *buf, size_t len)
207 {
208 	struct cxl_region *cxlr = to_cxl_region(dev);
209 	struct cxl_region_params *p = &cxlr->params;
210 	bool commit;
211 	ssize_t rc;
212 
213 	rc = kstrtobool(buf, &commit);
214 	if (rc)
215 		return rc;
216 
217 	rc = down_write_killable(&cxl_region_rwsem);
218 	if (rc)
219 		return rc;
220 
221 	/* Already in the requested state? */
222 	if (commit && p->state >= CXL_CONFIG_COMMIT)
223 		goto out;
224 	if (!commit && p->state < CXL_CONFIG_COMMIT)
225 		goto out;
226 
227 	/* Not ready to commit? */
228 	if (commit && p->state < CXL_CONFIG_ACTIVE) {
229 		rc = -ENXIO;
230 		goto out;
231 	}
232 
233 	if (commit)
234 		rc = cxl_region_decode_commit(cxlr);
235 	else {
236 		p->state = CXL_CONFIG_RESET_PENDING;
237 		up_write(&cxl_region_rwsem);
238 		device_release_driver(&cxlr->dev);
239 		down_write(&cxl_region_rwsem);
240 
241 		/*
242 		 * The lock was dropped, so need to revalidate that the reset is
243 		 * still pending.
244 		 */
245 		if (p->state == CXL_CONFIG_RESET_PENDING)
246 			rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
247 	}
248 
249 	if (rc)
250 		goto out;
251 
252 	if (commit)
253 		p->state = CXL_CONFIG_COMMIT;
254 	else if (p->state == CXL_CONFIG_RESET_PENDING)
255 		p->state = CXL_CONFIG_ACTIVE;
256 
257 out:
258 	up_write(&cxl_region_rwsem);
259 
260 	if (rc)
261 		return rc;
262 	return len;
263 }
264 
265 static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
266 			   char *buf)
267 {
268 	struct cxl_region *cxlr = to_cxl_region(dev);
269 	struct cxl_region_params *p = &cxlr->params;
270 	ssize_t rc;
271 
272 	rc = down_read_interruptible(&cxl_region_rwsem);
273 	if (rc)
274 		return rc;
275 	rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
276 	up_read(&cxl_region_rwsem);
277 
278 	return rc;
279 }
280 static DEVICE_ATTR_RW(commit);
281 
282 static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
283 				  int n)
284 {
285 	struct device *dev = kobj_to_dev(kobj);
286 	struct cxl_region *cxlr = to_cxl_region(dev);
287 
288 	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
289 		return 0;
290 	return a->mode;
291 }
292 
293 static ssize_t interleave_ways_show(struct device *dev,
294 				    struct device_attribute *attr, char *buf)
295 {
296 	struct cxl_region *cxlr = to_cxl_region(dev);
297 	struct cxl_region_params *p = &cxlr->params;
298 	ssize_t rc;
299 
300 	rc = down_read_interruptible(&cxl_region_rwsem);
301 	if (rc)
302 		return rc;
303 	rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
304 	up_read(&cxl_region_rwsem);
305 
306 	return rc;
307 }
308 
309 static const struct attribute_group *get_cxl_region_target_group(void);
310 
311 static ssize_t interleave_ways_store(struct device *dev,
312 				     struct device_attribute *attr,
313 				     const char *buf, size_t len)
314 {
315 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
316 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
317 	struct cxl_region *cxlr = to_cxl_region(dev);
318 	struct cxl_region_params *p = &cxlr->params;
319 	unsigned int val, save;
320 	int rc;
321 	u8 iw;
322 
323 	rc = kstrtouint(buf, 0, &val);
324 	if (rc)
325 		return rc;
326 
327 	rc = ways_to_cxl(val, &iw);
328 	if (rc)
329 		return rc;
330 
331 	/*
332 	 * Even for x3, x9, and x12 interleaves the region interleave must be a
333 	 * power of 2 multiple of the host bridge interleave.
334 	 */
335 	if (!is_power_of_2(val / cxld->interleave_ways) ||
336 	    (val % cxld->interleave_ways)) {
337 		dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
338 		return -EINVAL;
339 	}
340 
341 	rc = down_write_killable(&cxl_region_rwsem);
342 	if (rc)
343 		return rc;
344 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
345 		rc = -EBUSY;
346 		goto out;
347 	}
348 
349 	save = p->interleave_ways;
350 	p->interleave_ways = val;
351 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
352 	if (rc)
353 		p->interleave_ways = save;
354 out:
355 	up_write(&cxl_region_rwsem);
356 	if (rc)
357 		return rc;
358 	return len;
359 }
360 static DEVICE_ATTR_RW(interleave_ways);
361 
362 static ssize_t interleave_granularity_show(struct device *dev,
363 					   struct device_attribute *attr,
364 					   char *buf)
365 {
366 	struct cxl_region *cxlr = to_cxl_region(dev);
367 	struct cxl_region_params *p = &cxlr->params;
368 	ssize_t rc;
369 
370 	rc = down_read_interruptible(&cxl_region_rwsem);
371 	if (rc)
372 		return rc;
373 	rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
374 	up_read(&cxl_region_rwsem);
375 
376 	return rc;
377 }
378 
379 static ssize_t interleave_granularity_store(struct device *dev,
380 					    struct device_attribute *attr,
381 					    const char *buf, size_t len)
382 {
383 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
384 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
385 	struct cxl_region *cxlr = to_cxl_region(dev);
386 	struct cxl_region_params *p = &cxlr->params;
387 	int rc, val;
388 	u16 ig;
389 
390 	rc = kstrtoint(buf, 0, &val);
391 	if (rc)
392 		return rc;
393 
394 	rc = granularity_to_cxl(val, &ig);
395 	if (rc)
396 		return rc;
397 
398 	/*
399 	 * When the host-bridge is interleaved, disallow region granularity !=
400 	 * root granularity. Regions with a granularity less than the root
401 	 * interleave result in needing multiple endpoints to support a single
402 	 * slot in the interleave (possible to suport in the future). Regions
403 	 * with a granularity greater than the root interleave result in invalid
404 	 * DPA translations (invalid to support).
405 	 */
406 	if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
407 		return -EINVAL;
408 
409 	rc = down_write_killable(&cxl_region_rwsem);
410 	if (rc)
411 		return rc;
412 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
413 		rc = -EBUSY;
414 		goto out;
415 	}
416 
417 	p->interleave_granularity = val;
418 out:
419 	up_write(&cxl_region_rwsem);
420 	if (rc)
421 		return rc;
422 	return len;
423 }
424 static DEVICE_ATTR_RW(interleave_granularity);
425 
426 static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
427 			     char *buf)
428 {
429 	struct cxl_region *cxlr = to_cxl_region(dev);
430 	struct cxl_region_params *p = &cxlr->params;
431 	u64 resource = -1ULL;
432 	ssize_t rc;
433 
434 	rc = down_read_interruptible(&cxl_region_rwsem);
435 	if (rc)
436 		return rc;
437 	if (p->res)
438 		resource = p->res->start;
439 	rc = sysfs_emit(buf, "%#llx\n", resource);
440 	up_read(&cxl_region_rwsem);
441 
442 	return rc;
443 }
444 static DEVICE_ATTR_RO(resource);
445 
446 static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
447 {
448 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
449 	struct cxl_region_params *p = &cxlr->params;
450 	struct resource *res;
451 	u32 remainder = 0;
452 
453 	lockdep_assert_held_write(&cxl_region_rwsem);
454 
455 	/* Nothing to do... */
456 	if (p->res && resource_size(p->res) == size)
457 		return 0;
458 
459 	/* To change size the old size must be freed first */
460 	if (p->res)
461 		return -EBUSY;
462 
463 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
464 		return -EBUSY;
465 
466 	/* ways, granularity and uuid (if PMEM) need to be set before HPA */
467 	if (!p->interleave_ways || !p->interleave_granularity ||
468 	    (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
469 		return -ENXIO;
470 
471 	div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder);
472 	if (remainder)
473 		return -EINVAL;
474 
475 	res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
476 				    dev_name(&cxlr->dev));
477 	if (IS_ERR(res)) {
478 		dev_dbg(&cxlr->dev, "failed to allocate HPA: %ld\n",
479 			PTR_ERR(res));
480 		return PTR_ERR(res);
481 	}
482 
483 	p->res = res;
484 	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
485 
486 	return 0;
487 }
488 
489 static void cxl_region_iomem_release(struct cxl_region *cxlr)
490 {
491 	struct cxl_region_params *p = &cxlr->params;
492 
493 	if (device_is_registered(&cxlr->dev))
494 		lockdep_assert_held_write(&cxl_region_rwsem);
495 	if (p->res) {
496 		remove_resource(p->res);
497 		kfree(p->res);
498 		p->res = NULL;
499 	}
500 }
501 
502 static int free_hpa(struct cxl_region *cxlr)
503 {
504 	struct cxl_region_params *p = &cxlr->params;
505 
506 	lockdep_assert_held_write(&cxl_region_rwsem);
507 
508 	if (!p->res)
509 		return 0;
510 
511 	if (p->state >= CXL_CONFIG_ACTIVE)
512 		return -EBUSY;
513 
514 	cxl_region_iomem_release(cxlr);
515 	p->state = CXL_CONFIG_IDLE;
516 	return 0;
517 }
518 
519 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
520 			  const char *buf, size_t len)
521 {
522 	struct cxl_region *cxlr = to_cxl_region(dev);
523 	u64 val;
524 	int rc;
525 
526 	rc = kstrtou64(buf, 0, &val);
527 	if (rc)
528 		return rc;
529 
530 	rc = down_write_killable(&cxl_region_rwsem);
531 	if (rc)
532 		return rc;
533 
534 	if (val)
535 		rc = alloc_hpa(cxlr, val);
536 	else
537 		rc = free_hpa(cxlr);
538 	up_write(&cxl_region_rwsem);
539 
540 	if (rc)
541 		return rc;
542 
543 	return len;
544 }
545 
546 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
547 			 char *buf)
548 {
549 	struct cxl_region *cxlr = to_cxl_region(dev);
550 	struct cxl_region_params *p = &cxlr->params;
551 	u64 size = 0;
552 	ssize_t rc;
553 
554 	rc = down_read_interruptible(&cxl_region_rwsem);
555 	if (rc)
556 		return rc;
557 	if (p->res)
558 		size = resource_size(p->res);
559 	rc = sysfs_emit(buf, "%#llx\n", size);
560 	up_read(&cxl_region_rwsem);
561 
562 	return rc;
563 }
564 static DEVICE_ATTR_RW(size);
565 
566 static struct attribute *cxl_region_attrs[] = {
567 	&dev_attr_uuid.attr,
568 	&dev_attr_commit.attr,
569 	&dev_attr_interleave_ways.attr,
570 	&dev_attr_interleave_granularity.attr,
571 	&dev_attr_resource.attr,
572 	&dev_attr_size.attr,
573 	NULL,
574 };
575 
576 static const struct attribute_group cxl_region_group = {
577 	.attrs = cxl_region_attrs,
578 	.is_visible = cxl_region_visible,
579 };
580 
581 static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
582 {
583 	struct cxl_region_params *p = &cxlr->params;
584 	struct cxl_endpoint_decoder *cxled;
585 	int rc;
586 
587 	rc = down_read_interruptible(&cxl_region_rwsem);
588 	if (rc)
589 		return rc;
590 
591 	if (pos >= p->interleave_ways) {
592 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
593 			p->interleave_ways);
594 		rc = -ENXIO;
595 		goto out;
596 	}
597 
598 	cxled = p->targets[pos];
599 	if (!cxled)
600 		rc = sysfs_emit(buf, "\n");
601 	else
602 		rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
603 out:
604 	up_read(&cxl_region_rwsem);
605 
606 	return rc;
607 }
608 
609 static int match_free_decoder(struct device *dev, void *data)
610 {
611 	struct cxl_decoder *cxld;
612 	int *id = data;
613 
614 	if (!is_switch_decoder(dev))
615 		return 0;
616 
617 	cxld = to_cxl_decoder(dev);
618 
619 	/* enforce ordered allocation */
620 	if (cxld->id != *id)
621 		return 0;
622 
623 	if (!cxld->region)
624 		return 1;
625 
626 	(*id)++;
627 
628 	return 0;
629 }
630 
631 static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
632 						   struct cxl_region *cxlr)
633 {
634 	struct device *dev;
635 	int id = 0;
636 
637 	dev = device_find_child(&port->dev, &id, match_free_decoder);
638 	if (!dev)
639 		return NULL;
640 	/*
641 	 * This decoder is pinned registered as long as the endpoint decoder is
642 	 * registered, and endpoint decoder unregistration holds the
643 	 * cxl_region_rwsem over unregister events, so no need to hold on to
644 	 * this extra reference.
645 	 */
646 	put_device(dev);
647 	return to_cxl_decoder(dev);
648 }
649 
650 static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
651 					       struct cxl_region *cxlr)
652 {
653 	struct cxl_region_params *p = &cxlr->params;
654 	struct cxl_region_ref *cxl_rr, *iter;
655 	unsigned long index;
656 	int rc;
657 
658 	xa_for_each(&port->regions, index, iter) {
659 		struct cxl_region_params *ip = &iter->region->params;
660 
661 		if (!ip->res)
662 			continue;
663 
664 		if (ip->res->start > p->res->start) {
665 			dev_dbg(&cxlr->dev,
666 				"%s: HPA order violation %s:%pr vs %pr\n",
667 				dev_name(&port->dev),
668 				dev_name(&iter->region->dev), ip->res, p->res);
669 			return ERR_PTR(-EBUSY);
670 		}
671 	}
672 
673 	cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
674 	if (!cxl_rr)
675 		return ERR_PTR(-ENOMEM);
676 	cxl_rr->port = port;
677 	cxl_rr->region = cxlr;
678 	cxl_rr->nr_targets = 1;
679 	xa_init(&cxl_rr->endpoints);
680 
681 	rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
682 	if (rc) {
683 		dev_dbg(&cxlr->dev,
684 			"%s: failed to track region reference: %d\n",
685 			dev_name(&port->dev), rc);
686 		kfree(cxl_rr);
687 		return ERR_PTR(rc);
688 	}
689 
690 	return cxl_rr;
691 }
692 
693 static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
694 {
695 	struct cxl_region *cxlr = cxl_rr->region;
696 	struct cxl_decoder *cxld = cxl_rr->decoder;
697 
698 	if (!cxld)
699 		return;
700 
701 	dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
702 	if (cxld->region == cxlr) {
703 		cxld->region = NULL;
704 		put_device(&cxlr->dev);
705 	}
706 }
707 
708 static void free_region_ref(struct cxl_region_ref *cxl_rr)
709 {
710 	struct cxl_port *port = cxl_rr->port;
711 	struct cxl_region *cxlr = cxl_rr->region;
712 
713 	cxl_rr_free_decoder(cxl_rr);
714 	xa_erase(&port->regions, (unsigned long)cxlr);
715 	xa_destroy(&cxl_rr->endpoints);
716 	kfree(cxl_rr);
717 }
718 
719 static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
720 			 struct cxl_endpoint_decoder *cxled)
721 {
722 	int rc;
723 	struct cxl_port *port = cxl_rr->port;
724 	struct cxl_region *cxlr = cxl_rr->region;
725 	struct cxl_decoder *cxld = cxl_rr->decoder;
726 	struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
727 
728 	if (ep) {
729 		rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
730 			       GFP_KERNEL);
731 		if (rc)
732 			return rc;
733 	}
734 	cxl_rr->nr_eps++;
735 
736 	if (!cxld->region) {
737 		cxld->region = cxlr;
738 		get_device(&cxlr->dev);
739 	}
740 
741 	return 0;
742 }
743 
744 static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
745 				struct cxl_endpoint_decoder *cxled,
746 				struct cxl_region_ref *cxl_rr)
747 {
748 	struct cxl_decoder *cxld;
749 
750 	if (port == cxled_to_port(cxled))
751 		cxld = &cxled->cxld;
752 	else
753 		cxld = cxl_region_find_decoder(port, cxlr);
754 	if (!cxld) {
755 		dev_dbg(&cxlr->dev, "%s: no decoder available\n",
756 			dev_name(&port->dev));
757 		return -EBUSY;
758 	}
759 
760 	if (cxld->region) {
761 		dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
762 			dev_name(&port->dev), dev_name(&cxld->dev),
763 			dev_name(&cxld->region->dev));
764 		return -EBUSY;
765 	}
766 
767 	cxl_rr->decoder = cxld;
768 	return 0;
769 }
770 
771 /**
772  * cxl_port_attach_region() - track a region's interest in a port by endpoint
773  * @port: port to add a new region reference 'struct cxl_region_ref'
774  * @cxlr: region to attach to @port
775  * @cxled: endpoint decoder used to create or further pin a region reference
776  * @pos: interleave position of @cxled in @cxlr
777  *
778  * The attach event is an opportunity to validate CXL decode setup
779  * constraints and record metadata needed for programming HDM decoders,
780  * in particular decoder target lists.
781  *
782  * The steps are:
783  *
784  * - validate that there are no other regions with a higher HPA already
785  *   associated with @port
786  * - establish a region reference if one is not already present
787  *
788  *   - additionally allocate a decoder instance that will host @cxlr on
789  *     @port
790  *
791  * - pin the region reference by the endpoint
792  * - account for how many entries in @port's target list are needed to
793  *   cover all of the added endpoints.
794  */
795 static int cxl_port_attach_region(struct cxl_port *port,
796 				  struct cxl_region *cxlr,
797 				  struct cxl_endpoint_decoder *cxled, int pos)
798 {
799 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
800 	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
801 	struct cxl_region_ref *cxl_rr;
802 	bool nr_targets_inc = false;
803 	struct cxl_decoder *cxld;
804 	unsigned long index;
805 	int rc = -EBUSY;
806 
807 	lockdep_assert_held_write(&cxl_region_rwsem);
808 
809 	cxl_rr = cxl_rr_load(port, cxlr);
810 	if (cxl_rr) {
811 		struct cxl_ep *ep_iter;
812 		int found = 0;
813 
814 		/*
815 		 * Walk the existing endpoints that have been attached to
816 		 * @cxlr at @port and see if they share the same 'next' port
817 		 * in the downstream direction. I.e. endpoints that share common
818 		 * upstream switch.
819 		 */
820 		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
821 			if (ep_iter == ep)
822 				continue;
823 			if (ep_iter->next == ep->next) {
824 				found++;
825 				break;
826 			}
827 		}
828 
829 		/*
830 		 * New target port, or @port is an endpoint port that always
831 		 * accounts its own local decode as a target.
832 		 */
833 		if (!found || !ep->next) {
834 			cxl_rr->nr_targets++;
835 			nr_targets_inc = true;
836 		}
837 	} else {
838 		cxl_rr = alloc_region_ref(port, cxlr);
839 		if (IS_ERR(cxl_rr)) {
840 			dev_dbg(&cxlr->dev,
841 				"%s: failed to allocate region reference\n",
842 				dev_name(&port->dev));
843 			return PTR_ERR(cxl_rr);
844 		}
845 		nr_targets_inc = true;
846 
847 		rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
848 		if (rc)
849 			goto out_erase;
850 	}
851 	cxld = cxl_rr->decoder;
852 
853 	rc = cxl_rr_ep_add(cxl_rr, cxled);
854 	if (rc) {
855 		dev_dbg(&cxlr->dev,
856 			"%s: failed to track endpoint %s:%s reference\n",
857 			dev_name(&port->dev), dev_name(&cxlmd->dev),
858 			dev_name(&cxld->dev));
859 		goto out_erase;
860 	}
861 
862 	dev_dbg(&cxlr->dev,
863 		"%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
864 		dev_name(port->uport), dev_name(&port->dev),
865 		dev_name(&cxld->dev), dev_name(&cxlmd->dev),
866 		dev_name(&cxled->cxld.dev), pos,
867 		ep ? ep->next ? dev_name(ep->next->uport) :
868 				      dev_name(&cxlmd->dev) :
869 			   "none",
870 		cxl_rr->nr_eps, cxl_rr->nr_targets);
871 
872 	return 0;
873 out_erase:
874 	if (nr_targets_inc)
875 		cxl_rr->nr_targets--;
876 	if (cxl_rr->nr_eps == 0)
877 		free_region_ref(cxl_rr);
878 	return rc;
879 }
880 
881 static void cxl_port_detach_region(struct cxl_port *port,
882 				   struct cxl_region *cxlr,
883 				   struct cxl_endpoint_decoder *cxled)
884 {
885 	struct cxl_region_ref *cxl_rr;
886 	struct cxl_ep *ep = NULL;
887 
888 	lockdep_assert_held_write(&cxl_region_rwsem);
889 
890 	cxl_rr = cxl_rr_load(port, cxlr);
891 	if (!cxl_rr)
892 		return;
893 
894 	/*
895 	 * Endpoint ports do not carry cxl_ep references, and they
896 	 * never target more than one endpoint by definition
897 	 */
898 	if (cxl_rr->decoder == &cxled->cxld)
899 		cxl_rr->nr_eps--;
900 	else
901 		ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
902 	if (ep) {
903 		struct cxl_ep *ep_iter;
904 		unsigned long index;
905 		int found = 0;
906 
907 		cxl_rr->nr_eps--;
908 		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
909 			if (ep_iter->next == ep->next) {
910 				found++;
911 				break;
912 			}
913 		}
914 		if (!found)
915 			cxl_rr->nr_targets--;
916 	}
917 
918 	if (cxl_rr->nr_eps == 0)
919 		free_region_ref(cxl_rr);
920 }
921 
922 static int check_last_peer(struct cxl_endpoint_decoder *cxled,
923 			   struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
924 			   int distance)
925 {
926 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
927 	struct cxl_region *cxlr = cxl_rr->region;
928 	struct cxl_region_params *p = &cxlr->params;
929 	struct cxl_endpoint_decoder *cxled_peer;
930 	struct cxl_port *port = cxl_rr->port;
931 	struct cxl_memdev *cxlmd_peer;
932 	struct cxl_ep *ep_peer;
933 	int pos = cxled->pos;
934 
935 	/*
936 	 * If this position wants to share a dport with the last endpoint mapped
937 	 * then that endpoint, at index 'position - distance', must also be
938 	 * mapped by this dport.
939 	 */
940 	if (pos < distance) {
941 		dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
942 			dev_name(port->uport), dev_name(&port->dev),
943 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
944 		return -ENXIO;
945 	}
946 	cxled_peer = p->targets[pos - distance];
947 	cxlmd_peer = cxled_to_memdev(cxled_peer);
948 	ep_peer = cxl_ep_load(port, cxlmd_peer);
949 	if (ep->dport != ep_peer->dport) {
950 		dev_dbg(&cxlr->dev,
951 			"%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
952 			dev_name(port->uport), dev_name(&port->dev),
953 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
954 			dev_name(&cxlmd_peer->dev),
955 			dev_name(&cxled_peer->cxld.dev));
956 		return -ENXIO;
957 	}
958 
959 	return 0;
960 }
961 
962 static int cxl_port_setup_targets(struct cxl_port *port,
963 				  struct cxl_region *cxlr,
964 				  struct cxl_endpoint_decoder *cxled)
965 {
966 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
967 	int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
968 	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
969 	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
970 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
971 	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
972 	struct cxl_region_params *p = &cxlr->params;
973 	struct cxl_decoder *cxld = cxl_rr->decoder;
974 	struct cxl_switch_decoder *cxlsd;
975 	u16 eig, peig;
976 	u8 eiw, peiw;
977 
978 	/*
979 	 * While root level decoders support x3, x6, x12, switch level
980 	 * decoders only support powers of 2 up to x16.
981 	 */
982 	if (!is_power_of_2(cxl_rr->nr_targets)) {
983 		dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
984 			dev_name(port->uport), dev_name(&port->dev),
985 			cxl_rr->nr_targets);
986 		return -EINVAL;
987 	}
988 
989 	cxlsd = to_cxl_switch_decoder(&cxld->dev);
990 	if (cxl_rr->nr_targets_set) {
991 		int i, distance;
992 
993 		/*
994 		 * Passthrough ports impose no distance requirements between
995 		 * peers
996 		 */
997 		if (port->nr_dports == 1)
998 			distance = 0;
999 		else
1000 			distance = p->nr_targets / cxl_rr->nr_targets;
1001 		for (i = 0; i < cxl_rr->nr_targets_set; i++)
1002 			if (ep->dport == cxlsd->target[i]) {
1003 				rc = check_last_peer(cxled, ep, cxl_rr,
1004 						     distance);
1005 				if (rc)
1006 					return rc;
1007 				goto out_target_set;
1008 			}
1009 		goto add_target;
1010 	}
1011 
1012 	if (is_cxl_root(parent_port)) {
1013 		parent_ig = cxlrd->cxlsd.cxld.interleave_granularity;
1014 		parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
1015 		/*
1016 		 * For purposes of address bit routing, use power-of-2 math for
1017 		 * switch ports.
1018 		 */
1019 		if (!is_power_of_2(parent_iw))
1020 			parent_iw /= 3;
1021 	} else {
1022 		struct cxl_region_ref *parent_rr;
1023 		struct cxl_decoder *parent_cxld;
1024 
1025 		parent_rr = cxl_rr_load(parent_port, cxlr);
1026 		parent_cxld = parent_rr->decoder;
1027 		parent_ig = parent_cxld->interleave_granularity;
1028 		parent_iw = parent_cxld->interleave_ways;
1029 	}
1030 
1031 	rc = granularity_to_cxl(parent_ig, &peig);
1032 	if (rc) {
1033 		dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
1034 			dev_name(parent_port->uport),
1035 			dev_name(&parent_port->dev), parent_ig);
1036 		return rc;
1037 	}
1038 
1039 	rc = ways_to_cxl(parent_iw, &peiw);
1040 	if (rc) {
1041 		dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
1042 			dev_name(parent_port->uport),
1043 			dev_name(&parent_port->dev), parent_iw);
1044 		return rc;
1045 	}
1046 
1047 	iw = cxl_rr->nr_targets;
1048 	rc = ways_to_cxl(iw, &eiw);
1049 	if (rc) {
1050 		dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
1051 			dev_name(port->uport), dev_name(&port->dev), iw);
1052 		return rc;
1053 	}
1054 
1055 	/*
1056 	 * If @parent_port is masking address bits, pick the next unused address
1057 	 * bit to route @port's targets.
1058 	 */
1059 	if (parent_iw > 1 && cxl_rr->nr_targets > 1) {
1060 		u32 address_bit = max(peig + peiw, eiw + peig);
1061 
1062 		eig = address_bit - eiw + 1;
1063 	} else {
1064 		eiw = peiw;
1065 		eig = peig;
1066 	}
1067 
1068 	rc = cxl_to_granularity(eig, &ig);
1069 	if (rc) {
1070 		dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
1071 			dev_name(port->uport), dev_name(&port->dev),
1072 			256 << eig);
1073 		return rc;
1074 	}
1075 
1076 	cxld->interleave_ways = iw;
1077 	cxld->interleave_granularity = ig;
1078 	cxld->hpa_range = (struct range) {
1079 		.start = p->res->start,
1080 		.end = p->res->end,
1081 	};
1082 	dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport),
1083 		dev_name(&port->dev), iw, ig);
1084 add_target:
1085 	if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
1086 		dev_dbg(&cxlr->dev,
1087 			"%s:%s: targets full trying to add %s:%s at %d\n",
1088 			dev_name(port->uport), dev_name(&port->dev),
1089 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1090 		return -ENXIO;
1091 	}
1092 	cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
1093 	inc = 1;
1094 out_target_set:
1095 	cxl_rr->nr_targets_set += inc;
1096 	dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
1097 		dev_name(port->uport), dev_name(&port->dev),
1098 		cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport),
1099 		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1100 
1101 	return 0;
1102 }
1103 
1104 static void cxl_port_reset_targets(struct cxl_port *port,
1105 				   struct cxl_region *cxlr)
1106 {
1107 	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1108 	struct cxl_decoder *cxld;
1109 
1110 	/*
1111 	 * After the last endpoint has been detached the entire cxl_rr may now
1112 	 * be gone.
1113 	 */
1114 	if (!cxl_rr)
1115 		return;
1116 	cxl_rr->nr_targets_set = 0;
1117 
1118 	cxld = cxl_rr->decoder;
1119 	cxld->hpa_range = (struct range) {
1120 		.start = 0,
1121 		.end = -1,
1122 	};
1123 }
1124 
1125 static void cxl_region_teardown_targets(struct cxl_region *cxlr)
1126 {
1127 	struct cxl_region_params *p = &cxlr->params;
1128 	struct cxl_endpoint_decoder *cxled;
1129 	struct cxl_memdev *cxlmd;
1130 	struct cxl_port *iter;
1131 	struct cxl_ep *ep;
1132 	int i;
1133 
1134 	for (i = 0; i < p->nr_targets; i++) {
1135 		cxled = p->targets[i];
1136 		cxlmd = cxled_to_memdev(cxled);
1137 
1138 		iter = cxled_to_port(cxled);
1139 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1140 			iter = to_cxl_port(iter->dev.parent);
1141 
1142 		for (ep = cxl_ep_load(iter, cxlmd); iter;
1143 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
1144 			cxl_port_reset_targets(iter, cxlr);
1145 	}
1146 }
1147 
1148 static int cxl_region_setup_targets(struct cxl_region *cxlr)
1149 {
1150 	struct cxl_region_params *p = &cxlr->params;
1151 	struct cxl_endpoint_decoder *cxled;
1152 	struct cxl_memdev *cxlmd;
1153 	struct cxl_port *iter;
1154 	struct cxl_ep *ep;
1155 	int i, rc;
1156 
1157 	for (i = 0; i < p->nr_targets; i++) {
1158 		cxled = p->targets[i];
1159 		cxlmd = cxled_to_memdev(cxled);
1160 
1161 		iter = cxled_to_port(cxled);
1162 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1163 			iter = to_cxl_port(iter->dev.parent);
1164 
1165 		/*
1166 		 * Descend the topology tree programming targets while
1167 		 * looking for conflicts.
1168 		 */
1169 		for (ep = cxl_ep_load(iter, cxlmd); iter;
1170 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
1171 			rc = cxl_port_setup_targets(iter, cxlr, cxled);
1172 			if (rc) {
1173 				cxl_region_teardown_targets(cxlr);
1174 				return rc;
1175 			}
1176 		}
1177 	}
1178 
1179 	return 0;
1180 }
1181 
1182 static int cxl_region_attach(struct cxl_region *cxlr,
1183 			     struct cxl_endpoint_decoder *cxled, int pos)
1184 {
1185 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1186 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1187 	struct cxl_port *ep_port, *root_port, *iter;
1188 	struct cxl_region_params *p = &cxlr->params;
1189 	struct cxl_dport *dport;
1190 	int i, rc = -ENXIO;
1191 
1192 	if (cxled->mode == CXL_DECODER_DEAD) {
1193 		dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
1194 		return -ENODEV;
1195 	}
1196 
1197 	/* all full of members, or interleave config not established? */
1198 	if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
1199 		dev_dbg(&cxlr->dev, "region already active\n");
1200 		return -EBUSY;
1201 	} else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
1202 		dev_dbg(&cxlr->dev, "interleave config missing\n");
1203 		return -ENXIO;
1204 	}
1205 
1206 	if (pos < 0 || pos >= p->interleave_ways) {
1207 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1208 			p->interleave_ways);
1209 		return -ENXIO;
1210 	}
1211 
1212 	if (p->targets[pos] == cxled)
1213 		return 0;
1214 
1215 	if (p->targets[pos]) {
1216 		struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
1217 		struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
1218 
1219 		dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
1220 			pos, dev_name(&cxlmd_target->dev),
1221 			dev_name(&cxled_target->cxld.dev));
1222 		return -EBUSY;
1223 	}
1224 
1225 	for (i = 0; i < p->interleave_ways; i++) {
1226 		struct cxl_endpoint_decoder *cxled_target;
1227 		struct cxl_memdev *cxlmd_target;
1228 
1229 		cxled_target = p->targets[pos];
1230 		if (!cxled_target)
1231 			continue;
1232 
1233 		cxlmd_target = cxled_to_memdev(cxled_target);
1234 		if (cxlmd_target == cxlmd) {
1235 			dev_dbg(&cxlr->dev,
1236 				"%s already specified at position %d via: %s\n",
1237 				dev_name(&cxlmd->dev), pos,
1238 				dev_name(&cxled_target->cxld.dev));
1239 			return -EBUSY;
1240 		}
1241 	}
1242 
1243 	ep_port = cxled_to_port(cxled);
1244 	root_port = cxlrd_to_port(cxlrd);
1245 	dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
1246 	if (!dport) {
1247 		dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
1248 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1249 			dev_name(cxlr->dev.parent));
1250 		return -ENXIO;
1251 	}
1252 
1253 	if (cxlrd->calc_hb(cxlrd, pos) != dport) {
1254 		dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
1255 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1256 			dev_name(&cxlrd->cxlsd.cxld.dev));
1257 		return -ENXIO;
1258 	}
1259 
1260 	if (cxled->cxld.target_type != cxlr->type) {
1261 		dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
1262 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1263 			cxled->cxld.target_type, cxlr->type);
1264 		return -ENXIO;
1265 	}
1266 
1267 	if (!cxled->dpa_res) {
1268 		dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
1269 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
1270 		return -ENXIO;
1271 	}
1272 
1273 	if (resource_size(cxled->dpa_res) * p->interleave_ways !=
1274 	    resource_size(p->res)) {
1275 		dev_dbg(&cxlr->dev,
1276 			"%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
1277 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1278 			(u64)resource_size(cxled->dpa_res), p->interleave_ways,
1279 			(u64)resource_size(p->res));
1280 		return -EINVAL;
1281 	}
1282 
1283 	for (iter = ep_port; !is_cxl_root(iter);
1284 	     iter = to_cxl_port(iter->dev.parent)) {
1285 		rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
1286 		if (rc)
1287 			goto err;
1288 	}
1289 
1290 	p->targets[pos] = cxled;
1291 	cxled->pos = pos;
1292 	p->nr_targets++;
1293 
1294 	if (p->nr_targets == p->interleave_ways) {
1295 		rc = cxl_region_setup_targets(cxlr);
1296 		if (rc)
1297 			goto err_decrement;
1298 		p->state = CXL_CONFIG_ACTIVE;
1299 	}
1300 
1301 	cxled->cxld.interleave_ways = p->interleave_ways;
1302 	cxled->cxld.interleave_granularity = p->interleave_granularity;
1303 	cxled->cxld.hpa_range = (struct range) {
1304 		.start = p->res->start,
1305 		.end = p->res->end,
1306 	};
1307 
1308 	return 0;
1309 
1310 err_decrement:
1311 	p->nr_targets--;
1312 err:
1313 	for (iter = ep_port; !is_cxl_root(iter);
1314 	     iter = to_cxl_port(iter->dev.parent))
1315 		cxl_port_detach_region(iter, cxlr, cxled);
1316 	return rc;
1317 }
1318 
1319 static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
1320 {
1321 	struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
1322 	struct cxl_region *cxlr = cxled->cxld.region;
1323 	struct cxl_region_params *p;
1324 	int rc = 0;
1325 
1326 	lockdep_assert_held_write(&cxl_region_rwsem);
1327 
1328 	if (!cxlr)
1329 		return 0;
1330 
1331 	p = &cxlr->params;
1332 	get_device(&cxlr->dev);
1333 
1334 	if (p->state > CXL_CONFIG_ACTIVE) {
1335 		/*
1336 		 * TODO: tear down all impacted regions if a device is
1337 		 * removed out of order
1338 		 */
1339 		rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
1340 		if (rc)
1341 			goto out;
1342 		p->state = CXL_CONFIG_ACTIVE;
1343 	}
1344 
1345 	for (iter = ep_port; !is_cxl_root(iter);
1346 	     iter = to_cxl_port(iter->dev.parent))
1347 		cxl_port_detach_region(iter, cxlr, cxled);
1348 
1349 	if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
1350 	    p->targets[cxled->pos] != cxled) {
1351 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1352 
1353 		dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
1354 			      dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1355 			      cxled->pos);
1356 		goto out;
1357 	}
1358 
1359 	if (p->state == CXL_CONFIG_ACTIVE) {
1360 		p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
1361 		cxl_region_teardown_targets(cxlr);
1362 	}
1363 	p->targets[cxled->pos] = NULL;
1364 	p->nr_targets--;
1365 	cxled->cxld.hpa_range = (struct range) {
1366 		.start = 0,
1367 		.end = -1,
1368 	};
1369 
1370 	/* notify the region driver that one of its targets has departed */
1371 	up_write(&cxl_region_rwsem);
1372 	device_release_driver(&cxlr->dev);
1373 	down_write(&cxl_region_rwsem);
1374 out:
1375 	put_device(&cxlr->dev);
1376 	return rc;
1377 }
1378 
1379 void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
1380 {
1381 	down_write(&cxl_region_rwsem);
1382 	cxled->mode = CXL_DECODER_DEAD;
1383 	cxl_region_detach(cxled);
1384 	up_write(&cxl_region_rwsem);
1385 }
1386 
1387 static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos)
1388 {
1389 	struct device *dev;
1390 	int rc;
1391 
1392 	dev = bus_find_device_by_name(&cxl_bus_type, NULL, decoder);
1393 	if (!dev)
1394 		return -ENODEV;
1395 
1396 	if (!is_endpoint_decoder(dev)) {
1397 		put_device(dev);
1398 		return -EINVAL;
1399 	}
1400 
1401 	rc = down_write_killable(&cxl_region_rwsem);
1402 	if (rc)
1403 		goto out;
1404 	down_read(&cxl_dpa_rwsem);
1405 	rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos);
1406 	up_read(&cxl_dpa_rwsem);
1407 	up_write(&cxl_region_rwsem);
1408 out:
1409 	put_device(dev);
1410 	return rc;
1411 }
1412 
1413 static int detach_target(struct cxl_region *cxlr, int pos)
1414 {
1415 	struct cxl_region_params *p = &cxlr->params;
1416 	int rc;
1417 
1418 	rc = down_write_killable(&cxl_region_rwsem);
1419 	if (rc)
1420 		return rc;
1421 
1422 	if (pos >= p->interleave_ways) {
1423 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1424 			p->interleave_ways);
1425 		rc = -ENXIO;
1426 		goto out;
1427 	}
1428 
1429 	if (!p->targets[pos]) {
1430 		rc = 0;
1431 		goto out;
1432 	}
1433 
1434 	rc = cxl_region_detach(p->targets[pos]);
1435 out:
1436 	up_write(&cxl_region_rwsem);
1437 	return rc;
1438 }
1439 
1440 static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
1441 			    size_t len)
1442 {
1443 	int rc;
1444 
1445 	if (sysfs_streq(buf, "\n"))
1446 		rc = detach_target(cxlr, pos);
1447 	else
1448 		rc = attach_target(cxlr, buf, pos);
1449 
1450 	if (rc < 0)
1451 		return rc;
1452 	return len;
1453 }
1454 
1455 #define TARGET_ATTR_RW(n)                                              \
1456 static ssize_t target##n##_show(                                       \
1457 	struct device *dev, struct device_attribute *attr, char *buf)  \
1458 {                                                                      \
1459 	return show_targetN(to_cxl_region(dev), buf, (n));             \
1460 }                                                                      \
1461 static ssize_t target##n##_store(struct device *dev,                   \
1462 				 struct device_attribute *attr,        \
1463 				 const char *buf, size_t len)          \
1464 {                                                                      \
1465 	return store_targetN(to_cxl_region(dev), buf, (n), len);       \
1466 }                                                                      \
1467 static DEVICE_ATTR_RW(target##n)
1468 
1469 TARGET_ATTR_RW(0);
1470 TARGET_ATTR_RW(1);
1471 TARGET_ATTR_RW(2);
1472 TARGET_ATTR_RW(3);
1473 TARGET_ATTR_RW(4);
1474 TARGET_ATTR_RW(5);
1475 TARGET_ATTR_RW(6);
1476 TARGET_ATTR_RW(7);
1477 TARGET_ATTR_RW(8);
1478 TARGET_ATTR_RW(9);
1479 TARGET_ATTR_RW(10);
1480 TARGET_ATTR_RW(11);
1481 TARGET_ATTR_RW(12);
1482 TARGET_ATTR_RW(13);
1483 TARGET_ATTR_RW(14);
1484 TARGET_ATTR_RW(15);
1485 
/*
 * NULL-terminated list of the sixteen targetN attributes. The array index of
 * each entry equals its slot number, which cxl_region_target_visible()
 * compares against the region's interleave ways.
 */
static struct attribute *target_attrs[] = {
	&dev_attr_target0.attr,
	&dev_attr_target1.attr,
	&dev_attr_target2.attr,
	&dev_attr_target3.attr,
	&dev_attr_target4.attr,
	&dev_attr_target5.attr,
	&dev_attr_target6.attr,
	&dev_attr_target7.attr,
	&dev_attr_target8.attr,
	&dev_attr_target9.attr,
	&dev_attr_target10.attr,
	&dev_attr_target11.attr,
	&dev_attr_target12.attr,
	&dev_attr_target13.attr,
	&dev_attr_target14.attr,
	&dev_attr_target15.attr,
	NULL,
};
1505 
1506 static umode_t cxl_region_target_visible(struct kobject *kobj,
1507 					 struct attribute *a, int n)
1508 {
1509 	struct device *dev = kobj_to_dev(kobj);
1510 	struct cxl_region *cxlr = to_cxl_region(dev);
1511 	struct cxl_region_params *p = &cxlr->params;
1512 
1513 	if (n < p->interleave_ways)
1514 		return a->mode;
1515 	return 0;
1516 }
1517 
/*
 * Attribute group holding the targetN attributes; per-attribute visibility
 * is decided by cxl_region_target_visible().
 */
static const struct attribute_group cxl_region_target_group = {
	.attrs = target_attrs,
	.is_visible = cxl_region_target_visible,
};
1522 
1523 static const struct attribute_group *get_cxl_region_target_group(void)
1524 {
1525 	return &cxl_region_target_group;
1526 }
1527 
/*
 * NULL-terminated list of attribute groups installed as the .groups of
 * cxl_region_type: the base CXL group, the region group and the target group.
 */
static const struct attribute_group *region_groups[] = {
	&cxl_base_attribute_group,
	&cxl_region_group,
	&cxl_region_target_group,
	NULL,
};
1534 
1535 static void cxl_region_release(struct device *dev)
1536 {
1537 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
1538 	struct cxl_region *cxlr = to_cxl_region(dev);
1539 	int id = atomic_read(&cxlrd->region_id);
1540 
1541 	/*
1542 	 * Try to reuse the recently idled id rather than the cached
1543 	 * next id to prevent the region id space from increasing
1544 	 * unnecessarily.
1545 	 */
1546 	if (cxlr->id < id)
1547 		if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
1548 			memregion_free(id);
1549 			goto out;
1550 		}
1551 
1552 	memregion_free(cxlr->id);
1553 out:
1554 	put_device(dev->parent);
1555 	kfree(cxlr);
1556 }
1557 
/*
 * Device type for CXL regions; is_cxl_region() and to_cxl_region() identify
 * region devices by comparing dev->type against this object.
 */
const struct device_type cxl_region_type = {
	.name = "cxl_region",
	.release = cxl_region_release,
	.groups = region_groups
};
1563 
1564 bool is_cxl_region(struct device *dev)
1565 {
1566 	return dev->type == &cxl_region_type;
1567 }
1568 EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);
1569 
1570 static struct cxl_region *to_cxl_region(struct device *dev)
1571 {
1572 	if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
1573 			  "not a cxl_region device\n"))
1574 		return NULL;
1575 
1576 	return container_of(dev, struct cxl_region, dev);
1577 }
1578 
1579 static void unregister_region(void *dev)
1580 {
1581 	struct cxl_region *cxlr = to_cxl_region(dev);
1582 	struct cxl_region_params *p = &cxlr->params;
1583 	int i;
1584 
1585 	device_del(dev);
1586 
1587 	/*
1588 	 * Now that region sysfs is shutdown, the parameter block is now
1589 	 * read-only, so no need to hold the region rwsem to access the
1590 	 * region parameters.
1591 	 */
1592 	for (i = 0; i < p->interleave_ways; i++)
1593 		detach_target(cxlr, i);
1594 
1595 	cxl_region_iomem_release(cxlr);
1596 	put_device(dev);
1597 }
1598 
1599 static struct lock_class_key cxl_region_key;
1600 
1601 static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
1602 {
1603 	struct cxl_region *cxlr;
1604 	struct device *dev;
1605 
1606 	cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
1607 	if (!cxlr) {
1608 		memregion_free(id);
1609 		return ERR_PTR(-ENOMEM);
1610 	}
1611 
1612 	dev = &cxlr->dev;
1613 	device_initialize(dev);
1614 	lockdep_set_class(&dev->mutex, &cxl_region_key);
1615 	dev->parent = &cxlrd->cxlsd.cxld.dev;
1616 	/*
1617 	 * Keep root decoder pinned through cxl_region_release to fixup
1618 	 * region id allocations
1619 	 */
1620 	get_device(dev->parent);
1621 	device_set_pm_not_required(dev);
1622 	dev->bus = &cxl_bus_type;
1623 	dev->type = &cxl_region_type;
1624 	cxlr->id = id;
1625 
1626 	return cxlr;
1627 }
1628 
1629 /**
1630  * devm_cxl_add_region - Adds a region to a decoder
1631  * @cxlrd: root decoder
1632  * @id: memregion id to create, or memregion_free() on failure
1633  * @mode: mode for the endpoint decoders of this region
1634  * @type: select whether this is an expander or accelerator (type-2 or type-3)
1635  *
1636  * This is the second step of region initialization. Regions exist within an
1637  * address space which is mapped by a @cxlrd.
1638  *
1639  * Return: 0 if the region was added to the @cxlrd, else returns negative error
1640  * code. The region will be named "regionZ" where Z is the unique region number.
1641  */
1642 static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
1643 					      int id,
1644 					      enum cxl_decoder_mode mode,
1645 					      enum cxl_decoder_type type)
1646 {
1647 	struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
1648 	struct cxl_region *cxlr;
1649 	struct device *dev;
1650 	int rc;
1651 
1652 	cxlr = cxl_region_alloc(cxlrd, id);
1653 	if (IS_ERR(cxlr))
1654 		return cxlr;
1655 	cxlr->mode = mode;
1656 	cxlr->type = type;
1657 
1658 	dev = &cxlr->dev;
1659 	rc = dev_set_name(dev, "region%d", id);
1660 	if (rc)
1661 		goto err;
1662 
1663 	rc = device_add(dev);
1664 	if (rc)
1665 		goto err;
1666 
1667 	rc = devm_add_action_or_reset(port->uport, unregister_region, cxlr);
1668 	if (rc)
1669 		return ERR_PTR(rc);
1670 
1671 	dev_dbg(port->uport, "%s: created %s\n",
1672 		dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
1673 	return cxlr;
1674 
1675 err:
1676 	put_device(dev);
1677 	return ERR_PTR(rc);
1678 }
1679 
1680 static ssize_t create_pmem_region_show(struct device *dev,
1681 				       struct device_attribute *attr, char *buf)
1682 {
1683 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
1684 
1685 	return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
1686 }
1687 
1688 static ssize_t create_pmem_region_store(struct device *dev,
1689 					struct device_attribute *attr,
1690 					const char *buf, size_t len)
1691 {
1692 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
1693 	struct cxl_region *cxlr;
1694 	int id, rc;
1695 
1696 	rc = sscanf(buf, "region%d\n", &id);
1697 	if (rc != 1)
1698 		return -EINVAL;
1699 
1700 	rc = memregion_alloc(GFP_KERNEL);
1701 	if (rc < 0)
1702 		return rc;
1703 
1704 	if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
1705 		memregion_free(rc);
1706 		return -EBUSY;
1707 	}
1708 
1709 	cxlr = devm_cxl_add_region(cxlrd, id, CXL_DECODER_PMEM,
1710 				   CXL_DECODER_EXPANDER);
1711 	if (IS_ERR(cxlr))
1712 		return PTR_ERR(cxlr);
1713 
1714 	return len;
1715 }
1716 DEVICE_ATTR_RW(create_pmem_region);
1717 
1718 static ssize_t region_show(struct device *dev, struct device_attribute *attr,
1719 			   char *buf)
1720 {
1721 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
1722 	ssize_t rc;
1723 
1724 	rc = down_read_interruptible(&cxl_region_rwsem);
1725 	if (rc)
1726 		return rc;
1727 
1728 	if (cxld->region)
1729 		rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
1730 	else
1731 		rc = sysfs_emit(buf, "\n");
1732 	up_read(&cxl_region_rwsem);
1733 
1734 	return rc;
1735 }
1736 DEVICE_ATTR_RO(region);
1737 
1738 static struct cxl_region *
1739 cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
1740 {
1741 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
1742 	struct device *region_dev;
1743 
1744 	region_dev = device_find_child_by_name(&cxld->dev, name);
1745 	if (!region_dev)
1746 		return ERR_PTR(-ENODEV);
1747 
1748 	return to_cxl_region(region_dev);
1749 }
1750 
1751 static ssize_t delete_region_store(struct device *dev,
1752 				   struct device_attribute *attr,
1753 				   const char *buf, size_t len)
1754 {
1755 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
1756 	struct cxl_port *port = to_cxl_port(dev->parent);
1757 	struct cxl_region *cxlr;
1758 
1759 	cxlr = cxl_find_region_by_name(cxlrd, buf);
1760 	if (IS_ERR(cxlr))
1761 		return PTR_ERR(cxlr);
1762 
1763 	devm_release_action(port->uport, unregister_region, cxlr);
1764 	put_device(&cxlr->dev);
1765 
1766 	return len;
1767 }
1768 DEVICE_ATTR_WO(delete_region);
1769 
1770 static void cxl_pmem_region_release(struct device *dev)
1771 {
1772 	struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
1773 	int i;
1774 
1775 	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
1776 		struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;
1777 
1778 		put_device(&cxlmd->dev);
1779 	}
1780 
1781 	kfree(cxlr_pmem);
1782 }
1783 
/* NULL-terminated attribute groups for pmem region devices: base CXL group only. */
static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
	&cxl_base_attribute_group,
	NULL,
};
1788 
/*
 * Device type for pmem region bridge devices; is_cxl_pmem_region() compares
 * dev->type against this object.
 */
const struct device_type cxl_pmem_region_type = {
	.name = "cxl_pmem_region",
	.release = cxl_pmem_region_release,
	.groups = cxl_pmem_region_attribute_groups,
};
1794 
1795 bool is_cxl_pmem_region(struct device *dev)
1796 {
1797 	return dev->type == &cxl_pmem_region_type;
1798 }
1799 EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL);
1800 
1801 struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
1802 {
1803 	if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
1804 			  "not a cxl_pmem_region device\n"))
1805 		return NULL;
1806 	return container_of(dev, struct cxl_pmem_region, dev);
1807 }
1808 EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
1809 
1810 static struct lock_class_key cxl_pmem_region_key;
1811 
1812 static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
1813 {
1814 	struct cxl_region_params *p = &cxlr->params;
1815 	struct cxl_pmem_region *cxlr_pmem;
1816 	struct device *dev;
1817 	int i;
1818 
1819 	down_read(&cxl_region_rwsem);
1820 	if (p->state != CXL_CONFIG_COMMIT) {
1821 		cxlr_pmem = ERR_PTR(-ENXIO);
1822 		goto out;
1823 	}
1824 
1825 	cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets),
1826 			    GFP_KERNEL);
1827 	if (!cxlr_pmem) {
1828 		cxlr_pmem = ERR_PTR(-ENOMEM);
1829 		goto out;
1830 	}
1831 
1832 	cxlr_pmem->hpa_range.start = p->res->start;
1833 	cxlr_pmem->hpa_range.end = p->res->end;
1834 
1835 	/* Snapshot the region configuration underneath the cxl_region_rwsem */
1836 	cxlr_pmem->nr_mappings = p->nr_targets;
1837 	for (i = 0; i < p->nr_targets; i++) {
1838 		struct cxl_endpoint_decoder *cxled = p->targets[i];
1839 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1840 		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
1841 
1842 		m->cxlmd = cxlmd;
1843 		get_device(&cxlmd->dev);
1844 		m->start = cxled->dpa_res->start;
1845 		m->size = resource_size(cxled->dpa_res);
1846 		m->position = i;
1847 	}
1848 
1849 	dev = &cxlr_pmem->dev;
1850 	cxlr_pmem->cxlr = cxlr;
1851 	device_initialize(dev);
1852 	lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
1853 	device_set_pm_not_required(dev);
1854 	dev->parent = &cxlr->dev;
1855 	dev->bus = &cxl_bus_type;
1856 	dev->type = &cxl_pmem_region_type;
1857 out:
1858 	up_read(&cxl_region_rwsem);
1859 
1860 	return cxlr_pmem;
1861 }
1862 
/* devm action: unregister the pmem region device passed as @dev. */
static void cxlr_pmem_unregister(void *dev)
{
	struct device *pmem_dev = dev;

	device_unregister(pmem_dev);
}
1867 
1868 /**
1869  * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
1870  * @cxlr: parent CXL region for this pmem region bridge device
1871  *
1872  * Return: 0 on success negative error code on failure.
1873  */
1874 static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
1875 {
1876 	struct cxl_pmem_region *cxlr_pmem;
1877 	struct device *dev;
1878 	int rc;
1879 
1880 	cxlr_pmem = cxl_pmem_region_alloc(cxlr);
1881 	if (IS_ERR(cxlr_pmem))
1882 		return PTR_ERR(cxlr_pmem);
1883 
1884 	dev = &cxlr_pmem->dev;
1885 	rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
1886 	if (rc)
1887 		goto err;
1888 
1889 	rc = device_add(dev);
1890 	if (rc)
1891 		goto err;
1892 
1893 	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
1894 		dev_name(dev));
1895 
1896 	return devm_add_action_or_reset(&cxlr->dev, cxlr_pmem_unregister, dev);
1897 
1898 err:
1899 	put_device(dev);
1900 	return rc;
1901 }
1902 
1903 static int cxl_region_probe(struct device *dev)
1904 {
1905 	struct cxl_region *cxlr = to_cxl_region(dev);
1906 	struct cxl_region_params *p = &cxlr->params;
1907 	int rc;
1908 
1909 	rc = down_read_interruptible(&cxl_region_rwsem);
1910 	if (rc) {
1911 		dev_dbg(&cxlr->dev, "probe interrupted\n");
1912 		return rc;
1913 	}
1914 
1915 	if (p->state < CXL_CONFIG_COMMIT) {
1916 		dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
1917 		rc = -ENXIO;
1918 	}
1919 
1920 	/*
1921 	 * From this point on any path that changes the region's state away from
1922 	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
1923 	 */
1924 	up_read(&cxl_region_rwsem);
1925 
1926 	switch (cxlr->mode) {
1927 	case CXL_DECODER_PMEM:
1928 		return devm_cxl_add_pmem_region(cxlr);
1929 	default:
1930 		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
1931 			cxlr->mode);
1932 		return -ENXIO;
1933 	}
1934 }
1935 
/*
 * CXL bus driver for region devices (id CXL_DEVICE_REGION). Only a probe
 * callback is provided here.
 */
static struct cxl_driver cxl_region_driver = {
	.name = "cxl_region",
	.probe = cxl_region_probe,
	.id = CXL_DEVICE_REGION,
};
1941 
1942 int cxl_region_init(void)
1943 {
1944 	return cxl_driver_register(&cxl_region_driver);
1945 }
1946 
1947 void cxl_region_exit(void)
1948 {
1949 	cxl_driver_unregister(&cxl_region_driver);
1950 }
1951 
/* Use symbols from the CXL namespace and declare the module alias for region devices. */
MODULE_IMPORT_NS(CXL);
MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
1954