xref: /openbmc/linux/drivers/cxl/core/region.c (revision ecefa105)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
3 #include <linux/memregion.h>
4 #include <linux/genalloc.h>
5 #include <linux/device.h>
6 #include <linux/module.h>
7 #include <linux/slab.h>
8 #include <linux/uuid.h>
9 #include <linux/sort.h>
10 #include <linux/idr.h>
11 #include <cxlmem.h>
12 #include <cxl.h>
13 #include "core.h"
14 
15 /**
16  * DOC: cxl core region
17  *
18  * CXL Regions represent mapped memory capacity in system physical address
19  * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
20  * Memory ranges, Regions represent the active mapped capacity by the HDM
21  * Decoder Capability structures throughout the Host Bridges, Switches, and
22  * Endpoints in the topology.
23  *
24  * Region configuration has ordering constraints. UUID may be set at any time
25  * but is only visible for persistent regions.
26  * 1. Interleave granularity
27  * 2. Interleave size
28  * 3. Decoder targets
29  */
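
/*
 * A minimal sketch of that ordering via sysfs, assuming an x1 persistent
 * memory region and hypothetical decoder names (decoder0.0 as the root
 * decoder, decoder2.0 as the endpoint decoder):
 *
 *   region=$(cat /sys/bus/cxl/devices/decoder0.0/create_pmem_region)
 *   echo "$region" > /sys/bus/cxl/devices/decoder0.0/create_pmem_region
 *   echo 256 > /sys/bus/cxl/devices/"$region"/interleave_granularity
 *   echo 1 > /sys/bus/cxl/devices/"$region"/interleave_ways
 *   uuidgen > /sys/bus/cxl/devices/"$region"/uuid
 *   echo $((256 << 20)) > /sys/bus/cxl/devices/"$region"/size
 *   echo decoder2.0 > /sys/bus/cxl/devices/"$region"/target0
 *   echo 1 > /sys/bus/cxl/devices/"$region"/commit
 */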
30 
31 /*
32  * All changes to the interleave configuration occur with this lock held
33  * for write.
34  */
35 static DECLARE_RWSEM(cxl_region_rwsem);
36 
37 static struct cxl_region *to_cxl_region(struct device *dev);
38 
39 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
40 			 char *buf)
41 {
42 	struct cxl_region *cxlr = to_cxl_region(dev);
43 	struct cxl_region_params *p = &cxlr->params;
44 	ssize_t rc;
45 
46 	rc = down_read_interruptible(&cxl_region_rwsem);
47 	if (rc)
48 		return rc;
49 	if (cxlr->mode != CXL_DECODER_PMEM)
50 		rc = sysfs_emit(buf, "\n");
51 	else
52 		rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
53 	up_read(&cxl_region_rwsem);
54 
55 	return rc;
56 }
57 
58 static int is_dup(struct device *match, void *data)
59 {
60 	struct cxl_region_params *p;
61 	struct cxl_region *cxlr;
62 	uuid_t *uuid = data;
63 
64 	if (!is_cxl_region(match))
65 		return 0;
66 
67 	lockdep_assert_held(&cxl_region_rwsem);
68 	cxlr = to_cxl_region(match);
69 	p = &cxlr->params;
70 
71 	if (uuid_equal(&p->uuid, uuid)) {
72 		dev_dbg(match, "already has uuid: %pUb\n", uuid);
73 		return -EBUSY;
74 	}
75 
76 	return 0;
77 }
78 
79 static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
80 			  const char *buf, size_t len)
81 {
82 	struct cxl_region *cxlr = to_cxl_region(dev);
83 	struct cxl_region_params *p = &cxlr->params;
84 	uuid_t temp;
85 	ssize_t rc;
86 
87 	if (len != UUID_STRING_LEN + 1)
88 		return -EINVAL;
89 
90 	rc = uuid_parse(buf, &temp);
91 	if (rc)
92 		return rc;
93 
94 	if (uuid_is_null(&temp))
95 		return -EINVAL;
96 
97 	rc = down_write_killable(&cxl_region_rwsem);
98 	if (rc)
99 		return rc;
100 
101 	if (uuid_equal(&p->uuid, &temp))
102 		goto out;
103 
104 	rc = -EBUSY;
105 	if (p->state >= CXL_CONFIG_ACTIVE)
106 		goto out;
107 
108 	rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
109 	if (rc < 0)
110 		goto out;
111 
112 	uuid_copy(&p->uuid, &temp);
113 out:
114 	up_write(&cxl_region_rwsem);
115 
116 	if (rc)
117 		return rc;
118 	return len;
119 }
120 static DEVICE_ATTR_RW(uuid);
121 
122 static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
123 					  struct cxl_region *cxlr)
124 {
125 	return xa_load(&port->regions, (unsigned long)cxlr);
126 }
127 
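/*
 * Walk each committed target's decode chain from the host bridge port down
 * through any switches, invoking ->reset() on each switch decoder, and
 * finally reset the endpoint decoder itself. Targets are torn down in
 * reverse order of how they were committed.
 */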
128 static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
129 {
130 	struct cxl_region_params *p = &cxlr->params;
131 	int i;
132 
133 	for (i = count - 1; i >= 0; i--) {
134 		struct cxl_endpoint_decoder *cxled = p->targets[i];
135 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
136 		struct cxl_port *iter = cxled_to_port(cxled);
137 		struct cxl_ep *ep;
138 		int rc = 0;
139 
140 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
141 			iter = to_cxl_port(iter->dev.parent);
142 
143 		for (ep = cxl_ep_load(iter, cxlmd); iter;
144 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
145 			struct cxl_region_ref *cxl_rr;
146 			struct cxl_decoder *cxld;
147 
148 			cxl_rr = cxl_rr_load(iter, cxlr);
149 			cxld = cxl_rr->decoder;
150 			if (cxld->reset)
151 				rc = cxld->reset(cxld);
152 			if (rc)
153 				return rc;
154 		}
155 
156 		rc = cxled->cxld.reset(&cxled->cxld);
157 		if (rc)
158 			return rc;
159 	}
160 
161 	return 0;
162 }
163 
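/*
 * Commit a decoder via its ->commit() callback. Decoders without a
 * ->commit() op are only tolerated when they are single-target
 * (passthrough) switch decoders.
 */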
164 static int commit_decoder(struct cxl_decoder *cxld)
165 {
166 	struct cxl_switch_decoder *cxlsd = NULL;
167 
168 	if (cxld->commit)
169 		return cxld->commit(cxld);
170 
171 	if (is_switch_decoder(&cxld->dev))
172 		cxlsd = to_cxl_switch_decoder(&cxld->dev);
173 
174 	if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
175 			  "->commit() is required\n"))
176 		return -ENXIO;
177 	return 0;
178 }
179 
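/*
 * Program the decode chain for each endpoint decoder in the region,
 * committing from the endpoint port up toward the root. On failure, reset
 * the partially programmed chain and unwind any targets that were already
 * committed.
 */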
180 static int cxl_region_decode_commit(struct cxl_region *cxlr)
181 {
182 	struct cxl_region_params *p = &cxlr->params;
183 	int i, rc = 0;
184 
185 	for (i = 0; i < p->nr_targets; i++) {
186 		struct cxl_endpoint_decoder *cxled = p->targets[i];
187 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
188 		struct cxl_region_ref *cxl_rr;
189 		struct cxl_decoder *cxld;
190 		struct cxl_port *iter;
191 		struct cxl_ep *ep;
192 
193 		/* commit bottom up */
194 		for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
195 		     iter = to_cxl_port(iter->dev.parent)) {
196 			cxl_rr = cxl_rr_load(iter, cxlr);
197 			cxld = cxl_rr->decoder;
198 			rc = commit_decoder(cxld);
199 			if (rc)
200 				break;
201 		}
202 
203 		if (rc) {
204 			/* programming @iter failed, teardown */
205 			for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
206 			     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
207 				cxl_rr = cxl_rr_load(iter, cxlr);
208 				cxld = cxl_rr->decoder;
209 				if (cxld->reset)
210 					cxld->reset(cxld);
211 			}
212 
213 			cxled->cxld.reset(&cxled->cxld);
214 			goto err;
215 		}
216 	}
217 
218 	return 0;
219 
220 err:
221 	/* undo the targets that were successfully committed */
222 	cxl_region_decode_reset(cxlr, i);
223 	return rc;
224 }
225 
226 static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
227 			    const char *buf, size_t len)
228 {
229 	struct cxl_region *cxlr = to_cxl_region(dev);
230 	struct cxl_region_params *p = &cxlr->params;
231 	bool commit;
232 	ssize_t rc;
233 
234 	rc = kstrtobool(buf, &commit);
235 	if (rc)
236 		return rc;
237 
238 	rc = down_write_killable(&cxl_region_rwsem);
239 	if (rc)
240 		return rc;
241 
242 	/* Already in the requested state? */
243 	if (commit && p->state >= CXL_CONFIG_COMMIT)
244 		goto out;
245 	if (!commit && p->state < CXL_CONFIG_COMMIT)
246 		goto out;
247 
248 	/* Not ready to commit? */
249 	if (commit && p->state < CXL_CONFIG_ACTIVE) {
250 		rc = -ENXIO;
251 		goto out;
252 	}
253 
254 	if (commit)
255 		rc = cxl_region_decode_commit(cxlr);
256 	else {
257 		p->state = CXL_CONFIG_RESET_PENDING;
258 		up_write(&cxl_region_rwsem);
259 		device_release_driver(&cxlr->dev);
260 		down_write(&cxl_region_rwsem);
261 
262 		/*
263 		 * The lock was dropped, so need to revalidate that the reset is
264 		 * still pending.
265 		 */
266 		if (p->state == CXL_CONFIG_RESET_PENDING)
267 			rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
268 	}
269 
270 	if (rc)
271 		goto out;
272 
273 	if (commit)
274 		p->state = CXL_CONFIG_COMMIT;
275 	else if (p->state == CXL_CONFIG_RESET_PENDING)
276 		p->state = CXL_CONFIG_ACTIVE;
277 
278 out:
279 	up_write(&cxl_region_rwsem);
280 
281 	if (rc)
282 		return rc;
283 	return len;
284 }
285 
286 static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
287 			   char *buf)
288 {
289 	struct cxl_region *cxlr = to_cxl_region(dev);
290 	struct cxl_region_params *p = &cxlr->params;
291 	ssize_t rc;
292 
293 	rc = down_read_interruptible(&cxl_region_rwsem);
294 	if (rc)
295 		return rc;
296 	rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
297 	up_read(&cxl_region_rwsem);
298 
299 	return rc;
300 }
301 static DEVICE_ATTR_RW(commit);
302 
303 static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
304 				  int n)
305 {
306 	struct device *dev = kobj_to_dev(kobj);
307 	struct cxl_region *cxlr = to_cxl_region(dev);
308 
309 	/*
310 	 * Support tooling that expects to find a 'uuid' attribute for all
311 	 * regions regardless of mode.
312 	 */
313 	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
314 		return 0444;
315 	return a->mode;
316 }
317 
318 static ssize_t interleave_ways_show(struct device *dev,
319 				    struct device_attribute *attr, char *buf)
320 {
321 	struct cxl_region *cxlr = to_cxl_region(dev);
322 	struct cxl_region_params *p = &cxlr->params;
323 	ssize_t rc;
324 
325 	rc = down_read_interruptible(&cxl_region_rwsem);
326 	if (rc)
327 		return rc;
328 	rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
329 	up_read(&cxl_region_rwsem);
330 
331 	return rc;
332 }
333 
334 static const struct attribute_group *get_cxl_region_target_group(void);
335 
336 static ssize_t interleave_ways_store(struct device *dev,
337 				     struct device_attribute *attr,
338 				     const char *buf, size_t len)
339 {
340 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
341 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
342 	struct cxl_region *cxlr = to_cxl_region(dev);
343 	struct cxl_region_params *p = &cxlr->params;
344 	unsigned int val, save;
345 	int rc;
346 	u8 iw;
347 
348 	rc = kstrtouint(buf, 0, &val);
349 	if (rc)
350 		return rc;
351 
352 	rc = ways_to_eiw(val, &iw);
353 	if (rc)
354 		return rc;
355 
356 	/*
357 	 * Even for x3, x6, and x12 interleaves the region interleave must be a
358 	 * power of 2 multiple of the host bridge interleave.
359 	 */
360 	if (!is_power_of_2(val / cxld->interleave_ways) ||
361 	    (val % cxld->interleave_ways)) {
362 		dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
363 		return -EINVAL;
364 	}
365 
366 	rc = down_write_killable(&cxl_region_rwsem);
367 	if (rc)
368 		return rc;
369 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
370 		rc = -EBUSY;
371 		goto out;
372 	}
373 
374 	save = p->interleave_ways;
375 	p->interleave_ways = val;
376 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
377 	if (rc)
378 		p->interleave_ways = save;
379 out:
380 	up_write(&cxl_region_rwsem);
381 	if (rc)
382 		return rc;
383 	return len;
384 }
385 static DEVICE_ATTR_RW(interleave_ways);
386 
387 static ssize_t interleave_granularity_show(struct device *dev,
388 					   struct device_attribute *attr,
389 					   char *buf)
390 {
391 	struct cxl_region *cxlr = to_cxl_region(dev);
392 	struct cxl_region_params *p = &cxlr->params;
393 	ssize_t rc;
394 
395 	rc = down_read_interruptible(&cxl_region_rwsem);
396 	if (rc)
397 		return rc;
398 	rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
399 	up_read(&cxl_region_rwsem);
400 
401 	return rc;
402 }
403 
404 static ssize_t interleave_granularity_store(struct device *dev,
405 					    struct device_attribute *attr,
406 					    const char *buf, size_t len)
407 {
408 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
409 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
410 	struct cxl_region *cxlr = to_cxl_region(dev);
411 	struct cxl_region_params *p = &cxlr->params;
412 	int rc, val;
413 	u16 ig;
414 
415 	rc = kstrtoint(buf, 0, &val);
416 	if (rc)
417 		return rc;
418 
419 	rc = granularity_to_eig(val, &ig);
420 	if (rc)
421 		return rc;
422 
423 	/*
424 	 * When the host-bridge is interleaved, disallow region granularity !=
425 	 * root granularity. Regions with a granularity less than the root
426 	 * interleave result in needing multiple endpoints to support a single
427 	 * slot in the interleave (possible to support in the future). Regions
428 	 * with a granularity greater than the root interleave result in invalid
429 	 * DPA translations (invalid to support).
430 	 */
431 	if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
432 		return -EINVAL;
433 
434 	rc = down_write_killable(&cxl_region_rwsem);
435 	if (rc)
436 		return rc;
437 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
438 		rc = -EBUSY;
439 		goto out;
440 	}
441 
442 	p->interleave_granularity = val;
443 out:
444 	up_write(&cxl_region_rwsem);
445 	if (rc)
446 		return rc;
447 	return len;
448 }
449 static DEVICE_ATTR_RW(interleave_granularity);
450 
451 static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
452 			     char *buf)
453 {
454 	struct cxl_region *cxlr = to_cxl_region(dev);
455 	struct cxl_region_params *p = &cxlr->params;
456 	u64 resource = -1ULL;
457 	ssize_t rc;
458 
459 	rc = down_read_interruptible(&cxl_region_rwsem);
460 	if (rc)
461 		return rc;
462 	if (p->res)
463 		resource = p->res->start;
464 	rc = sysfs_emit(buf, "%#llx\n", resource);
465 	up_read(&cxl_region_rwsem);
466 
467 	return rc;
468 }
469 static DEVICE_ATTR_RO(resource);
470 
471 static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
472 			 char *buf)
473 {
474 	struct cxl_region *cxlr = to_cxl_region(dev);
475 
476 	return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
477 }
478 static DEVICE_ATTR_RO(mode);
479 
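/*
 * Reserve a system physical address range for the region from the root
 * decoder's window. Interleave ways, granularity (and the uuid for PMEM
 * regions) must already be set, and @size must be a multiple of
 * SZ_256M * interleave_ways.
 */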
480 static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
481 {
482 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
483 	struct cxl_region_params *p = &cxlr->params;
484 	struct resource *res;
485 	u32 remainder = 0;
486 
487 	lockdep_assert_held_write(&cxl_region_rwsem);
488 
489 	/* Nothing to do... */
490 	if (p->res && resource_size(p->res) == size)
491 		return 0;
492 
493 	/* To change size the old size must be freed first */
494 	if (p->res)
495 		return -EBUSY;
496 
497 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
498 		return -EBUSY;
499 
500 	/* ways, granularity and uuid (if PMEM) need to be set before HPA */
501 	if (!p->interleave_ways || !p->interleave_granularity ||
502 	    (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
503 		return -ENXIO;
504 
505 	div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder);
506 	if (remainder)
507 		return -EINVAL;
508 
509 	res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
510 				    dev_name(&cxlr->dev));
511 	if (IS_ERR(res)) {
512 		dev_dbg(&cxlr->dev, "failed to allocate HPA: %ld\n",
513 			PTR_ERR(res));
514 		return PTR_ERR(res);
515 	}
516 
517 	p->res = res;
518 	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
519 
520 	return 0;
521 }
522 
523 static void cxl_region_iomem_release(struct cxl_region *cxlr)
524 {
525 	struct cxl_region_params *p = &cxlr->params;
526 
527 	if (device_is_registered(&cxlr->dev))
528 		lockdep_assert_held_write(&cxl_region_rwsem);
529 	if (p->res) {
530 		/*
531 		 * Autodiscovered regions may not have been able to insert their
532 		 * resource.
533 		 */
534 		if (p->res->parent)
535 			remove_resource(p->res);
536 		kfree(p->res);
537 		p->res = NULL;
538 	}
539 }
540 
541 static int free_hpa(struct cxl_region *cxlr)
542 {
543 	struct cxl_region_params *p = &cxlr->params;
544 
545 	lockdep_assert_held_write(&cxl_region_rwsem);
546 
547 	if (!p->res)
548 		return 0;
549 
550 	if (p->state >= CXL_CONFIG_ACTIVE)
551 		return -EBUSY;
552 
553 	cxl_region_iomem_release(cxlr);
554 	p->state = CXL_CONFIG_IDLE;
555 	return 0;
556 }
557 
558 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
559 			  const char *buf, size_t len)
560 {
561 	struct cxl_region *cxlr = to_cxl_region(dev);
562 	u64 val;
563 	int rc;
564 
565 	rc = kstrtou64(buf, 0, &val);
566 	if (rc)
567 		return rc;
568 
569 	rc = down_write_killable(&cxl_region_rwsem);
570 	if (rc)
571 		return rc;
572 
573 	if (val)
574 		rc = alloc_hpa(cxlr, val);
575 	else
576 		rc = free_hpa(cxlr);
577 	up_write(&cxl_region_rwsem);
578 
579 	if (rc)
580 		return rc;
581 
582 	return len;
583 }
584 
585 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
586 			 char *buf)
587 {
588 	struct cxl_region *cxlr = to_cxl_region(dev);
589 	struct cxl_region_params *p = &cxlr->params;
590 	u64 size = 0;
591 	ssize_t rc;
592 
593 	rc = down_read_interruptible(&cxl_region_rwsem);
594 	if (rc)
595 		return rc;
596 	if (p->res)
597 		size = resource_size(p->res);
598 	rc = sysfs_emit(buf, "%#llx\n", size);
599 	up_read(&cxl_region_rwsem);
600 
601 	return rc;
602 }
603 static DEVICE_ATTR_RW(size);
604 
605 static struct attribute *cxl_region_attrs[] = {
606 	&dev_attr_uuid.attr,
607 	&dev_attr_commit.attr,
608 	&dev_attr_interleave_ways.attr,
609 	&dev_attr_interleave_granularity.attr,
610 	&dev_attr_resource.attr,
611 	&dev_attr_size.attr,
612 	&dev_attr_mode.attr,
613 	NULL,
614 };
615 
616 static const struct attribute_group cxl_region_group = {
617 	.attrs = cxl_region_attrs,
618 	.is_visible = cxl_region_visible,
619 };
620 
621 static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
622 {
623 	struct cxl_region_params *p = &cxlr->params;
624 	struct cxl_endpoint_decoder *cxled;
625 	int rc;
626 
627 	rc = down_read_interruptible(&cxl_region_rwsem);
628 	if (rc)
629 		return rc;
630 
631 	if (pos >= p->interleave_ways) {
632 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
633 			p->interleave_ways);
634 		rc = -ENXIO;
635 		goto out;
636 	}
637 
638 	cxled = p->targets[pos];
639 	if (!cxled)
640 		rc = sysfs_emit(buf, "\n");
641 	else
642 		rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
643 out:
644 	up_read(&cxl_region_rwsem);
645 
646 	return rc;
647 }
648 
649 static int match_free_decoder(struct device *dev, void *data)
650 {
651 	struct cxl_decoder *cxld;
652 	int *id = data;
653 
654 	if (!is_switch_decoder(dev))
655 		return 0;
656 
657 	cxld = to_cxl_decoder(dev);
658 
659 	/* enforce ordered allocation */
660 	if (cxld->id != *id)
661 		return 0;
662 
663 	if (!cxld->region)
664 		return 1;
665 
666 	(*id)++;
667 
668 	return 0;
669 }
670 
671 static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
672 						   struct cxl_region *cxlr)
673 {
674 	struct device *dev;
675 	int id = 0;
676 
677 	dev = device_find_child(&port->dev, &id, match_free_decoder);
678 	if (!dev)
679 		return NULL;
680 	/*
681 	 * This decoder remains registered as long as the endpoint decoder is
682 	 * registered, and endpoint decoder unregistration holds the
683 	 * cxl_region_rwsem over unregister events, so there is no need to hold
684 	 * on to this extra reference.
685 	 */
686 	put_device(dev);
687 	return to_cxl_decoder(dev);
688 }
689 
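/*
 * Allocate and track a per-port reference ('struct cxl_region_ref') for
 * @cxlr, enforcing that regions attach to @port in increasing HPA order.
 */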
690 static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
691 					       struct cxl_region *cxlr)
692 {
693 	struct cxl_region_params *p = &cxlr->params;
694 	struct cxl_region_ref *cxl_rr, *iter;
695 	unsigned long index;
696 	int rc;
697 
698 	xa_for_each(&port->regions, index, iter) {
699 		struct cxl_region_params *ip = &iter->region->params;
700 
701 		if (!ip->res)
702 			continue;
703 
704 		if (ip->res->start > p->res->start) {
705 			dev_dbg(&cxlr->dev,
706 				"%s: HPA order violation %s:%pr vs %pr\n",
707 				dev_name(&port->dev),
708 				dev_name(&iter->region->dev), ip->res, p->res);
709 			return ERR_PTR(-EBUSY);
710 		}
711 	}
712 
713 	cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
714 	if (!cxl_rr)
715 		return ERR_PTR(-ENOMEM);
716 	cxl_rr->port = port;
717 	cxl_rr->region = cxlr;
718 	cxl_rr->nr_targets = 1;
719 	xa_init(&cxl_rr->endpoints);
720 
721 	rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
722 	if (rc) {
723 		dev_dbg(&cxlr->dev,
724 			"%s: failed to track region reference: %d\n",
725 			dev_name(&port->dev), rc);
726 		kfree(cxl_rr);
727 		return ERR_PTR(rc);
728 	}
729 
730 	return cxl_rr;
731 }
732 
733 static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
734 {
735 	struct cxl_region *cxlr = cxl_rr->region;
736 	struct cxl_decoder *cxld = cxl_rr->decoder;
737 
738 	if (!cxld)
739 		return;
740 
741 	dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
742 	if (cxld->region == cxlr) {
743 		cxld->region = NULL;
744 		put_device(&cxlr->dev);
745 	}
746 }
747 
748 static void free_region_ref(struct cxl_region_ref *cxl_rr)
749 {
750 	struct cxl_port *port = cxl_rr->port;
751 	struct cxl_region *cxlr = cxl_rr->region;
752 
753 	cxl_rr_free_decoder(cxl_rr);
754 	xa_erase(&port->regions, (unsigned long)cxlr);
755 	xa_destroy(&cxl_rr->endpoints);
756 	kfree(cxl_rr);
757 }
758 
759 static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
760 			 struct cxl_endpoint_decoder *cxled)
761 {
762 	int rc;
763 	struct cxl_port *port = cxl_rr->port;
764 	struct cxl_region *cxlr = cxl_rr->region;
765 	struct cxl_decoder *cxld = cxl_rr->decoder;
766 	struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
767 
768 	if (ep) {
769 		rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
770 			       GFP_KERNEL);
771 		if (rc)
772 			return rc;
773 	}
774 	cxl_rr->nr_eps++;
775 
776 	if (!cxld->region) {
777 		cxld->region = cxlr;
778 		get_device(&cxlr->dev);
779 	}
780 
781 	return 0;
782 }
783 
784 static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
785 				struct cxl_endpoint_decoder *cxled,
786 				struct cxl_region_ref *cxl_rr)
787 {
788 	struct cxl_decoder *cxld;
789 
790 	if (port == cxled_to_port(cxled))
791 		cxld = &cxled->cxld;
792 	else
793 		cxld = cxl_region_find_decoder(port, cxlr);
794 	if (!cxld) {
795 		dev_dbg(&cxlr->dev, "%s: no decoder available\n",
796 			dev_name(&port->dev));
797 		return -EBUSY;
798 	}
799 
800 	if (cxld->region) {
801 		dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
802 			dev_name(&port->dev), dev_name(&cxld->dev),
803 			dev_name(&cxld->region->dev));
804 		return -EBUSY;
805 	}
806 
807 	cxl_rr->decoder = cxld;
808 	return 0;
809 }
810 
811 /**
812  * cxl_port_attach_region() - track a region's interest in a port by endpoint
813  * @port: port to add a new region reference 'struct cxl_region_ref'
814  * @cxlr: region to attach to @port
815  * @cxled: endpoint decoder used to create or further pin a region reference
816  * @pos: interleave position of @cxled in @cxlr
817  *
818  * The attach event is an opportunity to validate CXL decode setup
819  * constraints and record metadata needed for programming HDM decoders,
820  * in particular decoder target lists.
821  *
822  * The steps are:
823  *
824  * - validate that there are no other regions with a higher HPA already
825  *   associated with @port
826  * - establish a region reference if one is not already present
827  *
828  *   - additionally allocate a decoder instance that will host @cxlr on
829  *     @port
830  *
831  * - pin the region reference by the endpoint
832  * - account for how many entries in @port's target list are needed to
833  *   cover all of the added endpoints.
834  */
835 static int cxl_port_attach_region(struct cxl_port *port,
836 				  struct cxl_region *cxlr,
837 				  struct cxl_endpoint_decoder *cxled, int pos)
838 {
839 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
840 	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
841 	struct cxl_region_ref *cxl_rr;
842 	bool nr_targets_inc = false;
843 	struct cxl_decoder *cxld;
844 	unsigned long index;
845 	int rc = -EBUSY;
846 
847 	lockdep_assert_held_write(&cxl_region_rwsem);
848 
849 	cxl_rr = cxl_rr_load(port, cxlr);
850 	if (cxl_rr) {
851 		struct cxl_ep *ep_iter;
852 		int found = 0;
853 
854 		/*
855 		 * Walk the existing endpoints that have been attached to
856 		 * @cxlr at @port and see if they share the same 'next' port
857 		 * in the downstream direction, i.e. endpoints that share a common
858 		 * upstream switch.
859 		 */
860 		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
861 			if (ep_iter == ep)
862 				continue;
863 			if (ep_iter->next == ep->next) {
864 				found++;
865 				break;
866 			}
867 		}
868 
869 		/*
870 		 * New target port, or @port is an endpoint port that always
871 		 * accounts its own local decode as a target.
872 		 */
873 		if (!found || !ep->next) {
874 			cxl_rr->nr_targets++;
875 			nr_targets_inc = true;
876 		}
877 	} else {
878 		cxl_rr = alloc_region_ref(port, cxlr);
879 		if (IS_ERR(cxl_rr)) {
880 			dev_dbg(&cxlr->dev,
881 				"%s: failed to allocate region reference\n",
882 				dev_name(&port->dev));
883 			return PTR_ERR(cxl_rr);
884 		}
885 		nr_targets_inc = true;
886 
887 		rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
888 		if (rc)
889 			goto out_erase;
890 	}
891 	cxld = cxl_rr->decoder;
892 
893 	rc = cxl_rr_ep_add(cxl_rr, cxled);
894 	if (rc) {
895 		dev_dbg(&cxlr->dev,
896 			"%s: failed to track endpoint %s:%s reference\n",
897 			dev_name(&port->dev), dev_name(&cxlmd->dev),
898 			dev_name(&cxld->dev));
899 		goto out_erase;
900 	}
901 
902 	dev_dbg(&cxlr->dev,
903 		"%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
904 		dev_name(port->uport), dev_name(&port->dev),
905 		dev_name(&cxld->dev), dev_name(&cxlmd->dev),
906 		dev_name(&cxled->cxld.dev), pos,
907 		ep ? ep->next ? dev_name(ep->next->uport) :
908 				      dev_name(&cxlmd->dev) :
909 			   "none",
910 		cxl_rr->nr_eps, cxl_rr->nr_targets);
911 
912 	return 0;
913 out_erase:
914 	if (nr_targets_inc)
915 		cxl_rr->nr_targets--;
916 	if (cxl_rr->nr_eps == 0)
917 		free_region_ref(cxl_rr);
918 	return rc;
919 }
920 
921 static void cxl_port_detach_region(struct cxl_port *port,
922 				   struct cxl_region *cxlr,
923 				   struct cxl_endpoint_decoder *cxled)
924 {
925 	struct cxl_region_ref *cxl_rr;
926 	struct cxl_ep *ep = NULL;
927 
928 	lockdep_assert_held_write(&cxl_region_rwsem);
929 
930 	cxl_rr = cxl_rr_load(port, cxlr);
931 	if (!cxl_rr)
932 		return;
933 
934 	/*
935 	 * Endpoint ports do not carry cxl_ep references, and they
936 	 * never target more than one endpoint by definition
937 	 */
938 	if (cxl_rr->decoder == &cxled->cxld)
939 		cxl_rr->nr_eps--;
940 	else
941 		ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
942 	if (ep) {
943 		struct cxl_ep *ep_iter;
944 		unsigned long index;
945 		int found = 0;
946 
947 		cxl_rr->nr_eps--;
948 		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
949 			if (ep_iter->next == ep->next) {
950 				found++;
951 				break;
952 			}
953 		}
954 		if (!found)
955 			cxl_rr->nr_targets--;
956 	}
957 
958 	if (cxl_rr->nr_eps == 0)
959 		free_region_ref(cxl_rr);
960 }
961 
962 static int check_last_peer(struct cxl_endpoint_decoder *cxled,
963 			   struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
964 			   int distance)
965 {
966 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
967 	struct cxl_region *cxlr = cxl_rr->region;
968 	struct cxl_region_params *p = &cxlr->params;
969 	struct cxl_endpoint_decoder *cxled_peer;
970 	struct cxl_port *port = cxl_rr->port;
971 	struct cxl_memdev *cxlmd_peer;
972 	struct cxl_ep *ep_peer;
973 	int pos = cxled->pos;
974 
975 	/*
976 	 * If this position wants to share a dport with the last endpoint mapped
977 	 * then that endpoint, at index 'position - distance', must also be
978 	 * mapped by this dport.
979 	 */
980 	if (pos < distance) {
981 		dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
982 			dev_name(port->uport), dev_name(&port->dev),
983 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
984 		return -ENXIO;
985 	}
986 	cxled_peer = p->targets[pos - distance];
987 	cxlmd_peer = cxled_to_memdev(cxled_peer);
988 	ep_peer = cxl_ep_load(port, cxlmd_peer);
989 	if (ep->dport != ep_peer->dport) {
990 		dev_dbg(&cxlr->dev,
991 			"%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
992 			dev_name(port->uport), dev_name(&port->dev),
993 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
994 			dev_name(&cxlmd_peer->dev),
995 			dev_name(&cxled_peer->cxld.dev));
996 		return -ENXIO;
997 	}
998 
999 	return 0;
1000 }
1001 
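/*
 * Derive the interleave ways and granularity that @port's decoder must use
 * for @cxlr and record @cxled's dport in the decoder target list. For
 * autodiscovered regions the existing hardware programming is validated
 * rather than written.
 */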
1002 static int cxl_port_setup_targets(struct cxl_port *port,
1003 				  struct cxl_region *cxlr,
1004 				  struct cxl_endpoint_decoder *cxled)
1005 {
1006 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1007 	int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
1008 	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
1009 	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1010 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1011 	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1012 	struct cxl_region_params *p = &cxlr->params;
1013 	struct cxl_decoder *cxld = cxl_rr->decoder;
1014 	struct cxl_switch_decoder *cxlsd;
1015 	u16 eig, peig;
1016 	u8 eiw, peiw;
1017 
1018 	/*
1019 	 * While root level decoders support x3, x6, x12, switch level
1020 	 * decoders only support powers of 2 up to x16.
1021 	 */
1022 	if (!is_power_of_2(cxl_rr->nr_targets)) {
1023 		dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
1024 			dev_name(port->uport), dev_name(&port->dev),
1025 			cxl_rr->nr_targets);
1026 		return -EINVAL;
1027 	}
1028 
1029 	cxlsd = to_cxl_switch_decoder(&cxld->dev);
1030 	if (cxl_rr->nr_targets_set) {
1031 		int i, distance;
1032 
1033 		/*
1034 		 * Passthrough decoders impose no distance requirements between
1035 		 * peers
1036 		 */
1037 		if (cxl_rr->nr_targets == 1)
1038 			distance = 0;
1039 		else
1040 			distance = p->nr_targets / cxl_rr->nr_targets;
1041 		for (i = 0; i < cxl_rr->nr_targets_set; i++)
1042 			if (ep->dport == cxlsd->target[i]) {
1043 				rc = check_last_peer(cxled, ep, cxl_rr,
1044 						     distance);
1045 				if (rc)
1046 					return rc;
1047 				goto out_target_set;
1048 			}
1049 		goto add_target;
1050 	}
1051 
1052 	if (is_cxl_root(parent_port)) {
1053 		parent_ig = cxlrd->cxlsd.cxld.interleave_granularity;
1054 		parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
1055 		/*
1056 		 * For purposes of address bit routing, use power-of-2 math for
1057 		 * switch ports.
1058 		 */
1059 		if (!is_power_of_2(parent_iw))
1060 			parent_iw /= 3;
1061 	} else {
1062 		struct cxl_region_ref *parent_rr;
1063 		struct cxl_decoder *parent_cxld;
1064 
1065 		parent_rr = cxl_rr_load(parent_port, cxlr);
1066 		parent_cxld = parent_rr->decoder;
1067 		parent_ig = parent_cxld->interleave_granularity;
1068 		parent_iw = parent_cxld->interleave_ways;
1069 	}
1070 
1071 	rc = granularity_to_eig(parent_ig, &peig);
1072 	if (rc) {
1073 		dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
1074 			dev_name(parent_port->uport),
1075 			dev_name(&parent_port->dev), parent_ig);
1076 		return rc;
1077 	}
1078 
1079 	rc = ways_to_eiw(parent_iw, &peiw);
1080 	if (rc) {
1081 		dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
1082 			dev_name(parent_port->uport),
1083 			dev_name(&parent_port->dev), parent_iw);
1084 		return rc;
1085 	}
1086 
1087 	iw = cxl_rr->nr_targets;
1088 	rc = ways_to_eiw(iw, &eiw);
1089 	if (rc) {
1090 		dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
1091 			dev_name(port->uport), dev_name(&port->dev), iw);
1092 		return rc;
1093 	}
1094 
1095 	/*
1096 	 * If @parent_port is masking address bits, pick the next unused address
1097 	 * bit to route @port's targets.
1098 	 */
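	/*
	 * For example (a sketch following the formula below): an x2 root at
	 * 256B granularity (peiw = 1, peig = 0) above a port with two
	 * targets (eiw = 1) yields address_bit = max(0 + 1, 1 + 0) = 1 and
	 * eig = 1 - 1 + 1 = 1, i.e. a 512B port granularity whose select
	 * bit sits one above the root's select bit.
	 */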
1099 	if (parent_iw > 1 && cxl_rr->nr_targets > 1) {
1100 		u32 address_bit = max(peig + peiw, eiw + peig);
1101 
1102 		eig = address_bit - eiw + 1;
1103 	} else {
1104 		eiw = peiw;
1105 		eig = peig;
1106 	}
1107 
1108 	rc = eig_to_granularity(eig, &ig);
1109 	if (rc) {
1110 		dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
1111 			dev_name(port->uport), dev_name(&port->dev),
1112 			256 << eig);
1113 		return rc;
1114 	}
1115 
1116 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1117 		if (cxld->interleave_ways != iw ||
1118 		    cxld->interleave_granularity != ig ||
1119 		    cxld->hpa_range.start != p->res->start ||
1120 		    cxld->hpa_range.end != p->res->end ||
1121 		    ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
1122 			dev_err(&cxlr->dev,
1123 				"%s:%s %s expected iw: %d ig: %d %pr\n",
1124 				dev_name(port->uport), dev_name(&port->dev),
1125 				__func__, iw, ig, p->res);
1126 			dev_err(&cxlr->dev,
1127 				"%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
1128 				dev_name(port->uport), dev_name(&port->dev),
1129 				__func__, cxld->interleave_ways,
1130 				cxld->interleave_granularity,
1131 				(cxld->flags & CXL_DECODER_F_ENABLE) ?
1132 					"enabled" :
1133 					"disabled",
1134 				cxld->hpa_range.start, cxld->hpa_range.end);
1135 			return -ENXIO;
1136 		}
1137 	} else {
1138 		cxld->interleave_ways = iw;
1139 		cxld->interleave_granularity = ig;
1140 		cxld->hpa_range = (struct range) {
1141 			.start = p->res->start,
1142 			.end = p->res->end,
1143 		};
1144 	}
1145 	dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport),
1146 		dev_name(&port->dev), iw, ig);
1147 add_target:
1148 	if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
1149 		dev_dbg(&cxlr->dev,
1150 			"%s:%s: targets full trying to add %s:%s at %d\n",
1151 			dev_name(port->uport), dev_name(&port->dev),
1152 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1153 		return -ENXIO;
1154 	}
1155 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1156 		if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
1157 			dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
1158 				dev_name(port->uport), dev_name(&port->dev),
1159 				dev_name(&cxlsd->cxld.dev),
1160 				dev_name(ep->dport->dport),
1161 				cxl_rr->nr_targets_set);
1162 			return -ENXIO;
1163 		}
1164 	} else
1165 		cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
1166 	inc = 1;
1167 out_target_set:
1168 	cxl_rr->nr_targets_set += inc;
1169 	dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
1170 		dev_name(port->uport), dev_name(&port->dev),
1171 		cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport),
1172 		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1173 
1174 	return 0;
1175 }
1176 
1177 static void cxl_port_reset_targets(struct cxl_port *port,
1178 				   struct cxl_region *cxlr)
1179 {
1180 	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1181 	struct cxl_decoder *cxld;
1182 
1183 	/*
1184 	 * After the last endpoint has been detached the entire cxl_rr may now
1185 	 * be gone.
1186 	 */
1187 	if (!cxl_rr)
1188 		return;
1189 	cxl_rr->nr_targets_set = 0;
1190 
1191 	cxld = cxl_rr->decoder;
1192 	cxld->hpa_range = (struct range) {
1193 		.start = 0,
1194 		.end = -1,
1195 	};
1196 }
1197 
1198 static void cxl_region_teardown_targets(struct cxl_region *cxlr)
1199 {
1200 	struct cxl_region_params *p = &cxlr->params;
1201 	struct cxl_endpoint_decoder *cxled;
1202 	struct cxl_memdev *cxlmd;
1203 	struct cxl_port *iter;
1204 	struct cxl_ep *ep;
1205 	int i;
1206 
1207 	/*
1208 	 * In the auto-discovery case skip automatic teardown since the
1209 	 * address space is already active
1210 	 */
1211 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
1212 		return;
1213 
1214 	for (i = 0; i < p->nr_targets; i++) {
1215 		cxled = p->targets[i];
1216 		cxlmd = cxled_to_memdev(cxled);
1217 
1218 		iter = cxled_to_port(cxled);
1219 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1220 			iter = to_cxl_port(iter->dev.parent);
1221 
1222 		for (ep = cxl_ep_load(iter, cxlmd); iter;
1223 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
1224 			cxl_port_reset_targets(iter, cxlr);
1225 	}
1226 }
1227 
1228 static int cxl_region_setup_targets(struct cxl_region *cxlr)
1229 {
1230 	struct cxl_region_params *p = &cxlr->params;
1231 	struct cxl_endpoint_decoder *cxled;
1232 	struct cxl_memdev *cxlmd;
1233 	struct cxl_port *iter;
1234 	struct cxl_ep *ep;
1235 	int i, rc;
1236 
1237 	for (i = 0; i < p->nr_targets; i++) {
1238 		cxled = p->targets[i];
1239 		cxlmd = cxled_to_memdev(cxled);
1240 
1241 		iter = cxled_to_port(cxled);
1242 		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1243 			iter = to_cxl_port(iter->dev.parent);
1244 
1245 		/*
1246 		 * Descend the topology tree programming / validating
1247 		 * targets while looking for conflicts.
1248 		 */
1249 		for (ep = cxl_ep_load(iter, cxlmd); iter;
1250 		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
1251 			rc = cxl_port_setup_targets(iter, cxlr, cxled);
1252 			if (rc) {
1253 				cxl_region_teardown_targets(cxlr);
1254 				return rc;
1255 			}
1256 		}
1257 	}
1258 
1259 	return 0;
1260 }
1261 
1262 static int cxl_region_validate_position(struct cxl_region *cxlr,
1263 					struct cxl_endpoint_decoder *cxled,
1264 					int pos)
1265 {
1266 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1267 	struct cxl_region_params *p = &cxlr->params;
1268 	int i;
1269 
1270 	if (pos < 0 || pos >= p->interleave_ways) {
1271 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1272 			p->interleave_ways);
1273 		return -ENXIO;
1274 	}
1275 
1276 	if (p->targets[pos] == cxled)
1277 		return 0;
1278 
1279 	if (p->targets[pos]) {
1280 		struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
1281 		struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
1282 
1283 		dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
1284 			pos, dev_name(&cxlmd_target->dev),
1285 			dev_name(&cxled_target->cxld.dev));
1286 		return -EBUSY;
1287 	}
1288 
1289 	for (i = 0; i < p->interleave_ways; i++) {
1290 		struct cxl_endpoint_decoder *cxled_target;
1291 		struct cxl_memdev *cxlmd_target;
1292 
1293 		cxled_target = p->targets[i];
1294 		if (!cxled_target)
1295 			continue;
1296 
1297 		cxlmd_target = cxled_to_memdev(cxled_target);
1298 		if (cxlmd_target == cxlmd) {
1299 			dev_dbg(&cxlr->dev,
1300 				"%s already specified at position %d via: %s\n",
1301 				dev_name(&cxlmd->dev), pos,
1302 				dev_name(&cxled_target->cxld.dev));
1303 			return -EBUSY;
1304 		}
1305 	}
1306 
1307 	return 0;
1308 }
1309 
1310 static int cxl_region_attach_position(struct cxl_region *cxlr,
1311 				      struct cxl_root_decoder *cxlrd,
1312 				      struct cxl_endpoint_decoder *cxled,
1313 				      const struct cxl_dport *dport, int pos)
1314 {
1315 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1316 	struct cxl_port *iter;
1317 	int rc;
1318 
1319 	if (cxlrd->calc_hb(cxlrd, pos) != dport) {
1320 		dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
1321 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1322 			dev_name(&cxlrd->cxlsd.cxld.dev));
1323 		return -ENXIO;
1324 	}
1325 
1326 	for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1327 	     iter = to_cxl_port(iter->dev.parent)) {
1328 		rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
1329 		if (rc)
1330 			goto err;
1331 	}
1332 
1333 	return 0;
1334 
1335 err:
1336 	for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1337 	     iter = to_cxl_port(iter->dev.parent))
1338 		cxl_port_detach_region(iter, cxlr, cxled);
1339 	return rc;
1340 }
1341 
1342 static int cxl_region_attach_auto(struct cxl_region *cxlr,
1343 				  struct cxl_endpoint_decoder *cxled, int pos)
1344 {
1345 	struct cxl_region_params *p = &cxlr->params;
1346 
1347 	if (cxled->state != CXL_DECODER_STATE_AUTO) {
1348 		dev_err(&cxlr->dev,
1349 			"%s: unable to add decoder to autodetected region\n",
1350 			dev_name(&cxled->cxld.dev));
1351 		return -EINVAL;
1352 	}
1353 
1354 	if (pos >= 0) {
1355 		dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
1356 			dev_name(&cxled->cxld.dev), pos);
1357 		return -EINVAL;
1358 	}
1359 
1360 	if (p->nr_targets >= p->interleave_ways) {
1361 		dev_err(&cxlr->dev, "%s: no more target slots available\n",
1362 			dev_name(&cxled->cxld.dev));
1363 		return -ENXIO;
1364 	}
1365 
1366 	/*
1367 	 * Temporarily record the endpoint decoder into the target array. Yes,
1368 	 * this means that userspace can view devices in the wrong position
1369 	 * before the region activates, and must be careful to understand when
1370 	 * it might be racing region autodiscovery.
1371 	 */
1372 	pos = p->nr_targets;
1373 	p->targets[pos] = cxled;
1374 	cxled->pos = pos;
1375 	p->nr_targets++;
1376 
1377 	return 0;
1378 }
1379 
1380 static struct cxl_port *next_port(struct cxl_port *port)
1381 {
1382 	if (!port->parent_dport)
1383 		return NULL;
1384 	return port->parent_dport->port;
1385 }
1386 
1387 static int decoder_match_range(struct device *dev, void *data)
1388 {
1389 	struct cxl_endpoint_decoder *cxled = data;
1390 	struct cxl_switch_decoder *cxlsd;
1391 
1392 	if (!is_switch_decoder(dev))
1393 		return 0;
1394 
1395 	cxlsd = to_cxl_switch_decoder(dev);
1396 	return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range);
1397 }
1398 
1399 static void find_positions(const struct cxl_switch_decoder *cxlsd,
1400 			   const struct cxl_port *iter_a,
1401 			   const struct cxl_port *iter_b, int *a_pos,
1402 			   int *b_pos)
1403 {
1404 	int i;
1405 
1406 	for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) {
1407 		if (cxlsd->target[i] == iter_a->parent_dport)
1408 			*a_pos = i;
1409 		else if (cxlsd->target[i] == iter_b->parent_dport)
1410 			*b_pos = i;
1411 		if (*a_pos >= 0 && *b_pos >= 0)
1412 			break;
1413 	}
1414 }
1415 
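/*
 * Sort comparator: order two endpoint decoders by the relative positions of
 * their host ports in the target list of the switch decoder at the closest
 * shared ancestor port. On failure, mark the entry with pos = -1 so that
 * cxl_region_sort_targets() can reject the result.
 */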
1416 static int cmp_decode_pos(const void *a, const void *b)
1417 {
1418 	struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
1419 	struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
1420 	struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a);
1421 	struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b);
1422 	struct cxl_port *port_a = cxled_to_port(cxled_a);
1423 	struct cxl_port *port_b = cxled_to_port(cxled_b);
1424 	struct cxl_port *iter_a, *iter_b, *port = NULL;
1425 	struct cxl_switch_decoder *cxlsd;
1426 	struct device *dev;
1427 	int a_pos, b_pos;
1428 	unsigned int seq;
1429 
1430 	/* Exit early if any prior sorting failed */
1431 	if (cxled_a->pos < 0 || cxled_b->pos < 0)
1432 		return 0;
1433 
1434 	/*
1435 	 * Walk up the hierarchy to find a shared port, find the decoder that
1436 	 * maps the range, compare the relative position of those dport
1437 	 * mappings.
1438 	 */
1439 	for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) {
1440 		struct cxl_port *next_a, *next_b;
1441 
1442 		next_a = next_port(iter_a);
1443 		if (!next_a)
1444 			break;
1445 
1446 		for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) {
1447 			next_b = next_port(iter_b);
1448 			if (next_a != next_b)
1449 				continue;
1450 			port = next_a;
1451 			break;
1452 		}
1453 
1454 		if (port)
1455 			break;
1456 	}
1457 
1458 	if (!port) {
1459 		dev_err(cxlmd_a->dev.parent,
1460 			"failed to find shared port with %s\n",
1461 			dev_name(cxlmd_b->dev.parent));
1462 		goto err;
1463 	}
1464 
1465 	dev = device_find_child(&port->dev, cxled_a, decoder_match_range);
1466 	if (!dev) {
1467 		struct range *range = &cxled_a->cxld.hpa_range;
1468 
1469 		dev_err(port->uport,
1470 			"failed to find decoder that maps %#llx-%#llx\n",
1471 			range->start, range->end);
1472 		goto err;
1473 	}
1474 
1475 	cxlsd = to_cxl_switch_decoder(dev);
1476 	do {
1477 		seq = read_seqbegin(&cxlsd->target_lock);
1478 		find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos);
1479 	} while (read_seqretry(&cxlsd->target_lock, seq));
1480 
1481 	put_device(dev);
1482 
1483 	if (a_pos < 0 || b_pos < 0) {
1484 		dev_err(port->uport,
1485 			"failed to find shared decoder for %s and %s\n",
1486 			dev_name(cxlmd_a->dev.parent),
1487 			dev_name(cxlmd_b->dev.parent));
1488 		goto err;
1489 	}
1490 
1491 	dev_dbg(port->uport, "%s comes %s %s\n", dev_name(cxlmd_a->dev.parent),
1492 		a_pos - b_pos < 0 ? "before" : "after",
1493 		dev_name(cxlmd_b->dev.parent));
1494 
1495 	return a_pos - b_pos;
1496 err:
1497 	cxled_a->pos = -1;
1498 	return 0;
1499 }
1500 
1501 static int cxl_region_sort_targets(struct cxl_region *cxlr)
1502 {
1503 	struct cxl_region_params *p = &cxlr->params;
1504 	int i, rc = 0;
1505 
1506 	sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos,
1507 	     NULL);
1508 
1509 	for (i = 0; i < p->nr_targets; i++) {
1510 		struct cxl_endpoint_decoder *cxled = p->targets[i];
1511 
1512 		/*
1513 		 * Record that sorting failed, but still continue to restore
1514 		 * cxled->pos with its ->targets[] position so that follow-on
1515 		 * code paths can reliably do p->targets[cxled->pos] to
1516 		 * self-reference their entry.
1517 		 */
1518 		if (cxled->pos < 0)
1519 			rc = -ENXIO;
1520 		cxled->pos = i;
1521 	}
1522 
1523 	dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
1524 	return rc;
1525 }
1526 
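/*
 * Validate @cxled against @cxlr (mode, type, DPA size, host bridge routing),
 * record it at @pos, and take region references at every port in its
 * ancestry. When the final target arrives, program (or, for autodiscovered
 * regions, validate) the decode targets for the entire region.
 */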
1527 static int cxl_region_attach(struct cxl_region *cxlr,
1528 			     struct cxl_endpoint_decoder *cxled, int pos)
1529 {
1530 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1531 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1532 	struct cxl_region_params *p = &cxlr->params;
1533 	struct cxl_port *ep_port, *root_port;
1534 	struct cxl_dport *dport;
1535 	int rc = -ENXIO;
1536 
1537 	if (cxled->mode != cxlr->mode) {
1538 		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
1539 			dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
1540 		return -EINVAL;
1541 	}
1542 
1543 	if (cxled->mode == CXL_DECODER_DEAD) {
1544 		dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
1545 		return -ENODEV;
1546 	}
1547 
1548 	/* all full of members, or interleave config not established? */
1549 	if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
1550 		dev_dbg(&cxlr->dev, "region already active\n");
1551 		return -EBUSY;
1552 	} else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
1553 		dev_dbg(&cxlr->dev, "interleave config missing\n");
1554 		return -ENXIO;
1555 	}
1556 
1557 	ep_port = cxled_to_port(cxled);
1558 	root_port = cxlrd_to_port(cxlrd);
1559 	dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
1560 	if (!dport) {
1561 		dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
1562 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1563 			dev_name(cxlr->dev.parent));
1564 		return -ENXIO;
1565 	}
1566 
1567 	if (cxled->cxld.target_type != cxlr->type) {
1568 		dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
1569 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1570 			cxled->cxld.target_type, cxlr->type);
1571 		return -ENXIO;
1572 	}
1573 
1574 	if (!cxled->dpa_res) {
1575 		dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
1576 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
1577 		return -ENXIO;
1578 	}
1579 
1580 	if (resource_size(cxled->dpa_res) * p->interleave_ways !=
1581 	    resource_size(p->res)) {
1582 		dev_dbg(&cxlr->dev,
1583 			"%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
1584 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1585 			(u64)resource_size(cxled->dpa_res), p->interleave_ways,
1586 			(u64)resource_size(p->res));
1587 		return -EINVAL;
1588 	}
1589 
1590 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1591 		int i;
1592 
1593 		rc = cxl_region_attach_auto(cxlr, cxled, pos);
1594 		if (rc)
1595 			return rc;
1596 
1597 		/* await more targets to arrive... */
1598 		if (p->nr_targets < p->interleave_ways)
1599 			return 0;
1600 
1601 		/*
1602 		 * All targets are here, which implies all PCI enumeration that
1603 		 * affects this region has been completed. Walk the topology to
1604 		 * sort the devices into their relative region decode position.
1605 		 */
1606 		rc = cxl_region_sort_targets(cxlr);
1607 		if (rc)
1608 			return rc;
1609 
1610 		for (i = 0; i < p->nr_targets; i++) {
1611 			cxled = p->targets[i];
1612 			ep_port = cxled_to_port(cxled);
1613 			dport = cxl_find_dport_by_dev(root_port,
1614 						      ep_port->host_bridge);
1615 			rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
1616 							dport, i);
1617 			if (rc)
1618 				return rc;
1619 		}
1620 
1621 		rc = cxl_region_setup_targets(cxlr);
1622 		if (rc)
1623 			return rc;
1624 
1625 		/*
1626 		 * If target setup succeeds in the autodiscovery case
1627 		 * then the region is already committed.
1628 		 */
1629 		p->state = CXL_CONFIG_COMMIT;
1630 
1631 		return 0;
1632 	}
1633 
1634 	rc = cxl_region_validate_position(cxlr, cxled, pos);
1635 	if (rc)
1636 		return rc;
1637 
1638 	rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
1639 	if (rc)
1640 		return rc;
1641 
1642 	p->targets[pos] = cxled;
1643 	cxled->pos = pos;
1644 	p->nr_targets++;
1645 
1646 	if (p->nr_targets == p->interleave_ways) {
1647 		rc = cxl_region_setup_targets(cxlr);
1648 		if (rc)
1649 			goto err_decrement;
1650 		p->state = CXL_CONFIG_ACTIVE;
1651 	}
1652 
1653 	cxled->cxld.interleave_ways = p->interleave_ways;
1654 	cxled->cxld.interleave_granularity = p->interleave_granularity;
1655 	cxled->cxld.hpa_range = (struct range) {
1656 		.start = p->res->start,
1657 		.end = p->res->end,
1658 	};
1659 
1660 	return 0;
1661 
1662 err_decrement:
1663 	p->nr_targets--;
1664 	cxled->pos = -1;
1665 	p->targets[pos] = NULL;
1666 	return rc;
1667 }
1668 
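/*
 * Remove @cxled from its region: reset the decode chain if the region was
 * committed, drop the per-port region references, clear the target slot,
 * and detach the region driver now that one of its members has departed.
 */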
1669 static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
1670 {
1671 	struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
1672 	struct cxl_region *cxlr = cxled->cxld.region;
1673 	struct cxl_region_params *p;
1674 	int rc = 0;
1675 
1676 	lockdep_assert_held_write(&cxl_region_rwsem);
1677 
1678 	if (!cxlr)
1679 		return 0;
1680 
1681 	p = &cxlr->params;
1682 	get_device(&cxlr->dev);
1683 
1684 	if (p->state > CXL_CONFIG_ACTIVE) {
1685 		/*
1686 		 * TODO: tear down all impacted regions if a device is
1687 		 * removed out of order
1688 		 */
1689 		rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
1690 		if (rc)
1691 			goto out;
1692 		p->state = CXL_CONFIG_ACTIVE;
1693 	}
1694 
1695 	for (iter = ep_port; !is_cxl_root(iter);
1696 	     iter = to_cxl_port(iter->dev.parent))
1697 		cxl_port_detach_region(iter, cxlr, cxled);
1698 
1699 	if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
1700 	    p->targets[cxled->pos] != cxled) {
1701 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1702 
1703 		dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
1704 			      dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1705 			      cxled->pos);
1706 		goto out;
1707 	}
1708 
1709 	if (p->state == CXL_CONFIG_ACTIVE) {
1710 		p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
1711 		cxl_region_teardown_targets(cxlr);
1712 	}
1713 	p->targets[cxled->pos] = NULL;
1714 	p->nr_targets--;
1715 	cxled->cxld.hpa_range = (struct range) {
1716 		.start = 0,
1717 		.end = -1,
1718 	};
1719 
1720 	/* notify the region driver that one of its targets has departed */
1721 	up_write(&cxl_region_rwsem);
1722 	device_release_driver(&cxlr->dev);
1723 	down_write(&cxl_region_rwsem);
1724 out:
1725 	put_device(&cxlr->dev);
1726 	return rc;
1727 }
1728 
1729 void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
1730 {
1731 	down_write(&cxl_region_rwsem);
1732 	cxled->mode = CXL_DECODER_DEAD;
1733 	cxl_region_detach(cxled);
1734 	up_write(&cxl_region_rwsem);
1735 }
1736 
1737 static int attach_target(struct cxl_region *cxlr,
1738 			 struct cxl_endpoint_decoder *cxled, int pos,
1739 			 unsigned int state)
1740 {
1741 	int rc = 0;
1742 
1743 	if (state == TASK_INTERRUPTIBLE)
1744 		rc = down_write_killable(&cxl_region_rwsem);
1745 	else
1746 		down_write(&cxl_region_rwsem);
1747 	if (rc)
1748 		return rc;
1749 
1750 	down_read(&cxl_dpa_rwsem);
1751 	rc = cxl_region_attach(cxlr, cxled, pos);
1752 	if (rc == 0)
1753 		set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
1754 	up_read(&cxl_dpa_rwsem);
1755 	up_write(&cxl_region_rwsem);
1756 	return rc;
1757 }
1758 
1759 static int detach_target(struct cxl_region *cxlr, int pos)
1760 {
1761 	struct cxl_region_params *p = &cxlr->params;
1762 	int rc;
1763 
1764 	rc = down_write_killable(&cxl_region_rwsem);
1765 	if (rc)
1766 		return rc;
1767 
1768 	if (pos >= p->interleave_ways) {
1769 		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1770 			p->interleave_ways);
1771 		rc = -ENXIO;
1772 		goto out;
1773 	}
1774 
1775 	if (!p->targets[pos]) {
1776 		rc = 0;
1777 		goto out;
1778 	}
1779 
1780 	rc = cxl_region_detach(p->targets[pos]);
1781 out:
1782 	up_write(&cxl_region_rwsem);
1783 	return rc;
1784 }
1785 
1786 static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
1787 			    size_t len)
1788 {
1789 	int rc;
1790 
1791 	if (sysfs_streq(buf, "\n"))
1792 		rc = detach_target(cxlr, pos);
1793 	else {
1794 		struct device *dev;
1795 
1796 		dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
1797 		if (!dev)
1798 			return -ENODEV;
1799 
1800 		if (!is_endpoint_decoder(dev)) {
1801 			rc = -EINVAL;
1802 			goto out;
1803 		}
1804 
1805 		rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
1806 				   TASK_INTERRUPTIBLE);
1807 out:
1808 		put_device(dev);
1809 	}
1810 
1811 	if (rc < 0)
1812 		return rc;
1813 	return len;
1814 }
1815 
1816 #define TARGET_ATTR_RW(n)                                              \
1817 static ssize_t target##n##_show(                                       \
1818 	struct device *dev, struct device_attribute *attr, char *buf)  \
1819 {                                                                      \
1820 	return show_targetN(to_cxl_region(dev), buf, (n));             \
1821 }                                                                      \
1822 static ssize_t target##n##_store(struct device *dev,                   \
1823 				 struct device_attribute *attr,        \
1824 				 const char *buf, size_t len)          \
1825 {                                                                      \
1826 	return store_targetN(to_cxl_region(dev), buf, (n), len);       \
1827 }                                                                      \
1828 static DEVICE_ATTR_RW(target##n)
1829 
1830 TARGET_ATTR_RW(0);
1831 TARGET_ATTR_RW(1);
1832 TARGET_ATTR_RW(2);
1833 TARGET_ATTR_RW(3);
1834 TARGET_ATTR_RW(4);
1835 TARGET_ATTR_RW(5);
1836 TARGET_ATTR_RW(6);
1837 TARGET_ATTR_RW(7);
1838 TARGET_ATTR_RW(8);
1839 TARGET_ATTR_RW(9);
1840 TARGET_ATTR_RW(10);
1841 TARGET_ATTR_RW(11);
1842 TARGET_ATTR_RW(12);
1843 TARGET_ATTR_RW(13);
1844 TARGET_ATTR_RW(14);
1845 TARGET_ATTR_RW(15);
1846 
1847 static struct attribute *target_attrs[] = {
1848 	&dev_attr_target0.attr,
1849 	&dev_attr_target1.attr,
1850 	&dev_attr_target2.attr,
1851 	&dev_attr_target3.attr,
1852 	&dev_attr_target4.attr,
1853 	&dev_attr_target5.attr,
1854 	&dev_attr_target6.attr,
1855 	&dev_attr_target7.attr,
1856 	&dev_attr_target8.attr,
1857 	&dev_attr_target9.attr,
1858 	&dev_attr_target10.attr,
1859 	&dev_attr_target11.attr,
1860 	&dev_attr_target12.attr,
1861 	&dev_attr_target13.attr,
1862 	&dev_attr_target14.attr,
1863 	&dev_attr_target15.attr,
1864 	NULL,
1865 };
1866 
1867 static umode_t cxl_region_target_visible(struct kobject *kobj,
1868 					 struct attribute *a, int n)
1869 {
1870 	struct device *dev = kobj_to_dev(kobj);
1871 	struct cxl_region *cxlr = to_cxl_region(dev);
1872 	struct cxl_region_params *p = &cxlr->params;
1873 
1874 	if (n < p->interleave_ways)
1875 		return a->mode;
1876 	return 0;
1877 }
1878 
1879 static const struct attribute_group cxl_region_target_group = {
1880 	.attrs = target_attrs,
1881 	.is_visible = cxl_region_target_visible,
1882 };
1883 
1884 static const struct attribute_group *get_cxl_region_target_group(void)
1885 {
1886 	return &cxl_region_target_group;
1887 }
1888 
1889 static const struct attribute_group *region_groups[] = {
1890 	&cxl_base_attribute_group,
1891 	&cxl_region_group,
1892 	&cxl_region_target_group,
1893 	NULL,
1894 };
1895 
1896 static void cxl_region_release(struct device *dev)
1897 {
1898 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
1899 	struct cxl_region *cxlr = to_cxl_region(dev);
1900 	int id = atomic_read(&cxlrd->region_id);
1901 
1902 	/*
1903 	 * Try to reuse the recently idled id rather than the cached
1904 	 * next id to prevent the region id space from increasing
1905 	 * unnecessarily.
1906 	 */
1907 	if (cxlr->id < id)
1908 		if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
1909 			memregion_free(id);
1910 			goto out;
1911 		}
1912 
1913 	memregion_free(cxlr->id);
1914 out:
1915 	put_device(dev->parent);
1916 	kfree(cxlr);
1917 }
1918 
1919 const struct device_type cxl_region_type = {
1920 	.name = "cxl_region",
1921 	.release = cxl_region_release,
1922 	.groups = region_groups
1923 };
1924 
1925 bool is_cxl_region(struct device *dev)
1926 {
1927 	return dev->type == &cxl_region_type;
1928 }
1929 EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);
1930 
1931 static struct cxl_region *to_cxl_region(struct device *dev)
1932 {
1933 	if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
1934 			  "not a cxl_region device\n"))
1935 		return NULL;
1936 
1937 	return container_of(dev, struct cxl_region, dev);
1938 }
1939 
1940 static void unregister_region(void *dev)
1941 {
1942 	struct cxl_region *cxlr = to_cxl_region(dev);
1943 	struct cxl_region_params *p = &cxlr->params;
1944 	int i;
1945 
1946 	device_del(dev);
1947 
1948 	/*
1949 	 * Now that region sysfs is shut down, the parameter block is
1950 	 * read-only, so there is no need to hold the region rwsem to access
1951 	 * the region parameters.
1952 	 */
1953 	for (i = 0; i < p->interleave_ways; i++)
1954 		detach_target(cxlr, i);
1955 
1956 	cxl_region_iomem_release(cxlr);
1957 	put_device(dev);
1958 }
1959 
1960 static struct lock_class_key cxl_region_key;
1961 
1962 static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
1963 {
1964 	struct cxl_region *cxlr;
1965 	struct device *dev;
1966 
1967 	cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
1968 	if (!cxlr) {
1969 		memregion_free(id);
1970 		return ERR_PTR(-ENOMEM);
1971 	}
1972 
1973 	dev = &cxlr->dev;
1974 	device_initialize(dev);
1975 	lockdep_set_class(&dev->mutex, &cxl_region_key);
1976 	dev->parent = &cxlrd->cxlsd.cxld.dev;
1977 	/*
1978 	 * Keep the root decoder pinned through cxl_region_release() to fix up
1979 	 * region id allocations
1980 	 */
1981 	get_device(dev->parent);
1982 	device_set_pm_not_required(dev);
1983 	dev->bus = &cxl_bus_type;
1984 	dev->type = &cxl_region_type;
1985 	cxlr->id = id;
1986 
1987 	return cxlr;
1988 }
1989 
1990 /**
1991  * devm_cxl_add_region - Adds a region to a decoder
1992  * @cxlrd: root decoder
1993  * @id: memregion id to create, freed via memregion_free() on failure
1994  * @mode: mode for the endpoint decoders of this region
1995  * @type: select whether this is an expander or accelerator (type-2 or type-3)
1996  *
1997  * This is the second step of region initialization. Regions exist within an
1998  * address space which is mapped by a @cxlrd.
1999  *
2000  * Return: the new region on success, else an ERR_PTR() encoded error code.
2001  * The region will be named "regionZ" where Z is the unique region number.
2002  */
2003 static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
2004 					      int id,
2005 					      enum cxl_decoder_mode mode,
2006 					      enum cxl_decoder_type type)
2007 {
2008 	struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
2009 	struct cxl_region *cxlr;
2010 	struct device *dev;
2011 	int rc;
2012 
2013 	switch (mode) {
2014 	case CXL_DECODER_RAM:
2015 	case CXL_DECODER_PMEM:
2016 		break;
2017 	default:
2018 		dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
2019 		return ERR_PTR(-EINVAL);
2020 	}
2021 
2022 	cxlr = cxl_region_alloc(cxlrd, id);
2023 	if (IS_ERR(cxlr))
2024 		return cxlr;
2025 	cxlr->mode = mode;
2026 	cxlr->type = type;
2027 
2028 	dev = &cxlr->dev;
2029 	rc = dev_set_name(dev, "region%d", id);
2030 	if (rc)
2031 		goto err;
2032 
2033 	rc = device_add(dev);
2034 	if (rc)
2035 		goto err;
2036 
2037 	rc = devm_add_action_or_reset(port->uport, unregister_region, cxlr);
2038 	if (rc)
2039 		return ERR_PTR(rc);
2040 
2041 	dev_dbg(port->uport, "%s: created %s\n",
2042 		dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
2043 	return cxlr;
2044 
2045 err:
2046 	put_device(dev);
2047 	return ERR_PTR(rc);
2048 }
2049 
2050 static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
2051 {
2052 	return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
2053 }
2054 
2055 static ssize_t create_pmem_region_show(struct device *dev,
2056 				       struct device_attribute *attr, char *buf)
2057 {
2058 	return __create_region_show(to_cxl_root_decoder(dev), buf);
2059 }
2060 
2061 static ssize_t create_ram_region_show(struct device *dev,
2062 				      struct device_attribute *attr, char *buf)
2063 {
2064 	return __create_region_show(to_cxl_root_decoder(dev), buf);
2065 }
2066 
2067 static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
2068 					  enum cxl_decoder_mode mode, int id)
2069 {
2070 	int rc;
2071 
2072 	rc = memregion_alloc(GFP_KERNEL);
2073 	if (rc < 0)
2074 		return ERR_PTR(rc);
2075 
2076 	if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
2077 		memregion_free(rc);
2078 		return ERR_PTR(-EBUSY);
2079 	}
2080 
2081 	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_EXPANDER);
2082 }
2083 
2084 static ssize_t create_pmem_region_store(struct device *dev,
2085 					struct device_attribute *attr,
2086 					const char *buf, size_t len)
2087 {
2088 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2089 	struct cxl_region *cxlr;
2090 	int rc, id;
2091 
2092 	rc = sscanf(buf, "region%d\n", &id);
2093 	if (rc != 1)
2094 		return -EINVAL;
2095 
2096 	cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
2097 	if (IS_ERR(cxlr))
2098 		return PTR_ERR(cxlr);
2099 
2100 	return len;
2101 }
2102 DEVICE_ATTR_RW(create_pmem_region);
2103 
2104 static ssize_t create_ram_region_store(struct device *dev,
2105 				       struct device_attribute *attr,
2106 				       const char *buf, size_t len)
2107 {
2108 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2109 	struct cxl_region *cxlr;
2110 	int rc, id;
2111 
2112 	rc = sscanf(buf, "region%d\n", &id);
2113 	if (rc != 1)
2114 		return -EINVAL;
2115 
2116 	cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
2117 	if (IS_ERR(cxlr))
2118 		return PTR_ERR(cxlr);
2119 
2120 	return len;
2121 }
2122 DEVICE_ATTR_RW(create_ram_region);
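
/*
 * Illustrative flow for the create_*_region attributes ("decoder0.0" is a
 * placeholder root decoder name): reading returns the next region name to
 * claim, and writing that same name back instantiates it:
 *
 *   # cat /sys/bus/cxl/devices/decoder0.0/create_pmem_region
 *   region0
 *   # echo region0 > /sys/bus/cxl/devices/decoder0.0/create_pmem_region
 *
 * Writing a stale name fails with -EBUSY via the atomic_cmpxchg() in
 * __create_region().
 */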
2123 
2124 static ssize_t region_show(struct device *dev, struct device_attribute *attr,
2125 			   char *buf)
2126 {
2127 	struct cxl_decoder *cxld = to_cxl_decoder(dev);
2128 	ssize_t rc;
2129 
2130 	rc = down_read_interruptible(&cxl_region_rwsem);
2131 	if (rc)
2132 		return rc;
2133 
2134 	if (cxld->region)
2135 		rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
2136 	else
2137 		rc = sysfs_emit(buf, "\n");
2138 	up_read(&cxl_region_rwsem);
2139 
2140 	return rc;
2141 }
2142 DEVICE_ATTR_RO(region);
2143 
2144 static struct cxl_region *
2145 cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
2146 {
2147 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
2148 	struct device *region_dev;
2149 
2150 	region_dev = device_find_child_by_name(&cxld->dev, name);
2151 	if (!region_dev)
2152 		return ERR_PTR(-ENODEV);
2153 
2154 	return to_cxl_region(region_dev);
2155 }
2156 
2157 static ssize_t delete_region_store(struct device *dev,
2158 				   struct device_attribute *attr,
2159 				   const char *buf, size_t len)
2160 {
2161 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2162 	struct cxl_port *port = to_cxl_port(dev->parent);
2163 	struct cxl_region *cxlr;
2164 
2165 	cxlr = cxl_find_region_by_name(cxlrd, buf);
2166 	if (IS_ERR(cxlr))
2167 		return PTR_ERR(cxlr);
2168 
2169 	devm_release_action(port->uport, unregister_region, cxlr);
2170 	put_device(&cxlr->dev);
2171 
2172 	return len;
2173 }
2174 DEVICE_ATTR_WO(delete_region);
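
/*
 * Deletion is symmetric (again, "decoder0.0" is a placeholder):
 *
 *   # echo region0 > /sys/bus/cxl/devices/decoder0.0/delete_region
 *
 * ...which releases the unregister_region() devm action installed by
 * devm_cxl_add_region().
 */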
2175 
2176 static void cxl_pmem_region_release(struct device *dev)
2177 {
2178 	struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
2179 	int i;
2180 
2181 	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
2182 		struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;
2183 
2184 		put_device(&cxlmd->dev);
2185 	}
2186 
2187 	kfree(cxlr_pmem);
2188 }
2189 
2190 static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
2191 	&cxl_base_attribute_group,
2192 	NULL,
2193 };
2194 
2195 const struct device_type cxl_pmem_region_type = {
2196 	.name = "cxl_pmem_region",
2197 	.release = cxl_pmem_region_release,
2198 	.groups = cxl_pmem_region_attribute_groups,
2199 };
2200 
2201 bool is_cxl_pmem_region(struct device *dev)
2202 {
2203 	return dev->type == &cxl_pmem_region_type;
2204 }
2205 EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL);
2206 
2207 struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
2208 {
2209 	if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
2210 			  "not a cxl_pmem_region device\n"))
2211 		return NULL;
2212 	return container_of(dev, struct cxl_pmem_region, dev);
2213 }
2214 EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
2215 
2216 static struct lock_class_key cxl_pmem_region_key;
2217 
2218 static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
2219 {
2220 	struct cxl_region_params *p = &cxlr->params;
2221 	struct cxl_nvdimm_bridge *cxl_nvb;
2222 	struct cxl_pmem_region *cxlr_pmem;
2223 	struct device *dev;
2224 	int i;
2225 
2226 	down_read(&cxl_region_rwsem);
2227 	if (p->state != CXL_CONFIG_COMMIT) {
2228 		cxlr_pmem = ERR_PTR(-ENXIO);
2229 		goto out;
2230 	}
2231 
2232 	cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets),
2233 			    GFP_KERNEL);
2234 	if (!cxlr_pmem) {
2235 		cxlr_pmem = ERR_PTR(-ENOMEM);
2236 		goto out;
2237 	}
2238 
2239 	cxlr_pmem->hpa_range.start = p->res->start;
2240 	cxlr_pmem->hpa_range.end = p->res->end;
2241 
2242 	/* Snapshot the region configuration underneath the cxl_region_rwsem */
2243 	cxlr_pmem->nr_mappings = p->nr_targets;
2244 	for (i = 0; i < p->nr_targets; i++) {
2245 		struct cxl_endpoint_decoder *cxled = p->targets[i];
2246 		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2247 		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
2248 
2249 		/*
2250 		 * Regions never span CXL root devices, so by definition the
2251 		 * bridge for one device is the same for all.
2252 		 */
2253 		if (i == 0) {
2254 			cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
2255 			if (!cxl_nvb) {
2256 				cxlr_pmem = ERR_PTR(-ENODEV);
2257 				goto out;
2258 			}
2259 			cxlr->cxl_nvb = cxl_nvb;
2260 		}
2261 		m->cxlmd = cxlmd;
2262 		get_device(&cxlmd->dev);
2263 		m->start = cxled->dpa_res->start;
2264 		m->size = resource_size(cxled->dpa_res);
2265 		m->position = i;
2266 	}
2267 
2268 	dev = &cxlr_pmem->dev;
2269 	cxlr_pmem->cxlr = cxlr;
2270 	cxlr->cxlr_pmem = cxlr_pmem;
2271 	device_initialize(dev);
2272 	lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
2273 	device_set_pm_not_required(dev);
2274 	dev->parent = &cxlr->dev;
2275 	dev->bus = &cxl_bus_type;
2276 	dev->type = &cxl_pmem_region_type;
2277 out:
2278 	up_read(&cxl_region_rwsem);
2279 
2280 	return cxlr_pmem;
2281 }
2282 
2283 static void cxl_dax_region_release(struct device *dev)
2284 {
2285 	struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
2286 
2287 	kfree(cxlr_dax);
2288 }
2289 
2290 static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
2291 	&cxl_base_attribute_group,
2292 	NULL,
2293 };
2294 
2295 const struct device_type cxl_dax_region_type = {
2296 	.name = "cxl_dax_region",
2297 	.release = cxl_dax_region_release,
2298 	.groups = cxl_dax_region_attribute_groups,
2299 };
2300 
2301 static bool is_cxl_dax_region(struct device *dev)
2302 {
2303 	return dev->type == &cxl_dax_region_type;
2304 }
2305 
2306 struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
2307 {
2308 	if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
2309 			  "not a cxl_dax_region device\n"))
2310 		return NULL;
2311 	return container_of(dev, struct cxl_dax_region, dev);
2312 }
2313 EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL);
2314 
2315 static struct lock_class_key cxl_dax_region_key;
2316 
2317 static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
2318 {
2319 	struct cxl_region_params *p = &cxlr->params;
2320 	struct cxl_dax_region *cxlr_dax;
2321 	struct device *dev;
2322 
2323 	down_read(&cxl_region_rwsem);
2324 	if (p->state != CXL_CONFIG_COMMIT) {
2325 		cxlr_dax = ERR_PTR(-ENXIO);
2326 		goto out;
2327 	}
2328 
2329 	cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
2330 	if (!cxlr_dax) {
2331 		cxlr_dax = ERR_PTR(-ENOMEM);
2332 		goto out;
2333 	}
2334 
2335 	cxlr_dax->hpa_range.start = p->res->start;
2336 	cxlr_dax->hpa_range.end = p->res->end;
2337 
2338 	dev = &cxlr_dax->dev;
2339 	cxlr_dax->cxlr = cxlr;
2340 	device_initialize(dev);
2341 	lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
2342 	device_set_pm_not_required(dev);
2343 	dev->parent = &cxlr->dev;
2344 	dev->bus = &cxl_bus_type;
2345 	dev->type = &cxl_dax_region_type;
2346 out:
2347 	up_read(&cxl_region_rwsem);
2348 
2349 	return cxlr_dax;
2350 }
2351 
2352 static void cxlr_pmem_unregister(void *_cxlr_pmem)
2353 {
2354 	struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
2355 	struct cxl_region *cxlr = cxlr_pmem->cxlr;
2356 	struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
2357 
2358 	/*
2359 	 * Either the bridge is in ->remove() context under the device_lock(),
2360 	 * or cxlr_release_nvdimm() is cancelling the bridge's release action
2361 	 * for @cxlr_pmem and doing it itself (while manually holding the bridge
2362 	 * lock).
2363 	 */
2364 	device_lock_assert(&cxl_nvb->dev);
2365 	cxlr->cxlr_pmem = NULL;
2366 	cxlr_pmem->cxlr = NULL;
2367 	device_unregister(&cxlr_pmem->dev);
2368 }
2369 
2370 static void cxlr_release_nvdimm(void *_cxlr)
2371 {
2372 	struct cxl_region *cxlr = _cxlr;
2373 	struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
2374 
2375 	device_lock(&cxl_nvb->dev);
2376 	if (cxlr->cxlr_pmem)
2377 		devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
2378 				    cxlr->cxlr_pmem);
2379 	device_unlock(&cxl_nvb->dev);
2380 	cxlr->cxl_nvb = NULL;
2381 	put_device(&cxl_nvb->dev);
2382 }
2383 
2384 /**
2385  * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
2386  * @cxlr: parent CXL region for this pmem region bridge device
2387  *
2388  * Return: 0 on success, negative error code on failure.
2389  */
2390 static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
2391 {
2392 	struct cxl_pmem_region *cxlr_pmem;
2393 	struct cxl_nvdimm_bridge *cxl_nvb;
2394 	struct device *dev;
2395 	int rc;
2396 
2397 	cxlr_pmem = cxl_pmem_region_alloc(cxlr);
2398 	if (IS_ERR(cxlr_pmem))
2399 		return PTR_ERR(cxlr_pmem);
2400 	cxl_nvb = cxlr->cxl_nvb;
2401 
2402 	dev = &cxlr_pmem->dev;
2403 	rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
2404 	if (rc)
2405 		goto err;
2406 
2407 	rc = device_add(dev);
2408 	if (rc)
2409 		goto err;
2410 
2411 	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
2412 		dev_name(dev));
2413 
2414 	device_lock(&cxl_nvb->dev);
2415 	if (cxl_nvb->dev.driver)
2416 		rc = devm_add_action_or_reset(&cxl_nvb->dev,
2417 					      cxlr_pmem_unregister, cxlr_pmem);
2418 	else
2419 		rc = -ENXIO;
2420 	device_unlock(&cxl_nvb->dev);
2421 
2422 	if (rc)
2423 		goto err_bridge;
2424 
2425 	/* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */
2426 	return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr);
2427 
2428 err:
2429 	put_device(dev);
2430 err_bridge:
2431 	put_device(&cxl_nvb->dev);
2432 	cxlr->cxl_nvb = NULL;
2433 	return rc;
2434 }
2435 
2436 static void cxlr_dax_unregister(void *_cxlr_dax)
2437 {
2438 	struct cxl_dax_region *cxlr_dax = _cxlr_dax;
2439 
2440 	device_unregister(&cxlr_dax->dev);
2441 }
2442 
2443 static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
2444 {
2445 	struct cxl_dax_region *cxlr_dax;
2446 	struct device *dev;
2447 	int rc;
2448 
2449 	cxlr_dax = cxl_dax_region_alloc(cxlr);
2450 	if (IS_ERR(cxlr_dax))
2451 		return PTR_ERR(cxlr_dax);
2452 
2453 	dev = &cxlr_dax->dev;
2454 	rc = dev_set_name(dev, "dax_region%d", cxlr->id);
2455 	if (rc)
2456 		goto err;
2457 
2458 	rc = device_add(dev);
2459 	if (rc)
2460 		goto err;
2461 
2462 	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
2463 		dev_name(dev));
2464 
2465 	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
2466 					cxlr_dax);
2467 err:
2468 	put_device(dev);
2469 	return rc;
2470 }
2471 
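/*
 * device_find_child() match callbacks used by cxl_add_to_region() to find
 * the root decoder whose window contains an endpoint decoder's HPA range,
 * and any existing region that already maps exactly that range.
 */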
2472 static int match_decoder_by_range(struct device *dev, void *data)
2473 {
2474 	struct range *r1, *r2 = data;
2475 	struct cxl_root_decoder *cxlrd;
2476 
2477 	if (!is_root_decoder(dev))
2478 		return 0;
2479 
2480 	cxlrd = to_cxl_root_decoder(dev);
2481 	r1 = &cxlrd->cxlsd.cxld.hpa_range;
2482 	return range_contains(r1, r2);
2483 }
2484 
2485 static int match_region_by_range(struct device *dev, void *data)
2486 {
2487 	struct cxl_region_params *p;
2488 	struct cxl_region *cxlr;
2489 	struct range *r = data;
2490 	int rc = 0;
2491 
2492 	if (!is_cxl_region(dev))
2493 		return 0;
2494 
2495 	cxlr = to_cxl_region(dev);
2496 	p = &cxlr->params;
2497 
2498 	down_read(&cxl_region_rwsem);
2499 	if (p->res && p->res->start == r->start && p->res->end == r->end)
2500 		rc = 1;
2501 	up_read(&cxl_region_rwsem);
2502 
2503 	return rc;
2504 }
2505 
2506 /* Establish an empty region covering the given HPA range */
2507 static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
2508 					   struct cxl_endpoint_decoder *cxled)
2509 {
2510 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2511 	struct cxl_port *port = cxlrd_to_port(cxlrd);
2512 	struct range *hpa = &cxled->cxld.hpa_range;
2513 	struct cxl_region_params *p;
2514 	struct cxl_region *cxlr;
2515 	struct resource *res;
2516 	int rc;
2517 
2518 	do {
2519 		cxlr = __create_region(cxlrd, cxled->mode,
2520 				       atomic_read(&cxlrd->region_id));
2521 	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
2522 
2523 	if (IS_ERR(cxlr)) {
2524 		dev_err(cxlmd->dev.parent,
2525 			"%s:%s: %s failed to assign region: %ld\n",
2526 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2527 			__func__, PTR_ERR(cxlr));
2528 		return cxlr;
2529 	}
2530 
2531 	down_write(&cxl_region_rwsem);
2532 	p = &cxlr->params;
2533 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
2534 		dev_err(cxlmd->dev.parent,
2535 			"%s:%s: %s autodiscovery interrupted\n",
2536 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2537 			__func__);
2538 		rc = -EBUSY;
2539 		goto err;
2540 	}
2541 
2542 	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
2543 
2544 	res = kmalloc(sizeof(*res), GFP_KERNEL);
2545 	if (!res) {
2546 		rc = -ENOMEM;
2547 		goto err;
2548 	}
2549 
2550 	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
2551 				    dev_name(&cxlr->dev));
2552 	rc = insert_resource(cxlrd->res, res);
2553 	if (rc) {
2554 		/*
2555 		 * Platform firmware may not have split resources like "System
2556 		 * RAM" on CXL window boundaries, see cxl_region_iomem_release().
2557 		 */
2558 		dev_warn(cxlmd->dev.parent,
2559 			 "%s:%s: %s %s cannot insert resource\n",
2560 			 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2561 			 __func__, dev_name(&cxlr->dev));
2562 	}
2563 
2564 	p->res = res;
2565 	p->interleave_ways = cxled->cxld.interleave_ways;
2566 	p->interleave_granularity = cxled->cxld.interleave_granularity;
2567 	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
2568 
2569 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
2570 	if (rc)
2571 		goto err;
2572 
2573 	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
2574 		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
2575 		dev_name(&cxlr->dev), p->res, p->interleave_ways,
2576 		p->interleave_granularity);
2577 
2578 	/* ...to match put_device() in cxl_add_to_region() */
2579 	get_device(&cxlr->dev);
2580 	up_write(&cxl_region_rwsem);
2581 
2582 	return cxlr;
2583 
2584 err:
2585 	up_write(&cxl_region_rwsem);
2586 	devm_release_action(port->uport, unregister_region, cxlr);
2587 	return ERR_PTR(rc);
2588 }
2589 
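/**
 * cxl_add_to_region() - attach an endpoint decoder to its platform-defined region
 * @root: CXL root port to search for a decoder window covering @cxled
 * @cxled: endpoint decoder with a pre-programmed HPA range
 *
 * Find, or construct, the region that maps @cxled's HPA range and attach
 * @cxled to it.
 *
 * Return: 0 if a matching region was found or constructed, else a negative
 * error code.
 */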
2590 int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
2591 {
2592 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2593 	struct range *hpa = &cxled->cxld.hpa_range;
2594 	struct cxl_decoder *cxld = &cxled->cxld;
2595 	struct device *cxlrd_dev, *region_dev;
2596 	struct cxl_root_decoder *cxlrd;
2597 	struct cxl_region_params *p;
2598 	struct cxl_region *cxlr;
2599 	bool attach = false;
2600 	int rc;
2601 
2602 	cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
2603 				      match_decoder_by_range);
2604 	if (!cxlrd_dev) {
2605 		dev_err(cxlmd->dev.parent,
2606 			"%s:%s no CXL window for range %#llx:%#llx\n",
2607 			dev_name(&cxlmd->dev), dev_name(&cxld->dev),
2608 			cxld->hpa_range.start, cxld->hpa_range.end);
2609 		return -ENXIO;
2610 	}
2611 
2612 	cxlrd = to_cxl_root_decoder(cxlrd_dev);
2613 
2614 	/*
2615 	 * Ensure that if multiple threads race to construct_region() for @hpa,
2616 	 * one does the construction and the others attach to that region.
2617 	 */
2618 	mutex_lock(&cxlrd->range_lock);
2619 	region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
2620 				       match_region_by_range);
2621 	if (!region_dev) {
2622 		cxlr = construct_region(cxlrd, cxled);
2623 		region_dev = &cxlr->dev;
2624 	} else
2625 		cxlr = to_cxl_region(region_dev);
2626 	mutex_unlock(&cxlrd->range_lock);
2627 
2628 	rc = PTR_ERR_OR_ZERO(cxlr);
2629 	if (rc)
2630 		goto out;
2631 
2632 	attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
2633 
2634 	down_read(&cxl_region_rwsem);
2635 	p = &cxlr->params;
2636 	attach = p->state == CXL_CONFIG_COMMIT;
2637 	up_read(&cxl_region_rwsem);
2638 
2639 	if (attach) {
2640 		/*
2641 		 * If device_attach() fails the range may still be active via
2642 		 * the platform-firmware memory map; otherwise, the driver for
2643 		 * regions is local to this file, so driver matching can't fail.
2644 		 */
2645 		if (device_attach(&cxlr->dev) < 0)
2646 			dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
2647 				p->res);
2648 	}
2649 
2650 	put_device(region_dev);
2651 out:
2652 	put_device(cxlrd_dev);
2653 	return rc;
2654 }
2655 EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
2656 
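/*
 * A region flagged CXL_REGION_F_INCOHERENT must have CPU caches for its
 * address range invalidated before it is put into service; platforms that
 * cannot do that only proceed under CONFIG_CXL_REGION_INVALIDATION_TEST,
 * which exists solely for test environments.
 */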
2657 static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
2658 {
2659 	if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags))
2660 		return 0;
2661 
2662 	if (!cpu_cache_has_invalidate_memregion()) {
2663 		if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
2664 			dev_warn_once(
2665 				&cxlr->dev,
2666 				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
2667 			clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
2668 			return 0;
2669 		} else {
2670 			dev_err(&cxlr->dev,
2671 				"Failed to synchronize CPU cache state\n");
2672 			return -ENXIO;
2673 		}
2674 	}
2675 
2676 	cpu_cache_invalidate_memregion(IORES_DESC_CXL);
2677 	clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
2678 	return 0;
2679 }
2680 
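/*
 * walk_iomem_res_desc() callback: logging the overlap and returning a
 * positive value terminates the walk, which cxl_region_probe() takes as a
 * cue to leave the range to the existing "System RAM" mapping rather than
 * create a dax region.
 */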
2681 static int is_system_ram(struct resource *res, void *arg)
2682 {
2683 	struct cxl_region *cxlr = arg;
2684 	struct cxl_region_params *p = &cxlr->params;
2685 
2686 	dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
2687 	return 1;
2688 }
2689 
2690 static int cxl_region_probe(struct device *dev)
2691 {
2692 	struct cxl_region *cxlr = to_cxl_region(dev);
2693 	struct cxl_region_params *p = &cxlr->params;
2694 	int rc;
2695 
2696 	rc = down_read_interruptible(&cxl_region_rwsem);
2697 	if (rc) {
2698 		dev_dbg(&cxlr->dev, "probe interrupted\n");
2699 		return rc;
2700 	}
2701 
2702 	if (p->state < CXL_CONFIG_COMMIT) {
2703 		dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
2704 		rc = -ENXIO;
2705 		goto out;
2706 	}
2707 
2708 	rc = cxl_region_invalidate_memregion(cxlr);
2709 
2710 	/*
2711 	 * From this point on any path that changes the region's state away from
2712 	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
2713 	 */
2714 out:
2715 	up_read(&cxl_region_rwsem);
2716 
2717 	if (rc)
2718 		return rc;
2719 
2720 	switch (cxlr->mode) {
2721 	case CXL_DECODER_PMEM:
2722 		return devm_cxl_add_pmem_region(cxlr);
2723 	case CXL_DECODER_RAM:
2724 		/*
2725 		 * The region cannot be managed by CXL if any portion of
2726 		 * it is already online as 'System RAM'.
2727 		 */
2728 		if (walk_iomem_res_desc(IORES_DESC_NONE,
2729 					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
2730 					p->res->start, p->res->end, cxlr,
2731 					is_system_ram) > 0)
2732 			return 0;
2733 		return devm_cxl_add_dax_region(cxlr);
2734 	default:
2735 		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
2736 			cxlr->mode);
2737 		return -ENXIO;
2738 	}
2739 }
2740 
2741 static struct cxl_driver cxl_region_driver = {
2742 	.name = "cxl_region",
2743 	.probe = cxl_region_probe,
2744 	.id = CXL_DEVICE_REGION,
2745 };
2746 
2747 int cxl_region_init(void)
2748 {
2749 	return cxl_driver_register(&cxl_region_driver);
2750 }
2751 
2752 void cxl_region_exit(void)
2753 {
2754 	cxl_driver_unregister(&cxl_region_driver);
2755 }
2756 
2757 MODULE_IMPORT_NS(CXL);
2758 MODULE_IMPORT_NS(DEVMEM);
2759 MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
2760