xref: /openbmc/linux/kernel/irq/msi.c (revision 17cde5e6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2014 Intel Corp.
4  * Author: Jiang Liu <jiang.liu@linux.intel.com>
5  *
6  * This file is licensed under GPLv2.
7  *
8  * This file contains common code to support Message Signaled Interrupts for
9  * PCI compatible and non PCI compatible devices.
10  */
11 #include <linux/types.h>
12 #include <linux/device.h>
13 #include <linux/irq.h>
14 #include <linux/irqdomain.h>
15 #include <linux/msi.h>
16 #include <linux/slab.h>
17 #include <linux/sysfs.h>
18 #include <linux/pci.h>
19 
20 #include "internals.h"
21 
/**
 * struct msi_ctrl - MSI internal management control structure
 * @domid:	ID of the domain on which management operations should be done
 * @first:	First (hardware) slot index to operate on
 * @last:	Last (hardware) slot index to operate on
 * @nirqs:	The number of Linux interrupts to allocate. Can be larger
 *		than the range due to PCI/multi-MSI.
 *
 * @first and @last describe an inclusive index range. msi_ctrl_valid()
 * rejects a control structure when @first is larger than @last or when
 * either index is not below the hardware size of the domain.
 */
struct msi_ctrl {
	unsigned int			domid;
	unsigned int			first;
	unsigned int			last;
	unsigned int			nirqs;
};
36 
/*
 * Invalid Xarray index which is outside of any searchable range. The
 * descriptor iterator index is reset to this value when a walk ends and
 * when the descriptor mutex is dropped.
 */
#define MSI_XA_MAX_INDEX	(ULONG_MAX - 1)
/*
 * The maximum domain size. Used as hardware size when no domain is
 * installed or when a domain is created with a hardware size of 0.
 */
#define MSI_XA_DOMAIN_SIZE	(MSI_MAX_INDEX + 1)

/* Forward declarations of helpers which are used before their definition */
static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid);
static inline int msi_sysfs_create_group(struct device *dev);
45 
46 
47 /**
48  * msi_alloc_desc - Allocate an initialized msi_desc
49  * @dev:	Pointer to the device for which this is allocated
50  * @nvec:	The number of vectors used in this entry
51  * @affinity:	Optional pointer to an affinity mask array size of @nvec
52  *
53  * If @affinity is not %NULL then an affinity array[@nvec] is allocated
54  * and the affinity masks and flags from @affinity are copied.
55  *
56  * Return: pointer to allocated &msi_desc on success or %NULL on failure
57  */
58 static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
59 				       const struct irq_affinity_desc *affinity)
60 {
61 	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
62 
63 	if (!desc)
64 		return NULL;
65 
66 	desc->dev = dev;
67 	desc->nvec_used = nvec;
68 	if (affinity) {
69 		desc->affinity = kmemdup(affinity, nvec * sizeof(*desc->affinity), GFP_KERNEL);
70 		if (!desc->affinity) {
71 			kfree(desc);
72 			return NULL;
73 		}
74 	}
75 	return desc;
76 }
77 
78 static void msi_free_desc(struct msi_desc *desc)
79 {
80 	kfree(desc->affinity);
81 	kfree(desc);
82 }
83 
84 static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
85 			   unsigned int domid, unsigned int index)
86 {
87 	struct msi_device_data *md = dev->msi.data;
88 	struct xarray *xa = &md->__domains[domid].store;
89 	unsigned int hwsize;
90 	int ret;
91 
92 	hwsize = msi_domain_get_hwsize(dev, domid);
93 
94 	if (index == MSI_ANY_INDEX) {
95 		struct xa_limit limit = { .min = 0, .max = hwsize - 1 };
96 		unsigned int index;
97 
98 		/* Let the xarray allocate a free index within the limit */
99 		ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL);
100 		if (ret)
101 			goto fail;
102 
103 		desc->msi_index = index;
104 		return 0;
105 	} else {
106 		if (index >= hwsize) {
107 			ret = -ERANGE;
108 			goto fail;
109 		}
110 
111 		desc->msi_index = index;
112 		ret = xa_insert(xa, index, desc, GFP_KERNEL);
113 		if (ret)
114 			goto fail;
115 		return 0;
116 	}
117 fail:
118 	msi_free_desc(desc);
119 	return ret;
120 }
121 
122 /**
123  * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and
124  *				insert it at @init_desc->msi_index
125  *
126  * @dev:	Pointer to the device for which the descriptor is allocated
127  * @domid:	The id of the interrupt domain to which the desriptor is added
128  * @init_desc:	Pointer to an MSI descriptor to initialize the new descriptor
129  *
130  * Return: 0 on success or an appropriate failure code.
131  */
132 int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
133 			       struct msi_desc *init_desc)
134 {
135 	struct msi_desc *desc;
136 
137 	lockdep_assert_held(&dev->msi.data->mutex);
138 
139 	desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity);
140 	if (!desc)
141 		return -ENOMEM;
142 
143 	/* Copy type specific data to the new descriptor. */
144 	desc->pci = init_desc->pci;
145 
146 	return msi_insert_desc(dev, desc, domid, init_desc->msi_index);
147 }
148 
149 static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
150 {
151 	switch (filter) {
152 	case MSI_DESC_ALL:
153 		return true;
154 	case MSI_DESC_NOTASSOCIATED:
155 		return !desc->irq;
156 	case MSI_DESC_ASSOCIATED:
157 		return !!desc->irq;
158 	}
159 	WARN_ON_ONCE(1);
160 	return false;
161 }
162 
163 static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
164 {
165 	unsigned int hwsize;
166 
167 	if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
168 			 (dev->msi.domain &&
169 			  !dev->msi.data->__domains[ctrl->domid].domain)))
170 		return false;
171 
172 	hwsize = msi_domain_get_hwsize(dev, ctrl->domid);
173 	if (WARN_ON_ONCE(ctrl->first > ctrl->last ||
174 			 ctrl->first >= hwsize ||
175 			 ctrl->last >= hwsize))
176 		return false;
177 	return true;
178 }
179 
180 static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl)
181 {
182 	struct msi_desc *desc;
183 	struct xarray *xa;
184 	unsigned long idx;
185 
186 	lockdep_assert_held(&dev->msi.data->mutex);
187 
188 	if (!msi_ctrl_valid(dev, ctrl))
189 		return;
190 
191 	xa = &dev->msi.data->__domains[ctrl->domid].store;
192 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
193 		xa_erase(xa, idx);
194 
195 		/* Leak the descriptor when it is still referenced */
196 		if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
197 			continue;
198 		msi_free_desc(desc);
199 	}
200 }
201 
202 /**
203  * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain
204  * @dev:	Device for which to free the descriptors
205  * @domid:	Id of the domain to operate on
206  * @first:	Index to start freeing from (inclusive)
207  * @last:	Last index to be freed (inclusive)
208  */
209 void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid,
210 				     unsigned int first, unsigned int last)
211 {
212 	struct msi_ctrl ctrl = {
213 		.domid	= domid,
214 		.first	= first,
215 		.last	= last,
216 	};
217 
218 	msi_domain_free_descs(dev, &ctrl);
219 }
220 
221 /**
222  * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors
223  * @dev:	Pointer to the device for which the descriptors are allocated
224  * @ctrl:	Allocation control struct
225  *
226  * Return: 0 on success or an appropriate failure code.
227  */
228 static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl)
229 {
230 	struct msi_desc *desc;
231 	unsigned int idx;
232 	int ret;
233 
234 	lockdep_assert_held(&dev->msi.data->mutex);
235 
236 	if (!msi_ctrl_valid(dev, ctrl))
237 		return -EINVAL;
238 
239 	for (idx = ctrl->first; idx <= ctrl->last; idx++) {
240 		desc = msi_alloc_desc(dev, 1, NULL);
241 		if (!desc)
242 			goto fail_mem;
243 		ret = msi_insert_desc(dev, desc, ctrl->domid, idx);
244 		if (ret)
245 			goto fail;
246 	}
247 	return 0;
248 
249 fail_mem:
250 	ret = -ENOMEM;
251 fail:
252 	msi_domain_free_descs(dev, ctrl);
253 	return ret;
254 }
255 
256 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
257 {
258 	*msg = entry->msg;
259 }
260 
/*
 * Copy the cached message of the descriptor which belongs to Linux
 * interrupt @irq to @msg. The descriptor lookup result is used without a
 * NULL check.
 */
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__get_cached_msi_msg(irq_get_msi_desc(irq), msg);
}
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
268 
269 static void msi_device_data_release(struct device *dev, void *res)
270 {
271 	struct msi_device_data *md = res;
272 	int i;
273 
274 	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
275 		msi_remove_device_irq_domain(dev, i);
276 		WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
277 		xa_destroy(&md->__domains[i].store);
278 	}
279 	dev->msi.data = NULL;
280 }
281 
282 /**
283  * msi_setup_device_data - Setup MSI device data
284  * @dev:	Device for which MSI device data should be set up
285  *
286  * Return: 0 on success, appropriate error code otherwise
287  *
288  * This can be called more than once for @dev. If the MSI device data is
289  * already allocated the call succeeds. The allocated memory is
290  * automatically released when the device is destroyed.
291  */
292 int msi_setup_device_data(struct device *dev)
293 {
294 	struct msi_device_data *md;
295 	int ret, i;
296 
297 	if (dev->msi.data)
298 		return 0;
299 
300 	md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
301 	if (!md)
302 		return -ENOMEM;
303 
304 	ret = msi_sysfs_create_group(dev);
305 	if (ret) {
306 		devres_free(md);
307 		return ret;
308 	}
309 
310 	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
311 		xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC);
312 
313 	/*
314 	 * If @dev::msi::domain is set and is a global MSI domain, copy the
315 	 * pointer into the domain array so all code can operate on domain
316 	 * ids. The NULL pointer check is required to keep the legacy
317 	 * architecture specific PCI/MSI support working.
318 	 */
319 	if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain))
320 		md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
321 
322 	mutex_init(&md->mutex);
323 	dev->msi.data = md;
324 	devres_add(dev, md);
325 	return 0;
326 }
327 
328 /**
329  * msi_lock_descs - Lock the MSI descriptor storage of a device
330  * @dev:	Device to operate on
331  */
332 void msi_lock_descs(struct device *dev)
333 {
334 	mutex_lock(&dev->msi.data->mutex);
335 }
336 EXPORT_SYMBOL_GPL(msi_lock_descs);
337 
338 /**
339  * msi_unlock_descs - Unlock the MSI descriptor storage of a device
340  * @dev:	Device to operate on
341  */
342 void msi_unlock_descs(struct device *dev)
343 {
344 	/* Invalidate the index which was cached by the iterator */
345 	dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
346 	mutex_unlock(&dev->msi.data->mutex);
347 }
348 EXPORT_SYMBOL_GPL(msi_unlock_descs);
349 
350 static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
351 				      enum msi_desc_filter filter)
352 {
353 	struct xarray *xa = &md->__domains[domid].store;
354 	struct msi_desc *desc;
355 
356 	xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) {
357 		if (msi_desc_match(desc, filter))
358 			return desc;
359 	}
360 	md->__iter_idx = MSI_XA_MAX_INDEX;
361 	return NULL;
362 }
363 
364 /**
365  * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device
366  * @dev:	Device to operate on
367  * @domid:	The id of the interrupt domain which should be walked.
368  * @filter:	Descriptor state filter
369  *
370  * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
371  * must be invoked before the call.
372  *
373  * Return: Pointer to the first MSI descriptor matching the search
374  *	   criteria, NULL if none found.
375  */
376 struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
377 				       enum msi_desc_filter filter)
378 {
379 	struct msi_device_data *md = dev->msi.data;
380 
381 	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
382 		return NULL;
383 
384 	lockdep_assert_held(&md->mutex);
385 
386 	md->__iter_idx = 0;
387 	return msi_find_desc(md, domid, filter);
388 }
389 EXPORT_SYMBOL_GPL(msi_domain_first_desc);
390 
391 /**
392  * msi_next_desc - Get the next MSI descriptor of a device
393  * @dev:	Device to operate on
394  * @domid:	The id of the interrupt domain which should be walked.
395  * @filter:	Descriptor state filter
396  *
397  * The first invocation of msi_next_desc() has to be preceeded by a
398  * successful invocation of __msi_first_desc(). Consecutive invocations are
399  * only valid if the previous one was successful. All these operations have
400  * to be done within the same MSI mutex held region.
401  *
402  * Return: Pointer to the next MSI descriptor matching the search
403  *	   criteria, NULL if none found.
404  */
405 struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
406 			       enum msi_desc_filter filter)
407 {
408 	struct msi_device_data *md = dev->msi.data;
409 
410 	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
411 		return NULL;
412 
413 	lockdep_assert_held(&md->mutex);
414 
415 	if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
416 		return NULL;
417 
418 	md->__iter_idx++;
419 	return msi_find_desc(md, domid, filter);
420 }
421 EXPORT_SYMBOL_GPL(msi_next_desc);
422 
423 /**
424  * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain
425  * @dev:	Device to operate on
426  * @domid:	Domain ID of the interrupt domain associated to the device
427  * @index:	MSI interrupt index to look for (0-based)
428  *
429  * Return: The Linux interrupt number on success (> 0), 0 if not found
430  */
431 unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
432 {
433 	struct msi_desc *desc;
434 	unsigned int ret = 0;
435 	bool pcimsi = false;
436 	struct xarray *xa;
437 
438 	if (!dev->msi.data)
439 		return 0;
440 
441 	if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
442 		return 0;
443 
444 	/* This check is only valid for the PCI default MSI domain */
445 	if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
446 		pcimsi = to_pci_dev(dev)->msi_enabled;
447 
448 	msi_lock_descs(dev);
449 	xa = &dev->msi.data->__domains[domid].store;
450 	desc = xa_load(xa, pcimsi ? 0 : index);
451 	if (desc && desc->irq) {
452 		/*
453 		 * PCI-MSI has only one descriptor for multiple interrupts.
454 		 * PCI-MSIX and platform MSI use a descriptor per
455 		 * interrupt.
456 		 */
457 		if (pcimsi) {
458 			if (index < desc->nvec_used)
459 				ret = desc->irq + index;
460 		} else {
461 			ret = desc->irq;
462 		}
463 	}
464 
465 	msi_unlock_descs(dev);
466 	return ret;
467 }
468 EXPORT_SYMBOL_GPL(msi_domain_get_virq);
469 
470 #ifdef CONFIG_SYSFS
/*
 * The group starts out without attributes. The per interrupt files are
 * added to and removed from the group at runtime by
 * msi_sysfs_populate_desc() and msi_sysfs_remove_desc().
 */
static struct attribute *msi_dev_attrs[] = {
	NULL
};

/* The "msi_irqs" sysfs attribute group of a device */
static const struct attribute_group msi_irqs_group = {
	.name	= "msi_irqs",
	.attrs	= msi_dev_attrs,
};
479 
480 static inline int msi_sysfs_create_group(struct device *dev)
481 {
482 	return devm_device_add_group(dev, &msi_irqs_group);
483 }
484 
485 static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
486 			     char *buf)
487 {
488 	/* MSI vs. MSIX is per device not per interrupt */
489 	bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
490 
491 	return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
492 }
493 
494 static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
495 {
496 	struct device_attribute *attrs = desc->sysfs_attrs;
497 	int i;
498 
499 	if (!attrs)
500 		return;
501 
502 	desc->sysfs_attrs = NULL;
503 	for (i = 0; i < desc->nvec_used; i++) {
504 		if (attrs[i].show)
505 			sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
506 		kfree(attrs[i].attr.name);
507 	}
508 	kfree(attrs);
509 }
510 
511 static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
512 {
513 	struct device_attribute *attrs;
514 	int ret, i;
515 
516 	attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL);
517 	if (!attrs)
518 		return -ENOMEM;
519 
520 	desc->sysfs_attrs = attrs;
521 	for (i = 0; i < desc->nvec_used; i++) {
522 		sysfs_attr_init(&attrs[i].attr);
523 		attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i);
524 		if (!attrs[i].attr.name) {
525 			ret = -ENOMEM;
526 			goto fail;
527 		}
528 
529 		attrs[i].attr.mode = 0444;
530 		attrs[i].show = msi_mode_show;
531 
532 		ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
533 		if (ret) {
534 			attrs[i].show = NULL;
535 			goto fail;
536 		}
537 	}
538 	return 0;
539 
540 fail:
541 	msi_sysfs_remove_desc(dev, desc);
542 	return ret;
543 }
544 
545 #ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS
546 /**
547  * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
548  * @dev:	The device (PCI, platform etc) which will get sysfs entries
549  */
550 int msi_device_populate_sysfs(struct device *dev)
551 {
552 	struct msi_desc *desc;
553 	int ret;
554 
555 	msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
556 		if (desc->sysfs_attrs)
557 			continue;
558 		ret = msi_sysfs_populate_desc(dev, desc);
559 		if (ret)
560 			return ret;
561 	}
562 	return 0;
563 }
564 
565 /**
566  * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
567  * @dev:		The device (PCI, platform etc) for which to remove
568  *			sysfs entries
569  */
570 void msi_device_destroy_sysfs(struct device *dev)
571 {
572 	struct msi_desc *desc;
573 
574 	msi_for_each_desc(desc, dev, MSI_DESC_ALL)
575 		msi_sysfs_remove_desc(dev, desc);
576 }
#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
578 #else /* CONFIG_SYSFS */
/* Stubs for kernels without sysfs support: always succeed, do nothing */
static inline int msi_sysfs_create_group(struct device *dev) { return 0; }
static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; }
static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { }
582 #endif /* !CONFIG_SYSFS */
583 
584 static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid)
585 {
586 	struct irq_domain *domain;
587 
588 	lockdep_assert_held(&dev->msi.data->mutex);
589 
590 	if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
591 		return NULL;
592 
593 	domain = dev->msi.data->__domains[domid].domain;
594 	if (!domain)
595 		return NULL;
596 
597 	if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain)))
598 		return NULL;
599 
600 	return domain;
601 }
602 
603 static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid)
604 {
605 	struct msi_domain_info *info;
606 	struct irq_domain *domain;
607 
608 	domain = msi_get_device_domain(dev, domid);
609 	if (domain) {
610 		info = domain->host_data;
611 		return info->hwsize;
612 	}
613 	/* No domain, default to MSI_XA_DOMAIN_SIZE */
614 	return MSI_XA_DOMAIN_SIZE;
615 }
616 
617 static inline void irq_chip_write_msi_msg(struct irq_data *data,
618 					  struct msi_msg *msg)
619 {
620 	data->chip->irq_write_msi_msg(data, msg);
621 }
622 
623 static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
624 {
625 	struct msi_domain_info *info = domain->host_data;
626 
627 	/*
628 	 * If the MSI provider has messed with the second message and
629 	 * not advertized that it is level-capable, signal the breakage.
630 	 */
631 	WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
632 		  (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
633 		(msg[1].address_lo || msg[1].address_hi || msg[1].data));
634 }
635 
636 /**
637  * msi_domain_set_affinity - Generic affinity setter function for MSI domains
638  * @irq_data:	The irq data associated to the interrupt
639  * @mask:	The affinity mask to set
640  * @force:	Flag to enforce setting (disable online checks)
641  *
642  * Intended to be used by MSI interrupt controllers which are
643  * implemented with hierarchical domains.
644  *
645  * Return: IRQ_SET_MASK_* result code
646  */
647 int msi_domain_set_affinity(struct irq_data *irq_data,
648 			    const struct cpumask *mask, bool force)
649 {
650 	struct irq_data *parent = irq_data->parent_data;
651 	struct msi_msg msg[2] = { [1] = { }, };
652 	int ret;
653 
654 	ret = parent->chip->irq_set_affinity(parent, mask, force);
655 	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
656 		BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
657 		msi_check_level(irq_data->domain, msg);
658 		irq_chip_write_msi_msg(irq_data, msg);
659 	}
660 
661 	return ret;
662 }
663 
664 static int msi_domain_activate(struct irq_domain *domain,
665 			       struct irq_data *irq_data, bool early)
666 {
667 	struct msi_msg msg[2] = { [1] = { }, };
668 
669 	BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
670 	msi_check_level(irq_data->domain, msg);
671 	irq_chip_write_msi_msg(irq_data, msg);
672 	return 0;
673 }
674 
675 static void msi_domain_deactivate(struct irq_domain *domain,
676 				  struct irq_data *irq_data)
677 {
678 	struct msi_msg msg[2];
679 
680 	memset(msg, 0, sizeof(msg));
681 	irq_chip_write_msi_msg(irq_data, msg);
682 }
683 
684 static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
685 			    unsigned int nr_irqs, void *arg)
686 {
687 	struct msi_domain_info *info = domain->host_data;
688 	struct msi_domain_ops *ops = info->ops;
689 	irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
690 	int i, ret;
691 
692 	if (irq_find_mapping(domain, hwirq) > 0)
693 		return -EEXIST;
694 
695 	if (domain->parent) {
696 		ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
697 		if (ret < 0)
698 			return ret;
699 	}
700 
701 	for (i = 0; i < nr_irqs; i++) {
702 		ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
703 		if (ret < 0) {
704 			if (ops->msi_free) {
705 				for (i--; i > 0; i--)
706 					ops->msi_free(domain, info, virq + i);
707 			}
708 			irq_domain_free_irqs_top(domain, virq, nr_irqs);
709 			return ret;
710 		}
711 	}
712 
713 	return 0;
714 }
715 
716 static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
717 			    unsigned int nr_irqs)
718 {
719 	struct msi_domain_info *info = domain->host_data;
720 	int i;
721 
722 	if (info->ops->msi_free) {
723 		for (i = 0; i < nr_irqs; i++)
724 			info->ops->msi_free(domain, info, virq + i);
725 	}
726 	irq_domain_free_irqs_top(domain, virq, nr_irqs);
727 }
728 
/*
 * The irq domain callbacks which are installed for every domain created
 * by __msi_create_irq_domain().
 */
static const struct irq_domain_ops msi_domain_ops = {
	.alloc		= msi_domain_alloc,
	.free		= msi_domain_free,
	.activate	= msi_domain_activate,
	.deactivate	= msi_domain_deactivate,
};
735 
/* Default get_hwirq() callback: the hardware interrupt number is in @arg */
static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
						msi_alloc_info_t *arg)
{
	return arg->hwirq;
}
741 
/*
 * Default msi_prepare() callback: clear the allocation info @arg. The
 * other arguments are not used. Always returns 0.
 */
static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
				  int nvec, msi_alloc_info_t *arg)
{
	memset(arg, 0, sizeof(*arg));
	return 0;
}
748 
/* Default set_desc() callback: store the descriptor pointer in @arg */
static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
				    struct msi_desc *desc)
{
	arg->desc = desc;
}
754 
755 static int msi_domain_ops_init(struct irq_domain *domain,
756 			       struct msi_domain_info *info,
757 			       unsigned int virq, irq_hw_number_t hwirq,
758 			       msi_alloc_info_t *arg)
759 {
760 	irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
761 				      info->chip_data);
762 	if (info->handler && info->handler_name) {
763 		__irq_set_handler(virq, info->handler, 0, info->handler_name);
764 		if (info->handler_data)
765 			irq_set_handler_data(virq, info->handler_data);
766 	}
767 	return 0;
768 }
769 
/*
 * Default MSI domain callbacks. msi_domain_update_dom_ops() installs the
 * whole set when a domain info comes without ops and uses it to fill in
 * missing callbacks when MSI_FLAG_USE_DEF_DOM_OPS is set.
 */
static struct msi_domain_ops msi_domain_ops_default = {
	.get_hwirq		= msi_domain_ops_get_hwirq,
	.msi_init		= msi_domain_ops_init,
	.msi_prepare		= msi_domain_ops_prepare,
	.set_desc		= msi_domain_ops_set_desc,
};
776 
777 static void msi_domain_update_dom_ops(struct msi_domain_info *info)
778 {
779 	struct msi_domain_ops *ops = info->ops;
780 
781 	if (ops == NULL) {
782 		info->ops = &msi_domain_ops_default;
783 		return;
784 	}
785 
786 	if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
787 		return;
788 
789 	if (ops->get_hwirq == NULL)
790 		ops->get_hwirq = msi_domain_ops_default.get_hwirq;
791 	if (ops->msi_init == NULL)
792 		ops->msi_init = msi_domain_ops_default.msi_init;
793 	if (ops->msi_prepare == NULL)
794 		ops->msi_prepare = msi_domain_ops_default.msi_prepare;
795 	if (ops->set_desc == NULL)
796 		ops->set_desc = msi_domain_ops_default.set_desc;
797 }
798 
799 static void msi_domain_update_chip_ops(struct msi_domain_info *info)
800 {
801 	struct irq_chip *chip = info->chip;
802 
803 	BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
804 	if (!chip->irq_set_affinity)
805 		chip->irq_set_affinity = msi_domain_set_affinity;
806 }
807 
808 static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
809 						  struct msi_domain_info *info,
810 						  unsigned int flags,
811 						  struct irq_domain *parent)
812 {
813 	struct irq_domain *domain;
814 
815 	if (info->hwsize > MSI_XA_DOMAIN_SIZE)
816 		return NULL;
817 
818 	/*
819 	 * Hardware size 0 is valid for backwards compatibility and for
820 	 * domains which are not backed by a hardware table. Grant the
821 	 * maximum index space.
822 	 */
823 	if (!info->hwsize)
824 		info->hwsize = MSI_XA_DOMAIN_SIZE;
825 
826 	msi_domain_update_dom_ops(info);
827 	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
828 		msi_domain_update_chip_ops(info);
829 
830 	domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0,
831 					     fwnode, &msi_domain_ops, info);
832 
833 	if (domain) {
834 		if (!domain->name && info->chip)
835 			domain->name = info->chip->name;
836 		irq_domain_update_bus_token(domain, info->bus_token);
837 	}
838 
839 	return domain;
840 }
841 
/**
 * msi_create_irq_domain - Create an MSI interrupt domain
 * @fwnode:	Optional fwnode of the interrupt controller
 * @info:	MSI domain info
 * @parent:	Parent irq domain
 *
 * Thin wrapper around __msi_create_irq_domain() which passes no additional
 * domain flags. Note that @info can be modified: a hardware size of 0 is
 * replaced by the maximum size and missing callbacks can be filled in.
 *
 * Return: pointer to the created &struct irq_domain or %NULL on failure
 */
struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
					 struct msi_domain_info *info,
					 struct irq_domain *parent)
{
	return __msi_create_irq_domain(fwnode, info, 0, parent);
}
856 
857 /**
858  * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down
859  *				  in the domain hierarchy
860  * @dev:		The device for which the domain should be created
861  * @domain:		The domain in the hierarchy this op is being called on
862  * @msi_parent_domain:	The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to
863  *			be created
864  * @msi_child_info:	The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE
865  *			domain to be created
866  *
867  * Return: true on success, false otherwise
868  *
869  * This is the most complex problem of per device MSI domains and the
870  * underlying interrupt domain hierarchy:
871  *
872  * The device domain to be initialized requests the broadest feature set
873  * possible and the underlying domain hierarchy puts restrictions on it.
874  *
875  * That's trivial for a simple parent->child relationship, but it gets
876  * interesting with an intermediate domain: root->parent->child.  The
877  * intermediate 'parent' can expand the capabilities which the 'root'
878  * domain is providing. So that creates a classic hen and egg problem:
879  * Which entity is doing the restrictions/expansions?
880  *
881  * One solution is to let the root domain handle the initialization that's
882  * why there is the @domain and the @msi_parent_domain pointer.
883  */
884 bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
885 				  struct irq_domain *msi_parent_domain,
886 				  struct msi_domain_info *msi_child_info)
887 {
888 	struct irq_domain *parent = domain->parent;
889 
890 	if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops ||
891 			 !parent->msi_parent_ops->init_dev_msi_info))
892 		return false;
893 
894 	return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain,
895 							 msi_child_info);
896 }
897 
898 /**
899  * msi_create_device_irq_domain - Create a device MSI interrupt domain
900  * @dev:		Pointer to the device
901  * @domid:		Domain id
902  * @template:		MSI domain info bundle used as template
903  * @hwsize:		Maximum number of MSI table entries (0 if unknown or unlimited)
904  * @domain_data:	Optional pointer to domain specific data which is set in
905  *			msi_domain_info::data
906  * @chip_data:		Optional pointer to chip specific data which is set in
907  *			msi_domain_info::chip_data
908  *
909  * Return: True on success, false otherwise
910  *
911  * There is no firmware node required for this interface because the per
912  * device domains are software constructs which are actually closer to the
913  * hardware reality than any firmware can describe them.
914  *
915  * The domain name and the irq chip name for a MSI device domain are
916  * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)"
917  *
918  * $PREFIX:   Optional prefix provided by the underlying MSI parent domain
919  *	      via msi_parent_ops::prefix. If that pointer is NULL the prefix
920  *	      is empty.
921  * $CHIPNAME: The name of the irq_chip in @template
922  * $DEVNAME:  The name of the device
923  *
924  * This results in understandable chip names and hardware interrupt numbers
925  * in e.g. /proc/interrupts
926  *
927  * PCI-MSI-0000:00:1c.0     0-edge  Parent domain has no prefix
928  * IR-PCI-MSI-0000:00:1c.4  0-edge  Same with interrupt remapping prefix 'IR-'
929  *
930  * IR-PCI-MSIX-0000:3d:00.0 0-edge  Hardware interrupt numbers reflect
931  * IR-PCI-MSIX-0000:3d:00.0 1-edge  the real MSI-X index on that device
932  * IR-PCI-MSIX-0000:3d:00.0 2-edge
933  *
934  * On IMS domains the hardware interrupt number is either a table entry
935  * index or a purely software managed index but it is guaranteed to be
936  * unique.
937  *
938  * The domain pointer is stored in @dev::msi::data::__irqdomains[]. All
939  * subsequent operations on the domain depend on the domain id.
940  *
941  * The domain is automatically freed when the device is removed via devres
942  * in the context of @dev::msi::data freeing, but it can also be
943  * independently removed via @msi_remove_device_irq_domain().
944  */
945 bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
946 				  const struct msi_domain_template *template,
947 				  unsigned int hwsize, void *domain_data,
948 				  void *chip_data)
949 {
950 	struct irq_domain *domain, *parent = dev->msi.domain;
951 	const struct msi_parent_ops *pops;
952 	struct msi_domain_template *bundle;
953 	struct fwnode_handle *fwnode;
954 
955 	if (!irq_domain_is_msi_parent(parent))
956 		return false;
957 
958 	if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
959 		return false;
960 
961 	bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL);
962 	if (!bundle)
963 		return false;
964 
965 	bundle->info.hwsize = hwsize;
966 	bundle->info.chip = &bundle->chip;
967 	bundle->info.ops = &bundle->ops;
968 	bundle->info.data = domain_data;
969 	bundle->info.chip_data = chip_data;
970 
971 	pops = parent->msi_parent_ops;
972 	snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s",
973 		 pops->prefix ? : "", bundle->chip.name, dev_name(dev));
974 	bundle->chip.name = bundle->name;
975 
976 	fwnode = irq_domain_alloc_named_fwnode(bundle->name);
977 	if (!fwnode)
978 		goto free_bundle;
979 
980 	if (msi_setup_device_data(dev))
981 		goto free_fwnode;
982 
983 	msi_lock_descs(dev);
984 
985 	if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
986 		goto fail;
987 
988 	if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
989 		goto fail;
990 
991 	domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
992 	if (!domain)
993 		goto fail;
994 
995 	domain->dev = dev;
996 	dev->msi.data->__domains[domid].domain = domain;
997 	msi_unlock_descs(dev);
998 	return true;
999 
1000 fail:
1001 	msi_unlock_descs(dev);
1002 free_fwnode:
1003 	kfree(fwnode);
1004 free_bundle:
1005 	kfree(bundle);
1006 	return false;
1007 }
1008 
1009 /**
1010  * msi_remove_device_irq_domain - Free a device MSI interrupt domain
1011  * @dev:	Pointer to the device
1012  * @domid:	Domain id
1013  */
1014 void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
1015 {
1016 	struct msi_domain_info *info;
1017 	struct irq_domain *domain;
1018 
1019 	msi_lock_descs(dev);
1020 
1021 	domain = msi_get_device_domain(dev, domid);
1022 
1023 	if (!domain || !irq_domain_is_msi_device(domain))
1024 		goto unlock;
1025 
1026 	dev->msi.data->__domains[domid].domain = NULL;
1027 	info = domain->host_data;
1028 	irq_domain_remove(domain);
1029 	kfree(container_of(info, struct msi_domain_template, info));
1030 
1031 unlock:
1032 	msi_unlock_descs(dev);
1033 }
1034 
1035 /**
1036  * msi_match_device_irq_domain - Match a device irq domain against a bus token
1037  * @dev:	Pointer to the device
1038  * @domid:	Domain id
1039  * @bus_token:	Bus token to match against the domain bus token
1040  *
1041  * Return: True if device domain exists and bus tokens match.
1042  */
1043 bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
1044 				 enum irq_domain_bus_token bus_token)
1045 {
1046 	struct msi_domain_info *info;
1047 	struct irq_domain *domain;
1048 	bool ret = false;
1049 
1050 	msi_lock_descs(dev);
1051 	domain = msi_get_device_domain(dev, domid);
1052 	if (domain && irq_domain_is_msi_device(domain)) {
1053 		info = domain->host_data;
1054 		ret = info->bus_token == bus_token;
1055 	}
1056 	msi_unlock_descs(dev);
1057 	return ret;
1058 }
1059 
1060 int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
1061 			    int nvec, msi_alloc_info_t *arg)
1062 {
1063 	struct msi_domain_info *info = domain->host_data;
1064 	struct msi_domain_ops *ops = info->ops;
1065 
1066 	return ops->msi_prepare(domain, dev, nvec, arg);
1067 }
1068 
1069 int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
1070 			     int virq_base, int nvec, msi_alloc_info_t *arg)
1071 {
1072 	struct msi_domain_info *info = domain->host_data;
1073 	struct msi_domain_ops *ops = info->ops;
1074 	struct msi_ctrl ctrl = {
1075 		.domid	= MSI_DEFAULT_DOMAIN,
1076 		.first  = virq_base,
1077 		.last	= virq_base + nvec - 1,
1078 	};
1079 	struct msi_desc *desc;
1080 	struct xarray *xa;
1081 	int ret, virq;
1082 
1083 	if (!msi_ctrl_valid(dev, &ctrl))
1084 		return -EINVAL;
1085 
1086 	msi_lock_descs(dev);
1087 	ret = msi_domain_add_simple_msi_descs(dev, &ctrl);
1088 	if (ret)
1089 		goto unlock;
1090 
1091 	xa = &dev->msi.data->__domains[ctrl.domid].store;
1092 
1093 	for (virq = virq_base; virq < virq_base + nvec; virq++) {
1094 		desc = xa_load(xa, virq);
1095 		desc->irq = virq;
1096 
1097 		ops->set_desc(arg, desc);
1098 		ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
1099 		if (ret)
1100 			goto fail;
1101 
1102 		irq_set_msi_desc(virq, desc);
1103 	}
1104 	msi_unlock_descs(dev);
1105 	return 0;
1106 
1107 fail:
1108 	for (--virq; virq >= virq_base; virq--)
1109 		irq_domain_free_irqs_common(domain, virq, 1);
1110 	msi_domain_free_descs(dev, &ctrl);
1111 unlock:
1112 	msi_unlock_descs(dev);
1113 	return ret;
1114 }
1115 
1116 /*
1117  * Carefully check whether the device can use reservation mode. If
1118  * reservation mode is enabled then the early activation will assign a
1119  * dummy vector to the device. If the PCI/MSI device does not support
1120  * masking of the entry then this can result in spurious interrupts when
1121  * the device driver is not absolutely careful. But even then a malfunction
1122  * of the hardware could result in a spurious interrupt on the dummy vector
1123  * and render the device unusable. If the entry can be masked then the core
1124  * logic will prevent the spurious interrupt and reservation mode can be
1125  * used. For now reservation mode is restricted to PCI/MSI.
1126  */
1127 static bool msi_check_reservation_mode(struct irq_domain *domain,
1128 				       struct msi_domain_info *info,
1129 				       struct device *dev)
1130 {
1131 	struct msi_desc *desc;
1132 
1133 	switch(domain->bus_token) {
1134 	case DOMAIN_BUS_PCI_MSI:
1135 	case DOMAIN_BUS_PCI_DEVICE_MSI:
1136 	case DOMAIN_BUS_PCI_DEVICE_MSIX:
1137 	case DOMAIN_BUS_VMD_MSI:
1138 		break;
1139 	default:
1140 		return false;
1141 	}
1142 
1143 	if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
1144 		return false;
1145 
1146 	if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
1147 		return false;
1148 
1149 	/*
1150 	 * Checking the first MSI descriptor is sufficient. MSIX supports
1151 	 * masking and MSI does so when the can_mask attribute is set.
1152 	 */
1153 	desc = msi_first_desc(dev, MSI_DESC_ALL);
1154 	return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
1155 }
1156 
1157 static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
1158 			       int allocated)
1159 {
1160 	switch(domain->bus_token) {
1161 	case DOMAIN_BUS_PCI_MSI:
1162 	case DOMAIN_BUS_PCI_DEVICE_MSI:
1163 	case DOMAIN_BUS_PCI_DEVICE_MSIX:
1164 	case DOMAIN_BUS_VMD_MSI:
1165 		if (IS_ENABLED(CONFIG_PCI_MSI))
1166 			break;
1167 		fallthrough;
1168 	default:
1169 		return -ENOSPC;
1170 	}
1171 
1172 	/* Let a failed PCI multi MSI allocation retry */
1173 	if (desc->nvec_used > 1)
1174 		return 1;
1175 
1176 	/* If there was a successful allocation let the caller know */
1177 	return allocated ? allocated : -ENOSPC;
1178 }
1179 
1180 #define VIRQ_CAN_RESERVE	0x01
1181 #define VIRQ_ACTIVATE		0x02
1182 #define VIRQ_NOMASK_QUIRK	0x04
1183 
1184 static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
1185 {
1186 	struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
1187 	int ret;
1188 
1189 	if (!(vflags & VIRQ_CAN_RESERVE)) {
1190 		irqd_clr_can_reserve(irqd);
1191 		if (vflags & VIRQ_NOMASK_QUIRK)
1192 			irqd_set_msi_nomask_quirk(irqd);
1193 
1194 		/*
1195 		 * If the interrupt is managed but no CPU is available to
1196 		 * service it, shut it down until better times. Note that
1197 		 * we only do this on the !RESERVE path as x86 (the only
1198 		 * architecture using this flag) deals with this in a
1199 		 * different way by using a catch-all vector.
1200 		 */
1201 		if ((vflags & VIRQ_ACTIVATE) &&
1202 		    irqd_affinity_is_managed(irqd) &&
1203 		    !cpumask_intersects(irq_data_get_affinity_mask(irqd),
1204 					cpu_online_mask)) {
1205 			    irqd_set_managed_shutdown(irqd);
1206 			    return 0;
1207 		    }
1208 	}
1209 
1210 	if (!(vflags & VIRQ_ACTIVATE))
1211 		return 0;
1212 
1213 	ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE);
1214 	if (ret)
1215 		return ret;
1216 	/*
1217 	 * If the interrupt uses reservation mode, clear the activated bit
1218 	 * so request_irq() will assign the final vector.
1219 	 */
1220 	if (vflags & VIRQ_CAN_RESERVE)
1221 		irqd_clr_activated(irqd);
1222 	return 0;
1223 }
1224 
1225 static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
1226 				   struct msi_ctrl *ctrl)
1227 {
1228 	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1229 	struct msi_domain_info *info = domain->host_data;
1230 	struct msi_domain_ops *ops = info->ops;
1231 	unsigned int vflags = 0, allocated = 0;
1232 	msi_alloc_info_t arg = { };
1233 	struct msi_desc *desc;
1234 	unsigned long idx;
1235 	int i, ret, virq;
1236 
1237 	ret = msi_domain_prepare_irqs(domain, dev, ctrl->nirqs, &arg);
1238 	if (ret)
1239 		return ret;
1240 
1241 	/*
1242 	 * This flag is set by the PCI layer as we need to activate
1243 	 * the MSI entries before the PCI layer enables MSI in the
1244 	 * card. Otherwise the card latches a random msi message.
1245 	 */
1246 	if (info->flags & MSI_FLAG_ACTIVATE_EARLY)
1247 		vflags |= VIRQ_ACTIVATE;
1248 
1249 	/*
1250 	 * Interrupt can use a reserved vector and will not occupy
1251 	 * a real device vector until the interrupt is requested.
1252 	 */
1253 	if (msi_check_reservation_mode(domain, info, dev)) {
1254 		vflags |= VIRQ_CAN_RESERVE;
1255 		/*
1256 		 * MSI affinity setting requires a special quirk (X86) when
1257 		 * reservation mode is active.
1258 		 */
1259 		if (info->flags & MSI_FLAG_NOMASK_QUIRK)
1260 			vflags |= VIRQ_NOMASK_QUIRK;
1261 	}
1262 
1263 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1264 		if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED))
1265 			continue;
1266 
1267 		/* This should return -ECONFUSED... */
1268 		if (WARN_ON_ONCE(allocated >= ctrl->nirqs))
1269 			return -EINVAL;
1270 
1271 		if (ops->prepare_desc)
1272 			ops->prepare_desc(domain, &arg, desc);
1273 
1274 		ops->set_desc(&arg, desc);
1275 
1276 		virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
1277 					       dev_to_node(dev), &arg, false,
1278 					       desc->affinity);
1279 		if (virq < 0)
1280 			return msi_handle_pci_fail(domain, desc, allocated);
1281 
1282 		for (i = 0; i < desc->nvec_used; i++) {
1283 			irq_set_msi_desc_off(virq, i, desc);
1284 			irq_debugfs_copy_devname(virq + i, dev);
1285 			ret = msi_init_virq(domain, virq + i, vflags);
1286 			if (ret)
1287 				return ret;
1288 		}
1289 		if (info->flags & MSI_FLAG_DEV_SYSFS) {
1290 			ret = msi_sysfs_populate_desc(dev, desc);
1291 			if (ret)
1292 				return ret;
1293 		}
1294 		allocated++;
1295 	}
1296 	return 0;
1297 }
1298 
1299 static int msi_domain_alloc_simple_msi_descs(struct device *dev,
1300 					     struct msi_domain_info *info,
1301 					     struct msi_ctrl *ctrl)
1302 {
1303 	if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS))
1304 		return 0;
1305 
1306 	return msi_domain_add_simple_msi_descs(dev, ctrl);
1307 }
1308 
1309 static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
1310 {
1311 	struct msi_domain_info *info;
1312 	struct msi_domain_ops *ops;
1313 	struct irq_domain *domain;
1314 	int ret;
1315 
1316 	if (!msi_ctrl_valid(dev, ctrl))
1317 		return -EINVAL;
1318 
1319 	domain = msi_get_device_domain(dev, ctrl->domid);
1320 	if (!domain)
1321 		return -ENODEV;
1322 
1323 	info = domain->host_data;
1324 
1325 	ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl);
1326 	if (ret)
1327 		return ret;
1328 
1329 	ops = info->ops;
1330 	if (ops->domain_alloc_irqs)
1331 		return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs);
1332 
1333 	return __msi_domain_alloc_irqs(dev, domain, ctrl);
1334 }
1335 
/*
 * Allocate the interrupts described by @ctrl. Any non-zero result of the
 * allocation triggers a free of the range described by @ctrl before the
 * result is handed back to the caller.
 */
static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
{
	int err;

	err = __msi_domain_alloc_locked(dev, ctrl);
	if (!err)
		return 0;

	msi_domain_free_locked(dev, ctrl);
	return err;
}
1344 
1345 /**
1346  * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain
1347  * @dev:	Pointer to device struct of the device for which the interrupts
1348  *		are allocated
1349  * @domid:	Id of the interrupt domain to operate on
1350  * @first:	First index to allocate (inclusive)
1351  * @last:	Last index to allocate (inclusive)
1352  *
1353  * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
1354  * pair. Use this for MSI irqdomains which implement their own descriptor
1355  * allocation/free.
1356  *
1357  * Return: %0 on success or an error code.
1358  */
1359 int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
1360 				       unsigned int first, unsigned int last)
1361 {
1362 	struct msi_ctrl ctrl = {
1363 		.domid	= domid,
1364 		.first	= first,
1365 		.last	= last,
1366 		.nirqs	= last + 1 - first,
1367 	};
1368 
1369 	return msi_domain_alloc_locked(dev, &ctrl);
1370 }
1371 
/**
 * msi_domain_alloc_irqs_range - Allocate a range of interrupts from a MSI
 *				 interrupt domain
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are allocated
 * @domid:	Id of the interrupt domain to operate on
 * @first:	First index to allocate (inclusive)
 * @last:	Last index to allocate (inclusive)
 *
 * Takes the MSI descriptor lock of @dev around
 * msi_domain_alloc_irqs_range_locked().
 *
 * Return: %0 on success or an error code.
 */
int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
				unsigned int first, unsigned int last)
{
	int err;

	msi_lock_descs(dev);
	err = msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
	msi_unlock_descs(dev);

	return err;
}
1392 
1393 /**
1394  * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
1395  *
1396  * @dev:	Pointer to device struct of the device for which the interrupts
1397  *		are allocated
1398  * @domid:	Id of the interrupt domain to operate on
1399  * @nirqs:	The number of interrupts to allocate
1400  *
1401  * This function scans all MSI descriptors of the MSI domain and allocates interrupts
1402  * for all unassigned ones. That function is to be used for MSI domain usage where
1403  * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X].
1404  *
1405  * Return: %0 on success or an error code.
1406  */
1407 int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs)
1408 {
1409 	struct msi_ctrl ctrl = {
1410 		.domid	= domid,
1411 		.first	= 0,
1412 		.last	= msi_domain_get_hwsize(dev, domid) - 1,
1413 		.nirqs	= nirqs,
1414 	};
1415 
1416 	return msi_domain_alloc_locked(dev, &ctrl);
1417 }
1418 
1419 /**
1420  * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
1421  *			     a given index - or at the next free index
1422  *
1423  * @dev:	Pointer to device struct of the device for which the interrupts
1424  *		are allocated
1425  * @domid:	Id of the interrupt domain to operate on
1426  * @index:	Index for allocation. If @index == %MSI_ANY_INDEX the allocation
1427  *		uses the next free index.
1428  * @affdesc:	Optional pointer to an interrupt affinity descriptor structure
1429  * @icookie:	Optional pointer to a domain specific per instance cookie. If
1430  *		non-NULL the content of the cookie is stored in msi_desc::data.
1431  *		Must be NULL for MSI-X allocations
1432  *
1433  * This requires a MSI interrupt domain which lets the core code manage the
1434  * MSI descriptors.
1435  *
1436  * Return: struct msi_map
1437  *
1438  *	On success msi_map::index contains the allocated index number and
1439  *	msi_map::virq the corresponding Linux interrupt number
1440  *
1441  *	On failure msi_map::index contains the error code and msi_map::virq
1442  *	is %0.
1443  */
1444 struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
1445 				       const struct irq_affinity_desc *affdesc,
1446 				       union msi_instance_cookie *icookie)
1447 {
1448 	struct msi_ctrl ctrl = { .domid	= domid, .nirqs = 1, };
1449 	struct irq_domain *domain;
1450 	struct msi_map map = { };
1451 	struct msi_desc *desc;
1452 	int ret;
1453 
1454 	msi_lock_descs(dev);
1455 	domain = msi_get_device_domain(dev, domid);
1456 	if (!domain) {
1457 		map.index = -ENODEV;
1458 		goto unlock;
1459 	}
1460 
1461 	desc = msi_alloc_desc(dev, 1, affdesc);
1462 	if (!desc) {
1463 		map.index = -ENOMEM;
1464 		goto unlock;
1465 	}
1466 
1467 	if (icookie)
1468 		desc->data.icookie = *icookie;
1469 
1470 	ret = msi_insert_desc(dev, desc, domid, index);
1471 	if (ret) {
1472 		map.index = ret;
1473 		goto unlock;
1474 	}
1475 
1476 	ctrl.first = ctrl.last = desc->msi_index;
1477 
1478 	ret = __msi_domain_alloc_irqs(dev, domain, &ctrl);
1479 	if (ret) {
1480 		map.index = ret;
1481 		msi_domain_free_locked(dev, &ctrl);
1482 	} else {
1483 		map.index = desc->msi_index;
1484 		map.virq = desc->irq;
1485 	}
1486 unlock:
1487 	msi_unlock_descs(dev);
1488 	return map;
1489 }
1490 
1491 static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
1492 				   struct msi_ctrl *ctrl)
1493 {
1494 	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1495 	struct msi_domain_info *info = domain->host_data;
1496 	struct irq_data *irqd;
1497 	struct msi_desc *desc;
1498 	unsigned long idx;
1499 	int i;
1500 
1501 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1502 		/* Only handle MSI entries which have an interrupt associated */
1503 		if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED))
1504 			continue;
1505 
1506 		/* Make sure all interrupts are deactivated */
1507 		for (i = 0; i < desc->nvec_used; i++) {
1508 			irqd = irq_domain_get_irq_data(domain, desc->irq + i);
1509 			if (irqd && irqd_is_activated(irqd))
1510 				irq_domain_deactivate_irq(irqd);
1511 		}
1512 
1513 		irq_domain_free_irqs(desc->irq, desc->nvec_used);
1514 		if (info->flags & MSI_FLAG_DEV_SYSFS)
1515 			msi_sysfs_remove_desc(dev, desc);
1516 		desc->irq = 0;
1517 	}
1518 }
1519 
1520 static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
1521 {
1522 	struct msi_domain_info *info;
1523 	struct msi_domain_ops *ops;
1524 	struct irq_domain *domain;
1525 
1526 	if (!msi_ctrl_valid(dev, ctrl))
1527 		return;
1528 
1529 	domain = msi_get_device_domain(dev, ctrl->domid);
1530 	if (!domain)
1531 		return;
1532 
1533 	info = domain->host_data;
1534 	ops = info->ops;
1535 
1536 	if (ops->domain_free_irqs)
1537 		ops->domain_free_irqs(domain, dev);
1538 	else
1539 		__msi_domain_free_irqs(dev, domain, ctrl);
1540 
1541 	if (ops->msi_post_free)
1542 		ops->msi_post_free(domain, dev);
1543 
1544 	if (info->flags & MSI_FLAG_FREE_MSI_DESCS)
1545 		msi_domain_free_descs(dev, ctrl);
1546 }
1547 
1548 /**
1549  * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain
1550  *				       associated to @dev with msi_lock held
1551  * @dev:	Pointer to device struct of the device for which the interrupts
1552  *		are freed
1553  * @domid:	Id of the interrupt domain to operate on
1554  * @first:	First index to free (inclusive)
1555  * @last:	Last index to free (inclusive)
1556  */
1557 void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
1558 				       unsigned int first, unsigned int last)
1559 {
1560 	struct msi_ctrl ctrl = {
1561 		.domid	= domid,
1562 		.first	= first,
1563 		.last	= last,
1564 	};
1565 	msi_domain_free_locked(dev, &ctrl);
1566 }
1567 
/**
 * msi_domain_free_irqs_range - Free a range of interrupts from a MSI
 *				interrupt domain associated to @dev
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	Id of the interrupt domain to operate on
 * @first:	First index to free (inclusive)
 * @last:	Last index to free (inclusive)
 *
 * Takes the MSI descriptor lock of @dev around
 * msi_domain_free_irqs_range_locked().
 */
void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
				unsigned int first, unsigned int last)
{
	msi_lock_descs(dev);

	msi_domain_free_irqs_range_locked(dev, domid, first, last);

	msi_unlock_descs(dev);
}
1584 
/**
 * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt
 *				     domain associated to a device
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	The id of the domain to operate on
 *
 * The caller has to hold the descriptor lock, i.e. invoke this between
 * msi_lock_descs() and msi_unlock_descs(). Intended for MSI irqdomains
 * which implement their own vector allocation.
 *
 * The freed range spans index 0 up to the hardware size of the domain
 * minus one.
 */
void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
{
	unsigned int last = msi_domain_get_hwsize(dev, domid) - 1;

	msi_domain_free_irqs_range_locked(dev, domid, 0, last);
}
1601 
/**
 * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain
 *			      associated to a device
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	The id of the domain to operate on
 *
 * Takes the MSI descriptor lock of @dev around
 * msi_domain_free_irqs_all_locked().
 */
void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
{
	msi_lock_descs(dev);

	msi_domain_free_irqs_all_locked(dev, domid);

	msi_unlock_descs(dev);
}
1615 
1616 /**
1617  * msi_get_domain_info - Get the MSI interrupt domain info for @domain
1618  * @domain:	The interrupt domain to retrieve data from
1619  *
1620  * Return: the pointer to the msi_domain_info stored in @domain->host_data.
1621  */
1622 struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
1623 {
1624 	return (struct msi_domain_info *)domain->host_data;
1625 }
1626 
1627 /**
1628  * msi_device_has_isolated_msi - True if the device has isolated MSI
1629  * @dev: The device to check
1630  *
1631  * Isolated MSI means that HW modeled by an irq_domain on the path from the
1632  * initiating device to the CPU will validate that the MSI message specifies an
1633  * interrupt number that the device is authorized to trigger. This must block
1634  * devices from triggering interrupts they are not authorized to trigger.
1635  * Currently authorization means the MSI vector is one assigned to the device.
1636  *
1637  * This is interesting for securing VFIO use cases where a rouge MSI (eg created
1638  * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to
1639  * impact outside its security domain, eg userspace triggering interrupts on
1640  * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM
1641  * triggering interrupts on another VM.
1642  */
1643 bool msi_device_has_isolated_msi(struct device *dev)
1644 {
1645 	struct irq_domain *domain = dev_get_msi_domain(dev);
1646 
1647 	for (; domain; domain = domain->parent)
1648 		if (domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP)
1649 			return true;
1650 	return false;
1651 }
1652 EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);
1653