xref: /openbmc/linux/kernel/irq/msi.c (revision df202b452fe6c6d6f1351bad485e2367ef1e644e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2014 Intel Corp.
4  * Author: Jiang Liu <jiang.liu@linux.intel.com>
5  *
6  * This file is licensed under GPLv2.
7  *
8  * This file contains common code to support Message Signaled Interrupts for
9  * PCI compatible and non PCI compatible devices.
10  */
11 #include <linux/types.h>
12 #include <linux/device.h>
13 #include <linux/irq.h>
14 #include <linux/irqdomain.h>
15 #include <linux/msi.h>
16 #include <linux/slab.h>
17 #include <linux/sysfs.h>
18 #include <linux/pci.h>
19 
20 #include "internals.h"
21 
/*
 * Forward declaration: msi_setup_device_data() calls this function before
 * its CONFIG_SYSFS dependent definition further down in this file.
 */
static inline int msi_sysfs_create_group(struct device *dev);
23 
24 /**
25  * msi_alloc_desc - Allocate an initialized msi_desc
26  * @dev:	Pointer to the device for which this is allocated
27  * @nvec:	The number of vectors used in this entry
28  * @affinity:	Optional pointer to an affinity mask array size of @nvec
29  *
30  * If @affinity is not %NULL then an affinity array[@nvec] is allocated
31  * and the affinity masks and flags from @affinity are copied.
32  *
33  * Return: pointer to allocated &msi_desc on success or %NULL on failure
34  */
35 static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
36 					const struct irq_affinity_desc *affinity)
37 {
38 	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
39 
40 	if (!desc)
41 		return NULL;
42 
43 	desc->dev = dev;
44 	desc->nvec_used = nvec;
45 	if (affinity) {
46 		desc->affinity = kmemdup(affinity, nvec * sizeof(*desc->affinity), GFP_KERNEL);
47 		if (!desc->affinity) {
48 			kfree(desc);
49 			return NULL;
50 		}
51 	}
52 	return desc;
53 }
54 
55 static void msi_free_desc(struct msi_desc *desc)
56 {
57 	kfree(desc->affinity);
58 	kfree(desc);
59 }
60 
61 static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, unsigned int index)
62 {
63 	int ret;
64 
65 	desc->msi_index = index;
66 	ret = xa_insert(&md->__store, index, desc, GFP_KERNEL);
67 	if (ret)
68 		msi_free_desc(desc);
69 	return ret;
70 }
71 
72 /**
73  * msi_add_msi_desc - Allocate and initialize a MSI descriptor
74  * @dev:	Pointer to the device for which the descriptor is allocated
75  * @init_desc:	Pointer to an MSI descriptor to initialize the new descriptor
76  *
77  * Return: 0 on success or an appropriate failure code.
78  */
79 int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc)
80 {
81 	struct msi_desc *desc;
82 
83 	lockdep_assert_held(&dev->msi.data->mutex);
84 
85 	desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity);
86 	if (!desc)
87 		return -ENOMEM;
88 
89 	/* Copy type specific data to the new descriptor. */
90 	desc->pci = init_desc->pci;
91 	return msi_insert_desc(dev->msi.data, desc, init_desc->msi_index);
92 }
93 
94 /**
95  * msi_add_simple_msi_descs - Allocate and initialize MSI descriptors
96  * @dev:	Pointer to the device for which the descriptors are allocated
97  * @index:	Index for the first MSI descriptor
98  * @ndesc:	Number of descriptors to allocate
99  *
100  * Return: 0 on success or an appropriate failure code.
101  */
102 static int msi_add_simple_msi_descs(struct device *dev, unsigned int index, unsigned int ndesc)
103 {
104 	unsigned int idx, last = index + ndesc - 1;
105 	struct msi_desc *desc;
106 	int ret;
107 
108 	lockdep_assert_held(&dev->msi.data->mutex);
109 
110 	for (idx = index; idx <= last; idx++) {
111 		desc = msi_alloc_desc(dev, 1, NULL);
112 		if (!desc)
113 			goto fail_mem;
114 		ret = msi_insert_desc(dev->msi.data, desc, idx);
115 		if (ret)
116 			goto fail;
117 	}
118 	return 0;
119 
120 fail_mem:
121 	ret = -ENOMEM;
122 fail:
123 	msi_free_msi_descs_range(dev, MSI_DESC_NOTASSOCIATED, index, last);
124 	return ret;
125 }
126 
127 static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
128 {
129 	switch (filter) {
130 	case MSI_DESC_ALL:
131 		return true;
132 	case MSI_DESC_NOTASSOCIATED:
133 		return !desc->irq;
134 	case MSI_DESC_ASSOCIATED:
135 		return !!desc->irq;
136 	}
137 	WARN_ON_ONCE(1);
138 	return false;
139 }
140 
141 /**
142  * msi_free_msi_descs_range - Free MSI descriptors of a device
143  * @dev:		Device to free the descriptors
144  * @filter:		Descriptor state filter
145  * @first_index:	Index to start freeing from
146  * @last_index:		Last index to be freed
147  */
148 void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
149 			      unsigned int first_index, unsigned int last_index)
150 {
151 	struct xarray *xa = &dev->msi.data->__store;
152 	struct msi_desc *desc;
153 	unsigned long idx;
154 
155 	lockdep_assert_held(&dev->msi.data->mutex);
156 
157 	xa_for_each_range(xa, idx, desc, first_index, last_index) {
158 		if (msi_desc_match(desc, filter)) {
159 			xa_erase(xa, idx);
160 			msi_free_desc(desc);
161 		}
162 	}
163 }
164 
165 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
166 {
167 	*msg = entry->msg;
168 }
169 
/*
 * Copy the MSI message stored in the descriptor of Linux interrupt @irq
 * to @msg.
 *
 * NOTE(review): the result of irq_get_msi_desc() is used unchecked, so
 * callers presumably must pass an interrupt which has an MSI descriptor.
 */
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__get_cached_msi_msg(irq_get_msi_desc(irq), msg);
}
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
177 
178 static void msi_device_data_release(struct device *dev, void *res)
179 {
180 	struct msi_device_data *md = res;
181 
182 	WARN_ON_ONCE(!xa_empty(&md->__store));
183 	xa_destroy(&md->__store);
184 	dev->msi.data = NULL;
185 }
186 
187 /**
188  * msi_setup_device_data - Setup MSI device data
189  * @dev:	Device for which MSI device data should be set up
190  *
191  * Return: 0 on success, appropriate error code otherwise
192  *
193  * This can be called more than once for @dev. If the MSI device data is
194  * already allocated the call succeeds. The allocated memory is
195  * automatically released when the device is destroyed.
196  */
197 int msi_setup_device_data(struct device *dev)
198 {
199 	struct msi_device_data *md;
200 	int ret;
201 
202 	if (dev->msi.data)
203 		return 0;
204 
205 	md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
206 	if (!md)
207 		return -ENOMEM;
208 
209 	ret = msi_sysfs_create_group(dev);
210 	if (ret) {
211 		devres_free(md);
212 		return ret;
213 	}
214 
215 	xa_init(&md->__store);
216 	mutex_init(&md->mutex);
217 	dev->msi.data = md;
218 	devres_add(dev, md);
219 	return 0;
220 }
221 
222 /**
223  * msi_lock_descs - Lock the MSI descriptor storage of a device
224  * @dev:	Device to operate on
225  */
226 void msi_lock_descs(struct device *dev)
227 {
228 	mutex_lock(&dev->msi.data->mutex);
229 }
230 EXPORT_SYMBOL_GPL(msi_lock_descs);
231 
232 /**
233  * msi_unlock_descs - Unlock the MSI descriptor storage of a device
234  * @dev:	Device to operate on
235  */
236 void msi_unlock_descs(struct device *dev)
237 {
238 	/* Invalidate the index wich was cached by the iterator */
239 	dev->msi.data->__iter_idx = MSI_MAX_INDEX;
240 	mutex_unlock(&dev->msi.data->mutex);
241 }
242 EXPORT_SYMBOL_GPL(msi_unlock_descs);
243 
244 static struct msi_desc *msi_find_desc(struct msi_device_data *md, enum msi_desc_filter filter)
245 {
246 	struct msi_desc *desc;
247 
248 	xa_for_each_start(&md->__store, md->__iter_idx, desc, md->__iter_idx) {
249 		if (msi_desc_match(desc, filter))
250 			return desc;
251 	}
252 	md->__iter_idx = MSI_MAX_INDEX;
253 	return NULL;
254 }
255 
256 /**
257  * msi_first_desc - Get the first MSI descriptor of a device
258  * @dev:	Device to operate on
259  * @filter:	Descriptor state filter
260  *
261  * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
262  * must be invoked before the call.
263  *
264  * Return: Pointer to the first MSI descriptor matching the search
265  *	   criteria, NULL if none found.
266  */
267 struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter)
268 {
269 	struct msi_device_data *md = dev->msi.data;
270 
271 	if (WARN_ON_ONCE(!md))
272 		return NULL;
273 
274 	lockdep_assert_held(&md->mutex);
275 
276 	md->__iter_idx = 0;
277 	return msi_find_desc(md, filter);
278 }
279 EXPORT_SYMBOL_GPL(msi_first_desc);
280 
281 /**
282  * msi_next_desc - Get the next MSI descriptor of a device
283  * @dev:	Device to operate on
284  *
285  * The first invocation of msi_next_desc() has to be preceeded by a
286  * successful invocation of __msi_first_desc(). Consecutive invocations are
287  * only valid if the previous one was successful. All these operations have
288  * to be done within the same MSI mutex held region.
289  *
290  * Return: Pointer to the next MSI descriptor matching the search
291  *	   criteria, NULL if none found.
292  */
293 struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter)
294 {
295 	struct msi_device_data *md = dev->msi.data;
296 
297 	if (WARN_ON_ONCE(!md))
298 		return NULL;
299 
300 	lockdep_assert_held(&md->mutex);
301 
302 	if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
303 		return NULL;
304 
305 	md->__iter_idx++;
306 	return msi_find_desc(md, filter);
307 }
308 EXPORT_SYMBOL_GPL(msi_next_desc);
309 
310 /**
311  * msi_get_virq - Return Linux interrupt number of a MSI interrupt
312  * @dev:	Device to operate on
313  * @index:	MSI interrupt index to look for (0-based)
314  *
315  * Return: The Linux interrupt number on success (> 0), 0 if not found
316  */
317 unsigned int msi_get_virq(struct device *dev, unsigned int index)
318 {
319 	struct msi_desc *desc;
320 	unsigned int ret = 0;
321 	bool pcimsi;
322 
323 	if (!dev->msi.data)
324 		return 0;
325 
326 	pcimsi = dev_is_pci(dev) ? to_pci_dev(dev)->msi_enabled : false;
327 
328 	msi_lock_descs(dev);
329 	desc = xa_load(&dev->msi.data->__store, pcimsi ? 0 : index);
330 	if (desc && desc->irq) {
331 		/*
332 		 * PCI-MSI has only one descriptor for multiple interrupts.
333 		 * PCI-MSIX and platform MSI use a descriptor per
334 		 * interrupt.
335 		 */
336 		if (pcimsi) {
337 			if (index < desc->nvec_used)
338 				ret = desc->irq + index;
339 		} else {
340 			ret = desc->irq;
341 		}
342 	}
343 	msi_unlock_descs(dev);
344 	return ret;
345 }
346 EXPORT_SYMBOL_GPL(msi_get_virq);
347 
348 #ifdef CONFIG_SYSFS
/*
 * The static attribute list is empty. The per interrupt files are added to
 * the group at runtime by msi_sysfs_populate_desc().
 */
static struct attribute *msi_dev_attrs[] = {
	NULL
};

/* The "msi_irqs" sysfs attribute group which holds the per interrupt files */
static const struct attribute_group msi_irqs_group = {
	.name	= "msi_irqs",
	.attrs	= msi_dev_attrs,
};
357 
358 static inline int msi_sysfs_create_group(struct device *dev)
359 {
360 	return devm_device_add_group(dev, &msi_irqs_group);
361 }
362 
363 static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
364 			     char *buf)
365 {
366 	/* MSI vs. MSIX is per device not per interrupt */
367 	bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
368 
369 	return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
370 }
371 
372 static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
373 {
374 	struct device_attribute *attrs = desc->sysfs_attrs;
375 	int i;
376 
377 	if (!attrs)
378 		return;
379 
380 	desc->sysfs_attrs = NULL;
381 	for (i = 0; i < desc->nvec_used; i++) {
382 		if (attrs[i].show)
383 			sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
384 		kfree(attrs[i].attr.name);
385 	}
386 	kfree(attrs);
387 }
388 
389 static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
390 {
391 	struct device_attribute *attrs;
392 	int ret, i;
393 
394 	attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL);
395 	if (!attrs)
396 		return -ENOMEM;
397 
398 	desc->sysfs_attrs = attrs;
399 	for (i = 0; i < desc->nvec_used; i++) {
400 		sysfs_attr_init(&attrs[i].attr);
401 		attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i);
402 		if (!attrs[i].attr.name) {
403 			ret = -ENOMEM;
404 			goto fail;
405 		}
406 
407 		attrs[i].attr.mode = 0444;
408 		attrs[i].show = msi_mode_show;
409 
410 		ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
411 		if (ret) {
412 			attrs[i].show = NULL;
413 			goto fail;
414 		}
415 	}
416 	return 0;
417 
418 fail:
419 	msi_sysfs_remove_desc(dev, desc);
420 	return ret;
421 }
422 
423 #ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS
424 /**
425  * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
426  * @dev:	The device (PCI, platform etc) which will get sysfs entries
427  */
428 int msi_device_populate_sysfs(struct device *dev)
429 {
430 	struct msi_desc *desc;
431 	int ret;
432 
433 	msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
434 		if (desc->sysfs_attrs)
435 			continue;
436 		ret = msi_sysfs_populate_desc(dev, desc);
437 		if (ret)
438 			return ret;
439 	}
440 	return 0;
441 }
442 
443 /**
444  * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
445  * @dev:		The device (PCI, platform etc) for which to remove
446  *			sysfs entries
447  */
448 void msi_device_destroy_sysfs(struct device *dev)
449 {
450 	struct msi_desc *desc;
451 
452 	msi_for_each_desc(desc, dev, MSI_DESC_ALL)
453 		msi_sysfs_remove_desc(dev, desc);
454 }
#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
456 #else /* CONFIG_SYSFS */
/* Stub for builds without CONFIG_SYSFS: nothing to create, always succeeds */
static inline int msi_sysfs_create_group(struct device *dev)
{
	return 0;
}
/* Stub for builds without CONFIG_SYSFS: nothing to populate, always succeeds */
static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
{
	return 0;
}
/* Stub for builds without CONFIG_SYSFS: nothing to remove */
static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
{
}
460 #endif /* !CONFIG_SYSFS */
461 
462 #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
463 static inline void irq_chip_write_msi_msg(struct irq_data *data,
464 					  struct msi_msg *msg)
465 {
466 	data->chip->irq_write_msi_msg(data, msg);
467 }
468 
469 static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
470 {
471 	struct msi_domain_info *info = domain->host_data;
472 
473 	/*
474 	 * If the MSI provider has messed with the second message and
475 	 * not advertized that it is level-capable, signal the breakage.
476 	 */
477 	WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
478 		  (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
479 		(msg[1].address_lo || msg[1].address_hi || msg[1].data));
480 }
481 
482 /**
483  * msi_domain_set_affinity - Generic affinity setter function for MSI domains
484  * @irq_data:	The irq data associated to the interrupt
485  * @mask:	The affinity mask to set
486  * @force:	Flag to enforce setting (disable online checks)
487  *
488  * Intended to be used by MSI interrupt controllers which are
489  * implemented with hierarchical domains.
490  *
491  * Return: IRQ_SET_MASK_* result code
492  */
493 int msi_domain_set_affinity(struct irq_data *irq_data,
494 			    const struct cpumask *mask, bool force)
495 {
496 	struct irq_data *parent = irq_data->parent_data;
497 	struct msi_msg msg[2] = { [1] = { }, };
498 	int ret;
499 
500 	ret = parent->chip->irq_set_affinity(parent, mask, force);
501 	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
502 		BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
503 		msi_check_level(irq_data->domain, msg);
504 		irq_chip_write_msi_msg(irq_data, msg);
505 	}
506 
507 	return ret;
508 }
509 
510 static int msi_domain_activate(struct irq_domain *domain,
511 			       struct irq_data *irq_data, bool early)
512 {
513 	struct msi_msg msg[2] = { [1] = { }, };
514 
515 	BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
516 	msi_check_level(irq_data->domain, msg);
517 	irq_chip_write_msi_msg(irq_data, msg);
518 	return 0;
519 }
520 
521 static void msi_domain_deactivate(struct irq_domain *domain,
522 				  struct irq_data *irq_data)
523 {
524 	struct msi_msg msg[2];
525 
526 	memset(msg, 0, sizeof(msg));
527 	irq_chip_write_msi_msg(irq_data, msg);
528 }
529 
530 static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
531 			    unsigned int nr_irqs, void *arg)
532 {
533 	struct msi_domain_info *info = domain->host_data;
534 	struct msi_domain_ops *ops = info->ops;
535 	irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
536 	int i, ret;
537 
538 	if (irq_find_mapping(domain, hwirq) > 0)
539 		return -EEXIST;
540 
541 	if (domain->parent) {
542 		ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
543 		if (ret < 0)
544 			return ret;
545 	}
546 
547 	for (i = 0; i < nr_irqs; i++) {
548 		ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
549 		if (ret < 0) {
550 			if (ops->msi_free) {
551 				for (i--; i > 0; i--)
552 					ops->msi_free(domain, info, virq + i);
553 			}
554 			irq_domain_free_irqs_top(domain, virq, nr_irqs);
555 			return ret;
556 		}
557 	}
558 
559 	return 0;
560 }
561 
562 static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
563 			    unsigned int nr_irqs)
564 {
565 	struct msi_domain_info *info = domain->host_data;
566 	int i;
567 
568 	if (info->ops->msi_free) {
569 		for (i = 0; i < nr_irqs; i++)
570 			info->ops->msi_free(domain, info, virq + i);
571 	}
572 	irq_domain_free_irqs_top(domain, virq, nr_irqs);
573 }
574 
/*
 * irq_domain callbacks which msi_create_irq_domain() installs for every
 * MSI interrupt domain it creates.
 */
static const struct irq_domain_ops msi_domain_ops = {
	.alloc		= msi_domain_alloc,
	.free		= msi_domain_free,
	.activate	= msi_domain_activate,
	.deactivate	= msi_domain_deactivate,
};
581 
582 static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
583 						msi_alloc_info_t *arg)
584 {
585 	return arg->hwirq;
586 }
587 
588 static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
589 				  int nvec, msi_alloc_info_t *arg)
590 {
591 	memset(arg, 0, sizeof(*arg));
592 	return 0;
593 }
594 
595 static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
596 				    struct msi_desc *desc)
597 {
598 	arg->desc = desc;
599 }
600 
601 static int msi_domain_ops_init(struct irq_domain *domain,
602 			       struct msi_domain_info *info,
603 			       unsigned int virq, irq_hw_number_t hwirq,
604 			       msi_alloc_info_t *arg)
605 {
606 	irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
607 				      info->chip_data);
608 	if (info->handler && info->handler_name) {
609 		__irq_set_handler(virq, info->handler, 0, info->handler_name);
610 		if (info->handler_data)
611 			irq_set_handler_data(virq, info->handler_data);
612 	}
613 	return 0;
614 }
615 
/* Default msi_check() callback: accepts every device, always returns 0 */
static int msi_domain_ops_check(struct irq_domain *domain,
				struct msi_domain_info *info,
				struct device *dev)
{
	const int accepted = 0;

	return accepted;
}
622 
/*
 * Default MSI domain callbacks. msi_domain_update_dom_ops() installs the
 * whole set when a domain provides no ops at all and otherwise uses it to
 * fill in callbacks which the domain left unset.
 */
static struct msi_domain_ops msi_domain_ops_default = {
	.get_hwirq		= msi_domain_ops_get_hwirq,
	.msi_init		= msi_domain_ops_init,
	.msi_check		= msi_domain_ops_check,
	.msi_prepare		= msi_domain_ops_prepare,
	.set_desc		= msi_domain_ops_set_desc,
	.domain_alloc_irqs	= __msi_domain_alloc_irqs,
	.domain_free_irqs	= __msi_domain_free_irqs,
};
632 
633 static void msi_domain_update_dom_ops(struct msi_domain_info *info)
634 {
635 	struct msi_domain_ops *ops = info->ops;
636 
637 	if (ops == NULL) {
638 		info->ops = &msi_domain_ops_default;
639 		return;
640 	}
641 
642 	if (ops->domain_alloc_irqs == NULL)
643 		ops->domain_alloc_irqs = msi_domain_ops_default.domain_alloc_irqs;
644 	if (ops->domain_free_irqs == NULL)
645 		ops->domain_free_irqs = msi_domain_ops_default.domain_free_irqs;
646 
647 	if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
648 		return;
649 
650 	if (ops->get_hwirq == NULL)
651 		ops->get_hwirq = msi_domain_ops_default.get_hwirq;
652 	if (ops->msi_init == NULL)
653 		ops->msi_init = msi_domain_ops_default.msi_init;
654 	if (ops->msi_check == NULL)
655 		ops->msi_check = msi_domain_ops_default.msi_check;
656 	if (ops->msi_prepare == NULL)
657 		ops->msi_prepare = msi_domain_ops_default.msi_prepare;
658 	if (ops->set_desc == NULL)
659 		ops->set_desc = msi_domain_ops_default.set_desc;
660 }
661 
662 static void msi_domain_update_chip_ops(struct msi_domain_info *info)
663 {
664 	struct irq_chip *chip = info->chip;
665 
666 	BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
667 	if (!chip->irq_set_affinity)
668 		chip->irq_set_affinity = msi_domain_set_affinity;
669 }
670 
671 /**
672  * msi_create_irq_domain - Create an MSI interrupt domain
673  * @fwnode:	Optional fwnode of the interrupt controller
674  * @info:	MSI domain info
675  * @parent:	Parent irq domain
676  *
677  * Return: pointer to the created &struct irq_domain or %NULL on failure
678  */
679 struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
680 					 struct msi_domain_info *info,
681 					 struct irq_domain *parent)
682 {
683 	struct irq_domain *domain;
684 
685 	msi_domain_update_dom_ops(info);
686 	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
687 		msi_domain_update_chip_ops(info);
688 
689 	domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0,
690 					     fwnode, &msi_domain_ops, info);
691 
692 	if (domain && !domain->name && info->chip)
693 		domain->name = info->chip->name;
694 
695 	return domain;
696 }
697 
698 int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
699 			    int nvec, msi_alloc_info_t *arg)
700 {
701 	struct msi_domain_info *info = domain->host_data;
702 	struct msi_domain_ops *ops = info->ops;
703 	int ret;
704 
705 	ret = ops->msi_check(domain, info, dev);
706 	if (ret == 0)
707 		ret = ops->msi_prepare(domain, dev, nvec, arg);
708 
709 	return ret;
710 }
711 
712 int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
713 			     int virq_base, int nvec, msi_alloc_info_t *arg)
714 {
715 	struct msi_domain_info *info = domain->host_data;
716 	struct msi_domain_ops *ops = info->ops;
717 	struct msi_desc *desc;
718 	int ret, virq;
719 
720 	msi_lock_descs(dev);
721 	ret = msi_add_simple_msi_descs(dev, virq_base, nvec);
722 	if (ret)
723 		goto unlock;
724 
725 	for (virq = virq_base; virq < virq_base + nvec; virq++) {
726 		desc = xa_load(&dev->msi.data->__store, virq);
727 		desc->irq = virq;
728 
729 		ops->set_desc(arg, desc);
730 		ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
731 		if (ret)
732 			goto fail;
733 
734 		irq_set_msi_desc(virq, desc);
735 	}
736 	msi_unlock_descs(dev);
737 	return 0;
738 
739 fail:
740 	for (--virq; virq >= virq_base; virq--)
741 		irq_domain_free_irqs_common(domain, virq, 1);
742 	msi_free_msi_descs_range(dev, MSI_DESC_ALL, virq_base, virq_base + nvec - 1);
743 unlock:
744 	msi_unlock_descs(dev);
745 	return ret;
746 }
747 
748 /*
749  * Carefully check whether the device can use reservation mode. If
750  * reservation mode is enabled then the early activation will assign a
751  * dummy vector to the device. If the PCI/MSI device does not support
752  * masking of the entry then this can result in spurious interrupts when
753  * the device driver is not absolutely careful. But even then a malfunction
754  * of the hardware could result in a spurious interrupt on the dummy vector
755  * and render the device unusable. If the entry can be masked then the core
756  * logic will prevent the spurious interrupt and reservation mode can be
757  * used. For now reservation mode is restricted to PCI/MSI.
758  */
759 static bool msi_check_reservation_mode(struct irq_domain *domain,
760 				       struct msi_domain_info *info,
761 				       struct device *dev)
762 {
763 	struct msi_desc *desc;
764 
765 	switch(domain->bus_token) {
766 	case DOMAIN_BUS_PCI_MSI:
767 	case DOMAIN_BUS_VMD_MSI:
768 		break;
769 	default:
770 		return false;
771 	}
772 
773 	if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
774 		return false;
775 
776 	if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
777 		return false;
778 
779 	/*
780 	 * Checking the first MSI descriptor is sufficient. MSIX supports
781 	 * masking and MSI does so when the can_mask attribute is set.
782 	 */
783 	desc = msi_first_desc(dev, MSI_DESC_ALL);
784 	return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
785 }
786 
787 static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
788 			       int allocated)
789 {
790 	switch(domain->bus_token) {
791 	case DOMAIN_BUS_PCI_MSI:
792 	case DOMAIN_BUS_VMD_MSI:
793 		if (IS_ENABLED(CONFIG_PCI_MSI))
794 			break;
795 		fallthrough;
796 	default:
797 		return -ENOSPC;
798 	}
799 
800 	/* Let a failed PCI multi MSI allocation retry */
801 	if (desc->nvec_used > 1)
802 		return 1;
803 
804 	/* If there was a successful allocation let the caller know */
805 	return allocated ? allocated : -ENOSPC;
806 }
807 
/*
 * Flags passed from __msi_domain_alloc_irqs() to msi_init_virq():
 *
 * VIRQ_CAN_RESERVE	- set when msi_check_reservation_mode() allows
 *			  reservation mode
 * VIRQ_ACTIVATE	- set when the domain has MSI_FLAG_ACTIVATE_EARLY
 * VIRQ_NOMASK_QUIRK	- set in reservation mode when the domain has
 *			  IRQ_DOMAIN_MSI_NOMASK_QUIRK
 */
#define VIRQ_CAN_RESERVE	0x01
#define VIRQ_ACTIVATE		0x02
#define VIRQ_NOMASK_QUIRK	0x04
811 
812 static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
813 {
814 	struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
815 	int ret;
816 
817 	if (!(vflags & VIRQ_CAN_RESERVE)) {
818 		irqd_clr_can_reserve(irqd);
819 		if (vflags & VIRQ_NOMASK_QUIRK)
820 			irqd_set_msi_nomask_quirk(irqd);
821 
822 		/*
823 		 * If the interrupt is managed but no CPU is available to
824 		 * service it, shut it down until better times. Note that
825 		 * we only do this on the !RESERVE path as x86 (the only
826 		 * architecture using this flag) deals with this in a
827 		 * different way by using a catch-all vector.
828 		 */
829 		if ((vflags & VIRQ_ACTIVATE) &&
830 		    irqd_affinity_is_managed(irqd) &&
831 		    !cpumask_intersects(irq_data_get_affinity_mask(irqd),
832 					cpu_online_mask)) {
833 			    irqd_set_managed_shutdown(irqd);
834 			    return 0;
835 		    }
836 	}
837 
838 	if (!(vflags & VIRQ_ACTIVATE))
839 		return 0;
840 
841 	ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE);
842 	if (ret)
843 		return ret;
844 	/*
845 	 * If the interrupt uses reservation mode, clear the activated bit
846 	 * so request_irq() will assign the final vector.
847 	 */
848 	if (vflags & VIRQ_CAN_RESERVE)
849 		irqd_clr_activated(irqd);
850 	return 0;
851 }
852 
853 int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
854 			    int nvec)
855 {
856 	struct msi_domain_info *info = domain->host_data;
857 	struct msi_domain_ops *ops = info->ops;
858 	msi_alloc_info_t arg = { };
859 	unsigned int vflags = 0;
860 	struct msi_desc *desc;
861 	int allocated = 0;
862 	int i, ret, virq;
863 
864 	ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
865 	if (ret)
866 		return ret;
867 
868 	/*
869 	 * This flag is set by the PCI layer as we need to activate
870 	 * the MSI entries before the PCI layer enables MSI in the
871 	 * card. Otherwise the card latches a random msi message.
872 	 */
873 	if (info->flags & MSI_FLAG_ACTIVATE_EARLY)
874 		vflags |= VIRQ_ACTIVATE;
875 
876 	/*
877 	 * Interrupt can use a reserved vector and will not occupy
878 	 * a real device vector until the interrupt is requested.
879 	 */
880 	if (msi_check_reservation_mode(domain, info, dev)) {
881 		vflags |= VIRQ_CAN_RESERVE;
882 		/*
883 		 * MSI affinity setting requires a special quirk (X86) when
884 		 * reservation mode is active.
885 		 */
886 		if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
887 			vflags |= VIRQ_NOMASK_QUIRK;
888 	}
889 
890 	msi_for_each_desc(desc, dev, MSI_DESC_NOTASSOCIATED) {
891 		ops->set_desc(&arg, desc);
892 
893 		virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
894 					       dev_to_node(dev), &arg, false,
895 					       desc->affinity);
896 		if (virq < 0)
897 			return msi_handle_pci_fail(domain, desc, allocated);
898 
899 		for (i = 0; i < desc->nvec_used; i++) {
900 			irq_set_msi_desc_off(virq, i, desc);
901 			irq_debugfs_copy_devname(virq + i, dev);
902 			ret = msi_init_virq(domain, virq + i, vflags);
903 			if (ret)
904 				return ret;
905 		}
906 		if (info->flags & MSI_FLAG_DEV_SYSFS) {
907 			ret = msi_sysfs_populate_desc(dev, desc);
908 			if (ret)
909 				return ret;
910 		}
911 		allocated++;
912 	}
913 	return 0;
914 }
915 
916 static int msi_domain_add_simple_msi_descs(struct msi_domain_info *info,
917 					   struct device *dev,
918 					   unsigned int num_descs)
919 {
920 	if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS))
921 		return 0;
922 
923 	return msi_add_simple_msi_descs(dev, 0, num_descs);
924 }
925 
926 /**
927  * msi_domain_alloc_irqs_descs_locked - Allocate interrupts from a MSI interrupt domain
928  * @domain:	The domain to allocate from
929  * @dev:	Pointer to device struct of the device for which the interrupts
930  *		are allocated
931  * @nvec:	The number of interrupts to allocate
932  *
933  * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
934  * pair. Use this for MSI irqdomains which implement their own vector
935  * allocation/free.
936  *
937  * Return: %0 on success or an error code.
938  */
939 int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev,
940 				       int nvec)
941 {
942 	struct msi_domain_info *info = domain->host_data;
943 	struct msi_domain_ops *ops = info->ops;
944 	int ret;
945 
946 	lockdep_assert_held(&dev->msi.data->mutex);
947 
948 	ret = msi_domain_add_simple_msi_descs(info, dev, nvec);
949 	if (ret)
950 		return ret;
951 
952 	ret = ops->domain_alloc_irqs(domain, dev, nvec);
953 	if (ret)
954 		msi_domain_free_irqs_descs_locked(domain, dev);
955 	return ret;
956 }
957 
/**
 * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
 * @domain:	The domain to allocate from
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are allocated
 * @nvec:	The number of interrupts to allocate
 *
 * Wrapper around msi_domain_alloc_irqs_descs_locked() which takes and
 * releases the MSI descriptor mutex of @dev.
 *
 * Return: %0 on success or an error code.
 */
int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec)
{
	int err;

	msi_lock_descs(dev);
	err = msi_domain_alloc_irqs_descs_locked(domain, dev, nvec);
	msi_unlock_descs(dev);

	return err;
}
976 
977 void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
978 {
979 	struct msi_domain_info *info = domain->host_data;
980 	struct irq_data *irqd;
981 	struct msi_desc *desc;
982 	int i;
983 
984 	/* Only handle MSI entries which have an interrupt associated */
985 	msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
986 		/* Make sure all interrupts are deactivated */
987 		for (i = 0; i < desc->nvec_used; i++) {
988 			irqd = irq_domain_get_irq_data(domain, desc->irq + i);
989 			if (irqd && irqd_is_activated(irqd))
990 				irq_domain_deactivate_irq(irqd);
991 		}
992 
993 		irq_domain_free_irqs(desc->irq, desc->nvec_used);
994 		if (info->flags & MSI_FLAG_DEV_SYSFS)
995 			msi_sysfs_remove_desc(dev, desc);
996 		desc->irq = 0;
997 	}
998 }
999 
1000 static void msi_domain_free_msi_descs(struct msi_domain_info *info,
1001 				      struct device *dev)
1002 {
1003 	if (info->flags & MSI_FLAG_FREE_MSI_DESCS)
1004 		msi_free_msi_descs(dev);
1005 }
1006 
1007 /**
1008  * msi_domain_free_irqs_descs_locked - Free interrupts from a MSI interrupt @domain associated to @dev
1009  * @domain:	The domain to managing the interrupts
1010  * @dev:	Pointer to device struct of the device for which the interrupts
1011  *		are free
1012  *
1013  * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
1014  * pair. Use this for MSI irqdomains which implement their own vector
1015  * allocation.
1016  */
1017 void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev)
1018 {
1019 	struct msi_domain_info *info = domain->host_data;
1020 	struct msi_domain_ops *ops = info->ops;
1021 
1022 	lockdep_assert_held(&dev->msi.data->mutex);
1023 
1024 	ops->domain_free_irqs(domain, dev);
1025 	msi_domain_free_msi_descs(info, dev);
1026 }
1027 
/**
 * msi_domain_free_irqs - Free interrupts from a MSI interrupt @domain associated to @dev
 * @domain:	The domain managing the interrupts
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 *
 * Wrapper around msi_domain_free_irqs_descs_locked() which takes and
 * releases the MSI descriptor mutex of @dev.
 */
void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
{
	msi_lock_descs(dev);

	msi_domain_free_irqs_descs_locked(domain, dev);

	msi_unlock_descs(dev);
}
1040 
1041 /**
1042  * msi_get_domain_info - Get the MSI interrupt domain info for @domain
1043  * @domain:	The interrupt domain to retrieve data from
1044  *
1045  * Return: the pointer to the msi_domain_info stored in @domain->host_data.
1046  */
1047 struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
1048 {
1049 	return (struct msi_domain_info *)domain->host_data;
1050 }
1051 
1052 #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
1053