// SPDX-License-Identifier: GPL-2.0

/*
 * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
 *
 * Authors:
 *	Sunil Muthuswamy <sunilmut@microsoft.com>
 *	Wei Liu <wei.liu@kernel.org>
 */

#include <linux/pci.h>
#include <linux/irq.h>
#include <asm/mshyperv.h>

static int hv_map_interrupt(union hv_device_id device_id, bool level,
			    int cpu, int vector, struct hv_interrupt_entry *entry)
{
	struct hv_input_map_device_interrupt *input;
	struct hv_output_map_device_interrupt *output;
	struct hv_device_interrupt_descriptor *intr_desc;
	unsigned long flags;
	u64 status;
	int nr_bank, var_size;

	local_irq_save(flags);

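	/*
	 * The per-cpu hypercall input/output pages belong to this CPU and
	 * are only safe to use while interrupts are disabled (hence the
	 * local_irq_save() above); otherwise an interrupt on this CPU
	 * could reuse the pages and clobber them mid-hypercall.
	 */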
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
	output = *this_cpu_ptr(hyperv_pcpu_output_arg);

	intr_desc = &input->interrupt_descriptor;
	memset(input, 0, sizeof(*input));
	input->partition_id = hv_current_partition_id;
	input->device_id = device_id.as_uint64;
	intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
	intr_desc->vector_count = 1;
	intr_desc->target.vector = vector;

	if (level)
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
	else
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;

	intr_desc->target.vp_set.valid_bank_mask = 0;
	intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));
	if (nr_bank < 0) {
		local_irq_restore(flags);
		pr_err("%s: unable to generate VP set\n", __func__);
		return -EINVAL;
	}
	intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;

	/*
	 * var-sized hypercall, var-size starts after vp_mask (thus
	 * vp_set.format does not count, but vp_set.valid_bank_mask
	 * does).
	 */
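	/*
	 * For example, targeting a single CPU normally yields one bank in
	 * the sparse VP set, so nr_bank is 1 and var_size is 2: one u64
	 * for valid_bank_mask plus one u64 bank of VP bits.
	 */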
	var_size = nr_bank + 1;

	status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
			input, output);
	*entry = output->interrupt_entry;

	local_irq_restore(flags);

	if (!hv_result_success(status))
		pr_err("%s: hypercall failed, status %lld\n", __func__, status);

	return hv_result(status);
}

static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
{
	unsigned long flags;
	struct hv_input_unmap_device_interrupt *input;
	struct hv_interrupt_entry *intr_entry;
	u64 status;

	local_irq_save(flags);
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);

	memset(input, 0, sizeof(*input));
	intr_entry = &input->interrupt_entry;
	input->partition_id = hv_current_partition_id;
	input->device_id = id;
	*intr_entry = *old_entry;

	status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL);
	local_irq_restore(flags);

	return hv_result(status);
}

#ifdef CONFIG_PCI_MSI
struct rid_data {
	struct pci_dev *bridge;
	u32 rid;
};

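/*
 * pci_for_each_dma_alias() callback: whenever an alias shows up on a
 * different bus than the current RID, record the device providing it
 * (the aliasing bridge) and take over its RID. After the walk, @rid is
 * the requester ID actually seen upstream and @bridge the topmost
 * aliasing bridge, if any.
 */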
static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	struct rid_data *rd = data;
	u8 bus = PCI_BUS_NUM(rd->rid);

	if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) {
		rd->bridge = pdev;
		rd->rid = alias;
	}

	return 0;
}

static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
{
	union hv_device_id dev_id;
	struct rid_data data = {
		.bridge = NULL,
		.rid = PCI_DEVID(dev->bus->number, dev->devfn)
	};

	pci_for_each_dma_alias(dev, get_rid_cb, &data);

	dev_id.as_uint64 = 0;
	dev_id.device_type = HV_DEVICE_TYPE_PCI;
	dev_id.pci.segment = pci_domain_nr(dev->bus);

	dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
	dev_id.pci.bdf.device = PCI_SLOT(data.rid);
	dev_id.pci.bdf.function = PCI_FUNC(data.rid);
	dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;

	if (data.bridge) {
		int pos;

		/*
		 * Microsoft Hypervisor requires a bus range when the bridge is
		 * running in PCI-X mode.
		 *
		 * To distinguish conventional vs PCI-X bridge, we can check
		 * the bridge's PCI-X Secondary Status Register, Secondary Bus
		 * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
		 * Specification Revision 1.0 5.2.2.1.3.
		 *
		 * Value zero means it is in conventional mode, otherwise it is
		 * in PCI-X mode.
		 */

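		/*
		 * PCI_X_SSTATUS_FREQ masks the Secondary Bus Mode and
		 * Frequency field (bits 6:9) of that register, so a nonzero
		 * masked value below indicates one of the PCI-X modes.
		 */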
		pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
		if (pos) {
			u16 status;

			pci_read_config_word(data.bridge, pos +
					PCI_X_BRIDGE_SSTATUS, &status);

			if (status & PCI_X_SSTATUS_FREQ) {
				/* Non-zero, PCI-X mode */
				u8 sec_bus, sub_bus;

				dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;

				pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
				dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
				pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
				dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
			}
		}
	}

	return dev_id;
}

static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector,
				struct hv_interrupt_entry *entry)
{
	union hv_device_id device_id = hv_build_pci_dev_id(dev);

	return hv_map_interrupt(device_id, false, cpu, vector, entry);
}

static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
{
	/* High address is always 0 */
	msg->address_hi = 0;
	msg->address_lo = entry->msi_entry.address.as_uint32;
	msg->data = entry->msi_entry.data.as_uint32;
}
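
/*
 * On x86, MSI addresses live in the 32-bit local APIC window
 * (0xFEExxxxx), so the hypervisor-provided address is assumed to fit
 * entirely in address_lo, with address_hi hardwired to zero above.
 */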
static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);

static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
	struct msi_desc *msidesc;
	struct pci_dev *dev;
	struct hv_interrupt_entry out_entry, *stored_entry;
	struct irq_cfg *cfg = irqd_cfg(data);
	const cpumask_t *affinity;
	int cpu;
	u64 status;

	msidesc = irq_data_get_msi_desc(data);
	dev = msi_desc_to_pci_dev(msidesc);

	if (!cfg) {
		pr_debug("%s: cfg is NULL\n", __func__);
		return;
	}

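	/*
	 * The mapping hypercall targets a single VP, so pick the first
	 * online CPU from the effective affinity mask.
	 */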
	affinity = irq_data_get_effective_affinity_mask(data);
	cpu = cpumask_first_and(affinity, cpu_online_mask);

	if (data->chip_data) {
		/*
		 * This interrupt is already mapped. Let's unmap first.
		 *
		 * We don't use retarget interrupt hypercalls here because
		 * Microsoft Hypervisor doesn't allow root to change the vector
		 * or specify VPs outside of the set that is initially used
		 * during mapping.
		 */
		stored_entry = data->chip_data;
		data->chip_data = NULL;

		status = hv_unmap_msi_interrupt(dev, stored_entry);

		kfree(stored_entry);

		if (status != HV_STATUS_SUCCESS) {
			pr_debug("%s: failed to unmap, status %lld\n", __func__, status);
			return;
		}
	}

	stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
	if (!stored_entry) {
		pr_debug("%s: failed to allocate chip data\n", __func__);
		return;
	}

	status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry);
	if (status != HV_STATUS_SUCCESS) {
		kfree(stored_entry);
		return;
	}

	*stored_entry = out_entry;
	data->chip_data = stored_entry;
	entry_to_msi_msg(&out_entry, msg);
}

static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
{
	return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
}

static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
{
	struct hv_interrupt_entry old_entry;
	struct msi_msg msg;
	u64 status;

	if (!irqd->chip_data) {
		pr_debug("%s: no chip data!\n", __func__);
		return;
	}

	old_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
	entry_to_msi_msg(&old_entry, &msg);

	kfree(irqd->chip_data);
	irqd->chip_data = NULL;

	status = hv_unmap_msi_interrupt(dev, &old_entry);

	if (status != HV_STATUS_SUCCESS)
		pr_err("%s: hypercall failed, status %lld\n", __func__, status);
}

static void hv_msi_free_irq(struct irq_domain *domain,
			    struct msi_domain_info *info, unsigned int virq)
{
	struct irq_data *irqd = irq_get_irq_data(virq);
	struct msi_desc *desc;

	if (!irqd)
		return;

	desc = irq_data_get_msi_desc(irqd);
	if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
		return;

	hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
}

/*
 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
 * which implement the MSI or MSI-X Capability Structure.
 */
static struct irq_chip hv_pci_msi_controller = {
	.name			= "HV-PCI-MSI",
	.irq_unmask		= pci_msi_unmask_irq,
	.irq_mask		= pci_msi_mask_irq,
	.irq_ack		= irq_chip_ack_parent,
	.irq_retrigger		= irq_chip_retrigger_hierarchy,
	.irq_compose_msi_msg	= hv_irq_compose_msi_msg,
	.irq_set_affinity	= msi_domain_set_affinity,
	.flags			= IRQCHIP_SKIP_SET_WAKE,
};

static struct msi_domain_ops pci_msi_domain_ops = {
	.msi_free	= hv_msi_free_irq,
	.msi_prepare	= pci_msi_prepare,
};

static struct msi_domain_info hv_pci_msi_domain_info = {
	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
			  MSI_FLAG_PCI_MSIX,
	.ops		= &pci_msi_domain_ops,
	.chip		= &hv_pci_msi_controller,
	.handler	= handle_edge_irq,
	.handler_name	= "edge",
};

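/*
 * The MSI domain is created with the x86 vector domain as its parent:
 * vector allocation and CPU targeting remain with the core x86 code,
 * while irq_compose_msi_msg() above asks the hypervisor to map the
 * resulting (cpu, vector) pair to the device.
 */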
struct irq_domain * __init hv_create_pci_msi_domain(void)
{
	struct irq_domain *d = NULL;
	struct fwnode_handle *fn;

	fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
	if (fn)
		d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain);

	/* No point in going further if we can't get an irq domain */
	BUG_ON(!d);

	return d;
}

#endif /* CONFIG_PCI_MSI */

int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
{
	union hv_device_id device_id;

	device_id.as_uint64 = 0;
	device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
	device_id.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_unmap_interrupt(device_id.as_uint64, entry);
}
EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);

int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
			    struct hv_interrupt_entry *entry)
{
	union hv_device_id device_id;

	device_id.as_uint64 = 0;
	device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
	device_id.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_map_interrupt(device_id, level, cpu, vector, entry);
}
EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);