1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor. 5 * 6 * Authors: 7 * Sunil Muthuswamy <sunilmut@microsoft.com> 8 * Wei Liu <wei.liu@kernel.org> 9 */ 10 11 #include <linux/pci.h> 12 #include <linux/irq.h> 13 #include <asm/mshyperv.h> 14 15 static int hv_map_interrupt(union hv_device_id device_id, bool level, 16 int cpu, int vector, struct hv_interrupt_entry *entry) 17 { 18 struct hv_input_map_device_interrupt *input; 19 struct hv_output_map_device_interrupt *output; 20 struct hv_device_interrupt_descriptor *intr_desc; 21 unsigned long flags; 22 u64 status; 23 int nr_bank, var_size; 24 25 local_irq_save(flags); 26 27 input = *this_cpu_ptr(hyperv_pcpu_input_arg); 28 output = *this_cpu_ptr(hyperv_pcpu_output_arg); 29 30 intr_desc = &input->interrupt_descriptor; 31 memset(input, 0, sizeof(*input)); 32 input->partition_id = hv_current_partition_id; 33 input->device_id = device_id.as_uint64; 34 intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED; 35 intr_desc->vector_count = 1; 36 intr_desc->target.vector = vector; 37 38 if (level) 39 intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL; 40 else 41 intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE; 42 43 intr_desc->target.vp_set.valid_bank_mask = 0; 44 intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K; 45 nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu)); 46 if (nr_bank < 0) { 47 local_irq_restore(flags); 48 pr_err("%s: unable to generate VP set\n", __func__); 49 return EINVAL; 50 } 51 intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; 52 53 /* 54 * var-sized hypercall, var-size starts after vp_mask (thus 55 * vp_set.format does not count, but vp_set.valid_bank_mask 56 * does). 57 */ 58 var_size = nr_bank + 1; 59 60 status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size, 61 input, output); 62 *entry = output->interrupt_entry; 63 64 local_irq_restore(flags); 65 66 if (!hv_result_success(status)) 67 pr_err("%s: hypercall failed, status %lld\n", __func__, status); 68 69 return hv_result(status); 70 } 71 72 static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) 73 { 74 unsigned long flags; 75 struct hv_input_unmap_device_interrupt *input; 76 struct hv_interrupt_entry *intr_entry; 77 u64 status; 78 79 local_irq_save(flags); 80 input = *this_cpu_ptr(hyperv_pcpu_input_arg); 81 82 memset(input, 0, sizeof(*input)); 83 intr_entry = &input->interrupt_entry; 84 input->partition_id = hv_current_partition_id; 85 input->device_id = id; 86 *intr_entry = *old_entry; 87 88 status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); 89 local_irq_restore(flags); 90 91 return hv_result(status); 92 } 93 94 #ifdef CONFIG_PCI_MSI 95 struct rid_data { 96 struct pci_dev *bridge; 97 u32 rid; 98 }; 99 100 static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data) 101 { 102 struct rid_data *rd = data; 103 u8 bus = PCI_BUS_NUM(rd->rid); 104 105 if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) { 106 rd->bridge = pdev; 107 rd->rid = alias; 108 } 109 110 return 0; 111 } 112 113 static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev) 114 { 115 union hv_device_id dev_id; 116 struct rid_data data = { 117 .bridge = NULL, 118 .rid = PCI_DEVID(dev->bus->number, dev->devfn) 119 }; 120 121 pci_for_each_dma_alias(dev, get_rid_cb, &data); 122 123 dev_id.as_uint64 = 0; 124 dev_id.device_type = HV_DEVICE_TYPE_PCI; 125 dev_id.pci.segment = pci_domain_nr(dev->bus); 126 127 dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid); 128 dev_id.pci.bdf.device = PCI_SLOT(data.rid); 129 dev_id.pci.bdf.function = PCI_FUNC(data.rid); 130 dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE; 131 132 if (data.bridge) { 133 int pos; 134 135 /* 136 * Microsoft Hypervisor requires a bus range when the bridge is 137 * running in PCI-X mode. 138 * 139 * To distinguish conventional vs PCI-X bridge, we can check 140 * the bridge's PCI-X Secondary Status Register, Secondary Bus 141 * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge 142 * Specification Revision 1.0 5.2.2.1.3. 143 * 144 * Value zero means it is in conventional mode, otherwise it is 145 * in PCI-X mode. 146 */ 147 148 pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX); 149 if (pos) { 150 u16 status; 151 152 pci_read_config_word(data.bridge, pos + 153 PCI_X_BRIDGE_SSTATUS, &status); 154 155 if (status & PCI_X_SSTATUS_FREQ) { 156 /* Non-zero, PCI-X mode */ 157 u8 sec_bus, sub_bus; 158 159 dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE; 160 161 pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus); 162 dev_id.pci.shadow_bus_range.secondary_bus = sec_bus; 163 pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus); 164 dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus; 165 } 166 } 167 } 168 169 return dev_id; 170 } 171 172 static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector, 173 struct hv_interrupt_entry *entry) 174 { 175 union hv_device_id device_id = hv_build_pci_dev_id(dev); 176 177 return hv_map_interrupt(device_id, false, cpu, vector, entry); 178 } 179 180 static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg) 181 { 182 /* High address is always 0 */ 183 msg->address_hi = 0; 184 msg->address_lo = entry->msi_entry.address.as_uint32; 185 msg->data = entry->msi_entry.data.as_uint32; 186 } 187 188 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry); 189 static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) 190 { 191 struct msi_desc *msidesc; 192 struct pci_dev *dev; 193 struct hv_interrupt_entry out_entry, *stored_entry; 194 struct irq_cfg *cfg = irqd_cfg(data); 195 cpumask_t *affinity; 196 int cpu; 197 u64 status; 198 199 msidesc = irq_data_get_msi_desc(data); 200 dev = msi_desc_to_pci_dev(msidesc); 201 202 if (!cfg) { 203 pr_debug("%s: cfg is NULL", __func__); 204 return; 205 } 206 207 affinity = irq_data_get_effective_affinity_mask(data); 208 cpu = cpumask_first_and(affinity, cpu_online_mask); 209 210 if (data->chip_data) { 211 /* 212 * This interrupt is already mapped. Let's unmap first. 213 * 214 * We don't use retarget interrupt hypercalls here because 215 * Microsoft Hypervisor doens't allow root to change the vector 216 * or specify VPs outside of the set that is initially used 217 * during mapping. 218 */ 219 stored_entry = data->chip_data; 220 data->chip_data = NULL; 221 222 status = hv_unmap_msi_interrupt(dev, stored_entry); 223 224 kfree(stored_entry); 225 226 if (status != HV_STATUS_SUCCESS) { 227 pr_debug("%s: failed to unmap, status %lld", __func__, status); 228 return; 229 } 230 } 231 232 stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC); 233 if (!stored_entry) { 234 pr_debug("%s: failed to allocate chip data\n", __func__); 235 return; 236 } 237 238 status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry); 239 if (status != HV_STATUS_SUCCESS) { 240 kfree(stored_entry); 241 return; 242 } 243 244 *stored_entry = out_entry; 245 data->chip_data = stored_entry; 246 entry_to_msi_msg(&out_entry, msg); 247 248 return; 249 } 250 251 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry) 252 { 253 return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry); 254 } 255 256 static void hv_teardown_msi_irq_common(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 257 { 258 u64 status; 259 struct hv_interrupt_entry old_entry; 260 struct irq_desc *desc; 261 struct irq_data *data; 262 struct msi_msg msg; 263 264 desc = irq_to_desc(irq); 265 if (!desc) { 266 pr_debug("%s: no irq desc\n", __func__); 267 return; 268 } 269 270 data = &desc->irq_data; 271 if (!data) { 272 pr_debug("%s: no irq data\n", __func__); 273 return; 274 } 275 276 if (!data->chip_data) { 277 pr_debug("%s: no chip data\n!", __func__); 278 return; 279 } 280 281 old_entry = *(struct hv_interrupt_entry *)data->chip_data; 282 entry_to_msi_msg(&old_entry, &msg); 283 284 kfree(data->chip_data); 285 data->chip_data = NULL; 286 287 status = hv_unmap_msi_interrupt(dev, &old_entry); 288 289 if (status != HV_STATUS_SUCCESS) { 290 pr_err("%s: hypercall failed, status %lld\n", __func__, status); 291 return; 292 } 293 } 294 295 static void hv_msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) 296 { 297 int i; 298 struct msi_desc *entry; 299 struct pci_dev *pdev; 300 301 if (WARN_ON_ONCE(!dev_is_pci(dev))) 302 return; 303 304 pdev = to_pci_dev(dev); 305 306 for_each_pci_msi_entry(entry, pdev) { 307 if (entry->irq) { 308 for (i = 0; i < entry->nvec_used; i++) { 309 hv_teardown_msi_irq_common(pdev, entry, entry->irq + i); 310 irq_domain_free_irqs(entry->irq + i, 1); 311 } 312 } 313 } 314 } 315 316 /* 317 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, 318 * which implement the MSI or MSI-X Capability Structure. 319 */ 320 static struct irq_chip hv_pci_msi_controller = { 321 .name = "HV-PCI-MSI", 322 .irq_unmask = pci_msi_unmask_irq, 323 .irq_mask = pci_msi_mask_irq, 324 .irq_ack = irq_chip_ack_parent, 325 .irq_retrigger = irq_chip_retrigger_hierarchy, 326 .irq_compose_msi_msg = hv_irq_compose_msi_msg, 327 .irq_set_affinity = msi_domain_set_affinity, 328 .flags = IRQCHIP_SKIP_SET_WAKE, 329 }; 330 331 static struct msi_domain_ops pci_msi_domain_ops = { 332 .domain_free_irqs = hv_msi_domain_free_irqs, 333 .msi_prepare = pci_msi_prepare, 334 }; 335 336 static struct msi_domain_info hv_pci_msi_domain_info = { 337 .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | 338 MSI_FLAG_PCI_MSIX, 339 .ops = &pci_msi_domain_ops, 340 .chip = &hv_pci_msi_controller, 341 .handler = handle_edge_irq, 342 .handler_name = "edge", 343 }; 344 345 struct irq_domain * __init hv_create_pci_msi_domain(void) 346 { 347 struct irq_domain *d = NULL; 348 struct fwnode_handle *fn; 349 350 fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI"); 351 if (fn) 352 d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain); 353 354 /* No point in going further if we can't get an irq domain */ 355 BUG_ON(!d); 356 357 return d; 358 } 359 360 #endif /* CONFIG_PCI_MSI */ 361 362 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry) 363 { 364 union hv_device_id device_id; 365 366 device_id.as_uint64 = 0; 367 device_id.device_type = HV_DEVICE_TYPE_IOAPIC; 368 device_id.ioapic.ioapic_id = (u8)ioapic_id; 369 370 return hv_unmap_interrupt(device_id.as_uint64, entry); 371 } 372 EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt); 373 374 int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector, 375 struct hv_interrupt_entry *entry) 376 { 377 union hv_device_id device_id; 378 379 device_id.as_uint64 = 0; 380 device_id.device_type = HV_DEVICE_TYPE_IOAPIC; 381 device_id.ioapic.ioapic_id = (u8)ioapic_id; 382 383 return hv_map_interrupt(device_id, level, cpu, vector, entry); 384 } 385 EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt); 386