1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright 2006 Jake Moilanen <moilanen@austin.ibm.com>, IBM Corp. 4 * Copyright 2006-2007 Michael Ellerman, IBM Corp. 5 */ 6 7 #include <linux/device.h> 8 #include <linux/irq.h> 9 #include <linux/msi.h> 10 11 #include <asm/rtas.h> 12 #include <asm/hw_irq.h> 13 #include <asm/ppc-pci.h> 14 #include <asm/machdep.h> 15 16 #include "pseries.h" 17 18 static int query_token, change_token; 19 20 #define RTAS_QUERY_FN 0 21 #define RTAS_CHANGE_FN 1 22 #define RTAS_RESET_FN 2 23 #define RTAS_CHANGE_MSI_FN 3 24 #define RTAS_CHANGE_MSIX_FN 4 25 #define RTAS_CHANGE_32MSI_FN 5 26 27 /* RTAS Helpers */ 28 29 static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs) 30 { 31 u32 addr, seq_num, rtas_ret[3]; 32 unsigned long buid; 33 int rc; 34 35 addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); 36 buid = pdn->phb->buid; 37 38 seq_num = 1; 39 do { 40 if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN || 41 func == RTAS_CHANGE_32MSI_FN) 42 rc = rtas_call(change_token, 6, 4, rtas_ret, addr, 43 BUID_HI(buid), BUID_LO(buid), 44 func, num_irqs, seq_num); 45 else 46 rc = rtas_call(change_token, 6, 3, rtas_ret, addr, 47 BUID_HI(buid), BUID_LO(buid), 48 func, num_irqs, seq_num); 49 50 seq_num = rtas_ret[1]; 51 } while (rtas_busy_delay(rc)); 52 53 /* 54 * If the RTAS call succeeded, return the number of irqs allocated. 55 * If not, make sure we return a negative error code. 56 */ 57 if (rc == 0) 58 rc = rtas_ret[0]; 59 else if (rc > 0) 60 rc = -rc; 61 62 pr_debug("rtas_msi: ibm,change_msi(func=%d,num=%d), got %d rc = %d\n", 63 func, num_irqs, rtas_ret[0], rc); 64 65 return rc; 66 } 67 68 static void rtas_disable_msi(struct pci_dev *pdev) 69 { 70 struct pci_dn *pdn; 71 72 pdn = pci_get_pdn(pdev); 73 if (!pdn) 74 return; 75 76 /* 77 * disabling MSI with the explicit interface also disables MSI-X 78 */ 79 if (rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, 0) != 0) { 80 /* 81 * may have failed because explicit interface is not 82 * present 83 */ 84 if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0) { 85 pr_debug("rtas_msi: Setting MSIs to 0 failed!\n"); 86 } 87 } 88 } 89 90 static int rtas_query_irq_number(struct pci_dn *pdn, int offset) 91 { 92 u32 addr, rtas_ret[2]; 93 unsigned long buid; 94 int rc; 95 96 addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); 97 buid = pdn->phb->buid; 98 99 do { 100 rc = rtas_call(query_token, 4, 3, rtas_ret, addr, 101 BUID_HI(buid), BUID_LO(buid), offset); 102 } while (rtas_busy_delay(rc)); 103 104 if (rc) { 105 pr_debug("rtas_msi: error (%d) querying source number\n", rc); 106 return rc; 107 } 108 109 return rtas_ret[0]; 110 } 111 112 static void rtas_teardown_msi_irqs(struct pci_dev *pdev) 113 { 114 struct msi_desc *entry; 115 116 for_each_pci_msi_entry(entry, pdev) { 117 if (!entry->irq) 118 continue; 119 120 irq_set_msi_desc(entry->irq, NULL); 121 irq_dispose_mapping(entry->irq); 122 } 123 124 rtas_disable_msi(pdev); 125 } 126 127 static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) 128 { 129 struct device_node *dn; 130 const __be32 *p; 131 u32 req_msi; 132 133 dn = pci_device_to_OF_node(pdev); 134 135 p = of_get_property(dn, prop_name, NULL); 136 if (!p) { 137 pr_debug("rtas_msi: No %s on %pOF\n", prop_name, dn); 138 return -ENOENT; 139 } 140 141 req_msi = be32_to_cpup(p); 142 if (req_msi < nvec) { 143 pr_debug("rtas_msi: %s requests < %d MSIs\n", prop_name, nvec); 144 145 if (req_msi == 0) /* Be paranoid */ 146 return -ENOSPC; 147 148 return req_msi; 149 } 150 151 return 0; 152 } 153 154 static int check_req_msi(struct pci_dev *pdev, int nvec) 155 { 156 return check_req(pdev, nvec, "ibm,req#msi"); 157 } 158 159 static int check_req_msix(struct pci_dev *pdev, int nvec) 160 { 161 return check_req(pdev, nvec, "ibm,req#msi-x"); 162 } 163 164 /* Quota calculation */ 165 166 static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) 167 { 168 struct device_node *dn; 169 const __be32 *p; 170 171 dn = of_node_get(pci_device_to_OF_node(dev)); 172 while (dn) { 173 p = of_get_property(dn, "ibm,pe-total-#msi", NULL); 174 if (p) { 175 pr_debug("rtas_msi: found prop on dn %pOF\n", 176 dn); 177 *total = be32_to_cpup(p); 178 return dn; 179 } 180 181 dn = of_get_next_parent(dn); 182 } 183 184 return NULL; 185 } 186 187 static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) 188 { 189 struct device_node *dn; 190 struct eeh_dev *edev; 191 192 /* Found our PE and assume 8 at that point. */ 193 194 dn = pci_device_to_OF_node(dev); 195 if (!dn) 196 return NULL; 197 198 /* Get the top level device in the PE */ 199 edev = pdn_to_eeh_dev(PCI_DN(dn)); 200 if (edev->pe) 201 edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, 202 entry); 203 dn = pci_device_to_OF_node(edev->pdev); 204 if (!dn) 205 return NULL; 206 207 /* We actually want the parent */ 208 dn = of_get_parent(dn); 209 if (!dn) 210 return NULL; 211 212 /* Hardcode of 8 for old firmwares */ 213 *total = 8; 214 pr_debug("rtas_msi: using PE dn %pOF\n", dn); 215 216 return dn; 217 } 218 219 struct msi_counts { 220 struct device_node *requestor; 221 int num_devices; 222 int request; 223 int quota; 224 int spare; 225 int over_quota; 226 }; 227 228 static void *count_non_bridge_devices(struct device_node *dn, void *data) 229 { 230 struct msi_counts *counts = data; 231 const __be32 *p; 232 u32 class; 233 234 pr_debug("rtas_msi: counting %pOF\n", dn); 235 236 p = of_get_property(dn, "class-code", NULL); 237 class = p ? be32_to_cpup(p) : 0; 238 239 if ((class >> 8) != PCI_CLASS_BRIDGE_PCI) 240 counts->num_devices++; 241 242 return NULL; 243 } 244 245 static void *count_spare_msis(struct device_node *dn, void *data) 246 { 247 struct msi_counts *counts = data; 248 const __be32 *p; 249 int req; 250 251 if (dn == counts->requestor) 252 req = counts->request; 253 else { 254 /* We don't know if a driver will try to use MSI or MSI-X, 255 * so we just have to punt and use the larger of the two. */ 256 req = 0; 257 p = of_get_property(dn, "ibm,req#msi", NULL); 258 if (p) 259 req = be32_to_cpup(p); 260 261 p = of_get_property(dn, "ibm,req#msi-x", NULL); 262 if (p) 263 req = max(req, (int)be32_to_cpup(p)); 264 } 265 266 if (req < counts->quota) 267 counts->spare += counts->quota - req; 268 else if (req > counts->quota) 269 counts->over_quota++; 270 271 return NULL; 272 } 273 274 static int msi_quota_for_device(struct pci_dev *dev, int request) 275 { 276 struct device_node *pe_dn; 277 struct msi_counts counts; 278 int total; 279 280 pr_debug("rtas_msi: calc quota for %s, request %d\n", pci_name(dev), 281 request); 282 283 pe_dn = find_pe_total_msi(dev, &total); 284 if (!pe_dn) 285 pe_dn = find_pe_dn(dev, &total); 286 287 if (!pe_dn) { 288 pr_err("rtas_msi: couldn't find PE for %s\n", pci_name(dev)); 289 goto out; 290 } 291 292 pr_debug("rtas_msi: found PE %pOF\n", pe_dn); 293 294 memset(&counts, 0, sizeof(struct msi_counts)); 295 296 /* Work out how many devices we have below this PE */ 297 pci_traverse_device_nodes(pe_dn, count_non_bridge_devices, &counts); 298 299 if (counts.num_devices == 0) { 300 pr_err("rtas_msi: found 0 devices under PE for %s\n", 301 pci_name(dev)); 302 goto out; 303 } 304 305 counts.quota = total / counts.num_devices; 306 if (request <= counts.quota) 307 goto out; 308 309 /* else, we have some more calculating to do */ 310 counts.requestor = pci_device_to_OF_node(dev); 311 counts.request = request; 312 pci_traverse_device_nodes(pe_dn, count_spare_msis, &counts); 313 314 /* If the quota isn't an integer multiple of the total, we can 315 * use the remainder as spare MSIs for anyone that wants them. */ 316 counts.spare += total % counts.num_devices; 317 318 /* Divide any spare by the number of over-quota requestors */ 319 if (counts.over_quota) 320 counts.quota += counts.spare / counts.over_quota; 321 322 /* And finally clamp the request to the possibly adjusted quota */ 323 request = min(counts.quota, request); 324 325 pr_debug("rtas_msi: request clamped to quota %d\n", request); 326 out: 327 of_node_put(pe_dn); 328 329 return request; 330 } 331 332 static int check_msix_entries(struct pci_dev *pdev) 333 { 334 struct msi_desc *entry; 335 int expected; 336 337 /* There's no way for us to express to firmware that we want 338 * a discontiguous, or non-zero based, range of MSI-X entries. 339 * So we must reject such requests. */ 340 341 expected = 0; 342 for_each_pci_msi_entry(entry, pdev) { 343 if (entry->msi_attrib.entry_nr != expected) { 344 pr_debug("rtas_msi: bad MSI-X entries.\n"); 345 return -EINVAL; 346 } 347 expected++; 348 } 349 350 return 0; 351 } 352 353 static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev) 354 { 355 u32 addr_hi, addr_lo; 356 357 /* 358 * We should only get in here for IODA1 configs. This is based on the 359 * fact that we using RTAS for MSIs, we don't have the 32 bit MSI RTAS 360 * support, and we are in a PCIe Gen2 slot. 361 */ 362 dev_info(&pdev->dev, 363 "rtas_msi: No 32 bit MSI firmware support, forcing 32 bit MSI\n"); 364 pci_read_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, &addr_hi); 365 addr_lo = 0xffff0000 | ((addr_hi >> (48 - 32)) << 4); 366 pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, addr_lo); 367 pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0); 368 } 369 370 static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) 371 { 372 struct pci_dn *pdn; 373 int hwirq, virq, i, quota, rc; 374 struct msi_desc *entry; 375 struct msi_msg msg; 376 int nvec = nvec_in; 377 int use_32bit_msi_hack = 0; 378 379 if (type == PCI_CAP_ID_MSIX) 380 rc = check_req_msix(pdev, nvec); 381 else 382 rc = check_req_msi(pdev, nvec); 383 384 if (rc) 385 return rc; 386 387 quota = msi_quota_for_device(pdev, nvec); 388 389 if (quota && quota < nvec) 390 return quota; 391 392 if (type == PCI_CAP_ID_MSIX && check_msix_entries(pdev)) 393 return -EINVAL; 394 395 /* 396 * Firmware currently refuse any non power of two allocation 397 * so we round up if the quota will allow it. 398 */ 399 if (type == PCI_CAP_ID_MSIX) { 400 int m = roundup_pow_of_two(nvec); 401 quota = msi_quota_for_device(pdev, m); 402 403 if (quota >= m) 404 nvec = m; 405 } 406 407 pdn = pci_get_pdn(pdev); 408 409 /* 410 * Try the new more explicit firmware interface, if that fails fall 411 * back to the old interface. The old interface is known to never 412 * return MSI-Xs. 413 */ 414 again: 415 if (type == PCI_CAP_ID_MSI) { 416 if (pdev->no_64bit_msi) { 417 rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec); 418 if (rc < 0) { 419 /* 420 * We only want to run the 32 bit MSI hack below if 421 * the max bus speed is Gen2 speed 422 */ 423 if (pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT) 424 return rc; 425 426 use_32bit_msi_hack = 1; 427 } 428 } else 429 rc = -1; 430 431 if (rc < 0) 432 rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec); 433 434 if (rc < 0) { 435 pr_debug("rtas_msi: trying the old firmware call.\n"); 436 rc = rtas_change_msi(pdn, RTAS_CHANGE_FN, nvec); 437 } 438 439 if (use_32bit_msi_hack && rc > 0) 440 rtas_hack_32bit_msi_gen2(pdev); 441 } else 442 rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); 443 444 if (rc != nvec) { 445 if (nvec != nvec_in) { 446 nvec = nvec_in; 447 goto again; 448 } 449 pr_debug("rtas_msi: rtas_change_msi() failed\n"); 450 return rc; 451 } 452 453 i = 0; 454 for_each_pci_msi_entry(entry, pdev) { 455 hwirq = rtas_query_irq_number(pdn, i++); 456 if (hwirq < 0) { 457 pr_debug("rtas_msi: error (%d) getting hwirq\n", rc); 458 return hwirq; 459 } 460 461 virq = irq_create_mapping_affinity(NULL, hwirq, 462 entry->affinity); 463 464 if (!virq) { 465 pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); 466 return -ENOSPC; 467 } 468 469 dev_dbg(&pdev->dev, "rtas_msi: allocated virq %d\n", virq); 470 irq_set_msi_desc(virq, entry); 471 472 /* Read config space back so we can restore after reset */ 473 __pci_read_msi_msg(entry, &msg); 474 entry->msg = msg; 475 } 476 477 return 0; 478 } 479 480 static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) 481 { 482 /* No LSI -> leave MSIs (if any) configured */ 483 if (!pdev->irq) { 484 dev_dbg(&pdev->dev, "rtas_msi: no LSI, nothing to do.\n"); 485 return; 486 } 487 488 /* No MSI -> MSIs can't have been assigned by fw, leave LSI */ 489 if (check_req_msi(pdev, 1) && check_req_msix(pdev, 1)) { 490 dev_dbg(&pdev->dev, "rtas_msi: no req#msi/x, nothing to do.\n"); 491 return; 492 } 493 494 dev_dbg(&pdev->dev, "rtas_msi: disabling existing MSI.\n"); 495 rtas_disable_msi(pdev); 496 } 497 498 static int rtas_msi_init(void) 499 { 500 struct pci_controller *phb; 501 502 query_token = rtas_token("ibm,query-interrupt-source-number"); 503 change_token = rtas_token("ibm,change-msi"); 504 505 if ((query_token == RTAS_UNKNOWN_SERVICE) || 506 (change_token == RTAS_UNKNOWN_SERVICE)) { 507 pr_debug("rtas_msi: no RTAS tokens, no MSI support.\n"); 508 return -1; 509 } 510 511 pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n"); 512 513 WARN_ON(pseries_pci_controller_ops.setup_msi_irqs); 514 pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; 515 pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; 516 517 list_for_each_entry(phb, &hose_list, list_node) { 518 WARN_ON(phb->controller_ops.setup_msi_irqs); 519 phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; 520 phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; 521 } 522 523 WARN_ON(ppc_md.pci_irq_fixup); 524 ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; 525 526 return 0; 527 } 528 machine_arch_initcall(pseries, rtas_msi_init); 529