// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif

#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2

#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)

struct mlx5_irq {
	struct atomic_notifier_head nh;
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq_pool *pool;
	int refcount;
	struct msi_map map;
};

struct mlx5_irq_table {
	struct mlx5_irq_pool *pcif_pool;
	struct mlx5_irq_pool *sf_ctrl_pool;
	struct mlx5_irq_pool *sf_comp_pool;
};

/**
 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
 *                                   to be assigned to each VF.
 * @dev: PF to work on
 * @num_vfs: Number of enabled VFs
 */
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
	int num_vf_msix, min_msix, max_msix;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	/* Limit the maximum number of MSI-X vectors so the default
	 * configuration has some available in the pool. This will allow the
	 * user to increase the number of vectors in a VF without having to
	 * first size-down other VFs.
	 */
	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}
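/* Worked example (hypothetical capability values): with num_vf_msix = 1024,
 * min_msix = 2, max_msix = 128 and num_vfs = 4, each VF gets
 * max(min(1024 / 4, 128 / 2), 2) = max(min(256, 64), 2) = 64 vectors by
 * default, consuming 256 of the 1024 and leaving the rest in the pool for
 * later resizing via mlx5_set_msix_vec_count().
 */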
/**
 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
 * @dev: PF to work on
 * @function_id: Internal PCI VF function ID
 * @msix_vec_count: Number of MSI-X vectors to set
 */
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
			    int msix_vec_count)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *hca_cap = NULL, *query_cap = NULL, *cap;
	int num_vf_msix, min_msix, max_msix;
	int ret;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
		return -EOPNOTSUPP;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	if (msix_vec_count < min_msix)
		return -EINVAL;

	if (msix_vec_count > max_msix)
		return -EOVERFLOW;

	query_cap = kvzalloc(query_sz, GFP_KERNEL);
	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
	if (!hca_cap || !query_cap) {
		ret = -ENOMEM;
		goto out;
	}

	ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
	       MLX5_UN_SZ_BYTES(hca_cap_union));
	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);

	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);

	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
	kvfree(hca_cap);
	kvfree(query_cap);
	return ret;
}
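/* Usage sketch (assumption: this mirrors the mlx5 SR-IOV path, where writing
 * a VF's sriov_vf_msix_count sysfs attribute lands in the PF driver): VF n
 * is addressed by internal function id n + 1, e.g.
 *
 *	// hypothetical caller, not part of this file
 *	err = mlx5_set_msix_vec_count(pf_dev, vf_index + 1, new_count);
 */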
static void irq_release(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rmap;
#endif

	xa_erase(&pool->irqs, irq->map.index);
	/* free_irq() requires that the affinity hint and rmap be cleared
	 * before calling it. To satisfy this requirement, we call
	 * irq_cpu_rmap_remove() to remove the notifier.
	 */
	irq_update_affinity_hint(irq->map.virq, NULL);
#ifdef CONFIG_RFS_ACCEL
	rmap = mlx5_eq_table_get_rmap(pool->dev);
	if (rmap && irq->map.index)
		irq_cpu_rmap_remove(rmap, irq->map.virq);
#endif

	free_cpumask_var(irq->mask);
	free_irq(irq->map.virq, &irq->nh);
	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
		pci_msix_free_irq(pool->dev->pdev, irq->map);
	kfree(irq);
}

int mlx5_irq_put(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
	int ret = 0;

	mutex_lock(&pool->lock);
	irq->refcount--;
	if (!irq->refcount) {
		irq_release(irq);
		ret = 1;
	}
	mutex_unlock(&pool->lock);
	return ret;
}

int mlx5_irq_read_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	return irq->refcount;
}

int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	if (WARN_ON_ONCE(!irq->refcount))
		return 0;
	irq->refcount++;
	return 1;
}

static int irq_get(struct mlx5_irq *irq)
{
	int err;

	mutex_lock(&irq->pool->lock);
	err = mlx5_irq_get_locked(irq);
	mutex_unlock(&irq->pool->lock);
	return err;
}
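/* Reference-counting sketch: mlx5_irq_alloc() creates an IRQ with
 * refcount == 1, each additional consumer takes a reference via irq_get()
 * (or mlx5_irq_get_locked() under pool->lock), and mlx5_irq_put() drops one
 * reference, releasing the vector when the count reaches zero:
 *
 *	irq = mlx5_irq_alloc(pool, 0, &af_desc, NULL);	// refcount == 1
 *	irq_get(irq);					// refcount == 2
 *	mlx5_irq_put(irq);				// refcount == 1
 *	mlx5_irq_put(irq);				// irq_release()
 */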
static irqreturn_t irq_int_handler(int irq, void *nh)
{
	atomic_notifier_call_chain(nh, 0, NULL);
	return IRQ_HANDLED;
}

static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
}

static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	if (!pool->xa_num_irqs.max) {
		/* in case we only have a single irq for the device */
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
		return;
	}

	if (!vecidx) {
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
		return;
	}

	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}

struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
				struct irq_affinity_desc *af_desc,
				struct cpu_rmap **rmap)
{
	struct mlx5_core_dev *dev = pool->dev;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq *irq;
	int err;

	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
	if (!irq)
		return ERR_PTR(-ENOMEM);
	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
		/* The vector at index 0 was already allocated.
		 * Just get the irq number. If dynamic irq is not supported,
		 * vectors have also been allocated.
		 */
		irq->map.virq = pci_irq_vector(dev->pdev, i);
		irq->map.index = 0;
	} else {
		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
		if (!irq->map.virq) {
			err = irq->map.index;
			goto err_alloc_irq;
		}
	}

	if (i && rmap && *rmap) {
#ifdef CONFIG_RFS_ACCEL
		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
		if (err)
			goto err_irq_rmap;
#endif
	}
	if (!mlx5_irq_pool_is_sf_pool(pool))
		irq_set_name(pool, name, i);
	else
		irq_sf_set_name(pool, name, i);
	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
	snprintf(irq->name, MLX5_MAX_IRQ_NAME,
		 "%s@pci:%s", name, pci_name(dev->pdev));
	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
			  &irq->nh);
	if (err) {
		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
		goto err_req_irq;
	}
	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
		mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
		err = -ENOMEM;
		goto err_cpumask;
	}
	if (af_desc) {
		cpumask_copy(irq->mask, &af_desc->mask);
		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
	}
	irq->pool = pool;
	irq->refcount = 1;
	irq->map.index = i;
	err = xa_err(xa_store(&pool->irqs, irq->map.index, irq, GFP_KERNEL));
	if (err) {
		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
			      irq->map.index, err);
		goto err_xa;
	}
	return irq;
err_xa:
	if (af_desc)
		irq_update_affinity_hint(irq->map.virq, NULL);
	free_cpumask_var(irq->mask);
err_cpumask:
	free_irq(irq->map.virq, &irq->nh);
err_req_irq:
#ifdef CONFIG_RFS_ACCEL
	if (i && rmap && *rmap) {
		free_irq_cpu_rmap(*rmap);
		*rmap = NULL;
	}
err_irq_rmap:
#endif
	if (i && pci_msix_can_alloc_dyn(dev->pdev))
		pci_msix_free_irq(dev->pdev, irq->map);
err_alloc_irq:
	kfree(irq);
	return ERR_PTR(err);
}

int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int ret;

	ret = irq_get(irq);
	if (!ret)
		/* Something went very wrong here: we are enabling an EQ
		 * on a non-existing IRQ.
		 */
		return -ENOENT;
	ret = atomic_notifier_chain_register(&irq->nh, nb);
	if (ret)
		mlx5_irq_put(irq);
	return ret;
}

int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int err = 0;

	err = atomic_notifier_chain_unregister(&irq->nh, nb);
	mlx5_irq_put(irq);
	return err;
}
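/* Consumer sketch (illustrative, not part of this file): a user such as an
 * EQ attaches a notifier_block to a requested IRQ; its callback then runs
 * from irq_int_handler() in hard-irq context on every interrupt, so it must
 * stay short and defer real work elsewhere:
 *
 *	static int example_irq_cb(struct notifier_block *nb,
 *				  unsigned long type, void *data)
 *	{
 *		return NOTIFY_OK;
 *	}
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_irq_cb,
 *	};
 *
 *	err = mlx5_irq_attach_nb(irq, &example_nb);
 *	...
 *	mlx5_irq_detach_nb(irq, &example_nb);
 */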
struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
{
	return irq->mask;
}

int mlx5_irq_get_index(struct mlx5_irq *irq)
{
	return irq->map.index;
}

/* irq_pool API */

/* requesting an irq from a given pool according to given index */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
			struct irq_affinity_desc *af_desc,
			struct cpu_rmap **rmap)
{
	struct mlx5_irq *irq;

	mutex_lock(&pool->lock);
	irq = xa_load(&pool->irqs, vecidx);
	if (irq) {
		mlx5_irq_get_locked(irq);
		goto unlock;
	}
	irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
	mutex_unlock(&pool->lock);
	return irq;
}

static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_ctrl_pool;
}

static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_comp_pool;
}

struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_ctrl_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

/**
 * mlx5_irqs_release - release one or more IRQs back to the system.
 * @irqs: IRQs to be released.
 * @nirqs: number of IRQs to be released.
 */
static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
{
	int i;

	for (i = 0; i < nirqs; i++) {
		synchronize_irq(irqs[i]->map.virq);
		mlx5_irq_put(irqs[i]);
	}
}

/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @ctrl_irq: ctrl IRQ to be released.
 */
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
{
	mlx5_irqs_release(&ctrl_irq, 1);
}

/**
 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 *
 * This function returns a pointer to an IRQ, or ERR_PTR in case of error.
 */
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
	struct irq_affinity_desc af_desc;
	struct mlx5_irq *irq;

	cpumask_copy(&af_desc.mask, cpu_online_mask);
	af_desc.is_managed = false;
	if (!mlx5_irq_pool_is_sf_pool(pool)) {
		/* In case we are allocating a control IRQ from a pci device's pool.
		 * This can also happen for an SF if the SF pool is empty.
		 */
		if (!pool->xa_num_irqs.max) {
			cpumask_clear(&af_desc.mask);
			/* In case we only have a single IRQ for PF/VF */
			cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
		}
		/* Allocate the IRQ at index 0. The vector was already allocated */
		irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
	} else {
		irq = mlx5_irq_affinity_request(pool, &af_desc);
	}

	return irq;
}
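/* Usage sketch: a control IRQ (used for control-path event queues rather
 * than completions, per the pool split above) must be balanced with
 * mlx5_ctrl_irq_release(), e.g.
 *
 *	struct mlx5_irq *ctrl_irq = mlx5_ctrl_irq_request(dev);
 *
 *	if (IS_ERR(ctrl_irq))
 *		return PTR_ERR(ctrl_irq);
 *	...
 *	mlx5_ctrl_irq_release(ctrl_irq);
 */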
/**
 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
 * provided.
 * @af_desc: affinity descriptor for this IRQ.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * This function returns a pointer to an IRQ, or ERR_PTR in case of error.
 */
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
				  struct irq_affinity_desc *af_desc,
				  struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool;
	struct mlx5_irq *irq;

	pool = irq_table->pcif_pool;
	irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
	if (IS_ERR(irq))
		return irq;
	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
		      irq->map.virq, cpumask_pr_args(&af_desc->mask),
		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
	return irq;
}

/**
 * mlx5_msix_alloc - allocate msix interrupt
 * @dev: mlx5 device from which to request
 * @handler: interrupt handler
 * @affdesc: affinity descriptor
 * @name: interrupt name
 *
 * Returns: struct msi_map with result encoded.
 * Note: the caller must make sure to release the irq by calling
 *       mlx5_msix_free() if shutdown was initiated.
 */
struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
			       irqreturn_t (*handler)(int, void *),
			       const struct irq_affinity_desc *affdesc,
			       const char *name)
{
	struct msi_map map;
	int err;

	if (!dev->pdev) {
		map.virq = 0;
		map.index = -EINVAL;
		return map;
	}

	map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
	if (!map.virq)
		return map;

	err = request_irq(map.virq, handler, 0, name, NULL);
	if (err) {
		mlx5_core_warn(dev, "err %d\n", err);
		pci_msix_free_irq(dev->pdev, map);
		map.virq = 0;
		map.index = -ENOMEM;
	}
	return map;
}
EXPORT_SYMBOL(mlx5_msix_alloc);

/**
 * mlx5_msix_free - free a previously allocated msix interrupt
 * @dev: mlx5 device associated with interrupt
 * @map: map previously returned by mlx5_msix_alloc()
 */
void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
{
	free_irq(map.virq, NULL);
	pci_msix_free_irq(dev->pdev, map);
}
EXPORT_SYMBOL(mlx5_msix_free);

/**
 * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
 * @irqs: IRQs to be released.
 * @nirqs: number of IRQs to be released.
 */
void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
{
	mlx5_irqs_release(irqs, nirqs);
}

/**
 * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQs.
 * @cpus: CPUs array for binding the IRQs
 * @nirqs: number of IRQs to request.
 * @irqs: an output array of IRQ pointers.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * Each IRQ is bound to at most 1 CPU.
 * This function requests @nirqs IRQs, starting from vector index 1.
 *
 * This function returns the number of IRQs requested (which might be smaller
 * than @nirqs) if successful, or a negative error code in case of an error.
 */
int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
			      struct mlx5_irq **irqs, struct cpu_rmap **rmap)
{
	struct irq_affinity_desc af_desc;
	struct mlx5_irq *irq;
	int i;

	af_desc.is_managed = 1;
	for (i = 0; i < nirqs; i++) {
		/* Clear before setting so each IRQ is bound to exactly one
		 * CPU; the mask lives on the stack and starts uninitialized.
		 */
		cpumask_clear(&af_desc.mask);
		cpumask_set_cpu(cpus[i], &af_desc.mask);
		irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
		if (IS_ERR(irq))
			break;
		irqs[i] = irq;
	}

	return i ? i : PTR_ERR(irq);
}
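/* Usage sketch (illustrative): request one IRQ per entry of a caller-chosen
 * CPU array, then release whatever was granted on teardown:
 *
 *	u16 cpus[2] = { 0, 1 };
 *	struct mlx5_irq *irqs[2];
 *	int n = mlx5_irqs_request_vectors(dev, cpus, 2, irqs, NULL);
 *
 *	if (n < 0)
 *		return n;
 *	...
 *	mlx5_irqs_release_vectors(irqs, n);
 */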
static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
	       u32 min_threshold, u32 max_threshold)
{
	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);

	if (!pool)
		return ERR_PTR(-ENOMEM);
	pool->dev = dev;
	mutex_init(&pool->lock);
	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
	pool->xa_num_irqs.min = start;
	pool->xa_num_irqs.max = start + size - 1;
	if (name)
		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
			 "%s", name);
	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
		      name, size, start);
	return pool;
}

static void irq_pool_free(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	/* There are cases in which we are destroying the irq_table before
	 * freeing all the IRQs, fast teardown for example. Hence, free the
	 * irqs which might not have been freed.
	 */
	xa_for_each(&pool->irqs, index, irq)
		irq_release(irq);
	xa_destroy(&pool->irqs);
	mutex_destroy(&pool->lock);
	kfree(pool->irqs_per_cpu);
	kvfree(pool);
}
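/* Pool layout sketch: irq_pool_alloc(dev, start, size, ...) manages vector
 * indices [start, start + size - 1], so the pools created in
 * irq_pools_init() below partition the MSI-X space:
 *
 *	pcif_pool:    [0, pcif_vec - 1]
 *	sf_ctrl_pool: [pcif_vec, pcif_vec + num_sf_ctrl - 1]
 *	sf_comp_pool: [pcif_vec + num_sf_ctrl, pcif_vec + sf_vec - 1]
 *
 * The min/max thresholds are stored scaled by MLX5_EQ_REFS_PER_IRQ, matching
 * the refcount units used when multiple EQs share an IRQ.
 */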
static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int num_sf_ctrl_by_msix;
	int num_sf_ctrl_by_sfs;
	int num_sf_ctrl;
	int err;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool))
		return PTR_ERR(table->pcif_pool);
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
	num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
					  MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	/* init sf_comp_pool */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	return err;
}

static void irq_pools_destroy(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		irq_pool_free(table->sf_comp_pool);
		irq_pool_free(table->sf_ctrl_pool);
	}
	irq_pool_free(table->pcif_pool);
}

/* irq_table API */

int mlx5_irq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table;

	if (mlx5_core_is_sf(dev))
		return 0;

	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
				  dev->priv.numa_node);
	if (!irq_table)
		return -ENOMEM;

	dev->priv.irq_table = irq_table;
	return 0;
}

void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
{
	if (mlx5_core_is_sf(dev))
		return;

	kvfree(dev->priv.irq_table);
}

int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
	if (!table->pcif_pool->xa_num_irqs.max)
		return 1;
	return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
}

int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
		      MLX5_CAP_GEN(dev, max_num_eqs) :
		      1 << MLX5_CAP_GEN(dev, log_max_eq);
	int total_vec;
	int pcif_vec;
	int req_vec;
	int err;
	int n;

	if (mlx5_core_is_sf(dev))
		return 0;

	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
	pcif_vec = min_t(int, pcif_vec, num_eqs);

	total_vec = pcif_vec;
	if (mlx5_sf_max_functions(dev))
		total_vec += MLX5_IRQ_CTRL_SF_MAX +
			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));

	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
	if (n < 0)
		return n;

	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
	if (err)
		pci_free_irq_vectors(dev->pdev);

	return err;
}
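/* Worked example (hypothetical values): on a 2-port device with 8 online
 * CPUs, pcif_vec = 2 * 8 + 1 = 17, capped by the EQ count and by
 * pci_msix_vec_count(). If SFs are supported, total_vec additionally
 * reserves MLX5_IRQ_CTRL_SF_MAX control vectors plus MLX5_COMP_EQS_PER_SF
 * vectors per SF, and irq_pools_init() receives the surplus
 * (total_vec - pcif_vec) as the SF vector budget.
 */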
void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	/* There are cases where IRQs will still be in use when we reach
	 * this point. Hence, make sure all the irqs are released.
	 */
	irq_pools_destroy(table);
	pci_free_irq_vectors(dev->pdev);
}

int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
	if (table->sf_comp_pool)
		return min_t(int, num_online_cpus(),
			     table->sf_comp_pool->xa_num_irqs.max -
			     table->sf_comp_pool->xa_num_irqs.min + 1);
	else
		return mlx5_irq_table_get_num_comp(table);
}

struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_MLX5_SF
	if (mlx5_core_is_sf(dev))
		return dev->priv.parent_mdev->priv.irq_table;
#endif
	return dev->priv.irq_table;
}