// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif

#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2

#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)

struct mlx5_irq {
	struct atomic_notifier_head nh;
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq_pool *pool;
	int refcount;
	struct msi_map map;
	u32 pool_index;
};

struct mlx5_irq_table {
	struct mlx5_irq_pool *pcif_pool;
	struct mlx5_irq_pool *sf_ctrl_pool;
	struct mlx5_irq_pool *sf_comp_pool;
};

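/* Map a PCI function ID to its vport number. EC VF functions are offset
 * from the EC VF vport base; for all other functions the function ID is
 * already the vport number.
 */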
static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
				   int func,
				   bool ec_vf_func)
{
	if (!ec_vf_func)
		return func;
	return mlx5_core_ec_vf_vport_base(dev) + func - 1;
}

/**
 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
 *                                   to be assigned to each VF.
 * @dev: PF to work on
 * @num_vfs: Number of enabled VFs
 */
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
	int num_vf_msix, min_msix, max_msix;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	/* Limit maximum number of MSI-X vectors so the default configuration
	 * has some available in the pool. This will allow the user to increase
	 * the number of vectors in a VF without having to first size-down other
	 * VFs.
	 */
	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}

/**
 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
 * @dev: PF to work on
 * @function_id: Internal PCI VF function ID
 * @msix_vec_count: Number of MSI-X vectors to set
 */
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
			    int msix_vec_count)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *hca_cap = NULL, *query_cap = NULL, *cap;
	int num_vf_msix, min_msix, max_msix;
	bool ec_vf_function;
	int vport;
	int ret;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
		return -EOPNOTSUPP;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	if (msix_vec_count < min_msix)
		return -EINVAL;

	if (msix_vec_count > max_msix)
		return -EOVERFLOW;

	query_cap = kvzalloc(query_sz, GFP_KERNEL);
	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
	if (!hca_cap || !query_cap) {
		ret = -ENOMEM;
		goto out;
	}

	ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
	vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
	ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
	       MLX5_UN_SZ_BYTES(hca_cap_union));
	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);

	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
	MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);

	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
	kvfree(hca_cap);
	kvfree(query_cap);
	return ret;
}

/* mlx5_system_free_irq - Free an IRQ
 * @irq: IRQ to free
 *
 * Free the IRQ and related system resources such as the rmap entry,
 * but do not free the mlx5 object or drop its reference.
 * This is essential for the shutdown flow, where system resources must be
 * cleaned up while mlx5 objects are kept alive,
 * see mlx5_irq_table_free_irqs().
 */
static void mlx5_system_free_irq(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rmap;
#endif

	/* free_irq() requires that the affinity hint and rmap be cleared
	 * before it is called. To satisfy this requirement, call
	 * irq_cpu_rmap_remove() to remove the notifier.
	 */
	irq_update_affinity_hint(irq->map.virq, NULL);
#ifdef CONFIG_RFS_ACCEL
	rmap = mlx5_eq_table_get_rmap(pool->dev);
	if (rmap)
		irq_cpu_rmap_remove(rmap, irq->map.virq);
#endif

	free_irq(irq->map.virq, &irq->nh);
	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
		pci_msix_free_irq(pool->dev->pdev, irq->map);
}

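/* Remove the IRQ from its pool, free the system IRQ and the affinity mask,
 * and release the mlx5_irq object itself.
 */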
static void irq_release(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;

	xa_erase(&pool->irqs, irq->pool_index);
	mlx5_system_free_irq(irq);
	free_cpumask_var(irq->mask);
	kfree(irq);
}

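/* Drop a reference on @irq and release it when the last reference is gone.
 * Returns 1 if the IRQ was released, 0 otherwise.
 */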
int mlx5_irq_put(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
	int ret = 0;

	mutex_lock(&pool->lock);
	irq->refcount--;
	if (!irq->refcount) {
		irq_release(irq);
		ret = 1;
	}
	mutex_unlock(&pool->lock);
	return ret;
}

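/* Return the current reference count of @irq. The pool lock must be held. */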
int mlx5_irq_read_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	return irq->refcount;
}

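/* Take an additional reference on an already referenced @irq while holding
 * the pool lock. Returns 1 on success, 0 if the IRQ has no references left.
 */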
int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	if (WARN_ON_ONCE(!irq->refcount))
		return 0;
	irq->refcount++;
	return 1;
}

static int irq_get(struct mlx5_irq *irq)
{
	int err;

	mutex_lock(&irq->pool->lock);
	err = mlx5_irq_get_locked(irq);
	mutex_unlock(&irq->pool->lock);
	return err;
}

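/* Interrupt handler: notify every EQ attached to this IRQ via its notifier
 * chain.
 */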
static irqreturn_t irq_int_handler(int irq, void *nh)
{
	atomic_notifier_call_chain(nh, 0, NULL);
	return IRQ_HANDLED;
}

static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
}

static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	if (!pool->xa_num_irqs.max) {
		/* in case we only have a single irq for the device */
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
		return;
	}

	if (!vecidx) {
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
		return;
	}

	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}

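/* Allocate an IRQ at pool index @i: reuse the statically allocated MSI-X
 * vector for index 0 (or whenever dynamic MSI-X allocation is unsupported),
 * otherwise allocate a vector dynamically. Then request the IRQ, apply the
 * affinity and store the IRQ in the pool.
 */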
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
				struct irq_affinity_desc *af_desc,
				struct cpu_rmap **rmap)
{
	struct mlx5_core_dev *dev = pool->dev;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq *irq;
	int err;

	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
	if (!irq)
		return ERR_PTR(-ENOMEM);
	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
		/* The vector at index 0 is always statically allocated. If
		 * dynamic irq is not supported all vectors are statically
		 * allocated. In both cases just get the irq number and set
		 * the index.
		 */
		irq->map.virq = pci_irq_vector(dev->pdev, i);
		irq->map.index = i;
	} else {
		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
		if (!irq->map.virq) {
			err = irq->map.index;
			goto err_alloc_irq;
		}
	}

	if (i && rmap && *rmap) {
#ifdef CONFIG_RFS_ACCEL
		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
		if (err)
			goto err_irq_rmap;
#endif
	}
	if (!mlx5_irq_pool_is_sf_pool(pool))
		irq_set_name(pool, name, i);
	else
		irq_sf_set_name(pool, name, i);
	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
	snprintf(irq->name, MLX5_MAX_IRQ_NAME,
		 "%s@pci:%s", name, pci_name(dev->pdev));
	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
			  &irq->nh);
	if (err) {
		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
		goto err_req_irq;
	}
	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
		mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
		err = -ENOMEM;
		goto err_cpumask;
	}
	if (af_desc) {
		cpumask_copy(irq->mask, &af_desc->mask);
		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
	}
	irq->pool = pool;
	irq->refcount = 1;
	irq->pool_index = i;
	err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
	if (err) {
		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
			      irq->pool_index, err);
		goto err_xa;
	}
	return irq;
err_xa:
	if (af_desc)
		irq_update_affinity_hint(irq->map.virq, NULL);
	free_cpumask_var(irq->mask);
err_cpumask:
	free_irq(irq->map.virq, &irq->nh);
err_req_irq:
#ifdef CONFIG_RFS_ACCEL
	if (i && rmap && *rmap) {
		free_irq_cpu_rmap(*rmap);
		*rmap = NULL;
	}
err_irq_rmap:
#endif
	if (i && pci_msix_can_alloc_dyn(dev->pdev))
		pci_msix_free_irq(dev->pdev, irq->map);
err_alloc_irq:
	kfree(irq);
	return ERR_PTR(err);
}

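/* Attach an EQ notifier block to @irq. Takes a reference on the IRQ and
 * fails with -ENOENT if the IRQ is no longer referenced.
 */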
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int ret;

	ret = irq_get(irq);
	if (!ret)
		/* Something is very wrong here: we are enabling an EQ
		 * on a non-existent IRQ.
		 */
		return -ENOENT;
	ret = atomic_notifier_chain_register(&irq->nh, nb);
	if (ret)
		mlx5_irq_put(irq);
	return ret;
}

int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int err = 0;

	err = atomic_notifier_chain_unregister(&irq->nh, nb);
	mlx5_irq_put(irq);
	return err;
}

struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
{
	return irq->mask;
}

int mlx5_irq_get_index(struct mlx5_irq *irq)
{
	return irq->map.index;
}

/* irq_pool API */

/* Request an IRQ from a given pool according to the given index. */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
			struct irq_affinity_desc *af_desc,
			struct cpu_rmap **rmap)
{
	struct mlx5_irq *irq;

	mutex_lock(&pool->lock);
	irq = xa_load(&pool->irqs, vecidx);
	if (irq) {
		mlx5_irq_get_locked(irq);
		goto unlock;
	}
	irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
	mutex_unlock(&pool->lock);
	return irq;
}

static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_ctrl_pool;
}

static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_comp_pool;
}

struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_irq_pool_get(irq_table);

	/* In some configurations there is no pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_ctrl_irq_pool_get(irq_table);

	/* In some configurations there is no pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

/**
 * mlx5_irqs_release - release one or more IRQs back to the system.
 * @irqs: IRQs to be released.
 * @nirqs: number of IRQs to be released.
 */
static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
{
	int i;

	for (i = 0; i < nirqs; i++) {
		synchronize_irq(irqs[i]->map.virq);
		mlx5_irq_put(irqs[i]);
	}
}

/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @ctrl_irq: ctrl IRQ to be released.
 */
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
{
	mlx5_irqs_release(&ctrl_irq, 1);
}

/**
 * mlx5_ctrl_irq_request - request a ctrl IRQ for an mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 *
 * This function returns a pointer to the IRQ, or an ERR_PTR in case of error.
 */
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
	struct irq_affinity_desc af_desc;
	struct mlx5_irq *irq;

	cpumask_copy(&af_desc.mask, cpu_online_mask);
	af_desc.is_managed = false;
	if (!mlx5_irq_pool_is_sf_pool(pool)) {
		/* We are allocating a control IRQ from a PCI device's pool.
		 * This can also happen for an SF if the SF pool is empty.
		 */
		if (!pool->xa_num_irqs.max) {
			cpumask_clear(&af_desc.mask);
			/* In case we only have a single IRQ for PF/VF */
			cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
		}
		/* Allocate the IRQ at index 0. The vector was already allocated */
		irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
	} else {
		irq = mlx5_irq_affinity_request(pool, &af_desc);
	}

	return irq;
}

/**
 * mlx5_irq_request - request an IRQ for an mlx5 PF/VF device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
 * provided.
 * @af_desc: affinity descriptor for this IRQ.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * This function returns a pointer to the IRQ, or an ERR_PTR in case of error.
 */
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
				  struct irq_affinity_desc *af_desc,
				  struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool;
	struct mlx5_irq *irq;

	pool = irq_table->pcif_pool;
	irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
	if (IS_ERR(irq))
		return irq;
	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
		      irq->map.virq, cpumask_pr_args(&af_desc->mask),
		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
	return irq;
}

/**
 * mlx5_msix_alloc - allocate an MSI-X interrupt
 * @dev: mlx5 device from which to request
 * @handler: interrupt handler
 * @affdesc: affinity descriptor
 * @name: interrupt name
 *
 * Returns: struct msi_map with result encoded.
 * Note: the caller must make sure to release the irq by calling
 *       mlx5_msix_free() if shutdown was initiated.
 */
struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
			       irqreturn_t (*handler)(int, void *),
			       const struct irq_affinity_desc *affdesc,
			       const char *name)
{
	struct msi_map map;
	int err;

	if (!dev->pdev) {
		map.virq = 0;
		map.index = -EINVAL;
		return map;
	}

	map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
	if (!map.virq)
		return map;

	err = request_irq(map.virq, handler, 0, name, NULL);
	if (err) {
		mlx5_core_warn(dev, "err %d\n", err);
		pci_msix_free_irq(dev->pdev, map);
		map.virq = 0;
		map.index = -ENOMEM;
	}
	return map;
}
EXPORT_SYMBOL(mlx5_msix_alloc);

/**
 * mlx5_msix_free - free a previously allocated MSI-X interrupt
 * @dev: mlx5 device associated with the interrupt
 * @map: map previously returned by mlx5_msix_alloc()
 */
void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
{
	free_irq(map.virq, NULL);
	pci_msix_free_irq(dev->pdev, map);
}
EXPORT_SYMBOL(mlx5_msix_free);

/**
 * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
 * @irqs: IRQs to be released.
 * @nirqs: number of IRQs to be released.
 */
void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
{
	mlx5_irqs_release(irqs, nirqs);
}

/**
 * mlx5_irqs_request_vectors - request one or more IRQs for an mlx5 device.
 * @dev: mlx5 device that is requesting the IRQs.
 * @cpus: CPUs array for binding the IRQs
 * @nirqs: number of IRQs to request.
 * @irqs: an output array of IRQ pointers.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * Each IRQ is bound to at most 1 CPU.
 * This function requests @nirqs IRQs.
 *
 * Returns the number of IRQs requested (which might be smaller than @nirqs)
 * on success, or a negative error code in case of an error.
 */
int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
			      struct mlx5_irq **irqs, struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = table->pcif_pool;
	struct irq_affinity_desc af_desc;
	struct mlx5_irq *irq;
	int offset = 1;
	int i;

	if (!pool->xa_num_irqs.max)
		offset = 0;

	af_desc.is_managed = false;
	for (i = 0; i < nirqs; i++) {
		cpumask_clear(&af_desc.mask);
		cpumask_set_cpu(cpus[i], &af_desc.mask);
		irq = mlx5_irq_request(dev, i + offset, &af_desc, rmap);
		if (IS_ERR(irq))
			break;
		irqs[i] = irq;
	}

	return i ? i : PTR_ERR(irq);
}

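/* Allocate and initialize an IRQ pool covering vector indices
 * [start, start + size - 1]. The min/max thresholds bound how heavily
 * IRQs in this pool may be shared by EQs.
 */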
static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
	       u32 min_threshold, u32 max_threshold)
{
	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);

	if (!pool)
		return ERR_PTR(-ENOMEM);
	pool->dev = dev;
	mutex_init(&pool->lock);
	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
	pool->xa_num_irqs.min = start;
	pool->xa_num_irqs.max = start + size - 1;
	if (name)
		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
			 "%s", name);
	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
		      name, size, start);
	return pool;
}

static void irq_pool_free(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	/* There are cases in which we are destroying the irq_table before
	 * freeing all the IRQs, fast teardown for example. Hence, free the
	 * IRQs which might not have been freed yet.
	 */
	xa_for_each(&pool->irqs, index, irq)
		irq_release(irq);
	xa_destroy(&pool->irqs);
	mutex_destroy(&pool->lock);
	kfree(pool->irqs_per_cpu);
	kvfree(pool);
}

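/* Create the PCI function (PF/VF) IRQ pool and, when SFs are supported and
 * enough vectors are available, the SF control and SF completion pools.
 */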
static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int num_sf_ctrl_by_msix;
	int num_sf_ctrl_by_sfs;
	int num_sf_ctrl;
	int err;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool))
		return PTR_ERR(table->pcif_pool);
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
	num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
					  MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	/* init sf_comp_pool */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	return err;
}

static void irq_pools_destroy(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		irq_pool_free(table->sf_comp_pool);
		irq_pool_free(table->sf_ctrl_pool);
	}
	irq_pool_free(table->pcif_pool);
}

static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	xa_for_each(&pool->irqs, index, irq)
		mlx5_system_free_irq(irq);
}

static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		mlx5_irq_pool_free_irqs(table->sf_comp_pool);
		mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
	}
	mlx5_irq_pool_free_irqs(table->pcif_pool);
}

/* irq_table API */

int mlx5_irq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table;

	if (mlx5_core_is_sf(dev))
		return 0;

	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
				  dev->priv.numa_node);
	if (!irq_table)
		return -ENOMEM;

	dev->priv.irq_table = irq_table;
	return 0;
}

void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
{
	if (mlx5_core_is_sf(dev))
		return;

	kvfree(dev->priv.irq_table);
}

int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
	if (!table->pcif_pool->xa_num_irqs.max)
		return 1;
	return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
}

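/* Size the MSI-X vector space for the PCI function and the SFs it may host,
 * allocate the base vectors and initialize the IRQ pools.
 */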
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
		      MLX5_CAP_GEN(dev, max_num_eqs) :
		      1 << MLX5_CAP_GEN(dev, log_max_eq);
	int total_vec;
	int pcif_vec;
	int req_vec;
	int err;
	int n;

	if (mlx5_core_is_sf(dev))
		return 0;

	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
	pcif_vec = min_t(int, pcif_vec, num_eqs);

	total_vec = pcif_vec;
	if (mlx5_sf_max_functions(dev))
		total_vec += MLX5_IRQ_CTRL_SF_MAX +
			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));

	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
	if (n < 0)
		return n;

	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
	if (err)
		pci_free_irq_vectors(dev->pdev);

	return err;
}

void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	/* There are cases where IRQs are still in use when we reach this
	 * point. Hence, make sure all the IRQs are released.
	 */
	irq_pools_destroy(table);
	pci_free_irq_vectors(dev->pdev);
}

void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	mlx5_irq_pools_free_irqs(table);
	pci_free_irq_vectors(dev->pdev);
}

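/* Return the number of completion vectors available for SFs, capped by the
 * number of online CPUs. Fall back to the PCI function completion vectors
 * when there is no SF completion pool.
 */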
int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
	if (table->sf_comp_pool)
		return min_t(int, num_online_cpus(),
			     table->sf_comp_pool->xa_num_irqs.max -
			     table->sf_comp_pool->xa_num_irqs.min + 1);
	else
		return mlx5_irq_table_get_num_comp(table);
}

struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_MLX5_SF
	if (mlx5_core_is_sf(dev))
		return dev->priv.parent_mdev->priv.irq_table;
#endif
	return dev->priv.irq_table;
}