// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif

#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_MAX_MSIX_PER_SF 256
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
#define MLX5_IRQ_VEC_COMP_BASE 1

#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)

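/* A single mlx5 IRQ: the notifier chain of EQs sharing one MSI-X vector,
 * the vector's affinity mask, and its position within the owning pool.
 * The refcount tracks how many users hold a reference to this IRQ; the IRQ
 * is released once the refcount drops to zero.
 */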
struct mlx5_irq {
	struct atomic_notifier_head nh;
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_FORMATTED_NAME];
	struct mlx5_irq_pool *pool;
	int refcount;
	struct msi_map map;
	u32 pool_index;
};

struct mlx5_irq_table {
	struct mlx5_irq_pool *pcif_pool;
	struct mlx5_irq_pool *sf_ctrl_pool;
	struct mlx5_irq_pool *sf_comp_pool;
};

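/* Translate a function ID to its vport number. For non-EC VFs the two are
 * the same; for EC (embedded CPU) VFs the vport is offset from the EC VF
 * vport base.
 */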
static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
				   int func,
				   bool ec_vf_func)
{
	if (!ec_vf_func)
		return func;
	return mlx5_core_ec_vf_vport_base(dev) + func - 1;
}

/**
 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
 *                                   to be assigned to each VF.
 * @dev: PF to work on
 * @num_vfs: Number of enabled VFs
 */
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
	int num_vf_msix, min_msix, max_msix;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	/* Limit the maximum number of MSI-X vectors so the default configuration
	 * has some available in the pool. This allows the user to increase the
	 * number of vectors in a VF without having to first size down other VFs.
	 */
	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}

/**
 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
 * @dev: PF to work on
 * @function_id: Internal PCI VF function ID
 * @msix_vec_count: Number of MSI-X vectors to set
 */
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
			    int msix_vec_count)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *hca_cap = NULL, *query_cap = NULL, *cap;
	int num_vf_msix, min_msix, max_msix;
	bool ec_vf_function;
	int vport;
	int ret;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
		return -EOPNOTSUPP;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	if (msix_vec_count < min_msix)
		return -EINVAL;

	if (msix_vec_count > max_msix)
		return -EOVERFLOW;

	query_cap = kvzalloc(query_sz, GFP_KERNEL);
	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
	if (!hca_cap || !query_cap) {
		ret = -ENOMEM;
		goto out;
	}

	ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
	vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
	ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
	       MLX5_UN_SZ_BYTES(hca_cap_union));
	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);

	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
	MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);

	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
	kvfree(hca_cap);
	kvfree(query_cap);
	return ret;
}

/* mlx5_system_free_irq - Free an IRQ
 * @irq: IRQ to free
 *
 * Free the IRQ and other resources, such as the rmap, from the system.
 * It does NOT free the mlx5 object or remove any reference from mlx5.
 * This function is very important for the shutdown flow, where we need to
 * clean up system resources but keep mlx5 objects alive,
 * see mlx5_irq_table_free_irqs().
 */
static void mlx5_system_free_irq(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rmap;
#endif

	/* free_irq() requires that the affinity_hint and rmap are cleared
	 * before calling it. To satisfy this requirement, we call
	 * irq_cpu_rmap_remove() to remove the notifier.
	 */
	irq_update_affinity_hint(irq->map.virq, NULL);
#ifdef CONFIG_RFS_ACCEL
	rmap = mlx5_eq_table_get_rmap(pool->dev);
	if (rmap)
		irq_cpu_rmap_remove(rmap, irq->map.virq);
#endif

	free_irq(irq->map.virq, &irq->nh);
	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
		pci_msix_free_irq(pool->dev->pdev, irq->map);
}

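/* Remove the IRQ from its pool, free the system IRQ and all the mlx5
 * resources attached to it.
 */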
static void irq_release(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;

	xa_erase(&pool->irqs, irq->pool_index);
	mlx5_system_free_irq(irq);
	free_cpumask_var(irq->mask);
	kfree(irq);
}

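/* Drop one reference to the IRQ; release the IRQ entirely when the last
 * reference is dropped. Returns 1 if the IRQ was released, 0 otherwise.
 */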
int mlx5_irq_put(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
	int ret = 0;

	mutex_lock(&pool->lock);
	irq->refcount--;
	if (!irq->refcount) {
		irq_release(irq);
		ret = 1;
	}
	mutex_unlock(&pool->lock);
	return ret;
}

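/* Return the current reference count of the IRQ. The pool lock must be held
 * by the caller.
 */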
int mlx5_irq_read_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	return irq->refcount;
}

int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	if (WARN_ON_ONCE(!irq->refcount))
		return 0;
	irq->refcount++;
	return 1;
}

static int irq_get(struct mlx5_irq *irq)
{
	int err;

	mutex_lock(&irq->pool->lock);
	err = mlx5_irq_get_locked(irq);
	mutex_unlock(&irq->pool->lock);
	return err;
}

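/* Interrupt handler shared by all mlx5 IRQs: forward the interrupt to every
 * notifier (EQ) registered on this vector's notifier chain.
 */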
static irqreturn_t irq_int_handler(int irq, void *nh)
{
	atomic_notifier_call_chain(nh, 0, NULL);
	return IRQ_HANDLED;
}

static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
}

static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	if (!pool->xa_num_irqs.max) {
		/* in case we only have a single irq for the device */
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
		return;
	}

	if (!vecidx) {
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
		return;
	}

	vecidx -= MLX5_IRQ_VEC_COMP_BASE;
	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}

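/* Allocate an IRQ at index @i in @pool: get or dynamically allocate the MSI-X
 * vector, optionally add it to the rmap, name it, request the system IRQ with
 * the shared notifier-chain handler, apply the affinity hint and store the
 * new entry in the pool's xarray. Returns the IRQ, or an ERR_PTR on failure.
 */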
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
				struct irq_affinity_desc *af_desc,
				struct cpu_rmap **rmap)
{
	struct mlx5_core_dev *dev = pool->dev;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq *irq;
	int err;

	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
	if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
		kfree(irq);
		return ERR_PTR(-ENOMEM);
	}

	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
		/* The vector at index 0 is always statically allocated. If
		 * dynamic irq is not supported, all vectors are statically
		 * allocated. In both cases just get the irq number and set
		 * the index.
		 */
		irq->map.virq = pci_irq_vector(dev->pdev, i);
		irq->map.index = i;
	} else {
		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
		if (!irq->map.virq) {
			err = irq->map.index;
			goto err_alloc_irq;
		}
	}

	if (i && rmap && *rmap) {
#ifdef CONFIG_RFS_ACCEL
		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
		if (err)
			goto err_irq_rmap;
#endif
	}
	if (!mlx5_irq_pool_is_sf_pool(pool))
		irq_set_name(pool, name, i);
	else
		irq_sf_set_name(pool, name, i);
	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
	snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
		 MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
			  &irq->nh);
	if (err) {
		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
		goto err_req_irq;
	}

	if (af_desc) {
		cpumask_copy(irq->mask, &af_desc->mask);
		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
	}
	irq->pool = pool;
	irq->refcount = 1;
	irq->pool_index = i;
	err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
	if (err) {
		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
			      irq->pool_index, err);
		goto err_xa;
	}
	return irq;
err_xa:
	if (af_desc)
		irq_update_affinity_hint(irq->map.virq, NULL);
	free_irq(irq->map.virq, &irq->nh);
err_req_irq:
#ifdef CONFIG_RFS_ACCEL
	if (i && rmap && *rmap) {
		free_irq_cpu_rmap(*rmap);
		*rmap = NULL;
	}
err_irq_rmap:
#endif
	if (i && pci_msix_can_alloc_dyn(dev->pdev))
		pci_msix_free_irq(dev->pdev, irq->map);
err_alloc_irq:
	free_cpumask_var(irq->mask);
	kfree(irq);
	return ERR_PTR(err);
}

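/* Attach a notifier (typically an EQ) to the IRQ's notifier chain, taking a
 * reference on the IRQ. Fails with -ENOENT if the IRQ has no live reference.
 */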
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int ret;

	ret = irq_get(irq);
	if (!ret)
		/* Something went very wrong here: we are enabling an EQ
		 * on a non-existing IRQ.
		 */
		return -ENOENT;
	ret = atomic_notifier_chain_register(&irq->nh, nb);
	if (ret)
		mlx5_irq_put(irq);
	return ret;
}

int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int err = 0;

	err = atomic_notifier_chain_unregister(&irq->nh, nb);
	mlx5_irq_put(irq);
	return err;
}

struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
{
	return irq->mask;
}

int mlx5_irq_get_irq(const struct mlx5_irq *irq)
{
	return irq->map.virq;
}

int mlx5_irq_get_index(struct mlx5_irq *irq)
{
	return irq->map.index;
}

/* irq_pool API */

/* request an irq from a given pool according to the given index */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
			struct irq_affinity_desc *af_desc,
			struct cpu_rmap **rmap)
{
	struct mlx5_irq *irq;

	mutex_lock(&pool->lock);
	irq = xa_load(&pool->irqs, vecidx);
	if (irq) {
		mlx5_irq_get_locked(irq);
		goto unlock;
	}
	irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
	mutex_unlock(&pool->lock);
	return irq;
}

static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_ctrl_pool;
}

static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_comp_pool;
}

struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_ctrl_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

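/* Wait for any in-flight handler on this vector to finish, then drop the
 * reference.
 */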
static void _mlx5_irq_release(struct mlx5_irq *irq)
{
	synchronize_irq(irq->map.virq);
	mlx5_irq_put(irq);
}

/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @dev: mlx5 device that is releasing the IRQ.
 * @ctrl_irq: ctrl IRQ to be released.
 */
void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq)
{
	mlx5_irq_affinity_irq_release(dev, ctrl_irq);
}

/**
 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 *
 * This function returns a pointer to the IRQ, or ERR_PTR in case of an error.
 */
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
	struct irq_affinity_desc af_desc;
	struct mlx5_irq *irq;

	cpumask_copy(&af_desc.mask, cpu_online_mask);
	af_desc.is_managed = false;
	if (!mlx5_irq_pool_is_sf_pool(pool)) {
		/* We are allocating a control IRQ from a PCI device's pool.
		 * This can also happen for an SF if the SF pool is empty.
		 */
		if (!pool->xa_num_irqs.max) {
			cpumask_clear(&af_desc.mask);
			/* In case we only have a single IRQ for PF/VF */
			cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
		}
		/* Allocate the IRQ at index 0. The vector was already allocated */
		irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
	} else {
		irq = mlx5_irq_affinity_request(dev, pool, &af_desc);
	}

	return irq;
}

/**
 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
 * provided.
 * @af_desc: affinity descriptor for this IRQ.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * This function returns a pointer to the IRQ, or ERR_PTR in case of an error.
 */
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
				  struct irq_affinity_desc *af_desc,
				  struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool;
	struct mlx5_irq *irq;

	pool = irq_table->pcif_pool;
	irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
	if (IS_ERR(irq))
		return irq;
	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
		      irq->map.virq, cpumask_pr_args(&af_desc->mask),
		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
	return irq;
}

/**
 * mlx5_irq_release_vector - release one IRQ back to the system.
 * @irq: the irq to release.
 */
void mlx5_irq_release_vector(struct mlx5_irq *irq)
{
	_mlx5_irq_release(irq);
}

/**
 * mlx5_irq_request_vector - request one IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @cpu: CPU to bind the IRQ to.
 * @vecidx: vector index to request an IRQ for.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * Each IRQ is bound to at most 1 CPU.
 * This function requests one IRQ for the given @vecidx.
 *
 * This function returns a pointer to the irq on success, or an error pointer
 * in case of an error.
 */
struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
					 u16 vecidx, struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = table->pcif_pool;
	struct irq_affinity_desc af_desc;
	int offset = MLX5_IRQ_VEC_COMP_BASE;

	if (!pool->xa_num_irqs.max)
		offset = 0;

	af_desc.is_managed = false;
	cpumask_clear(&af_desc.mask);
	cpumask_set_cpu(cpu, &af_desc.mask);
	return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap);
}

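/* Allocate a pool describing the MSI-X vectors at indices
 * [start, start + size - 1]. The min/max thresholds (scaled by
 * MLX5_EQ_REFS_PER_IRQ) bound how heavily IRQs of this pool may be shared.
 */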
static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
	       u32 min_threshold, u32 max_threshold)
{
	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);

	if (!pool)
		return ERR_PTR(-ENOMEM);
	pool->dev = dev;
	mutex_init(&pool->lock);
	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
	pool->xa_num_irqs.min = start;
	pool->xa_num_irqs.max = start + size - 1;
	if (name)
		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
			 "%s", name);
	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
		      name, size, start);
	return pool;
}

static void irq_pool_free(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	/* There are cases in which we are destroying the irq_table before
	 * freeing all the IRQs, fast teardown for example. Hence, free the
	 * IRQs which might not have been freed.
	 */
	xa_for_each(&pool->irqs, index, irq)
		irq_release(irq);
	xa_destroy(&pool->irqs);
	mutex_destroy(&pool->lock);
	kfree(pool->irqs_per_cpu);
	kvfree(pool);
}

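/* Carve the available vectors into the three pools: the PCI PF/VF (pcif)
 * pool at indices [0, pcif_vec - 1], then the SF control pool, and finally
 * the SF completion pool with whatever vectors remain.
 */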
static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec,
			  bool dynamic_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int sf_vec_available = sf_vec;
	int num_sf_ctrl;
	int err;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool))
		return PTR_ERR(table->pcif_pool);
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	num_sf_ctrl = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
				   MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	if (!dynamic_vec && (num_sf_ctrl + 1) > sf_vec_available) {
		mlx5_core_dbg(dev,
			      "Not enough IRQs for SFs control and completion pool, required=%d avail=%d\n",
			      num_sf_ctrl + 1, sf_vec_available);
		return 0;
	}

	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	sf_vec_available -= num_sf_ctrl;

	/* init sf_comp_pool, remaining vectors are for the SF completions */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec_available, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	return err;
}

static void irq_pools_destroy(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		irq_pool_free(table->sf_comp_pool);
		irq_pool_free(table->sf_ctrl_pool);
	}
	irq_pool_free(table->pcif_pool);
}

static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	xa_for_each(&pool->irqs, index, irq)
		mlx5_system_free_irq(irq);
}

static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		mlx5_irq_pool_free_irqs(table->sf_comp_pool);
		mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
	}
	mlx5_irq_pool_free_irqs(table->pcif_pool);
}

/* irq_table API */

int mlx5_irq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table;

	if (mlx5_core_is_sf(dev))
		return 0;

	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
				  dev->priv.numa_node);
	if (!irq_table)
		return -ENOMEM;

	dev->priv.irq_table = irq_table;
	return 0;
}

void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
{
	if (mlx5_core_is_sf(dev))
		return;

	kvfree(dev->priv.irq_table);
}

int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
	if (!table->pcif_pool->xa_num_irqs.max)
		return 1;
	return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
}

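/* Size the MSI-X budget for this device: the PCI function's (PF/VF) share is
 * derived from the number of ports and online CPUs, then capped by the device
 * EQ capability and the PCI MSI-X vector count. Any remainder (up to
 * MLX5_MAX_MSIX_PER_SF per SF) is left for SFs. With dynamic MSI-X allocation
 * only one vector is allocated up front; the rest are allocated on demand.
 */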
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
	int num_eqs = mlx5_max_eq_cap_get(dev);
	bool dynamic_vec;
	int total_vec;
	int pcif_vec;
	int req_vec;
	int err;
	int n;

	if (mlx5_core_is_sf(dev))
		return 0;

	/* PCI PF vectors usage is limited by online cpus, device EQs and
	 * PCI MSI-X capability.
	 */
	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
	pcif_vec = min_t(int, pcif_vec, num_eqs);
	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));

	total_vec = pcif_vec;
	if (mlx5_sf_max_functions(dev))
		total_vec += MLX5_MAX_MSIX_PER_SF * mlx5_sf_max_functions(dev);
	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));

	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
	if (n < 0)
		return n;

	/* Further limit vectors of the pools based on platform for the non-dynamic case */
	dynamic_vec = pci_msix_can_alloc_dyn(dev->pdev);
	if (!dynamic_vec) {
		pcif_vec = min_t(int, n, pcif_vec);
		total_vec = min_t(int, n, total_vec);
	}

	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec, dynamic_vec);
	if (err)
		pci_free_irq_vectors(dev->pdev);

	return err;
}

void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	/* There are cases where IRQs will still be in use when we reach
	 * here. Hence, make sure all the IRQs are released.
	 */
	irq_pools_destroy(table);
	pci_free_irq_vectors(dev->pdev);
}

void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	mlx5_irq_pools_free_irqs(table);
	pci_free_irq_vectors(dev->pdev);
}

int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
	if (table->sf_comp_pool)
		return min_t(int, num_online_cpus(),
			     table->sf_comp_pool->xa_num_irqs.max -
			     table->sf_comp_pool->xa_num_irqs.min + 1);
	else
		return mlx5_irq_table_get_num_comp(table);
}

struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_MLX5_SF
	if (mlx5_core_is_sf(dev))
		return dev->priv.parent_mdev->priv.irq_table;
#endif
	return dev->priv.irq_table;
}