1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3
4 #include <linux/pci.h>
5 #include <linux/interrupt.h>
6 #include <linux/notifier.h>
7 #include <linux/mlx5/driver.h>
8 #include <linux/mlx5/vport.h>
9 #include "mlx5_core.h"
10 #include "mlx5_irq.h"
11 #include "pci_irq.h"
12 #include "lib/sf.h"
13 #include "lib/eq.h"
14 #ifdef CONFIG_RFS_ACCEL
15 #include <linux/cpu_rmap.h>
16 #endif
17
/* Divisor applied to mlx5_sf_max_functions() in irq_pools_init() to size the
 * SF control pool (one control IRQ per this many SFs, rounded up).
 */
#define MLX5_SFS_PER_CTRL_IRQ 64
/* Per-SF vector budget added to total_vec in mlx5_irq_table_create() */
#define MLX5_MAX_MSIX_PER_SF 256
/* Upper bound on the number of vectors in the SF control pool */
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
/* Offset of the first completion vector inside the PCI function pool;
 * index 0 is the one named "mlx5_async".
 */
#define MLX5_IRQ_VEC_COMP_BASE 1

/* min/max thresholds handed to irq_pool_alloc() for the completion (COMP)
 * and control (CTRL) pools; irq_pool_alloc() scales them by
 * MLX5_EQ_REFS_PER_IRQ.
 */
#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
29
/* Driver-side bookkeeping for one requested interrupt vector. */
struct mlx5_irq {
	/* notifier chain invoked from irq_int_handler(); also the dev_id
	 * cookie given to request_irq()/free_irq()
	 */
	struct atomic_notifier_head nh;
	/* copy of the affinity mask supplied at allocation time (if any) */
	cpumask_var_t mask;
	/* formatted name handed to request_irq() */
	char name[MLX5_MAX_IRQ_FORMATTED_NAME];
	/* pool this irq is stored in */
	struct mlx5_irq_pool *pool;
	/* reference count; the get/put helpers update it under pool->lock */
	int refcount;
	/* Linux irq number (virq) and MSI-X index */
	struct msi_map map;
	/* key of this irq inside pool->irqs */
	u32 pool_index;
};
39
/* Per-device set of IRQ pools. The two SF pools are optional and stay NULL
 * when irq_pools_init() decides not to create them.
 */
struct mlx5_irq_table {
	/* pool for the PCI function itself; also the fallback for SFs */
	struct mlx5_irq_pool *pcif_pool;
	/* pool of control IRQs for SFs */
	struct mlx5_irq_pool *sf_ctrl_pool;
	/* pool of completion IRQs for SFs */
	struct mlx5_irq_pool *sf_comp_pool;
};
45
/* Translate a function number into a vport number.
 * Non-EC-VF functions map to themselves; EC VF functions are placed at
 * mlx5_core_ec_vf_vport_base() + func - 1.
 */
static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
				   int func,
				   bool ec_vf_func)
{
	if (ec_vf_func)
		return mlx5_core_ec_vf_vport_base(dev) + func - 1;

	return func;
}
54
55 /**
56 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
57 * to be ssigned to each VF.
58 * @dev: PF to work on
59 * @num_vfs: Number of enabled VFs
60 */
mlx5_get_default_msix_vec_count(struct mlx5_core_dev * dev,int num_vfs)61 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
62 {
63 int num_vf_msix, min_msix, max_msix;
64
65 num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
66 if (!num_vf_msix)
67 return 0;
68
69 min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
70 max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
71
72 /* Limit maximum number of MSI-X vectors so the default configuration
73 * has some available in the pool. This will allow the user to increase
74 * the number of vectors in a VF without having to first size-down other
75 * VFs.
76 */
77 return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
78 }
79
80 /**
81 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
82 * @dev: PF to work on
83 * @function_id: Internal PCI VF function IDd
84 * @msix_vec_count: Number of MSI-X vectors to set
85 */
mlx5_set_msix_vec_count(struct mlx5_core_dev * dev,int function_id,int msix_vec_count)86 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
87 int msix_vec_count)
88 {
89 int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
90 int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
91 void *hca_cap = NULL, *query_cap = NULL, *cap;
92 int num_vf_msix, min_msix, max_msix;
93 bool ec_vf_function;
94 int vport;
95 int ret;
96
97 num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
98 if (!num_vf_msix)
99 return 0;
100
101 if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
102 return -EOPNOTSUPP;
103
104 min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
105 max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
106
107 if (msix_vec_count < min_msix)
108 return -EINVAL;
109
110 if (msix_vec_count > max_msix)
111 return -EOVERFLOW;
112
113 query_cap = kvzalloc(query_sz, GFP_KERNEL);
114 hca_cap = kvzalloc(set_sz, GFP_KERNEL);
115 if (!hca_cap || !query_cap) {
116 ret = -ENOMEM;
117 goto out;
118 }
119
120 ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
121 vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
122 ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
123 if (ret)
124 goto out;
125
126 cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
127 memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
128 MLX5_UN_SZ_BYTES(hca_cap_union));
129 MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
130
131 MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
132 MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
133 MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
134 MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
135
136 MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
137 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
138 ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
139 out:
140 kvfree(hca_cap);
141 kvfree(query_cap);
142 return ret;
143 }
144
145 /* mlx5_system_free_irq - Free an IRQ
146 * @irq: IRQ to free
147 *
148 * Free the IRQ and other resources such as rmap from the system.
149 * BUT doesn't free or remove reference from mlx5.
150 * This function is very important for the shutdown flow, where we need to
151 * cleanup system resoruces but keep mlx5 objects alive,
152 * see mlx5_irq_table_free_irqs().
153 */
mlx5_system_free_irq(struct mlx5_irq * irq)154 static void mlx5_system_free_irq(struct mlx5_irq *irq)
155 {
156 struct mlx5_irq_pool *pool = irq->pool;
157 #ifdef CONFIG_RFS_ACCEL
158 struct cpu_rmap *rmap;
159 #endif
160
161 /* free_irq requires that affinity_hint and rmap will be cleared before
162 * calling it. To satisfy this requirement, we call
163 * irq_cpu_rmap_remove() to remove the notifier
164 */
165 irq_update_affinity_hint(irq->map.virq, NULL);
166 #ifdef CONFIG_RFS_ACCEL
167 rmap = mlx5_eq_table_get_rmap(pool->dev);
168 if (rmap)
169 irq_cpu_rmap_remove(rmap, irq->map.virq);
170 #endif
171
172 free_irq(irq->map.virq, &irq->nh);
173 if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
174 pci_msix_free_irq(pool->dev->pdev, irq->map);
175 }
176
irq_release(struct mlx5_irq * irq)177 static void irq_release(struct mlx5_irq *irq)
178 {
179 struct mlx5_irq_pool *pool = irq->pool;
180
181 xa_erase(&pool->irqs, irq->pool_index);
182 mlx5_system_free_irq(irq);
183 free_cpumask_var(irq->mask);
184 kfree(irq);
185 }
186
mlx5_irq_put(struct mlx5_irq * irq)187 int mlx5_irq_put(struct mlx5_irq *irq)
188 {
189 struct mlx5_irq_pool *pool = irq->pool;
190 int ret = 0;
191
192 mutex_lock(&pool->lock);
193 irq->refcount--;
194 if (!irq->refcount) {
195 irq_release(irq);
196 ret = 1;
197 }
198 mutex_unlock(&pool->lock);
199 return ret;
200 }
201
mlx5_irq_read_locked(struct mlx5_irq * irq)202 int mlx5_irq_read_locked(struct mlx5_irq *irq)
203 {
204 lockdep_assert_held(&irq->pool->lock);
205 return irq->refcount;
206 }
207
mlx5_irq_get_locked(struct mlx5_irq * irq)208 int mlx5_irq_get_locked(struct mlx5_irq *irq)
209 {
210 lockdep_assert_held(&irq->pool->lock);
211 if (WARN_ON_ONCE(!irq->refcount))
212 return 0;
213 irq->refcount++;
214 return 1;
215 }
216
irq_get(struct mlx5_irq * irq)217 static int irq_get(struct mlx5_irq *irq)
218 {
219 int err;
220
221 mutex_lock(&irq->pool->lock);
222 err = mlx5_irq_get_locked(irq);
223 mutex_unlock(&irq->pool->lock);
224 return err;
225 }
226
irq_int_handler(int irq,void * nh)227 static irqreturn_t irq_int_handler(int irq, void *nh)
228 {
229 atomic_notifier_call_chain(nh, 0, NULL);
230 return IRQ_HANDLED;
231 }
232
irq_sf_set_name(struct mlx5_irq_pool * pool,char * name,int vecidx)233 static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
234 {
235 snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
236 }
237
irq_set_name(struct mlx5_irq_pool * pool,char * name,int vecidx)238 static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
239 {
240 if (!pool->xa_num_irqs.max) {
241 /* in case we only have a single irq for the device */
242 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
243 return;
244 }
245
246 if (!vecidx) {
247 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
248 return;
249 }
250
251 vecidx -= MLX5_IRQ_VEC_COMP_BASE;
252 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
253 }
254
/* Allocate and request a new irq for slot @i of @pool.
 *
 * @pool: pool the irq is stored in
 * @i: pool index; also the MSI-X index for statically allocated vectors
 * @af_desc: optional affinity descriptor; when non-NULL its mask is copied
 *           into the irq and applied as affinity and hint
 * @rmap: optional pointer to a cpu_rmap pointer; used for @i != 0 only
 *
 * On success the irq is stored in pool->irqs with a reference count of 1.
 * Returns the new irq or an ERR_PTR().
 *
 * NOTE(review): irq_pool_request_vector() calls this with pool->lock held;
 * presumably every caller is expected to do so - confirm.
 */
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
				struct irq_affinity_desc *af_desc,
				struct cpu_rmap **rmap)
{
	struct mlx5_core_dev *dev = pool->dev;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq *irq;
	int err;

	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
	if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
		/* kfree(NULL) is a no-op, so this covers both failures */
		kfree(irq);
		return ERR_PTR(-ENOMEM);
	}

	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
		/* The vector at index 0 is always statically allocated. If
		 * dynamic irq is not supported all vectors are statically
		 * allocated. In both cases just get the irq number and set
		 * the index.
		 */
		irq->map.virq = pci_irq_vector(dev->pdev, i);
		irq->map.index = i;
	} else {
		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
		if (!irq->map.virq) {
			/* on failure the index field is used as the error code */
			err = irq->map.index;
			goto err_alloc_irq;
		}
	}

	if (i && rmap && *rmap) {
#ifdef CONFIG_RFS_ACCEL
		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
		if (err)
			goto err_irq_rmap;
#endif
	}
	if (!mlx5_irq_pool_is_sf_pool(pool))
		irq_set_name(pool, name, i);
	else
		irq_sf_set_name(pool, name, i);
	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
	snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
		 MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
	/* &irq->nh doubles as the dev_id cookie; free_irq() must pass it too */
	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
			  &irq->nh);
	if (err) {
		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
		goto err_req_irq;
	}

	if (af_desc) {
		cpumask_copy(irq->mask, &af_desc->mask);
		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
	}
	irq->pool = pool;
	irq->refcount = 1;
	irq->pool_index = i;
	err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
	if (err) {
		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
			      irq->pool_index, err);
		goto err_xa;
	}
	return irq;
	/* Unwind in reverse order of the setup above */
err_xa:
	if (af_desc)
		irq_update_affinity_hint(irq->map.virq, NULL);
	free_irq(irq->map.virq, &irq->nh);
err_req_irq:
#ifdef CONFIG_RFS_ACCEL
	if (i && rmap && *rmap) {
		/* the whole rmap is freed, and the caller's pointer cleared */
		free_irq_cpu_rmap(*rmap);
		*rmap = NULL;
	}
err_irq_rmap:
#endif
	if (i && pci_msix_can_alloc_dyn(dev->pdev))
		pci_msix_free_irq(dev->pdev, irq->map);
err_alloc_irq:
	free_cpumask_var(irq->mask);
	kfree(irq);
	return ERR_PTR(err);
}
340
mlx5_irq_attach_nb(struct mlx5_irq * irq,struct notifier_block * nb)341 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
342 {
343 int ret;
344
345 ret = irq_get(irq);
346 if (!ret)
347 /* Something very bad happens here, we are enabling EQ
348 * on non-existing IRQ.
349 */
350 return -ENOENT;
351 ret = atomic_notifier_chain_register(&irq->nh, nb);
352 if (ret)
353 mlx5_irq_put(irq);
354 return ret;
355 }
356
mlx5_irq_detach_nb(struct mlx5_irq * irq,struct notifier_block * nb)357 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
358 {
359 int err = 0;
360
361 err = atomic_notifier_chain_unregister(&irq->nh, nb);
362 mlx5_irq_put(irq);
363 return err;
364 }
365
mlx5_irq_get_affinity_mask(struct mlx5_irq * irq)366 struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
367 {
368 return irq->mask;
369 }
370
mlx5_irq_get_irq(const struct mlx5_irq * irq)371 int mlx5_irq_get_irq(const struct mlx5_irq *irq)
372 {
373 return irq->map.virq;
374 }
375
mlx5_irq_get_index(struct mlx5_irq * irq)376 int mlx5_irq_get_index(struct mlx5_irq *irq)
377 {
378 return irq->map.index;
379 }
380
381 /* irq_pool API */
382
383 /* requesting an irq from a given pool according to given index */
384 static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool * pool,int vecidx,struct irq_affinity_desc * af_desc,struct cpu_rmap ** rmap)385 irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
386 struct irq_affinity_desc *af_desc,
387 struct cpu_rmap **rmap)
388 {
389 struct mlx5_irq *irq;
390
391 mutex_lock(&pool->lock);
392 irq = xa_load(&pool->irqs, vecidx);
393 if (irq) {
394 mlx5_irq_get_locked(irq);
395 goto unlock;
396 }
397 irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
398 unlock:
399 mutex_unlock(&pool->lock);
400 return irq;
401 }
402
sf_ctrl_irq_pool_get(struct mlx5_irq_table * irq_table)403 static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
404 {
405 return irq_table->sf_ctrl_pool;
406 }
407
sf_irq_pool_get(struct mlx5_irq_table * irq_table)408 static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
409 {
410 return irq_table->sf_comp_pool;
411 }
412
mlx5_irq_pool_get(struct mlx5_core_dev * dev)413 struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
414 {
415 struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
416 struct mlx5_irq_pool *pool = NULL;
417
418 if (mlx5_core_is_sf(dev))
419 pool = sf_irq_pool_get(irq_table);
420
421 /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
422 * the PF IRQs pool in case the SF pool doesn't exist.
423 */
424 return pool ? pool : irq_table->pcif_pool;
425 }
426
ctrl_irq_pool_get(struct mlx5_core_dev * dev)427 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
428 {
429 struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
430 struct mlx5_irq_pool *pool = NULL;
431
432 if (mlx5_core_is_sf(dev))
433 pool = sf_ctrl_irq_pool_get(irq_table);
434
435 /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
436 * the PF IRQs pool in case the SF pool doesn't exist.
437 */
438 return pool ? pool : irq_table->pcif_pool;
439 }
440
_mlx5_irq_release(struct mlx5_irq * irq)441 static void _mlx5_irq_release(struct mlx5_irq *irq)
442 {
443 synchronize_irq(irq->map.virq);
444 mlx5_irq_put(irq);
445 }
446
/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @dev: mlx5 device that is releasing the IRQ.
 * @ctrl_irq: ctrl IRQ to be released.
 *
 * Thin wrapper that forwards to mlx5_irq_affinity_irq_release().
 */
void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq)
{
	mlx5_irq_affinity_irq_release(dev, ctrl_irq);
}
456
457 /**
458 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
459 * @dev: mlx5 device that requesting the IRQ.
460 *
461 * This function returns a pointer to IRQ, or ERR_PTR in case of error.
462 */
mlx5_ctrl_irq_request(struct mlx5_core_dev * dev)463 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
464 {
465 struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
466 struct irq_affinity_desc af_desc;
467 struct mlx5_irq *irq;
468
469 cpumask_copy(&af_desc.mask, cpu_online_mask);
470 af_desc.is_managed = false;
471 if (!mlx5_irq_pool_is_sf_pool(pool)) {
472 /* In case we are allocating a control IRQ from a pci device's pool.
473 * This can happen also for a SF if the SFs pool is empty.
474 */
475 if (!pool->xa_num_irqs.max) {
476 cpumask_clear(&af_desc.mask);
477 /* In case we only have a single IRQ for PF/VF */
478 cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
479 }
480 /* Allocate the IRQ in index 0. The vector was already allocated */
481 irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
482 } else {
483 irq = mlx5_irq_affinity_request(dev, pool, &af_desc);
484 }
485
486 return irq;
487 }
488
489 /**
490 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
491 * @dev: mlx5 device that requesting the IRQ.
492 * @vecidx: vector index of the IRQ. This argument is ignore if affinity is
493 * provided.
494 * @af_desc: affinity descriptor for this IRQ.
495 * @rmap: pointer to reverse map pointer for completion interrupts
496 *
497 * This function returns a pointer to IRQ, or ERR_PTR in case of error.
498 */
mlx5_irq_request(struct mlx5_core_dev * dev,u16 vecidx,struct irq_affinity_desc * af_desc,struct cpu_rmap ** rmap)499 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
500 struct irq_affinity_desc *af_desc,
501 struct cpu_rmap **rmap)
502 {
503 struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
504 struct mlx5_irq_pool *pool;
505 struct mlx5_irq *irq;
506
507 pool = irq_table->pcif_pool;
508 irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
509 if (IS_ERR(irq))
510 return irq;
511 mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
512 irq->map.virq, cpumask_pr_args(&af_desc->mask),
513 irq->refcount / MLX5_EQ_REFS_PER_IRQ);
514 return irq;
515 }
516
/**
 * mlx5_irq_release_vector - release one IRQ back to the system.
 * @irq: the irq to release.
 *
 * Waits for in-flight handlers of @irq and then drops one reference.
 */
void mlx5_irq_release_vector(struct mlx5_irq *irq)
{
	_mlx5_irq_release(irq);
}
525
526 /**
527 * mlx5_irq_request_vector - request one IRQ for mlx5 device.
528 * @dev: mlx5 device that is requesting the IRQ.
529 * @cpu: CPU to bind the IRQ to.
530 * @vecidx: vector index to request an IRQ for.
531 * @rmap: pointer to reverse map pointer for completion interrupts
532 *
533 * Each IRQ is bound to at most 1 CPU.
534 * This function is requests one IRQ, for the given @vecidx.
535 *
536 * This function returns a pointer to the irq on success, or an error pointer
537 * in case of an error.
538 */
mlx5_irq_request_vector(struct mlx5_core_dev * dev,u16 cpu,u16 vecidx,struct cpu_rmap ** rmap)539 struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
540 u16 vecidx, struct cpu_rmap **rmap)
541 {
542 struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
543 struct mlx5_irq_pool *pool = table->pcif_pool;
544 struct irq_affinity_desc af_desc;
545 int offset = MLX5_IRQ_VEC_COMP_BASE;
546
547 if (!pool->xa_num_irqs.max)
548 offset = 0;
549
550 af_desc.is_managed = false;
551 cpumask_clear(&af_desc.mask);
552 cpumask_set_cpu(cpu, &af_desc.mask);
553 return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap);
554 }
555
556 static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev * dev,int start,int size,char * name,u32 min_threshold,u32 max_threshold)557 irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
558 u32 min_threshold, u32 max_threshold)
559 {
560 struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
561
562 if (!pool)
563 return ERR_PTR(-ENOMEM);
564 pool->dev = dev;
565 mutex_init(&pool->lock);
566 xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
567 pool->xa_num_irqs.min = start;
568 pool->xa_num_irqs.max = start + size - 1;
569 if (name)
570 snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
571 "%s", name);
572 pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
573 pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
574 mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
575 name, size, start);
576 return pool;
577 }
578
irq_pool_free(struct mlx5_irq_pool * pool)579 static void irq_pool_free(struct mlx5_irq_pool *pool)
580 {
581 struct mlx5_irq *irq;
582 unsigned long index;
583
584 /* There are cases in which we are destrying the irq_table before
585 * freeing all the IRQs, fast teardown for example. Hence, free the irqs
586 * which might not have been freed.
587 */
588 xa_for_each(&pool->irqs, index, irq)
589 irq_release(irq);
590 xa_destroy(&pool->irqs);
591 mutex_destroy(&pool->lock);
592 kfree(pool->irqs_per_cpu);
593 kvfree(pool);
594 }
595
/* Create the irq pools of @dev's irq table.
 *
 * @dev: device whose irq table is populated
 * @sf_vec: number of vectors available for SFs
 * @pcif_vec: number of vectors for the PCI function pool
 * @dynamic_vec: whether MSI-X vectors can be allocated dynamically
 *
 * The PCI function pool is always created. The SF control and completion
 * pools are created only when the device supports SFs and enough vectors
 * are available; otherwise they are left NULL and 0 is still returned.
 *
 * Returns 0 on success or a negative errno; on failure no pool pointer is
 * left set in the table.
 */
static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec,
			  bool dynamic_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int sf_vec_available = sf_vec;
	int num_sf_ctrl;
	int err;

	/* The table is not freed together with its pools, so it may still
	 * carry SF pool pointers from an earlier create/destroy cycle. Clear
	 * them: the early "no SF pools" returns below must leave them NULL,
	 * as the rest of the file tests them for NULL.
	 */
	table->sf_ctrl_pool = NULL;
	table->sf_comp_pool = NULL;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool)) {
		err = PTR_ERR(table->pcif_pool);
		table->pcif_pool = NULL;
		return err;
	}
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	num_sf_ctrl = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
				   MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	if (!dynamic_vec && (num_sf_ctrl + 1) > sf_vec_available) {
		mlx5_core_dbg(dev,
			      "Not enough IRQs for SFs control and completion pool, required=%d avail=%d\n",
			      num_sf_ctrl + 1, sf_vec_available);
		return 0;
	}

	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	sf_vec_available -= num_sf_ctrl;

	/* init sf_comp_pool, remaining vectors are for the SF completions */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec_available, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	/* Never leave ERR_PTR or freed pointers behind in the table */
	table->sf_comp_pool = NULL;
	table->sf_ctrl_pool = NULL;
	table->pcif_pool = NULL;
	return err;
}
664
irq_pools_destroy(struct mlx5_irq_table * table)665 static void irq_pools_destroy(struct mlx5_irq_table *table)
666 {
667 if (table->sf_ctrl_pool) {
668 irq_pool_free(table->sf_comp_pool);
669 irq_pool_free(table->sf_ctrl_pool);
670 }
671 irq_pool_free(table->pcif_pool);
672 }
673
mlx5_irq_pool_free_irqs(struct mlx5_irq_pool * pool)674 static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
675 {
676 struct mlx5_irq *irq;
677 unsigned long index;
678
679 xa_for_each(&pool->irqs, index, irq)
680 mlx5_system_free_irq(irq);
681
682 }
683
mlx5_irq_pools_free_irqs(struct mlx5_irq_table * table)684 static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
685 {
686 if (table->sf_ctrl_pool) {
687 mlx5_irq_pool_free_irqs(table->sf_comp_pool);
688 mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
689 }
690 mlx5_irq_pool_free_irqs(table->pcif_pool);
691 }
692
693 /* irq_table API */
694
mlx5_irq_table_init(struct mlx5_core_dev * dev)695 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
696 {
697 struct mlx5_irq_table *irq_table;
698
699 if (mlx5_core_is_sf(dev))
700 return 0;
701
702 irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
703 dev->priv.numa_node);
704 if (!irq_table)
705 return -ENOMEM;
706
707 dev->priv.irq_table = irq_table;
708 return 0;
709 }
710
mlx5_irq_table_cleanup(struct mlx5_core_dev * dev)711 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
712 {
713 if (mlx5_core_is_sf(dev))
714 return;
715
716 kvfree(dev->priv.irq_table);
717 }
718
mlx5_irq_table_get_num_comp(struct mlx5_irq_table * table)719 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
720 {
721 if (!table->pcif_pool->xa_num_irqs.max)
722 return 1;
723 return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
724 }
725
mlx5_irq_table_create(struct mlx5_core_dev * dev)726 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
727 {
728 int num_eqs = mlx5_max_eq_cap_get(dev);
729 bool dynamic_vec;
730 int total_vec;
731 int pcif_vec;
732 int req_vec;
733 int err;
734 int n;
735
736 if (mlx5_core_is_sf(dev))
737 return 0;
738
739 /* PCI PF vectors usage is limited by online cpus, device EQs and
740 * PCI MSI-X capability.
741 */
742 pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
743 pcif_vec = min_t(int, pcif_vec, num_eqs);
744 pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
745
746 total_vec = pcif_vec;
747 if (mlx5_sf_max_functions(dev))
748 total_vec += MLX5_MAX_MSIX_PER_SF * mlx5_sf_max_functions(dev);
749 total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
750
751 req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
752 n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
753 if (n < 0)
754 return n;
755
756 /* Further limit vectors of the pools based on platform for non dynamic case */
757 dynamic_vec = pci_msix_can_alloc_dyn(dev->pdev);
758 if (!dynamic_vec) {
759 pcif_vec = min_t(int, n, pcif_vec);
760 total_vec = min_t(int, n, total_vec);
761 }
762
763 err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec, dynamic_vec);
764 if (err)
765 pci_free_irq_vectors(dev->pdev);
766
767 return err;
768 }
769
mlx5_irq_table_destroy(struct mlx5_core_dev * dev)770 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
771 {
772 struct mlx5_irq_table *table = dev->priv.irq_table;
773
774 if (mlx5_core_is_sf(dev))
775 return;
776
777 /* There are cases where IRQs still will be in used when we reaching
778 * to here. Hence, making sure all the irqs are released.
779 */
780 irq_pools_destroy(table);
781 pci_free_irq_vectors(dev->pdev);
782 }
783
mlx5_irq_table_free_irqs(struct mlx5_core_dev * dev)784 void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
785 {
786 struct mlx5_irq_table *table = dev->priv.irq_table;
787
788 if (mlx5_core_is_sf(dev))
789 return;
790
791 mlx5_irq_pools_free_irqs(table);
792 pci_free_irq_vectors(dev->pdev);
793 }
794
mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table * table)795 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
796 {
797 if (table->sf_comp_pool)
798 return min_t(int, num_online_cpus(),
799 table->sf_comp_pool->xa_num_irqs.max -
800 table->sf_comp_pool->xa_num_irqs.min + 1);
801 else
802 return mlx5_irq_table_get_num_comp(table);
803 }
804
mlx5_irq_table_get(struct mlx5_core_dev * dev)805 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
806 {
807 #ifdef CONFIG_MLX5_SF
808 if (mlx5_core_is_sf(dev))
809 return dev->priv.parent_mdev->priv.irq_table;
810 #endif
811 return dev->priv.irq_table;
812 }
813