// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif

#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_MAX_MSIX_PER_SF 256
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
#define MLX5_IRQ_VEC_COMP_BASE 1

#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)

struct mlx5_irq {
	struct atomic_notifier_head nh;
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_FORMATTED_NAME];
	struct mlx5_irq_pool *pool;
	int refcount;
	struct msi_map map;
	u32 pool_index;
};

struct mlx5_irq_table {
	struct mlx5_irq_pool *pcif_pool;
	struct mlx5_irq_pool *sf_ctrl_pool;
	struct mlx5_irq_pool *sf_comp_pool;
};

static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
				   int func,
				   bool ec_vf_func)
{
	if (!ec_vf_func)
		return func;
	return mlx5_core_ec_vf_vport_base(dev) + func - 1;
}
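
/* Illustrative example (not taken from the driver): with embedded-CPU SR-IOV
 * enabled, EC VF function IDs start at 1, so function 1 maps to
 * mlx5_core_ec_vf_vport_base(dev) + 0 and function N to base + N - 1.
 * Without EC SR-IOV the function ID is already the vport number and is
 * returned unchanged.
 */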

/**
 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
 * to be assigned to each VF.
 * @dev: PF to work on
 * @num_vfs: Number of enabled VFs
 */
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
	int num_vf_msix, min_msix, max_msix;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	/* Limit the maximum number of MSI-X vectors so the default
	 * configuration has some available in the pool. This allows the user
	 * to increase the number of vectors in a VF without having to first
	 * size down other VFs.
	 */
	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}
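
/* Worked example (illustrative numbers, not from any specific device): with
 * num_total_dynamic_vf_msix = 2048, num_vfs = 16, max_msix = 128 and
 * min_msix = 2, the default is max(min(2048 / 16, 128 / 2), 2) =
 * max(min(128, 64), 2) = 64 vectors per VF, leaving room in the pool for a
 * later per-VF increase via mlx5_set_msix_vec_count().
 */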

/**
 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
 * @dev: PF to work on
 * @function_id: Internal PCI VF function ID
 * @msix_vec_count: Number of MSI-X vectors to set
 */
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
			    int msix_vec_count)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *hca_cap = NULL, *query_cap = NULL, *cap;
	int num_vf_msix, min_msix, max_msix;
	bool ec_vf_function;
	int vport;
	int ret;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
		return -EOPNOTSUPP;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	if (msix_vec_count < min_msix)
		return -EINVAL;

	if (msix_vec_count > max_msix)
		return -EOVERFLOW;

	query_cap = kvzalloc(query_sz, GFP_KERNEL);
	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
	if (!hca_cap || !query_cap) {
		ret = -ENOMEM;
		goto out;
	}

	ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
	vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
	ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
	       MLX5_UN_SZ_BYTES(hca_cap_union));
	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);

	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
	MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);

	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
	kvfree(hca_cap);
	kvfree(query_cap);
	return ret;
}
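
/* Summary of the flow above (descriptive only): the PF queries the target
 * function's general HCA capabilities, copies them into a SET_HCA_CAP
 * command, patches only dynamic_msix_table_size, and issues the command with
 * other_function set so it applies to the VF rather than to the PF itself.
 * The typical caller is the SR-IOV code servicing the PCI sriov_vf_msix_count
 * sysfs attribute, though that path lives outside this file.
 */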

/* mlx5_system_free_irq - Free an IRQ
 * @irq: IRQ to free
 *
 * Free the IRQ and other resources such as rmap from the system.
 * BUT doesn't free or remove the reference from mlx5.
 * This function is very important for the shutdown flow, where we need to
 * clean up system resources but keep mlx5 objects alive,
 * see mlx5_irq_table_free_irqs().
 */
static void mlx5_system_free_irq(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rmap;
#endif

	/* free_irq() requires that the affinity_hint and rmap are cleared
	 * before calling it. To satisfy this requirement, we call
	 * irq_cpu_rmap_remove() to remove the notifier.
	 */
	irq_update_affinity_hint(irq->map.virq, NULL);
#ifdef CONFIG_RFS_ACCEL
	rmap = mlx5_eq_table_get_rmap(pool->dev);
	if (rmap)
		irq_cpu_rmap_remove(rmap, irq->map.virq);
#endif

	free_irq(irq->map.virq, &irq->nh);
	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
		pci_msix_free_irq(pool->dev->pdev, irq->map);
}

static void irq_release(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;

	xa_erase(&pool->irqs, irq->pool_index);
	mlx5_system_free_irq(irq);
	free_cpumask_var(irq->mask);
	kfree(irq);
}

int mlx5_irq_put(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
	int ret = 0;

	mutex_lock(&pool->lock);
	irq->refcount--;
	if (!irq->refcount) {
		irq_release(irq);
		ret = 1;
	}
	mutex_unlock(&pool->lock);
	return ret;
}

int mlx5_irq_read_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	return irq->refcount;
}

int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	if (WARN_ON_ONCE(!irq->refcount))
		return 0;
	irq->refcount++;
	return 1;
}

static int irq_get(struct mlx5_irq *irq)
{
	int err;

	mutex_lock(&irq->pool->lock);
	err = mlx5_irq_get_locked(irq);
	mutex_unlock(&irq->pool->lock);
	return err;
}

static irqreturn_t irq_int_handler(int irq, void *nh)
{
	atomic_notifier_call_chain(nh, 0, NULL);
	return IRQ_HANDLED;
}

static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
}

static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	if (!pool->xa_num_irqs.max) {
		/* in case we only have a single irq for the device */
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
		return;
	}

	if (!vecidx) {
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
		return;
	}

	vecidx -= MLX5_IRQ_VEC_COMP_BASE;
	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}
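
/* Naming examples (illustrative): vector 0 of a PF/VF pool becomes
 * "mlx5_async0", vector 1 becomes "mlx5_comp0" (completion vectors are
 * renumbered after subtracting MLX5_IRQ_VEC_COMP_BASE), a single-vector
 * device uses "mlx5_combined0", and SF pool vectors use the pool name plus
 * the vector index (e.g. "mlx5_sf_comp3"). mlx5_irq_alloc() below then adds
 * the PCI device name when formatting irq->name.
 */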

struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
				struct irq_affinity_desc *af_desc,
				struct cpu_rmap **rmap)
{
	struct mlx5_core_dev *dev = pool->dev;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq *irq;
	int err;

	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
	if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
		kfree(irq);
		return ERR_PTR(-ENOMEM);
	}

	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
		/* The vector at index 0 is always statically allocated. If
		 * dynamic irq allocation is not supported, all vectors are
		 * statically allocated. In both cases just get the irq number
		 * and set the index.
		 */
		irq->map.virq = pci_irq_vector(dev->pdev, i);
		irq->map.index = i;
	} else {
		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
		if (!irq->map.virq) {
			err = irq->map.index;
			goto err_alloc_irq;
		}
	}

	if (i && rmap && *rmap) {
#ifdef CONFIG_RFS_ACCEL
		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
		if (err)
			goto err_irq_rmap;
#endif
	}
	if (!mlx5_irq_pool_is_sf_pool(pool))
		irq_set_name(pool, name, i);
	else
		irq_sf_set_name(pool, name, i);
	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
	snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
		 MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
			  &irq->nh);
	if (err) {
		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
		goto err_req_irq;
	}

	if (af_desc) {
		cpumask_copy(irq->mask, &af_desc->mask);
		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
	}
	irq->pool = pool;
	irq->refcount = 1;
	irq->pool_index = i;
	err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
	if (err) {
		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
			      irq->pool_index, err);
		goto err_xa;
	}
	return irq;
err_xa:
	if (af_desc)
		irq_update_affinity_hint(irq->map.virq, NULL);
	free_irq(irq->map.virq, &irq->nh);
err_req_irq:
#ifdef CONFIG_RFS_ACCEL
	if (i && rmap && *rmap) {
		free_irq_cpu_rmap(*rmap);
		*rmap = NULL;
	}
err_irq_rmap:
#endif
	if (i && pci_msix_can_alloc_dyn(dev->pdev))
		pci_msix_free_irq(dev->pdev, irq->map);
err_alloc_irq:
	free_cpumask_var(irq->mask);
	kfree(irq);
	return ERR_PTR(err);
}

int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int ret;

	ret = irq_get(irq);
	if (!ret)
		/* Something went very wrong here: we are enabling an EQ
		 * on a non-existing IRQ.
		 */
		return -ENOENT;
	ret = atomic_notifier_chain_register(&irq->nh, nb);
	if (ret)
		mlx5_irq_put(irq);
	return ret;
}

int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int err = 0;

	err = atomic_notifier_chain_unregister(&irq->nh, nb);
	mlx5_irq_put(irq);
	return err;
}
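
/* Usage sketch (hypothetical consumer, not code from this driver): an EQ-like
 * user embeds a notifier_block, attaches it with mlx5_irq_attach_nb() and is
 * then called from irq_int_handler() in hard-IRQ context on every interrupt:
 *
 *	static int my_eq_int(struct notifier_block *nb, unsigned long action,
 *			     void *data)
 *	{
 *		// poll the EQ here; action and data are unused by this chain
 *		return NOTIFY_OK;
 *	}
 *
 *	my_eq.nb.notifier_call = my_eq_int;
 *	err = mlx5_irq_attach_nb(irq, &my_eq.nb);
 *	...
 *	mlx5_irq_detach_nb(irq, &my_eq.nb);
 *
 * Attaching takes an extra reference on the IRQ; detaching drops it again via
 * mlx5_irq_put().
 */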

struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
{
	return irq->mask;
}

int mlx5_irq_get_irq(const struct mlx5_irq *irq)
{
	return irq->map.virq;
}

int mlx5_irq_get_index(struct mlx5_irq *irq)
{
	return irq->map.index;
}

/* irq_pool API */

/* requesting an irq from a given pool according to the given index */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
			struct irq_affinity_desc *af_desc,
			struct cpu_rmap **rmap)
{
	struct mlx5_irq *irq;

	mutex_lock(&pool->lock);
	irq = xa_load(&pool->irqs, vecidx);
	if (irq) {
		mlx5_irq_get_locked(irq);
		goto unlock;
	}
	irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
	mutex_unlock(&pool->lock);
	return irq;
}

static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_ctrl_pool;
}

static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_comp_pool;
}

struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_ctrl_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static void _mlx5_irq_release(struct mlx5_irq *irq)
{
	synchronize_irq(irq->map.virq);
	mlx5_irq_put(irq);
}

/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @dev: mlx5 device that is releasing the IRQ.
 * @ctrl_irq: ctrl IRQ to be released.
 */
void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq)
{
	mlx5_irq_affinity_irq_release(dev, ctrl_irq);
}

/**
 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 *
 * This function returns a pointer to IRQ, or ERR_PTR in case of error.
 */
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
	struct irq_affinity_desc af_desc;
	struct mlx5_irq *irq;

	cpumask_copy(&af_desc.mask, cpu_online_mask);
	af_desc.is_managed = false;
	if (!mlx5_irq_pool_is_sf_pool(pool)) {
		/* We are allocating a control IRQ from a PCI device's pool.
		 * This can also happen for an SF if the SF's pool is empty.
		 */
		if (!pool->xa_num_irqs.max) {
			cpumask_clear(&af_desc.mask);
			/* In case we only have a single IRQ for PF/VF */
			cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
		}
		/* Allocate the IRQ at index 0. The vector was already allocated */
		irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
	} else {
		irq = mlx5_irq_affinity_request(dev, pool, &af_desc);
	}

	return irq;
}
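
/* Note (descriptive only): a control IRQ obtained with
 * mlx5_ctrl_irq_request() is returned with mlx5_ctrl_irq_release() above.
 * For a PF/VF it resolves to vector 0 of the PCI function's pool (named
 * "mlx5_async0" by irq_set_name(), or "mlx5_combined0" on a single-vector
 * device), while SFs are normally served from the SF control pool via the
 * mlx5_irq_affinity_* helpers.
 */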

/**
 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
 * provided.
 * @af_desc: affinity descriptor for this IRQ.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * This function returns a pointer to IRQ, or ERR_PTR in case of error.
 */
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
				  struct irq_affinity_desc *af_desc,
				  struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool;
	struct mlx5_irq *irq;

	pool = irq_table->pcif_pool;
	irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
	if (IS_ERR(irq))
		return irq;
	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
		      irq->map.virq, cpumask_pr_args(&af_desc->mask),
		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
	return irq;
}

/**
 * mlx5_irq_release_vector - release one IRQ back to the system.
 * @irq: the irq to release.
 */
void mlx5_irq_release_vector(struct mlx5_irq *irq)
{
	_mlx5_irq_release(irq);
}

/**
 * mlx5_irq_request_vector - request one IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @cpu: CPU to bind the IRQ to.
 * @vecidx: vector index to request an IRQ for.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * Each IRQ is bound to at most 1 CPU.
 * This function requests one IRQ for the given @vecidx.
 *
 * This function returns a pointer to the irq on success, or an error pointer
 * in case of an error.
 */
struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
					 u16 vecidx, struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = table->pcif_pool;
	struct irq_affinity_desc af_desc;
	int offset = MLX5_IRQ_VEC_COMP_BASE;

	if (!pool->xa_num_irqs.max)
		offset = 0;

	af_desc.is_managed = false;
	cpumask_clear(&af_desc.mask);
	cpumask_set_cpu(cpu, &af_desc.mask);
	return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap);
}

static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
	       u32 min_threshold, u32 max_threshold)
{
	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);

	if (!pool)
		return ERR_PTR(-ENOMEM);
	pool->dev = dev;
	mutex_init(&pool->lock);
	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
	pool->xa_num_irqs.min = start;
	pool->xa_num_irqs.max = start + size - 1;
	if (name)
		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
			 "%s", name);
	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
		      name, size, start);
	return pool;
}

static void irq_pool_free(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	/* There are cases in which we are destroying the irq_table before
	 * freeing all the IRQs, fast teardown for example. Hence, free the
	 * irqs which might not have been freed.
	 */
	xa_for_each(&pool->irqs, index, irq)
		irq_release(irq);
	xa_destroy(&pool->irqs);
	mutex_destroy(&pool->lock);
	kfree(pool->irqs_per_cpu);
	kvfree(pool);
}
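
/* Vector index layout built by irq_pools_init() below (descriptive only):
 *
 *	[0 .. pcif_vec - 1]				pcif_pool (PF/VF)
 *	[pcif_vec .. pcif_vec + num_sf_ctrl - 1]	sf_ctrl_pool
 *	[pcif_vec + num_sf_ctrl .. end]			sf_comp_pool
 *
 * The SF pools exist only when SFs are supported and enough vectors are
 * available; otherwise everything falls back to pcif_pool.
 */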

static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec,
			  bool dynamic_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int sf_vec_available = sf_vec;
	int num_sf_ctrl;
	int err;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool))
		return PTR_ERR(table->pcif_pool);
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	num_sf_ctrl = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
				   MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	if (!dynamic_vec && (num_sf_ctrl + 1) > sf_vec_available) {
		mlx5_core_dbg(dev,
			      "Not enough IRQs for SFs control and completion pool, required=%d avail=%d\n",
			      num_sf_ctrl + 1, sf_vec_available);
		return 0;
	}

	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	sf_vec_available -= num_sf_ctrl;

	/* init sf_comp_pool, remaining vectors are for the SF completions */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec_available, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	return err;
}

static void irq_pools_destroy(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		irq_pool_free(table->sf_comp_pool);
		irq_pool_free(table->sf_ctrl_pool);
	}
	irq_pool_free(table->pcif_pool);
}

static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	xa_for_each(&pool->irqs, index, irq)
		mlx5_system_free_irq(irq);
}

static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		mlx5_irq_pool_free_irqs(table->sf_comp_pool);
		mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
	}
	mlx5_irq_pool_free_irqs(table->pcif_pool);
}

/* irq_table API */

int mlx5_irq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table;

	if (mlx5_core_is_sf(dev))
		return 0;

	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
				  dev->priv.numa_node);
	if (!irq_table)
		return -ENOMEM;

	dev->priv.irq_table = irq_table;
	return 0;
}

void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
{
	if (mlx5_core_is_sf(dev))
		return;

	kvfree(dev->priv.irq_table);
}

int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
	if (!table->pcif_pool->xa_num_irqs.max)
		return 1;
	return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
}
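
/* Sizing example for mlx5_irq_table_create() below (illustrative numbers):
 * a single-port device on a 16-CPU host asks for pcif_vec = 1 * 16 + 1 = 17
 * vectors, further clamped by the device EQ capability and by
 * pci_msix_vec_count(). If SFs are supported, up to
 * MLX5_MAX_MSIX_PER_SF * mlx5_sf_max_functions() extra vectors are budgeted
 * for the SF pools, again clamped by the MSI-X capability. With dynamic
 * MSI-X allocation only one vector is allocated up front and the rest are
 * allocated on demand.
 */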

int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
	int num_eqs = mlx5_max_eq_cap_get(dev);
	bool dynamic_vec;
	int total_vec;
	int pcif_vec;
	int req_vec;
	int err;
	int n;

	if (mlx5_core_is_sf(dev))
		return 0;

	/* PCI PF vector usage is limited by the number of online CPUs,
	 * device EQs and the PCI MSI-X capability.
	 */
	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
	pcif_vec = min_t(int, pcif_vec, num_eqs);
	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));

	total_vec = pcif_vec;
	if (mlx5_sf_max_functions(dev))
		total_vec += MLX5_MAX_MSIX_PER_SF * mlx5_sf_max_functions(dev);
	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));

	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
	if (n < 0)
		return n;

	/* Further limit the pools' vectors based on the platform for the
	 * non-dynamic case.
	 */
	dynamic_vec = pci_msix_can_alloc_dyn(dev->pdev);
	if (!dynamic_vec) {
		pcif_vec = min_t(int, n, pcif_vec);
		total_vec = min_t(int, n, total_vec);
	}

	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec, dynamic_vec);
	if (err)
		pci_free_irq_vectors(dev->pdev);

	return err;
}

void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	/* There are cases where IRQs are still in use when we reach this
	 * point. Hence, make sure all the irqs are released.
	 */
	irq_pools_destroy(table);
	pci_free_irq_vectors(dev->pdev);
}

void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	mlx5_irq_pools_free_irqs(table);
	pci_free_irq_vectors(dev->pdev);
}

int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
	if (table->sf_comp_pool)
		return min_t(int, num_online_cpus(),
			     table->sf_comp_pool->xa_num_irqs.max -
			     table->sf_comp_pool->xa_num_irqs.min + 1);
	else
		return mlx5_irq_table_get_num_comp(table);
}

struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_MLX5_SF
	if (mlx5_core_is_sf(dev))
		return dev->priv.parent_mdev->priv.irq_table;
#endif
	return dev->priv.irq_table;
}