1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ 3 4 #include "eswitch.h" 5 #include "esw/qos.h" 6 #include "en/port.h" 7 #define CREATE_TRACE_POINTS 8 #include "diag/qos_tracepoint.h" 9 10 /* Minimum supported BW share value by the HW is 1 Mbit/sec */ 11 #define MLX5_MIN_BW_SHARE 1 12 13 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ 14 min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit) 15 16 struct mlx5_esw_rate_group { 17 u32 tsar_ix; 18 u32 max_rate; 19 u32 min_rate; 20 u32 bw_share; 21 struct list_head list; 22 }; 23 24 static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, 25 u32 tsar_ix, u32 max_rate, u32 bw_share) 26 { 27 u32 bitmask = 0; 28 29 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) 30 return -EOPNOTSUPP; 31 32 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); 33 MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); 34 bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; 35 bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; 36 37 return mlx5_modify_scheduling_element_cmd(dev, 38 SCHEDULING_HIERARCHY_E_SWITCH, 39 sched_ctx, 40 tsar_ix, 41 bitmask); 42 } 43 44 static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, 45 u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) 46 { 47 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 48 struct mlx5_core_dev *dev = esw->dev; 49 int err; 50 51 err = esw_qos_tsar_config(dev, sched_ctx, 52 group->tsar_ix, 53 max_rate, bw_share); 54 if (err) 55 NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); 56 57 trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate); 58 59 return err; 60 } 61 62 static int esw_qos_vport_config(struct mlx5_eswitch *esw, 63 struct mlx5_vport *vport, 64 u32 max_rate, u32 bw_share, 65 struct netlink_ext_ack *extack) 66 { 67 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 68 struct mlx5_core_dev *dev = esw->dev; 69 int err; 70 71 if (!vport->qos.enabled) 72 return -EIO; 73 74 err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix, 75 max_rate, bw_share); 76 if (err) { 77 esw_warn(esw->dev, 78 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", 79 vport->vport, err); 80 NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed"); 81 return err; 82 } 83 84 trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate); 85 86 return 0; 87 } 88 89 static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, 90 struct mlx5_esw_rate_group *group, 91 bool group_level) 92 { 93 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 94 struct mlx5_vport *evport; 95 u32 max_guarantee = 0; 96 unsigned long i; 97 98 if (group_level) { 99 struct mlx5_esw_rate_group *group; 100 101 list_for_each_entry(group, &esw->qos.groups, list) { 102 if (group->min_rate < max_guarantee) 103 continue; 104 max_guarantee = group->min_rate; 105 } 106 } else { 107 mlx5_esw_for_each_vport(esw, i, evport) { 108 if (!evport->enabled || !evport->qos.enabled || 109 evport->qos.group != group || evport->qos.min_rate < max_guarantee) 110 continue; 111 max_guarantee = evport->qos.min_rate; 112 } 113 } 114 115 if (max_guarantee) 116 return max_t(u32, max_guarantee / fw_max_bw_share, 1); 117 118 /* If vports min rate divider is 0 but their group has bw_share configured, then 119 * need to set bw_share for vports to minimal value. 120 */ 121 if (!group_level && !max_guarantee && group && group->bw_share) 122 return 1; 123 return 0; 124 } 125 126 static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) 127 { 128 if (divider) 129 return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max); 130 131 return 0; 132 } 133 134 static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, 135 struct mlx5_esw_rate_group *group, 136 struct netlink_ext_ack *extack) 137 { 138 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 139 u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); 140 struct mlx5_vport *evport; 141 unsigned long i; 142 u32 bw_share; 143 int err; 144 145 mlx5_esw_for_each_vport(esw, i, evport) { 146 if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group) 147 continue; 148 bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); 149 150 if (bw_share == evport->qos.bw_share) 151 continue; 152 153 err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack); 154 if (err) 155 return err; 156 157 evport->qos.bw_share = bw_share; 158 } 159 160 return 0; 161 } 162 163 static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider, 164 struct netlink_ext_ack *extack) 165 { 166 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 167 struct mlx5_esw_rate_group *group; 168 u32 bw_share; 169 int err; 170 171 list_for_each_entry(group, &esw->qos.groups, list) { 172 bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); 173 174 if (bw_share == group->bw_share) 175 continue; 176 177 err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack); 178 if (err) 179 return err; 180 181 group->bw_share = bw_share; 182 183 /* All the group's vports need to be set with default bw_share 184 * to enable them with QOS 185 */ 186 err = esw_qos_normalize_vports_min_rate(esw, group, extack); 187 188 if (err) 189 return err; 190 } 191 192 return 0; 193 } 194 195 static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, 196 u32 min_rate, struct netlink_ext_ack *extack) 197 { 198 u32 fw_max_bw_share, previous_min_rate; 199 bool min_rate_supported; 200 int err; 201 202 lockdep_assert_held(&esw->state_lock); 203 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 204 min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && 205 fw_max_bw_share >= MLX5_MIN_BW_SHARE; 206 if (min_rate && !min_rate_supported) 207 return -EOPNOTSUPP; 208 if (min_rate == evport->qos.min_rate) 209 return 0; 210 211 previous_min_rate = evport->qos.min_rate; 212 evport->qos.min_rate = min_rate; 213 err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack); 214 if (err) 215 evport->qos.min_rate = previous_min_rate; 216 217 return err; 218 } 219 220 static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, 221 u32 max_rate, struct netlink_ext_ack *extack) 222 { 223 u32 act_max_rate = max_rate; 224 bool max_rate_supported; 225 int err; 226 227 lockdep_assert_held(&esw->state_lock); 228 max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); 229 230 if (max_rate && !max_rate_supported) 231 return -EOPNOTSUPP; 232 if (max_rate == evport->qos.max_rate) 233 return 0; 234 235 /* If parent group has rate limit need to set to group 236 * value when new max rate is 0. 237 */ 238 if (evport->qos.group && !max_rate) 239 act_max_rate = evport->qos.group->max_rate; 240 241 err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack); 242 243 if (!err) 244 evport->qos.max_rate = max_rate; 245 246 return err; 247 } 248 249 static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, 250 u32 min_rate, struct netlink_ext_ack *extack) 251 { 252 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); 253 struct mlx5_core_dev *dev = esw->dev; 254 u32 previous_min_rate, divider; 255 int err; 256 257 if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE)) 258 return -EOPNOTSUPP; 259 260 if (min_rate == group->min_rate) 261 return 0; 262 263 previous_min_rate = group->min_rate; 264 group->min_rate = min_rate; 265 divider = esw_qos_calculate_min_rate_divider(esw, group, true); 266 err = esw_qos_normalize_groups_min_rate(esw, divider, extack); 267 if (err) { 268 group->min_rate = previous_min_rate; 269 NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); 270 271 /* Attempt restoring previous configuration */ 272 divider = esw_qos_calculate_min_rate_divider(esw, group, true); 273 if (esw_qos_normalize_groups_min_rate(esw, divider, extack)) 274 NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); 275 } 276 277 return err; 278 } 279 280 static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, 281 struct mlx5_esw_rate_group *group, 282 u32 max_rate, struct netlink_ext_ack *extack) 283 { 284 struct mlx5_vport *vport; 285 unsigned long i; 286 int err; 287 288 if (group->max_rate == max_rate) 289 return 0; 290 291 err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack); 292 if (err) 293 return err; 294 295 group->max_rate = max_rate; 296 297 /* Any unlimited vports in the group should be set 298 * with the value of the group. 299 */ 300 mlx5_esw_for_each_vport(esw, i, vport) { 301 if (!vport->enabled || !vport->qos.enabled || 302 vport->qos.group != group || vport->qos.max_rate) 303 continue; 304 305 err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); 306 if (err) 307 NL_SET_ERR_MSG_MOD(extack, 308 "E-Switch vport implicit rate limit setting failed"); 309 } 310 311 return err; 312 } 313 314 static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, 315 struct mlx5_vport *vport, 316 u32 max_rate, u32 bw_share) 317 { 318 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 319 struct mlx5_esw_rate_group *group = vport->qos.group; 320 struct mlx5_core_dev *dev = esw->dev; 321 u32 parent_tsar_ix; 322 void *vport_elem; 323 int err; 324 325 parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; 326 MLX5_SET(scheduling_context, sched_ctx, element_type, 327 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); 328 vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); 329 MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); 330 MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); 331 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); 332 MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); 333 334 err = mlx5_create_scheduling_element_cmd(dev, 335 SCHEDULING_HIERARCHY_E_SWITCH, 336 sched_ctx, 337 &vport->qos.esw_tsar_ix); 338 if (err) { 339 esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", 340 vport->vport, err); 341 return err; 342 } 343 344 return 0; 345 } 346 347 static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, 348 struct mlx5_vport *vport, 349 struct mlx5_esw_rate_group *curr_group, 350 struct mlx5_esw_rate_group *new_group, 351 struct netlink_ext_ack *extack) 352 { 353 u32 max_rate; 354 int err; 355 356 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 357 SCHEDULING_HIERARCHY_E_SWITCH, 358 vport->qos.esw_tsar_ix); 359 if (err) { 360 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed"); 361 return err; 362 } 363 364 vport->qos.group = new_group; 365 max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; 366 367 /* If vport is unlimited, we set the group's value. 368 * Therefore, if the group is limited it will apply to 369 * the vport as well and if not, vport will remain unlimited. 370 */ 371 err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); 372 if (err) { 373 NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); 374 goto err_sched; 375 } 376 377 return 0; 378 379 err_sched: 380 vport->qos.group = curr_group; 381 max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate; 382 if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) 383 esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", 384 vport->vport); 385 386 return err; 387 } 388 389 static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, 390 struct mlx5_vport *vport, 391 struct mlx5_esw_rate_group *group, 392 struct netlink_ext_ack *extack) 393 { 394 struct mlx5_esw_rate_group *new_group, *curr_group; 395 int err; 396 397 if (!vport->enabled) 398 return -EINVAL; 399 400 curr_group = vport->qos.group; 401 new_group = group ?: esw->qos.group0; 402 if (curr_group == new_group) 403 return 0; 404 405 err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack); 406 if (err) 407 return err; 408 409 /* Recalculate bw share weights of old and new groups */ 410 if (vport->qos.bw_share || new_group->bw_share) { 411 esw_qos_normalize_vports_min_rate(esw, curr_group, extack); 412 esw_qos_normalize_vports_min_rate(esw, new_group, extack); 413 } 414 415 return 0; 416 } 417 418 static struct mlx5_esw_rate_group * 419 __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 420 { 421 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 422 struct mlx5_esw_rate_group *group; 423 u32 divider; 424 int err; 425 426 group = kzalloc(sizeof(*group), GFP_KERNEL); 427 if (!group) 428 return ERR_PTR(-ENOMEM); 429 430 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, 431 esw->qos.root_tsar_ix); 432 err = mlx5_create_scheduling_element_cmd(esw->dev, 433 SCHEDULING_HIERARCHY_E_SWITCH, 434 tsar_ctx, 435 &group->tsar_ix); 436 if (err) { 437 NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); 438 goto err_sched_elem; 439 } 440 441 list_add_tail(&group->list, &esw->qos.groups); 442 443 divider = esw_qos_calculate_min_rate_divider(esw, group, true); 444 if (divider) { 445 err = esw_qos_normalize_groups_min_rate(esw, divider, extack); 446 if (err) { 447 NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); 448 goto err_min_rate; 449 } 450 } 451 trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); 452 453 return group; 454 455 err_min_rate: 456 list_del(&group->list); 457 if (mlx5_destroy_scheduling_element_cmd(esw->dev, 458 SCHEDULING_HIERARCHY_E_SWITCH, 459 group->tsar_ix)) 460 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); 461 err_sched_elem: 462 kfree(group); 463 return ERR_PTR(err); 464 } 465 466 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack); 467 static void esw_qos_put(struct mlx5_eswitch *esw); 468 469 static struct mlx5_esw_rate_group * 470 esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 471 { 472 struct mlx5_esw_rate_group *group; 473 int err; 474 475 if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) 476 return ERR_PTR(-EOPNOTSUPP); 477 478 err = esw_qos_get(esw, extack); 479 if (err) 480 return ERR_PTR(err); 481 482 group = __esw_qos_create_rate_group(esw, extack); 483 if (IS_ERR(group)) 484 esw_qos_put(esw); 485 486 return group; 487 } 488 489 static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, 490 struct mlx5_esw_rate_group *group, 491 struct netlink_ext_ack *extack) 492 { 493 u32 divider; 494 int err; 495 496 list_del(&group->list); 497 498 divider = esw_qos_calculate_min_rate_divider(esw, NULL, true); 499 err = esw_qos_normalize_groups_min_rate(esw, divider, extack); 500 if (err) 501 NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed"); 502 503 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 504 SCHEDULING_HIERARCHY_E_SWITCH, 505 group->tsar_ix); 506 if (err) 507 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); 508 509 trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); 510 511 kfree(group); 512 513 return err; 514 } 515 516 static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, 517 struct mlx5_esw_rate_group *group, 518 struct netlink_ext_ack *extack) 519 { 520 int err; 521 522 err = __esw_qos_destroy_rate_group(esw, group, extack); 523 esw_qos_put(esw); 524 525 return err; 526 } 527 528 static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) 529 { 530 switch (type) { 531 case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: 532 return MLX5_CAP_QOS(dev, esw_element_type) & 533 ELEMENT_TYPE_CAP_MASK_TASR; 534 case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: 535 return MLX5_CAP_QOS(dev, esw_element_type) & 536 ELEMENT_TYPE_CAP_MASK_VPORT; 537 case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: 538 return MLX5_CAP_QOS(dev, esw_element_type) & 539 ELEMENT_TYPE_CAP_MASK_VPORT_TC; 540 case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: 541 return MLX5_CAP_QOS(dev, esw_element_type) & 542 ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; 543 } 544 return false; 545 } 546 547 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 548 { 549 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 550 struct mlx5_core_dev *dev = esw->dev; 551 __be32 *attr; 552 int err; 553 554 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) 555 return -EOPNOTSUPP; 556 557 if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) 558 return -EOPNOTSUPP; 559 560 MLX5_SET(scheduling_context, tsar_ctx, element_type, 561 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); 562 563 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); 564 *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); 565 566 err = mlx5_create_scheduling_element_cmd(dev, 567 SCHEDULING_HIERARCHY_E_SWITCH, 568 tsar_ctx, 569 &esw->qos.root_tsar_ix); 570 if (err) { 571 esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); 572 return err; 573 } 574 575 INIT_LIST_HEAD(&esw->qos.groups); 576 if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { 577 esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); 578 if (IS_ERR(esw->qos.group0)) { 579 esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", 580 PTR_ERR(esw->qos.group0)); 581 err = PTR_ERR(esw->qos.group0); 582 goto err_group0; 583 } 584 } 585 refcount_set(&esw->qos.refcnt, 1); 586 587 return 0; 588 589 err_group0: 590 if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, 591 esw->qos.root_tsar_ix)) 592 esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); 593 594 return err; 595 } 596 597 static void esw_qos_destroy(struct mlx5_eswitch *esw) 598 { 599 int err; 600 601 if (esw->qos.group0) 602 __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); 603 604 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 605 SCHEDULING_HIERARCHY_E_SWITCH, 606 esw->qos.root_tsar_ix); 607 if (err) 608 esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); 609 } 610 611 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) 612 { 613 int err = 0; 614 615 lockdep_assert_held(&esw->state_lock); 616 617 if (!refcount_inc_not_zero(&esw->qos.refcnt)) { 618 /* esw_qos_create() set refcount to 1 only on success. 619 * No need to decrement on failure. 620 */ 621 err = esw_qos_create(esw, extack); 622 } 623 624 return err; 625 } 626 627 static void esw_qos_put(struct mlx5_eswitch *esw) 628 { 629 lockdep_assert_held(&esw->state_lock); 630 if (refcount_dec_and_test(&esw->qos.refcnt)) 631 esw_qos_destroy(esw); 632 } 633 634 static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, 635 u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) 636 { 637 int err; 638 639 lockdep_assert_held(&esw->state_lock); 640 if (vport->qos.enabled) 641 return 0; 642 643 err = esw_qos_get(esw, extack); 644 if (err) 645 return err; 646 647 vport->qos.group = esw->qos.group0; 648 649 err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); 650 if (err) 651 goto err_out; 652 653 vport->qos.enabled = true; 654 trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); 655 656 return 0; 657 658 err_out: 659 esw_qos_put(esw); 660 661 return err; 662 } 663 664 void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) 665 { 666 int err; 667 668 lockdep_assert_held(&esw->state_lock); 669 if (!vport->qos.enabled) 670 return; 671 WARN(vport->qos.group && vport->qos.group != esw->qos.group0, 672 "Disabling QoS on port before detaching it from group"); 673 674 err = mlx5_destroy_scheduling_element_cmd(esw->dev, 675 SCHEDULING_HIERARCHY_E_SWITCH, 676 vport->qos.esw_tsar_ix); 677 if (err) 678 esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", 679 vport->vport, err); 680 681 memset(&vport->qos, 0, sizeof(vport->qos)); 682 trace_mlx5_esw_vport_qos_destroy(vport); 683 684 esw_qos_put(esw); 685 } 686 687 int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, 688 u32 max_rate, u32 min_rate) 689 { 690 int err; 691 692 lockdep_assert_held(&esw->state_lock); 693 err = esw_qos_vport_enable(esw, vport, 0, 0, NULL); 694 if (err) 695 return err; 696 697 err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL); 698 if (!err) 699 err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL); 700 701 return err; 702 } 703 704 int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) 705 { 706 u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; 707 struct mlx5_vport *vport; 708 u32 bitmask; 709 int err; 710 711 vport = mlx5_eswitch_get_vport(esw, vport_num); 712 if (IS_ERR(vport)) 713 return PTR_ERR(vport); 714 715 mutex_lock(&esw->state_lock); 716 if (!vport->qos.enabled) { 717 /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */ 718 err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL); 719 } else { 720 MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); 721 722 bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; 723 err = mlx5_modify_scheduling_element_cmd(esw->dev, 724 SCHEDULING_HIERARCHY_E_SWITCH, 725 ctx, 726 vport->qos.esw_tsar_ix, 727 bitmask); 728 } 729 mutex_unlock(&esw->state_lock); 730 731 return err; 732 } 733 734 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ 735 736 /* Converts bytes per second value passed in a pointer into megabits per 737 * second, rewriting last. If converted rate exceed link speed or is not a 738 * fraction of Mbps - returns error. 739 */ 740 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, 741 u64 *rate, struct netlink_ext_ack *extack) 742 { 743 u32 link_speed_max, reminder; 744 u64 value; 745 int err; 746 747 err = mlx5e_port_max_linkspeed(mdev, &link_speed_max); 748 if (err) { 749 NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); 750 return err; 751 } 752 753 value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder); 754 if (reminder) { 755 pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", 756 name, *rate); 757 NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); 758 return -EINVAL; 759 } 760 761 if (value > link_speed_max) { 762 pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n", 763 name, value, link_speed_max); 764 NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed"); 765 return -EINVAL; 766 } 767 768 *rate = value; 769 return 0; 770 } 771 772 /* Eswitch devlink rate API */ 773 774 int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, 775 u64 tx_share, struct netlink_ext_ack *extack) 776 { 777 struct mlx5_vport *vport = priv; 778 struct mlx5_eswitch *esw; 779 int err; 780 781 esw = vport->dev->priv.eswitch; 782 if (!mlx5_esw_allowed(esw)) 783 return -EPERM; 784 785 err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack); 786 if (err) 787 return err; 788 789 mutex_lock(&esw->state_lock); 790 err = esw_qos_vport_enable(esw, vport, 0, 0, extack); 791 if (err) 792 goto unlock; 793 794 err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); 795 unlock: 796 mutex_unlock(&esw->state_lock); 797 return err; 798 } 799 800 int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, 801 u64 tx_max, struct netlink_ext_ack *extack) 802 { 803 struct mlx5_vport *vport = priv; 804 struct mlx5_eswitch *esw; 805 int err; 806 807 esw = vport->dev->priv.eswitch; 808 if (!mlx5_esw_allowed(esw)) 809 return -EPERM; 810 811 err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack); 812 if (err) 813 return err; 814 815 mutex_lock(&esw->state_lock); 816 err = esw_qos_vport_enable(esw, vport, 0, 0, extack); 817 if (err) 818 goto unlock; 819 820 err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); 821 unlock: 822 mutex_unlock(&esw->state_lock); 823 return err; 824 } 825 826 int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, 827 u64 tx_share, struct netlink_ext_ack *extack) 828 { 829 struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); 830 struct mlx5_eswitch *esw = dev->priv.eswitch; 831 struct mlx5_esw_rate_group *group = priv; 832 int err; 833 834 err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack); 835 if (err) 836 return err; 837 838 mutex_lock(&esw->state_lock); 839 err = esw_qos_set_group_min_rate(esw, group, tx_share, extack); 840 mutex_unlock(&esw->state_lock); 841 return err; 842 } 843 844 int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, 845 u64 tx_max, struct netlink_ext_ack *extack) 846 { 847 struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); 848 struct mlx5_eswitch *esw = dev->priv.eswitch; 849 struct mlx5_esw_rate_group *group = priv; 850 int err; 851 852 err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack); 853 if (err) 854 return err; 855 856 mutex_lock(&esw->state_lock); 857 err = esw_qos_set_group_max_rate(esw, group, tx_max, extack); 858 mutex_unlock(&esw->state_lock); 859 return err; 860 } 861 862 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, 863 struct netlink_ext_ack *extack) 864 { 865 struct mlx5_esw_rate_group *group; 866 struct mlx5_eswitch *esw; 867 int err = 0; 868 869 esw = mlx5_devlink_eswitch_get(rate_node->devlink); 870 if (IS_ERR(esw)) 871 return PTR_ERR(esw); 872 873 mutex_lock(&esw->state_lock); 874 if (esw->mode != MLX5_ESWITCH_OFFLOADS) { 875 NL_SET_ERR_MSG_MOD(extack, 876 "Rate node creation supported only in switchdev mode"); 877 err = -EOPNOTSUPP; 878 goto unlock; 879 } 880 881 group = esw_qos_create_rate_group(esw, extack); 882 if (IS_ERR(group)) { 883 err = PTR_ERR(group); 884 goto unlock; 885 } 886 887 *priv = group; 888 unlock: 889 mutex_unlock(&esw->state_lock); 890 return err; 891 } 892 893 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, 894 struct netlink_ext_ack *extack) 895 { 896 struct mlx5_esw_rate_group *group = priv; 897 struct mlx5_eswitch *esw; 898 int err; 899 900 esw = mlx5_devlink_eswitch_get(rate_node->devlink); 901 if (IS_ERR(esw)) 902 return PTR_ERR(esw); 903 904 mutex_lock(&esw->state_lock); 905 err = esw_qos_destroy_rate_group(esw, group, extack); 906 mutex_unlock(&esw->state_lock); 907 return err; 908 } 909 910 int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, 911 struct mlx5_vport *vport, 912 struct mlx5_esw_rate_group *group, 913 struct netlink_ext_ack *extack) 914 { 915 int err = 0; 916 917 mutex_lock(&esw->state_lock); 918 if (!vport->qos.enabled && !group) 919 goto unlock; 920 921 err = esw_qos_vport_enable(esw, vport, 0, 0, extack); 922 if (!err) 923 err = esw_qos_vport_update_group(esw, vport, group, extack); 924 unlock: 925 mutex_unlock(&esw->state_lock); 926 return err; 927 } 928 929 int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, 930 struct devlink_rate *parent, 931 void *priv, void *parent_priv, 932 struct netlink_ext_ack *extack) 933 { 934 struct mlx5_esw_rate_group *group; 935 struct mlx5_vport *vport = priv; 936 937 if (!parent) 938 return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, 939 vport, NULL, extack); 940 941 group = parent_priv; 942 return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack); 943 } 944