// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "esw/qos.h"
#include "en/port.h"

/* Minimum bandwidth share value supported by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
	min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)

/* Update the max_average_bw and bw_share fields of an already created
 * vport scheduling element.
 */
static int esw_qos_vport_config(struct mlx5_eswitch *esw,
				struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share,
				struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	void *vport_elem;
	u32 bitmask = 0;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	if (!vport->qos.enabled)
		return -EIO;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
				  element_attributes);
	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, esw->qos.root_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

	err = mlx5_modify_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 vport->qos.esw_tsar_ix,
						 bitmask);
	if (err) {
		esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
		return err;
	}

	return 0;
}

/* Return the divider that scales the largest configured min_rate down to the
 * FW's maximum bw_share value; all vports are normalized by the same factor.
 */
static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_vport *evport;
	u32 max_guarantee = 0;
	unsigned long i;

	mlx5_esw_for_each_vport(esw, i, evport) {
		if (!evport->enabled || evport->qos.min_rate < max_guarantee)
			continue;
		max_guarantee = evport->qos.min_rate;
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
	return 0;
}

static int
esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	u32 divider = calculate_vports_min_rate_divider(esw);
	struct mlx5_vport *evport;
	u32 vport_max_rate;
	u32 vport_min_rate;
	unsigned long i;
	u32 bw_share;
	int err;

	mlx5_esw_for_each_vport(esw, i, evport) {
		if (!evport->enabled)
			continue;
		vport_min_rate = evport->qos.min_rate;
		vport_max_rate = evport->qos.max_rate;
		bw_share = 0;

		if (divider)
			bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate,
							 divider,
							 fw_max_bw_share);

		if (bw_share == evport->qos.bw_share)
			continue;

		err = esw_qos_vport_config(esw, evport, vport_max_rate, bw_share, extack);
		if (!err)
			evport->qos.bw_share = bw_share;
		else
			return err;
	}

	return 0;
}

int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
				    struct mlx5_vport *evport,
				    u32 min_rate,
				    struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share, previous_min_rate;
	bool min_rate_supported;
	int err;

	lockdep_assert_held(&esw->state_lock);
	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
				fw_max_bw_share >= MLX5_MIN_BW_SHARE;
	if (min_rate && !min_rate_supported)
		return -EOPNOTSUPP;
	if (min_rate == evport->qos.min_rate)
		return 0;

	/* Changing one vport's min_rate can rescale every vport's bw_share,
	 * so renormalize them all; roll back on failure.
	 */
	previous_min_rate = evport->qos.min_rate;
	evport->qos.min_rate = min_rate;
	err = esw_qos_normalize_vports_min_rate(esw, extack);
	if (err)
		evport->qos.min_rate = previous_min_rate;

	return err;
}
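
/* Worked example (illustrative values, not from any specific device): with
 * max_tsar_bw_share of 100 and three enabled vports whose min_rate values
 * are 2000, 1000 and 0 Mbps, the divider is max(2000 / 100, 1) = 20, so
 * esw_qos_normalize_vports_min_rate() programs bw_share values of 100, 50
 * and MLX5_MIN_BW_SHARE respectively.
 */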

int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
				    struct mlx5_vport *evport,
				    u32 max_rate,
				    struct netlink_ext_ack *extack)
{
	bool max_rate_supported;
	int err;

	lockdep_assert_held(&esw->state_lock);
	max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);

	if (max_rate && !max_rate_supported)
		return -EOPNOTSUPP;
	if (max_rate == evport->qos.max_rate)
		return 0;

	err = esw_qos_vport_config(esw, evport, max_rate, evport->qos.bw_share, extack);
	if (!err)
		evport->qos.max_rate = max_rate;

	return err;
}

static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
{
	switch (type) {
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_TASR;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_VPORT;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
	}
	return false;
}

void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	__be32 *attr;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return;

	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
		return;

	if (esw->qos.enabled)
		return;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

	/* The root TSAR arbitrates between its vport elements using DWRR. */
	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);

	err = mlx5_create_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 tsar_ctx,
						 &esw->qos.root_tsar_ix);
	if (err) {
		esw_warn(dev, "E-Switch create TSAR failed (%d)\n", err);
		return;
	}

	esw->qos.enabled = true;
}

void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
{
	int err;

	if (!esw->qos.enabled)
		return;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err);

	esw->qos.enabled = false;
}
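
/* Typical lifecycle (for reference): mlx5_esw_qos_create() builds the root
 * DWRR TSAR once per eswitch, mlx5_esw_qos_vport_enable() attaches a vport
 * element beneath it, the mlx5_esw_qos_set_vport_{min,max}_rate() helpers
 * adjust that element, and mlx5_esw_qos_vport_disable() /
 * mlx5_esw_qos_destroy() tear the hierarchy down in reverse order.
 */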

int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
			      u32 max_rate, u32 bw_share)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	void *vport_elem;
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (!esw->qos.enabled)
		return 0;

	if (vport->qos.enabled)
		return -EEXIST;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, esw->qos.root_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

	err = mlx5_create_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 &vport->qos.esw_tsar_ix);
	if (err)
		esw_warn(dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);
	else
		vport->qos.enabled = true;

	return err;
}

void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (!esw->qos.enabled || !vport->qos.enabled)
		return;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  vport->qos.esw_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);

	vport->qos.enabled = false;
}

int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_vport *vport;
	u32 bitmask;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (!vport->qos.enabled)
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
	bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;

	return mlx5_modify_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  ctx,
						  vport->qos.esw_tsar_ix,
						  bitmask);
}

#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */

/* Convert the bytes-per-second value passed in a pointer into megabits per
 * second, rewriting the value in place. Return an error if the converted
 * rate exceeds the link speed or is not a whole multiple of 1 Mbps: e.g.
 * 1250000000 Bps becomes 10000 Mbps, while 200000 Bps (1.6 Mbps) is rejected.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
					u64 *rate, struct netlink_ext_ack *extack)
{
	u32 link_speed_max, remainder;
	u64 value;
	int err;

	err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
		return err;
	}

	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
	if (remainder) {
		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
		       name, *rate);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
		return -EINVAL;
	}

	if (value > link_speed_max) {
		pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
		       name, value, link_speed_max);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
		return -EINVAL;
	}

	*rate = value;
	return 0;
}

/* Eswitch devlink rate API */

int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}