// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include <linux/mlx5/fs.h>
#include "en/mapping.h"
#include "en/tc/int_port.h"
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"

/* State of a single offloaded internal port, identified by the pair
 * (ifindex, type). Instances live on mlx5e_tc_int_port_priv::int_ports,
 * are looked up under int_ports_lock on the control path, and under RCU
 * on the datapath miss path (see mlx5e_int_port_get_from_metadata()).
 */
struct mlx5e_tc_int_port {
	enum mlx5e_tc_int_port_type type; /* ingress or egress direction */
	int ifindex;                      /* ifindex of the backing net device */
	u32 match_metadata;  /* source-port metadata allocated for this port,
			      * written to/matched on reg_c_0
			      */
	u32 mapping;         /* reg_c0 object-pool id; also used as the rx rule
			      * flow tag so misses can be mapped back to this port
			      */
	struct list_head list;            /* entry in priv->int_ports */
	struct mlx5_flow_handle *rx_rule; /* steers matching traffic to the
					   * uplink rep root flow table
					   */
	refcount_t refcnt;                /* get/put usage count; last put removes */
	struct rcu_head rcu_head;         /* deferred free past RCU readers */
};

struct mlx5e_tc_int_port_priv {
	struct mlx5_core_dev *dev;
	struct mutex int_ports_lock; /* Protects int ports list */
	struct list_head int_ports; /* Uses int_ports_lock */
	u16 num_ports; /* current entries; capped at MLX5E_TC_MAX_INT_PORT_NUM */
	bool ul_rep_rx_ready; /* True while the uplink rep rx tables are usable:
			       * set by mlx5e_tc_int_port_init_rep_rx(), cleared
			       * by mlx5e_tc_int_port_cleanup_rep_rx(). While
			       * false, lookups and new additions are refused.
			       */
	struct mapping_ctx *metadata_mapping; /* Metadata for source port rewrite and matching */
};

/* Internal port offload needs vport match metadata enabled on the eswitch
 * plus the HW ability to preserve reg_c values (reg_c_preserve).
 */
bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
{
	return mlx5_eswitch_vport_match_metadata_enabled(esw) &&
	       MLX5_CAP_GEN(esw->dev, reg_c_preserve);
}

/* Raw source-port metadata value allocated for this internal port. */
u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port)
{
	return int_port->match_metadata;
}

/* Flow-source hint for rules forwarding to this internal port. */
int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
{
	/* For egress forwarding we can have the case
	 * where the packet came from a vport and redirected
	 * to int port or it came from the uplink, going
	 * via internal port and hairpinned back to uplink
	 * so we set the source to any port in this case.
	 */
	return int_port->type == MLX5E_TC_INT_PORT_EGRESS ?
		MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT :
		MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
}

/* Metadata shifted into the high ESW_SOURCE_PORT_METADATA_BITS of reg_c_0,
 * which is where source-port metadata is matched.
 */
u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
{
	return int_port->match_metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
}

/* Install a rule in the eswitch ft_offloads table that matches this internal
 * port's metadata in reg_c_0 and forwards to @dest (the uplink rep root
 * table). The flow tag is overwritten with the port's reg_c0 mapping id so a
 * later miss can be resolved back to the internal port.
 * Returns the rule handle or an ERR_PTR; the caller owns the rule.
 */
static struct mlx5_flow_handle *
mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw,
			      struct mlx5e_tc_int_port *int_port,
			      struct mlx5_flow_destination *dest)

{
	struct mlx5_flow_context *flow_context;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_spec *spec;
	void *misc;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return ERR_PTR(-ENOMEM);

	/* Match value: this port's metadata in reg_c_0. */
	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
		 mlx5e_tc_int_port_get_metadata_for_match(int_port));

	/* Match mask: the full vport metadata field. */
	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());

	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;

	/* Overwrite flow tag with the int port metadata mapping
	 * instead of the chain mapping.
	 */
	flow_context = &spec->flow_context;
	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
	flow_context->flow_tag = int_port->mapping;
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
					&flow_act, dest, 1);
	if (IS_ERR(flow_rule))
		mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n",
			       PTR_ERR(flow_rule));

	kvfree(spec);

	return flow_rule;
}

/* Find an existing internal port by (ifindex, type) and take a reference.
 * Returns NULL if not found or if the uplink rep rx path is not ready.
 * Called with priv->int_ports_lock held (via mlx5e_tc_int_port_get()).
 */
static struct mlx5e_tc_int_port *
mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv *priv,
		      int ifindex,
		      enum mlx5e_tc_int_port_type type)
{
	struct mlx5e_tc_int_port *int_port;

	if (!priv->ul_rep_rx_ready)
		goto not_found;

	list_for_each_entry(int_port, &priv->int_ports, list)
		if (int_port->ifindex == ifindex && int_port->type == type) {
			refcount_inc(&int_port->refcnt);
			return int_port;
		}

not_found:
	return NULL;
}

/* Allocate a metadata id for (type, ifindex) from the dedicated mapping and
 * mark its upper PFNUM bits with the reserved 0xf pattern that identifies
 * internal-port metadata. On success *id holds the combined value.
 */
static int mlx5e_int_port_metadata_alloc(struct mlx5e_tc_int_port_priv *priv,
					 int ifindex, enum mlx5e_tc_int_port_type type,
					 u32 *id)
{
	u32 mapped_key[2] = {type, ifindex};
	int err;

	err = mapping_add(priv->metadata_mapping, mapped_key, id);
	if (err)
		return err;

	/* Fill upper 4 bits of PFNUM with reserved value */
	*id |= 0xf << ESW_VPORT_BITS;

	return 0;
}

/* Release a metadata id previously returned by mlx5e_int_port_metadata_alloc().
 * The reserved upper PFNUM bits are masked off to recover the raw mapping id.
 */
static void mlx5e_int_port_metadata_free(struct mlx5e_tc_int_port_priv *priv,
					 u32 id)
{
	id &= (1 << ESW_VPORT_BITS) - 1;
	mapping_remove(priv->metadata_mapping, id);
}

/* Must be called with priv->int_ports_lock held */
static struct mlx5e_tc_int_port *
mlx5e_int_port_add(struct mlx5e_tc_int_port_priv *priv,
		   int ifindex,
		   enum mlx5e_tc_int_port_type type)
{
	struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
	struct mlx5_mapped_obj mapped_obj = {};
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_int_port *int_port;
	struct mlx5_flow_destination dest;
	struct mapping_ctx *ctx;
	u32 match_metadata;
	u32 mapping;
	int err;

	/* Enforce the HW/driver limit on concurrent internal ports. */
	if (priv->num_ports == MLX5E_TC_MAX_INT_PORT_NUM) {
		mlx5_core_dbg(priv->dev, "Cannot add a new int port, max supported %d",
			      MLX5E_TC_MAX_INT_PORT_NUM);
		return ERR_PTR(-ENOSPC);
	}

	int_port = kzalloc(sizeof(*int_port), GFP_KERNEL);
	if (!int_port)
		return ERR_PTR(-ENOMEM);

	err = mlx5e_int_port_metadata_alloc(priv, ifindex, type, &match_metadata);
	if (err) {
		mlx5_core_warn(esw->dev, "Cannot add a new internal port, metadata allocation failed for ifindex %d",
			       ifindex);
		goto err_metadata;
	}

	/* map metadata to reg_c0 object for miss handling */
	ctx = esw->offloads.reg_c0_obj_pool;
	mapped_obj.type = MLX5_MAPPED_OBJ_INT_PORT_METADATA;
	mapped_obj.int_port_metadata = match_metadata;
	err = mapping_add(ctx, &mapped_obj, &mapping);
	if (err)
		goto err_map;

	int_port->type = type;
	int_port->ifindex = ifindex;
	int_port->match_metadata = match_metadata;
	int_port->mapping = mapping;

	/* Create a match on internal vport metadata in vport table */
	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);

	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = uplink_rpriv->root_ft;

	int_port->rx_rule = mlx5e_int_port_create_rx_rule(esw, int_port, &dest);
	if (IS_ERR(int_port->rx_rule)) {
		err = PTR_ERR(int_port->rx_rule);
		mlx5_core_warn(esw->dev, "Can't add internal port rx rule, err %d", err);
		goto err_rx_rule;
	}

	/* Publish: from here RCU readers may see the entry. */
	refcount_set(&int_port->refcnt, 1);
	list_add_rcu(&int_port->list, &priv->int_ports);
	priv->num_ports++;

	return int_port;

err_rx_rule:
	mapping_remove(ctx, int_port->mapping);

err_map:
	mlx5e_int_port_metadata_free(priv, match_metadata);

err_metadata:
	kfree(int_port);

	return ERR_PTR(err);
}

/* Must be called with priv->int_ports_lock held */
static void
mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv,
		      struct mlx5e_tc_int_port *int_port)
{
	struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
	struct mapping_ctx *ctx;

	ctx = esw->offloads.reg_c0_obj_pool;

	/* Unlink first so new RCU readers can no longer find the entry. */
	list_del_rcu(&int_port->list);

	/* The following parameters are not used by the
	 * rcu readers of this int_port object so it is
	 * safe to release them.
	 */
	if (int_port->rx_rule)
		mlx5_del_flow_rules(int_port->rx_rule);
	mapping_remove(ctx, int_port->mapping);
	mlx5e_int_port_metadata_free(priv, int_port->match_metadata);
	/* Defer the free until all current RCU readers are done. */
	kfree_rcu_mightsleep(int_port);
	priv->num_ports--;
}

/* Must be called with rcu_read_lock held */
static struct mlx5e_tc_int_port *
mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv *priv,
				 u32 metadata)
{
	struct mlx5e_tc_int_port *int_port;

	list_for_each_entry_rcu(int_port, &priv->int_ports, list)
		if (int_port->match_metadata == metadata)
			return int_port;

	return NULL;
}

/* Get (or create) the internal port for (ifindex, type), taking a reference
 * that must be released with mlx5e_tc_int_port_put(). Returns an ERR_PTR on
 * failure, including -EOPNOTSUPP when the feature is unsupported or the
 * uplink rep rx tables are not ready.
 */
struct mlx5e_tc_int_port *
mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
		      int ifindex,
		      enum mlx5e_tc_int_port_type type)
{
	struct mlx5e_tc_int_port *int_port;

	if (!priv)
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&priv->int_ports_lock);

	/* Reject request if ul rep not ready */
	if (!priv->ul_rep_rx_ready) {
		int_port = ERR_PTR(-EOPNOTSUPP);
		goto done;
	}

	int_port = mlx5e_int_port_lookup(priv, ifindex, type);
	if (int_port)
		goto done;

	/* Alloc and add new int port to list */
	int_port = mlx5e_int_port_add(priv, ifindex, type);

done:
	mutex_unlock(&priv->int_ports_lock);

	return int_port;
}

/* Drop a reference taken by mlx5e_tc_int_port_get(); the last put removes the
 * port. refcount_dec_and_mutex_lock() only takes int_ports_lock when the
 * count hits zero, closing the race with a concurrent get.
 */
void
mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
		      struct mlx5e_tc_int_port *int_port)
{
	if (!refcount_dec_and_mutex_lock(&int_port->refcnt, &priv->int_ports_lock))
		return;

	mlx5e_int_port_remove(priv, int_port);
	mutex_unlock(&priv->int_ports_lock);
}

/* Allocate the internal-port context for this eswitch. Returns NULL both when
 * the feature is unsupported and on allocation failure (callers treat a NULL
 * context as "feature off", see the !priv checks above).
 */
struct mlx5e_tc_int_port_priv *
mlx5e_tc_int_port_init(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_int_port_priv *int_port_priv;
	u64 mapping_id;

	if (!mlx5e_tc_int_port_supported(esw))
		return NULL;

	int_port_priv = kzalloc(sizeof(*int_port_priv), GFP_KERNEL);
	if (!int_port_priv)
		return NULL;

	/* Share one mapping across functions with the same system image GUID. */
	mapping_id = mlx5_query_nic_system_image_guid(priv->mdev);

	/* Key is {type, ifindex}; ids are bounded so they fit in the VPORT
	 * bits of the source-port metadata (upper PFNUM bits are reserved,
	 * see mlx5e_int_port_metadata_alloc()).
	 */
	int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT,
								sizeof(u32) * 2,
								(1 << ESW_VPORT_BITS) - 1, true);
	if (IS_ERR(int_port_priv->metadata_mapping)) {
		mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%ld\n",
			       PTR_ERR(int_port_priv->metadata_mapping));
		goto err_mapping;
	}

	int_port_priv->dev = priv->mdev;
	mutex_init(&int_port_priv->int_ports_lock);
	INIT_LIST_HEAD(&int_port_priv->int_ports);

	return int_port_priv;

err_mapping:
	kfree(int_port_priv);

	return NULL;
}

/* Tear down the internal-port context. By this point all ports are expected
 * to have been released; only the context-level resources are freed here.
 */
void
mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv)
{
	if (!priv)
		return;

	mutex_destroy(&priv->int_ports_lock);
	mapping_destroy(priv->metadata_mapping);
	kfree(priv);
}

/* Int port rx rules reside in ul rep rx tables.
 * It is possible the ul rep will go down while there are
 * still int port rules in its rx table so proper cleanup
 * is required to free resources.
 */
void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_tc_int_port_priv *ppriv;
	struct mlx5e_rep_priv *uplink_rpriv;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	ppriv = uplink_priv->int_port_priv;

	if (!ppriv)
		return;

	/* Allow lookups/additions now that the uplink rep rx tables exist. */
	mutex_lock(&ppriv->int_ports_lock);
	ppriv->ul_rep_rx_ready = true;
	mutex_unlock(&ppriv->int_ports_lock);
}

/* Uplink rep rx teardown: mark the context not ready and delete every rx
 * rule, since the tables holding them are going away. Port objects stay on
 * the list (rx_rule is NULLed) until their owners drop their references.
 */
void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_tc_int_port_priv *ppriv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_int_port *int_port;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	ppriv = uplink_priv->int_port_priv;

	if (!ppriv)
		return;

	mutex_lock(&ppriv->int_ports_lock);

	ppriv->ul_rep_rx_ready = false;

	list_for_each_entry(int_port, &ppriv->int_ports, list) {
		if (!IS_ERR_OR_NULL(int_port->rx_rule))
			mlx5_del_flow_rules(int_port->rx_rule);

		/* NULL prevents a double delete in mlx5e_int_port_remove(). */
		int_port->rx_rule = NULL;
	}

	mutex_unlock(&ppriv->int_ports_lock);
}

/* Resolve @int_vport_metadata (from a HW miss) to an internal port and
 * redirect @skb to its net device. Returns true on success, with *forward_tx
 * telling the caller whether to transmit (egress port) or receive (ingress
 * port) the skb; false if no port or device matches.
 */
bool
mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
			  struct sk_buff *skb, u32 int_vport_metadata,
			  bool *forward_tx)
{
	enum mlx5e_tc_int_port_type fwd_type;
	struct mlx5e_tc_int_port *int_port;
	struct net_device *dev;
	int ifindex;

	if (!priv)
		return false;

	rcu_read_lock();
	int_port = mlx5e_int_port_get_from_metadata(priv, int_vport_metadata);
	if (!int_port) {
		rcu_read_unlock();
		mlx5_core_dbg(priv->dev, "Unable to find int port with metadata 0x%.8x\n",
			      int_vport_metadata);
		return false;
	}

	/* Copy out what we need; int_port may be freed after unlock. */
	ifindex = int_port->ifindex;
	fwd_type = int_port->type;
	rcu_read_unlock();

	/* NOTE(review): dev_get_by_index() takes a reference to dev and no
	 * dev_put() is visible here — presumably the skb consumer releases
	 * it; confirm against the rx/tx path that handles this skb.
	 */
	dev = dev_get_by_index(&init_net, ifindex);
	if (!dev) {
		mlx5_core_dbg(priv->dev,
			      "Couldn't find internal port device with ifindex: %d\n",
			      ifindex);
		return false;
	}

	skb->skb_iif = dev->ifindex;
	skb->dev = dev;

	if (fwd_type == MLX5E_TC_INT_PORT_INGRESS) {
		/* Ingress: deliver the skb up the local stack. */
		skb->pkt_type = PACKET_HOST;
		skb_set_redirected(skb, true);
		*forward_tx = false;
	} else {
		/* Egress: re-add the MAC header and transmit via the device. */
		skb_reset_network_header(skb);
		skb_push_rcsum(skb, skb->mac_len);
		skb_set_redirected(skb, false);
		*forward_tx = true;
	}

	return true;
}