// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <linux/etherdevice.h>
#include <linux/idr.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "en.h"
#include "en_tc.h"
#include "fs_core.h"
#include "esw/indir_table.h"
#include "lib/fs_chains.h"
#include "en/mod_hdr.h"

#define MLX5_ESW_INDIR_TABLE_SIZE 128
#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)

struct mlx5_esw_indir_table_rule {
	struct list_head list;
	struct mlx5_flow_handle *handle;
	union {
		__be32 v4;
		struct in6_addr v6;
	} dst_ip;
	u32 vni;
	struct mlx5_modify_hdr *mh;
	refcount_t refcnt;
};

struct mlx5_esw_indir_table_entry {
	struct hlist_node hlist;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *recirc_grp;
	struct mlx5_flow_group *fwd_grp;
	struct mlx5_flow_handle *fwd_rule;
	struct list_head recirc_rules;
	int recirc_cnt;
	int fwd_ref;

	u16 vport;
	u8 ip_version;
};

struct mlx5_esw_indir_table {
	struct mutex lock; /* protects table */
	DECLARE_HASHTABLE(table, 8);
};

struct mlx5_esw_indir_table *
mlx5_esw_indir_table_init(void)
{
	struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);

	if (!indir)
		return ERR_PTR(-ENOMEM);

	mutex_init(&indir->lock);
	hash_init(indir->table);
	return indir;
}

void
mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
{
	mutex_destroy(&indir->lock);
	kvfree(indir);
}

bool
mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
			    struct mlx5_flow_attr *attr,
			    u16 vport_num,
			    struct mlx5_core_dev *dest_mdev)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	bool vf_sf_vport;

	vf_sf_vport = mlx5_eswitch_is_vf_vport(esw, vport_num) ||
		      mlx5_esw_is_sf_vport(esw, vport_num);

	/* Use indirect table for all IP traffic from UL to VF with vport
	 * destination when source rewrite flag is set.
	 */
	return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
	       vf_sf_vport &&
	       esw->dev == dest_mdev &&
	       attr->ip_version &&
	       attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE;
}

u16
mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;

	return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
}
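
/* Recirculation rules match decapsulated tunnel traffic (outer destination
 * IP + VXLAN VNI, with the uplink's metadata in reg_c_0) and rewrite the
 * flow source metadata so the packet re-enters chain 0 of the FDB as if it
 * arrived from the tunnel's decap vport. Rules are kept on a per-entry list
 * and refcounted, so identical <VNI, dst IP> requests share a single rule.
 */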
static struct mlx5_esw_indir_table_rule *
mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e,
				 struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_esw_indir_table_rule *rule;

	list_for_each_entry(rule, &e->recirc_rules, list)
		if (rule->vni == attr->rx_tun_attr->vni &&
		    !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip,
			    sizeof(attr->rx_tun_attr->dst_ip)))
			goto found;
	return NULL;

found:
	refcount_inc(&rule->refcnt);
	return rule;
}

static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
					 struct mlx5_flow_attr *attr,
					 struct mlx5_flow_spec *spec,
					 struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct mlx5_flow_destination dest = {};
	struct mlx5_esw_indir_table_rule *rule;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *rule_spec;
	struct mlx5_flow_handle *handle;
	int err = 0;
	u32 data;

	rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr);
	if (rule)
		return 0;

	if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX)
		return -EINVAL;

	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
	if (!rule_spec)
		return -ENOMEM;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule) {
		err = -ENOMEM;
		goto out;
	}

	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
					   MLX5_MATCH_MISC_PARAMETERS |
					   MLX5_MATCH_MISC_PARAMETERS_2;
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) {
		MLX5_SET(fte_match_param, rule_spec->match_criteria,
			 outer_headers.ip_version, 0xf);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version,
			 attr->ip_version);
	} else if (attr->ip_version) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.ethertype);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype,
			 (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6));
	} else {
		err = -EOPNOTSUPP;
		goto err_ethertype;
	}
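
	/* Match the tunnel's outer destination IP; IPv6 addresses are
	 * compared as a full 128-bit byte array.
	 */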
	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		MLX5_SET(fte_match_param, rule_spec->match_value,
			 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(esw_attr->rx_tun_attr->dst_ip.v4));
	} else if (attr->ip_version == 6) {
		int len = sizeof(struct in6_addr);

		memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, len);
		memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &esw_attr->rx_tun_attr->dst_ip.v6, len);
	}

	MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
			 misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni,
		 MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni));

	MLX5_SET(fte_match_param, rule_spec->match_criteria,
		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch,
							   MLX5_VPORT_UPLINK));

	/* Modify flow source to recirculate packet */
	data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					VPORT_TO_REG, data);
	if (err)
		goto err_mod_hdr_regc0;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
	if (err)
		goto err_mod_hdr_regc1;

	flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
						       mod_acts.num_actions, mod_acts.actions);
	if (IS_ERR(flow_act.modify_hdr)) {
		err = PTR_ERR(flow_act.modify_hdr);
		goto err_mod_hdr_alloc;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(dest.ft)) {
		err = PTR_ERR(dest.ft);
		goto err_table;
	}
	handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto err_handle;
	}

	mlx5e_mod_hdr_dealloc(&mod_acts);
	rule->handle = handle;
	rule->vni = esw_attr->rx_tun_attr->vni;
	rule->mh = flow_act.modify_hdr;
	memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
	       sizeof(esw_attr->rx_tun_attr->dst_ip));
	refcount_set(&rule->refcnt, 1);
	list_add(&rule->list, &e->recirc_rules);
	e->recirc_cnt++;
	goto out;

err_handle:
	mlx5_chains_put_table(chains, 0, 1, 0);
err_table:
	mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
err_mod_hdr_alloc:
err_mod_hdr_regc1:
	mlx5e_mod_hdr_dealloc(&mod_acts);
err_mod_hdr_regc0:
err_ethertype:
	kfree(rule);
out:
	kvfree(rule_spec);
	return err;
}
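
/* Drop one reference on the <VNI, dst IP> recirculation rule; the hardware
 * rule and its modify header are destroyed only when the last user is gone.
 */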
static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5_esw_indir_table_rule *rule;

	list_for_each_entry(rule, &e->recirc_rules, list)
		if (rule->vni == esw_attr->rx_tun_attr->vni &&
		    !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
			    sizeof(esw_attr->rx_tun_attr->dst_ip)))
			goto found;

	return;

found:
	if (!refcount_dec_and_test(&rule->refcnt))
		return;

	mlx5_del_flow_rules(rule->handle);
	mlx5_chains_put_table(chains, 0, 1, 0);
	mlx5_modify_header_dealloc(esw->dev, rule->mh);
	list_del(&rule->list);
	kfree(rule);
	e->recirc_cnt--;
}

static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_flow_spec *spec,
					  struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	u32 *in, *match;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
		 MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2);
	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);

	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version))
		MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf);
	else
		MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype);

	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, match,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
	} else if (attr->ip_version == 6) {
		memset(MLX5_ADDR_OF(fte_match_param, match,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, sizeof(struct in6_addr));
	} else {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX);
	e->recirc_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->recirc_grp)) {
		err = PTR_ERR(e->recirc_grp);
		goto out;
	}

	INIT_LIST_HEAD(&e->recirc_rules);
	e->recirc_cnt = 0;

out:
	kvfree(in);
	return err;
}

static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
				       struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	u32 *in;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		kvfree(in);
		return -ENOMEM;
	}

	/* Hold one entry */
	MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	e->fwd_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->fwd_grp)) {
		err = PTR_ERR(e->fwd_grp);
		goto err_out;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest.vport.num = e->vport;
	dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
	dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
	e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(e->fwd_rule)) {
		mlx5_destroy_flow_group(e->fwd_grp);
		err = PTR_ERR(e->fwd_rule);
	}

err_out:
	kvfree(spec);
	kvfree(in);
	return err;
}
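
/* Each <vport, IP version> entry owns one unmanaged FDB table of
 * MLX5_ESW_INDIR_TABLE_SIZE entries: indices 0..RECIRC_IDX_MAX form the
 * recirculation group, and the last index holds a catch-all forward group
 * whose single rule steers any unmatched traffic to the destination vport.
 */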
static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
				  struct mlx5_flow_spec *spec, u16 vport, bool decap)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_esw_indir_table_entry *e;
	struct mlx5_flow_table *ft;
	int err = 0;

	root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!root_ns)
		return ERR_PTR(-ENOENT);

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);

	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.level = 1;

	ft = mlx5_create_flow_table(root_ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto tbl_err;
	}
	e->ft = ft;
	e->vport = vport;
	e->ip_version = attr->ip_version;
	e->fwd_ref = !decap;

	err = mlx5_create_indir_recirc_group(esw, attr, spec, e);
	if (err)
		goto recirc_grp_err;

	if (decap) {
		err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
		if (err)
			goto recirc_rule_err;
	}

	err = mlx5_create_indir_fwd_group(esw, e);
	if (err)
		goto fwd_grp_err;

	hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
		 vport << 16 | attr->ip_version);

	return e;

fwd_grp_err:
	if (decap)
		mlx5_esw_indir_table_rule_put(esw, attr, e);
recirc_rule_err:
	mlx5_destroy_flow_group(e->recirc_grp);
recirc_grp_err:
	mlx5_destroy_flow_table(e->ft);
tbl_err:
	kfree(e);
	return ERR_PTR(err);
}

static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version)
{
	struct mlx5_esw_indir_table_entry *e;
	u32 key = vport << 16 | ip_version;

	hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
		if (e->vport == vport && e->ip_version == ip_version)
			return e;

	return NULL;
}
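
/* Get (or create) the indirect table entry for <vport, attr->ip_version> and
 * take a reference on it: the fwd rule reference when !decap, or a
 * recirculation rule reference otherwise. Each successful call must be
 * balanced by mlx5_esw_indir_table_put() with the same vport and decap.
 */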
struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
						 struct mlx5_flow_attr *attr,
						 struct mlx5_flow_spec *spec,
						 u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;
	int err;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (e) {
		if (!decap) {
			e->fwd_ref++;
		} else {
			err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
			if (err)
				goto out_err;
		}
	} else {
		e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap);
		if (IS_ERR(e)) {
			err = PTR_ERR(e);
			esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
			goto out_err;
		}
	}
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return e->ft;

out_err:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return ERR_PTR(err);
}

void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
			      struct mlx5_flow_attr *attr,
			      u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (!e)
		goto out;

	if (!decap)
		e->fwd_ref--;
	else
		mlx5_esw_indir_table_rule_put(esw, attr, e);

	if (e->fwd_ref || e->recirc_cnt)
		goto out;

	hash_del(&e->hlist);
	mlx5_destroy_flow_group(e->recirc_grp);
	mlx5_del_flow_rules(e->fwd_rule);
	mlx5_destroy_flow_group(e->fwd_grp);
	mlx5_destroy_flow_table(e->ft);
	kfree(e);
out:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
}