// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <linux/etherdevice.h>
#include <linux/idr.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "en.h"
#include "en_tc.h"
#include "fs_core.h"
#include "esw/indir_table.h"
#include "lib/fs_chains.h"

#define MLX5_ESW_INDIR_TABLE_SIZE 128
#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)

struct mlx5_esw_indir_table_rule {
	struct list_head list;
	struct mlx5_flow_handle *handle;
	union {
		__be32 v4;
		struct in6_addr v6;
	} dst_ip;
	u32 vni;
	struct mlx5_modify_hdr *mh;
	refcount_t refcnt;
};

struct mlx5_esw_indir_table_entry {
	struct hlist_node hlist;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *recirc_grp;
	struct mlx5_flow_group *fwd_grp;
	struct mlx5_flow_handle *fwd_rule;
	struct list_head recirc_rules;
	int recirc_cnt;
	int fwd_ref;

	u16 vport;
	u8 ip_version;
};

struct mlx5_esw_indir_table {
	struct mutex lock; /* protects table */
	DECLARE_HASHTABLE(table, 8);
};

struct mlx5_esw_indir_table *
mlx5_esw_indir_table_init(void)
{
	struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);

	if (!indir)
		return ERR_PTR(-ENOMEM);

	mutex_init(&indir->lock);
	hash_init(indir->table);
	return indir;
}

void
mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
{
	mutex_destroy(&indir->lock);
	kvfree(indir);
}

bool
mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
			    struct mlx5_flow_attr *attr,
			    u16 vport_num,
			    struct mlx5_core_dev *dest_mdev)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;

	/* Use indirect table for all IP traffic from UL to VF with vport
	 * destination when source rewrite flag is set.
	 */
	return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
		mlx5_eswitch_is_vf_vport(esw, vport_num) &&
		esw->dev == dest_mdev &&
		attr->ip_version &&
		attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
}

u16
mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;

	return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
}
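
/* Recirculation rules are kept per indirection table entry, keyed by the
 * tunnel (VNI, outer destination IP) pair. Lookup takes a reference on a
 * match; callers run under the indirection table lock.
 */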
static struct mlx5_esw_indir_table_rule *
mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e,
				 struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_esw_indir_table_rule *rule;

	list_for_each_entry(rule, &e->recirc_rules, list)
		if (rule->vni == attr->rx_tun_attr->vni &&
		    !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip,
			    sizeof(attr->rx_tun_attr->dst_ip)))
			goto found;
	return NULL;

found:
	refcount_inc(&rule->refcnt);
	return rule;
}

static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
					 struct mlx5_flow_attr *attr,
					 struct mlx5_flow_spec *spec,
					 struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct mlx5_flow_destination dest = {};
	struct mlx5_esw_indir_table_rule *rule;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *rule_spec;
	struct mlx5_flow_handle *handle;
	int err = 0;
	u32 data;

	rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr);
	if (rule)
		return 0;

	if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX)
		return -EINVAL;

	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
	if (!rule_spec)
		return -ENOMEM;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule) {
		err = -ENOMEM;
		goto out;
	}

	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
					   MLX5_MATCH_MISC_PARAMETERS |
					   MLX5_MATCH_MISC_PARAMETERS_2;
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) {
		MLX5_SET(fte_match_param, rule_spec->match_criteria,
			 outer_headers.ip_version, 0xf);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version,
			 attr->ip_version);
	} else if (attr->ip_version) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.ethertype);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype,
			 (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6));
	} else {
		err = -EOPNOTSUPP;
		goto err_ethertype;
	}

	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		MLX5_SET(fte_match_param, rule_spec->match_value,
			 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(esw_attr->rx_tun_attr->dst_ip.v4));
	} else if (attr->ip_version == 6) {
		int len = sizeof(struct in6_addr);

		memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, len);
		memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &esw_attr->rx_tun_attr->dst_ip.v6, len);
	}

	MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
			 misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni,
		 MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni));

	MLX5_SET(fte_match_param, rule_spec->match_criteria,
		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch,
							   MLX5_VPORT_UPLINK));

	/* Modify flow source to recirculate packet */
	data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					VPORT_TO_REG, data);
	if (err)
		goto err_mod_hdr_regc0;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
	if (err)
		goto err_mod_hdr_regc1;

	flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
						       mod_acts.num_actions, mod_acts.actions);
	if (IS_ERR(flow_act.modify_hdr)) {
		err = PTR_ERR(flow_act.modify_hdr);
		goto err_mod_hdr_alloc;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(dest.ft)) {
		err = PTR_ERR(dest.ft);
		goto err_table;
	}
	handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto err_handle;
	}

	dealloc_mod_hdr_actions(&mod_acts);
	rule->handle = handle;
	rule->vni = esw_attr->rx_tun_attr->vni;
	rule->mh = flow_act.modify_hdr;
	memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
	       sizeof(esw_attr->rx_tun_attr->dst_ip));
	refcount_set(&rule->refcnt, 1);
	list_add(&rule->list, &e->recirc_rules);
	e->recirc_cnt++;
	goto out;

err_handle:
	mlx5_chains_put_table(chains, 0, 1, 0);
err_table:
	mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
err_mod_hdr_alloc:
err_mod_hdr_regc1:
	dealloc_mod_hdr_actions(&mod_acts);
err_mod_hdr_regc0:
err_ethertype:
	kfree(rule);
out:
	kvfree(rule_spec);
	return err;
}
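
/* Drop a reference on the recirculation rule matching attr, mirroring
 * mlx5_esw_indir_table_rule_get(). The rule and its modify header are
 * freed once the last reference is gone.
 */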
static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5_esw_indir_table_rule *rule;

	list_for_each_entry(rule, &e->recirc_rules, list)
		if (rule->vni == esw_attr->rx_tun_attr->vni &&
		    !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
			    sizeof(esw_attr->rx_tun_attr->dst_ip)))
			goto found;

	return;

found:
	if (!refcount_dec_and_test(&rule->refcnt))
		return;

	mlx5_del_flow_rules(rule->handle);
	mlx5_chains_put_table(chains, 0, 1, 0);
	mlx5_modify_header_dealloc(esw->dev, rule->mh);
	list_del(&rule->list);
	kfree(rule);
	e->recirc_cnt--;
}

static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_flow_spec *spec,
					  struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	u32 *in, *match;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
		 MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2);
	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);

	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version))
		MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf);
	else
		MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype);

	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, match,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
	} else if (attr->ip_version == 6) {
		memset(MLX5_ADDR_OF(fte_match_param, match,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, sizeof(struct in6_addr));
	} else {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX);
	e->recirc_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->recirc_grp)) {
		err = PTR_ERR(e->recirc_grp);
		goto out;
	}

	INIT_LIST_HEAD(&e->recirc_rules);
	e->recirc_cnt = 0;

out:
	kvfree(in);
	return err;
}
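
/* The last flow table index holds a single match-all rule that forwards
 * any packet not caught by the recirculation group to the VF vport.
 */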
static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
				       struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	u32 *in;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		kvfree(in);
		return -ENOMEM;
	}

	/* Hold one entry */
	MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	e->fwd_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->fwd_grp)) {
		err = PTR_ERR(e->fwd_grp);
		goto err_out;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest.vport.num = e->vport;
	dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
	e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(e->fwd_rule)) {
		mlx5_destroy_flow_group(e->fwd_grp);
		err = PTR_ERR(e->fwd_rule);
	}

err_out:
	kvfree(spec);
	kvfree(in);
	return err;
}

static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
				  struct mlx5_flow_spec *spec, u16 vport, bool decap)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_esw_indir_table_entry *e;
	struct mlx5_flow_table *ft;
	int err = 0;

	root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!root_ns)
		return ERR_PTR(-ENOENT);

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);

	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.level = 1;

	ft = mlx5_create_flow_table(root_ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto tbl_err;
	}
	e->ft = ft;
	e->vport = vport;
	e->ip_version = attr->ip_version;
	e->fwd_ref = !decap;

	err = mlx5_create_indir_recirc_group(esw, attr, spec, e);
	if (err)
		goto recirc_grp_err;

	if (decap) {
		err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
		if (err)
			goto recirc_rule_err;
	}

	err = mlx5_create_indir_fwd_group(esw, e);
	if (err)
		goto fwd_grp_err;

	hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
		 vport << 16 | attr->ip_version);

	return e;

fwd_grp_err:
	if (decap)
		mlx5_esw_indir_table_rule_put(esw, attr, e);
recirc_rule_err:
	mlx5_destroy_flow_group(e->recirc_grp);
recirc_grp_err:
	mlx5_destroy_flow_table(e->ft);
tbl_err:
	kfree(e);
	return ERR_PTR(err);
}
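
/* Entries are hashed by (vport, ip_version), so each VF vport gets at
 * most one indirection table per IP version.
 */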
static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version)
{
	struct mlx5_esw_indir_table_entry *e;
	u32 key = vport << 16 | ip_version;

	hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
		if (e->vport == vport && e->ip_version == ip_version)
			return e;

	return NULL;
}

struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
						 struct mlx5_flow_attr *attr,
						 struct mlx5_flow_spec *spec,
						 u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;
	int err;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (e) {
		if (!decap) {
			e->fwd_ref++;
		} else {
			err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
			if (err)
				goto out_err;
		}
	} else {
		e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap);
		if (IS_ERR(e)) {
			err = PTR_ERR(e);
			esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
			goto out_err;
		}
	}
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return e->ft;

out_err:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return ERR_PTR(err);
}
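
/* Release a reference on the per-(vport, ip_version) entry: a forward
 * reference for non-decap users, a recirculation rule for decap users.
 * The entry and its flow table are destroyed once both counts drop to
 * zero.
 */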
void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
			      struct mlx5_flow_attr *attr,
			      u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (!e)
		goto out;

	if (!decap)
		e->fwd_ref--;
	else
		mlx5_esw_indir_table_rule_put(esw, attr, e);

	if (e->fwd_ref || e->recirc_cnt)
		goto out;

	hash_del(&e->hlist);
	mlx5_destroy_flow_group(e->recirc_grp);
	mlx5_del_flow_rules(e->fwd_rule);
	mlx5_destroy_flow_group(e->fwd_grp);
	mlx5_destroy_flow_table(e->ft);
	kfree(e);
out:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
}