1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2021 Mellanox Technologies. */ 3 4 #include <linux/etherdevice.h> 5 #include <linux/idr.h> 6 #include <linux/mlx5/driver.h> 7 #include <linux/mlx5/mlx5_ifc.h> 8 #include <linux/mlx5/vport.h> 9 #include <linux/mlx5/fs.h> 10 #include "mlx5_core.h" 11 #include "eswitch.h" 12 #include "en.h" 13 #include "en_tc.h" 14 #include "fs_core.h" 15 #include "esw/indir_table.h" 16 #include "lib/fs_chains.h" 17 18 #define MLX5_ESW_INDIR_TABLE_SIZE 128 19 #define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2) 20 #define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1) 21 22 struct mlx5_esw_indir_table_rule { 23 struct list_head list; 24 struct mlx5_flow_handle *handle; 25 union { 26 __be32 v4; 27 struct in6_addr v6; 28 } dst_ip; 29 u32 vni; 30 struct mlx5_modify_hdr *mh; 31 refcount_t refcnt; 32 }; 33 34 struct mlx5_esw_indir_table_entry { 35 struct hlist_node hlist; 36 struct mlx5_flow_table *ft; 37 struct mlx5_flow_group *recirc_grp; 38 struct mlx5_flow_group *fwd_grp; 39 struct mlx5_flow_handle *fwd_rule; 40 struct list_head recirc_rules; 41 int recirc_cnt; 42 int fwd_ref; 43 44 u16 vport; 45 u8 ip_version; 46 }; 47 48 struct mlx5_esw_indir_table { 49 struct mutex lock; /* protects table */ 50 DECLARE_HASHTABLE(table, 8); 51 }; 52 53 struct mlx5_esw_indir_table * 54 mlx5_esw_indir_table_init(void) 55 { 56 struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL); 57 58 if (!indir) 59 return ERR_PTR(-ENOMEM); 60 61 mutex_init(&indir->lock); 62 hash_init(indir->table); 63 return indir; 64 } 65 66 void 67 mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) 68 { 69 mutex_destroy(&indir->lock); 70 kvfree(indir); 71 } 72 73 bool 74 mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, 75 struct mlx5_flow_attr *attr, 76 u16 vport_num, 77 struct mlx5_core_dev *dest_mdev) 78 { 79 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; 80 81 /* Use indirect table for all IP traffic from UL to VF with vport 82 * destination when source rewrite flag is set. 83 */ 84 return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK && 85 mlx5_eswitch_is_vf_vport(esw, vport_num) && 86 esw->dev == dest_mdev && 87 attr->ip_version && 88 attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE; 89 } 90 91 u16 92 mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr) 93 { 94 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; 95 96 return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0; 97 } 98 99 static struct mlx5_esw_indir_table_rule * 100 mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e, 101 struct mlx5_esw_flow_attr *attr) 102 { 103 struct mlx5_esw_indir_table_rule *rule; 104 105 list_for_each_entry(rule, &e->recirc_rules, list) 106 if (rule->vni == attr->rx_tun_attr->vni && 107 !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip, 108 sizeof(attr->rx_tun_attr->dst_ip))) 109 goto found; 110 return NULL; 111 112 found: 113 refcount_inc(&rule->refcnt); 114 return rule; 115 } 116 117 static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, 118 struct mlx5_flow_attr *attr, 119 struct mlx5_flow_spec *spec, 120 struct mlx5_esw_indir_table_entry *e) 121 { 122 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; 123 struct mlx5_fs_chains *chains = esw_chains(esw); 124 struct mlx5e_tc_mod_hdr_acts mod_acts = {}; 125 struct mlx5_flow_destination dest = {}; 126 struct mlx5_esw_indir_table_rule *rule; 127 struct mlx5_flow_act flow_act = {}; 128 struct mlx5_flow_spec *rule_spec; 129 struct mlx5_flow_handle *handle; 130 int err = 0; 131 u32 data; 132 133 rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr); 134 if (rule) 135 return 0; 136 137 if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX) 138 return -EINVAL; 139 140 rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); 141 if (!rule_spec) 142 return -ENOMEM; 143 144 rule = kzalloc(sizeof(*rule), GFP_KERNEL); 145 if (!rule) { 146 err = -ENOMEM; 147 goto out; 148 } 149 150 rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | 151 MLX5_MATCH_MISC_PARAMETERS | 152 MLX5_MATCH_MISC_PARAMETERS_2; 153 if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) { 154 MLX5_SET(fte_match_param, rule_spec->match_criteria, 155 outer_headers.ip_version, 0xf); 156 MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, 157 attr->ip_version); 158 } else if (attr->ip_version) { 159 MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, 160 outer_headers.ethertype); 161 MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype, 162 (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6)); 163 } else { 164 err = -EOPNOTSUPP; 165 goto err_ethertype; 166 } 167 168 if (attr->ip_version == 4) { 169 MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, 170 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); 171 MLX5_SET(fte_match_param, rule_spec->match_value, 172 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4, 173 ntohl(esw_attr->rx_tun_attr->dst_ip.v4)); 174 } else if (attr->ip_version == 6) { 175 int len = sizeof(struct in6_addr); 176 177 memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, 178 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 179 0xff, len); 180 memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, 181 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 182 &esw_attr->rx_tun_attr->dst_ip.v6, len); 183 } 184 185 MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, 186 misc_parameters.vxlan_vni); 187 MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni, 188 MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni)); 189 190 MLX5_SET(fte_match_param, rule_spec->match_criteria, 191 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); 192 MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, 193 mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch, 194 MLX5_VPORT_UPLINK)); 195 196 /* Modify flow source to recirculate packet */ 197 data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport); 198 err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, 199 VPORT_TO_REG, data); 200 if (err) 201 goto err_mod_hdr_regc0; 202 203 err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, 204 TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT); 205 if (err) 206 goto err_mod_hdr_regc1; 207 208 flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, 209 mod_acts.num_actions, mod_acts.actions); 210 if (IS_ERR(flow_act.modify_hdr)) { 211 err = PTR_ERR(flow_act.modify_hdr); 212 goto err_mod_hdr_alloc; 213 } 214 215 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 216 flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; 217 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 218 dest.ft = mlx5_chains_get_table(chains, 0, 1, 0); 219 if (IS_ERR(dest.ft)) { 220 err = PTR_ERR(dest.ft); 221 goto err_table; 222 } 223 handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1); 224 if (IS_ERR(handle)) { 225 err = PTR_ERR(handle); 226 goto err_handle; 227 } 228 229 dealloc_mod_hdr_actions(&mod_acts); 230 rule->handle = handle; 231 rule->vni = esw_attr->rx_tun_attr->vni; 232 rule->mh = flow_act.modify_hdr; 233 memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip, 234 sizeof(esw_attr->rx_tun_attr->dst_ip)); 235 refcount_set(&rule->refcnt, 1); 236 list_add(&rule->list, &e->recirc_rules); 237 e->recirc_cnt++; 238 goto out; 239 240 err_handle: 241 mlx5_chains_put_table(chains, 0, 1, 0); 242 err_table: 243 mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr); 244 err_mod_hdr_alloc: 245 err_mod_hdr_regc1: 246 dealloc_mod_hdr_actions(&mod_acts); 247 err_mod_hdr_regc0: 248 err_ethertype: 249 kfree(rule); 250 out: 251 kvfree(rule_spec); 252 return err; 253 } 254 255 static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw, 256 struct mlx5_flow_attr *attr, 257 struct mlx5_esw_indir_table_entry *e) 258 { 259 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; 260 struct mlx5_fs_chains *chains = esw_chains(esw); 261 struct mlx5_esw_indir_table_rule *rule; 262 263 list_for_each_entry(rule, &e->recirc_rules, list) 264 if (rule->vni == esw_attr->rx_tun_attr->vni && 265 !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip, 266 sizeof(esw_attr->rx_tun_attr->dst_ip))) 267 goto found; 268 269 return; 270 271 found: 272 if (!refcount_dec_and_test(&rule->refcnt)) 273 return; 274 275 mlx5_del_flow_rules(rule->handle); 276 mlx5_chains_put_table(chains, 0, 1, 0); 277 mlx5_modify_header_dealloc(esw->dev, rule->mh); 278 list_del(&rule->list); 279 kfree(rule); 280 e->recirc_cnt--; 281 } 282 283 static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw, 284 struct mlx5_flow_attr *attr, 285 struct mlx5_flow_spec *spec, 286 struct mlx5_esw_indir_table_entry *e) 287 { 288 int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); 289 u32 *in, *match; 290 291 in = kvzalloc(inlen, GFP_KERNEL); 292 if (!in) 293 return -ENOMEM; 294 295 MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | 296 MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2); 297 match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); 298 299 if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) 300 MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf); 301 else 302 MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype); 303 304 if (attr->ip_version == 4) { 305 MLX5_SET_TO_ONES(fte_match_param, match, 306 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); 307 } else if (attr->ip_version == 6) { 308 memset(MLX5_ADDR_OF(fte_match_param, match, 309 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 310 0xff, sizeof(struct in6_addr)); 311 } else { 312 err = -EOPNOTSUPP; 313 goto out; 314 } 315 316 MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni); 317 MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, 318 mlx5_eswitch_get_vport_metadata_mask()); 319 MLX5_SET(create_flow_group_in, in, start_flow_index, 0); 320 MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX); 321 e->recirc_grp = mlx5_create_flow_group(e->ft, in); 322 if (IS_ERR(e->recirc_grp)) { 323 err = PTR_ERR(e->recirc_grp); 324 goto out; 325 } 326 327 INIT_LIST_HEAD(&e->recirc_rules); 328 e->recirc_cnt = 0; 329 330 out: 331 kvfree(in); 332 return err; 333 } 334 335 static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw, 336 struct mlx5_esw_indir_table_entry *e) 337 { 338 int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); 339 struct mlx5_flow_destination dest = {}; 340 struct mlx5_flow_act flow_act = {}; 341 struct mlx5_flow_spec *spec; 342 u32 *in; 343 344 in = kvzalloc(inlen, GFP_KERNEL); 345 if (!in) 346 return -ENOMEM; 347 348 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 349 if (!spec) { 350 kvfree(in); 351 return -ENOMEM; 352 } 353 354 /* Hold one entry */ 355 MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); 356 MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); 357 e->fwd_grp = mlx5_create_flow_group(e->ft, in); 358 if (IS_ERR(e->fwd_grp)) { 359 err = PTR_ERR(e->fwd_grp); 360 goto err_out; 361 } 362 363 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 364 dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; 365 dest.vport.num = e->vport; 366 dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); 367 dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; 368 e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1); 369 if (IS_ERR(e->fwd_rule)) { 370 mlx5_destroy_flow_group(e->fwd_grp); 371 err = PTR_ERR(e->fwd_rule); 372 } 373 374 err_out: 375 kvfree(spec); 376 kvfree(in); 377 return err; 378 } 379 380 static struct mlx5_esw_indir_table_entry * 381 mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, 382 struct mlx5_flow_spec *spec, u16 vport, bool decap) 383 { 384 struct mlx5_flow_table_attr ft_attr = {}; 385 struct mlx5_flow_namespace *root_ns; 386 struct mlx5_esw_indir_table_entry *e; 387 struct mlx5_flow_table *ft; 388 int err = 0; 389 390 root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); 391 if (!root_ns) 392 return ERR_PTR(-ENOENT); 393 394 e = kzalloc(sizeof(*e), GFP_KERNEL); 395 if (!e) 396 return ERR_PTR(-ENOMEM); 397 398 ft_attr.prio = FDB_TC_OFFLOAD; 399 ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE; 400 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; 401 ft_attr.level = 1; 402 403 ft = mlx5_create_flow_table(root_ns, &ft_attr); 404 if (IS_ERR(ft)) { 405 err = PTR_ERR(ft); 406 goto tbl_err; 407 } 408 e->ft = ft; 409 e->vport = vport; 410 e->ip_version = attr->ip_version; 411 e->fwd_ref = !decap; 412 413 err = mlx5_create_indir_recirc_group(esw, attr, spec, e); 414 if (err) 415 goto recirc_grp_err; 416 417 if (decap) { 418 err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e); 419 if (err) 420 goto recirc_rule_err; 421 } 422 423 err = mlx5_create_indir_fwd_group(esw, e); 424 if (err) 425 goto fwd_grp_err; 426 427 hash_add(esw->fdb_table.offloads.indir->table, &e->hlist, 428 vport << 16 | attr->ip_version); 429 430 return e; 431 432 fwd_grp_err: 433 if (decap) 434 mlx5_esw_indir_table_rule_put(esw, attr, e); 435 recirc_rule_err: 436 mlx5_destroy_flow_group(e->recirc_grp); 437 recirc_grp_err: 438 mlx5_destroy_flow_table(e->ft); 439 tbl_err: 440 kfree(e); 441 return ERR_PTR(err); 442 } 443 444 static struct mlx5_esw_indir_table_entry * 445 mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version) 446 { 447 struct mlx5_esw_indir_table_entry *e; 448 u32 key = vport << 16 | ip_version; 449 450 hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key) 451 if (e->vport == vport && e->ip_version == ip_version) 452 return e; 453 454 return NULL; 455 } 456 457 struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, 458 struct mlx5_flow_attr *attr, 459 struct mlx5_flow_spec *spec, 460 u16 vport, bool decap) 461 { 462 struct mlx5_esw_indir_table_entry *e; 463 int err; 464 465 mutex_lock(&esw->fdb_table.offloads.indir->lock); 466 e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version); 467 if (e) { 468 if (!decap) { 469 e->fwd_ref++; 470 } else { 471 err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e); 472 if (err) 473 goto out_err; 474 } 475 } else { 476 e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap); 477 if (IS_ERR(e)) { 478 err = PTR_ERR(e); 479 esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err); 480 goto out_err; 481 } 482 } 483 mutex_unlock(&esw->fdb_table.offloads.indir->lock); 484 return e->ft; 485 486 out_err: 487 mutex_unlock(&esw->fdb_table.offloads.indir->lock); 488 return ERR_PTR(err); 489 } 490 491 void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, 492 struct mlx5_flow_attr *attr, 493 u16 vport, bool decap) 494 { 495 struct mlx5_esw_indir_table_entry *e; 496 497 mutex_lock(&esw->fdb_table.offloads.indir->lock); 498 e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version); 499 if (!e) 500 goto out; 501 502 if (!decap) 503 e->fwd_ref--; 504 else 505 mlx5_esw_indir_table_rule_put(esw, attr, e); 506 507 if (e->fwd_ref || e->recirc_cnt) 508 goto out; 509 510 hash_del(&e->hlist); 511 mlx5_destroy_flow_group(e->recirc_grp); 512 mlx5_del_flow_rules(e->fwd_rule); 513 mlx5_destroy_flow_group(e->fwd_grp); 514 mlx5_destroy_flow_table(e->ft); 515 kfree(e); 516 out: 517 mutex_unlock(&esw->fdb_table.offloads.indir->lock); 518 } 519