// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <linux/etherdevice.h>
#include <linux/idr.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "en.h"
#include "en_tc.h"
#include "fs_core.h"
#include "esw/indir_table.h"
#include "lib/fs_chains.h"
#include "en/mod_hdr.h"

/* Each indirection table holds up to MLX5_ESW_INDIR_TABLE_SIZE FTEs:
 * indices 0..SIZE-2 are the recirculation rules group, and the last
 * index (SIZE-1) is reserved for the single catch-all forward rule.
 */
#define MLX5_ESW_INDIR_TABLE_SIZE 128
#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2)
#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1)

/* One recirculation rule: matches a specific tunnel (dst IP + VXLAN VNI)
 * and rewrites packet metadata so the packet recirculates as if sourced
 * from the decap vport. Shared between flows via @refcnt.
 */
struct mlx5_esw_indir_table_rule {
	struct list_head list;			/* entry in mlx5_esw_indir_table_entry::recirc_rules */
	struct mlx5_flow_handle *handle;	/* installed steering rule */
	union {
		__be32 v4;
		struct in6_addr v6;
	} dst_ip;				/* tunnel outer destination IP (match key) */
	u32 vni;				/* VXLAN VNI (match key) */
	struct mlx5_modify_hdr *mh;		/* metadata-rewrite modify header */
	refcount_t refcnt;
};

/* One indirection table, keyed by (vport, ip_version).
 * @fwd_ref counts non-decap users; @recirc_cnt counts installed recirc
 * rules. The entry is freed only when both drop to zero.
 */
struct mlx5_esw_indir_table_entry {
	struct hlist_node hlist;		/* entry in mlx5_esw_indir_table::table */
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *recirc_grp;	/* group for recirc match rules */
	struct mlx5_flow_group *fwd_grp;	/* single-FTE group for the fwd rule */
	struct mlx5_flow_handle *fwd_rule;	/* catch-all: forward to @vport */
	struct list_head recirc_rules;
	int recirc_cnt;
	int fwd_ref;

	u16 vport;
	u8 ip_version;
};

struct mlx5_esw_indir_table {
	struct mutex lock; /* protects table */
	DECLARE_HASHTABLE(table, 8);
};

/* Allocate and initialize an empty indirection-table container.
 * Returns ERR_PTR(-ENOMEM) on allocation failure, never NULL.
 */
struct mlx5_esw_indir_table *
mlx5_esw_indir_table_init(void)
{
	struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL);

	if (!indir)
		return ERR_PTR(-ENOMEM);

	mutex_init(&indir->lock);
	hash_init(indir->table);
	return indir;
}

/* Free the container. Callers are expected to have released all entries
 * already (the hashtable is not drained here).
 */
void
mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
{
	mutex_destroy(&indir->lock);
	kvfree(indir);
}

/* Decide whether a flow must go through an indirection table. */
bool
mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
			    struct mlx5_flow_attr *attr,
			    u16 vport_num,
			    struct mlx5_core_dev *dest_mdev)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;

	/* Use indirect table for all IP traffic from UL to VF with vport
	 * destination when source rewrite flag is set.
	 */
	return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK &&
		mlx5_eswitch_is_vf_vport(esw, vport_num) &&
		esw->dev == dest_mdev &&
		attr->ip_version &&
		attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE;
}

/* Return the decap vport for this flow, or 0 when the flow carries no
 * rx tunnel attributes.
 */
u16
mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;

	return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0;
}

/* Find an existing recirc rule matching (vni, dst_ip); takes a reference
 * on success. Caller must hold the indir table lock.
 */
static struct mlx5_esw_indir_table_rule *
mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e,
				 struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_esw_indir_table_rule *rule;

	list_for_each_entry(rule, &e->recirc_rules, list)
		if (rule->vni == attr->rx_tun_attr->vni &&
		    !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip,
			    sizeof(attr->rx_tun_attr->dst_ip)))
			goto found;
	return NULL;

found:
	refcount_inc(&rule->refcnt);
	return rule;
}

/* Install (or take a reference on) the recirculation rule for this flow's
 * tunnel in entry @e. The rule matches outer IP version (or ethertype when
 * the ip_version field is not supported), tunnel dst IP, VXLAN VNI and the
 * uplink source-port metadata, then:
 *  - rewrites VPORT_TO_REG to the decap vport and TUNNEL_TO_REG to
 *    ESW_TUN_SLOW_TABLE_GOTO_VPORT (mark for continued processing —
 *    presumably resumed from the slow table; see esw/indir_table.h),
 *  - forwards to the chain-0/prio-1 table so the packet recirculates.
 * Caller must hold the indir table lock. Returns 0 or -errno; on error all
 * partially acquired resources are released via the goto cascade below.
 */
static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw,
					 struct mlx5_flow_attr *attr,
					 struct mlx5_flow_spec *spec,
					 struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct mlx5_flow_destination dest = {};
	struct mlx5_esw_indir_table_rule *rule;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *rule_spec;
	struct mlx5_flow_handle *handle;
	int err = 0;
	u32 data;

	/* Reuse an identical rule if one exists (lookup takes a reference) */
	rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr);
	if (rule)
		return 0;

	/* Recirc group is full (indices 0..RECIRC_IDX_MAX are exhausted) */
	if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX)
		return -EINVAL;

	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
	if (!rule_spec)
		return -ENOMEM;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule) {
		err = -ENOMEM;
		goto out;
	}

	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
					   MLX5_MATCH_MISC_PARAMETERS |
					   MLX5_MATCH_MISC_PARAMETERS_2;
	/* Match on IP version directly when the device supports it,
	 * otherwise fall back to matching the ethertype.
	 */
	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) {
		MLX5_SET(fte_match_param, rule_spec->match_criteria,
			 outer_headers.ip_version, 0xf);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version,
			 attr->ip_version);
	} else if (attr->ip_version) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.ethertype);
		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype,
			 (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6));
	} else {
		err = -EOPNOTSUPP;
		goto err_ethertype;
	}

	/* Match the tunnel's outer destination IP */
	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		MLX5_SET(fte_match_param, rule_spec->match_value,
			 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(esw_attr->rx_tun_attr->dst_ip.v4));
	} else if (attr->ip_version == 6) {
		int len = sizeof(struct in6_addr);

		memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, len);
		memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &esw_attr->rx_tun_attr->dst_ip.v6, len);
	}

	/* Match the VXLAN VNI taken from the caller's original spec */
	MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
			 misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni,
		 MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni));

	/* Match metadata identifying the uplink of the ingress device */
	MLX5_SET(fte_match_param, rule_spec->match_criteria,
		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch,
							   MLX5_VPORT_UPLINK));

	/* Modify flow source to recirculate packet */
	data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport);
	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					VPORT_TO_REG, data);
	if (err)
		goto err_mod_hdr_regc0;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT);
	if (err)
		goto err_mod_hdr_regc1;

	flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
						       mod_acts.num_actions, mod_acts.actions);
	if (IS_ERR(flow_act.modify_hdr)) {
		err = PTR_ERR(flow_act.modify_hdr);
		goto err_mod_hdr_alloc;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	/* Forward to the chain 0, prio 1, level 0 table to recirculate;
	 * balanced by mlx5_chains_put_table() on teardown.
	 */
	dest.ft = mlx5_chains_get_table(chains, 0, 1, 0);
	if (IS_ERR(dest.ft)) {
		err = PTR_ERR(dest.ft);
		goto err_table;
	}
	handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto err_handle;
	}

	/* Actions were copied into the firmware modify header object;
	 * the staging buffer is no longer needed.
	 */
	mlx5e_mod_hdr_dealloc(&mod_acts);
	rule->handle = handle;
	rule->vni = esw_attr->rx_tun_attr->vni;
	rule->mh = flow_act.modify_hdr;
	memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
	       sizeof(esw_attr->rx_tun_attr->dst_ip));
	refcount_set(&rule->refcnt, 1);
	list_add(&rule->list, &e->recirc_rules);
	e->recirc_cnt++;
	goto out;

	/* Error unwinding: each label releases the resource acquired just
	 * before the corresponding failure point, then falls through.
	 */
err_handle:
	mlx5_chains_put_table(chains, 0, 1, 0);
err_table:
	mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr);
err_mod_hdr_alloc:
err_mod_hdr_regc1:
	mlx5e_mod_hdr_dealloc(&mod_acts);
err_mod_hdr_regc0:
err_ethertype:
	kfree(rule);
out:
	kvfree(rule_spec);
	return err;
}

/* Drop one reference on the recirc rule matching this flow's tunnel and
 * tear it down when the last reference goes. No-op when no matching rule
 * exists. Caller must hold the indir table lock.
 */
static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_esw_indir_table_entry *e)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5_esw_indir_table_rule *rule;

	list_for_each_entry(rule, &e->recirc_rules, list)
		if (rule->vni == esw_attr->rx_tun_attr->vni &&
		    !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip,
			    sizeof(esw_attr->rx_tun_attr->dst_ip)))
			goto found;

	return;

found:
	if (!refcount_dec_and_test(&rule->refcnt))
		return;

	/* Release in reverse order of acquisition in rule_get() */
	mlx5_del_flow_rules(rule->handle);
	mlx5_chains_put_table(chains, 0, 1, 0);
	mlx5_modify_header_dealloc(esw->dev, rule->mh);
	list_del(&rule->list);
	kfree(rule);
	e->recirc_cnt--;
}

/* Create the recirculation flow group (indices 0..RECIRC_IDX_MAX) whose
 * match criteria mirror those used by rule_get(): IP version/ethertype,
 * dst IP, VXLAN VNI and source-port metadata. Also initializes the
 * entry's recirc rule list/counter.
 */
static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
					  struct mlx5_flow_attr *attr,
					  struct mlx5_flow_spec *spec,
					  struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	u32 *in, *match;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS |
		 MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2);
	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);

	if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version))
		MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf);
	else
		MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype);

	if (attr->ip_version == 4) {
		MLX5_SET_TO_ONES(fte_match_param, match,
				 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
	} else if (attr->ip_version == 6) {
		memset(MLX5_ADDR_OF(fte_match_param, match,
				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       0xff, sizeof(struct in6_addr));
	} else {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni);
	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());
	MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX);
	e->recirc_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->recirc_grp)) {
		err = PTR_ERR(e->recirc_grp);
		goto out;
	}

	INIT_LIST_HEAD(&e->recirc_rules);
	e->recirc_cnt = 0;

out:
	kvfree(in);
	return err;
}

/* Create the single-FTE forward group at the table's last index and
 * install a match-all rule forwarding to e->vport on the local VHCA.
 * On rule failure the group is destroyed here, so the caller only needs
 * to undo its own resources.
 */
static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
				       struct mlx5_esw_indir_table_entry *e)
{
	int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_spec *spec;
	u32 *in;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	/* Empty spec: the fwd rule matches everything that reached it */
	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec) {
		kvfree(in);
		return -ENOMEM;
	}

	/* Hold one entry */
	MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX);
	e->fwd_grp = mlx5_create_flow_group(e->ft, in);
	if (IS_ERR(e->fwd_grp)) {
		err = PTR_ERR(e->fwd_grp);
		goto err_out;
	}

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
	dest.vport.num = e->vport;
	dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
	dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
	e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(e->fwd_rule)) {
		mlx5_destroy_flow_group(e->fwd_grp);
		err = PTR_ERR(e->fwd_rule);
	}

err_out:
	kvfree(spec);
	kvfree(in);
	return err;
}

/* Allocate a new indirection table entry for (vport, ip_version): create
 * the unmanaged level-1 FDB table, its recirc and fwd groups, optionally
 * the first recirc rule (when @decap), and insert the entry into the
 * hashtable. Caller must hold the indir table lock.
 * Returns the entry or ERR_PTR on failure.
 */
static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr,
				  struct mlx5_flow_spec *spec, u16 vport, bool decap)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_esw_indir_table_entry *e;
	struct mlx5_flow_table *ft;
	int err = 0;

	root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!root_ns)
		return ERR_PTR(-ENOENT);

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);

	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE;
	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.level = 1;

	ft = mlx5_create_flow_table(root_ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto tbl_err;
	}
	e->ft = ft;
	e->vport = vport;
	e->ip_version = attr->ip_version;
	/* A non-decap creator holds a fwd reference; a decap creator is
	 * accounted via recirc_cnt instead (see rule_get below).
	 */
	e->fwd_ref = !decap;

	err = mlx5_create_indir_recirc_group(esw, attr, spec, e);
	if (err)
		goto recirc_grp_err;

	if (decap) {
		err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
		if (err)
			goto recirc_rule_err;
	}

	err = mlx5_create_indir_fwd_group(esw, e);
	if (err)
		goto fwd_grp_err;

	/* Hash key combines vport and IP version; same key computation as
	 * mlx5_esw_indir_table_entry_lookup().
	 */
	hash_add(esw->fdb_table.offloads.indir->table, &e->hlist,
		 vport << 16 | attr->ip_version);

	return e;

fwd_grp_err:
	if (decap)
		mlx5_esw_indir_table_rule_put(esw, attr, e);
recirc_rule_err:
	mlx5_destroy_flow_group(e->recirc_grp);
recirc_grp_err:
	mlx5_destroy_flow_table(e->ft);
tbl_err:
	kfree(e);
	return ERR_PTR(err);
}

/* Find the entry for (vport, ip_version), or NULL. Caller must hold the
 * indir table lock.
 */
static struct mlx5_esw_indir_table_entry *
mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version)
{
	struct mlx5_esw_indir_table_entry *e;
	u32 key = vport << 16 | ip_version;

	hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key)
		if (e->vport == vport && e->ip_version == ip_version)
			return e;

	return NULL;
}

/* Get (and reference) the indirection flow table for (vport, ip_version),
 * creating it on first use. Non-decap users bump fwd_ref; decap users add
 * or reference a recirc rule for their tunnel instead.
 * Returns the flow table or ERR_PTR on failure.
 */
struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw,
						 struct mlx5_flow_attr *attr,
						 struct mlx5_flow_spec *spec,
						 u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;
	int err;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (e) {
		if (!decap) {
			e->fwd_ref++;
		} else {
			err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e);
			if (err)
				goto out_err;
		}
	} else {
		e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap);
		if (IS_ERR(e)) {
			err = PTR_ERR(e);
			esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err);
			goto out_err;
		}
	}
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return e->ft;

out_err:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
	return ERR_PTR(err);
}

/* Release one reference on the (vport, ip_version) entry — fwd_ref for
 * non-decap users, the flow's recirc rule for decap users — and destroy
 * the entry once both reference counts reach zero.
 */
void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
			      struct mlx5_flow_attr *attr,
			      u16 vport, bool decap)
{
	struct mlx5_esw_indir_table_entry *e;

	mutex_lock(&esw->fdb_table.offloads.indir->lock);
	e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version);
	if (!e)
		goto out;

	if (!decap)
		e->fwd_ref--;
	else
		mlx5_esw_indir_table_rule_put(esw, attr, e);

	if (e->fwd_ref || e->recirc_cnt)
		goto out;

	hash_del(&e->hlist);
	mlx5_destroy_flow_group(e->recirc_grp);
	mlx5_del_flow_rules(e->fwd_rule);
	mlx5_destroy_flow_group(e->fwd_grp);
	mlx5_destroy_flow_table(e->ft);
	kfree(e);
out:
	mutex_unlock(&esw->fdb_table.offloads.indir->lock);
}