1 /* 2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <net/flow_dissector.h> 34 #include <net/flow_offload.h> 35 #include <net/sch_generic.h> 36 #include <net/pkt_cls.h> 37 #include <net/tc_act/tc_gact.h> 38 #include <net/tc_act/tc_skbedit.h> 39 #include <linux/mlx5/fs.h> 40 #include <linux/mlx5/device.h> 41 #include <linux/rhashtable.h> 42 #include <linux/refcount.h> 43 #include <linux/completion.h> 44 #include <net/tc_act/tc_mirred.h> 45 #include <net/tc_act/tc_vlan.h> 46 #include <net/tc_act/tc_tunnel_key.h> 47 #include <net/tc_act/tc_pedit.h> 48 #include <net/tc_act/tc_csum.h> 49 #include <net/tc_act/tc_mpls.h> 50 #include <net/arp.h> 51 #include <net/ipv6_stubs.h> 52 #include <net/bareudp.h> 53 #include <net/bonding.h> 54 #include "en.h" 55 #include "en_rep.h" 56 #include "en/rep/tc.h" 57 #include "en/rep/neigh.h" 58 #include "en_tc.h" 59 #include "eswitch.h" 60 #include "fs_core.h" 61 #include "en/port.h" 62 #include "en/tc_tun.h" 63 #include "en/mapping.h" 64 #include "en/tc_ct.h" 65 #include "en/mod_hdr.h" 66 #include "en/tc_priv.h" 67 #include "en/tc_tun_encap.h" 68 #include "lib/devcom.h" 69 #include "lib/geneve.h" 70 #include "lib/fs_chains.h" 71 #include "diag/en_tc_tracepoint.h" 72 #include <asm/div64.h> 73 74 #define nic_chains(priv) ((priv)->fs.tc.chains) 75 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) 76 77 #define MLX5E_TC_TABLE_NUM_GROUPS 4 78 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) 79 80 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { 81 [CHAIN_TO_REG] = { 82 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, 83 .moffset = 0, 84 .mlen = 2, 85 }, 86 [VPORT_TO_REG] = { 87 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, 88 .moffset = 2, 89 .mlen = 2, 90 }, 91 [TUNNEL_TO_REG] = { 92 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, 93 .moffset = 1, 94 .mlen = ((ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS) / 8), 95 .soffset = MLX5_BYTE_OFF(fte_match_param, 96 misc_parameters_2.metadata_reg_c_1), 97 }, 98 [ZONE_TO_REG] = zone_to_reg_ct, 99 [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct, 100 [CTSTATE_TO_REG] = ctstate_to_reg_ct, 101 [MARK_TO_REG] = mark_to_reg_ct, 102 [LABELS_TO_REG] = labels_to_reg_ct, 103 [FTEID_TO_REG] = fteid_to_reg_ct, 104 /* For NIC rules we store the retore metadata directly 105 * into reg_b that is passed to SW since we don't 106 * jump between steering domains. 107 */ 108 [NIC_CHAIN_TO_REG] = { 109 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B, 110 .moffset = 0, 111 .mlen = 2, 112 }, 113 [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct, 114 }; 115 116 /* To avoid false lock dependency warning set the tc_ht lock 117 * class different than the lock class of the ht being used when deleting 118 * last flow from a group and then deleting a group, we get into del_sw_flow_group() 119 * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but 120 * it's different than the ht->mutex here. 121 */ 122 static struct lock_class_key tc_ht_lock_key; 123 124 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); 125 126 void 127 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, 128 enum mlx5e_tc_attr_to_reg type, 129 u32 data, 130 u32 mask) 131 { 132 int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; 133 int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; 134 void *headers_c = spec->match_criteria; 135 void *headers_v = spec->match_value; 136 void *fmask, *fval; 137 138 fmask = headers_c + soffset; 139 fval = headers_v + soffset; 140 141 mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8)); 142 data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8)); 143 144 memcpy(fmask, &mask, match_len); 145 memcpy(fval, &data, match_len); 146 147 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; 148 } 149 150 void 151 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, 152 enum mlx5e_tc_attr_to_reg type, 153 u32 *data, 154 u32 *mask) 155 { 156 int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; 157 int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; 158 void *headers_c = spec->match_criteria; 159 void *headers_v = spec->match_value; 160 void *fmask, *fval; 161 162 fmask = headers_c + soffset; 163 fval = headers_v + soffset; 164 165 memcpy(mask, fmask, match_len); 166 memcpy(data, fval, match_len); 167 168 *mask = be32_to_cpu((__force __be32)(*mask << (32 - (match_len * 8)))); 169 *data = be32_to_cpu((__force __be32)(*data << (32 - (match_len * 8)))); 170 } 171 172 int 173 mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, 174 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, 175 enum mlx5_flow_namespace_type ns, 176 enum mlx5e_tc_attr_to_reg type, 177 u32 data) 178 { 179 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; 180 int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; 181 int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; 182 char *modact; 183 int err; 184 185 err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts); 186 if (err) 187 return err; 188 189 modact = mod_hdr_acts->actions + 190 (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); 191 192 /* Firmware has 5bit length field and 0 means 32bits */ 193 if (mlen == 4) 194 mlen = 0; 195 196 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); 197 MLX5_SET(set_action_in, modact, field, mfield); 198 MLX5_SET(set_action_in, modact, offset, moffset * 8); 199 MLX5_SET(set_action_in, modact, length, mlen * 8); 200 MLX5_SET(set_action_in, modact, data, data); 201 err = mod_hdr_acts->num_actions; 202 mod_hdr_acts->num_actions++; 203 204 return err; 205 } 206 207 static struct mlx5_tc_ct_priv * 208 get_ct_priv(struct mlx5e_priv *priv) 209 { 210 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 211 struct mlx5_rep_uplink_priv *uplink_priv; 212 struct mlx5e_rep_priv *uplink_rpriv; 213 214 if (is_mdev_switchdev_mode(priv->mdev)) { 215 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 216 uplink_priv = &uplink_rpriv->uplink_priv; 217 218 return uplink_priv->ct_priv; 219 } 220 221 return priv->fs.tc.ct; 222 } 223 224 struct mlx5_flow_handle * 225 mlx5_tc_rule_insert(struct mlx5e_priv *priv, 226 struct mlx5_flow_spec *spec, 227 struct mlx5_flow_attr *attr) 228 { 229 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 230 231 if (is_mdev_switchdev_mode(priv->mdev)) 232 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); 233 234 return mlx5e_add_offloaded_nic_rule(priv, spec, attr); 235 } 236 237 void 238 mlx5_tc_rule_delete(struct mlx5e_priv *priv, 239 struct mlx5_flow_handle *rule, 240 struct mlx5_flow_attr *attr) 241 { 242 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 243 244 if (is_mdev_switchdev_mode(priv->mdev)) { 245 mlx5_eswitch_del_offloaded_rule(esw, rule, attr); 246 247 return; 248 } 249 250 mlx5e_del_offloaded_nic_rule(priv, rule, attr); 251 } 252 253 int 254 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, 255 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, 256 enum mlx5_flow_namespace_type ns, 257 enum mlx5e_tc_attr_to_reg type, 258 u32 data) 259 { 260 int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data); 261 262 return ret < 0 ? ret : 0; 263 } 264 265 void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, 266 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, 267 enum mlx5e_tc_attr_to_reg type, 268 int act_id, u32 data) 269 { 270 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; 271 int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; 272 int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; 273 char *modact; 274 275 modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ); 276 277 /* Firmware has 5bit length field and 0 means 32bits */ 278 if (mlen == 4) 279 mlen = 0; 280 281 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); 282 MLX5_SET(set_action_in, modact, field, mfield); 283 MLX5_SET(set_action_in, modact, offset, moffset * 8); 284 MLX5_SET(set_action_in, modact, length, mlen * 8); 285 MLX5_SET(set_action_in, modact, data, data); 286 } 287 288 struct mlx5e_hairpin { 289 struct mlx5_hairpin *pair; 290 291 struct mlx5_core_dev *func_mdev; 292 struct mlx5e_priv *func_priv; 293 u32 tdn; 294 u32 tirn; 295 296 int num_channels; 297 struct mlx5e_rqt indir_rqt; 298 u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; 299 struct mlx5e_ttc_table ttc; 300 }; 301 302 struct mlx5e_hairpin_entry { 303 /* a node of a hash table which keeps all the hairpin entries */ 304 struct hlist_node hairpin_hlist; 305 306 /* protects flows list */ 307 spinlock_t flows_lock; 308 /* flows sharing the same hairpin */ 309 struct list_head flows; 310 /* hpe's that were not fully initialized when dead peer update event 311 * function traversed them. 312 */ 313 struct list_head dead_peer_wait_list; 314 315 u16 peer_vhca_id; 316 u8 prio; 317 struct mlx5e_hairpin *hp; 318 refcount_t refcnt; 319 struct completion res_ready; 320 }; 321 322 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, 323 struct mlx5e_tc_flow *flow); 324 325 struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow) 326 { 327 if (!flow || !refcount_inc_not_zero(&flow->refcnt)) 328 return ERR_PTR(-EINVAL); 329 return flow; 330 } 331 332 void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) 333 { 334 if (refcount_dec_and_test(&flow->refcnt)) { 335 mlx5e_tc_del_flow(priv, flow); 336 kfree_rcu(flow, rcu_head); 337 } 338 } 339 340 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) 341 { 342 return flow_flag_test(flow, ESWITCH); 343 } 344 345 static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) 346 { 347 return flow_flag_test(flow, FT); 348 } 349 350 bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) 351 { 352 return flow_flag_test(flow, OFFLOADED); 353 } 354 355 static int get_flow_name_space(struct mlx5e_tc_flow *flow) 356 { 357 return mlx5e_is_eswitch_flow(flow) ? 358 MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; 359 } 360 361 static struct mod_hdr_tbl * 362 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) 363 { 364 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 365 366 return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ? 367 &esw->offloads.mod_hdr : 368 &priv->fs.tc.mod_hdr; 369 } 370 371 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, 372 struct mlx5e_tc_flow *flow, 373 struct mlx5e_tc_flow_parse_attr *parse_attr) 374 { 375 struct mlx5_modify_hdr *modify_hdr; 376 struct mlx5e_mod_hdr_handle *mh; 377 378 mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow), 379 get_flow_name_space(flow), 380 &parse_attr->mod_hdr_acts); 381 if (IS_ERR(mh)) 382 return PTR_ERR(mh); 383 384 modify_hdr = mlx5e_mod_hdr_get(mh); 385 flow->attr->modify_hdr = modify_hdr; 386 flow->mh = mh; 387 388 return 0; 389 } 390 391 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, 392 struct mlx5e_tc_flow *flow) 393 { 394 /* flow wasn't fully initialized */ 395 if (!flow->mh) 396 return; 397 398 mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow), 399 flow->mh); 400 flow->mh = NULL; 401 } 402 403 static 404 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) 405 { 406 struct net_device *netdev; 407 struct mlx5e_priv *priv; 408 409 netdev = __dev_get_by_index(net, ifindex); 410 priv = netdev_priv(netdev); 411 return priv->mdev; 412 } 413 414 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) 415 { 416 u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {}; 417 void *tirc; 418 int err; 419 420 err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn); 421 if (err) 422 goto alloc_tdn_err; 423 424 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 425 426 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); 427 MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]); 428 MLX5_SET(tirc, tirc, transport_domain, hp->tdn); 429 430 err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn); 431 if (err) 432 goto create_tir_err; 433 434 return 0; 435 436 create_tir_err: 437 mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); 438 alloc_tdn_err: 439 return err; 440 } 441 442 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) 443 { 444 mlx5_core_destroy_tir(hp->func_mdev, hp->tirn); 445 mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); 446 } 447 448 static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) 449 { 450 u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn; 451 struct mlx5e_priv *priv = hp->func_priv; 452 int i, ix, sz = MLX5E_INDIR_RQT_SIZE; 453 454 mlx5e_build_default_indir_rqt(indirection_rqt, sz, 455 hp->num_channels); 456 457 for (i = 0; i < sz; i++) { 458 ix = i; 459 if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR) 460 ix = mlx5e_bits_invert(i, ilog2(sz)); 461 ix = indirection_rqt[ix]; 462 rqn = hp->pair->rqn[ix]; 463 MLX5_SET(rqtc, rqtc, rq_num[i], rqn); 464 } 465 } 466 467 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) 468 { 469 int inlen, err, sz = MLX5E_INDIR_RQT_SIZE; 470 struct mlx5e_priv *priv = hp->func_priv; 471 struct mlx5_core_dev *mdev = priv->mdev; 472 void *rqtc; 473 u32 *in; 474 475 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; 476 in = kvzalloc(inlen, GFP_KERNEL); 477 if (!in) 478 return -ENOMEM; 479 480 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); 481 482 MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); 483 MLX5_SET(rqtc, rqtc, rqt_max_size, sz); 484 485 mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); 486 487 err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn); 488 if (!err) 489 hp->indir_rqt.enabled = true; 490 491 kvfree(in); 492 return err; 493 } 494 495 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) 496 { 497 struct mlx5e_priv *priv = hp->func_priv; 498 u32 in[MLX5_ST_SZ_DW(create_tir_in)]; 499 int tt, i, err; 500 void *tirc; 501 502 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { 503 struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt); 504 505 memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in)); 506 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 507 508 MLX5_SET(tirc, tirc, transport_domain, hp->tdn); 509 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); 510 MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn); 511 mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false); 512 513 err = mlx5_core_create_tir(hp->func_mdev, in, 514 &hp->indir_tirn[tt]); 515 if (err) { 516 mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); 517 goto err_destroy_tirs; 518 } 519 } 520 return 0; 521 522 err_destroy_tirs: 523 for (i = 0; i < tt; i++) 524 mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]); 525 return err; 526 } 527 528 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) 529 { 530 int tt; 531 532 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) 533 mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]); 534 } 535 536 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, 537 struct ttc_params *ttc_params) 538 { 539 struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; 540 int tt; 541 542 memset(ttc_params, 0, sizeof(*ttc_params)); 543 544 ttc_params->any_tt_tirn = hp->tirn; 545 546 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) 547 ttc_params->indir_tirn[tt] = hp->indir_tirn[tt]; 548 549 ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; 550 ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; 551 ft_attr->prio = MLX5E_TC_PRIO; 552 } 553 554 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) 555 { 556 struct mlx5e_priv *priv = hp->func_priv; 557 struct ttc_params ttc_params; 558 int err; 559 560 err = mlx5e_hairpin_create_indirect_rqt(hp); 561 if (err) 562 return err; 563 564 err = mlx5e_hairpin_create_indirect_tirs(hp); 565 if (err) 566 goto err_create_indirect_tirs; 567 568 mlx5e_hairpin_set_ttc_params(hp, &ttc_params); 569 err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc); 570 if (err) 571 goto err_create_ttc_table; 572 573 netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", 574 hp->num_channels, hp->ttc.ft.t->id); 575 576 return 0; 577 578 err_create_ttc_table: 579 mlx5e_hairpin_destroy_indirect_tirs(hp); 580 err_create_indirect_tirs: 581 mlx5e_destroy_rqt(priv, &hp->indir_rqt); 582 583 return err; 584 } 585 586 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) 587 { 588 struct mlx5e_priv *priv = hp->func_priv; 589 590 mlx5e_destroy_ttc_table(priv, &hp->ttc); 591 mlx5e_hairpin_destroy_indirect_tirs(hp); 592 mlx5e_destroy_rqt(priv, &hp->indir_rqt); 593 } 594 595 static struct mlx5e_hairpin * 596 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params, 597 int peer_ifindex) 598 { 599 struct mlx5_core_dev *func_mdev, *peer_mdev; 600 struct mlx5e_hairpin *hp; 601 struct mlx5_hairpin *pair; 602 int err; 603 604 hp = kzalloc(sizeof(*hp), GFP_KERNEL); 605 if (!hp) 606 return ERR_PTR(-ENOMEM); 607 608 func_mdev = priv->mdev; 609 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); 610 611 pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params); 612 if (IS_ERR(pair)) { 613 err = PTR_ERR(pair); 614 goto create_pair_err; 615 } 616 hp->pair = pair; 617 hp->func_mdev = func_mdev; 618 hp->func_priv = priv; 619 hp->num_channels = params->num_channels; 620 621 err = mlx5e_hairpin_create_transport(hp); 622 if (err) 623 goto create_transport_err; 624 625 if (hp->num_channels > 1) { 626 err = mlx5e_hairpin_rss_init(hp); 627 if (err) 628 goto rss_init_err; 629 } 630 631 return hp; 632 633 rss_init_err: 634 mlx5e_hairpin_destroy_transport(hp); 635 create_transport_err: 636 mlx5_core_hairpin_destroy(hp->pair); 637 create_pair_err: 638 kfree(hp); 639 return ERR_PTR(err); 640 } 641 642 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp) 643 { 644 if (hp->num_channels > 1) 645 mlx5e_hairpin_rss_cleanup(hp); 646 mlx5e_hairpin_destroy_transport(hp); 647 mlx5_core_hairpin_destroy(hp->pair); 648 kvfree(hp); 649 } 650 651 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio) 652 { 653 return (peer_vhca_id << 16 | prio); 654 } 655 656 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, 657 u16 peer_vhca_id, u8 prio) 658 { 659 struct mlx5e_hairpin_entry *hpe; 660 u32 hash_key = hash_hairpin_info(peer_vhca_id, prio); 661 662 hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, 663 hairpin_hlist, hash_key) { 664 if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) { 665 refcount_inc(&hpe->refcnt); 666 return hpe; 667 } 668 } 669 670 return NULL; 671 } 672 673 static void mlx5e_hairpin_put(struct mlx5e_priv *priv, 674 struct mlx5e_hairpin_entry *hpe) 675 { 676 /* no more hairpin flows for us, release the hairpin pair */ 677 if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock)) 678 return; 679 hash_del(&hpe->hairpin_hlist); 680 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); 681 682 if (!IS_ERR_OR_NULL(hpe->hp)) { 683 netdev_dbg(priv->netdev, "del hairpin: peer %s\n", 684 dev_name(hpe->hp->pair->peer_mdev->device)); 685 686 mlx5e_hairpin_destroy(hpe->hp); 687 } 688 689 WARN_ON(!list_empty(&hpe->flows)); 690 kfree(hpe); 691 } 692 693 #define UNKNOWN_MATCH_PRIO 8 694 695 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, 696 struct mlx5_flow_spec *spec, u8 *match_prio, 697 struct netlink_ext_ack *extack) 698 { 699 void *headers_c, *headers_v; 700 u8 prio_val, prio_mask = 0; 701 bool vlan_present; 702 703 #ifdef CONFIG_MLX5_CORE_EN_DCB 704 if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) { 705 NL_SET_ERR_MSG_MOD(extack, 706 "only PCP trust state supported for hairpin"); 707 return -EOPNOTSUPP; 708 } 709 #endif 710 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 711 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 712 713 vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag); 714 if (vlan_present) { 715 prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); 716 prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); 717 } 718 719 if (!vlan_present || !prio_mask) { 720 prio_val = UNKNOWN_MATCH_PRIO; 721 } else if (prio_mask != 0x7) { 722 NL_SET_ERR_MSG_MOD(extack, 723 "masked priority match not supported for hairpin"); 724 return -EOPNOTSUPP; 725 } 726 727 *match_prio = prio_val; 728 return 0; 729 } 730 731 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, 732 struct mlx5e_tc_flow *flow, 733 struct mlx5e_tc_flow_parse_attr *parse_attr, 734 struct netlink_ext_ack *extack) 735 { 736 int peer_ifindex = parse_attr->mirred_ifindex[0]; 737 struct mlx5_hairpin_params params; 738 struct mlx5_core_dev *peer_mdev; 739 struct mlx5e_hairpin_entry *hpe; 740 struct mlx5e_hairpin *hp; 741 u64 link_speed64; 742 u32 link_speed; 743 u8 match_prio; 744 u16 peer_id; 745 int err; 746 747 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); 748 if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { 749 NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported"); 750 return -EOPNOTSUPP; 751 } 752 753 peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id); 754 err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio, 755 extack); 756 if (err) 757 return err; 758 759 mutex_lock(&priv->fs.tc.hairpin_tbl_lock); 760 hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); 761 if (hpe) { 762 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); 763 wait_for_completion(&hpe->res_ready); 764 765 if (IS_ERR(hpe->hp)) { 766 err = -EREMOTEIO; 767 goto out_err; 768 } 769 goto attach_flow; 770 } 771 772 hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); 773 if (!hpe) { 774 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); 775 return -ENOMEM; 776 } 777 778 spin_lock_init(&hpe->flows_lock); 779 INIT_LIST_HEAD(&hpe->flows); 780 INIT_LIST_HEAD(&hpe->dead_peer_wait_list); 781 hpe->peer_vhca_id = peer_id; 782 hpe->prio = match_prio; 783 refcount_set(&hpe->refcnt, 1); 784 init_completion(&hpe->res_ready); 785 786 hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, 787 hash_hairpin_info(peer_id, match_prio)); 788 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); 789 790 params.log_data_size = 15; 791 params.log_data_size = min_t(u8, params.log_data_size, 792 MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); 793 params.log_data_size = max_t(u8, params.log_data_size, 794 MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz)); 795 796 params.log_num_packets = params.log_data_size - 797 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev); 798 params.log_num_packets = min_t(u8, params.log_num_packets, 799 MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets)); 800 801 params.q_counter = priv->q_counter; 802 /* set hairpin pair per each 50Gbs share of the link */ 803 mlx5e_port_max_linkspeed(priv->mdev, &link_speed); 804 link_speed = max_t(u32, link_speed, 50000); 805 link_speed64 = link_speed; 806 do_div(link_speed64, 50000); 807 params.num_channels = link_speed64; 808 809 hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); 810 hpe->hp = hp; 811 complete_all(&hpe->res_ready); 812 if (IS_ERR(hp)) { 813 err = PTR_ERR(hp); 814 goto out_err; 815 } 816 817 netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", 818 hp->tirn, hp->pair->rqn[0], 819 dev_name(hp->pair->peer_mdev->device), 820 hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); 821 822 attach_flow: 823 if (hpe->hp->num_channels > 1) { 824 flow_flag_set(flow, HAIRPIN_RSS); 825 flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; 826 } else { 827 flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn; 828 } 829 830 flow->hpe = hpe; 831 spin_lock(&hpe->flows_lock); 832 list_add(&flow->hairpin, &hpe->flows); 833 spin_unlock(&hpe->flows_lock); 834 835 return 0; 836 837 out_err: 838 mlx5e_hairpin_put(priv, hpe); 839 return err; 840 } 841 842 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, 843 struct mlx5e_tc_flow *flow) 844 { 845 /* flow wasn't fully initialized */ 846 if (!flow->hpe) 847 return; 848 849 spin_lock(&flow->hpe->flows_lock); 850 list_del(&flow->hairpin); 851 spin_unlock(&flow->hpe->flows_lock); 852 853 mlx5e_hairpin_put(priv, flow->hpe); 854 flow->hpe = NULL; 855 } 856 857 struct mlx5_flow_handle * 858 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, 859 struct mlx5_flow_spec *spec, 860 struct mlx5_flow_attr *attr) 861 { 862 struct mlx5_flow_context *flow_context = &spec->flow_context; 863 struct mlx5_fs_chains *nic_chains = nic_chains(priv); 864 struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr; 865 struct mlx5e_tc_table *tc = &priv->fs.tc; 866 struct mlx5_flow_destination dest[2] = {}; 867 struct mlx5_flow_act flow_act = { 868 .action = attr->action, 869 .flags = FLOW_ACT_NO_APPEND, 870 }; 871 struct mlx5_flow_handle *rule; 872 struct mlx5_flow_table *ft; 873 int dest_ix = 0; 874 875 flow_context->flags |= FLOW_CONTEXT_HAS_TAG; 876 flow_context->flow_tag = nic_attr->flow_tag; 877 878 if (attr->dest_ft) { 879 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 880 dest[dest_ix].ft = attr->dest_ft; 881 dest_ix++; 882 } else if (nic_attr->hairpin_ft) { 883 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 884 dest[dest_ix].ft = nic_attr->hairpin_ft; 885 dest_ix++; 886 } else if (nic_attr->hairpin_tirn) { 887 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 888 dest[dest_ix].tir_num = nic_attr->hairpin_tirn; 889 dest_ix++; 890 } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { 891 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 892 if (attr->dest_chain) { 893 dest[dest_ix].ft = mlx5_chains_get_table(nic_chains, 894 attr->dest_chain, 1, 895 MLX5E_TC_FT_LEVEL); 896 if (IS_ERR(dest[dest_ix].ft)) 897 return ERR_CAST(dest[dest_ix].ft); 898 } else { 899 dest[dest_ix].ft = priv->fs.vlan.ft.t; 900 } 901 dest_ix++; 902 } 903 904 if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && 905 MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) 906 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; 907 908 if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { 909 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 910 dest[dest_ix].counter_id = mlx5_fc_id(attr->counter); 911 dest_ix++; 912 } 913 914 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) 915 flow_act.modify_hdr = attr->modify_hdr; 916 917 mutex_lock(&tc->t_lock); 918 if (IS_ERR_OR_NULL(tc->t)) { 919 /* Create the root table here if doesn't exist yet */ 920 tc->t = 921 mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL); 922 923 if (IS_ERR(tc->t)) { 924 mutex_unlock(&tc->t_lock); 925 netdev_err(priv->netdev, 926 "Failed to create tc offload table\n"); 927 rule = ERR_CAST(priv->fs.tc.t); 928 goto err_ft_get; 929 } 930 } 931 mutex_unlock(&tc->t_lock); 932 933 if (attr->chain || attr->prio) 934 ft = mlx5_chains_get_table(nic_chains, 935 attr->chain, attr->prio, 936 MLX5E_TC_FT_LEVEL); 937 else 938 ft = attr->ft; 939 940 if (IS_ERR(ft)) { 941 rule = ERR_CAST(ft); 942 goto err_ft_get; 943 } 944 945 if (attr->outer_match_level != MLX5_MATCH_NONE) 946 spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; 947 948 rule = mlx5_add_flow_rules(ft, spec, 949 &flow_act, dest, dest_ix); 950 if (IS_ERR(rule)) 951 goto err_rule; 952 953 return rule; 954 955 err_rule: 956 if (attr->chain || attr->prio) 957 mlx5_chains_put_table(nic_chains, 958 attr->chain, attr->prio, 959 MLX5E_TC_FT_LEVEL); 960 err_ft_get: 961 if (attr->dest_chain) 962 mlx5_chains_put_table(nic_chains, 963 attr->dest_chain, 1, 964 MLX5E_TC_FT_LEVEL); 965 966 return ERR_CAST(rule); 967 } 968 969 static int 970 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, 971 struct mlx5e_tc_flow_parse_attr *parse_attr, 972 struct mlx5e_tc_flow *flow, 973 struct netlink_ext_ack *extack) 974 { 975 struct mlx5_flow_attr *attr = flow->attr; 976 struct mlx5_core_dev *dev = priv->mdev; 977 struct mlx5_fc *counter = NULL; 978 int err; 979 980 if (flow_flag_test(flow, HAIRPIN)) { 981 err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); 982 if (err) 983 return err; 984 } 985 986 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { 987 counter = mlx5_fc_create(dev, true); 988 if (IS_ERR(counter)) 989 return PTR_ERR(counter); 990 991 attr->counter = counter; 992 } 993 994 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { 995 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); 996 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); 997 if (err) 998 return err; 999 } 1000 1001 if (flow_flag_test(flow, CT)) 1002 flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec, 1003 attr, &parse_attr->mod_hdr_acts); 1004 else 1005 flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, 1006 attr); 1007 1008 return PTR_ERR_OR_ZERO(flow->rule[0]); 1009 } 1010 1011 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, 1012 struct mlx5_flow_handle *rule, 1013 struct mlx5_flow_attr *attr) 1014 { 1015 struct mlx5_fs_chains *nic_chains = nic_chains(priv); 1016 1017 mlx5_del_flow_rules(rule); 1018 1019 if (attr->chain || attr->prio) 1020 mlx5_chains_put_table(nic_chains, attr->chain, attr->prio, 1021 MLX5E_TC_FT_LEVEL); 1022 1023 if (attr->dest_chain) 1024 mlx5_chains_put_table(nic_chains, attr->dest_chain, 1, 1025 MLX5E_TC_FT_LEVEL); 1026 } 1027 1028 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, 1029 struct mlx5e_tc_flow *flow) 1030 { 1031 struct mlx5_flow_attr *attr = flow->attr; 1032 struct mlx5e_tc_table *tc = &priv->fs.tc; 1033 1034 flow_flag_clear(flow, OFFLOADED); 1035 1036 if (flow_flag_test(flow, CT)) 1037 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); 1038 else if (!IS_ERR_OR_NULL(flow->rule[0])) 1039 mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr); 1040 1041 /* Remove root table if no rules are left to avoid 1042 * extra steering hops. 1043 */ 1044 mutex_lock(&priv->fs.tc.t_lock); 1045 if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && 1046 !IS_ERR_OR_NULL(tc->t)) { 1047 mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL); 1048 priv->fs.tc.t = NULL; 1049 } 1050 mutex_unlock(&priv->fs.tc.t_lock); 1051 1052 kvfree(attr->parse_attr); 1053 1054 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) 1055 mlx5e_detach_mod_hdr(priv, flow); 1056 1057 mlx5_fc_destroy(priv->mdev, attr->counter); 1058 1059 if (flow_flag_test(flow, HAIRPIN)) 1060 mlx5e_hairpin_flow_del(priv, flow); 1061 1062 kfree(flow->attr); 1063 } 1064 1065 struct mlx5_flow_handle * 1066 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, 1067 struct mlx5e_tc_flow *flow, 1068 struct mlx5_flow_spec *spec, 1069 struct mlx5_flow_attr *attr) 1070 { 1071 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; 1072 struct mlx5_flow_handle *rule; 1073 1074 if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) 1075 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); 1076 1077 if (flow_flag_test(flow, CT)) { 1078 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; 1079 1080 return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), 1081 flow, spec, attr, 1082 mod_hdr_acts); 1083 } 1084 1085 rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); 1086 if (IS_ERR(rule)) 1087 return rule; 1088 1089 if (attr->esw_attr->split_count) { 1090 flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); 1091 if (IS_ERR(flow->rule[1])) { 1092 mlx5_eswitch_del_offloaded_rule(esw, rule, attr); 1093 return flow->rule[1]; 1094 } 1095 } 1096 1097 return rule; 1098 } 1099 1100 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, 1101 struct mlx5e_tc_flow *flow, 1102 struct mlx5_flow_attr *attr) 1103 { 1104 flow_flag_clear(flow, OFFLOADED); 1105 1106 if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) 1107 goto offload_rule_0; 1108 1109 if (flow_flag_test(flow, CT)) { 1110 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); 1111 return; 1112 } 1113 1114 if (attr->esw_attr->split_count) 1115 mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); 1116 1117 offload_rule_0: 1118 mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); 1119 } 1120 1121 struct mlx5_flow_handle * 1122 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, 1123 struct mlx5e_tc_flow *flow, 1124 struct mlx5_flow_spec *spec) 1125 { 1126 struct mlx5_flow_attr *slow_attr; 1127 struct mlx5_flow_handle *rule; 1128 1129 slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); 1130 if (!slow_attr) 1131 return ERR_PTR(-ENOMEM); 1132 1133 memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); 1134 slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1135 slow_attr->esw_attr->split_count = 0; 1136 slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; 1137 1138 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); 1139 if (!IS_ERR(rule)) 1140 flow_flag_set(flow, SLOW); 1141 1142 kfree(slow_attr); 1143 1144 return rule; 1145 } 1146 1147 void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, 1148 struct mlx5e_tc_flow *flow) 1149 { 1150 struct mlx5_flow_attr *slow_attr; 1151 1152 slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); 1153 if (!slow_attr) { 1154 mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n"); 1155 return; 1156 } 1157 1158 memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); 1159 slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1160 slow_attr->esw_attr->split_count = 0; 1161 slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; 1162 mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); 1163 flow_flag_clear(flow, SLOW); 1164 kfree(slow_attr); 1165 } 1166 1167 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this 1168 * function. 1169 */ 1170 static void unready_flow_add(struct mlx5e_tc_flow *flow, 1171 struct list_head *unready_flows) 1172 { 1173 flow_flag_set(flow, NOT_READY); 1174 list_add_tail(&flow->unready, unready_flows); 1175 } 1176 1177 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this 1178 * function. 1179 */ 1180 static void unready_flow_del(struct mlx5e_tc_flow *flow) 1181 { 1182 list_del(&flow->unready); 1183 flow_flag_clear(flow, NOT_READY); 1184 } 1185 1186 static void add_unready_flow(struct mlx5e_tc_flow *flow) 1187 { 1188 struct mlx5_rep_uplink_priv *uplink_priv; 1189 struct mlx5e_rep_priv *rpriv; 1190 struct mlx5_eswitch *esw; 1191 1192 esw = flow->priv->mdev->priv.eswitch; 1193 rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 1194 uplink_priv = &rpriv->uplink_priv; 1195 1196 mutex_lock(&uplink_priv->unready_flows_lock); 1197 unready_flow_add(flow, &uplink_priv->unready_flows); 1198 mutex_unlock(&uplink_priv->unready_flows_lock); 1199 } 1200 1201 static void remove_unready_flow(struct mlx5e_tc_flow *flow) 1202 { 1203 struct mlx5_rep_uplink_priv *uplink_priv; 1204 struct mlx5e_rep_priv *rpriv; 1205 struct mlx5_eswitch *esw; 1206 1207 esw = flow->priv->mdev->priv.eswitch; 1208 rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 1209 uplink_priv = &rpriv->uplink_priv; 1210 1211 mutex_lock(&uplink_priv->unready_flows_lock); 1212 unready_flow_del(flow); 1213 mutex_unlock(&uplink_priv->unready_flows_lock); 1214 } 1215 1216 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv); 1217 1218 bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev) 1219 { 1220 struct mlx5_core_dev *out_mdev, *route_mdev; 1221 struct mlx5e_priv *out_priv, *route_priv; 1222 1223 out_priv = netdev_priv(out_dev); 1224 out_mdev = out_priv->mdev; 1225 route_priv = netdev_priv(route_dev); 1226 route_mdev = route_priv->mdev; 1227 1228 if (out_mdev->coredev_type != MLX5_COREDEV_PF || 1229 route_mdev->coredev_type != MLX5_COREDEV_VF) 1230 return false; 1231 1232 return same_hw_devs(out_priv, route_priv); 1233 } 1234 1235 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport) 1236 { 1237 struct mlx5e_priv *out_priv, *route_priv; 1238 struct mlx5_core_dev *route_mdev; 1239 struct mlx5_eswitch *esw; 1240 u16 vhca_id; 1241 int err; 1242 1243 out_priv = netdev_priv(out_dev); 1244 esw = out_priv->mdev->priv.eswitch; 1245 route_priv = netdev_priv(route_dev); 1246 route_mdev = route_priv->mdev; 1247 1248 vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id); 1249 err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); 1250 return err; 1251 } 1252 1253 int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, 1254 struct mlx5e_tc_flow_parse_attr *parse_attr, 1255 struct mlx5e_tc_flow *flow) 1256 { 1257 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts; 1258 struct mlx5_modify_hdr *mod_hdr; 1259 1260 mod_hdr = mlx5_modify_header_alloc(priv->mdev, 1261 get_flow_name_space(flow), 1262 mod_hdr_acts->num_actions, 1263 mod_hdr_acts->actions); 1264 if (IS_ERR(mod_hdr)) 1265 return PTR_ERR(mod_hdr); 1266 1267 WARN_ON(flow->attr->modify_hdr); 1268 flow->attr->modify_hdr = mod_hdr; 1269 1270 return 0; 1271 } 1272 1273 static int 1274 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, 1275 struct mlx5e_tc_flow *flow, 1276 struct netlink_ext_ack *extack) 1277 { 1278 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1279 struct net_device *out_dev, *encap_dev = NULL; 1280 struct mlx5e_tc_flow_parse_attr *parse_attr; 1281 struct mlx5_flow_attr *attr = flow->attr; 1282 bool vf_tun = false, encap_valid = true; 1283 struct mlx5_esw_flow_attr *esw_attr; 1284 struct mlx5_fc *counter = NULL; 1285 struct mlx5e_rep_priv *rpriv; 1286 struct mlx5e_priv *out_priv; 1287 u32 max_prio, max_chain; 1288 int err = 0; 1289 int out_index; 1290 1291 /* We check chain range only for tc flows. 1292 * For ft flows, we checked attr->chain was originally 0 and set it to 1293 * FDB_FT_CHAIN which is outside tc range. 1294 * See mlx5e_rep_setup_ft_cb(). 1295 */ 1296 max_chain = mlx5_chains_get_chain_range(esw_chains(esw)); 1297 if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { 1298 NL_SET_ERR_MSG_MOD(extack, 1299 "Requested chain is out of supported range"); 1300 err = -EOPNOTSUPP; 1301 goto err_out; 1302 } 1303 1304 max_prio = mlx5_chains_get_prio_range(esw_chains(esw)); 1305 if (attr->prio > max_prio) { 1306 NL_SET_ERR_MSG_MOD(extack, 1307 "Requested priority is out of supported range"); 1308 err = -EOPNOTSUPP; 1309 goto err_out; 1310 } 1311 1312 if (flow_flag_test(flow, TUN_RX)) { 1313 err = mlx5e_attach_decap_route(priv, flow); 1314 if (err) 1315 goto err_out; 1316 } 1317 1318 if (flow_flag_test(flow, L3_TO_L2_DECAP)) { 1319 err = mlx5e_attach_decap(priv, flow, extack); 1320 if (err) 1321 goto err_out; 1322 } 1323 1324 parse_attr = attr->parse_attr; 1325 esw_attr = attr->esw_attr; 1326 1327 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { 1328 int mirred_ifindex; 1329 1330 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) 1331 continue; 1332 1333 mirred_ifindex = parse_attr->mirred_ifindex[out_index]; 1334 out_dev = __dev_get_by_index(dev_net(priv->netdev), 1335 mirred_ifindex); 1336 err = mlx5e_attach_encap(priv, flow, out_dev, out_index, 1337 extack, &encap_dev, &encap_valid); 1338 if (err) 1339 goto err_out; 1340 1341 if (esw_attr->dests[out_index].flags & 1342 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) 1343 vf_tun = true; 1344 out_priv = netdev_priv(encap_dev); 1345 rpriv = out_priv->ppriv; 1346 esw_attr->dests[out_index].rep = rpriv->rep; 1347 esw_attr->dests[out_index].mdev = out_priv->mdev; 1348 } 1349 1350 err = mlx5_eswitch_add_vlan_action(esw, attr); 1351 if (err) 1352 goto err_out; 1353 1354 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && 1355 !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) { 1356 if (vf_tun) { 1357 err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); 1358 if (err) 1359 goto err_out; 1360 } else { 1361 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); 1362 if (err) 1363 goto err_out; 1364 } 1365 } 1366 1367 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { 1368 counter = mlx5_fc_create(esw_attr->counter_dev, true); 1369 if (IS_ERR(counter)) { 1370 err = PTR_ERR(counter); 1371 goto err_out; 1372 } 1373 1374 attr->counter = counter; 1375 } 1376 1377 /* we get here if one of the following takes place: 1378 * (1) there's no error 1379 * (2) there's an encap action and we don't have valid neigh 1380 */ 1381 if (!encap_valid) 1382 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); 1383 else 1384 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); 1385 1386 if (IS_ERR(flow->rule[0])) { 1387 err = PTR_ERR(flow->rule[0]); 1388 goto err_out; 1389 } 1390 flow_flag_set(flow, OFFLOADED); 1391 1392 return 0; 1393 1394 err_out: 1395 flow_flag_set(flow, FAILED); 1396 return err; 1397 } 1398 1399 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) 1400 { 1401 struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; 1402 void *headers_v = MLX5_ADDR_OF(fte_match_param, 1403 spec->match_value, 1404 misc_parameters_3); 1405 u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3, 1406 headers_v, 1407 geneve_tlv_option_0_data); 1408 1409 return !!geneve_tlv_opt_0_data; 1410 } 1411 1412 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, 1413 struct mlx5e_tc_flow *flow) 1414 { 1415 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1416 struct mlx5_flow_attr *attr = flow->attr; 1417 struct mlx5_esw_flow_attr *esw_attr; 1418 bool vf_tun = false; 1419 int out_index; 1420 1421 esw_attr = attr->esw_attr; 1422 mlx5e_put_flow_tunnel_id(flow); 1423 1424 if (flow_flag_test(flow, NOT_READY)) 1425 remove_unready_flow(flow); 1426 1427 if (mlx5e_is_offloaded_flow(flow)) { 1428 if (flow_flag_test(flow, SLOW)) 1429 mlx5e_tc_unoffload_from_slow_path(esw, flow); 1430 else 1431 mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); 1432 } 1433 1434 if (mlx5_flow_has_geneve_opt(flow)) 1435 mlx5_geneve_tlv_option_del(priv->mdev->geneve); 1436 1437 mlx5_eswitch_del_vlan_action(esw, attr); 1438 1439 if (flow->decap_route) 1440 mlx5e_detach_decap_route(priv, flow); 1441 1442 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { 1443 if (esw_attr->dests[out_index].flags & 1444 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) 1445 vf_tun = true; 1446 if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { 1447 mlx5e_detach_encap(priv, flow, out_index); 1448 kfree(attr->parse_attr->tun_info[out_index]); 1449 } 1450 } 1451 1452 mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); 1453 1454 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { 1455 dealloc_mod_hdr_actions(&attr->parse_attr->mod_hdr_acts); 1456 if (vf_tun && attr->modify_hdr) 1457 mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); 1458 else 1459 mlx5e_detach_mod_hdr(priv, flow); 1460 } 1461 kvfree(attr->parse_attr); 1462 kvfree(attr->esw_attr->rx_tun_attr); 1463 1464 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) 1465 mlx5_fc_destroy(esw_attr->counter_dev, attr->counter); 1466 1467 if (flow_flag_test(flow, L3_TO_L2_DECAP)) 1468 mlx5e_detach_decap(priv, flow); 1469 1470 kfree(flow->attr); 1471 } 1472 1473 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) 1474 { 1475 return flow->attr->counter; 1476 } 1477 1478 /* Iterate over tmp_list of flows attached to flow_list head. */ 1479 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) 1480 { 1481 struct mlx5e_tc_flow *flow, *tmp; 1482 1483 list_for_each_entry_safe(flow, tmp, flow_list, tmp_list) 1484 mlx5e_flow_put(priv, flow); 1485 } 1486 1487 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) 1488 { 1489 struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; 1490 1491 if (!flow_flag_test(flow, ESWITCH) || 1492 !flow_flag_test(flow, DUP)) 1493 return; 1494 1495 mutex_lock(&esw->offloads.peer_mutex); 1496 list_del(&flow->peer); 1497 mutex_unlock(&esw->offloads.peer_mutex); 1498 1499 flow_flag_clear(flow, DUP); 1500 1501 if (refcount_dec_and_test(&flow->peer_flow->refcnt)) { 1502 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); 1503 kfree(flow->peer_flow); 1504 } 1505 1506 flow->peer_flow = NULL; 1507 } 1508 1509 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) 1510 { 1511 struct mlx5_core_dev *dev = flow->priv->mdev; 1512 struct mlx5_devcom *devcom = dev->priv.devcom; 1513 struct mlx5_eswitch *peer_esw; 1514 1515 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 1516 if (!peer_esw) 1517 return; 1518 1519 __mlx5e_tc_del_fdb_peer_flow(flow); 1520 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 1521 } 1522 1523 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, 1524 struct mlx5e_tc_flow *flow) 1525 { 1526 if (mlx5e_is_eswitch_flow(flow)) { 1527 mlx5e_tc_del_fdb_peer_flow(flow); 1528 mlx5e_tc_del_fdb_flow(priv, flow); 1529 } else { 1530 mlx5e_tc_del_nic_flow(priv, flow); 1531 } 1532 } 1533 1534 static int flow_has_tc_fwd_action(struct flow_cls_offload *f) 1535 { 1536 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 1537 struct flow_action *flow_action = &rule->action; 1538 const struct flow_action_entry *act; 1539 int i; 1540 1541 flow_action_for_each(i, act, flow_action) { 1542 switch (act->id) { 1543 case FLOW_ACTION_GOTO: 1544 return true; 1545 default: 1546 continue; 1547 } 1548 } 1549 1550 return false; 1551 } 1552 1553 static int 1554 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, 1555 struct flow_dissector_key_enc_opts *opts, 1556 struct netlink_ext_ack *extack, 1557 bool *dont_care) 1558 { 1559 struct geneve_opt *opt; 1560 int off = 0; 1561 1562 *dont_care = true; 1563 1564 while (opts->len > off) { 1565 opt = (struct geneve_opt *)&opts->data[off]; 1566 1567 if (!(*dont_care) || opt->opt_class || opt->type || 1568 memchr_inv(opt->opt_data, 0, opt->length * 4)) { 1569 *dont_care = false; 1570 1571 if (opt->opt_class != htons(U16_MAX) || 1572 opt->type != U8_MAX) { 1573 NL_SET_ERR_MSG(extack, 1574 "Partial match of tunnel options in chain > 0 isn't supported"); 1575 netdev_warn(priv->netdev, 1576 "Partial match of tunnel options in chain > 0 isn't supported"); 1577 return -EOPNOTSUPP; 1578 } 1579 } 1580 1581 off += sizeof(struct geneve_opt) + opt->length * 4; 1582 } 1583 1584 return 0; 1585 } 1586 1587 #define COPY_DISSECTOR(rule, diss_key, dst)\ 1588 ({ \ 1589 struct flow_rule *__rule = (rule);\ 1590 typeof(dst) __dst = dst;\ 1591 \ 1592 memcpy(__dst,\ 1593 skb_flow_dissector_target(__rule->match.dissector,\ 1594 diss_key,\ 1595 __rule->match.key),\ 1596 sizeof(*__dst));\ 1597 }) 1598 1599 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, 1600 struct mlx5e_tc_flow *flow, 1601 struct flow_cls_offload *f, 1602 struct net_device *filter_dev) 1603 { 1604 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 1605 struct netlink_ext_ack *extack = f->common.extack; 1606 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; 1607 struct flow_match_enc_opts enc_opts_match; 1608 struct tunnel_match_enc_opts tun_enc_opts; 1609 struct mlx5_rep_uplink_priv *uplink_priv; 1610 struct mlx5_flow_attr *attr = flow->attr; 1611 struct mlx5e_rep_priv *uplink_rpriv; 1612 struct tunnel_match_key tunnel_key; 1613 bool enc_opts_is_dont_care = true; 1614 u32 tun_id, enc_opts_id = 0; 1615 struct mlx5_eswitch *esw; 1616 u32 value, mask; 1617 int err; 1618 1619 esw = priv->mdev->priv.eswitch; 1620 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 1621 uplink_priv = &uplink_rpriv->uplink_priv; 1622 1623 memset(&tunnel_key, 0, sizeof(tunnel_key)); 1624 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, 1625 &tunnel_key.enc_control); 1626 if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) 1627 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, 1628 &tunnel_key.enc_ipv4); 1629 else 1630 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, 1631 &tunnel_key.enc_ipv6); 1632 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); 1633 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, 1634 &tunnel_key.enc_tp); 1635 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, 1636 &tunnel_key.enc_key_id); 1637 tunnel_key.filter_ifindex = filter_dev->ifindex; 1638 1639 err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); 1640 if (err) 1641 return err; 1642 1643 flow_rule_match_enc_opts(rule, &enc_opts_match); 1644 err = enc_opts_is_dont_care_or_full_match(priv, 1645 enc_opts_match.mask, 1646 extack, 1647 &enc_opts_is_dont_care); 1648 if (err) 1649 goto err_enc_opts; 1650 1651 if (!enc_opts_is_dont_care) { 1652 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts)); 1653 memcpy(&tun_enc_opts.key, enc_opts_match.key, 1654 sizeof(*enc_opts_match.key)); 1655 memcpy(&tun_enc_opts.mask, enc_opts_match.mask, 1656 sizeof(*enc_opts_match.mask)); 1657 1658 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, 1659 &tun_enc_opts, &enc_opts_id); 1660 if (err) 1661 goto err_enc_opts; 1662 } 1663 1664 value = tun_id << ENC_OPTS_BITS | enc_opts_id; 1665 mask = enc_opts_id ? TUNNEL_ID_MASK : 1666 (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); 1667 1668 if (attr->chain) { 1669 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, 1670 TUNNEL_TO_REG, value, mask); 1671 } else { 1672 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; 1673 err = mlx5e_tc_match_to_reg_set(priv->mdev, 1674 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB, 1675 TUNNEL_TO_REG, value); 1676 if (err) 1677 goto err_set; 1678 1679 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1680 } 1681 1682 flow->tunnel_id = value; 1683 return 0; 1684 1685 err_set: 1686 if (enc_opts_id) 1687 mapping_remove(uplink_priv->tunnel_enc_opts_mapping, 1688 enc_opts_id); 1689 err_enc_opts: 1690 mapping_remove(uplink_priv->tunnel_mapping, tun_id); 1691 return err; 1692 } 1693 1694 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) 1695 { 1696 u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK; 1697 u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS; 1698 struct mlx5_rep_uplink_priv *uplink_priv; 1699 struct mlx5e_rep_priv *uplink_rpriv; 1700 struct mlx5_eswitch *esw; 1701 1702 esw = flow->priv->mdev->priv.eswitch; 1703 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 1704 uplink_priv = &uplink_rpriv->uplink_priv; 1705 1706 if (tun_id) 1707 mapping_remove(uplink_priv->tunnel_mapping, tun_id); 1708 if (enc_opts_id) 1709 mapping_remove(uplink_priv->tunnel_enc_opts_mapping, 1710 enc_opts_id); 1711 } 1712 1713 u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow) 1714 { 1715 return flow->tunnel_id; 1716 } 1717 1718 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, 1719 struct flow_match_basic *match, bool outer, 1720 void *headers_c, void *headers_v) 1721 { 1722 bool ip_version_cap; 1723 1724 ip_version_cap = outer ? 1725 MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 1726 ft_field_support.outer_ip_version) : 1727 MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 1728 ft_field_support.inner_ip_version); 1729 1730 if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) && 1731 (match->key->n_proto == htons(ETH_P_IP) || 1732 match->key->n_proto == htons(ETH_P_IPV6))) { 1733 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); 1734 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 1735 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6); 1736 } else { 1737 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 1738 ntohs(match->mask->n_proto)); 1739 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, 1740 ntohs(match->key->n_proto)); 1741 } 1742 } 1743 1744 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer) 1745 { 1746 void *headers_v; 1747 u16 ethertype; 1748 u8 ip_version; 1749 1750 if (outer) 1751 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1752 else 1753 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); 1754 1755 ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version); 1756 /* Return ip_version converted from ethertype anyway */ 1757 if (!ip_version) { 1758 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); 1759 if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP) 1760 ip_version = 4; 1761 else if (ethertype == ETH_P_IPV6) 1762 ip_version = 6; 1763 } 1764 return ip_version; 1765 } 1766 1767 static int parse_tunnel_attr(struct mlx5e_priv *priv, 1768 struct mlx5e_tc_flow *flow, 1769 struct mlx5_flow_spec *spec, 1770 struct flow_cls_offload *f, 1771 struct net_device *filter_dev, 1772 u8 *match_level, 1773 bool *match_inner) 1774 { 1775 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); 1776 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1777 struct netlink_ext_ack *extack = f->common.extack; 1778 bool needs_mapping, sets_mapping; 1779 int err; 1780 1781 if (!mlx5e_is_eswitch_flow(flow)) 1782 return -EOPNOTSUPP; 1783 1784 needs_mapping = !!flow->attr->chain; 1785 sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f); 1786 *match_inner = !needs_mapping; 1787 1788 if ((needs_mapping || sets_mapping) && 1789 !mlx5_eswitch_reg_c1_loopback_enabled(esw)) { 1790 NL_SET_ERR_MSG(extack, 1791 "Chains on tunnel devices isn't supported without register loopback support"); 1792 netdev_warn(priv->netdev, 1793 "Chains on tunnel devices isn't supported without register loopback support"); 1794 return -EOPNOTSUPP; 1795 } 1796 1797 if (!flow->attr->chain) { 1798 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, 1799 match_level); 1800 if (err) { 1801 NL_SET_ERR_MSG_MOD(extack, 1802 "Failed to parse tunnel attributes"); 1803 netdev_warn(priv->netdev, 1804 "Failed to parse tunnel attributes"); 1805 return err; 1806 } 1807 1808 /* With mpls over udp we decapsulate using packet reformat 1809 * object 1810 */ 1811 if (!netif_is_bareudp(filter_dev)) 1812 flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; 1813 err = mlx5e_tc_set_attr_rx_tun(flow, spec); 1814 if (err) 1815 return err; 1816 } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { 1817 struct mlx5_flow_spec *tmp_spec; 1818 1819 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL); 1820 if (!tmp_spec) { 1821 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec"); 1822 netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec"); 1823 return -ENOMEM; 1824 } 1825 memcpy(tmp_spec, spec, sizeof(*tmp_spec)); 1826 1827 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); 1828 if (err) { 1829 kvfree(tmp_spec); 1830 NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); 1831 netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); 1832 return err; 1833 } 1834 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); 1835 kvfree(tmp_spec); 1836 if (err) 1837 return err; 1838 } 1839 1840 if (!needs_mapping && !sets_mapping) 1841 return 0; 1842 1843 return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev); 1844 } 1845 1846 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec) 1847 { 1848 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 1849 inner_headers); 1850 } 1851 1852 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec) 1853 { 1854 return MLX5_ADDR_OF(fte_match_param, spec->match_value, 1855 inner_headers); 1856 } 1857 1858 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) 1859 { 1860 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 1861 outer_headers); 1862 } 1863 1864 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) 1865 { 1866 return MLX5_ADDR_OF(fte_match_param, spec->match_value, 1867 outer_headers); 1868 } 1869 1870 static void *get_match_headers_value(u32 flags, 1871 struct mlx5_flow_spec *spec) 1872 { 1873 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 1874 get_match_inner_headers_value(spec) : 1875 get_match_outer_headers_value(spec); 1876 } 1877 1878 static void *get_match_headers_criteria(u32 flags, 1879 struct mlx5_flow_spec *spec) 1880 { 1881 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 1882 get_match_inner_headers_criteria(spec) : 1883 get_match_outer_headers_criteria(spec); 1884 } 1885 1886 static int mlx5e_flower_parse_meta(struct net_device *filter_dev, 1887 struct flow_cls_offload *f) 1888 { 1889 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 1890 struct netlink_ext_ack *extack = f->common.extack; 1891 struct net_device *ingress_dev; 1892 struct flow_match_meta match; 1893 1894 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) 1895 return 0; 1896 1897 flow_rule_match_meta(rule, &match); 1898 if (match.mask->ingress_ifindex != 0xFFFFFFFF) { 1899 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); 1900 return -EOPNOTSUPP; 1901 } 1902 1903 ingress_dev = __dev_get_by_index(dev_net(filter_dev), 1904 match.key->ingress_ifindex); 1905 if (!ingress_dev) { 1906 NL_SET_ERR_MSG_MOD(extack, 1907 "Can't find the ingress port to match on"); 1908 return -ENOENT; 1909 } 1910 1911 if (ingress_dev != filter_dev) { 1912 NL_SET_ERR_MSG_MOD(extack, 1913 "Can't match on the ingress filter port"); 1914 return -EOPNOTSUPP; 1915 } 1916 1917 return 0; 1918 } 1919 1920 static bool skip_key_basic(struct net_device *filter_dev, 1921 struct flow_cls_offload *f) 1922 { 1923 /* When doing mpls over udp decap, the user needs to provide 1924 * MPLS_UC as the protocol in order to be able to match on mpls 1925 * label fields. However, the actual ethertype is IP so we want to 1926 * avoid matching on this, otherwise we'll fail the match. 1927 */ 1928 if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0) 1929 return true; 1930 1931 return false; 1932 } 1933 1934 static int __parse_cls_flower(struct mlx5e_priv *priv, 1935 struct mlx5e_tc_flow *flow, 1936 struct mlx5_flow_spec *spec, 1937 struct flow_cls_offload *f, 1938 struct net_device *filter_dev, 1939 u8 *inner_match_level, u8 *outer_match_level) 1940 { 1941 struct netlink_ext_ack *extack = f->common.extack; 1942 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 1943 outer_headers); 1944 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 1945 outer_headers); 1946 void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 1947 misc_parameters); 1948 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 1949 misc_parameters); 1950 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 1951 struct flow_dissector *dissector = rule->match.dissector; 1952 u16 addr_type = 0; 1953 u8 ip_proto = 0; 1954 u8 *match_level; 1955 int err; 1956 1957 match_level = outer_match_level; 1958 1959 if (dissector->used_keys & 1960 ~(BIT(FLOW_DISSECTOR_KEY_META) | 1961 BIT(FLOW_DISSECTOR_KEY_CONTROL) | 1962 BIT(FLOW_DISSECTOR_KEY_BASIC) | 1963 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | 1964 BIT(FLOW_DISSECTOR_KEY_VLAN) | 1965 BIT(FLOW_DISSECTOR_KEY_CVLAN) | 1966 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | 1967 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | 1968 BIT(FLOW_DISSECTOR_KEY_PORTS) | 1969 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | 1970 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | 1971 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | 1972 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | 1973 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | 1974 BIT(FLOW_DISSECTOR_KEY_TCP) | 1975 BIT(FLOW_DISSECTOR_KEY_IP) | 1976 BIT(FLOW_DISSECTOR_KEY_CT) | 1977 BIT(FLOW_DISSECTOR_KEY_ENC_IP) | 1978 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | 1979 BIT(FLOW_DISSECTOR_KEY_MPLS))) { 1980 NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); 1981 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", 1982 dissector->used_keys); 1983 return -EOPNOTSUPP; 1984 } 1985 1986 if (mlx5e_get_tc_tun(filter_dev)) { 1987 bool match_inner = false; 1988 1989 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, 1990 outer_match_level, &match_inner); 1991 if (err) 1992 return err; 1993 1994 if (match_inner) { 1995 /* header pointers should point to the inner headers 1996 * if the packet was decapsulated already. 1997 * outer headers are set by parse_tunnel_attr. 1998 */ 1999 match_level = inner_match_level; 2000 headers_c = get_match_inner_headers_criteria(spec); 2001 headers_v = get_match_inner_headers_value(spec); 2002 } 2003 } 2004 2005 err = mlx5e_flower_parse_meta(filter_dev, f); 2006 if (err) 2007 return err; 2008 2009 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) && 2010 !skip_key_basic(filter_dev, f)) { 2011 struct flow_match_basic match; 2012 2013 flow_rule_match_basic(rule, &match); 2014 mlx5e_tc_set_ethertype(priv->mdev, &match, 2015 match_level == outer_match_level, 2016 headers_c, headers_v); 2017 2018 if (match.mask->n_proto) 2019 *match_level = MLX5_MATCH_L2; 2020 } 2021 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) || 2022 is_vlan_dev(filter_dev)) { 2023 struct flow_dissector_key_vlan filter_dev_mask; 2024 struct flow_dissector_key_vlan filter_dev_key; 2025 struct flow_match_vlan match; 2026 2027 if (is_vlan_dev(filter_dev)) { 2028 match.key = &filter_dev_key; 2029 match.key->vlan_id = vlan_dev_vlan_id(filter_dev); 2030 match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev); 2031 match.key->vlan_priority = 0; 2032 match.mask = &filter_dev_mask; 2033 memset(match.mask, 0xff, sizeof(*match.mask)); 2034 match.mask->vlan_priority = 0; 2035 } else { 2036 flow_rule_match_vlan(rule, &match); 2037 } 2038 if (match.mask->vlan_id || 2039 match.mask->vlan_priority || 2040 match.mask->vlan_tpid) { 2041 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { 2042 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2043 svlan_tag, 1); 2044 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2045 svlan_tag, 1); 2046 } else { 2047 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2048 cvlan_tag, 1); 2049 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2050 cvlan_tag, 1); 2051 } 2052 2053 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, 2054 match.mask->vlan_id); 2055 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, 2056 match.key->vlan_id); 2057 2058 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, 2059 match.mask->vlan_priority); 2060 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, 2061 match.key->vlan_priority); 2062 2063 *match_level = MLX5_MATCH_L2; 2064 } 2065 } else if (*match_level != MLX5_MATCH_NONE) { 2066 /* cvlan_tag enabled in match criteria and 2067 * disabled in match value means both S & C tags 2068 * don't exist (untagged of both) 2069 */ 2070 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 2071 *match_level = MLX5_MATCH_L2; 2072 } 2073 2074 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { 2075 struct flow_match_vlan match; 2076 2077 flow_rule_match_cvlan(rule, &match); 2078 if (match.mask->vlan_id || 2079 match.mask->vlan_priority || 2080 match.mask->vlan_tpid) { 2081 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { 2082 MLX5_SET(fte_match_set_misc, misc_c, 2083 outer_second_svlan_tag, 1); 2084 MLX5_SET(fte_match_set_misc, misc_v, 2085 outer_second_svlan_tag, 1); 2086 } else { 2087 MLX5_SET(fte_match_set_misc, misc_c, 2088 outer_second_cvlan_tag, 1); 2089 MLX5_SET(fte_match_set_misc, misc_v, 2090 outer_second_cvlan_tag, 1); 2091 } 2092 2093 MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, 2094 match.mask->vlan_id); 2095 MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, 2096 match.key->vlan_id); 2097 MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, 2098 match.mask->vlan_priority); 2099 MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, 2100 match.key->vlan_priority); 2101 2102 *match_level = MLX5_MATCH_L2; 2103 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; 2104 } 2105 } 2106 2107 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 2108 struct flow_match_eth_addrs match; 2109 2110 flow_rule_match_eth_addrs(rule, &match); 2111 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2112 dmac_47_16), 2113 match.mask->dst); 2114 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2115 dmac_47_16), 2116 match.key->dst); 2117 2118 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2119 smac_47_16), 2120 match.mask->src); 2121 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2122 smac_47_16), 2123 match.key->src); 2124 2125 if (!is_zero_ether_addr(match.mask->src) || 2126 !is_zero_ether_addr(match.mask->dst)) 2127 *match_level = MLX5_MATCH_L2; 2128 } 2129 2130 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 2131 struct flow_match_control match; 2132 2133 flow_rule_match_control(rule, &match); 2134 addr_type = match.key->addr_type; 2135 2136 /* the HW doesn't support frag first/later */ 2137 if (match.mask->flags & FLOW_DIS_FIRST_FRAG) 2138 return -EOPNOTSUPP; 2139 2140 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { 2141 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); 2142 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 2143 match.key->flags & FLOW_DIS_IS_FRAGMENT); 2144 2145 /* the HW doesn't need L3 inline to match on frag=no */ 2146 if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) 2147 *match_level = MLX5_MATCH_L2; 2148 /* *** L2 attributes parsing up to here *** */ 2149 else 2150 *match_level = MLX5_MATCH_L3; 2151 } 2152 } 2153 2154 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 2155 struct flow_match_basic match; 2156 2157 flow_rule_match_basic(rule, &match); 2158 ip_proto = match.key->ip_proto; 2159 2160 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 2161 match.mask->ip_proto); 2162 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 2163 match.key->ip_proto); 2164 2165 if (match.mask->ip_proto) 2166 *match_level = MLX5_MATCH_L3; 2167 } 2168 2169 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 2170 struct flow_match_ipv4_addrs match; 2171 2172 flow_rule_match_ipv4_addrs(rule, &match); 2173 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2174 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2175 &match.mask->src, sizeof(match.mask->src)); 2176 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2177 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2178 &match.key->src, sizeof(match.key->src)); 2179 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2180 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2181 &match.mask->dst, sizeof(match.mask->dst)); 2182 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2183 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2184 &match.key->dst, sizeof(match.key->dst)); 2185 2186 if (match.mask->src || match.mask->dst) 2187 *match_level = MLX5_MATCH_L3; 2188 } 2189 2190 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 2191 struct flow_match_ipv6_addrs match; 2192 2193 flow_rule_match_ipv6_addrs(rule, &match); 2194 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2195 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2196 &match.mask->src, sizeof(match.mask->src)); 2197 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2198 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2199 &match.key->src, sizeof(match.key->src)); 2200 2201 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2202 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2203 &match.mask->dst, sizeof(match.mask->dst)); 2204 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2205 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2206 &match.key->dst, sizeof(match.key->dst)); 2207 2208 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || 2209 ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) 2210 *match_level = MLX5_MATCH_L3; 2211 } 2212 2213 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { 2214 struct flow_match_ip match; 2215 2216 flow_rule_match_ip(rule, &match); 2217 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, 2218 match.mask->tos & 0x3); 2219 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, 2220 match.key->tos & 0x3); 2221 2222 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, 2223 match.mask->tos >> 2); 2224 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, 2225 match.key->tos >> 2); 2226 2227 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, 2228 match.mask->ttl); 2229 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, 2230 match.key->ttl); 2231 2232 if (match.mask->ttl && 2233 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, 2234 ft_field_support.outer_ipv4_ttl)) { 2235 NL_SET_ERR_MSG_MOD(extack, 2236 "Matching on TTL is not supported"); 2237 return -EOPNOTSUPP; 2238 } 2239 2240 if (match.mask->tos || match.mask->ttl) 2241 *match_level = MLX5_MATCH_L3; 2242 } 2243 2244 /* *** L3 attributes parsing up to here *** */ 2245 2246 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 2247 struct flow_match_ports match; 2248 2249 flow_rule_match_ports(rule, &match); 2250 switch (ip_proto) { 2251 case IPPROTO_TCP: 2252 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2253 tcp_sport, ntohs(match.mask->src)); 2254 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2255 tcp_sport, ntohs(match.key->src)); 2256 2257 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2258 tcp_dport, ntohs(match.mask->dst)); 2259 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2260 tcp_dport, ntohs(match.key->dst)); 2261 break; 2262 2263 case IPPROTO_UDP: 2264 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2265 udp_sport, ntohs(match.mask->src)); 2266 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2267 udp_sport, ntohs(match.key->src)); 2268 2269 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2270 udp_dport, ntohs(match.mask->dst)); 2271 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2272 udp_dport, ntohs(match.key->dst)); 2273 break; 2274 default: 2275 NL_SET_ERR_MSG_MOD(extack, 2276 "Only UDP and TCP transports are supported for L4 matching"); 2277 netdev_err(priv->netdev, 2278 "Only UDP and TCP transport are supported\n"); 2279 return -EINVAL; 2280 } 2281 2282 if (match.mask->src || match.mask->dst) 2283 *match_level = MLX5_MATCH_L4; 2284 } 2285 2286 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { 2287 struct flow_match_tcp match; 2288 2289 flow_rule_match_tcp(rule, &match); 2290 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, 2291 ntohs(match.mask->flags)); 2292 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, 2293 ntohs(match.key->flags)); 2294 2295 if (match.mask->flags) 2296 *match_level = MLX5_MATCH_L4; 2297 } 2298 2299 return 0; 2300 } 2301 2302 static int parse_cls_flower(struct mlx5e_priv *priv, 2303 struct mlx5e_tc_flow *flow, 2304 struct mlx5_flow_spec *spec, 2305 struct flow_cls_offload *f, 2306 struct net_device *filter_dev) 2307 { 2308 u8 inner_match_level, outer_match_level, non_tunnel_match_level; 2309 struct netlink_ext_ack *extack = f->common.extack; 2310 struct mlx5_core_dev *dev = priv->mdev; 2311 struct mlx5_eswitch *esw = dev->priv.eswitch; 2312 struct mlx5e_rep_priv *rpriv = priv->ppriv; 2313 struct mlx5_eswitch_rep *rep; 2314 bool is_eswitch_flow; 2315 int err; 2316 2317 inner_match_level = MLX5_MATCH_NONE; 2318 outer_match_level = MLX5_MATCH_NONE; 2319 2320 err = __parse_cls_flower(priv, flow, spec, f, filter_dev, 2321 &inner_match_level, &outer_match_level); 2322 non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? 2323 outer_match_level : inner_match_level; 2324 2325 is_eswitch_flow = mlx5e_is_eswitch_flow(flow); 2326 if (!err && is_eswitch_flow) { 2327 rep = rpriv->rep; 2328 if (rep->vport != MLX5_VPORT_UPLINK && 2329 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && 2330 esw->offloads.inline_mode < non_tunnel_match_level)) { 2331 NL_SET_ERR_MSG_MOD(extack, 2332 "Flow is not offloaded due to min inline setting"); 2333 netdev_warn(priv->netdev, 2334 "Flow is not offloaded due to min inline setting, required %d actual %d\n", 2335 non_tunnel_match_level, esw->offloads.inline_mode); 2336 return -EOPNOTSUPP; 2337 } 2338 } 2339 2340 flow->attr->inner_match_level = inner_match_level; 2341 flow->attr->outer_match_level = outer_match_level; 2342 2343 2344 return err; 2345 } 2346 2347 struct pedit_headers { 2348 struct ethhdr eth; 2349 struct vlan_hdr vlan; 2350 struct iphdr ip4; 2351 struct ipv6hdr ip6; 2352 struct tcphdr tcp; 2353 struct udphdr udp; 2354 }; 2355 2356 struct pedit_headers_action { 2357 struct pedit_headers vals; 2358 struct pedit_headers masks; 2359 u32 pedits; 2360 }; 2361 2362 static int pedit_header_offsets[] = { 2363 [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), 2364 [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), 2365 [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), 2366 [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), 2367 [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), 2368 }; 2369 2370 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) 2371 2372 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, 2373 struct pedit_headers_action *hdrs) 2374 { 2375 u32 *curr_pmask, *curr_pval; 2376 2377 curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); 2378 curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); 2379 2380 if (*curr_pmask & mask) /* disallow acting twice on the same location */ 2381 goto out_err; 2382 2383 *curr_pmask |= mask; 2384 *curr_pval |= (val & mask); 2385 2386 return 0; 2387 2388 out_err: 2389 return -EOPNOTSUPP; 2390 } 2391 2392 struct mlx5_fields { 2393 u8 field; 2394 u8 field_bsize; 2395 u32 field_mask; 2396 u32 offset; 2397 u32 match_offset; 2398 }; 2399 2400 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \ 2401 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \ 2402 offsetof(struct pedit_headers, field) + (off), \ 2403 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} 2404 2405 /* masked values are the same and there are no rewrites that do not have a 2406 * match. 2407 */ 2408 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \ 2409 type matchmaskx = *(type *)(matchmaskp); \ 2410 type matchvalx = *(type *)(matchvalp); \ 2411 type maskx = *(type *)(maskp); \ 2412 type valx = *(type *)(valp); \ 2413 \ 2414 (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \ 2415 matchmaskx)); \ 2416 }) 2417 2418 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, 2419 void *matchmaskp, u8 bsize) 2420 { 2421 bool same = false; 2422 2423 switch (bsize) { 2424 case 8: 2425 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp); 2426 break; 2427 case 16: 2428 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp); 2429 break; 2430 case 32: 2431 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp); 2432 break; 2433 } 2434 2435 return same; 2436 } 2437 2438 static struct mlx5_fields fields[] = { 2439 OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16), 2440 OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0), 2441 OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16), 2442 OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0), 2443 OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype), 2444 OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid), 2445 2446 OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp), 2447 OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit), 2448 OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4), 2449 OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2450 2451 OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0, 2452 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]), 2453 OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0, 2454 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]), 2455 OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0, 2456 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]), 2457 OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0, 2458 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]), 2459 OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0, 2460 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]), 2461 OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0, 2462 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]), 2463 OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0, 2464 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]), 2465 OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, 2466 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), 2467 OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), 2468 OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp), 2469 2470 OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), 2471 OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), 2472 /* in linux iphdr tcp_flags is 8 bits long */ 2473 OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags), 2474 2475 OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport), 2476 OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), 2477 }; 2478 2479 static unsigned long mask_to_le(unsigned long mask, int size) 2480 { 2481 __be32 mask_be32; 2482 __be16 mask_be16; 2483 2484 if (size == 32) { 2485 mask_be32 = (__force __be32)(mask); 2486 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); 2487 } else if (size == 16) { 2488 mask_be32 = (__force __be32)(mask); 2489 mask_be16 = *(__be16 *)&mask_be32; 2490 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); 2491 } 2492 2493 return mask; 2494 } 2495 static int offload_pedit_fields(struct mlx5e_priv *priv, 2496 int namespace, 2497 struct pedit_headers_action *hdrs, 2498 struct mlx5e_tc_flow_parse_attr *parse_attr, 2499 u32 *action_flags, 2500 struct netlink_ext_ack *extack) 2501 { 2502 struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; 2503 int i, action_size, first, last, next_z; 2504 void *headers_c, *headers_v, *action, *vals_p; 2505 u32 *s_masks_p, *a_masks_p, s_mask, a_mask; 2506 struct mlx5e_tc_mod_hdr_acts *mod_acts; 2507 struct mlx5_fields *f; 2508 unsigned long mask, field_mask; 2509 int err; 2510 u8 cmd; 2511 2512 mod_acts = &parse_attr->mod_hdr_acts; 2513 headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec); 2514 headers_v = get_match_headers_value(*action_flags, &parse_attr->spec); 2515 2516 set_masks = &hdrs[0].masks; 2517 add_masks = &hdrs[1].masks; 2518 set_vals = &hdrs[0].vals; 2519 add_vals = &hdrs[1].vals; 2520 2521 action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); 2522 2523 for (i = 0; i < ARRAY_SIZE(fields); i++) { 2524 bool skip; 2525 2526 f = &fields[i]; 2527 /* avoid seeing bits set from previous iterations */ 2528 s_mask = 0; 2529 a_mask = 0; 2530 2531 s_masks_p = (void *)set_masks + f->offset; 2532 a_masks_p = (void *)add_masks + f->offset; 2533 2534 s_mask = *s_masks_p & f->field_mask; 2535 a_mask = *a_masks_p & f->field_mask; 2536 2537 if (!s_mask && !a_mask) /* nothing to offload here */ 2538 continue; 2539 2540 if (s_mask && a_mask) { 2541 NL_SET_ERR_MSG_MOD(extack, 2542 "can't set and add to the same HW field"); 2543 printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field); 2544 return -EOPNOTSUPP; 2545 } 2546 2547 skip = false; 2548 if (s_mask) { 2549 void *match_mask = headers_c + f->match_offset; 2550 void *match_val = headers_v + f->match_offset; 2551 2552 cmd = MLX5_ACTION_TYPE_SET; 2553 mask = s_mask; 2554 vals_p = (void *)set_vals + f->offset; 2555 /* don't rewrite if we have a match on the same value */ 2556 if (cmp_val_mask(vals_p, s_masks_p, match_val, 2557 match_mask, f->field_bsize)) 2558 skip = true; 2559 /* clear to denote we consumed this field */ 2560 *s_masks_p &= ~f->field_mask; 2561 } else { 2562 cmd = MLX5_ACTION_TYPE_ADD; 2563 mask = a_mask; 2564 vals_p = (void *)add_vals + f->offset; 2565 /* add 0 is no change */ 2566 if ((*(u32 *)vals_p & f->field_mask) == 0) 2567 skip = true; 2568 /* clear to denote we consumed this field */ 2569 *a_masks_p &= ~f->field_mask; 2570 } 2571 if (skip) 2572 continue; 2573 2574 mask = mask_to_le(mask, f->field_bsize); 2575 2576 first = find_first_bit(&mask, f->field_bsize); 2577 next_z = find_next_zero_bit(&mask, f->field_bsize, first); 2578 last = find_last_bit(&mask, f->field_bsize); 2579 if (first < next_z && next_z < last) { 2580 NL_SET_ERR_MSG_MOD(extack, 2581 "rewrite of few sub-fields isn't supported"); 2582 printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", 2583 mask); 2584 return -EOPNOTSUPP; 2585 } 2586 2587 err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts); 2588 if (err) { 2589 NL_SET_ERR_MSG_MOD(extack, 2590 "too many pedit actions, can't offload"); 2591 mlx5_core_warn(priv->mdev, 2592 "mlx5: parsed %d pedit actions, can't do more\n", 2593 mod_acts->num_actions); 2594 return err; 2595 } 2596 2597 action = mod_acts->actions + 2598 (mod_acts->num_actions * action_size); 2599 MLX5_SET(set_action_in, action, action_type, cmd); 2600 MLX5_SET(set_action_in, action, field, f->field); 2601 2602 if (cmd == MLX5_ACTION_TYPE_SET) { 2603 int start; 2604 2605 field_mask = mask_to_le(f->field_mask, f->field_bsize); 2606 2607 /* if field is bit sized it can start not from first bit */ 2608 start = find_first_bit(&field_mask, f->field_bsize); 2609 2610 MLX5_SET(set_action_in, action, offset, first - start); 2611 /* length is num of bits to be written, zero means length of 32 */ 2612 MLX5_SET(set_action_in, action, length, (last - first + 1)); 2613 } 2614 2615 if (f->field_bsize == 32) 2616 MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first); 2617 else if (f->field_bsize == 16) 2618 MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first); 2619 else if (f->field_bsize == 8) 2620 MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); 2621 2622 ++mod_acts->num_actions; 2623 } 2624 2625 return 0; 2626 } 2627 2628 static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev, 2629 int namespace) 2630 { 2631 if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ 2632 return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions); 2633 else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ 2634 return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); 2635 } 2636 2637 int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, 2638 int namespace, 2639 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) 2640 { 2641 int action_size, new_num_actions, max_hw_actions; 2642 size_t new_sz, old_sz; 2643 void *ret; 2644 2645 if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) 2646 return 0; 2647 2648 action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); 2649 2650 max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev, 2651 namespace); 2652 new_num_actions = min(max_hw_actions, 2653 mod_hdr_acts->actions ? 2654 mod_hdr_acts->max_actions * 2 : 1); 2655 if (mod_hdr_acts->max_actions == new_num_actions) 2656 return -ENOSPC; 2657 2658 new_sz = action_size * new_num_actions; 2659 old_sz = mod_hdr_acts->max_actions * action_size; 2660 ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); 2661 if (!ret) 2662 return -ENOMEM; 2663 2664 memset(ret + old_sz, 0, new_sz - old_sz); 2665 mod_hdr_acts->actions = ret; 2666 mod_hdr_acts->max_actions = new_num_actions; 2667 2668 return 0; 2669 } 2670 2671 void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) 2672 { 2673 kfree(mod_hdr_acts->actions); 2674 mod_hdr_acts->actions = NULL; 2675 mod_hdr_acts->num_actions = 0; 2676 mod_hdr_acts->max_actions = 0; 2677 } 2678 2679 static const struct pedit_headers zero_masks = {}; 2680 2681 static int 2682 parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, 2683 const struct flow_action_entry *act, int namespace, 2684 struct mlx5e_tc_flow_parse_attr *parse_attr, 2685 struct pedit_headers_action *hdrs, 2686 struct netlink_ext_ack *extack) 2687 { 2688 u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; 2689 int err = -EOPNOTSUPP; 2690 u32 mask, val, offset; 2691 u8 htype; 2692 2693 htype = act->mangle.htype; 2694 err = -EOPNOTSUPP; /* can't be all optimistic */ 2695 2696 if (htype == FLOW_ACT_MANGLE_UNSPEC) { 2697 NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); 2698 goto out_err; 2699 } 2700 2701 if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) { 2702 NL_SET_ERR_MSG_MOD(extack, 2703 "The pedit offload action is not supported"); 2704 goto out_err; 2705 } 2706 2707 mask = act->mangle.mask; 2708 val = act->mangle.val; 2709 offset = act->mangle.offset; 2710 2711 err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]); 2712 if (err) 2713 goto out_err; 2714 2715 hdrs[cmd].pedits++; 2716 2717 return 0; 2718 out_err: 2719 return err; 2720 } 2721 2722 static int 2723 parse_pedit_to_reformat(struct mlx5e_priv *priv, 2724 const struct flow_action_entry *act, 2725 struct mlx5e_tc_flow_parse_attr *parse_attr, 2726 struct netlink_ext_ack *extack) 2727 { 2728 u32 mask, val, offset; 2729 u32 *p; 2730 2731 if (act->id != FLOW_ACTION_MANGLE) 2732 return -EOPNOTSUPP; 2733 2734 if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) { 2735 NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported"); 2736 return -EOPNOTSUPP; 2737 } 2738 2739 mask = ~act->mangle.mask; 2740 val = act->mangle.val; 2741 offset = act->mangle.offset; 2742 p = (u32 *)&parse_attr->eth; 2743 *(p + (offset >> 2)) |= (val & mask); 2744 2745 return 0; 2746 } 2747 2748 static int parse_tc_pedit_action(struct mlx5e_priv *priv, 2749 const struct flow_action_entry *act, int namespace, 2750 struct mlx5e_tc_flow_parse_attr *parse_attr, 2751 struct pedit_headers_action *hdrs, 2752 struct mlx5e_tc_flow *flow, 2753 struct netlink_ext_ack *extack) 2754 { 2755 if (flow && flow_flag_test(flow, L3_TO_L2_DECAP)) 2756 return parse_pedit_to_reformat(priv, act, parse_attr, extack); 2757 2758 return parse_pedit_to_modify_hdr(priv, act, namespace, 2759 parse_attr, hdrs, extack); 2760 } 2761 2762 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, 2763 struct mlx5e_tc_flow_parse_attr *parse_attr, 2764 struct pedit_headers_action *hdrs, 2765 u32 *action_flags, 2766 struct netlink_ext_ack *extack) 2767 { 2768 struct pedit_headers *cmd_masks; 2769 int err; 2770 u8 cmd; 2771 2772 err = offload_pedit_fields(priv, namespace, hdrs, parse_attr, 2773 action_flags, extack); 2774 if (err < 0) 2775 goto out_dealloc_parsed_actions; 2776 2777 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { 2778 cmd_masks = &hdrs[cmd].masks; 2779 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { 2780 NL_SET_ERR_MSG_MOD(extack, 2781 "attempt to offload an unsupported field"); 2782 netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); 2783 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, 2784 16, 1, cmd_masks, sizeof(zero_masks), true); 2785 err = -EOPNOTSUPP; 2786 goto out_dealloc_parsed_actions; 2787 } 2788 } 2789 2790 return 0; 2791 2792 out_dealloc_parsed_actions: 2793 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); 2794 return err; 2795 } 2796 2797 static bool csum_offload_supported(struct mlx5e_priv *priv, 2798 u32 action, 2799 u32 update_flags, 2800 struct netlink_ext_ack *extack) 2801 { 2802 u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | 2803 TCA_CSUM_UPDATE_FLAG_UDP; 2804 2805 /* The HW recalcs checksums only if re-writing headers */ 2806 if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { 2807 NL_SET_ERR_MSG_MOD(extack, 2808 "TC csum action is only offloaded with pedit"); 2809 netdev_warn(priv->netdev, 2810 "TC csum action is only offloaded with pedit\n"); 2811 return false; 2812 } 2813 2814 if (update_flags & ~prot_flags) { 2815 NL_SET_ERR_MSG_MOD(extack, 2816 "can't offload TC csum action for some header/s"); 2817 netdev_warn(priv->netdev, 2818 "can't offload TC csum action for some header/s - flags %#x\n", 2819 update_flags); 2820 return false; 2821 } 2822 2823 return true; 2824 } 2825 2826 struct ip_ttl_word { 2827 __u8 ttl; 2828 __u8 protocol; 2829 __sum16 check; 2830 }; 2831 2832 struct ipv6_hoplimit_word { 2833 __be16 payload_len; 2834 __u8 nexthdr; 2835 __u8 hop_limit; 2836 }; 2837 2838 static int is_action_keys_supported(const struct flow_action_entry *act, 2839 bool ct_flow, bool *modify_ip_header, 2840 bool *modify_tuple, 2841 struct netlink_ext_ack *extack) 2842 { 2843 u32 mask, offset; 2844 u8 htype; 2845 2846 htype = act->mangle.htype; 2847 offset = act->mangle.offset; 2848 mask = ~act->mangle.mask; 2849 /* For IPv4 & IPv6 header check 4 byte word, 2850 * to determine that modified fields 2851 * are NOT ttl & hop_limit only. 2852 */ 2853 if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { 2854 struct ip_ttl_word *ttl_word = 2855 (struct ip_ttl_word *)&mask; 2856 2857 if (offset != offsetof(struct iphdr, ttl) || 2858 ttl_word->protocol || 2859 ttl_word->check) { 2860 *modify_ip_header = true; 2861 } 2862 2863 if (offset >= offsetof(struct iphdr, saddr)) 2864 *modify_tuple = true; 2865 2866 if (ct_flow && *modify_tuple) { 2867 NL_SET_ERR_MSG_MOD(extack, 2868 "can't offload re-write of ipv4 address with action ct"); 2869 return -EOPNOTSUPP; 2870 } 2871 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { 2872 struct ipv6_hoplimit_word *hoplimit_word = 2873 (struct ipv6_hoplimit_word *)&mask; 2874 2875 if (offset != offsetof(struct ipv6hdr, payload_len) || 2876 hoplimit_word->payload_len || 2877 hoplimit_word->nexthdr) { 2878 *modify_ip_header = true; 2879 } 2880 2881 if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) 2882 *modify_tuple = true; 2883 2884 if (ct_flow && *modify_tuple) { 2885 NL_SET_ERR_MSG_MOD(extack, 2886 "can't offload re-write of ipv6 address with action ct"); 2887 return -EOPNOTSUPP; 2888 } 2889 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP || 2890 htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) { 2891 *modify_tuple = true; 2892 if (ct_flow) { 2893 NL_SET_ERR_MSG_MOD(extack, 2894 "can't offload re-write of transport header ports with action ct"); 2895 return -EOPNOTSUPP; 2896 } 2897 } 2898 2899 return 0; 2900 } 2901 2902 static bool modify_header_match_supported(struct mlx5e_priv *priv, 2903 struct mlx5_flow_spec *spec, 2904 struct flow_action *flow_action, 2905 u32 actions, bool ct_flow, 2906 bool ct_clear, 2907 struct netlink_ext_ack *extack) 2908 { 2909 const struct flow_action_entry *act; 2910 bool modify_ip_header, modify_tuple; 2911 void *headers_c; 2912 void *headers_v; 2913 u16 ethertype; 2914 u8 ip_proto; 2915 int i, err; 2916 2917 headers_c = get_match_headers_criteria(actions, spec); 2918 headers_v = get_match_headers_value(actions, spec); 2919 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); 2920 2921 /* for non-IP we only re-write MACs, so we're okay */ 2922 if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 && 2923 ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) 2924 goto out_ok; 2925 2926 modify_ip_header = false; 2927 modify_tuple = false; 2928 flow_action_for_each(i, act, flow_action) { 2929 if (act->id != FLOW_ACTION_MANGLE && 2930 act->id != FLOW_ACTION_ADD) 2931 continue; 2932 2933 err = is_action_keys_supported(act, ct_flow, 2934 &modify_ip_header, 2935 &modify_tuple, extack); 2936 if (err) 2937 return err; 2938 } 2939 2940 /* Add ct_state=-trk match so it will be offloaded for non ct flows 2941 * (or after clear action), as otherwise, since the tuple is changed, 2942 * we can't restore ct state 2943 */ 2944 if (!ct_clear && modify_tuple && 2945 mlx5_tc_ct_add_no_trk_match(spec)) { 2946 NL_SET_ERR_MSG_MOD(extack, 2947 "can't offload tuple modify header with ct matches"); 2948 netdev_info(priv->netdev, 2949 "can't offload tuple modify header with ct matches"); 2950 return false; 2951 } 2952 2953 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); 2954 if (modify_ip_header && ip_proto != IPPROTO_TCP && 2955 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { 2956 NL_SET_ERR_MSG_MOD(extack, 2957 "can't offload re-write of non TCP/UDP"); 2958 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n", 2959 ip_proto); 2960 return false; 2961 } 2962 2963 out_ok: 2964 return true; 2965 } 2966 2967 static bool actions_match_supported(struct mlx5e_priv *priv, 2968 struct flow_action *flow_action, 2969 struct mlx5e_tc_flow_parse_attr *parse_attr, 2970 struct mlx5e_tc_flow *flow, 2971 struct netlink_ext_ack *extack) 2972 { 2973 bool ct_flow = false, ct_clear = false; 2974 u32 actions; 2975 2976 ct_clear = flow->attr->ct_attr.ct_action & 2977 TCA_CT_ACT_CLEAR; 2978 ct_flow = flow_flag_test(flow, CT) && !ct_clear; 2979 actions = flow->attr->action; 2980 2981 if (mlx5e_is_eswitch_flow(flow)) { 2982 if (flow->attr->esw_attr->split_count && ct_flow) { 2983 /* All registers used by ct are cleared when using 2984 * split rules. 2985 */ 2986 NL_SET_ERR_MSG_MOD(extack, 2987 "Can't offload mirroring with action ct"); 2988 return false; 2989 } 2990 } 2991 2992 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) 2993 return modify_header_match_supported(priv, &parse_attr->spec, 2994 flow_action, actions, 2995 ct_flow, ct_clear, 2996 extack); 2997 2998 return true; 2999 } 3000 3001 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 3002 { 3003 return priv->mdev == peer_priv->mdev; 3004 } 3005 3006 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 3007 { 3008 struct mlx5_core_dev *fmdev, *pmdev; 3009 u64 fsystem_guid, psystem_guid; 3010 3011 fmdev = priv->mdev; 3012 pmdev = peer_priv->mdev; 3013 3014 fsystem_guid = mlx5_query_nic_system_image_guid(fmdev); 3015 psystem_guid = mlx5_query_nic_system_image_guid(pmdev); 3016 3017 return (fsystem_guid == psystem_guid); 3018 } 3019 3020 static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, 3021 const struct flow_action_entry *act, 3022 struct mlx5e_tc_flow_parse_attr *parse_attr, 3023 struct pedit_headers_action *hdrs, 3024 u32 *action, struct netlink_ext_ack *extack) 3025 { 3026 u16 mask16 = VLAN_VID_MASK; 3027 u16 val16 = act->vlan.vid & VLAN_VID_MASK; 3028 const struct flow_action_entry pedit_act = { 3029 .id = FLOW_ACTION_MANGLE, 3030 .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH, 3031 .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI), 3032 .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16), 3033 .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16), 3034 }; 3035 u8 match_prio_mask, match_prio_val; 3036 void *headers_c, *headers_v; 3037 int err; 3038 3039 headers_c = get_match_headers_criteria(*action, &parse_attr->spec); 3040 headers_v = get_match_headers_value(*action, &parse_attr->spec); 3041 3042 if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) && 3043 MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) { 3044 NL_SET_ERR_MSG_MOD(extack, 3045 "VLAN rewrite action must have VLAN protocol match"); 3046 return -EOPNOTSUPP; 3047 } 3048 3049 match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); 3050 match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); 3051 if (act->vlan.prio != (match_prio_val & match_prio_mask)) { 3052 NL_SET_ERR_MSG_MOD(extack, 3053 "Changing VLAN prio is not supported"); 3054 return -EOPNOTSUPP; 3055 } 3056 3057 err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack); 3058 *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3059 3060 return err; 3061 } 3062 3063 static int 3064 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, 3065 struct mlx5e_tc_flow_parse_attr *parse_attr, 3066 struct pedit_headers_action *hdrs, 3067 u32 *action, struct netlink_ext_ack *extack) 3068 { 3069 const struct flow_action_entry prio_tag_act = { 3070 .vlan.vid = 0, 3071 .vlan.prio = 3072 MLX5_GET(fte_match_set_lyr_2_4, 3073 get_match_headers_value(*action, 3074 &parse_attr->spec), 3075 first_prio) & 3076 MLX5_GET(fte_match_set_lyr_2_4, 3077 get_match_headers_criteria(*action, 3078 &parse_attr->spec), 3079 first_prio), 3080 }; 3081 3082 return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, 3083 &prio_tag_act, parse_attr, hdrs, action, 3084 extack); 3085 } 3086 3087 static int validate_goto_chain(struct mlx5e_priv *priv, 3088 struct mlx5e_tc_flow *flow, 3089 const struct flow_action_entry *act, 3090 u32 actions, 3091 struct netlink_ext_ack *extack) 3092 { 3093 bool is_esw = mlx5e_is_eswitch_flow(flow); 3094 struct mlx5_flow_attr *attr = flow->attr; 3095 bool ft_flow = mlx5e_is_ft_flow(flow); 3096 u32 dest_chain = act->chain_index; 3097 struct mlx5_fs_chains *chains; 3098 struct mlx5_eswitch *esw; 3099 u32 reformat_and_fwd; 3100 u32 max_chain; 3101 3102 esw = priv->mdev->priv.eswitch; 3103 chains = is_esw ? esw_chains(esw) : nic_chains(priv); 3104 max_chain = mlx5_chains_get_chain_range(chains); 3105 reformat_and_fwd = is_esw ? 3106 MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) : 3107 MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table); 3108 3109 if (ft_flow) { 3110 NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); 3111 return -EOPNOTSUPP; 3112 } 3113 3114 if (!mlx5_chains_backwards_supported(chains) && 3115 dest_chain <= attr->chain) { 3116 NL_SET_ERR_MSG_MOD(extack, 3117 "Goto lower numbered chain isn't supported"); 3118 return -EOPNOTSUPP; 3119 } 3120 3121 if (dest_chain > max_chain) { 3122 NL_SET_ERR_MSG_MOD(extack, 3123 "Requested destination chain is out of supported range"); 3124 return -EOPNOTSUPP; 3125 } 3126 3127 if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | 3128 MLX5_FLOW_CONTEXT_ACTION_DECAP) && 3129 !reformat_and_fwd) { 3130 NL_SET_ERR_MSG_MOD(extack, 3131 "Goto chain is not allowed if action has reformat or decap"); 3132 return -EOPNOTSUPP; 3133 } 3134 3135 return 0; 3136 } 3137 3138 static int parse_tc_nic_actions(struct mlx5e_priv *priv, 3139 struct flow_action *flow_action, 3140 struct mlx5e_tc_flow_parse_attr *parse_attr, 3141 struct mlx5e_tc_flow *flow, 3142 struct netlink_ext_ack *extack) 3143 { 3144 struct mlx5_flow_attr *attr = flow->attr; 3145 struct pedit_headers_action hdrs[2] = {}; 3146 const struct flow_action_entry *act; 3147 struct mlx5_nic_flow_attr *nic_attr; 3148 u32 action = 0; 3149 int err, i; 3150 3151 if (!flow_action_has_entries(flow_action)) 3152 return -EINVAL; 3153 3154 if (!flow_action_hw_stats_check(flow_action, extack, 3155 FLOW_ACTION_HW_STATS_DELAYED_BIT)) 3156 return -EOPNOTSUPP; 3157 3158 nic_attr = attr->nic_attr; 3159 3160 nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; 3161 3162 flow_action_for_each(i, act, flow_action) { 3163 switch (act->id) { 3164 case FLOW_ACTION_ACCEPT: 3165 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 3166 MLX5_FLOW_CONTEXT_ACTION_COUNT; 3167 break; 3168 case FLOW_ACTION_DROP: 3169 action |= MLX5_FLOW_CONTEXT_ACTION_DROP; 3170 if (MLX5_CAP_FLOWTABLE(priv->mdev, 3171 flow_table_properties_nic_receive.flow_counter)) 3172 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3173 break; 3174 case FLOW_ACTION_MANGLE: 3175 case FLOW_ACTION_ADD: 3176 err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, 3177 parse_attr, hdrs, NULL, extack); 3178 if (err) 3179 return err; 3180 3181 action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3182 break; 3183 case FLOW_ACTION_VLAN_MANGLE: 3184 err = add_vlan_rewrite_action(priv, 3185 MLX5_FLOW_NAMESPACE_KERNEL, 3186 act, parse_attr, hdrs, 3187 &action, extack); 3188 if (err) 3189 return err; 3190 3191 break; 3192 case FLOW_ACTION_CSUM: 3193 if (csum_offload_supported(priv, action, 3194 act->csum_flags, 3195 extack)) 3196 break; 3197 3198 return -EOPNOTSUPP; 3199 case FLOW_ACTION_REDIRECT: { 3200 struct net_device *peer_dev = act->dev; 3201 3202 if (priv->netdev->netdev_ops == peer_dev->netdev_ops && 3203 same_hw_devs(priv, netdev_priv(peer_dev))) { 3204 parse_attr->mirred_ifindex[0] = peer_dev->ifindex; 3205 flow_flag_set(flow, HAIRPIN); 3206 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 3207 MLX5_FLOW_CONTEXT_ACTION_COUNT; 3208 } else { 3209 NL_SET_ERR_MSG_MOD(extack, 3210 "device is not on same HW, can't offload"); 3211 netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n", 3212 peer_dev->name); 3213 return -EINVAL; 3214 } 3215 } 3216 break; 3217 case FLOW_ACTION_MARK: { 3218 u32 mark = act->mark; 3219 3220 if (mark & ~MLX5E_TC_FLOW_ID_MASK) { 3221 NL_SET_ERR_MSG_MOD(extack, 3222 "Bad flow mark - only 16 bit is supported"); 3223 return -EINVAL; 3224 } 3225 3226 nic_attr->flow_tag = mark; 3227 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3228 } 3229 break; 3230 case FLOW_ACTION_GOTO: 3231 err = validate_goto_chain(priv, flow, act, action, 3232 extack); 3233 if (err) 3234 return err; 3235 3236 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3237 attr->dest_chain = act->chain_index; 3238 break; 3239 case FLOW_ACTION_CT: 3240 err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); 3241 if (err) 3242 return err; 3243 3244 flow_flag_set(flow, CT); 3245 break; 3246 default: 3247 NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); 3248 return -EOPNOTSUPP; 3249 } 3250 } 3251 3252 if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || 3253 hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { 3254 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, 3255 parse_attr, hdrs, &action, extack); 3256 if (err) 3257 return err; 3258 /* in case all pedit actions are skipped, remove the MOD_HDR 3259 * flag. 3260 */ 3261 if (parse_attr->mod_hdr_acts.num_actions == 0) { 3262 action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3263 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); 3264 } 3265 } 3266 3267 attr->action = action; 3268 3269 if (attr->dest_chain) { 3270 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { 3271 NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); 3272 return -EOPNOTSUPP; 3273 } 3274 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3275 } 3276 3277 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) 3278 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3279 3280 if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) 3281 return -EOPNOTSUPP; 3282 3283 return 0; 3284 } 3285 3286 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, 3287 struct net_device *peer_netdev) 3288 { 3289 struct mlx5e_priv *peer_priv; 3290 3291 peer_priv = netdev_priv(peer_netdev); 3292 3293 return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && 3294 mlx5e_eswitch_vf_rep(priv->netdev) && 3295 mlx5e_eswitch_vf_rep(peer_netdev) && 3296 same_hw_devs(priv, peer_priv)); 3297 } 3298 3299 static int parse_tc_vlan_action(struct mlx5e_priv *priv, 3300 const struct flow_action_entry *act, 3301 struct mlx5_esw_flow_attr *attr, 3302 u32 *action) 3303 { 3304 u8 vlan_idx = attr->total_vlan; 3305 3306 if (vlan_idx >= MLX5_FS_VLAN_DEPTH) 3307 return -EOPNOTSUPP; 3308 3309 switch (act->id) { 3310 case FLOW_ACTION_VLAN_POP: 3311 if (vlan_idx) { 3312 if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 3313 MLX5_FS_VLAN_DEPTH)) 3314 return -EOPNOTSUPP; 3315 3316 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; 3317 } else { 3318 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; 3319 } 3320 break; 3321 case FLOW_ACTION_VLAN_PUSH: 3322 attr->vlan_vid[vlan_idx] = act->vlan.vid; 3323 attr->vlan_prio[vlan_idx] = act->vlan.prio; 3324 attr->vlan_proto[vlan_idx] = act->vlan.proto; 3325 if (!attr->vlan_proto[vlan_idx]) 3326 attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); 3327 3328 if (vlan_idx) { 3329 if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 3330 MLX5_FS_VLAN_DEPTH)) 3331 return -EOPNOTSUPP; 3332 3333 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; 3334 } else { 3335 if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && 3336 (act->vlan.proto != htons(ETH_P_8021Q) || 3337 act->vlan.prio)) 3338 return -EOPNOTSUPP; 3339 3340 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; 3341 } 3342 break; 3343 default: 3344 return -EINVAL; 3345 } 3346 3347 attr->total_vlan = vlan_idx + 1; 3348 3349 return 0; 3350 } 3351 3352 static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev, 3353 struct net_device *out_dev) 3354 { 3355 struct net_device *fdb_out_dev = out_dev; 3356 struct net_device *uplink_upper; 3357 3358 rcu_read_lock(); 3359 uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); 3360 if (uplink_upper && netif_is_lag_master(uplink_upper) && 3361 uplink_upper == out_dev) { 3362 fdb_out_dev = uplink_dev; 3363 } else if (netif_is_lag_master(out_dev)) { 3364 fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev)); 3365 if (fdb_out_dev && 3366 (!mlx5e_eswitch_rep(fdb_out_dev) || 3367 !netdev_port_same_parent_id(fdb_out_dev, uplink_dev))) 3368 fdb_out_dev = NULL; 3369 } 3370 rcu_read_unlock(); 3371 return fdb_out_dev; 3372 } 3373 3374 static int add_vlan_push_action(struct mlx5e_priv *priv, 3375 struct mlx5_flow_attr *attr, 3376 struct net_device **out_dev, 3377 u32 *action) 3378 { 3379 struct net_device *vlan_dev = *out_dev; 3380 struct flow_action_entry vlan_act = { 3381 .id = FLOW_ACTION_VLAN_PUSH, 3382 .vlan.vid = vlan_dev_vlan_id(vlan_dev), 3383 .vlan.proto = vlan_dev_vlan_proto(vlan_dev), 3384 .vlan.prio = 0, 3385 }; 3386 int err; 3387 3388 err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action); 3389 if (err) 3390 return err; 3391 3392 *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), 3393 dev_get_iflink(vlan_dev)); 3394 if (is_vlan_dev(*out_dev)) 3395 err = add_vlan_push_action(priv, attr, out_dev, action); 3396 3397 return err; 3398 } 3399 3400 static int add_vlan_pop_action(struct mlx5e_priv *priv, 3401 struct mlx5_flow_attr *attr, 3402 u32 *action) 3403 { 3404 struct flow_action_entry vlan_act = { 3405 .id = FLOW_ACTION_VLAN_POP, 3406 }; 3407 int nest_level, err = 0; 3408 3409 nest_level = attr->parse_attr->filter_dev->lower_level - 3410 priv->netdev->lower_level; 3411 while (nest_level--) { 3412 err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action); 3413 if (err) 3414 return err; 3415 } 3416 3417 return err; 3418 } 3419 3420 static bool same_hw_reps(struct mlx5e_priv *priv, 3421 struct net_device *peer_netdev) 3422 { 3423 struct mlx5e_priv *peer_priv; 3424 3425 peer_priv = netdev_priv(peer_netdev); 3426 3427 return mlx5e_eswitch_rep(priv->netdev) && 3428 mlx5e_eswitch_rep(peer_netdev) && 3429 same_hw_devs(priv, peer_priv); 3430 } 3431 3432 static bool is_lag_dev(struct mlx5e_priv *priv, 3433 struct net_device *peer_netdev) 3434 { 3435 return ((mlx5_lag_is_sriov(priv->mdev) || 3436 mlx5_lag_is_multipath(priv->mdev)) && 3437 same_hw_reps(priv, peer_netdev)); 3438 } 3439 3440 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, 3441 struct net_device *out_dev) 3442 { 3443 if (is_merged_eswitch_vfs(priv, out_dev)) 3444 return true; 3445 3446 if (is_lag_dev(priv, out_dev)) 3447 return true; 3448 3449 return mlx5e_eswitch_rep(out_dev) && 3450 same_port_devs(priv, netdev_priv(out_dev)); 3451 } 3452 3453 static bool is_duplicated_output_device(struct net_device *dev, 3454 struct net_device *out_dev, 3455 int *ifindexes, int if_count, 3456 struct netlink_ext_ack *extack) 3457 { 3458 int i; 3459 3460 for (i = 0; i < if_count; i++) { 3461 if (ifindexes[i] == out_dev->ifindex) { 3462 NL_SET_ERR_MSG_MOD(extack, 3463 "can't duplicate output to same device"); 3464 netdev_err(dev, "can't duplicate output to same device: %s\n", 3465 out_dev->name); 3466 return true; 3467 } 3468 } 3469 3470 return false; 3471 } 3472 3473 static int verify_uplink_forwarding(struct mlx5e_priv *priv, 3474 struct mlx5e_tc_flow *flow, 3475 struct net_device *out_dev, 3476 struct netlink_ext_ack *extack) 3477 { 3478 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; 3479 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 3480 struct mlx5e_rep_priv *rep_priv; 3481 3482 /* Forwarding non encapsulated traffic between 3483 * uplink ports is allowed only if 3484 * termination_table_raw_traffic cap is set. 3485 * 3486 * Input vport was stored attr->in_rep. 3487 * In LAG case, *priv* is the private data of 3488 * uplink which may be not the input vport. 3489 */ 3490 rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep); 3491 3492 if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && 3493 mlx5e_eswitch_uplink_rep(out_dev))) 3494 return 0; 3495 3496 if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, 3497 termination_table_raw_traffic)) { 3498 NL_SET_ERR_MSG_MOD(extack, 3499 "devices are both uplink, can't offload forwarding"); 3500 pr_err("devices %s %s are both uplink, can't offload forwarding\n", 3501 priv->netdev->name, out_dev->name); 3502 return -EOPNOTSUPP; 3503 } else if (out_dev != rep_priv->netdev) { 3504 NL_SET_ERR_MSG_MOD(extack, 3505 "devices are not the same uplink, can't offload forwarding"); 3506 pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n", 3507 priv->netdev->name, out_dev->name); 3508 return -EOPNOTSUPP; 3509 } 3510 return 0; 3511 } 3512 3513 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, 3514 struct flow_action *flow_action, 3515 struct mlx5e_tc_flow *flow, 3516 struct netlink_ext_ack *extack, 3517 struct net_device *filter_dev) 3518 { 3519 struct pedit_headers_action hdrs[2] = {}; 3520 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 3521 struct mlx5e_tc_flow_parse_attr *parse_attr; 3522 struct mlx5e_rep_priv *rpriv = priv->ppriv; 3523 const struct ip_tunnel_info *info = NULL; 3524 struct mlx5_flow_attr *attr = flow->attr; 3525 int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; 3526 bool ft_flow = mlx5e_is_ft_flow(flow); 3527 const struct flow_action_entry *act; 3528 struct mlx5_esw_flow_attr *esw_attr; 3529 bool encap = false, decap = false; 3530 u32 action = attr->action; 3531 int err, i, if_count = 0; 3532 bool mpls_push = false; 3533 3534 if (!flow_action_has_entries(flow_action)) 3535 return -EINVAL; 3536 3537 if (!flow_action_hw_stats_check(flow_action, extack, 3538 FLOW_ACTION_HW_STATS_DELAYED_BIT)) 3539 return -EOPNOTSUPP; 3540 3541 esw_attr = attr->esw_attr; 3542 parse_attr = attr->parse_attr; 3543 3544 flow_action_for_each(i, act, flow_action) { 3545 switch (act->id) { 3546 case FLOW_ACTION_DROP: 3547 action |= MLX5_FLOW_CONTEXT_ACTION_DROP | 3548 MLX5_FLOW_CONTEXT_ACTION_COUNT; 3549 break; 3550 case FLOW_ACTION_TRAP: 3551 if (!flow_offload_has_one_action(flow_action)) { 3552 NL_SET_ERR_MSG_MOD(extack, 3553 "action trap is supported as a sole action only"); 3554 return -EOPNOTSUPP; 3555 } 3556 action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 3557 MLX5_FLOW_CONTEXT_ACTION_COUNT); 3558 attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; 3559 break; 3560 case FLOW_ACTION_MPLS_PUSH: 3561 if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, 3562 reformat_l2_to_l3_tunnel) || 3563 act->mpls_push.proto != htons(ETH_P_MPLS_UC)) { 3564 NL_SET_ERR_MSG_MOD(extack, 3565 "mpls push is supported only for mpls_uc protocol"); 3566 return -EOPNOTSUPP; 3567 } 3568 mpls_push = true; 3569 break; 3570 case FLOW_ACTION_MPLS_POP: 3571 /* we only support mpls pop if it is the first action 3572 * and the filter net device is bareudp. Subsequent 3573 * actions can be pedit and the last can be mirred 3574 * egress redirect. 3575 */ 3576 if (i) { 3577 NL_SET_ERR_MSG_MOD(extack, 3578 "mpls pop supported only as first action"); 3579 return -EOPNOTSUPP; 3580 } 3581 if (!netif_is_bareudp(filter_dev)) { 3582 NL_SET_ERR_MSG_MOD(extack, 3583 "mpls pop supported only on bareudp devices"); 3584 return -EOPNOTSUPP; 3585 } 3586 3587 parse_attr->eth.h_proto = act->mpls_pop.proto; 3588 action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; 3589 flow_flag_set(flow, L3_TO_L2_DECAP); 3590 break; 3591 case FLOW_ACTION_MANGLE: 3592 case FLOW_ACTION_ADD: 3593 err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, 3594 parse_attr, hdrs, flow, extack); 3595 if (err) 3596 return err; 3597 3598 if (!flow_flag_test(flow, L3_TO_L2_DECAP)) { 3599 action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3600 esw_attr->split_count = esw_attr->out_count; 3601 } 3602 break; 3603 case FLOW_ACTION_CSUM: 3604 if (csum_offload_supported(priv, action, 3605 act->csum_flags, extack)) 3606 break; 3607 3608 return -EOPNOTSUPP; 3609 case FLOW_ACTION_REDIRECT: 3610 case FLOW_ACTION_MIRRED: { 3611 struct mlx5e_priv *out_priv; 3612 struct net_device *out_dev; 3613 3614 out_dev = act->dev; 3615 if (!out_dev) { 3616 /* out_dev is NULL when filters with 3617 * non-existing mirred device are replayed to 3618 * the driver. 3619 */ 3620 return -EINVAL; 3621 } 3622 3623 if (mpls_push && !netif_is_bareudp(out_dev)) { 3624 NL_SET_ERR_MSG_MOD(extack, 3625 "mpls is supported only through a bareudp device"); 3626 return -EOPNOTSUPP; 3627 } 3628 3629 if (ft_flow && out_dev == priv->netdev) { 3630 /* Ignore forward to self rules generated 3631 * by adding both mlx5 devs to the flow table 3632 * block on a normal nft offload setup. 3633 */ 3634 return -EOPNOTSUPP; 3635 } 3636 3637 if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { 3638 NL_SET_ERR_MSG_MOD(extack, 3639 "can't support more output ports, can't offload forwarding"); 3640 netdev_warn(priv->netdev, 3641 "can't support more than %d output ports, can't offload forwarding\n", 3642 esw_attr->out_count); 3643 return -EOPNOTSUPP; 3644 } 3645 3646 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 3647 MLX5_FLOW_CONTEXT_ACTION_COUNT; 3648 if (encap) { 3649 parse_attr->mirred_ifindex[esw_attr->out_count] = 3650 out_dev->ifindex; 3651 parse_attr->tun_info[esw_attr->out_count] = 3652 mlx5e_dup_tun_info(info); 3653 if (!parse_attr->tun_info[esw_attr->out_count]) 3654 return -ENOMEM; 3655 encap = false; 3656 esw_attr->dests[esw_attr->out_count].flags |= 3657 MLX5_ESW_DEST_ENCAP; 3658 esw_attr->out_count++; 3659 /* attr->dests[].rep is resolved when we 3660 * handle encap 3661 */ 3662 } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) { 3663 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 3664 struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); 3665 3666 if (is_duplicated_output_device(priv->netdev, 3667 out_dev, 3668 ifindexes, 3669 if_count, 3670 extack)) 3671 return -EOPNOTSUPP; 3672 3673 ifindexes[if_count] = out_dev->ifindex; 3674 if_count++; 3675 3676 out_dev = get_fdb_out_dev(uplink_dev, out_dev); 3677 if (!out_dev) 3678 return -ENODEV; 3679 3680 if (is_vlan_dev(out_dev)) { 3681 err = add_vlan_push_action(priv, attr, 3682 &out_dev, 3683 &action); 3684 if (err) 3685 return err; 3686 } 3687 3688 if (is_vlan_dev(parse_attr->filter_dev)) { 3689 err = add_vlan_pop_action(priv, attr, 3690 &action); 3691 if (err) 3692 return err; 3693 } 3694 3695 err = verify_uplink_forwarding(priv, flow, out_dev, extack); 3696 if (err) 3697 return err; 3698 3699 if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { 3700 NL_SET_ERR_MSG_MOD(extack, 3701 "devices are not on same switch HW, can't offload forwarding"); 3702 return -EOPNOTSUPP; 3703 } 3704 3705 out_priv = netdev_priv(out_dev); 3706 rpriv = out_priv->ppriv; 3707 esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; 3708 esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; 3709 esw_attr->out_count++; 3710 } else if (parse_attr->filter_dev != priv->netdev) { 3711 /* All mlx5 devices are called to configure 3712 * high level device filters. Therefore, the 3713 * *attempt* to install a filter on invalid 3714 * eswitch should not trigger an explicit error 3715 */ 3716 return -EINVAL; 3717 } else { 3718 NL_SET_ERR_MSG_MOD(extack, 3719 "devices are not on same switch HW, can't offload forwarding"); 3720 netdev_warn(priv->netdev, 3721 "devices %s %s not on same switch HW, can't offload forwarding\n", 3722 priv->netdev->name, 3723 out_dev->name); 3724 return -EINVAL; 3725 } 3726 } 3727 break; 3728 case FLOW_ACTION_TUNNEL_ENCAP: 3729 info = act->tunnel; 3730 if (info) 3731 encap = true; 3732 else 3733 return -EOPNOTSUPP; 3734 3735 break; 3736 case FLOW_ACTION_VLAN_PUSH: 3737 case FLOW_ACTION_VLAN_POP: 3738 if (act->id == FLOW_ACTION_VLAN_PUSH && 3739 (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) { 3740 /* Replace vlan pop+push with vlan modify */ 3741 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; 3742 err = add_vlan_rewrite_action(priv, 3743 MLX5_FLOW_NAMESPACE_FDB, 3744 act, parse_attr, hdrs, 3745 &action, extack); 3746 } else { 3747 err = parse_tc_vlan_action(priv, act, esw_attr, &action); 3748 } 3749 if (err) 3750 return err; 3751 3752 esw_attr->split_count = esw_attr->out_count; 3753 break; 3754 case FLOW_ACTION_VLAN_MANGLE: 3755 err = add_vlan_rewrite_action(priv, 3756 MLX5_FLOW_NAMESPACE_FDB, 3757 act, parse_attr, hdrs, 3758 &action, extack); 3759 if (err) 3760 return err; 3761 3762 esw_attr->split_count = esw_attr->out_count; 3763 break; 3764 case FLOW_ACTION_TUNNEL_DECAP: 3765 decap = true; 3766 break; 3767 case FLOW_ACTION_GOTO: 3768 err = validate_goto_chain(priv, flow, act, action, 3769 extack); 3770 if (err) 3771 return err; 3772 3773 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3774 attr->dest_chain = act->chain_index; 3775 break; 3776 case FLOW_ACTION_CT: 3777 err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); 3778 if (err) 3779 return err; 3780 3781 flow_flag_set(flow, CT); 3782 break; 3783 default: 3784 NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); 3785 return -EOPNOTSUPP; 3786 } 3787 } 3788 3789 /* always set IP version for indirect table handling */ 3790 attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true); 3791 3792 if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && 3793 action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { 3794 /* For prio tag mode, replace vlan pop with rewrite vlan prio 3795 * tag rewrite. 3796 */ 3797 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; 3798 err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs, 3799 &action, extack); 3800 if (err) 3801 return err; 3802 } 3803 3804 if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || 3805 hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { 3806 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB, 3807 parse_attr, hdrs, &action, extack); 3808 if (err) 3809 return err; 3810 /* in case all pedit actions are skipped, remove the MOD_HDR 3811 * flag. we might have set split_count either by pedit or 3812 * pop/push. if there is no pop/push either, reset it too. 3813 */ 3814 if (parse_attr->mod_hdr_acts.num_actions == 0) { 3815 action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3816 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); 3817 if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || 3818 (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) 3819 esw_attr->split_count = 0; 3820 } 3821 } 3822 3823 attr->action = action; 3824 if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) 3825 return -EOPNOTSUPP; 3826 3827 if (attr->dest_chain) { 3828 if (decap) { 3829 /* It can be supported if we'll create a mapping for 3830 * the tunnel device only (without tunnel), and set 3831 * this tunnel id with this decap flow. 3832 * 3833 * On restore (miss), we'll just set this saved tunnel 3834 * device. 3835 */ 3836 3837 NL_SET_ERR_MSG(extack, 3838 "Decap with goto isn't supported"); 3839 netdev_warn(priv->netdev, 3840 "Decap with goto isn't supported"); 3841 return -EOPNOTSUPP; 3842 } 3843 3844 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { 3845 NL_SET_ERR_MSG_MOD(extack, 3846 "Mirroring goto chain rules isn't supported"); 3847 return -EOPNOTSUPP; 3848 } 3849 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3850 } 3851 3852 if (!(attr->action & 3853 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { 3854 NL_SET_ERR_MSG_MOD(extack, 3855 "Rule must have at least one forward/drop action"); 3856 return -EOPNOTSUPP; 3857 } 3858 3859 if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { 3860 NL_SET_ERR_MSG_MOD(extack, 3861 "current firmware doesn't support split rule for port mirroring"); 3862 netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n"); 3863 return -EOPNOTSUPP; 3864 } 3865 3866 return 0; 3867 } 3868 3869 static void get_flags(int flags, unsigned long *flow_flags) 3870 { 3871 unsigned long __flow_flags = 0; 3872 3873 if (flags & MLX5_TC_FLAG(INGRESS)) 3874 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); 3875 if (flags & MLX5_TC_FLAG(EGRESS)) 3876 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS); 3877 3878 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) 3879 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); 3880 if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) 3881 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); 3882 if (flags & MLX5_TC_FLAG(FT_OFFLOAD)) 3883 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT); 3884 3885 *flow_flags = __flow_flags; 3886 } 3887 3888 static const struct rhashtable_params tc_ht_params = { 3889 .head_offset = offsetof(struct mlx5e_tc_flow, node), 3890 .key_offset = offsetof(struct mlx5e_tc_flow, cookie), 3891 .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), 3892 .automatic_shrinking = true, 3893 }; 3894 3895 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, 3896 unsigned long flags) 3897 { 3898 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 3899 struct mlx5e_rep_priv *uplink_rpriv; 3900 3901 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { 3902 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 3903 return &uplink_rpriv->uplink_priv.tc_ht; 3904 } else /* NIC offload */ 3905 return &priv->fs.tc.ht; 3906 } 3907 3908 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) 3909 { 3910 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; 3911 struct mlx5_flow_attr *attr = flow->attr; 3912 bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK && 3913 flow_flag_test(flow, INGRESS); 3914 bool act_is_encap = !!(attr->action & 3915 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); 3916 bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom, 3917 MLX5_DEVCOM_ESW_OFFLOADS); 3918 3919 if (!esw_paired) 3920 return false; 3921 3922 if ((mlx5_lag_is_sriov(esw_attr->in_mdev) || 3923 mlx5_lag_is_multipath(esw_attr->in_mdev)) && 3924 (is_rep_ingress || act_is_encap)) 3925 return true; 3926 3927 return false; 3928 } 3929 3930 struct mlx5_flow_attr * 3931 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) 3932 { 3933 u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ? 3934 sizeof(struct mlx5_esw_flow_attr) : 3935 sizeof(struct mlx5_nic_flow_attr); 3936 struct mlx5_flow_attr *attr; 3937 3938 return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); 3939 } 3940 3941 static int 3942 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, 3943 struct flow_cls_offload *f, unsigned long flow_flags, 3944 struct mlx5e_tc_flow_parse_attr **__parse_attr, 3945 struct mlx5e_tc_flow **__flow) 3946 { 3947 struct mlx5e_tc_flow_parse_attr *parse_attr; 3948 struct mlx5_flow_attr *attr; 3949 struct mlx5e_tc_flow *flow; 3950 int err = -ENOMEM; 3951 int out_index; 3952 3953 flow = kzalloc(sizeof(*flow), GFP_KERNEL); 3954 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); 3955 if (!parse_attr || !flow) 3956 goto err_free; 3957 3958 flow->flags = flow_flags; 3959 flow->cookie = f->cookie; 3960 flow->priv = priv; 3961 3962 attr = mlx5_alloc_flow_attr(get_flow_name_space(flow)); 3963 if (!attr) 3964 goto err_free; 3965 3966 flow->attr = attr; 3967 3968 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) 3969 INIT_LIST_HEAD(&flow->encaps[out_index].list); 3970 INIT_LIST_HEAD(&flow->hairpin); 3971 INIT_LIST_HEAD(&flow->l3_to_l2_reformat); 3972 refcount_set(&flow->refcnt, 1); 3973 init_completion(&flow->init_done); 3974 3975 *__flow = flow; 3976 *__parse_attr = parse_attr; 3977 3978 return 0; 3979 3980 err_free: 3981 kfree(flow); 3982 kvfree(parse_attr); 3983 return err; 3984 } 3985 3986 static void 3987 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr, 3988 struct mlx5e_tc_flow_parse_attr *parse_attr, 3989 struct flow_cls_offload *f) 3990 { 3991 attr->parse_attr = parse_attr; 3992 attr->chain = f->common.chain_index; 3993 attr->prio = f->common.prio; 3994 } 3995 3996 static void 3997 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr, 3998 struct mlx5e_priv *priv, 3999 struct mlx5e_tc_flow_parse_attr *parse_attr, 4000 struct flow_cls_offload *f, 4001 struct mlx5_eswitch_rep *in_rep, 4002 struct mlx5_core_dev *in_mdev) 4003 { 4004 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 4005 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; 4006 4007 mlx5e_flow_attr_init(attr, parse_attr, f); 4008 4009 esw_attr->in_rep = in_rep; 4010 esw_attr->in_mdev = in_mdev; 4011 4012 if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == 4013 MLX5_COUNTER_SOURCE_ESWITCH) 4014 esw_attr->counter_dev = in_mdev; 4015 else 4016 esw_attr->counter_dev = priv->mdev; 4017 } 4018 4019 static struct mlx5e_tc_flow * 4020 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, 4021 struct flow_cls_offload *f, 4022 unsigned long flow_flags, 4023 struct net_device *filter_dev, 4024 struct mlx5_eswitch_rep *in_rep, 4025 struct mlx5_core_dev *in_mdev) 4026 { 4027 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 4028 struct netlink_ext_ack *extack = f->common.extack; 4029 struct mlx5e_tc_flow_parse_attr *parse_attr; 4030 struct mlx5e_tc_flow *flow; 4031 int attr_size, err; 4032 4033 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); 4034 attr_size = sizeof(struct mlx5_esw_flow_attr); 4035 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, 4036 &parse_attr, &flow); 4037 if (err) 4038 goto out; 4039 4040 parse_attr->filter_dev = filter_dev; 4041 mlx5e_flow_esw_attr_init(flow->attr, 4042 priv, parse_attr, 4043 f, in_rep, in_mdev); 4044 4045 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, 4046 f, filter_dev); 4047 if (err) 4048 goto err_free; 4049 4050 /* actions validation depends on parsing the ct matches first */ 4051 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, 4052 &flow->attr->ct_attr, extack); 4053 if (err) 4054 goto err_free; 4055 4056 err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev); 4057 if (err) 4058 goto err_free; 4059 4060 err = mlx5e_tc_add_fdb_flow(priv, flow, extack); 4061 complete_all(&flow->init_done); 4062 if (err) { 4063 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) 4064 goto err_free; 4065 4066 add_unready_flow(flow); 4067 } 4068 4069 return flow; 4070 4071 err_free: 4072 mlx5e_flow_put(priv, flow); 4073 out: 4074 return ERR_PTR(err); 4075 } 4076 4077 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, 4078 struct mlx5e_tc_flow *flow, 4079 unsigned long flow_flags) 4080 { 4081 struct mlx5e_priv *priv = flow->priv, *peer_priv; 4082 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; 4083 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; 4084 struct mlx5_devcom *devcom = priv->mdev->priv.devcom; 4085 struct mlx5e_tc_flow_parse_attr *parse_attr; 4086 struct mlx5e_rep_priv *peer_urpriv; 4087 struct mlx5e_tc_flow *peer_flow; 4088 struct mlx5_core_dev *in_mdev; 4089 int err = 0; 4090 4091 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4092 if (!peer_esw) 4093 return -ENODEV; 4094 4095 peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); 4096 peer_priv = netdev_priv(peer_urpriv->netdev); 4097 4098 /* in_mdev is assigned of which the packet originated from. 4099 * So packets redirected to uplink use the same mdev of the 4100 * original flow and packets redirected from uplink use the 4101 * peer mdev. 4102 */ 4103 if (attr->in_rep->vport == MLX5_VPORT_UPLINK) 4104 in_mdev = peer_priv->mdev; 4105 else 4106 in_mdev = priv->mdev; 4107 4108 parse_attr = flow->attr->parse_attr; 4109 peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags, 4110 parse_attr->filter_dev, 4111 attr->in_rep, in_mdev); 4112 if (IS_ERR(peer_flow)) { 4113 err = PTR_ERR(peer_flow); 4114 goto out; 4115 } 4116 4117 flow->peer_flow = peer_flow; 4118 flow_flag_set(flow, DUP); 4119 mutex_lock(&esw->offloads.peer_mutex); 4120 list_add_tail(&flow->peer, &esw->offloads.peer_flows); 4121 mutex_unlock(&esw->offloads.peer_mutex); 4122 4123 out: 4124 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4125 return err; 4126 } 4127 4128 static int 4129 mlx5e_add_fdb_flow(struct mlx5e_priv *priv, 4130 struct flow_cls_offload *f, 4131 unsigned long flow_flags, 4132 struct net_device *filter_dev, 4133 struct mlx5e_tc_flow **__flow) 4134 { 4135 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4136 struct mlx5_eswitch_rep *in_rep = rpriv->rep; 4137 struct mlx5_core_dev *in_mdev = priv->mdev; 4138 struct mlx5e_tc_flow *flow; 4139 int err; 4140 4141 flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, 4142 in_mdev); 4143 if (IS_ERR(flow)) 4144 return PTR_ERR(flow); 4145 4146 if (is_peer_flow_needed(flow)) { 4147 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags); 4148 if (err) { 4149 mlx5e_tc_del_fdb_flow(priv, flow); 4150 goto out; 4151 } 4152 } 4153 4154 *__flow = flow; 4155 4156 return 0; 4157 4158 out: 4159 return err; 4160 } 4161 4162 static int 4163 mlx5e_add_nic_flow(struct mlx5e_priv *priv, 4164 struct flow_cls_offload *f, 4165 unsigned long flow_flags, 4166 struct net_device *filter_dev, 4167 struct mlx5e_tc_flow **__flow) 4168 { 4169 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 4170 struct netlink_ext_ack *extack = f->common.extack; 4171 struct mlx5e_tc_flow_parse_attr *parse_attr; 4172 struct mlx5e_tc_flow *flow; 4173 int attr_size, err; 4174 4175 if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { 4176 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) 4177 return -EOPNOTSUPP; 4178 } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) { 4179 return -EOPNOTSUPP; 4180 } 4181 4182 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); 4183 attr_size = sizeof(struct mlx5_nic_flow_attr); 4184 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, 4185 &parse_attr, &flow); 4186 if (err) 4187 goto out; 4188 4189 parse_attr->filter_dev = filter_dev; 4190 mlx5e_flow_attr_init(flow->attr, parse_attr, f); 4191 4192 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, 4193 f, filter_dev); 4194 if (err) 4195 goto err_free; 4196 4197 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, 4198 &flow->attr->ct_attr, extack); 4199 if (err) 4200 goto err_free; 4201 4202 err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); 4203 if (err) 4204 goto err_free; 4205 4206 err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack); 4207 if (err) 4208 goto err_free; 4209 4210 flow_flag_set(flow, OFFLOADED); 4211 *__flow = flow; 4212 4213 return 0; 4214 4215 err_free: 4216 flow_flag_set(flow, FAILED); 4217 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); 4218 mlx5e_flow_put(priv, flow); 4219 out: 4220 return err; 4221 } 4222 4223 static int 4224 mlx5e_tc_add_flow(struct mlx5e_priv *priv, 4225 struct flow_cls_offload *f, 4226 unsigned long flags, 4227 struct net_device *filter_dev, 4228 struct mlx5e_tc_flow **flow) 4229 { 4230 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 4231 unsigned long flow_flags; 4232 int err; 4233 4234 get_flags(flags, &flow_flags); 4235 4236 if (!tc_can_offload_extack(priv->netdev, f->common.extack)) 4237 return -EOPNOTSUPP; 4238 4239 if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) 4240 err = mlx5e_add_fdb_flow(priv, f, flow_flags, 4241 filter_dev, flow); 4242 else 4243 err = mlx5e_add_nic_flow(priv, f, flow_flags, 4244 filter_dev, flow); 4245 4246 return err; 4247 } 4248 4249 static bool is_flow_rule_duplicate_allowed(struct net_device *dev, 4250 struct mlx5e_rep_priv *rpriv) 4251 { 4252 /* Offloaded flow rule is allowed to duplicate on non-uplink representor 4253 * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this 4254 * function is called from NIC mode. 4255 */ 4256 return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK; 4257 } 4258 4259 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, 4260 struct flow_cls_offload *f, unsigned long flags) 4261 { 4262 struct netlink_ext_ack *extack = f->common.extack; 4263 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 4264 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4265 struct mlx5e_tc_flow *flow; 4266 int err = 0; 4267 4268 rcu_read_lock(); 4269 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); 4270 if (flow) { 4271 /* Same flow rule offloaded to non-uplink representor sharing tc block, 4272 * just return 0. 4273 */ 4274 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev) 4275 goto rcu_unlock; 4276 4277 NL_SET_ERR_MSG_MOD(extack, 4278 "flow cookie already exists, ignoring"); 4279 netdev_warn_once(priv->netdev, 4280 "flow cookie %lx already exists, ignoring\n", 4281 f->cookie); 4282 err = -EEXIST; 4283 goto rcu_unlock; 4284 } 4285 rcu_unlock: 4286 rcu_read_unlock(); 4287 if (flow) 4288 goto out; 4289 4290 trace_mlx5e_configure_flower(f); 4291 err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); 4292 if (err) 4293 goto out; 4294 4295 /* Flow rule offloaded to non-uplink representor sharing tc block, 4296 * set the flow's owner dev. 4297 */ 4298 if (is_flow_rule_duplicate_allowed(dev, rpriv)) 4299 flow->orig_dev = dev; 4300 4301 err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); 4302 if (err) 4303 goto err_free; 4304 4305 return 0; 4306 4307 err_free: 4308 mlx5e_flow_put(priv, flow); 4309 out: 4310 return err; 4311 } 4312 4313 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) 4314 { 4315 bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS)); 4316 bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS)); 4317 4318 return flow_flag_test(flow, INGRESS) == dir_ingress && 4319 flow_flag_test(flow, EGRESS) == dir_egress; 4320 } 4321 4322 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, 4323 struct flow_cls_offload *f, unsigned long flags) 4324 { 4325 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 4326 struct mlx5e_tc_flow *flow; 4327 int err; 4328 4329 rcu_read_lock(); 4330 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); 4331 if (!flow || !same_flow_direction(flow, flags)) { 4332 err = -EINVAL; 4333 goto errout; 4334 } 4335 4336 /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag 4337 * set. 4338 */ 4339 if (flow_flag_test_and_set(flow, DELETED)) { 4340 err = -EINVAL; 4341 goto errout; 4342 } 4343 rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); 4344 rcu_read_unlock(); 4345 4346 trace_mlx5e_delete_flower(f); 4347 mlx5e_flow_put(priv, flow); 4348 4349 return 0; 4350 4351 errout: 4352 rcu_read_unlock(); 4353 return err; 4354 } 4355 4356 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, 4357 struct flow_cls_offload *f, unsigned long flags) 4358 { 4359 struct mlx5_devcom *devcom = priv->mdev->priv.devcom; 4360 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 4361 struct mlx5_eswitch *peer_esw; 4362 struct mlx5e_tc_flow *flow; 4363 struct mlx5_fc *counter; 4364 u64 lastuse = 0; 4365 u64 packets = 0; 4366 u64 bytes = 0; 4367 int err = 0; 4368 4369 rcu_read_lock(); 4370 flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie, 4371 tc_ht_params)); 4372 rcu_read_unlock(); 4373 if (IS_ERR(flow)) 4374 return PTR_ERR(flow); 4375 4376 if (!same_flow_direction(flow, flags)) { 4377 err = -EINVAL; 4378 goto errout; 4379 } 4380 4381 if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) { 4382 counter = mlx5e_tc_get_counter(flow); 4383 if (!counter) 4384 goto errout; 4385 4386 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); 4387 } 4388 4389 /* Under multipath it's possible for one rule to be currently 4390 * un-offloaded while the other rule is offloaded. 4391 */ 4392 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4393 if (!peer_esw) 4394 goto out; 4395 4396 if (flow_flag_test(flow, DUP) && 4397 flow_flag_test(flow->peer_flow, OFFLOADED)) { 4398 u64 bytes2; 4399 u64 packets2; 4400 u64 lastuse2; 4401 4402 counter = mlx5e_tc_get_counter(flow->peer_flow); 4403 if (!counter) 4404 goto no_peer_counter; 4405 mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); 4406 4407 bytes += bytes2; 4408 packets += packets2; 4409 lastuse = max_t(u64, lastuse, lastuse2); 4410 } 4411 4412 no_peer_counter: 4413 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4414 out: 4415 flow_stats_update(&f->stats, bytes, packets, 0, lastuse, 4416 FLOW_ACTION_HW_STATS_DELAYED); 4417 trace_mlx5e_stats_flower(f); 4418 errout: 4419 mlx5e_flow_put(priv, flow); 4420 return err; 4421 } 4422 4423 static int apply_police_params(struct mlx5e_priv *priv, u64 rate, 4424 struct netlink_ext_ack *extack) 4425 { 4426 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4427 struct mlx5_eswitch *esw; 4428 u32 rate_mbps = 0; 4429 u16 vport_num; 4430 int err; 4431 4432 vport_num = rpriv->rep->vport; 4433 if (vport_num >= MLX5_VPORT_ECPF) { 4434 NL_SET_ERR_MSG_MOD(extack, 4435 "Ingress rate limit is supported only for Eswitch ports connected to VFs"); 4436 return -EOPNOTSUPP; 4437 } 4438 4439 esw = priv->mdev->priv.eswitch; 4440 /* rate is given in bytes/sec. 4441 * First convert to bits/sec and then round to the nearest mbit/secs. 4442 * mbit means million bits. 4443 * Moreover, if rate is non zero we choose to configure to a minimum of 4444 * 1 mbit/sec. 4445 */ 4446 if (rate) { 4447 rate = (rate * BITS_PER_BYTE) + 500000; 4448 rate_mbps = max_t(u64, do_div(rate, 1000000), 1); 4449 } 4450 4451 err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); 4452 if (err) 4453 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); 4454 4455 return err; 4456 } 4457 4458 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, 4459 struct flow_action *flow_action, 4460 struct netlink_ext_ack *extack) 4461 { 4462 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4463 const struct flow_action_entry *act; 4464 int err; 4465 int i; 4466 4467 if (!flow_action_has_entries(flow_action)) { 4468 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); 4469 return -EINVAL; 4470 } 4471 4472 if (!flow_offload_has_one_action(flow_action)) { 4473 NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); 4474 return -EOPNOTSUPP; 4475 } 4476 4477 if (!flow_action_basic_hw_stats_check(flow_action, extack)) 4478 return -EOPNOTSUPP; 4479 4480 flow_action_for_each(i, act, flow_action) { 4481 switch (act->id) { 4482 case FLOW_ACTION_POLICE: 4483 err = apply_police_params(priv, act->police.rate_bytes_ps, extack); 4484 if (err) 4485 return err; 4486 4487 rpriv->prev_vf_vport_stats = priv->stats.vf_vport; 4488 break; 4489 default: 4490 NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); 4491 return -EOPNOTSUPP; 4492 } 4493 } 4494 4495 return 0; 4496 } 4497 4498 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, 4499 struct tc_cls_matchall_offload *ma) 4500 { 4501 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 4502 struct netlink_ext_ack *extack = ma->common.extack; 4503 4504 if (!mlx5_esw_qos_enabled(esw)) { 4505 NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device"); 4506 return -EOPNOTSUPP; 4507 } 4508 4509 if (ma->common.prio != 1) { 4510 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); 4511 return -EINVAL; 4512 } 4513 4514 return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); 4515 } 4516 4517 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, 4518 struct tc_cls_matchall_offload *ma) 4519 { 4520 struct netlink_ext_ack *extack = ma->common.extack; 4521 4522 return apply_police_params(priv, 0, extack); 4523 } 4524 4525 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, 4526 struct tc_cls_matchall_offload *ma) 4527 { 4528 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4529 struct rtnl_link_stats64 cur_stats; 4530 u64 dbytes; 4531 u64 dpkts; 4532 4533 cur_stats = priv->stats.vf_vport; 4534 dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; 4535 dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; 4536 rpriv->prev_vf_vport_stats = cur_stats; 4537 flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies, 4538 FLOW_ACTION_HW_STATS_DELAYED); 4539 } 4540 4541 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, 4542 struct mlx5e_priv *peer_priv) 4543 { 4544 struct mlx5_core_dev *peer_mdev = peer_priv->mdev; 4545 struct mlx5e_hairpin_entry *hpe, *tmp; 4546 LIST_HEAD(init_wait_list); 4547 u16 peer_vhca_id; 4548 int bkt; 4549 4550 if (!same_hw_devs(priv, peer_priv)) 4551 return; 4552 4553 peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); 4554 4555 mutex_lock(&priv->fs.tc.hairpin_tbl_lock); 4556 hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) 4557 if (refcount_inc_not_zero(&hpe->refcnt)) 4558 list_add(&hpe->dead_peer_wait_list, &init_wait_list); 4559 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); 4560 4561 list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { 4562 wait_for_completion(&hpe->res_ready); 4563 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) 4564 hpe->hp->pair->peer_gone = true; 4565 4566 mlx5e_hairpin_put(priv, hpe); 4567 } 4568 } 4569 4570 static int mlx5e_tc_netdev_event(struct notifier_block *this, 4571 unsigned long event, void *ptr) 4572 { 4573 struct net_device *ndev = netdev_notifier_info_to_dev(ptr); 4574 struct mlx5e_flow_steering *fs; 4575 struct mlx5e_priv *peer_priv; 4576 struct mlx5e_tc_table *tc; 4577 struct mlx5e_priv *priv; 4578 4579 if (ndev->netdev_ops != &mlx5e_netdev_ops || 4580 event != NETDEV_UNREGISTER || 4581 ndev->reg_state == NETREG_REGISTERED) 4582 return NOTIFY_DONE; 4583 4584 tc = container_of(this, struct mlx5e_tc_table, netdevice_nb); 4585 fs = container_of(tc, struct mlx5e_flow_steering, tc); 4586 priv = container_of(fs, struct mlx5e_priv, fs); 4587 peer_priv = netdev_priv(ndev); 4588 if (priv == peer_priv || 4589 !(priv->netdev->features & NETIF_F_HW_TC)) 4590 return NOTIFY_DONE; 4591 4592 mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv); 4593 4594 return NOTIFY_DONE; 4595 } 4596 4597 static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev) 4598 { 4599 int tc_grp_size, tc_tbl_size; 4600 u32 max_flow_counter; 4601 4602 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | 4603 MLX5_CAP_GEN(dev, max_flow_counter_15_0); 4604 4605 tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); 4606 4607 tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, 4608 BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); 4609 4610 return tc_tbl_size; 4611 } 4612 4613 int mlx5e_tc_nic_init(struct mlx5e_priv *priv) 4614 { 4615 struct mlx5e_tc_table *tc = &priv->fs.tc; 4616 struct mlx5_core_dev *dev = priv->mdev; 4617 struct mlx5_chains_attr attr = {}; 4618 int err; 4619 4620 mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); 4621 mutex_init(&tc->t_lock); 4622 mutex_init(&tc->hairpin_tbl_lock); 4623 hash_init(tc->hairpin_tbl); 4624 4625 err = rhashtable_init(&tc->ht, &tc_ht_params); 4626 if (err) 4627 return err; 4628 4629 lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); 4630 4631 if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { 4632 attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | 4633 MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; 4634 attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK; 4635 } 4636 attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; 4637 attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev); 4638 attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; 4639 attr.default_ft = priv->fs.vlan.ft.t; 4640 4641 tc->chains = mlx5_chains_create(dev, &attr); 4642 if (IS_ERR(tc->chains)) { 4643 err = PTR_ERR(tc->chains); 4644 goto err_chains; 4645 } 4646 4647 tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, 4648 MLX5_FLOW_NAMESPACE_KERNEL); 4649 if (IS_ERR(tc->ct)) { 4650 err = PTR_ERR(tc->ct); 4651 goto err_ct; 4652 } 4653 4654 tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; 4655 err = register_netdevice_notifier_dev_net(priv->netdev, 4656 &tc->netdevice_nb, 4657 &tc->netdevice_nn); 4658 if (err) { 4659 tc->netdevice_nb.notifier_call = NULL; 4660 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); 4661 goto err_reg; 4662 } 4663 4664 return 0; 4665 4666 err_reg: 4667 mlx5_tc_ct_clean(tc->ct); 4668 err_ct: 4669 mlx5_chains_destroy(tc->chains); 4670 err_chains: 4671 rhashtable_destroy(&tc->ht); 4672 return err; 4673 } 4674 4675 static void _mlx5e_tc_del_flow(void *ptr, void *arg) 4676 { 4677 struct mlx5e_tc_flow *flow = ptr; 4678 struct mlx5e_priv *priv = flow->priv; 4679 4680 mlx5e_tc_del_flow(priv, flow); 4681 kfree(flow); 4682 } 4683 4684 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) 4685 { 4686 struct mlx5e_tc_table *tc = &priv->fs.tc; 4687 4688 if (tc->netdevice_nb.notifier_call) 4689 unregister_netdevice_notifier_dev_net(priv->netdev, 4690 &tc->netdevice_nb, 4691 &tc->netdevice_nn); 4692 4693 mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr); 4694 mutex_destroy(&tc->hairpin_tbl_lock); 4695 4696 rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); 4697 4698 if (!IS_ERR_OR_NULL(tc->t)) { 4699 mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL); 4700 tc->t = NULL; 4701 } 4702 mutex_destroy(&tc->t_lock); 4703 4704 mlx5_tc_ct_clean(tc->ct); 4705 mlx5_chains_destroy(tc->chains); 4706 } 4707 4708 int mlx5e_tc_esw_init(struct rhashtable *tc_ht) 4709 { 4710 const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); 4711 struct mlx5_rep_uplink_priv *uplink_priv; 4712 struct mlx5e_rep_priv *rpriv; 4713 struct mapping_ctx *mapping; 4714 struct mlx5_eswitch *esw; 4715 struct mlx5e_priv *priv; 4716 int err = 0; 4717 4718 uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); 4719 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); 4720 priv = netdev_priv(rpriv->netdev); 4721 esw = priv->mdev->priv.eswitch; 4722 4723 uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev), 4724 esw_chains(esw), 4725 &esw->offloads.mod_hdr, 4726 MLX5_FLOW_NAMESPACE_FDB); 4727 if (IS_ERR(uplink_priv->ct_priv)) 4728 goto err_ct; 4729 4730 mapping = mapping_create(sizeof(struct tunnel_match_key), 4731 TUNNEL_INFO_BITS_MASK, true); 4732 if (IS_ERR(mapping)) { 4733 err = PTR_ERR(mapping); 4734 goto err_tun_mapping; 4735 } 4736 uplink_priv->tunnel_mapping = mapping; 4737 4738 /* 0xFFF is reserved for stack devices slow path table mark */ 4739 mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true); 4740 if (IS_ERR(mapping)) { 4741 err = PTR_ERR(mapping); 4742 goto err_enc_opts_mapping; 4743 } 4744 uplink_priv->tunnel_enc_opts_mapping = mapping; 4745 4746 err = rhashtable_init(tc_ht, &tc_ht_params); 4747 if (err) 4748 goto err_ht_init; 4749 4750 lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); 4751 4752 uplink_priv->encap = mlx5e_tc_tun_init(priv); 4753 if (IS_ERR(uplink_priv->encap)) { 4754 err = PTR_ERR(uplink_priv->encap); 4755 goto err_register_fib_notifier; 4756 } 4757 4758 return 0; 4759 4760 err_register_fib_notifier: 4761 rhashtable_destroy(tc_ht); 4762 err_ht_init: 4763 mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); 4764 err_enc_opts_mapping: 4765 mapping_destroy(uplink_priv->tunnel_mapping); 4766 err_tun_mapping: 4767 mlx5_tc_ct_clean(uplink_priv->ct_priv); 4768 err_ct: 4769 netdev_warn(priv->netdev, 4770 "Failed to initialize tc (eswitch), err: %d", err); 4771 return err; 4772 } 4773 4774 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) 4775 { 4776 struct mlx5_rep_uplink_priv *uplink_priv; 4777 4778 uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); 4779 4780 rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); 4781 mlx5e_tc_tun_cleanup(uplink_priv->encap); 4782 4783 mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); 4784 mapping_destroy(uplink_priv->tunnel_mapping); 4785 4786 mlx5_tc_ct_clean(uplink_priv->ct_priv); 4787 } 4788 4789 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) 4790 { 4791 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 4792 4793 return atomic_read(&tc_ht->nelems); 4794 } 4795 4796 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) 4797 { 4798 struct mlx5e_tc_flow *flow, *tmp; 4799 4800 list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) 4801 __mlx5e_tc_del_fdb_peer_flow(flow); 4802 } 4803 4804 void mlx5e_tc_reoffload_flows_work(struct work_struct *work) 4805 { 4806 struct mlx5_rep_uplink_priv *rpriv = 4807 container_of(work, struct mlx5_rep_uplink_priv, 4808 reoffload_flows_work); 4809 struct mlx5e_tc_flow *flow, *tmp; 4810 4811 mutex_lock(&rpriv->unready_flows_lock); 4812 list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { 4813 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) 4814 unready_flow_del(flow); 4815 } 4816 mutex_unlock(&rpriv->unready_flows_lock); 4817 } 4818 4819 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, 4820 struct flow_cls_offload *cls_flower, 4821 unsigned long flags) 4822 { 4823 switch (cls_flower->command) { 4824 case FLOW_CLS_REPLACE: 4825 return mlx5e_configure_flower(priv->netdev, priv, cls_flower, 4826 flags); 4827 case FLOW_CLS_DESTROY: 4828 return mlx5e_delete_flower(priv->netdev, priv, cls_flower, 4829 flags); 4830 case FLOW_CLS_STATS: 4831 return mlx5e_stats_flower(priv->netdev, priv, cls_flower, 4832 flags); 4833 default: 4834 return -EOPNOTSUPP; 4835 } 4836 } 4837 4838 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, 4839 void *cb_priv) 4840 { 4841 unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); 4842 struct mlx5e_priv *priv = cb_priv; 4843 4844 switch (type) { 4845 case TC_SETUP_CLSFLOWER: 4846 return mlx5e_setup_tc_cls_flower(priv, type_data, flags); 4847 default: 4848 return -EOPNOTSUPP; 4849 } 4850 } 4851 4852 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, 4853 struct sk_buff *skb) 4854 { 4855 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) 4856 u32 chain = 0, chain_tag, reg_b, zone_restore_id; 4857 struct mlx5e_priv *priv = netdev_priv(skb->dev); 4858 struct mlx5e_tc_table *tc = &priv->fs.tc; 4859 struct tc_skb_ext *tc_skb_ext; 4860 int err; 4861 4862 reg_b = be32_to_cpu(cqe->ft_metadata); 4863 4864 chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; 4865 4866 err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain); 4867 if (err) { 4868 netdev_dbg(priv->netdev, 4869 "Couldn't find chain for chain tag: %d, err: %d\n", 4870 chain_tag, err); 4871 return false; 4872 } 4873 4874 if (chain) { 4875 tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); 4876 if (WARN_ON(!tc_skb_ext)) 4877 return false; 4878 4879 tc_skb_ext->chain = chain; 4880 4881 zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) & 4882 ESW_ZONE_ID_MASK; 4883 4884 if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, 4885 zone_restore_id)) 4886 return false; 4887 } 4888 #endif /* CONFIG_NET_TC_SKB_EXT */ 4889 4890 return true; 4891 } 4892