/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
#include <net/bonding.h>
#include "en.h"
#include "en/tc/post_act.h"
#include "en_rep.h"
#include "en/rep/tc.h"
#include "en/rep/neigh.h"
#include "en_tc.h"
#include "eswitch.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "en/mapping.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/tc_tun_encap.h"
#include "en/tc/sample.h"
#include "en/tc/act/act.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
#include "diag/en_tc_tracepoint.h"
#include <asm/div64.h>
#include "lag/lag.h"
#include "lag/mp.h"

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)

struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
	[CHAIN_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
		.moffset = 0,
		.mlen = 16,
	},
	[VPORT_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
		.moffset = 16,
		.mlen = 16,
	},
	[TUNNEL_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
		.moffset = 8,
		.mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
		.soffset = MLX5_BYTE_OFF(fte_match_param,
					 misc_parameters_2.metadata_reg_c_1),
	},
	[ZONE_TO_REG] = zone_to_reg_ct,
	[ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
	[CTSTATE_TO_REG] = ctstate_to_reg_ct,
	[MARK_TO_REG] = mark_to_reg_ct,
	[LABELS_TO_REG] = labels_to_reg_ct,
	[FTEID_TO_REG] = fteid_to_reg_ct,
	/* For NIC rules we store the restore metadata directly
	 * into reg_b that is passed to SW since we don't
	 * jump between steering domains.
	 */
	[NIC_CHAIN_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
		.moffset = 0,
		.mlen = 16,
	},
	[NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
};

/* To avoid false lock dependency warnings, set the tc_ht lock class
 * to be different than the lock class of the ht being used internally:
 * when deleting the last flow from a group and then deleting the group,
 * we get into del_sw_flow_group(), which calls rhashtable_destroy() on
 * fg->ftes_hash; that takes its own ht->mutex, which is different from
 * the tc_ht mutex here.
 */
static struct lock_class_key tc_ht_lock_key;

static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);

void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
			    enum mlx5e_tc_attr_to_reg type,
			    u32 val,
			    u32 mask)
{
	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	u32 max_mask = GENMASK(match_len - 1, 0);
	__be32 curr_mask_be, curr_val_be;
	u32 curr_mask, curr_val;

	fmask = headers_c + soffset;
	fval = headers_v + soffset;

	memcpy(&curr_mask_be, fmask, 4);
	memcpy(&curr_val_be, fval, 4);

	curr_mask = be32_to_cpu(curr_mask_be);
	curr_val = be32_to_cpu(curr_val_be);

	//move to correct offset
	WARN_ON(mask > max_mask);
	mask <<= moffset;
	val <<= moffset;
	max_mask <<= moffset;

	//zero val and mask
	curr_mask &= ~max_mask;
	curr_val &= ~max_mask;

	//add current to mask
	curr_mask |= mask;
	curr_val |= val;

	//back to be32 and write
	curr_mask_be = cpu_to_be32(curr_mask);
	curr_val_be = cpu_to_be32(curr_val);

	memcpy(fmask, &curr_mask_be, 4);
	memcpy(fval, &curr_val_be, 4);

	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
}

void
mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
				enum mlx5e_tc_attr_to_reg type,
				u32 *val,
				u32 *mask)
{
	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	u32 max_mask = GENMASK(match_len - 1, 0);
	__be32 curr_mask_be, curr_val_be;
	u32 curr_mask, curr_val;

	fmask = headers_c + soffset;
	fval = headers_v + soffset;

	memcpy(&curr_mask_be, fmask, 4);
	memcpy(&curr_val_be, fval, 4);

	curr_mask = be32_to_cpu(curr_mask_be);
	curr_val = be32_to_cpu(curr_val_be);

	*mask = (curr_mask >> moffset) & max_mask;
	*val = (curr_val >> moffset) & max_mask;
}

int
mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
				     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				     enum mlx5_flow_namespace_type ns,
				     enum mlx5e_tc_attr_to_reg type,
				     u32 data)
{
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	char *modact;
	int err;

	modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
	if (IS_ERR(modact))
		return PTR_ERR(modact);

	/* Firmware has 5bit length field and 0 means 32bits */
	if (mlen == 32)
		mlen = 0;

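	/* Build a SET modify-header action that writes 'data' into the
	 * register field described by this mapping, and return the action's
	 * index within mod_hdr_acts so callers can patch it later via
	 * mlx5e_tc_match_to_reg_mod_hdr_change().
	 */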
	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, field, mfield);
	MLX5_SET(set_action_in, modact, offset, moffset);
	MLX5_SET(set_action_in, modact, length, mlen);
	MLX5_SET(set_action_in, modact, data, data);
	err = mod_hdr_acts->num_actions;
	mod_hdr_acts->num_actions++;

	return err;
}

struct mlx5e_tc_int_port_priv *
mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->int_port_priv;
	}

	return NULL;
}

static struct mlx5_tc_ct_priv *
get_ct_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->ct_priv;
	}

	return priv->fs.tc.ct;
}

static struct mlx5e_tc_psample *
get_sample_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->tc_psample;
	}

	return NULL;
}

struct mlx5_flow_handle *
mlx5_tc_rule_insert(struct mlx5e_priv *priv,
		    struct mlx5_flow_spec *spec,
		    struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (is_mdev_switchdev_mode(priv->mdev))
		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

	return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
}

void
mlx5_tc_rule_delete(struct mlx5e_priv *priv,
		    struct mlx5_flow_handle *rule,
		    struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		mlx5_eswitch_del_offloaded_rule(esw, rule, attr);

		return;
	}

	mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}

int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
			  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			  enum mlx5_flow_namespace_type ns,
			  enum mlx5e_tc_attr_to_reg type,
			  u32 data)
{
	int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);

	return ret < 0 ? ret : 0;
}

void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
					  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
					  enum mlx5e_tc_attr_to_reg type,
					  int act_id, u32 data)
{
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	char *modact;

	modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);

	/* Firmware has 5bit length field and 0 means 32bits */
	if (mlen == 32)
		mlen = 0;

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, field, mfield);
	MLX5_SET(set_action_in, modact, offset, moffset);
	MLX5_SET(set_action_in, modact, length, mlen);
	MLX5_SET(set_action_in, modact, data, data);
}

struct mlx5e_hairpin {
	struct mlx5_hairpin *pair;

	struct mlx5_core_dev *func_mdev;
	struct mlx5e_priv *func_priv;
	u32 tdn;
	struct mlx5e_tir direct_tir;

	int num_channels;
	struct mlx5e_rqt indir_rqt;
	struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
	struct mlx5_ttc_table *ttc;
};

struct mlx5e_hairpin_entry {
	/* a node of a hash table which keeps all the hairpin entries */
	struct hlist_node hairpin_hlist;

	/* protects flows list */
	spinlock_t flows_lock;
	/* flows sharing the same hairpin */
	struct list_head flows;
	/* hpe's that were not fully initialized when dead peer update event
	 * function traversed them.
	 */
	struct list_head dead_peer_wait_list;

	u16 peer_vhca_id;
	u8 prio;
	struct mlx5e_hairpin *hp;
	refcount_t refcnt;
	struct completion res_ready;
};

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow);

struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
		return ERR_PTR(-EINVAL);
	return flow;
}

void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
	if (refcount_dec_and_test(&flow->refcnt)) {
		mlx5e_tc_del_flow(priv, flow);
		kfree_rcu(flow, rcu_head);
	}
}

bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, ESWITCH);
}

bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, FT);
}

bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, OFFLOADED);
}

int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
{
	return mlx5e_is_eswitch_flow(flow) ?
		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}

static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
		&esw->offloads.mod_hdr :
		&priv->fs.tc.mod_hdr;
}

static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
				struct mlx5e_tc_flow *flow,
				struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	struct mlx5_modify_hdr *modify_hdr;
	struct mlx5e_mod_hdr_handle *mh;

	mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
				  mlx5e_get_flow_namespace(flow),
				  &parse_attr->mod_hdr_acts);
	if (IS_ERR(mh))
		return PTR_ERR(mh);

	modify_hdr = mlx5e_mod_hdr_get(mh);
	flow->attr->modify_hdr = modify_hdr;
	flow->mh = mh;

	return 0;
}

static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
				 struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->mh)
		return;

	mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
			     flow->mh);
	flow->mh = NULL;
}

static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
	struct mlx5_core_dev *mdev;
	struct net_device *netdev;
	struct mlx5e_priv *priv;

	netdev = dev_get_by_index(net, ifindex);
	if (!netdev)
		return ERR_PTR(-ENODEV);

	priv = netdev_priv(netdev);
	mdev = priv->mdev;
	dev_put(netdev);

	/* Mirred tc action holds a refcount on the ifindex net_device (see
	 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
	 * after dev_put(netdev), while we're in the context of adding a tc flow.
	 *
	 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
	 * stored in a hairpin object, which exists until all flows that refer to it have
	 * been removed.
	 *
	 * On the other hand, after a hairpin object has been created, the peer net_device may
	 * be removed/unbound while there are still some hairpin flows that are using it. This
	 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
	 * the NETDEV_UNREGISTER event of the peer net_device.
	 */
	return mdev;
}

static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
	struct mlx5e_tir_builder *builder;
	int err;

	builder = mlx5e_tir_builder_alloc(false);
	if (!builder)
		return -ENOMEM;

	err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
	if (err)
		goto out;

	mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
	err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
	if (err)
		goto create_tir_err;

out:
	mlx5e_tir_builder_free(builder);
	return err;

create_tir_err:
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);

	goto out;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
	mlx5e_tir_destroy(&hp->direct_tir);
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_rss_params_indir *indir;
	int err;

	indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
	if (!indir)
		return -ENOMEM;

	mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
	err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
				   mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
				   indir);

	kvfree(indir);
	return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5e_rss_params_hash rss_hash;
	enum mlx5_traffic_types tt, max_tt;
	struct mlx5e_tir_builder *builder;
	int err = 0;

	builder = mlx5e_tir_builder_alloc(false);
	if (!builder)
		return -ENOMEM;

	rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
		struct mlx5e_rss_params_traffic_type rss_tt;

		rss_tt = mlx5e_rss_get_default_tt_config(tt);

		mlx5e_tir_builder_build_rqt(builder, hp->tdn,
					    mlx5e_rqt_get_rqtn(&hp->indir_rqt),
					    false);
		mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);

		err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
		if (err) {
			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
			goto err_destroy_tirs;
		}

		mlx5e_tir_builder_clear(builder);
	}

out:
	mlx5e_tir_builder_free(builder);
	return err;

err_destroy_tirs:
	max_tt = tt;
	for (tt = 0; tt < max_tt; tt++)
		mlx5e_tir_destroy(&hp->indir_tir[tt]);

	goto out;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
	int tt;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
		mlx5e_tir_destroy(&hp->indir_tir[tt]);
}

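/* Steer each traffic type to its RSS (indirect) TIR, except for TT_ANY,
 * which goes to the hairpin's direct TIR.
 */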
static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
					 struct ttc_params *ttc_params)
{
	struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
	int tt;

	memset(ttc_params, 0, sizeof(*ttc_params));

	ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
						 MLX5_FLOW_NAMESPACE_KERNEL);
	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
		ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		ttc_params->dests[tt].tir_num =
			tt == MLX5_TT_ANY ?
				mlx5e_tir_get_tirn(&hp->direct_tir) :
				mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
	}

	ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
	ft_attr->prio = MLX5E_TC_PRIO;
}

static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct ttc_params ttc_params;
	int err;

	err = mlx5e_hairpin_create_indirect_rqt(hp);
	if (err)
		return err;

	err = mlx5e_hairpin_create_indirect_tirs(hp);
	if (err)
		goto err_create_indirect_tirs;

	mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
	hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
	if (IS_ERR(hp->ttc)) {
		err = PTR_ERR(hp->ttc);
		goto err_create_ttc_table;
	}

	netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
		   hp->num_channels,
		   mlx5_get_ttc_flow_table(priv->fs.ttc)->id);

	return 0;

err_create_ttc_table:
	mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
	mlx5e_rqt_destroy(&hp->indir_rqt);

	return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
	mlx5_destroy_ttc_table(hp->ttc);
	mlx5e_hairpin_destroy_indirect_tirs(hp);
	mlx5e_rqt_destroy(&hp->indir_rqt);
}

static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
		     int peer_ifindex)
{
	struct mlx5_core_dev *func_mdev, *peer_mdev;
	struct mlx5e_hairpin *hp;
	struct mlx5_hairpin *pair;
	int err;

	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
	if (!hp)
		return ERR_PTR(-ENOMEM);

	func_mdev = priv->mdev;
	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (IS_ERR(peer_mdev)) {
		err = PTR_ERR(peer_mdev);
		goto create_pair_err;
	}

	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
	if (IS_ERR(pair)) {
		err = PTR_ERR(pair);
		goto create_pair_err;
	}
	hp->pair = pair;
	hp->func_mdev = func_mdev;
	hp->func_priv = priv;
	hp->num_channels = params->num_channels;

	err = mlx5e_hairpin_create_transport(hp);
	if (err)
		goto create_transport_err;

	if (hp->num_channels > 1) {
		err = mlx5e_hairpin_rss_init(hp);
		if (err)
			goto rss_init_err;
	}

	return hp;

rss_init_err:
	mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
	mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
	kfree(hp);
	return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
	if (hp->num_channels > 1)
		mlx5e_hairpin_rss_cleanup(hp);
	mlx5e_hairpin_destroy_transport(hp);
	mlx5_core_hairpin_destroy(hp->pair);
	kvfree(hp);
}

static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
	return (peer_vhca_id << 16 | prio);
}

static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
						     u16 peer_vhca_id, u8 prio)
{
	struct mlx5e_hairpin_entry *hpe;
	u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

	hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
			       hairpin_hlist, hash_key) {
		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
			refcount_inc(&hpe->refcnt);
			return hpe;
		}
	}

	return NULL;
}

static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
			      struct mlx5e_hairpin_entry *hpe)
{
	/* no more hairpin flows for us, release the hairpin pair */
	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
		return;
	hash_del(&hpe->hairpin_hlist);
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	if (!IS_ERR_OR_NULL(hpe->hp)) {
		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
			   dev_name(hpe->hp->pair->peer_mdev->device));

		mlx5e_hairpin_destroy(hpe->hp);
	}

	WARN_ON(!list_empty(&hpe->flows));
	kfree(hpe);
}

#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
				  struct mlx5_flow_spec *spec, u8 *match_prio,
				  struct netlink_ext_ack *extack)
{
	void *headers_c, *headers_v;
	u8 prio_val, prio_mask = 0;
	bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only PCP trust state supported for hairpin");
		return -EOPNOTSUPP;
	}
#endif
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

	vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
	if (vlan_present) {
		prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
		prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
	}

	if (!vlan_present || !prio_mask) {
		prio_val = UNKNOWN_MATCH_PRIO;
	} else if (prio_mask != 0x7) {
		NL_SET_ERR_MSG_MOD(extack,
				   "masked priority match not supported for hairpin");
		return -EOPNOTSUPP;
	}

	*match_prio = prio_val;
	return 0;
}

static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5e_tc_flow_parse_attr *parse_attr,
				  struct netlink_ext_ack *extack)
{
	int peer_ifindex = parse_attr->mirred_ifindex[0];
	struct mlx5_hairpin_params params;
	struct mlx5_core_dev *peer_mdev;
	struct mlx5e_hairpin_entry *hpe;
	struct mlx5e_hairpin *hp;
	u64 link_speed64;
	u32 link_speed;
	u8 match_prio;
	u16 peer_id;
	int err;

	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (IS_ERR(peer_mdev)) {
		NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
		return PTR_ERR(peer_mdev);
	}

	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
		return -EOPNOTSUPP;
	}

	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
				     extack);
	if (err)
		return err;

	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
	if (hpe) {
		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
		wait_for_completion(&hpe->res_ready);

		if (IS_ERR(hpe->hp)) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
	if (!hpe) {
		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
		return -ENOMEM;
	}

	spin_lock_init(&hpe->flows_lock);
	INIT_LIST_HEAD(&hpe->flows);
	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
	hpe->peer_vhca_id = peer_id;
	hpe->prio = match_prio;
	refcount_set(&hpe->refcnt, 1);
	init_completion(&hpe->res_ready);

	hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
		 hash_hairpin_info(peer_id, match_prio));
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

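	/* Size the hairpin WQ from device capabilities: start from a 64KB
	 * (2^16) data buffer, clamp it between the min/max hairpin WQ data
	 * sizes the device reports, and derive the packet count from the
	 * data size divided by the minimal stride.
	 */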
	params.log_data_size = 16;
	params.log_data_size = min_t(u8, params.log_data_size,
				     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
	params.log_data_size = max_t(u8, params.log_data_size,
				     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

	params.log_num_packets = params.log_data_size -
				 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
	params.log_num_packets = min_t(u8, params.log_num_packets,
				       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

	params.q_counter = priv->q_counter;
	/* set hairpin pair per each 50Gbs share of the link */
	mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
	link_speed = max_t(u32, link_speed, 50000);
	link_speed64 = link_speed;
	do_div(link_speed64, 50000);
	params.num_channels = link_speed64;

	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
	hpe->hp = hp;
	complete_all(&hpe->res_ready);
	if (IS_ERR(hp)) {
		err = PTR_ERR(hp);
		goto out_err;
	}

	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
		   mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
		   dev_name(hp->pair->peer_mdev->device),
		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
	if (hpe->hp->num_channels > 1) {
		flow_flag_set(flow, HAIRPIN_RSS);
		flow->attr->nic_attr->hairpin_ft =
			mlx5_get_ttc_flow_table(hpe->hp->ttc);
	} else {
		flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
	}

	flow->hpe = hpe;
	spin_lock(&hpe->flows_lock);
	list_add(&flow->hairpin, &hpe->flows);
	spin_unlock(&hpe->flows_lock);

	return 0;

out_err:
	mlx5e_hairpin_put(priv, hpe);
	return err;
}

static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
				   struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->hpe)
		return;

	spin_lock(&flow->hpe->flows_lock);
	list_del(&flow->hairpin);
	spin_unlock(&flow->hpe->flows_lock);

	mlx5e_hairpin_put(priv, flow->hpe);
	flow->hpe = NULL;
}

struct mlx5_flow_handle *
mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_context *flow_context = &spec->flow_context;
	struct mlx5_fs_chains *nic_chains = mlx5e_nic_chains(priv);
	struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_act flow_act = {
		.action = attr->action,
		.flags = FLOW_ACT_NO_APPEND,
	};
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_table *ft;
	int dest_ix = 0;

	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
	flow_context->flow_tag = nic_attr->flow_tag;

	if (attr->dest_ft) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = attr->dest_ft;
		dest_ix++;
	} else if (nic_attr->hairpin_ft) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = nic_attr->hairpin_ft;
		dest_ix++;
	} else if (nic_attr->hairpin_tirn) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
		dest_ix++;
	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		if (attr->dest_chain) {
			dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
								 attr->dest_chain, 1,
								 MLX5E_TC_FT_LEVEL);
			if (IS_ERR(dest[dest_ix].ft))
				return ERR_CAST(dest[dest_ix].ft);
		} else {
			dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
		}
		dest_ix++;
	}

	if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
	    MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
		flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;

	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
		dest_ix++;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		flow_act.modify_hdr = attr->modify_hdr;

	mutex_lock(&tc->t_lock);
	if (IS_ERR_OR_NULL(tc->t)) {
		/* Create the root table here if it doesn't exist yet */
		tc->t =
			mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);

		if (IS_ERR(tc->t)) {
			mutex_unlock(&tc->t_lock);
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			rule = ERR_CAST(priv->fs.tc.t);
			goto err_ft_get;
		}
	}
	mutex_unlock(&tc->t_lock);

	if (attr->chain || attr->prio)
		ft = mlx5_chains_get_table(nic_chains,
					   attr->chain, attr->prio,
					   MLX5E_TC_FT_LEVEL);
	else
		ft = attr->ft;

	if (IS_ERR(ft)) {
		rule = ERR_CAST(ft);
		goto err_ft_get;
	}

	if (attr->outer_match_level != MLX5_MATCH_NONE)
		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

	rule = mlx5_add_flow_rules(ft, spec,
				   &flow_act, dest, dest_ix);
	if (IS_ERR(rule))
		goto err_rule;

	return rule;

err_rule:
	if (attr->chain || attr->prio)
		mlx5_chains_put_table(nic_chains,
				      attr->chain, attr->prio,
				      MLX5E_TC_FT_LEVEL);
err_ft_get:
	if (attr->dest_chain)
		mlx5_chains_put_table(nic_chains,
				      attr->dest_chain, 1,
				      MLX5E_TC_FT_LEVEL);

	return ERR_CAST(rule);
}

static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_fc *counter;
	int err;

	parse_attr = attr->parse_attr;

	if (flow_flag_test(flow, HAIRPIN)) {
		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
		if (err)
			return err;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(dev, true);
		if (IS_ERR(counter))
			return PTR_ERR(counter);

		attr->counter = counter;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
		mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
		if (err)
			return err;
	}

	if (flow_flag_test(flow, CT))
		flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
							attr, &parse_attr->mod_hdr_acts);
	else
		flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
							     attr);

	return PTR_ERR_OR_ZERO(flow->rule[0]);
}

void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
				  struct mlx5_flow_handle *rule,
				  struct mlx5_flow_attr *attr)
{
	struct mlx5_fs_chains *nic_chains = mlx5e_nic_chains(priv);

	mlx5_del_flow_rules(rule);

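	/* Release the chain-table references taken by
	 * mlx5e_add_offloaded_nic_rule() for the rule's own chain/prio table
	 * and, if the rule jumped to another chain, for the destination table.
	 */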
	if (attr->chain || attr->prio)
		mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
				      MLX5E_TC_FT_LEVEL);

	if (attr->dest_chain)
		mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
				      MLX5E_TC_FT_LEVEL);
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	flow_flag_clear(flow, OFFLOADED);

	if (flow_flag_test(flow, CT))
		mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
	else if (!IS_ERR_OR_NULL(flow->rule[0]))
		mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);

	/* Remove root table if no rules are left to avoid
	 * extra steering hops.
	 */
	mutex_lock(&priv->fs.tc.t_lock);
	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
	    !IS_ERR_OR_NULL(tc->t)) {
		mlx5_chains_put_table(mlx5e_nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
		priv->fs.tc.t = NULL;
	}
	mutex_unlock(&priv->fs.tc.t_lock);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5e_detach_mod_hdr(priv, flow);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
		mlx5_fc_destroy(priv->mdev, attr->counter);

	if (flow_flag_test(flow, HAIRPIN))
		mlx5e_hairpin_flow_del(priv, flow);

	kvfree(attr->parse_attr);
	kfree(flow->attr);
}

struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
			   struct mlx5e_tc_flow *flow,
			   struct mlx5_flow_spec *spec,
			   struct mlx5_flow_attr *attr)
{
	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
	struct mlx5_flow_handle *rule;

	if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

	if (flow_flag_test(flow, CT)) {
		mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;

		rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
					       flow, spec, attr,
					       mod_hdr_acts);
	} else if (flow_flag_test(flow, SAMPLE)) {
		rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
					       mlx5e_tc_get_flow_tun_id(flow));
	} else {
		rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
	}

	if (IS_ERR(rule))
		return rule;

	if (attr->esw_attr->split_count) {
		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
		if (IS_ERR(flow->rule[1])) {
			if (flow_flag_test(flow, CT))
				mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
			else
				mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
			return flow->rule[1];
		}
	}

	return rule;
}

void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5_flow_attr *attr)
{
	flow_flag_clear(flow, OFFLOADED);

	if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
		goto offload_rule_0;

	if (attr->esw_attr->split_count)
		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

	if (flow_flag_test(flow, CT))
		mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
	else if (flow_flag_test(flow, SAMPLE))
		mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
	else
offload_rule_0:
		mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}

struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
			      struct mlx5e_tc_flow *flow,
			      struct mlx5_flow_spec *spec)
{
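	/* Reoffload the flow with a copy of its attr that carries the
	 * SLOW_PATH flag, a plain FWD_DEST action and no mirror split, so
	 * traffic is steered to the slow-path table (and thus to software)
	 * until the flow can be fully offloaded.
	 */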
	struct mlx5_flow_attr *slow_attr;
	struct mlx5_flow_handle *rule;

	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
	if (!slow_attr)
		return ERR_PTR(-ENOMEM);

	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->esw_attr->split_count = 0;
	slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;

	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
	if (!IS_ERR(rule))
		flow_flag_set(flow, SLOW);

	kfree(slow_attr);

	return rule;
}

void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
				       struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_attr *slow_attr;

	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
	if (!slow_attr) {
		mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
		return;
	}

	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->esw_attr->split_count = 0;
	slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
	flow_flag_clear(flow, SLOW);
	kfree(slow_attr);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
			     struct list_head *unready_flows)
{
	flow_flag_set(flow, NOT_READY);
	list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
	list_del(&flow->unready);
	flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_add(flow, &uplink_priv->unready_flows);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_del(flow);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
{
	struct mlx5_core_dev *out_mdev, *route_mdev;
	struct mlx5e_priv *out_priv, *route_priv;

	out_priv = netdev_priv(out_dev);
	out_mdev = out_priv->mdev;
	route_priv = netdev_priv(route_dev);
	route_mdev = route_priv->mdev;

	if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
	    route_mdev->coredev_type != MLX5_COREDEV_VF)
		return false;

	return mlx5e_same_hw_devs(out_priv, route_priv);
}

int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
	struct mlx5e_priv *out_priv, *route_priv;
	struct mlx5_devcom *devcom = NULL;
	struct mlx5_core_dev *route_mdev;
	struct mlx5_eswitch *esw;
	u16 vhca_id;
	int err;

	out_priv = netdev_priv(out_dev);
	esw = out_priv->mdev->priv.eswitch;
	route_priv = netdev_priv(route_dev);
	route_mdev = route_priv->mdev;

	vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
	if (mlx5_lag_is_active(out_priv->mdev)) {
		/* In the lag case we may get devices from different eswitch
		 * instances. If we failed to get the vport num, it most
		 * likely means we are on the wrong eswitch.
		 */
		err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
		if (err != -ENOENT)
			return err;

		devcom = out_priv->mdev->priv.devcom;
		esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
		if (!esw)
			return -ENODEV;
	}

	err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
	if (devcom)
		mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return err;
}

int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow_parse_attr *parse_attr,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts;
	struct mlx5_modify_hdr *mod_hdr;

	mod_hdr = mlx5_modify_header_alloc(priv->mdev,
					   mlx5e_get_flow_namespace(flow),
					   mod_hdr_acts->num_actions,
					   mod_hdr_acts->actions);
	if (IS_ERR(mod_hdr))
		return PTR_ERR(mod_hdr);

	WARN_ON(flow->attr->modify_hdr);
	flow->attr->modify_hdr = mod_hdr;

	return 0;
}

static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	bool vf_tun = false, encap_valid = true;
	struct net_device *encap_dev = NULL;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *out_priv;
	struct mlx5_fc *counter;
	u32 max_prio, max_chain;
	int err = 0;
	int out_index;

	parse_attr = attr->parse_attr;
	esw_attr = attr->esw_attr;

	/* We check chain range only for tc flows.
	 * For ft flows, we checked attr->chain was originally 0 and set it to
	 * FDB_FT_CHAIN which is outside tc range.
	 * See mlx5e_rep_setup_ft_cb().
	 */
	max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
	if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Requested chain is out of supported range");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
	if (attr->prio > max_prio) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Requested priority is out of supported range");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	if (flow_flag_test(flow, TUN_RX)) {
		err = mlx5e_attach_decap_route(priv, flow);
		if (err)
			goto err_out;

		if (!attr->chain && esw_attr->int_port &&
		    attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
			/* If decap route device is internal port, change the
			 * source vport value in reg_c0 back to uplink just in
			 * case the rule performs goto chain > 0. If we have a miss
			 * on chain > 0 we want the metadata regs to hold the
			 * chain id so SW will resume handling of this packet
			 * from the proper chain.
			 */
			u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
									       esw_attr->in_rep->vport);

			err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
							MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
							metadata);
			if (err)
				goto err_out;

			attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
		}
	}

	if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
		err = mlx5e_attach_decap(priv, flow, extack);
		if (err)
			goto err_out;
	}

	if (netif_is_ovs_master(parse_attr->filter_dev)) {
		struct mlx5e_tc_int_port *int_port;

		if (attr->chain) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Internal port rule is only supported on chain 0");
			err = -EOPNOTSUPP;
			goto err_out;
		}

		if (attr->dest_chain) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Internal port rule offload doesn't support goto action");
			err = -EOPNOTSUPP;
			goto err_out;
		}

		int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
						 parse_attr->filter_dev->ifindex,
						 flow_flag_test(flow, EGRESS) ?
						 MLX5E_TC_INT_PORT_EGRESS :
						 MLX5E_TC_INT_PORT_INGRESS);
		if (IS_ERR(int_port)) {
			err = PTR_ERR(int_port);
			goto err_out;
		}

		esw_attr->int_port = int_port;
	}

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		struct net_device *out_dev;
		int mirred_ifindex;

		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
		if (!out_dev) {
			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
			err = -ENODEV;
			goto err_out;
		}
		err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
					 extack, &encap_dev, &encap_valid);
		dev_put(out_dev);
		if (err)
			goto err_out;

		if (esw_attr->dests[out_index].flags &
		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
		    !esw_attr->dest_int_port)
			vf_tun = true;
		out_priv = netdev_priv(encap_dev);
		rpriv = out_priv->ppriv;
		esw_attr->dests[out_index].rep = rpriv->rep;
		esw_attr->dests[out_index].mdev = out_priv->mdev;
	}

	if (vf_tun && esw_attr->out_count > 1) {
		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	err = mlx5_eswitch_add_vlan_action(esw, attr);
	if (err)
		goto err_out;

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
	    !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
		if (vf_tun) {
			err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
			if (err)
				goto err_out;
		} else {
			err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
			if (err)
				goto err_out;
		}
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(esw_attr->counter_dev, true);
		if (IS_ERR(counter)) {
			err = PTR_ERR(counter);
			goto err_out;
		}

		attr->counter = counter;
	}

	/* we get here if one of the following takes place:
	 * (1) there's no error
	 * (2) there's an encap action and we don't have valid neigh
	 */
	if (!encap_valid)
		flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
	else
		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);

	if (IS_ERR(flow->rule[0])) {
		err = PTR_ERR(flow->rule[0]);
		goto err_out;
	}
	flow_flag_set(flow, OFFLOADED);

	return 0;

err_out:
	flow_flag_set(flow, FAILED);
	return err;
}

static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
	void *headers_v = MLX5_ADDR_OF(fte_match_param,
				       spec->match_value,
				       misc_parameters_3);
	u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
					     headers_v,
					     geneve_tlv_option_0_data);

	return !!geneve_tlv_opt_0_data;
}

static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	bool vf_tun = false;
	int out_index;

	esw_attr = attr->esw_attr;
	mlx5e_put_flow_tunnel_id(flow);

	if (flow_flag_test(flow, NOT_READY))
		remove_unready_flow(flow);

	if (mlx5e_is_offloaded_flow(flow)) {
		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
	}
	complete_all(&flow->del_hw_done);

	if (mlx5_flow_has_geneve_opt(flow))
		mlx5_geneve_tlv_option_del(priv->mdev->geneve);

	mlx5_eswitch_del_vlan_action(esw, attr);

	if (flow->decap_route)
		mlx5e_detach_decap_route(priv, flow);

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		if (esw_attr->dests[out_index].flags &
		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
		    !esw_attr->dest_int_port)
			vf_tun = true;
		if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
			mlx5e_detach_encap(priv, flow, out_index);
			kfree(attr->parse_attr->tun_info[out_index]);
		}
	}

	mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
		if (vf_tun && attr->modify_hdr)
			mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
		else
			mlx5e_detach_mod_hdr(priv, flow);
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
		mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);

	if (esw_attr->int_port)
		mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);

	if (esw_attr->dest_int_port)
		mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);

	if (flow_flag_test(flow, L3_TO_L2_DECAP))
		mlx5e_detach_decap(priv, flow);

	kfree(attr->sample_attr);
	kvfree(attr->esw_attr->rx_tun_attr);
	kvfree(attr->parse_attr);
	kfree(flow->attr);
}

struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
	return flow->attr->counter;
}

/* Iterate over tmp_list of flows attached to flow_list head. */
void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
		mlx5e_flow_put(priv, flow);
}

static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;

	if (!flow_flag_test(flow, ESWITCH) ||
	    !flow_flag_test(flow, DUP))
		return;

	mutex_lock(&esw->offloads.peer_mutex);
	list_del(&flow->peer);
	mutex_unlock(&esw->offloads.peer_mutex);

	flow_flag_clear(flow, DUP);

	if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
		mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
		kfree(flow->peer_flow);
	}

	flow->peer_flow = NULL;
}

static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_core_dev *dev = flow->priv->mdev;
	struct mlx5_devcom *devcom = dev->priv.devcom;
	struct mlx5_eswitch *peer_esw;

	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		return;

	__mlx5e_tc_del_fdb_peer_flow(flow);
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	if (mlx5e_is_eswitch_flow(flow)) {
		mlx5e_tc_del_fdb_peer_flow(flow);
		mlx5e_tc_del_fdb_flow(priv, flow);
	} else {
		mlx5e_tc_del_nic_flow(priv, flow);
	}
}

static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_action *flow_action = &rule->action;
	const struct flow_action_entry *act;
	int i;

	if (chain)
		return false;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_GOTO:
			return true;
		case FLOW_ACTION_SAMPLE:
			return true;
		default:
			continue;
		}
	}

	return false;
}

static int
enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
				    struct flow_dissector_key_enc_opts *opts,
				    struct netlink_ext_ack *extack,
				    bool *dont_care)
{
	struct geneve_opt *opt;
	int off = 0;

	*dont_care = true;

	while (opts->len > off) {
		opt = (struct geneve_opt *)&opts->data[off];

		if (!(*dont_care) || opt->opt_class || opt->type ||
		    memchr_inv(opt->opt_data, 0, opt->length * 4)) {
			*dont_care = false;

			if (opt->opt_class != htons(U16_MAX) ||
			    opt->type != U8_MAX) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Partial match of tunnel options in chain > 0 isn't supported");
				netdev_warn(priv->netdev,
					    "Partial match of tunnel options in chain > 0 isn't supported");
				return -EOPNOTSUPP;
			}
		}

		off += sizeof(struct geneve_opt) + opt->length * 4;
	}

	return 0;
}

#define COPY_DISSECTOR(rule, diss_key, dst)\
({ \
	struct flow_rule *__rule = (rule);\
	typeof(dst) __dst = dst;\
\
	memcpy(__dst,\
	       skb_flow_dissector_target(__rule->match.dissector,\
					 diss_key,\
					 __rule->match.key),\
	       sizeof(*__dst));\
})

static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct flow_cls_offload *f,
				    struct net_device *filter_dev)
{
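	/* Encode the tunnel match into compact mapping IDs: the tunnel
	 * headers map to a tunnel id, and any geneve options map to a
	 * separate options id. The combined value is either matched in
	 * reg_c1 (chain > 0) or written to it via a modify-header action
	 * (chain 0), so the tunnel info can be restored on a chain miss.
	 */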
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
	struct flow_match_enc_opts enc_opts_match;
	struct tunnel_match_enc_opts tun_enc_opts;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct tunnel_match_key tunnel_key;
	bool enc_opts_is_dont_care = true;
	u32 tun_id, enc_opts_id = 0;
	struct mlx5_eswitch *esw;
	u32 value, mask;
	int err;

	esw = priv->mdev->priv.eswitch;
	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	memset(&tunnel_key, 0, sizeof(tunnel_key));
	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
		       &tunnel_key.enc_control);
	if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
		COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
			       &tunnel_key.enc_ipv4);
	else
		COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
			       &tunnel_key.enc_ipv6);
	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
		       &tunnel_key.enc_tp);
	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
		       &tunnel_key.enc_key_id);
	tunnel_key.filter_ifindex = filter_dev->ifindex;

	err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
	if (err)
		return err;

	flow_rule_match_enc_opts(rule, &enc_opts_match);
	err = enc_opts_is_dont_care_or_full_match(priv,
						  enc_opts_match.mask,
						  extack,
						  &enc_opts_is_dont_care);
	if (err)
		goto err_enc_opts;

	if (!enc_opts_is_dont_care) {
		memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
		memcpy(&tun_enc_opts.key, enc_opts_match.key,
		       sizeof(*enc_opts_match.key));
		memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
		       sizeof(*enc_opts_match.mask));

		err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
				  &tun_enc_opts, &enc_opts_id);
		if (err)
			goto err_enc_opts;
	}

	value = tun_id << ENC_OPTS_BITS | enc_opts_id;
	mask = enc_opts_id ? TUNNEL_ID_MASK :
			     (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);

	if (attr->chain) {
		mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
					    TUNNEL_TO_REG, value, mask);
	} else {
		mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
		err = mlx5e_tc_match_to_reg_set(priv->mdev,
						mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
						TUNNEL_TO_REG, value);
		if (err)
			goto err_set;

		attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	}

	flow->tunnel_id = value;
	return 0;

err_set:
	if (enc_opts_id)
		mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
			       enc_opts_id);
err_enc_opts:
	mapping_remove(uplink_priv->tunnel_mapping, tun_id);
	return err;
}

static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
{
	u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
	u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	if (tun_id)
		mapping_remove(uplink_priv->tunnel_mapping, tun_id);
	if (enc_opts_id)
		mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
			       enc_opts_id);
}

u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
{
	return flow->tunnel_id;
}

void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
			    struct flow_match_basic *match, bool outer,
			    void *headers_c, void *headers_v)
{
	bool ip_version_cap;

	ip_version_cap = outer ?
		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					  ft_field_support.outer_ip_version) :
		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					  ft_field_support.inner_ip_version);

	if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
	    (match->key->n_proto == htons(ETH_P_IP) ||
	     match->key->n_proto == htons(ETH_P_IPV6))) {
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
			 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
	} else {
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
			 ntohs(match->mask->n_proto));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
			 ntohs(match->key->n_proto));
	}
}

u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
{
	void *headers_v;
	u16 ethertype;
	u8 ip_version;

	if (outer)
		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
	else
		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);

	ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
	/* Return ip_version converted from ethertype anyway */
	if (!ip_version) {
		ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
		if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
			ip_version = 4;
		else if (ethertype == ETH_P_IPV6)
			ip_version = 6;
	}
	return ip_version;
}

/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
1943 * It rewrites the inner ip_ecn based on the inner and outer ip_ecn as follows:
1944 * +---------+----------------------------------------+
1945 * |Arriving | Arriving Outer Header |
1946 * | Inner +---------+---------+---------+----------+
1947 * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
1948 * +---------+---------+---------+---------+----------+
1949 * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> |
1950 * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* |
1951 * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* |
1952 * | CE | CE | CE | CE | CE |
1953 * +---------+---------+---------+---------+----------+
1954 *
1955 * TC matches on the inner header after decapsulation on the tunnel device, but HW
1956 * offload matches the inner ip_ecn value before the hardware decap action.
1957 *
1958 * Cells marked with an asterisk (*) are changed from the original inner packet ip_ecn
1959 * value during decap, so matching those values on inner ip_ecn before decap will fail.
1960 *
1961 * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
1962 * except for outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
1963 * and as such we can drop the inner ip_ecn=CE match.
1964 */
1965
1966 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
1967 struct flow_cls_offload *f,
1968 bool *match_inner_ecn)
1969 {
1970 u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
1971 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1972 struct netlink_ext_ack *extack = f->common.extack;
1973 struct flow_match_ip match;
1974
1975 *match_inner_ecn = true;
1976
1977 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
1978 flow_rule_match_enc_ip(rule, &match);
1979 outer_ecn_key = match.key->tos & INET_ECN_MASK;
1980 outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
1981 }
1982
1983 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
1984 flow_rule_match_ip(rule, &match);
1985 inner_ecn_key = match.key->tos & INET_ECN_MASK;
1986 inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
1987 }
1988
1989 if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
1990 NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
1991 netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
1992 return -EOPNOTSUPP;
1993 }
1994
1995 if (!outer_ecn_mask) {
1996 if (!inner_ecn_mask)
1997 return 0;
1998
1999 NL_SET_ERR_MSG_MOD(extack,
2000 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2001 netdev_warn(priv->netdev,
2002 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2003 return -EOPNOTSUPP;
2004 }
2005
2006 if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
2007 NL_SET_ERR_MSG_MOD(extack,
2008 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2009 netdev_warn(priv->netdev,
2010 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2011 return -EOPNOTSUPP;
2012 }
2013
2014 if (!inner_ecn_mask)
2015 return 0;
2016
2017 /* Both inner and outer have full mask on ecn */
2018
2019 if (outer_ecn_key == INET_ECN_ECT_1) {
2020 /* inner ecn might be changed by the DECAP action */
2021
2022 NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
2023 netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
2024 return -EOPNOTSUPP;
2025 }
2026
2027 if (outer_ecn_key != INET_ECN_CE)
2028 return 0;
2029
2030 if (inner_ecn_key != INET_ECN_CE) {
2031 /* Can't happen in software, as packet ecn will be changed to CE after decap */
2032 NL_SET_ERR_MSG_MOD(extack,
2033 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2034 netdev_warn(priv->netdev,
2035 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2036 return -EOPNOTSUPP;
2037 }
2038
2039 /* outer ecn = CE, inner ecn = CE; as decap will change inner ecn to CE in any case,
2040 * drop the match on inner ecn
2041 */
2042 *match_inner_ecn = false;
2043
2044 return 0;
2045 }
2046
2047 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2048 struct mlx5e_tc_flow *flow,
2049 struct mlx5_flow_spec *spec,
2050 struct flow_cls_offload *f,
2051 struct net_device *filter_dev,
2052 u8 *match_level,
2053 bool *match_inner)
2054 {
2055 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2056 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2057 struct netlink_ext_ack *extack = f->common.extack;
2058 bool needs_mapping, sets_mapping;
2059 int err;
2060
2061 if (!mlx5e_is_eswitch_flow(flow)) {
2062 NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2063 return -EOPNOTSUPP;
2064 }
2065
2066 needs_mapping = !!flow->attr->chain;
2067 sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2068 *match_inner = !needs_mapping;
2069
2070 if ((needs_mapping || sets_mapping) &&
2071 !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2072 NL_SET_ERR_MSG_MOD(extack,
2073 "Chains on tunnel devices isn't supported without register loopback support");
2074 netdev_warn(priv->netdev,
2075 "Chains on tunnel devices isn't supported without register loopback support");
2076 return -EOPNOTSUPP;
2077 }
2078
2079 if (!flow->attr->chain) {
2080 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2081 match_level);
2082 if (err) {
2083 NL_SET_ERR_MSG_MOD(extack,
2084 "Failed to parse tunnel attributes");
2085 netdev_warn(priv->netdev,
2086 "Failed to parse tunnel attributes");
2087 return err;
2088 }
2089
2090 /* With MPLS over UDP we decapsulate using a packet reformat
2091 * object.
2092 */
2093 if (!netif_is_bareudp(filter_dev))
2094 flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2095 err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2096 if (err)
2097 return err;
2098 } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
2099 struct mlx5_flow_spec *tmp_spec;
2100
2101 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2102 if (!tmp_spec) {
2103 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
2104 netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
2105 return -ENOMEM;
2106 }
2107 memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2108
2109 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2110 if (err) {
2111 kvfree(tmp_spec);
2112 NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2113 netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2114 return err;
2115 }
2116 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2117 kvfree(tmp_spec);
2118 if (err)
2119 return err;
2120 }
2121
2122 if (!needs_mapping && !sets_mapping)
2123 return 0;
2124
2125 return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2126 }
2127
2128 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2129 {
2130 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2131 inner_headers);
2132 }
2133
2134 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2135 {
2136 return MLX5_ADDR_OF(fte_match_param, spec->match_value, 2137
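/* Editor's summary of mlx5e_tc_verify_tunnel_ecn() above (a sketch, not
 * normative): with the ECN bits isolated by INET_ECN_MASK, the helper
 * resolves to
 *   - partial mask on either ecn field              -> -EOPNOTSUPP
 *   - tos ecn match without an enc_tos ecn match    -> -EOPNOTSUPP
 *   - no tos ecn match                              -> offload as-is
 *   - both fully masked, enc_tos ecn == ECT(1)      -> -EOPNOTSUPP
 *   - both fully masked, enc_tos ecn == Not-ECT or ECT(0)
 *                                                   -> offload, inner match kept
 *   - enc_tos ecn == CE and tos ecn == CE           -> offload, but with
 *     *match_inner_ecn = false so the inner ecn match is dropped
 *   - enc_tos ecn == CE and tos ecn != CE           -> -EOPNOTSUPP
 */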
inner_headers); 2138 } 2139 2140 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) 2141 { 2142 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2143 outer_headers); 2144 } 2145 2146 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) 2147 { 2148 return MLX5_ADDR_OF(fte_match_param, spec->match_value, 2149 outer_headers); 2150 } 2151 2152 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) 2153 { 2154 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 2155 get_match_inner_headers_value(spec) : 2156 get_match_outer_headers_value(spec); 2157 } 2158 2159 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec) 2160 { 2161 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 2162 get_match_inner_headers_criteria(spec) : 2163 get_match_outer_headers_criteria(spec); 2164 } 2165 2166 static int mlx5e_flower_parse_meta(struct net_device *filter_dev, 2167 struct flow_cls_offload *f) 2168 { 2169 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2170 struct netlink_ext_ack *extack = f->common.extack; 2171 struct net_device *ingress_dev; 2172 struct flow_match_meta match; 2173 2174 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) 2175 return 0; 2176 2177 flow_rule_match_meta(rule, &match); 2178 if (!match.mask->ingress_ifindex) 2179 return 0; 2180 2181 if (match.mask->ingress_ifindex != 0xFFFFFFFF) { 2182 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); 2183 return -EOPNOTSUPP; 2184 } 2185 2186 ingress_dev = __dev_get_by_index(dev_net(filter_dev), 2187 match.key->ingress_ifindex); 2188 if (!ingress_dev) { 2189 NL_SET_ERR_MSG_MOD(extack, 2190 "Can't find the ingress port to match on"); 2191 return -ENOENT; 2192 } 2193 2194 if (ingress_dev != filter_dev) { 2195 NL_SET_ERR_MSG_MOD(extack, 2196 "Can't match on the ingress filter port"); 2197 return -EOPNOTSUPP; 2198 } 2199 2200 return 0; 2201 } 2202 2203 static bool skip_key_basic(struct net_device *filter_dev, 2204 struct flow_cls_offload *f) 2205 { 2206 /* When doing mpls over udp decap, the user needs to provide 2207 * MPLS_UC as the protocol in order to be able to match on mpls 2208 * label fields. However, the actual ethertype is IP so we want to 2209 * avoid matching on this, otherwise we'll fail the match. 
2210 */ 2211 if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0) 2212 return true; 2213 2214 return false; 2215 } 2216 2217 static int __parse_cls_flower(struct mlx5e_priv *priv, 2218 struct mlx5e_tc_flow *flow, 2219 struct mlx5_flow_spec *spec, 2220 struct flow_cls_offload *f, 2221 struct net_device *filter_dev, 2222 u8 *inner_match_level, u8 *outer_match_level) 2223 { 2224 struct netlink_ext_ack *extack = f->common.extack; 2225 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2226 outer_headers); 2227 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2228 outer_headers); 2229 void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2230 misc_parameters); 2231 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2232 misc_parameters); 2233 void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2234 misc_parameters_3); 2235 void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2236 misc_parameters_3); 2237 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2238 struct flow_dissector *dissector = rule->match.dissector; 2239 enum fs_flow_table_type fs_type; 2240 bool match_inner_ecn = true; 2241 u16 addr_type = 0; 2242 u8 ip_proto = 0; 2243 u8 *match_level; 2244 int err; 2245 2246 fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX; 2247 match_level = outer_match_level; 2248 2249 if (dissector->used_keys & 2250 ~(BIT(FLOW_DISSECTOR_KEY_META) | 2251 BIT(FLOW_DISSECTOR_KEY_CONTROL) | 2252 BIT(FLOW_DISSECTOR_KEY_BASIC) | 2253 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | 2254 BIT(FLOW_DISSECTOR_KEY_VLAN) | 2255 BIT(FLOW_DISSECTOR_KEY_CVLAN) | 2256 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | 2257 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | 2258 BIT(FLOW_DISSECTOR_KEY_PORTS) | 2259 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | 2260 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | 2261 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | 2262 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | 2263 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | 2264 BIT(FLOW_DISSECTOR_KEY_TCP) | 2265 BIT(FLOW_DISSECTOR_KEY_IP) | 2266 BIT(FLOW_DISSECTOR_KEY_CT) | 2267 BIT(FLOW_DISSECTOR_KEY_ENC_IP) | 2268 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | 2269 BIT(FLOW_DISSECTOR_KEY_ICMP) | 2270 BIT(FLOW_DISSECTOR_KEY_MPLS))) { 2271 NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); 2272 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", 2273 dissector->used_keys); 2274 return -EOPNOTSUPP; 2275 } 2276 2277 if (mlx5e_get_tc_tun(filter_dev)) { 2278 bool match_inner = false; 2279 2280 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, 2281 outer_match_level, &match_inner); 2282 if (err) 2283 return err; 2284 2285 if (match_inner) { 2286 /* header pointers should point to the inner headers 2287 * if the packet was decapsulated already. 2288 * outer headers are set by parse_tunnel_attr. 
2289 */ 2290 match_level = inner_match_level; 2291 headers_c = get_match_inner_headers_criteria(spec); 2292 headers_v = get_match_inner_headers_value(spec); 2293 } 2294 2295 err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn); 2296 if (err) 2297 return err; 2298 } 2299 2300 err = mlx5e_flower_parse_meta(filter_dev, f); 2301 if (err) 2302 return err; 2303 2304 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) && 2305 !skip_key_basic(filter_dev, f)) { 2306 struct flow_match_basic match; 2307 2308 flow_rule_match_basic(rule, &match); 2309 mlx5e_tc_set_ethertype(priv->mdev, &match, 2310 match_level == outer_match_level, 2311 headers_c, headers_v); 2312 2313 if (match.mask->n_proto) 2314 *match_level = MLX5_MATCH_L2; 2315 } 2316 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) || 2317 is_vlan_dev(filter_dev)) { 2318 struct flow_dissector_key_vlan filter_dev_mask; 2319 struct flow_dissector_key_vlan filter_dev_key; 2320 struct flow_match_vlan match; 2321 2322 if (is_vlan_dev(filter_dev)) { 2323 match.key = &filter_dev_key; 2324 match.key->vlan_id = vlan_dev_vlan_id(filter_dev); 2325 match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev); 2326 match.key->vlan_priority = 0; 2327 match.mask = &filter_dev_mask; 2328 memset(match.mask, 0xff, sizeof(*match.mask)); 2329 match.mask->vlan_priority = 0; 2330 } else { 2331 flow_rule_match_vlan(rule, &match); 2332 } 2333 if (match.mask->vlan_id || 2334 match.mask->vlan_priority || 2335 match.mask->vlan_tpid) { 2336 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { 2337 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2338 svlan_tag, 1); 2339 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2340 svlan_tag, 1); 2341 } else { 2342 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2343 cvlan_tag, 1); 2344 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2345 cvlan_tag, 1); 2346 } 2347 2348 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, 2349 match.mask->vlan_id); 2350 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, 2351 match.key->vlan_id); 2352 2353 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, 2354 match.mask->vlan_priority); 2355 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, 2356 match.key->vlan_priority); 2357 2358 *match_level = MLX5_MATCH_L2; 2359 } 2360 } else if (*match_level != MLX5_MATCH_NONE) { 2361 /* cvlan_tag enabled in match criteria and 2362 * disabled in match value means both S & C tags 2363 * don't exist (untagged of both) 2364 */ 2365 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 2366 *match_level = MLX5_MATCH_L2; 2367 } 2368 2369 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { 2370 struct flow_match_vlan match; 2371 2372 flow_rule_match_cvlan(rule, &match); 2373 if (match.mask->vlan_id || 2374 match.mask->vlan_priority || 2375 match.mask->vlan_tpid) { 2376 if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid, 2377 fs_type)) { 2378 NL_SET_ERR_MSG_MOD(extack, 2379 "Matching on CVLAN is not supported"); 2380 return -EOPNOTSUPP; 2381 } 2382 2383 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { 2384 MLX5_SET(fte_match_set_misc, misc_c, 2385 outer_second_svlan_tag, 1); 2386 MLX5_SET(fte_match_set_misc, misc_v, 2387 outer_second_svlan_tag, 1); 2388 } else { 2389 MLX5_SET(fte_match_set_misc, misc_c, 2390 outer_second_cvlan_tag, 1); 2391 MLX5_SET(fte_match_set_misc, misc_v, 2392 outer_second_cvlan_tag, 1); 2393 } 2394 2395 MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, 2396 match.mask->vlan_id); 2397 MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, 2398 
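/* Editor's illustration (hypothetical command, not from this file): a QinQ
 * filter such as
 *
 *	tc filter add dev $REP ingress protocol 802.1ad flower \
 *		vlan_id 100 vlan_ethtype 802.1q cvlan_id 200 action drop
 *
 * is handled by the VLAN/CVLAN blocks around this point: the outer tag
 * programs svlan_tag / first_vid / first_prio in the L2 headers, while the
 * inner tag programs outer_second_{s,c}vlan_tag and outer_second_vid /
 * outer_second_prio in misc parameters, gated on the outer_second_vid
 * firmware capability checked above.
 */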
match.key->vlan_id); 2399 MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, 2400 match.mask->vlan_priority); 2401 MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, 2402 match.key->vlan_priority); 2403 2404 *match_level = MLX5_MATCH_L2; 2405 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; 2406 } 2407 } 2408 2409 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 2410 struct flow_match_eth_addrs match; 2411 2412 flow_rule_match_eth_addrs(rule, &match); 2413 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2414 dmac_47_16), 2415 match.mask->dst); 2416 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2417 dmac_47_16), 2418 match.key->dst); 2419 2420 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2421 smac_47_16), 2422 match.mask->src); 2423 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2424 smac_47_16), 2425 match.key->src); 2426 2427 if (!is_zero_ether_addr(match.mask->src) || 2428 !is_zero_ether_addr(match.mask->dst)) 2429 *match_level = MLX5_MATCH_L2; 2430 } 2431 2432 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 2433 struct flow_match_control match; 2434 2435 flow_rule_match_control(rule, &match); 2436 addr_type = match.key->addr_type; 2437 2438 /* the HW doesn't support frag first/later */ 2439 if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { 2440 NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported"); 2441 return -EOPNOTSUPP; 2442 } 2443 2444 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { 2445 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); 2446 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 2447 match.key->flags & FLOW_DIS_IS_FRAGMENT); 2448 2449 /* the HW doesn't need L3 inline to match on frag=no */ 2450 if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) 2451 *match_level = MLX5_MATCH_L2; 2452 /* *** L2 attributes parsing up to here *** */ 2453 else 2454 *match_level = MLX5_MATCH_L3; 2455 } 2456 } 2457 2458 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 2459 struct flow_match_basic match; 2460 2461 flow_rule_match_basic(rule, &match); 2462 ip_proto = match.key->ip_proto; 2463 2464 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 2465 match.mask->ip_proto); 2466 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 2467 match.key->ip_proto); 2468 2469 if (match.mask->ip_proto) 2470 *match_level = MLX5_MATCH_L3; 2471 } 2472 2473 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 2474 struct flow_match_ipv4_addrs match; 2475 2476 flow_rule_match_ipv4_addrs(rule, &match); 2477 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2478 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2479 &match.mask->src, sizeof(match.mask->src)); 2480 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2481 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2482 &match.key->src, sizeof(match.key->src)); 2483 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2484 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2485 &match.mask->dst, sizeof(match.mask->dst)); 2486 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2487 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2488 &match.key->dst, sizeof(match.key->dst)); 2489 2490 if (match.mask->src || match.mask->dst) 2491 *match_level = MLX5_MATCH_L3; 2492 } 2493 2494 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 2495 struct flow_match_ipv6_addrs match; 2496 2497 flow_rule_match_ipv6_addrs(rule, &match); 2498 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2499 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2500 &match.mask->src, 
sizeof(match.mask->src));
2501 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2502 src_ipv4_src_ipv6.ipv6_layout.ipv6),
2503 &match.key->src, sizeof(match.key->src));
2504
2505 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2506 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2507 &match.mask->dst, sizeof(match.mask->dst));
2508 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2509 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2510 &match.key->dst, sizeof(match.key->dst));
2511
2512 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2513 ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2514 *match_level = MLX5_MATCH_L3;
2515 }
2516
2517 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2518 struct flow_match_ip match;
2519
2520 flow_rule_match_ip(rule, &match);
2521 if (match_inner_ecn) {
2522 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2523 match.mask->tos & 0x3);
2524 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2525 match.key->tos & 0x3);
2526 }
2527
2528 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2529 match.mask->tos >> 2);
2530 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2531 match.key->tos >> 2);
2532
2533 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2534 match.mask->ttl);
2535 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2536 match.key->ttl);
2537
2538 if (match.mask->ttl &&
2539 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2540 ft_field_support.outer_ipv4_ttl)) {
2541 NL_SET_ERR_MSG_MOD(extack,
2542 "Matching on TTL is not supported");
2543 return -EOPNOTSUPP;
2544 }
2545
2546 if (match.mask->tos || match.mask->ttl)
2547 *match_level = MLX5_MATCH_L3;
2548 }
2549
2550 /* *** L3 attributes parsing up to here *** */
2551
2552 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2553 struct flow_match_ports match;
2554
2555 flow_rule_match_ports(rule, &match);
2556 switch (ip_proto) {
2557 case IPPROTO_TCP:
2558 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2559 tcp_sport, ntohs(match.mask->src));
2560 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2561 tcp_sport, ntohs(match.key->src));
2562
2563 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2564 tcp_dport, ntohs(match.mask->dst));
2565 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2566 tcp_dport, ntohs(match.key->dst));
2567 break;
2568
2569 case IPPROTO_UDP:
2570 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2571 udp_sport, ntohs(match.mask->src));
2572 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2573 udp_sport, ntohs(match.key->src));
2574
2575 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2576 udp_dport, ntohs(match.mask->dst));
2577 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2578 udp_dport, ntohs(match.key->dst));
2579 break;
2580 default:
2581 NL_SET_ERR_MSG_MOD(extack,
2582 "Only UDP and TCP transports are supported for L4 matching");
2583 netdev_err(priv->netdev,
2584 "Only UDP and TCP transports are supported\n");
2585 return -EINVAL;
2586 }
2587
2588 if (match.mask->src || match.mask->dst)
2589 *match_level = MLX5_MATCH_L4;
2590 }
2591
2592 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2593 struct flow_match_tcp match;
2594
2595 flow_rule_match_tcp(rule, &match);
2596 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2597 ntohs(match.mask->flags));
2598 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2599 ntohs(match.key->flags));
2600
2601 if (match.mask->flags)
2602 *match_level = MLX5_MATCH_L4;
2603 }
2604 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
2605 struct flow_match_icmp match;
2606
2607 flow_rule_match_icmp(rule, &match);
2608 switch
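/* Editor's note: the switch below handles ICMP and ICMPv6 type/code, which
 * are matched through misc_parameters_3 and require the device's flex
 * parser capability. A hypothetical filter exercising it:
 *
 *	tc filter add dev $DEV ingress protocol ip flower \
 *		ip_proto icmp type 8 code 0 action drop
 *
 * On devices that do not advertise MLX5_FLEX_PROTO_ICMP this is rejected
 * with -EOPNOTSUPP rather than being silently mis-matched.
 */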
(ip_proto) { 2609 case IPPROTO_ICMP: 2610 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & 2611 MLX5_FLEX_PROTO_ICMP)) { 2612 NL_SET_ERR_MSG_MOD(extack, 2613 "Match on Flex protocols for ICMP is not supported"); 2614 return -EOPNOTSUPP; 2615 } 2616 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, 2617 match.mask->type); 2618 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, 2619 match.key->type); 2620 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code, 2621 match.mask->code); 2622 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code, 2623 match.key->code); 2624 break; 2625 case IPPROTO_ICMPV6: 2626 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & 2627 MLX5_FLEX_PROTO_ICMPV6)) { 2628 NL_SET_ERR_MSG_MOD(extack, 2629 "Match on Flex protocols for ICMPV6 is not supported"); 2630 return -EOPNOTSUPP; 2631 } 2632 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, 2633 match.mask->type); 2634 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, 2635 match.key->type); 2636 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code, 2637 match.mask->code); 2638 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code, 2639 match.key->code); 2640 break; 2641 default: 2642 NL_SET_ERR_MSG_MOD(extack, 2643 "Code and type matching only with ICMP and ICMPv6"); 2644 netdev_err(priv->netdev, 2645 "Code and type matching only with ICMP and ICMPv6\n"); 2646 return -EINVAL; 2647 } 2648 if (match.mask->code || match.mask->type) { 2649 *match_level = MLX5_MATCH_L4; 2650 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; 2651 } 2652 } 2653 /* Currently supported only for MPLS over UDP */ 2654 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && 2655 !netif_is_bareudp(filter_dev)) { 2656 NL_SET_ERR_MSG_MOD(extack, 2657 "Matching on MPLS is supported only for MPLS over UDP"); 2658 netdev_err(priv->netdev, 2659 "Matching on MPLS is supported only for MPLS over UDP\n"); 2660 return -EOPNOTSUPP; 2661 } 2662 2663 return 0; 2664 } 2665 2666 static int parse_cls_flower(struct mlx5e_priv *priv, 2667 struct mlx5e_tc_flow *flow, 2668 struct mlx5_flow_spec *spec, 2669 struct flow_cls_offload *f, 2670 struct net_device *filter_dev) 2671 { 2672 u8 inner_match_level, outer_match_level, non_tunnel_match_level; 2673 struct netlink_ext_ack *extack = f->common.extack; 2674 struct mlx5_core_dev *dev = priv->mdev; 2675 struct mlx5_eswitch *esw = dev->priv.eswitch; 2676 struct mlx5e_rep_priv *rpriv = priv->ppriv; 2677 struct mlx5_eswitch_rep *rep; 2678 bool is_eswitch_flow; 2679 int err; 2680 2681 inner_match_level = MLX5_MATCH_NONE; 2682 outer_match_level = MLX5_MATCH_NONE; 2683 2684 err = __parse_cls_flower(priv, flow, spec, f, filter_dev, 2685 &inner_match_level, &outer_match_level); 2686 non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? 
2687 outer_match_level : inner_match_level; 2688 2689 is_eswitch_flow = mlx5e_is_eswitch_flow(flow); 2690 if (!err && is_eswitch_flow) { 2691 rep = rpriv->rep; 2692 if (rep->vport != MLX5_VPORT_UPLINK && 2693 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && 2694 esw->offloads.inline_mode < non_tunnel_match_level)) { 2695 NL_SET_ERR_MSG_MOD(extack, 2696 "Flow is not offloaded due to min inline setting"); 2697 netdev_warn(priv->netdev, 2698 "Flow is not offloaded due to min inline setting, required %d actual %d\n", 2699 non_tunnel_match_level, esw->offloads.inline_mode); 2700 return -EOPNOTSUPP; 2701 } 2702 } 2703 2704 flow->attr->inner_match_level = inner_match_level; 2705 flow->attr->outer_match_level = outer_match_level; 2706 2707 2708 return err; 2709 } 2710 2711 struct mlx5_fields { 2712 u8 field; 2713 u8 field_bsize; 2714 u32 field_mask; 2715 u32 offset; 2716 u32 match_offset; 2717 }; 2718 2719 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \ 2720 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \ 2721 offsetof(struct pedit_headers, field) + (off), \ 2722 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} 2723 2724 /* masked values are the same and there are no rewrites that do not have a 2725 * match. 2726 */ 2727 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \ 2728 type matchmaskx = *(type *)(matchmaskp); \ 2729 type matchvalx = *(type *)(matchvalp); \ 2730 type maskx = *(type *)(maskp); \ 2731 type valx = *(type *)(valp); \ 2732 \ 2733 (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \ 2734 matchmaskx)); \ 2735 }) 2736 2737 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, 2738 void *matchmaskp, u8 bsize) 2739 { 2740 bool same = false; 2741 2742 switch (bsize) { 2743 case 8: 2744 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp); 2745 break; 2746 case 16: 2747 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp); 2748 break; 2749 case 32: 2750 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp); 2751 break; 2752 } 2753 2754 return same; 2755 } 2756 2757 static struct mlx5_fields fields[] = { 2758 OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16), 2759 OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0), 2760 OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16), 2761 OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0), 2762 OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype), 2763 OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid), 2764 2765 OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp), 2766 OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit), 2767 OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4), 2768 OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2769 2770 OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0, 2771 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]), 2772 OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0, 2773 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]), 2774 OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0, 2775 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]), 2776 OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0, 2777 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]), 2778 OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0, 2779 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]), 2780 OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0, 2781 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]), 2782 
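/* Editor's worked example for SAME_VAL_MASK()/cmp_val_mask() above (values
 * are illustrative): for an 8-bit field, a rewrite of val = 0x05 with
 * mask = 0x0f against a match of matchval = 0x15, matchmask = 0x0f gives
 * (0x05 & 0x0f) == (0x15 & 0x0f) and (0x0f & (0x0f ^ 0x0f)) == 0, so the
 * pedit writes exactly what the rule already matches on and is skipped in
 * offload_pedit_fields() below.
 */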
OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0, 2783 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]), 2784 OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, 2785 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), 2786 OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), 2787 OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp), 2788 2789 OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), 2790 OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), 2791 /* in linux iphdr tcp_flags is 8 bits long */ 2792 OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags), 2793 2794 OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport), 2795 OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), 2796 }; 2797 2798 static unsigned long mask_to_le(unsigned long mask, int size) 2799 { 2800 __be32 mask_be32; 2801 __be16 mask_be16; 2802 2803 if (size == 32) { 2804 mask_be32 = (__force __be32)(mask); 2805 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); 2806 } else if (size == 16) { 2807 mask_be32 = (__force __be32)(mask); 2808 mask_be16 = *(__be16 *)&mask_be32; 2809 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); 2810 } 2811 2812 return mask; 2813 } 2814 static int offload_pedit_fields(struct mlx5e_priv *priv, 2815 int namespace, 2816 struct pedit_headers_action *hdrs, 2817 struct mlx5e_tc_flow_parse_attr *parse_attr, 2818 u32 *action_flags, 2819 struct netlink_ext_ack *extack) 2820 { 2821 struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; 2822 void *headers_c, *headers_v, *action, *vals_p; 2823 u32 *s_masks_p, *a_masks_p, s_mask, a_mask; 2824 struct mlx5e_tc_mod_hdr_acts *mod_acts; 2825 unsigned long mask, field_mask; 2826 int i, first, last, next_z; 2827 struct mlx5_fields *f; 2828 u8 cmd; 2829 2830 mod_acts = &parse_attr->mod_hdr_acts; 2831 headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec); 2832 headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec); 2833 2834 set_masks = &hdrs[0].masks; 2835 add_masks = &hdrs[1].masks; 2836 set_vals = &hdrs[0].vals; 2837 add_vals = &hdrs[1].vals; 2838 2839 for (i = 0; i < ARRAY_SIZE(fields); i++) { 2840 bool skip; 2841 2842 f = &fields[i]; 2843 /* avoid seeing bits set from previous iterations */ 2844 s_mask = 0; 2845 a_mask = 0; 2846 2847 s_masks_p = (void *)set_masks + f->offset; 2848 a_masks_p = (void *)add_masks + f->offset; 2849 2850 s_mask = *s_masks_p & f->field_mask; 2851 a_mask = *a_masks_p & f->field_mask; 2852 2853 if (!s_mask && !a_mask) /* nothing to offload here */ 2854 continue; 2855 2856 if (s_mask && a_mask) { 2857 NL_SET_ERR_MSG_MOD(extack, 2858 "can't set and add to the same HW field"); 2859 netdev_warn(priv->netdev, 2860 "mlx5: can't set and add to the same HW field (%x)\n", 2861 f->field); 2862 return -EOPNOTSUPP; 2863 } 2864 2865 skip = false; 2866 if (s_mask) { 2867 void *match_mask = headers_c + f->match_offset; 2868 void *match_val = headers_v + f->match_offset; 2869 2870 cmd = MLX5_ACTION_TYPE_SET; 2871 mask = s_mask; 2872 vals_p = (void *)set_vals + f->offset; 2873 /* don't rewrite if we have a match on the same value */ 2874 if (cmp_val_mask(vals_p, s_masks_p, match_val, 2875 match_mask, f->field_bsize)) 2876 skip = true; 2877 /* clear to denote we consumed this field */ 2878 *s_masks_p &= ~f->field_mask; 2879 } else { 2880 cmd = MLX5_ACTION_TYPE_ADD; 2881 mask = a_mask; 2882 vals_p = (void *)add_vals + f->offset; 2883 /* add 0 is no change */ 2884 if ((*(u32 *)vals_p & f->field_mask) == 0) 2885 skip = true; 2886 /* 
clear to denote we consumed this field */ 2887 *a_masks_p &= ~f->field_mask; 2888 } 2889 if (skip) 2890 continue; 2891 2892 mask = mask_to_le(mask, f->field_bsize); 2893 2894 first = find_first_bit(&mask, f->field_bsize); 2895 next_z = find_next_zero_bit(&mask, f->field_bsize, first); 2896 last = find_last_bit(&mask, f->field_bsize); 2897 if (first < next_z && next_z < last) { 2898 NL_SET_ERR_MSG_MOD(extack, 2899 "rewrite of few sub-fields isn't supported"); 2900 netdev_warn(priv->netdev, 2901 "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", 2902 mask); 2903 return -EOPNOTSUPP; 2904 } 2905 2906 action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts); 2907 if (IS_ERR(action)) { 2908 NL_SET_ERR_MSG_MOD(extack, 2909 "too many pedit actions, can't offload"); 2910 mlx5_core_warn(priv->mdev, 2911 "mlx5: parsed %d pedit actions, can't do more\n", 2912 mod_acts->num_actions); 2913 return PTR_ERR(action); 2914 } 2915 2916 MLX5_SET(set_action_in, action, action_type, cmd); 2917 MLX5_SET(set_action_in, action, field, f->field); 2918 2919 if (cmd == MLX5_ACTION_TYPE_SET) { 2920 int start; 2921 2922 field_mask = mask_to_le(f->field_mask, f->field_bsize); 2923 2924 /* if field is bit sized it can start not from first bit */ 2925 start = find_first_bit(&field_mask, f->field_bsize); 2926 2927 MLX5_SET(set_action_in, action, offset, first - start); 2928 /* length is num of bits to be written, zero means length of 32 */ 2929 MLX5_SET(set_action_in, action, length, (last - first + 1)); 2930 } 2931 2932 if (f->field_bsize == 32) 2933 MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first); 2934 else if (f->field_bsize == 16) 2935 MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first); 2936 else if (f->field_bsize == 8) 2937 MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); 2938 2939 ++mod_acts->num_actions; 2940 } 2941 2942 return 0; 2943 } 2944 2945 static const struct pedit_headers zero_masks = {}; 2946 2947 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, 2948 struct mlx5e_tc_flow_parse_attr *parse_attr, 2949 struct pedit_headers_action *hdrs, 2950 u32 *action_flags, 2951 struct netlink_ext_ack *extack) 2952 { 2953 struct pedit_headers *cmd_masks; 2954 int err; 2955 u8 cmd; 2956 2957 err = offload_pedit_fields(priv, namespace, hdrs, parse_attr, 2958 action_flags, extack); 2959 if (err < 0) 2960 goto out_dealloc_parsed_actions; 2961 2962 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { 2963 cmd_masks = &hdrs[cmd].masks; 2964 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { 2965 NL_SET_ERR_MSG_MOD(extack, 2966 "attempt to offload an unsupported field"); 2967 netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); 2968 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, 2969 16, 1, cmd_masks, sizeof(zero_masks), true); 2970 err = -EOPNOTSUPP; 2971 goto out_dealloc_parsed_actions; 2972 } 2973 } 2974 2975 return 0; 2976 2977 out_dealloc_parsed_actions: 2978 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); 2979 return err; 2980 } 2981 2982 struct ip_ttl_word { 2983 __u8 ttl; 2984 __u8 protocol; 2985 __sum16 check; 2986 }; 2987 2988 struct ipv6_hoplimit_word { 2989 __be16 payload_len; 2990 __u8 nexthdr; 2991 __u8 hop_limit; 2992 }; 2993 2994 static bool 2995 is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow, 2996 bool *modify_ip_header, bool *modify_tuple, 2997 struct netlink_ext_ack *extack) 2998 { 2999 u32 mask, offset; 3000 u8 htype; 3001 3002 htype 
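/* Editor's worked example for the mask handling in offload_pedit_fields()
 * above (illustrative numbers): for a 32-bit field, a (le-converted)
 * rewrite mask of 0x00ffff00 yields first = 8, next_z = 24, last = 23 ->
 * one contiguous run, offloaded with length = last - first + 1 = 16.
 * A mask of 0xff00ff00 yields first = 8, next_z = 16, last = 31, so
 * first < next_z < last holds and the rewrite of two sub-fields is
 * rejected with -EOPNOTSUPP.
 */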
= act->mangle.htype; 3003 offset = act->mangle.offset; 3004 mask = ~act->mangle.mask; 3005 /* For IPv4 & IPv6 header check 4 byte word, 3006 * to determine that modified fields 3007 * are NOT ttl & hop_limit only. 3008 */ 3009 if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { 3010 struct ip_ttl_word *ttl_word = 3011 (struct ip_ttl_word *)&mask; 3012 3013 if (offset != offsetof(struct iphdr, ttl) || 3014 ttl_word->protocol || 3015 ttl_word->check) { 3016 *modify_ip_header = true; 3017 } 3018 3019 if (offset >= offsetof(struct iphdr, saddr)) 3020 *modify_tuple = true; 3021 3022 if (ct_flow && *modify_tuple) { 3023 NL_SET_ERR_MSG_MOD(extack, 3024 "can't offload re-write of ipv4 address with action ct"); 3025 return false; 3026 } 3027 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { 3028 struct ipv6_hoplimit_word *hoplimit_word = 3029 (struct ipv6_hoplimit_word *)&mask; 3030 3031 if (offset != offsetof(struct ipv6hdr, payload_len) || 3032 hoplimit_word->payload_len || 3033 hoplimit_word->nexthdr) { 3034 *modify_ip_header = true; 3035 } 3036 3037 if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) 3038 *modify_tuple = true; 3039 3040 if (ct_flow && *modify_tuple) { 3041 NL_SET_ERR_MSG_MOD(extack, 3042 "can't offload re-write of ipv6 address with action ct"); 3043 return false; 3044 } 3045 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP || 3046 htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) { 3047 *modify_tuple = true; 3048 if (ct_flow) { 3049 NL_SET_ERR_MSG_MOD(extack, 3050 "can't offload re-write of transport header ports with action ct"); 3051 return false; 3052 } 3053 } 3054 3055 return true; 3056 } 3057 3058 static bool modify_tuple_supported(bool modify_tuple, bool ct_clear, 3059 bool ct_flow, struct netlink_ext_ack *extack, 3060 struct mlx5e_priv *priv, 3061 struct mlx5_flow_spec *spec) 3062 { 3063 if (!modify_tuple || ct_clear) 3064 return true; 3065 3066 if (ct_flow) { 3067 NL_SET_ERR_MSG_MOD(extack, 3068 "can't offload tuple modification with non-clear ct()"); 3069 netdev_info(priv->netdev, 3070 "can't offload tuple modification with non-clear ct()"); 3071 return false; 3072 } 3073 3074 /* Add ct_state=-trk match so it will be offloaded for non ct flows 3075 * (or after clear action), as otherwise, since the tuple is changed, 3076 * we can't restore ct state 3077 */ 3078 if (mlx5_tc_ct_add_no_trk_match(spec)) { 3079 NL_SET_ERR_MSG_MOD(extack, 3080 "can't offload tuple modification with ct matches and no ct(clear) action"); 3081 netdev_info(priv->netdev, 3082 "can't offload tuple modification with ct matches and no ct(clear) action"); 3083 return false; 3084 } 3085 3086 return true; 3087 } 3088 3089 static bool modify_header_match_supported(struct mlx5e_priv *priv, 3090 struct mlx5_flow_spec *spec, 3091 struct flow_action *flow_action, 3092 u32 actions, bool ct_flow, 3093 bool ct_clear, 3094 struct netlink_ext_ack *extack) 3095 { 3096 const struct flow_action_entry *act; 3097 bool modify_ip_header, modify_tuple; 3098 void *headers_c; 3099 void *headers_v; 3100 u16 ethertype; 3101 u8 ip_proto; 3102 int i; 3103 3104 headers_c = mlx5e_get_match_headers_criteria(actions, spec); 3105 headers_v = mlx5e_get_match_headers_value(actions, spec); 3106 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); 3107 3108 /* for non-IP we only re-write MACs, so we're okay */ 3109 if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 && 3110 ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) 3111 goto out_ok; 3112 3113 modify_ip_header = false; 3114 modify_tuple = false; 3115 
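/* Editor's note on is_action_keys_supported() above: because the inverted
 * pedit mask is inspected one 4-byte word at a time, a mangle that touches
 * only iphdr.ttl (offset == offsetof(struct iphdr, ttl), with the protocol
 * and check bits of that word clear) does not set modify_ip_header, so a
 * plain TTL rewrite, e.g. a hypothetical
 *	action pedit ex munge ip ttl set 63
 * stays offloadable regardless of ip_proto; any other modified bit in that
 * word flips modify_ip_header and triggers the TCP/UDP/ICMP-only check
 * below.
 */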
flow_action_for_each(i, act, flow_action) {
3116 if (act->id != FLOW_ACTION_MANGLE &&
3117 act->id != FLOW_ACTION_ADD)
3118 continue;
3119
3120 if (!is_action_keys_supported(act, ct_flow,
3121 &modify_ip_header,
3122 &modify_tuple, extack))
3123 return false;
3124 }
3125
3126 if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
3127 priv, spec))
3128 return false;
3129
3130 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3131 if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3132 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3133 NL_SET_ERR_MSG_MOD(extack,
3134 "can't offload re-write of non TCP/UDP");
3135 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3136 ip_proto);
3137 return false;
3138 }
3139
3140 out_ok:
3141 return true;
3142 }
3143
3144 static bool
3145 actions_match_supported_fdb(struct mlx5e_priv *priv,
3146 struct mlx5e_tc_flow_parse_attr *parse_attr,
3147 struct mlx5e_tc_flow *flow,
3148 struct netlink_ext_ack *extack)
3149 {
3150 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3151 bool ct_flow, ct_clear;
3152
3153 ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3154 ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3155
3156 if (esw_attr->split_count && ct_flow &&
3157 !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) {
3158 /* All registers used by ct are cleared when using
3159 * split rules.
3160 */
3161 NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
3162 return false;
3163 }
3164
3165 if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3166 NL_SET_ERR_MSG_MOD(extack,
3167 "current firmware doesn't support split rule for port mirroring");
3168 netdev_warn_once(priv->netdev,
3169 "current firmware doesn't support split rule for port mirroring\n");
3170 return false;
3171 }
3172
3173 return true;
3174 }
3175
3176 static bool
3177 actions_match_supported(struct mlx5e_priv *priv,
3178 struct flow_action *flow_action,
3179 struct mlx5e_tc_flow_parse_attr *parse_attr,
3180 struct mlx5e_tc_flow *flow,
3181 struct netlink_ext_ack *extack)
3182 {
3183 u32 actions = flow->attr->action;
3184 bool ct_flow, ct_clear;
3185
3186 ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3187 ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3188
3189 if (!(actions &
3190 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3191 NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
3192 return false;
3193 }
3194
3195 if (!(~actions &
3196 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3197 NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
3198 return false;
3199 }
3200
3201 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3202 actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
3203 NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
3204 return false;
3205 }
3206
3219 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3220 !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
3221 actions, ct_flow, ct_clear, extack))
3222 return false; 3223 3224 if (mlx5e_is_eswitch_flow(flow) && 3225 !actions_match_supported_fdb(priv, parse_attr, flow, extack)) 3226 return false; 3227 3228 return true; 3229 } 3230 3231 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 3232 { 3233 return priv->mdev == peer_priv->mdev; 3234 } 3235 3236 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 3237 { 3238 struct mlx5_core_dev *fmdev, *pmdev; 3239 u64 fsystem_guid, psystem_guid; 3240 3241 fmdev = priv->mdev; 3242 pmdev = peer_priv->mdev; 3243 3244 fsystem_guid = mlx5_query_nic_system_image_guid(fmdev); 3245 psystem_guid = mlx5_query_nic_system_image_guid(pmdev); 3246 3247 return (fsystem_guid == psystem_guid); 3248 } 3249 3250 static int 3251 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, 3252 struct flow_action *flow_action) 3253 { 3254 struct netlink_ext_ack *extack = parse_state->extack; 3255 struct mlx5e_tc_flow *flow = parse_state->flow; 3256 struct mlx5_flow_attr *attr = flow->attr; 3257 enum mlx5_flow_namespace_type ns_type; 3258 struct mlx5e_priv *priv = flow->priv; 3259 const struct flow_action_entry *act; 3260 struct mlx5e_tc_act *tc_act; 3261 int err, i; 3262 3263 ns_type = mlx5e_get_flow_namespace(flow); 3264 3265 flow_action_for_each(i, act, flow_action) { 3266 tc_act = mlx5e_tc_act_get(act->id, ns_type); 3267 if (!tc_act) { 3268 NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action"); 3269 return -EOPNOTSUPP; 3270 } 3271 3272 if (!tc_act->can_offload(parse_state, act, i)) 3273 return -EOPNOTSUPP; 3274 3275 err = tc_act->parse_action(parse_state, act, priv, attr); 3276 if (err) 3277 return err; 3278 } 3279 3280 flow_action_for_each(i, act, flow_action) { 3281 tc_act = mlx5e_tc_act_get(act->id, ns_type); 3282 if (!tc_act || !tc_act->post_parse || 3283 !tc_act->can_offload(parse_state, act, i)) 3284 continue; 3285 3286 err = tc_act->post_parse(parse_state, priv, attr); 3287 if (err) 3288 return err; 3289 } 3290 3291 return 0; 3292 } 3293 3294 static int 3295 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, 3296 struct mlx5e_tc_flow *flow, 3297 struct mlx5_flow_attr *attr, 3298 struct pedit_headers_action *hdrs, 3299 struct netlink_ext_ack *extack) 3300 { 3301 struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; 3302 enum mlx5_flow_namespace_type ns_type; 3303 int err; 3304 3305 if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits && 3306 !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) 3307 return 0; 3308 3309 ns_type = mlx5e_get_flow_namespace(flow); 3310 3311 err = alloc_tc_pedit_action(priv, ns_type, parse_attr, hdrs, 3312 &attr->action, extack); 3313 if (err) 3314 return err; 3315 3316 if (parse_attr->mod_hdr_acts.num_actions > 0) 3317 return 0; 3318 3319 /* In case all pedit actions are skipped, remove the MOD_HDR flag. 
*/ 3320 attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3321 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); 3322 3323 if (ns_type != MLX5_FLOW_NAMESPACE_FDB) 3324 return 0; 3325 3326 if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || 3327 (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) 3328 attr->esw_attr->split_count = 0; 3329 3330 return 0; 3331 } 3332 3333 static int 3334 flow_action_supported(struct flow_action *flow_action, 3335 struct netlink_ext_ack *extack) 3336 { 3337 if (!flow_action_has_entries(flow_action)) { 3338 NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); 3339 return -EINVAL; 3340 } 3341 3342 if (!flow_action_hw_stats_check(flow_action, extack, 3343 FLOW_ACTION_HW_STATS_DELAYED_BIT)) { 3344 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); 3345 return -EOPNOTSUPP; 3346 } 3347 3348 return 0; 3349 } 3350 3351 static int 3352 parse_tc_nic_actions(struct mlx5e_priv *priv, 3353 struct flow_action *flow_action, 3354 struct mlx5e_tc_flow *flow, 3355 struct netlink_ext_ack *extack) 3356 { 3357 struct mlx5e_tc_act_parse_state *parse_state; 3358 struct mlx5e_tc_flow_parse_attr *parse_attr; 3359 struct mlx5_flow_attr *attr = flow->attr; 3360 struct pedit_headers_action *hdrs; 3361 int err; 3362 3363 err = flow_action_supported(flow_action, extack); 3364 if (err) 3365 return err; 3366 3367 attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; 3368 parse_attr = attr->parse_attr; 3369 parse_state = &parse_attr->parse_state; 3370 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); 3371 parse_state->ct_priv = get_ct_priv(priv); 3372 hdrs = parse_state->hdrs; 3373 3374 err = parse_tc_actions(parse_state, flow_action); 3375 if (err) 3376 return err; 3377 3378 err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); 3379 if (err) 3380 return err; 3381 3382 if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) 3383 return -EOPNOTSUPP; 3384 3385 return 0; 3386 } 3387 3388 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, 3389 struct net_device *peer_netdev) 3390 { 3391 struct mlx5e_priv *peer_priv; 3392 3393 peer_priv = netdev_priv(peer_netdev); 3394 3395 return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && 3396 mlx5e_eswitch_vf_rep(priv->netdev) && 3397 mlx5e_eswitch_vf_rep(peer_netdev) && 3398 mlx5e_same_hw_devs(priv, peer_priv)); 3399 } 3400 3401 static bool same_hw_reps(struct mlx5e_priv *priv, 3402 struct net_device *peer_netdev) 3403 { 3404 struct mlx5e_priv *peer_priv; 3405 3406 peer_priv = netdev_priv(peer_netdev); 3407 3408 return mlx5e_eswitch_rep(priv->netdev) && 3409 mlx5e_eswitch_rep(peer_netdev) && 3410 mlx5e_same_hw_devs(priv, peer_priv); 3411 } 3412 3413 static bool is_lag_dev(struct mlx5e_priv *priv, 3414 struct net_device *peer_netdev) 3415 { 3416 return ((mlx5_lag_is_sriov(priv->mdev) || 3417 mlx5_lag_is_multipath(priv->mdev)) && 3418 same_hw_reps(priv, peer_netdev)); 3419 } 3420 3421 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, 3422 struct net_device *out_dev) 3423 { 3424 if (is_merged_eswitch_vfs(priv, out_dev)) 3425 return true; 3426 3427 if (is_lag_dev(priv, out_dev)) 3428 return true; 3429 3430 return mlx5e_eswitch_rep(out_dev) && 3431 same_port_devs(priv, netdev_priv(out_dev)); 3432 } 3433 3434 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, 3435 struct mlx5_flow_attr *attr, 3436 int ifindex, 3437 enum mlx5e_tc_int_port_type type, 3438 u32 *action, 3439 int out_index) 3440 { 3441 struct mlx5_esw_flow_attr 
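/* Editor's sketch of mlx5e_set_fwd_to_int_port_actions() (this function):
 * forwarding to an internal port is implemented by rewriting the source
 * vport metadata in reg_c0, roughly
 *
 *	mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
 *				  MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
 *				  metadata);
 *
 * followed by a jump back to the root FDB table (attr->dest_chain = 0) so
 * the packet is re-steered as if it had been received from that port. The
 * metadata value itself comes from mlx5e_tc_int_port_get_metadata().
 */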
*esw_attr = attr->esw_attr; 3442 struct mlx5e_tc_int_port_priv *int_port_priv; 3443 struct mlx5e_tc_flow_parse_attr *parse_attr; 3444 struct mlx5e_tc_int_port *dest_int_port; 3445 int err; 3446 3447 parse_attr = attr->parse_attr; 3448 int_port_priv = mlx5e_get_int_port_priv(priv); 3449 3450 dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type); 3451 if (IS_ERR(dest_int_port)) 3452 return PTR_ERR(dest_int_port); 3453 3454 err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, 3455 MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, 3456 mlx5e_tc_int_port_get_metadata(dest_int_port)); 3457 if (err) { 3458 mlx5e_tc_int_port_put(int_port_priv, dest_int_port); 3459 return err; 3460 } 3461 3462 *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3463 3464 esw_attr->dest_int_port = dest_int_port; 3465 esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; 3466 3467 /* Forward to root fdb for matching against the new source vport */ 3468 attr->dest_chain = 0; 3469 3470 return 0; 3471 } 3472 3473 static int 3474 parse_tc_fdb_actions(struct mlx5e_priv *priv, 3475 struct flow_action *flow_action, 3476 struct mlx5e_tc_flow *flow, 3477 struct netlink_ext_ack *extack) 3478 { 3479 struct mlx5e_tc_act_parse_state *parse_state; 3480 struct mlx5e_tc_flow_parse_attr *parse_attr; 3481 struct mlx5_flow_attr *attr = flow->attr; 3482 struct mlx5_esw_flow_attr *esw_attr; 3483 struct pedit_headers_action *hdrs; 3484 int err; 3485 3486 err = flow_action_supported(flow_action, extack); 3487 if (err) 3488 return err; 3489 3490 esw_attr = attr->esw_attr; 3491 parse_attr = attr->parse_attr; 3492 parse_state = &parse_attr->parse_state; 3493 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); 3494 parse_state->ct_priv = get_ct_priv(priv); 3495 hdrs = parse_state->hdrs; 3496 3497 err = parse_tc_actions(parse_state, flow_action); 3498 if (err) 3499 return err; 3500 3501 /* Forward to/from internal port can only have 1 dest */ 3502 if ((netif_is_ovs_master(parse_attr->filter_dev) || esw_attr->dest_int_port) && 3503 esw_attr->out_count > 1) { 3504 NL_SET_ERR_MSG_MOD(extack, 3505 "Rules with internal port can have only one destination"); 3506 return -EOPNOTSUPP; 3507 } 3508 3509 err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); 3510 if (err) 3511 return err; 3512 3513 if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) 3514 return -EOPNOTSUPP; 3515 3516 return 0; 3517 } 3518 3519 static void get_flags(int flags, unsigned long *flow_flags) 3520 { 3521 unsigned long __flow_flags = 0; 3522 3523 if (flags & MLX5_TC_FLAG(INGRESS)) 3524 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); 3525 if (flags & MLX5_TC_FLAG(EGRESS)) 3526 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS); 3527 3528 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) 3529 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); 3530 if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) 3531 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); 3532 if (flags & MLX5_TC_FLAG(FT_OFFLOAD)) 3533 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT); 3534 3535 *flow_flags = __flow_flags; 3536 } 3537 3538 static const struct rhashtable_params tc_ht_params = { 3539 .head_offset = offsetof(struct mlx5e_tc_flow, node), 3540 .key_offset = offsetof(struct mlx5e_tc_flow, cookie), 3541 .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), 3542 .automatic_shrinking = true, 3543 }; 3544 3545 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, 3546 unsigned long flags) 3547 { 3548 struct mlx5_eswitch *esw = 
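/* Editor's note: tc_ht_params above keys the hashtable by the flow_cls
 * cookie stored in struct mlx5e_tc_flow, so lookup and insert reduce to
 *
 *	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
 *	rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
 *
 * as done under RCU in mlx5e_configure_flower()/mlx5e_delete_flower()
 * below.
 */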
priv->mdev->priv.eswitch; 3549 struct mlx5e_rep_priv *uplink_rpriv; 3550 3551 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { 3552 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 3553 return &uplink_rpriv->uplink_priv.tc_ht; 3554 } else /* NIC offload */ 3555 return &priv->fs.tc.ht; 3556 } 3557 3558 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) 3559 { 3560 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; 3561 struct mlx5_flow_attr *attr = flow->attr; 3562 bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK && 3563 flow_flag_test(flow, INGRESS); 3564 bool act_is_encap = !!(attr->action & 3565 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); 3566 bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom, 3567 MLX5_DEVCOM_ESW_OFFLOADS); 3568 3569 if (!esw_paired) 3570 return false; 3571 3572 if ((mlx5_lag_is_sriov(esw_attr->in_mdev) || 3573 mlx5_lag_is_multipath(esw_attr->in_mdev)) && 3574 (is_rep_ingress || act_is_encap)) 3575 return true; 3576 3577 return false; 3578 } 3579 3580 struct mlx5_flow_attr * 3581 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) 3582 { 3583 u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ? 3584 sizeof(struct mlx5_esw_flow_attr) : 3585 sizeof(struct mlx5_nic_flow_attr); 3586 struct mlx5_flow_attr *attr; 3587 3588 return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); 3589 } 3590 3591 static int 3592 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, 3593 struct flow_cls_offload *f, unsigned long flow_flags, 3594 struct mlx5e_tc_flow_parse_attr **__parse_attr, 3595 struct mlx5e_tc_flow **__flow) 3596 { 3597 struct mlx5e_tc_flow_parse_attr *parse_attr; 3598 struct mlx5_flow_attr *attr; 3599 struct mlx5e_tc_flow *flow; 3600 int err = -ENOMEM; 3601 int out_index; 3602 3603 flow = kzalloc(sizeof(*flow), GFP_KERNEL); 3604 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); 3605 if (!parse_attr || !flow) 3606 goto err_free; 3607 3608 flow->flags = flow_flags; 3609 flow->cookie = f->cookie; 3610 flow->priv = priv; 3611 3612 attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow)); 3613 if (!attr) 3614 goto err_free; 3615 3616 flow->attr = attr; 3617 3618 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) 3619 INIT_LIST_HEAD(&flow->encaps[out_index].list); 3620 INIT_LIST_HEAD(&flow->hairpin); 3621 INIT_LIST_HEAD(&flow->l3_to_l2_reformat); 3622 refcount_set(&flow->refcnt, 1); 3623 init_completion(&flow->init_done); 3624 init_completion(&flow->del_hw_done); 3625 3626 *__flow = flow; 3627 *__parse_attr = parse_attr; 3628 3629 return 0; 3630 3631 err_free: 3632 kfree(flow); 3633 kvfree(parse_attr); 3634 return err; 3635 } 3636 3637 static void 3638 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr, 3639 struct mlx5e_tc_flow_parse_attr *parse_attr, 3640 struct flow_cls_offload *f) 3641 { 3642 attr->parse_attr = parse_attr; 3643 attr->chain = f->common.chain_index; 3644 attr->prio = f->common.prio; 3645 } 3646 3647 static void 3648 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr, 3649 struct mlx5e_priv *priv, 3650 struct mlx5e_tc_flow_parse_attr *parse_attr, 3651 struct flow_cls_offload *f, 3652 struct mlx5_eswitch_rep *in_rep, 3653 struct mlx5_core_dev *in_mdev) 3654 { 3655 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 3656 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; 3657 3658 mlx5e_flow_attr_init(attr, parse_attr, f); 3659 3660 esw_attr->in_rep = in_rep; 3661 esw_attr->in_mdev = in_mdev; 3662 3663 if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == 3664 
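/* Editor's note on mlx5_alloc_flow_attr() above: the FDB/NIC specific
 * attribute is carved from the same kzalloc() as struct mlx5_flow_attr
 * itself (sizeof(*attr) + ex_attr_size), so attr->esw_attr / attr->nic_attr
 * effectively address the trailing bytes of one allocation and are freed
 * together with the attr; there is no separate lifetime to manage.
 */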
MLX5_COUNTER_SOURCE_ESWITCH)
3665 esw_attr->counter_dev = in_mdev;
3666 else
3667 esw_attr->counter_dev = priv->mdev;
3668 }
3669
3670 static struct mlx5e_tc_flow *
3671 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
3672 struct flow_cls_offload *f,
3673 unsigned long flow_flags,
3674 struct net_device *filter_dev,
3675 struct mlx5_eswitch_rep *in_rep,
3676 struct mlx5_core_dev *in_mdev)
3677 {
3678 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
3679 struct netlink_ext_ack *extack = f->common.extack;
3680 struct mlx5e_tc_flow_parse_attr *parse_attr;
3681 struct mlx5e_tc_flow *flow;
3682 int attr_size, err;
3683
3684 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
3685 attr_size = sizeof(struct mlx5_esw_flow_attr);
3686 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
3687 &parse_attr, &flow);
3688 if (err)
3689 goto out;
3690
3691 parse_attr->filter_dev = filter_dev;
3692 mlx5e_flow_esw_attr_init(flow->attr,
3693 priv, parse_attr,
3694 f, in_rep, in_mdev);
3695
3696 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
3697 f, filter_dev);
3698 if (err)
3699 goto err_free;
3700
3701 /* actions validation depends on parsing the ct matches first */
3702 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
3703 &flow->attr->ct_attr, extack);
3704 if (err)
3705 goto err_free;
3706
3707 /* always set IP version for indirect table handling */
3708 flow->attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
3709
3710 err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
3711 if (err)
3712 goto err_free;
3713
3714 err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
3715 complete_all(&flow->init_done);
3716 if (err) {
3717 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
3718 goto err_free;
3719
3720 add_unready_flow(flow);
3721 }
3722
3723 return flow;
3724
3725 err_free:
3726 mlx5e_flow_put(priv, flow);
3727 out:
3728 return ERR_PTR(err);
3729 }
3730
3731 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
3732 struct mlx5e_tc_flow *flow,
3733 unsigned long flow_flags)
3734 {
3735 struct mlx5e_priv *priv = flow->priv, *peer_priv;
3736 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
3737 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
3738 struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
3739 struct mlx5e_tc_flow_parse_attr *parse_attr;
3740 struct mlx5e_rep_priv *peer_urpriv;
3741 struct mlx5e_tc_flow *peer_flow;
3742 struct mlx5_core_dev *in_mdev;
3743 int err = 0;
3744
3745 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
3746 if (!peer_esw)
3747 return -ENODEV;
3748
3749 peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
3750 peer_priv = netdev_priv(peer_urpriv->netdev);
3751
3752 /* in_mdev is the mdev that the packet originated from.
3753 * So packets redirected to the uplink use the same mdev as the
3754 * original flow, and packets redirected from the uplink use the
3755 * peer mdev.
	 */
	if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
		in_mdev = peer_priv->mdev;
	else
		in_mdev = priv->mdev;

	parse_attr = flow->attr->parse_attr;
	peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
					 parse_attr->filter_dev,
					 attr->in_rep, in_mdev);
	if (IS_ERR(peer_flow)) {
		err = PTR_ERR(peer_flow);
		goto out;
	}

	flow->peer_flow = peer_flow;
	flow_flag_set(flow, DUP);
	mutex_lock(&esw->offloads.peer_mutex);
	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
	mutex_unlock(&esw->offloads.peer_mutex);

out:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return err;
}

static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
	struct mlx5_core_dev *in_mdev = priv->mdev;
	struct mlx5e_tc_flow *flow;
	int err;

	flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
				    in_mdev);
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (is_peer_flow_needed(flow)) {
		err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
		if (err) {
			mlx5e_tc_del_fdb_flow(priv, flow);
			goto out;
		}
	}

	*__flow = flow;

	return 0;

out:
	return err;
}

static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
	int attr_size, err;

	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
		if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
			return -EOPNOTSUPP;
	} else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
		return -EOPNOTSUPP;
	}

	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
	attr_size = sizeof(struct mlx5_nic_flow_attr);
	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
			       &parse_attr, &flow);
	if (err)
		goto out;

	parse_attr->filter_dev = filter_dev;
	mlx5e_flow_attr_init(flow->attr, parse_attr, f);

	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
			       f, filter_dev);
	if (err)
		goto err_free;

	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
				   &flow->attr->ct_attr, extack);
	if (err)
		goto err_free;

	err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
	if (err)
		goto err_free;

	err = mlx5e_tc_add_nic_flow(priv, flow, extack);
	if (err)
		goto err_free;

	flow_flag_set(flow, OFFLOADED);
	*__flow = flow;

	return 0;

err_free:
	flow_flag_set(flow, FAILED);
	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
	mlx5e_flow_put(priv, flow);
out:
	return err;
}
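
/* Dispatch a classifier rule to FDB (switchdev) offload when the
 * eswitch is in offloads mode, and to NIC offload otherwise.
 */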
static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
		  struct flow_cls_offload *f,
		  unsigned long flags,
		  struct net_device *filter_dev,
		  struct mlx5e_tc_flow **flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long flow_flags;
	int err;

	get_flags(flags, &flow_flags);

	if (!tc_can_offload_extack(priv->netdev, f->common.extack))
		return -EOPNOTSUPP;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		err = mlx5e_add_fdb_flow(priv, f, flow_flags,
					 filter_dev, flow);
	else
		err = mlx5e_add_nic_flow(priv, f, flow_flags,
					 filter_dev, flow);

	return err;
}

static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
					   struct mlx5e_rep_priv *rpriv)
{
	/* An offloaded flow rule may be duplicated on a non-uplink
	 * representor that shares a tc block with other slaves of a lag
	 * device. rpriv can be NULL when this function is called in NIC
	 * mode.
	 */
	return netif_is_lag_port(dev) && rpriv &&
	       rpriv->rep->vport != MLX5_VPORT_UPLINK;
}

int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
			   struct flow_cls_offload *f, unsigned long flags)
{
	struct netlink_ext_ack *extack = f->common.extack;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_tc_flow *flow;
	int err = 0;

	if (!mlx5_esw_hold(priv->mdev))
		return -EAGAIN;

	mlx5_esw_get(priv->mdev);

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	if (flow) {
		/* The same flow rule was already offloaded to a non-uplink
		 * representor sharing the tc block; just return 0.
		 */
		if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
			goto rcu_unlock;

		NL_SET_ERR_MSG_MOD(extack,
				   "flow cookie already exists, ignoring");
		netdev_warn_once(priv->netdev,
				 "flow cookie %lx already exists, ignoring\n",
				 f->cookie);
		err = -EEXIST;
		goto rcu_unlock;
	}
rcu_unlock:
	rcu_read_unlock();
	if (flow)
		goto out;

	trace_mlx5e_configure_flower(f);
	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
	if (err)
		goto out;

	/* The flow rule was offloaded to a non-uplink representor sharing
	 * the tc block; set the flow's owner dev.
	 */
	if (is_flow_rule_duplicate_allowed(dev, rpriv))
		flow->orig_dev = dev;

	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
	if (err)
		goto err_free;

	mlx5_esw_release(priv->mdev);
	return 0;

err_free:
	mlx5e_flow_put(priv, flow);
out:
	mlx5_esw_put(priv->mdev);
	mlx5_esw_release(priv->mdev);
	return err;
}

static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
{
	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));

	return flow_flag_test(flow, INGRESS) == dir_ingress &&
		flow_flag_test(flow, EGRESS) == dir_egress;
}

int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
			struct flow_cls_offload *f, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_tc_flow *flow;
	int err;

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	if (!flow || !same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	/* Only delete the flow if it doesn't have the
	 * MLX5E_TC_FLOW_FLAG_DELETED flag set yet.
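	 * flow_flag_test_and_set() lets concurrent deleters race safely:
	 * only the first caller observes the flag clear and proceeds to
	 * remove the flow from the hashtable.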
	 */
	if (flow_flag_test_and_set(flow, DELETED)) {
		err = -EINVAL;
		goto errout;
	}
	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
	rcu_read_unlock();

	trace_mlx5e_delete_flower(f);
	mlx5e_flow_put(priv, flow);

	mlx5_esw_put(priv->mdev);
	return 0;

errout:
	rcu_read_unlock();
	return err;
}

int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
		       struct flow_cls_offload *f, unsigned long flags)
{
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5_eswitch *peer_esw;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	u64 lastuse = 0;
	u64 packets = 0;
	u64 bytes = 0;
	int err = 0;

	rcu_read_lock();
	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
						tc_ht_params));
	rcu_read_unlock();
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (!same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
		counter = mlx5e_tc_get_counter(flow);
		if (!counter)
			goto errout;

		mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
	}

	/* Under multipath it's possible for one rule to be currently
	 * un-offloaded while the other rule is offloaded.
	 */
	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		goto out;

	if (flow_flag_test(flow, DUP) &&
	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
		u64 bytes2;
		u64 packets2;
		u64 lastuse2;

		counter = mlx5e_tc_get_counter(flow->peer_flow);
		if (!counter)
			goto no_peer_counter;
		mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);

		bytes += bytes2;
		packets += packets2;
		lastuse = max_t(u64, lastuse, lastuse2);
	}

no_peer_counter:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out:
	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
			  FLOW_ACTION_HW_STATS_DELAYED);
	trace_mlx5e_stats_flower(f);
errout:
	mlx5e_flow_put(priv, flow);
	return err;
}

static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
			       struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch *esw;
	u32 rate_mbps = 0;
	u16 vport_num;
	int err;

	vport_num = rpriv->rep->vport;
	if (vport_num >= MLX5_VPORT_ECPF) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Ingress rate limit is supported only for Eswitch ports connected to VFs");
		return -EOPNOTSUPP;
	}

	esw = priv->mdev->priv.eswitch;
	/* rate is given in bytes/sec.
	 * First convert to bits/sec and then round to the nearest
	 * Mbit/sec (1 Mbit = 10^6 bits). Moreover, if the rate is
	 * non-zero we configure a minimum of 1 Mbit/sec.
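	 * For example, rate = 1312500 bytes/sec is 10500000 bits/sec,
	 * which rounds up to 11 Mbit/sec, while rate = 1 byte/sec rounds
	 * down to 0 and is then clamped to the 1 Mbit/sec minimum.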
	 */
	if (rate) {
		rate = (rate * BITS_PER_BYTE) + 500000;
		do_div(rate, 1000000);
		rate_mbps = max_t(u32, rate, 1);
	}

	err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");

	return err;
}

static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
					struct flow_action *flow_action,
					struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	const struct flow_action_entry *act;
	int err;
	int i;

	if (!flow_action_has_entries(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
		return -EINVAL;
	}

	if (!flow_offload_has_one_action(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
		return -EOPNOTSUPP;
	}

	if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
		NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
		return -EOPNOTSUPP;
	}

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_POLICE:
			if (act->police.rate_pkt_ps) {
				NL_SET_ERR_MSG_MOD(extack, "QoS offload does not support packets-per-second policing");
				return -EOPNOTSUPP;
			}
			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
			if (err)
				return err;

			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
				struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	if (ma->common.prio != 1) {
		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
		return -EINVAL;
	}

	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
}

int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	return apply_police_params(priv, 0, extack);
}

void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct rtnl_link_stats64 cur_stats;
	u64 dbytes;
	u64 dpkts;

	cur_stats = priv->stats.vf_vport;
	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
	rpriv->prev_vf_vport_stats = cur_stats;
	flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
			  FLOW_ACTION_HW_STATS_DELAYED);
}

static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
					      struct mlx5e_priv *peer_priv)
{
	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
	struct mlx5e_hairpin_entry *hpe, *tmp;
	LIST_HEAD(init_wait_list);
	u16 peer_vhca_id;
	int bkt;

	if (!mlx5e_same_hw_devs(priv, peer_priv))
		return;

	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);

	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
		if (refcount_inc_not_zero(&hpe->refcnt))
			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
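	/* Each entry collected above holds an extra reference taken via
	 * refcount_inc_not_zero(), keeping the hpe alive until the
	 * mlx5e_hairpin_put() below, after the lock is dropped.
	 */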
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
		wait_for_completion(&hpe->res_ready);
		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
			mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);

		mlx5e_hairpin_put(priv, hpe);
	}
}
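
/* Netdev notifier: when a peer mlx5e netdev on the same HW is being
 * unregistered, clear hairpin resources that still point at it.
 */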
static int mlx5e_tc_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_flow_steering *fs;
	struct mlx5e_priv *peer_priv;
	struct mlx5e_tc_table *tc;
	struct mlx5e_priv *priv;

	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
	    event != NETDEV_UNREGISTER ||
	    ndev->reg_state == NETREG_REGISTERED)
		return NOTIFY_DONE;

	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
	fs = container_of(tc, struct mlx5e_flow_steering, tc);
	priv = container_of(fs, struct mlx5e_priv, fs);
	peer_priv = netdev_priv(ndev);
	if (priv == peer_priv ||
	    !(priv->netdev->features & NETIF_F_HW_TC))
		return NOTIFY_DONE;

	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);

	return NOTIFY_DONE;
}

static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
{
	int tc_grp_size, tc_tbl_size;
	u32 max_flow_counter;

	max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
			    MLX5_CAP_GEN(dev, max_flow_counter_15_0);

	tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);

	tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
			    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));

	return tc_tbl_size;
}

int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mapping_ctx *chains_mapping;
	struct mlx5_chains_attr attr = {};
	u64 mapping_id;
	int err;

	mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
	mutex_init(&tc->t_lock);
	mutex_init(&tc->hairpin_tbl_lock);
	hash_init(tc->hairpin_tbl);

	err = rhashtable_init(&tc->ht, &tc_ht_params);
	if (err)
		return err;

	lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);

	mapping_id = mlx5_query_nic_system_image_guid(dev);

	chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
					       sizeof(struct mlx5_mapped_obj),
					       MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);

	if (IS_ERR(chains_mapping)) {
		err = PTR_ERR(chains_mapping);
		goto err_mapping;
	}
	tc->mapping = chains_mapping;

	if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
		attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
			MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
	attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
	attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
	attr.default_ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
	attr.mapping = chains_mapping;

	tc->chains = mlx5_chains_create(dev, &attr);
	if (IS_ERR(tc->chains)) {
		err = PTR_ERR(tc->chains);
		goto err_chains;
	}

	tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
				 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);

	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
	err = register_netdevice_notifier_dev_net(priv->netdev,
						  &tc->netdevice_nb,
						  &tc->netdevice_nn);
	if (err) {
		tc->netdevice_nb.notifier_call = NULL;
		mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
		goto err_reg;
	}

	return 0;

err_reg:
	mlx5_tc_ct_clean(tc->ct);
	mlx5e_tc_post_act_destroy(tc->post_act);
	mlx5_chains_destroy(tc->chains);
err_chains:
	mapping_destroy(chains_mapping);
err_mapping:
	rhashtable_destroy(&tc->ht);
	return err;
}

static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = flow->priv;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}

void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	if (tc->netdevice_nb.notifier_call)
		unregister_netdevice_notifier_dev_net(priv->netdev,
						      &tc->netdevice_nb,
						      &tc->netdevice_nn);

	mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
	mutex_destroy(&tc->hairpin_tbl_lock);

	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
		tc->t = NULL;
	}
	mutex_destroy(&tc->t_lock);

	mlx5_tc_ct_clean(tc->ct);
	mlx5e_tc_post_act_destroy(tc->post_act);
	mapping_destroy(tc->mapping);
	mlx5_chains_destroy(tc->chains);
}

int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
{
	const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mapping_ctx *mapping;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;
	u64 mapping_id;
	int err = 0;

	uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
	priv = netdev_priv(rpriv->netdev);
	esw = priv->mdev->priv.eswitch;

	uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
						       MLX5_FLOW_NAMESPACE_FDB);
	uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
					       esw_chains(esw),
					       &esw->offloads.mod_hdr,
					       MLX5_FLOW_NAMESPACE_FDB,
					       uplink_priv->post_act);

	uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));

	uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);

	mapping_id = mlx5_query_nic_system_image_guid(esw->dev);

	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
					sizeof(struct tunnel_match_key),
					TUNNEL_INFO_BITS_MASK, true);

	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_tun_mapping;
	}
	uplink_priv->tunnel_mapping = mapping;

	/* The last two values are reserved for the stack-devices
	 * slow-path table mark and the bridge ingress push mark.
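	 * That is why the mapping below is created with a range of
	 * ENC_OPTS_BITS_MASK - 2 rather than the full mask.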
	 */
	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
					sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_enc_opts_mapping;
	}
	uplink_priv->tunnel_enc_opts_mapping = mapping;

	err = rhashtable_init(tc_ht, &tc_ht_params);
	if (err)
		goto err_ht_init;

	lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);

	uplink_priv->encap = mlx5e_tc_tun_init(priv);
	if (IS_ERR(uplink_priv->encap)) {
		err = PTR_ERR(uplink_priv->encap);
		goto err_register_fib_notifier;
	}

	return 0;

err_register_fib_notifier:
	rhashtable_destroy(tc_ht);
err_ht_init:
	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
	mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
	netdev_warn(priv->netdev,
		    "Failed to initialize tc (eswitch), err: %d", err);
	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
	return err;
}

void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
{
	struct mlx5_rep_uplink_priv *uplink_priv;

	uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);

	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
	mlx5e_tc_tun_cleanup(uplink_priv->encap);

	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
	mapping_destroy(uplink_priv->tunnel_mapping);

	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
}

int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);

	return atomic_read(&tc_ht->nelems);
}

void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
		__mlx5e_tc_del_fdb_peer_flow(flow);
}

void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
	struct mlx5_rep_uplink_priv *rpriv =
		container_of(work, struct mlx5_rep_uplink_priv,
			     reoffload_flows_work);
	struct mlx5e_tc_flow *flow, *tmp;

	mutex_lock(&rpriv->unready_flows_lock);
	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
			unready_flow_del(flow);
	}
	mutex_unlock(&rpriv->unready_flows_lock);
}

static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
				     struct flow_cls_offload *cls_flower,
				     unsigned long flags)
{
	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
					      flags);
	case FLOW_CLS_DESTROY:
		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
					   flags);
	case FLOW_CLS_STATS:
		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
					  flags);
	default:
		return -EOPNOTSUPP;
	}
}
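
/* Block callback used by the driver's ndo_setup_tc path: only ingress
 * classification is requested here, with ESW vs NIC offload chosen by
 * whether this priv is the uplink representor.
 */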
int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
			    void *cb_priv)
{
	unsigned long flags = MLX5_TC_FLAG(INGRESS);
	struct mlx5e_priv *priv = cb_priv;

	if (!priv->netdev || !netif_device_present(priv->netdev))
		return -EOPNOTSUPP;

	if (mlx5e_is_uplink_rep(priv))
		flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
	else
		flags |= MLX5_TC_FLAG(NIC_OFFLOAD);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
	default:
		return -EOPNOTSUPP;
	}
}

bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
			 struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	u32 chain = 0, chain_tag, reg_b, zone_restore_id;
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5_mapped_obj mapped_obj;
	struct tc_skb_ext *tc_skb_ext;
	int err;

	reg_b = be32_to_cpu(cqe->ft_metadata);

	chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;

	err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find chain for chain tag: %d, err: %d\n",
			   chain_tag, err);
		return false;
	}

	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
		chain = mapped_obj.chain;
		tc_skb_ext = tc_skb_ext_alloc(skb);
		if (WARN_ON(!tc_skb_ext))
			return false;

		tc_skb_ext->chain = chain;

		zone_restore_id = (reg_b >> REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
				  ESW_ZONE_ID_MASK;

		if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
					      zone_restore_id))
			return false;
	} else {
		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
		return false;
	}
#endif /* CONFIG_NET_TC_SKB_EXT */

	return true;
}
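
/* A minimal usage sketch of the consumer side, assuming a caller in the
 * NIC RX path (illustrative only; the hypothetical rx_handle_tc_restore()
 * helper below is not part of this file, and the real call site lives in
 * the driver's RX handler):
 *
 *	static void rx_handle_tc_restore(struct mlx5_cqe64 *cqe,
 *					 struct sk_buff *skb)
 *	{
 *		// Drop the packet if the chain/CT restore state cannot be
 *		// rebuilt, since tc software fallback would misclassify it.
 *		if (!mlx5e_tc_update_skb(cqe, skb)) {
 *			dev_kfree_skb_any(skb);
 *			return;
 *		}
 *		// ... continue normal RX processing (e.g. GRO) ...
 *	}
 */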