/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
#include <net/bonding.h>
#include <net/dst_metadata.h>
#include "devlink.h"
#include "en.h"
#include "en/tc/post_act.h"
#include "en/tc/act_stats.h"
#include "en_rep.h"
#include "en/rep/tc.h"
#include "en/rep/neigh.h"
#include "en_tc.h"
#include "eswitch.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "en/mapping.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/tc_tun_encap.h"
#include "en/tc/sample.h"
#include "en/tc/act/act.h"
#include "en/tc/post_meter.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
#include "diag/en_tc_tracepoint.h"
#include <asm/div64.h>
#include "lag/lag.h"
#include "lag/mp.h"

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)

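/* Driver-private TC offload state, allocated per mlx5e netdev. A short
 * orientation note: 't' is the NIC TC root flow table, created lazily when
 * the first rule is offloaded and released again when the last rule is
 * deleted (see mlx5e_add_offloaded_nic_rule()/mlx5e_tc_del_nic_flow()
 * below); 'ht' is the rhashtable of offloaded flows; 'hairpin_tbl' tracks
 * hairpin pairs shared between flows.
 */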
struct mlx5e_tc_table {
	/* Protects the dynamic assignment of the t parameter
	 * which is the nic tc root table.
	 */
	struct mutex t_lock;
	struct mlx5e_priv *priv;
	struct mlx5_flow_table *t;
	struct mlx5_flow_table *miss_t;
	struct mlx5_fs_chains *chains;
	struct mlx5e_post_act *post_act;

	struct rhashtable ht;

	struct mod_hdr_tbl mod_hdr;
	struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
	DECLARE_HASHTABLE(hairpin_tbl, 8);

	struct notifier_block netdevice_nb;
	struct netdev_net_notifier netdevice_nn;

	struct mlx5_tc_ct_priv *ct;
	struct mapping_ctx *mapping;
	struct dentry *dfs_root;

	/* tc action stats */
	struct mlx5e_tc_act_stats_handle *action_stats_handle;
};

struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
	[MAPPED_OBJ_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
		.moffset = 0,
		.mlen = 16,
	},
	[VPORT_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
		.moffset = 16,
		.mlen = 16,
	},
	[TUNNEL_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
		.moffset = 8,
		.mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
		.soffset = MLX5_BYTE_OFF(fte_match_param,
					 misc_parameters_2.metadata_reg_c_1),
	},
	[ZONE_TO_REG] = zone_to_reg_ct,
	[ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
	[CTSTATE_TO_REG] = ctstate_to_reg_ct,
	[MARK_TO_REG] = mark_to_reg_ct,
	[LABELS_TO_REG] = labels_to_reg_ct,
	[FTEID_TO_REG] = fteid_to_reg_ct,
	/* For NIC rules we store the restore metadata directly
	 * into reg_b that is passed to SW since we don't
	 * jump between steering domains.
	 */
	[NIC_MAPPED_OBJ_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
		.moffset = 0,
		.mlen = 16,
	},
	[NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
	[PACKET_COLOR_TO_REG] = packet_color_to_reg,
};

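/* A note on the two REG_C_0 mappings above: the 32-bit register is split
 * in half, the mapped object (e.g. chain) id in bits 0..15 and the vport
 * metadata in bits 16..31, roughly:
 *
 *	reg_c_0 = (vport_metadata << 16) | mapped_obj_id;
 *
 * soffset is the byte offset of the register inside fte_match_param; it is
 * what mlx5e_tc_match_to_reg_match() below uses to also match on a
 * register rather than only write it.
 */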
struct mlx5e_tc_jump_state {
	u32 jump_count;
	bool jump_target;
	struct mlx5_flow_attr *jumping_attr;

	enum flow_action_id last_id;
	u32 last_index;
};

struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
{
	struct mlx5e_tc_table *tc;

	tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
	return tc ? tc : ERR_PTR(-ENOMEM);
}

void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
{
	kvfree(tc);
}

struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
{
	return tc->chains;
}

/* To avoid a false lock dependency warning, set the tc_ht lock class
 * different than the lock class of the ht being used internally: when
 * deleting the last flow from a group and then deleting the group, we get
 * into del_sw_flow_group(), which calls rhashtable_destroy() on
 * fg->ftes_hash. That takes ht->mutex, but it is a different ht->mutex
 * than the one here.
 */
static struct lock_class_key tc_ht_lock_key;
static struct lock_class_key tc_ht_wq_key;

static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow,
					struct mlx5_flow_attr *attr);

void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
			    enum mlx5e_tc_attr_to_reg type,
			    u32 val,
			    u32 mask)
{
	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	u32 max_mask = GENMASK(match_len - 1, 0);
	__be32 curr_mask_be, curr_val_be;
	u32 curr_mask, curr_val;

	fmask = headers_c + soffset;
	fval = headers_v + soffset;

	memcpy(&curr_mask_be, fmask, 4);
	memcpy(&curr_val_be, fval, 4);

	curr_mask = be32_to_cpu(curr_mask_be);
	curr_val = be32_to_cpu(curr_val_be);

	/* move to correct offset */
	WARN_ON(mask > max_mask);
	mask <<= moffset;
	val <<= moffset;
	max_mask <<= moffset;

	/* zero val and mask */
	curr_mask &= ~max_mask;
	curr_val &= ~max_mask;

	/* add current to mask */
	curr_mask |= mask;
	curr_val |= val;

	/* back to be32 and write */
	curr_mask_be = cpu_to_be32(curr_mask);
	curr_val_be = cpu_to_be32(curr_val);

	memcpy(fmask, &curr_mask_be, 4);
	memcpy(fval, &curr_val_be, 4);

	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
}

void
mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
				enum mlx5e_tc_attr_to_reg type,
				u32 *val,
				u32 *mask)
{
	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	u32 max_mask = GENMASK(match_len - 1, 0);
	__be32 curr_mask_be, curr_val_be;
	u32 curr_mask, curr_val;

	fmask = headers_c + soffset;
	fval = headers_v + soffset;

	memcpy(&curr_mask_be, fmask, 4);
	memcpy(&curr_val_be, fval, 4);

	curr_mask = be32_to_cpu(curr_mask_be);
	curr_val = be32_to_cpu(curr_val_be);

	*mask = (curr_mask >> moffset) & max_mask;
	*val = (curr_val >> moffset) & max_mask;
}

int
mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
				     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				     enum mlx5_flow_namespace_type ns,
				     enum mlx5e_tc_attr_to_reg type,
				     u32 data)
{
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	char *modact;
	int err;

	modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
	if (IS_ERR(modact))
		return PTR_ERR(modact);

	/* Firmware has 5bit length field and 0 means 32bits */
	if (mlen == 32)
		mlen = 0;

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, field, mfield);
	MLX5_SET(set_action_in, modact, offset, moffset);
	MLX5_SET(set_action_in, modact, length, mlen);
	MLX5_SET(set_action_in, modact, data, data);
	err = mod_hdr_acts->num_actions;
	mod_hdr_acts->num_actions++;

	return err;
}

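/* The value returned above is the index of the newly appended SET action
 * within mod_hdr_acts. A minimal usage sketch (illustrative only, the
 * mapped_id value is made up): allocate the action with a placeholder and
 * patch it once the real mapping id is known:
 *
 *	act_id = mlx5e_tc_match_to_reg_set_and_get_id(mdev, acts,
 *						      MLX5_FLOW_NAMESPACE_FDB,
 *						      MAPPED_OBJ_TO_REG, 0);
 *	...
 *	mlx5e_tc_match_to_reg_mod_hdr_change(mdev, acts, MAPPED_OBJ_TO_REG,
 *					     act_id, mapped_id);
 */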
static struct mlx5e_tc_act_stats_handle *
get_act_stats_handle(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->action_stats_handle;
	}

	return tc->action_stats_handle;
}

struct mlx5e_tc_int_port_priv *
mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->int_port_priv;
	}

	return NULL;
}

struct mlx5e_flow_meters *
mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
{
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_priv *priv;

	if (is_mdev_switchdev_mode(dev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;
		priv = netdev_priv(uplink_rpriv->netdev);
		if (!uplink_priv->flow_meters)
			uplink_priv->flow_meters =
				mlx5e_flow_meters_init(priv,
						       MLX5_FLOW_NAMESPACE_FDB,
						       uplink_priv->post_act);
		if (!IS_ERR(uplink_priv->flow_meters))
			return uplink_priv->flow_meters;
	}

	return NULL;
}

static struct mlx5_tc_ct_priv *
get_ct_priv(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->ct_priv;
	}

	return tc->ct;
}

static struct mlx5e_tc_psample *
get_sample_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->tc_psample;
	}

	return NULL;
}

static struct mlx5e_post_act *
get_post_action(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->post_act;
	}

	return tc->post_act;
}

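/* The getters above share one dispatch rule: in switchdev mode the state
 * hangs off the uplink representor's uplink_priv, otherwise off the
 * per-netdev tc table (or doesn't exist). The insert/delete wrappers below
 * dispatch the same way, between the eswitch FDB and the NIC RX namespace.
 */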
struct mlx5_flow_handle *
mlx5_tc_rule_insert(struct mlx5e_priv *priv,
		    struct mlx5_flow_spec *spec,
		    struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (is_mdev_switchdev_mode(priv->mdev))
		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

	return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
}

void
mlx5_tc_rule_delete(struct mlx5e_priv *priv,
		    struct mlx5_flow_handle *rule,
		    struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
		return;
	}

	mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}

static bool
is_flow_meter_action(struct mlx5_flow_attr *attr)
{
	return (((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
		 (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) ||
		attr->flags & MLX5_ATTR_FLAG_MTU);
}

static int
mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
			struct mlx5_flow_attr *attr)
{
	struct mlx5e_post_act *post_act = get_post_action(priv);
	struct mlx5e_post_meter_priv *post_meter;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5e_flow_meter_handle *meter;
	enum mlx5e_post_meter_type type;

	meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
	if (IS_ERR(meter)) {
		mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
		return PTR_ERR(meter);
	}

	ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
	type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE;
	post_meter = mlx5e_post_meter_init(priv, ns_type, post_act,
					   type,
					   meter->act_counter, meter->drop_counter,
					   attr->branch_true, attr->branch_false);
	if (IS_ERR(post_meter)) {
		mlx5_core_err(priv->mdev, "Failed to init post meter\n");
		goto err_meter_init;
	}

	attr->meter_attr.meter = meter;
	attr->meter_attr.post_meter = post_meter;
	attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;

	return 0;

err_meter_init:
	mlx5e_tc_meter_put(meter);
	return PTR_ERR(post_meter);
}

static void
mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
{
	mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter);
	mlx5e_tc_meter_put(attr->meter_attr.meter);
}

struct mlx5_flow_handle *
mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
		      struct mlx5_flow_spec *spec,
		      struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	int err;

	if (!is_mdev_switchdev_mode(priv->mdev))
		return mlx5e_add_offloaded_nic_rule(priv, spec, attr);

	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
		return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);

	if (is_flow_meter_action(attr)) {
		err = mlx5e_tc_add_flow_meter(priv, attr);
		if (err)
			return ERR_PTR(err);
	}

	return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
}

void
mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
			struct mlx5_flow_handle *rule,
			struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!is_mdev_switchdev_mode(priv->mdev)) {
		mlx5e_del_offloaded_nic_rule(priv, rule, attr);
		return;
	}

	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
		mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
		return;
	}

	mlx5_eswitch_del_offloaded_rule(esw, rule, attr);

	if (attr->meter_attr.meter)
		mlx5e_tc_del_flow_meter(esw, attr);
}

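/* Convenience wrapper around mlx5e_tc_match_to_reg_set_and_get_id() for
 * callers that only care about success or failure, not the action index.
 */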
int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
			  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			  enum mlx5_flow_namespace_type ns,
			  enum mlx5e_tc_attr_to_reg type,
			  u32 data)
{
	int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);

	return ret < 0 ? ret : 0;
}

void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
					  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
					  enum mlx5e_tc_attr_to_reg type,
					  int act_id, u32 data)
{
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	char *modact;

	modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);

	/* Firmware has 5bit length field and 0 means 32bits */
	if (mlen == 32)
		mlen = 0;

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, field, mfield);
	MLX5_SET(set_action_in, modact, offset, moffset);
	MLX5_SET(set_action_in, modact, length, mlen);
	MLX5_SET(set_action_in, modact, data, data);
}

struct mlx5e_hairpin {
	struct mlx5_hairpin *pair;

	struct mlx5_core_dev *func_mdev;
	struct mlx5e_priv *func_priv;
	u32 tdn;
	struct mlx5e_tir direct_tir;

	int num_channels;
	u8 log_num_packets;
	struct mlx5e_rqt indir_rqt;
	struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
	struct mlx5_ttc_table *ttc;
};

struct mlx5e_hairpin_entry {
	/* a node of a hash table which keeps all the hairpin entries */
	struct hlist_node hairpin_hlist;

	/* protects flows list */
	spinlock_t flows_lock;
	/* flows sharing the same hairpin */
	struct list_head flows;
	/* hpe's that were not fully initialized when dead peer update event
	 * function traversed them.
	 */
	struct list_head dead_peer_wait_list;

	u16 peer_vhca_id;
	u8 prio;
	struct mlx5e_hairpin *hp;
	refcount_t refcnt;
	struct completion res_ready;
};

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow);

struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
		return ERR_PTR(-EINVAL);
	return flow;
}

void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
	if (refcount_dec_and_test(&flow->refcnt)) {
		mlx5e_tc_del_flow(priv, flow);
		kfree_rcu(flow, rcu_head);
	}
}

bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, ESWITCH);
}

bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, FT);
}

bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, OFFLOADED);
}

int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
{
	return mlx5e_is_eswitch_flow(flow) ?
		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}

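/* Eswitch flows may need their counter allocated on a device other than
 * priv->mdev; esw_attr->counter_dev carries that device. NIC flows always
 * count on the local device.
 */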
static struct mlx5_core_dev *
get_flow_counter_dev(struct mlx5e_tc_flow *flow)
{
	return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev :
		flow->priv->mdev;
}

static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
		&esw->offloads.mod_hdr :
		&tc->mod_hdr;
}

int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv,
			    struct mlx5e_tc_flow *flow,
			    struct mlx5_flow_attr *attr)
{
	struct mlx5e_mod_hdr_handle *mh;

	mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
				  mlx5e_get_flow_namespace(flow),
				  &attr->parse_attr->mod_hdr_acts);
	if (IS_ERR(mh))
		return PTR_ERR(mh);

	WARN_ON(attr->modify_hdr);
	attr->modify_hdr = mlx5e_mod_hdr_get(mh);
	attr->mh = mh;

	return 0;
}

void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_attr *attr)
{
	/* flow wasn't fully initialized */
	if (!attr->mh)
		return;

	mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
			     attr->mh);
	attr->mh = NULL;
}

static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
	struct mlx5_core_dev *mdev;
	struct net_device *netdev;
	struct mlx5e_priv *priv;

	netdev = dev_get_by_index(net, ifindex);
	if (!netdev)
		return ERR_PTR(-ENODEV);

	priv = netdev_priv(netdev);
	mdev = priv->mdev;
	dev_put(netdev);

	/* Mirred tc action holds a refcount on the ifindex net_device (see
	 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
	 * after dev_put(netdev), while we're in the context of adding a tc flow.
	 *
	 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
	 * stored in a hairpin object, which exists until all flows, that refer to it, get
	 * removed.
	 *
	 * On the other hand, after a hairpin object has been created, the peer net_device may
	 * be removed/unbound while there are still some hairpin flows that are using it. This
	 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
	 * NETDEV_UNREGISTER event of the peer net_device.
	 */
	return mdev;
}

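/* Hairpin setup: a transport domain plus a TIR pointing directly at the
 * peer RQ covers the single-channel case; when the hairpin spans more than
 * one channel, an RSS layer (indirect RQT, per-traffic-type TIRs and a TTC
 * table) is created on top, see mlx5e_hairpin_create() below.
 */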
static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
	struct mlx5e_tir_builder *builder;
	int err;

	builder = mlx5e_tir_builder_alloc(false);
	if (!builder)
		return -ENOMEM;

	err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
	if (err)
		goto out;

	mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
	err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
	if (err)
		goto create_tir_err;

out:
	mlx5e_tir_builder_free(builder);
	return err;

create_tir_err:
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);

	goto out;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
	mlx5e_tir_destroy(&hp->direct_tir);
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_rss_params_indir *indir;
	int err;

	indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
	if (!indir)
		return -ENOMEM;

	mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
	err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
				   mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
				   indir);

	kvfree(indir);
	return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5e_rss_params_hash rss_hash;
	enum mlx5_traffic_types tt, max_tt;
	struct mlx5e_tir_builder *builder;
	int err = 0;

	builder = mlx5e_tir_builder_alloc(false);
	if (!builder)
		return -ENOMEM;

	rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
		struct mlx5e_rss_params_traffic_type rss_tt;

		rss_tt = mlx5e_rss_get_default_tt_config(tt);

		mlx5e_tir_builder_build_rqt(builder, hp->tdn,
					    mlx5e_rqt_get_rqtn(&hp->indir_rqt),
					    false);
		mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);

		err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
		if (err) {
			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
			goto err_destroy_tirs;
		}

		mlx5e_tir_builder_clear(builder);
	}

out:
	mlx5e_tir_builder_free(builder);
	return err;

err_destroy_tirs:
	max_tt = tt;
	for (tt = 0; tt < max_tt; tt++)
		mlx5e_tir_destroy(&hp->indir_tir[tt]);

	goto out;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
	int tt;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
		mlx5e_tir_destroy(&hp->indir_tir[tt]);
}

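/* The hairpin TTC table steers each traffic type to its matching indirect
 * TIR, while the MLX5_TT_ANY catch-all goes to the direct TIR.
 */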
static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
					 struct ttc_params *ttc_params)
{
	struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
	int tt;

	memset(ttc_params, 0, sizeof(*ttc_params));

	ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
						 MLX5_FLOW_NAMESPACE_KERNEL);
	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
		ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		ttc_params->dests[tt].tir_num =
			tt == MLX5_TT_ANY ?
				mlx5e_tir_get_tirn(&hp->direct_tir) :
				mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
	}

	ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
	ft_attr->prio = MLX5E_TC_PRIO;
}

static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct ttc_params ttc_params;
	struct mlx5_ttc_table *ttc;
	int err;

	err = mlx5e_hairpin_create_indirect_rqt(hp);
	if (err)
		return err;

	err = mlx5e_hairpin_create_indirect_tirs(hp);
	if (err)
		goto err_create_indirect_tirs;

	mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
	hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
	if (IS_ERR(hp->ttc)) {
		err = PTR_ERR(hp->ttc);
		goto err_create_ttc_table;
	}

	ttc = mlx5e_fs_get_ttc(priv->fs, false);
	netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
		   hp->num_channels,
		   mlx5_get_ttc_flow_table(ttc)->id);

	return 0;

err_create_ttc_table:
	mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
	mlx5e_rqt_destroy(&hp->indir_rqt);

	return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
	mlx5_destroy_ttc_table(hp->ttc);
	mlx5e_hairpin_destroy_indirect_tirs(hp);
	mlx5e_rqt_destroy(&hp->indir_rqt);
}

static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
		     int peer_ifindex)
{
	struct mlx5_core_dev *func_mdev, *peer_mdev;
	struct mlx5e_hairpin *hp;
	struct mlx5_hairpin *pair;
	int err;

	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
	if (!hp)
		return ERR_PTR(-ENOMEM);

	func_mdev = priv->mdev;
	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (IS_ERR(peer_mdev)) {
		err = PTR_ERR(peer_mdev);
		goto create_pair_err;
	}

	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
	if (IS_ERR(pair)) {
		err = PTR_ERR(pair);
		goto create_pair_err;
	}
	hp->pair = pair;
	hp->func_mdev = func_mdev;
	hp->func_priv = priv;
	hp->num_channels = params->num_channels;
	hp->log_num_packets = params->log_num_packets;

	err = mlx5e_hairpin_create_transport(hp);
	if (err)
		goto create_transport_err;

	if (hp->num_channels > 1) {
		err = mlx5e_hairpin_rss_init(hp);
		if (err)
			goto rss_init_err;
	}

	return hp;

rss_init_err:
	mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
	mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
	kfree(hp);
	return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
	if (hp->num_channels > 1)
		mlx5e_hairpin_rss_cleanup(hp);
	mlx5e_hairpin_destroy_transport(hp);
	mlx5_core_hairpin_destroy(hp->pair);
	kvfree(hp);
}

static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
	return (peer_vhca_id << 16 | prio);
}

static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
						     u16 peer_vhca_id, u8 prio)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5e_hairpin_entry *hpe;
	u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

	hash_for_each_possible(tc->hairpin_tbl, hpe,
			       hairpin_hlist, hash_key) {
		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
			refcount_inc(&hpe->refcnt);
			return hpe;
		}
	}

	return NULL;
}

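/* mlx5e_hairpin_get() above is called with hairpin_tbl_lock held;
 * mlx5e_hairpin_put() below pairs with it via
 * refcount_dec_and_mutex_lock(), so dropping the last reference and
 * unhashing the entry happen atomically with respect to concurrent
 * lookups.
 */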
static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
			      struct mlx5e_hairpin_entry *hpe)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);

	/* no more hairpin flows for us, release the hairpin pair */
	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
		return;
	hash_del(&hpe->hairpin_hlist);
	mutex_unlock(&tc->hairpin_tbl_lock);

	if (!IS_ERR_OR_NULL(hpe->hp)) {
		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
			   dev_name(hpe->hp->pair->peer_mdev->device));

		mlx5e_hairpin_destroy(hpe->hp);
	}

	WARN_ON(!list_empty(&hpe->flows));
	kfree(hpe);
}

#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
				  struct mlx5_flow_spec *spec, u8 *match_prio,
				  struct netlink_ext_ack *extack)
{
	void *headers_c, *headers_v;
	u8 prio_val, prio_mask = 0;
	bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only PCP trust state supported for hairpin");
		return -EOPNOTSUPP;
	}
#endif
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

	vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
	if (vlan_present) {
		prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
		prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
	}

	if (!vlan_present || !prio_mask) {
		prio_val = UNKNOWN_MATCH_PRIO;
	} else if (prio_mask != 0x7) {
		NL_SET_ERR_MSG_MOD(extack,
				   "masked priority match not supported for hairpin");
		return -EOPNOTSUPP;
	}

	*match_prio = prio_val;
	return 0;
}

static int debugfs_hairpin_num_active_get(void *data, u64 *val)
{
	struct mlx5e_tc_table *tc = data;
	struct mlx5e_hairpin_entry *hpe;
	u32 cnt = 0;
	u32 bkt;

	mutex_lock(&tc->hairpin_tbl_lock);
	hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
		cnt++;
	mutex_unlock(&tc->hairpin_tbl_lock);

	*val = cnt;

	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active,
			 debugfs_hairpin_num_active_get, NULL, "%llu\n");

static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv)
{
	struct mlx5e_tc_table *tc = file->private;
	struct mlx5e_hairpin_entry *hpe;
	u32 bkt;

	mutex_lock(&tc->hairpin_tbl_lock);
	hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
		seq_printf(file,
			   "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n",
			   hpe->peer_vhca_id, hpe->prio,
			   refcount_read(&hpe->refcnt), hpe->hp->num_channels,
			   BIT(hpe->hp->log_num_packets));
	mutex_unlock(&tc->hairpin_tbl_lock);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump);

static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc,
				  struct dentry *dfs_root)
{
	if (IS_ERR_OR_NULL(dfs_root))
		return;

	tc->dfs_root = debugfs_create_dir("tc", dfs_root);

	debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc,
			    &fops_hairpin_num_active);
	debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc,
			    &debugfs_hairpin_table_dump_fops);
}

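/* A note on the synchronization scheme below: a new hairpin entry is
 * published in hairpin_tbl before the hardware pair is actually created.
 * Concurrent adders that find it wait on hpe->res_ready; hpe->hp holds an
 * ERR_PTR if creation failed.
 */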
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5e_tc_flow_parse_attr *parse_attr,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct devlink *devlink = priv_to_devlink(priv->mdev);
	int peer_ifindex = parse_attr->mirred_ifindex[0];
	union devlink_param_value val = {};
	struct mlx5_hairpin_params params;
	struct mlx5_core_dev *peer_mdev;
	struct mlx5e_hairpin_entry *hpe;
	struct mlx5e_hairpin *hp;
	u8 match_prio;
	u16 peer_id;
	int err;

	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (IS_ERR(peer_mdev)) {
		NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
		return PTR_ERR(peer_mdev);
	}

	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
		return -EOPNOTSUPP;
	}

	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
				     extack);
	if (err)
		return err;

	mutex_lock(&tc->hairpin_tbl_lock);
	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
	if (hpe) {
		mutex_unlock(&tc->hairpin_tbl_lock);
		wait_for_completion(&hpe->res_ready);

		if (IS_ERR(hpe->hp)) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
	if (!hpe) {
		mutex_unlock(&tc->hairpin_tbl_lock);
		return -ENOMEM;
	}

	spin_lock_init(&hpe->flows_lock);
	INIT_LIST_HEAD(&hpe->flows);
	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
	hpe->peer_vhca_id = peer_id;
	hpe->prio = match_prio;
	refcount_set(&hpe->refcnt, 1);
	init_completion(&hpe->res_ready);

	hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
		 hash_hairpin_info(peer_id, match_prio));
	mutex_unlock(&tc->hairpin_tbl_lock);

	err = devl_param_driverinit_value_get(
		devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val);
	if (err) {
		err = -ENOMEM;
		goto out_err;
	}

	params.log_num_packets = ilog2(val.vu32);
	params.log_data_size =
		clamp_t(u32,
			params.log_num_packets +
				MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev),
			MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz),
			MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));

	params.q_counter = priv->q_counter;
	err = devl_param_driverinit_value_get(
		devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val);
	if (err) {
		err = -ENOMEM;
		goto out_err;
	}

	params.num_channels = val.vu32;

	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
	hpe->hp = hp;
	complete_all(&hpe->res_ready);
	if (IS_ERR(hp)) {
		err = PTR_ERR(hp);
		goto out_err;
	}

	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
		   mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
		   dev_name(hp->pair->peer_mdev->device),
		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
	if (hpe->hp->num_channels > 1) {
		flow_flag_set(flow, HAIRPIN_RSS);
		flow->attr->nic_attr->hairpin_ft =
			mlx5_get_ttc_flow_table(hpe->hp->ttc);
	} else {
		flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
	}

	flow->hpe = hpe;
	spin_lock(&hpe->flows_lock);
	list_add(&flow->hairpin, &hpe->flows);
	spin_unlock(&hpe->flows_lock);

	return 0;

out_err:
	mlx5e_hairpin_put(priv, hpe);
	return err;
}

static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
				   struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->hpe)
		return;

	spin_lock(&flow->hpe->flows_lock);
	list_del(&flow->hairpin);
	spin_unlock(&flow->hpe->flows_lock);

	mlx5e_hairpin_put(priv, flow->hpe);
	flow->hpe = NULL;
}

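/* Destination resolution order for a NIC rule, as implemented below: an
 * explicit dest_ft wins, then a hairpin flow table, then a hairpin TIR,
 * and otherwise a FWD action goes either to the goto-chain table or to the
 * vlan table. A counter, if present, is appended as an extra destination.
 */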
struct mlx5_flow_handle *
mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_context *flow_context = &spec->flow_context;
	struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_fs_chains *nic_chains;
	struct mlx5_flow_act flow_act = {
		.action = attr->action,
		.flags = FLOW_ACT_NO_APPEND,
	};
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_table *ft;
	int dest_ix = 0;

	nic_chains = mlx5e_nic_chains(tc);
	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
	flow_context->flow_tag = nic_attr->flow_tag;

	if (attr->dest_ft) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = attr->dest_ft;
		dest_ix++;
	} else if (nic_attr->hairpin_ft) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = nic_attr->hairpin_ft;
		dest_ix++;
	} else if (nic_attr->hairpin_tirn) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
		dest_ix++;
	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		if (attr->dest_chain) {
			dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
								 attr->dest_chain, 1,
								 MLX5E_TC_FT_LEVEL);
			if (IS_ERR(dest[dest_ix].ft))
				return ERR_CAST(dest[dest_ix].ft);
		} else {
			dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
		}
		dest_ix++;
	}

	if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
	    MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
		flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;

	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
		dest_ix++;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		flow_act.modify_hdr = attr->modify_hdr;

	mutex_lock(&tc->t_lock);
	if (IS_ERR_OR_NULL(tc->t)) {
		/* Create the root table here if it doesn't exist yet */
		tc->t =
			mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);

		if (IS_ERR(tc->t)) {
			mutex_unlock(&tc->t_lock);
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			rule = ERR_CAST(tc->t);
			goto err_ft_get;
		}
	}
	mutex_unlock(&tc->t_lock);

	if (attr->chain || attr->prio)
		ft = mlx5_chains_get_table(nic_chains,
					   attr->chain, attr->prio,
					   MLX5E_TC_FT_LEVEL);
	else
		ft = attr->ft;

	if (IS_ERR(ft)) {
		rule = ERR_CAST(ft);
		goto err_ft_get;
	}

	if (attr->outer_match_level != MLX5_MATCH_NONE)
		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

	rule = mlx5_add_flow_rules(ft, spec,
				   &flow_act, dest, dest_ix);
	if (IS_ERR(rule))
		goto err_rule;

	return rule;

err_rule:
	if (attr->chain || attr->prio)
		mlx5_chains_put_table(nic_chains,
				      attr->chain, attr->prio,
				      MLX5E_TC_FT_LEVEL);
err_ft_get:
	if (attr->dest_chain)
		mlx5_chains_put_table(nic_chains,
				      attr->dest_chain, 1,
				      MLX5E_TC_FT_LEVEL);

	return ERR_CAST(rule);
}

static int
alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
			struct mlx5_flow_attr *attr)
{
	struct mlx5_fc *counter;

	counter = mlx5_fc_create(counter_dev, true);
	if (IS_ERR(counter))
		return PTR_ERR(counter);

	attr->counter = counter;
	return 0;
}

static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_core_dev *dev = priv->mdev;
	int err;

	parse_attr = attr->parse_attr;

	if (flow_flag_test(flow, HAIRPIN)) {
		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
		if (err)
			return err;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		err = alloc_flow_attr_counter(dev, attr);
		if (err)
			return err;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
		if (err)
			return err;
	}

	flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr);
	return PTR_ERR_OR_ZERO(flow->rule[0]);
}

void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
				  struct mlx5_flow_handle *rule,
				  struct mlx5_flow_attr *attr)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_fs_chains *nic_chains;

	nic_chains = mlx5e_nic_chains(tc);
	mlx5_del_flow_rules(rule);

	if (attr->chain || attr->prio)
		mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
				      MLX5E_TC_FT_LEVEL);

	if (attr->dest_chain)
		mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
				      MLX5E_TC_FT_LEVEL);
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_flow_attr *attr = flow->attr;

	flow_flag_clear(flow, OFFLOADED);

	if (!IS_ERR_OR_NULL(flow->rule[0]))
		mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);

	/* Remove root table if no rules are left to avoid
	 * extra steering hops.
	 */
	mutex_lock(&tc->t_lock);
	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
	    !IS_ERR_OR_NULL(tc->t)) {
		mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
		tc->t = NULL;
	}
	mutex_unlock(&tc->t_lock);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
		mlx5_fc_destroy(priv->mdev, attr->counter);

	if (flow_flag_test(flow, HAIRPIN))
		mlx5e_hairpin_flow_del(priv, flow);

	free_flow_post_acts(flow);

	kvfree(attr->parse_attr);
	kfree(flow->attr);
}

struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
			   struct mlx5e_tc_flow *flow,
			   struct mlx5_flow_spec *spec,
			   struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_handle *rule;

	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

	rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);

	if (IS_ERR(rule))
		return rule;

	if (attr->esw_attr->split_count) {
		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
		if (IS_ERR(flow->rule[1]))
			goto err_rule1;
	}

	return rule;

err_rule1:
	mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
	return flow->rule[1];
}

void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5_flow_attr *attr)
{
	flow_flag_clear(flow, OFFLOADED);

	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
		return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);

	if (attr->esw_attr->split_count)
		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

	mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
}

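/* Slow-path offload: point the rule at the slow FDB table so packets reach
 * software, e.g. while an encap neighbour is still unresolved. When the
 * device can forward after a header rewrite
 * (fdb_modify_header_fwd_to_table), the chain mapping is written into the
 * mapped-object register first, so software can resume processing from the
 * proper chain on a miss.
 */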
struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
			      struct mlx5e_tc_flow *flow,
			      struct mlx5_flow_spec *spec)
{
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct mlx5e_mod_hdr_handle *mh = NULL;
	struct mlx5_flow_attr *slow_attr;
	struct mlx5_flow_handle *rule;
	bool fwd_and_modify_cap;
	u32 chain_mapping = 0;
	int err;

	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
	if (!slow_attr)
		return ERR_PTR(-ENOMEM);

	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->esw_attr->split_count = 0;
	slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;

	fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
	if (!fwd_and_modify_cap)
		goto skip_restore;

	err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
	if (err)
		goto err_get_chain;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					MAPPED_OBJ_TO_REG, chain_mapping);
	if (err)
		goto err_reg_set;

	mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
				  MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
	if (IS_ERR(mh)) {
		err = PTR_ERR(mh);
		goto err_attach;
	}

	slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);

skip_restore:
	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		goto err_offload;
	}

	flow->attr->slow_mh = mh;
	flow->chain_mapping = chain_mapping;
	flow_flag_set(flow, SLOW);

	mlx5e_mod_hdr_dealloc(&mod_acts);
	kfree(slow_attr);

	return rule;

err_offload:
	if (fwd_and_modify_cap)
		mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
err_attach:
err_reg_set:
	if (fwd_and_modify_cap)
		mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
err_get_chain:
	mlx5e_mod_hdr_dealloc(&mod_acts);
	kfree(slow_attr);
	return ERR_PTR(err);
}

void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
				       struct mlx5e_tc_flow *flow)
{
	struct mlx5e_mod_hdr_handle *slow_mh = flow->attr->slow_mh;
	struct mlx5_flow_attr *slow_attr;

	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
	if (!slow_attr) {
		mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
		return;
	}

	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->esw_attr->split_count = 0;
	slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
	if (slow_mh) {
		slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
		slow_attr->modify_hdr = mlx5e_mod_hdr_get(slow_mh);
	}
	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
	if (slow_mh) {
		mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), slow_mh);
		mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
		flow->chain_mapping = 0;
		flow->attr->slow_mh = NULL;
	}
	flow_flag_clear(flow, SLOW);
	kfree(slow_attr);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
			     struct list_head *unready_flows)
{
	flow_flag_set(flow, NOT_READY);
	list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
	list_del(&flow->unready);
	flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_add(flow, &uplink_priv->unready_flows);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_del(flow);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
{
	struct mlx5_core_dev *out_mdev, *route_mdev;
	struct mlx5e_priv *out_priv, *route_priv;

	out_priv = netdev_priv(out_dev);
	out_mdev = out_priv->mdev;
	route_priv = netdev_priv(route_dev);
	route_mdev = route_priv->mdev;

	if (out_mdev->coredev_type != MLX5_COREDEV_PF)
		return false;

	if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
	    route_mdev->coredev_type != MLX5_COREDEV_SF)
		return false;

	return mlx5e_same_hw_devs(out_priv, route_priv);
}

int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
	struct mlx5e_priv *out_priv, *route_priv;
	struct mlx5_core_dev *route_mdev;
	struct mlx5_eswitch *esw;
	u16 vhca_id;

	out_priv = netdev_priv(out_dev);
	esw = out_priv->mdev->priv.eswitch;
	route_priv = netdev_priv(route_dev);
	route_mdev = route_priv->mdev;

	vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
	if (mlx5_lag_is_active(out_priv->mdev)) {
		struct mlx5_devcom *devcom;
		int err;

		/* In the lag case we may get devices from different eswitch
		 * instances. If we fail to get the vport num, it most likely
		 * means we are on the wrong eswitch.
		 */
		err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
		if (err != -ENOENT)
			return err;

		rcu_read_lock();
		devcom = out_priv->mdev->priv.devcom;
		esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
		err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
		rcu_read_unlock();

		return err;
	}

	return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
}

static int
verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
{
	if (!(actions &
	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
		NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
		return -EOPNOTSUPP;
	}

	if (!(~actions &
	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
		NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
		return -EOPNOTSUPP;
	}

	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
	    actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
		NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
post_process_attr(struct mlx5e_tc_flow *flow,
		  struct mlx5_flow_attr *attr,
		  struct netlink_ext_ack *extack)
{
	bool vf_tun;
	int err = 0;

	err = verify_attr_actions(attr->action, extack);
	if (err)
		goto err_out;

	err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun);
	if (err)
		goto err_out;

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr);
		if (err)
			goto err_out;
	}

	if (attr->branch_true &&
	    attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_true);
		if (err)
			goto err_out;
	}

	if (attr->branch_false &&
	    attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_false);
		if (err)
			goto err_out;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
		if (err)
			goto err_out;
	}

err_out:
	return err;
}

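/* FDB flow installation below proceeds in stages: validate the requested
 * chain and priority against what fs_chains exposes, attach decap routes
 * and internal (OVS) ports where relevant, post-process the attr (encap
 * dests, mod_hdr, counter), and finally install either the offloaded rule
 * or the slow-path rule when the encap neighbour isn't valid yet.
 */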
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	u32 max_prio, max_chain;
	int err = 0;

	parse_attr = attr->parse_attr;
	esw_attr = attr->esw_attr;

	/* We check chain range only for tc flows.
	 * For ft flows, we checked attr->chain was originally 0 and set it to
	 * FDB_FT_CHAIN which is outside tc range.
	 * See mlx5e_rep_setup_ft_cb().
	 */
	max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
	if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Requested chain is out of supported range");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
	if (attr->prio > max_prio) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Requested priority is out of supported range");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	if (flow_flag_test(flow, TUN_RX)) {
		err = mlx5e_attach_decap_route(priv, flow);
		if (err)
			goto err_out;

		if (!attr->chain && esw_attr->int_port &&
		    attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
			/* If decap route device is internal port, change the
			 * source vport value in reg_c0 back to uplink just in
			 * case the rule performs goto chain > 0. If we have a miss
			 * on chain > 0 we want the metadata regs to hold the
			 * chain id so SW will resume handling of this packet
			 * from the proper chain.
			 */
			u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
									       esw_attr->in_rep->vport);

			err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
							MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
							metadata);
			if (err)
				goto err_out;

			attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
		}
	}

	if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
		err = mlx5e_attach_decap(priv, flow, extack);
		if (err)
			goto err_out;
	}

	if (netif_is_ovs_master(parse_attr->filter_dev)) {
		struct mlx5e_tc_int_port *int_port;

		if (attr->chain) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Internal port rule is only supported on chain 0");
			err = -EOPNOTSUPP;
			goto err_out;
		}

		if (attr->dest_chain) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Internal port rule offload doesn't support goto action");
			err = -EOPNOTSUPP;
			goto err_out;
		}

		int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
						 parse_attr->filter_dev->ifindex,
						 flow_flag_test(flow, EGRESS) ?
						 MLX5E_TC_INT_PORT_EGRESS :
						 MLX5E_TC_INT_PORT_INGRESS);
		if (IS_ERR(int_port)) {
			err = PTR_ERR(int_port);
			goto err_out;
		}

		esw_attr->int_port = int_port;
	}

	err = post_process_attr(flow, attr, extack);
	if (err)
		goto err_out;

	err = mlx5e_tc_act_stats_add_flow(get_act_stats_handle(priv), flow);
	if (err)
		goto err_out;

	/* we get here if one of the following takes place:
	 * (1) there's no error
	 * (2) there's an encap action and we don't have valid neigh
	 */
	if (flow_flag_test(flow, SLOW))
		flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
	else
		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);

	if (IS_ERR(flow->rule[0])) {
		err = PTR_ERR(flow->rule[0]);
		goto err_out;
	}
	flow_flag_set(flow, OFFLOADED);

	return 0;

err_out:
	flow_flag_set(flow, FAILED);
	return err;
}

static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
	void *headers_v = MLX5_ADDR_OF(fte_match_param,
				       spec->match_value,
				       misc_parameters_3);
	u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
					     headers_v,
					     geneve_tlv_option_0_data);

	return !!geneve_tlv_opt_0_data;
}

static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
{
	if (!attr)
		return;

	mlx5_free_flow_attr_actions(flow, attr);
	kvfree(attr->parse_attr);
	kfree(attr);
}

static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;

	esw_attr = attr->esw_attr;
	mlx5e_put_flow_tunnel_id(flow);

	if (flow_flag_test(flow, NOT_READY))
		remove_unready_flow(flow);

	if (mlx5e_is_offloaded_flow(flow)) {
		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
	}
	complete_all(&flow->del_hw_done);

	if (mlx5_flow_has_geneve_opt(flow))
		mlx5_geneve_tlv_option_del(priv->mdev->geneve);

	if (flow->decap_route)
		mlx5e_detach_decap_route(priv, flow);

	mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);

	if (esw_attr->int_port)
		mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);

	if (esw_attr->dest_int_port)
		mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);

	if (flow_flag_test(flow, L3_TO_L2_DECAP))
		mlx5e_detach_decap(priv, flow);

	mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow);

	free_flow_post_acts(flow);
	mlx5_free_flow_attr_actions(flow, attr);

	kvfree(attr->esw_attr->rx_tun_attr);
	kvfree(attr->parse_attr);
	kfree(flow->attr);
}

struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_attr *attr;

	attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
	return attr->counter;
}

*/ 1981 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) 1982 { 1983 struct mlx5e_tc_flow *flow, *tmp; 1984 1985 list_for_each_entry_safe(flow, tmp, flow_list, tmp_list) 1986 mlx5e_flow_put(priv, flow); 1987 } 1988 1989 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) 1990 { 1991 struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; 1992 1993 if (!flow_flag_test(flow, ESWITCH) || 1994 !flow_flag_test(flow, DUP)) 1995 return; 1996 1997 mutex_lock(&esw->offloads.peer_mutex); 1998 list_del(&flow->peer); 1999 mutex_unlock(&esw->offloads.peer_mutex); 2000 2001 flow_flag_clear(flow, DUP); 2002 2003 if (refcount_dec_and_test(&flow->peer_flow->refcnt)) { 2004 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); 2005 kfree(flow->peer_flow); 2006 } 2007 2008 flow->peer_flow = NULL; 2009 } 2010 2011 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) 2012 { 2013 struct mlx5_core_dev *dev = flow->priv->mdev; 2014 struct mlx5_devcom *devcom = dev->priv.devcom; 2015 struct mlx5_eswitch *peer_esw; 2016 2017 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 2018 if (!peer_esw) 2019 return; 2020 2021 __mlx5e_tc_del_fdb_peer_flow(flow); 2022 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 2023 } 2024 2025 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, 2026 struct mlx5e_tc_flow *flow) 2027 { 2028 if (mlx5e_is_eswitch_flow(flow)) { 2029 mlx5e_tc_del_fdb_peer_flow(flow); 2030 mlx5e_tc_del_fdb_flow(priv, flow); 2031 } else { 2032 mlx5e_tc_del_nic_flow(priv, flow); 2033 } 2034 } 2035 2036 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f) 2037 { 2038 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2039 struct flow_action *flow_action = &rule->action; 2040 const struct flow_action_entry *act; 2041 int i; 2042 2043 if (chain) 2044 return false; 2045 2046 flow_action_for_each(i, act, flow_action) { 2047 switch (act->id) { 2048 case FLOW_ACTION_GOTO: 2049 return true; 2050 case FLOW_ACTION_SAMPLE: 2051 return true; 2052 default: 2053 continue; 2054 } 2055 } 2056 2057 return false; 2058 } 2059 2060 static int 2061 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, 2062 struct flow_dissector_key_enc_opts *opts, 2063 struct netlink_ext_ack *extack, 2064 bool *dont_care) 2065 { 2066 struct geneve_opt *opt; 2067 int off = 0; 2068 2069 *dont_care = true; 2070 2071 while (opts->len > off) { 2072 opt = (struct geneve_opt *)&opts->data[off]; 2073 2074 if (!(*dont_care) || opt->opt_class || opt->type || 2075 memchr_inv(opt->opt_data, 0, opt->length * 4)) { 2076 *dont_care = false; 2077 2078 if (opt->opt_class != htons(U16_MAX) || 2079 opt->type != U8_MAX) { 2080 NL_SET_ERR_MSG_MOD(extack, 2081 "Partial match of tunnel options in chain > 0 isn't supported"); 2082 netdev_warn(priv->netdev, 2083 "Partial match of tunnel options in chain > 0 isn't supported"); 2084 return -EOPNOTSUPP; 2085 } 2086 } 2087 2088 off += sizeof(struct geneve_opt) + opt->length * 4; 2089 } 2090 2091 return 0; 2092 } 2093 2094 #define COPY_DISSECTOR(rule, diss_key, dst)\ 2095 ({ \ 2096 struct flow_rule *__rule = (rule);\ 2097 typeof(dst) __dst = dst;\ 2098 \ 2099 memcpy(__dst,\ 2100 skb_flow_dissector_target(__rule->match.dissector,\ 2101 diss_key,\ 2102 __rule->match.key),\ 2103 sizeof(*__dst));\ 2104 }) 2105 2106 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, 2107 struct mlx5e_tc_flow *flow, 2108 struct flow_cls_offload *f, 2109 struct net_device *filter_dev) 2110 { 
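/* The tunnel match is compressed into a single register value: the tunnel
	 * mapping id occupies the high bits and the (optional) enc opts mapping id
	 * the low ENC_OPTS_BITS bits. mlx5e_put_flow_tunnel_id() decodes the same
	 * layout when releasing the mappings.
	 */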
2111 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2112 struct netlink_ext_ack *extack = f->common.extack; 2113 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; 2114 struct flow_match_enc_opts enc_opts_match; 2115 struct tunnel_match_enc_opts tun_enc_opts; 2116 struct mlx5_rep_uplink_priv *uplink_priv; 2117 struct mlx5_flow_attr *attr = flow->attr; 2118 struct mlx5e_rep_priv *uplink_rpriv; 2119 struct tunnel_match_key tunnel_key; 2120 bool enc_opts_is_dont_care = true; 2121 u32 tun_id, enc_opts_id = 0; 2122 struct mlx5_eswitch *esw; 2123 u32 value, mask; 2124 int err; 2125 2126 esw = priv->mdev->priv.eswitch; 2127 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 2128 uplink_priv = &uplink_rpriv->uplink_priv; 2129 2130 memset(&tunnel_key, 0, sizeof(tunnel_key)); 2131 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, 2132 &tunnel_key.enc_control); 2133 if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) 2134 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, 2135 &tunnel_key.enc_ipv4); 2136 else 2137 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, 2138 &tunnel_key.enc_ipv6); 2139 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); 2140 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, 2141 &tunnel_key.enc_tp); 2142 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, 2143 &tunnel_key.enc_key_id); 2144 tunnel_key.filter_ifindex = filter_dev->ifindex; 2145 2146 err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); 2147 if (err) 2148 return err; 2149 2150 flow_rule_match_enc_opts(rule, &enc_opts_match); 2151 err = enc_opts_is_dont_care_or_full_match(priv, 2152 enc_opts_match.mask, 2153 extack, 2154 &enc_opts_is_dont_care); 2155 if (err) 2156 goto err_enc_opts; 2157 2158 if (!enc_opts_is_dont_care) { 2159 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts)); 2160 memcpy(&tun_enc_opts.key, enc_opts_match.key, 2161 sizeof(*enc_opts_match.key)); 2162 memcpy(&tun_enc_opts.mask, enc_opts_match.mask, 2163 sizeof(*enc_opts_match.mask)); 2164 2165 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, 2166 &tun_enc_opts, &enc_opts_id); 2167 if (err) 2168 goto err_enc_opts; 2169 } 2170 2171 value = tun_id << ENC_OPTS_BITS | enc_opts_id; 2172 mask = enc_opts_id ? 
TUNNEL_ID_MASK : 2173 (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); 2174 2175 if (attr->chain) { 2176 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, 2177 TUNNEL_TO_REG, value, mask); 2178 } else { 2179 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; 2180 err = mlx5e_tc_match_to_reg_set(priv->mdev, 2181 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB, 2182 TUNNEL_TO_REG, value); 2183 if (err) 2184 goto err_set; 2185 2186 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 2187 } 2188 2189 flow->attr->tunnel_id = value; 2190 return 0; 2191 2192 err_set: 2193 if (enc_opts_id) 2194 mapping_remove(uplink_priv->tunnel_enc_opts_mapping, 2195 enc_opts_id); 2196 err_enc_opts: 2197 mapping_remove(uplink_priv->tunnel_mapping, tun_id); 2198 return err; 2199 } 2200 2201 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) 2202 { 2203 u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK; 2204 u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS; 2205 struct mlx5_rep_uplink_priv *uplink_priv; 2206 struct mlx5e_rep_priv *uplink_rpriv; 2207 struct mlx5_eswitch *esw; 2208 2209 esw = flow->priv->mdev->priv.eswitch; 2210 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 2211 uplink_priv = &uplink_rpriv->uplink_priv; 2212 2213 if (tun_id) 2214 mapping_remove(uplink_priv->tunnel_mapping, tun_id); 2215 if (enc_opts_id) 2216 mapping_remove(uplink_priv->tunnel_enc_opts_mapping, 2217 enc_opts_id); 2218 } 2219 2220 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, 2221 struct flow_match_basic *match, bool outer, 2222 void *headers_c, void *headers_v) 2223 { 2224 bool ip_version_cap; 2225 2226 ip_version_cap = outer ? 2227 MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2228 ft_field_support.outer_ip_version) : 2229 MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2230 ft_field_support.inner_ip_version); 2231 2232 if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) && 2233 (match->key->n_proto == htons(ETH_P_IP) || 2234 match->key->n_proto == htons(ETH_P_IPV6))) { 2235 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); 2236 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 2237 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6); 2238 } else { 2239 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 2240 ntohs(match->mask->n_proto)); 2241 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, 2242 ntohs(match->key->n_proto)); 2243 } 2244 } 2245 2246 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer) 2247 { 2248 void *headers_v; 2249 u16 ethertype; 2250 u8 ip_version; 2251 2252 if (outer) 2253 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 2254 else 2255 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); 2256 2257 ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version); 2258 /* Return ip_version converted from ethertype anyway */ 2259 if (!ip_version) { 2260 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); 2261 if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP) 2262 ip_version = 4; 2263 else if (ethertype == ETH_P_IPV6) 2264 ip_version = 6; 2265 } 2266 return ip_version; 2267 } 2268 2269 /* Tunnel device follows RFC 6040, see include/net/inet_ecn.h. 
2270 * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
2271 * +---------+----------------------------------------+
2272 * |Arriving | Arriving Outer Header |
2273 * | Inner +---------+---------+---------+----------+
2274 * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
2275 * +---------+---------+---------+---------+----------+
2276 * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> |
2277 * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* |
2278 * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* |
2279 * | CE | CE | CE | CE | CE |
2280 * +---------+---------+---------+---------+----------+
2281 *
2282 * TC matches on the inner header after decapsulation on the tunnel device, but
2283 * HW offload matches the inner ip_ecn value before the hardware decap action.
2284 *
2285 * Cells marked with * are changed from the original inner packet ip_ecn value
2286 * during decap, so matching those values on inner ip_ecn before decap will fail.
2287 *
2288 * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
2289 * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
2290 * and as such we can drop the inner ip_ecn=CE match.
2291 */
2292
2293 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
2294 struct flow_cls_offload *f,
2295 bool *match_inner_ecn)
2296 {
2297 u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
2298 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2299 struct netlink_ext_ack *extack = f->common.extack;
2300 struct flow_match_ip match;
2301
2302 *match_inner_ecn = true;
2303
2304 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
2305 flow_rule_match_enc_ip(rule, &match);
2306 outer_ecn_key = match.key->tos & INET_ECN_MASK;
2307 outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
2308 }
2309
2310 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2311 flow_rule_match_ip(rule, &match);
2312 inner_ecn_key = match.key->tos & INET_ECN_MASK;
2313 inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
2314 }
2315
2316 if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
2317 NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
2318 netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
2319 return -EOPNOTSUPP;
2320 }
2321
2322 if (!outer_ecn_mask) {
2323 if (!inner_ecn_mask)
2324 return 0;
2325
2326 NL_SET_ERR_MSG_MOD(extack,
2327 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2328 netdev_warn(priv->netdev,
2329 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2330 return -EOPNOTSUPP;
2331 }
2332
2333 if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
2334 NL_SET_ERR_MSG_MOD(extack,
2335 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2336 netdev_warn(priv->netdev,
2337 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2338 return -EOPNOTSUPP;
2339 }
2340
2341 if (!inner_ecn_mask)
2342 return 0;
2343
2344 /* Both inner and outer have a full mask on ecn */
2345
2346 if (outer_ecn_key == INET_ECN_ECT_1) {
2347 /* inner ecn might be changed by the DECAP action */
2348
2349 NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
2350 netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
2351 return -EOPNOTSUPP;
2352 }
2353
2354 if (outer_ecn_key != INET_ECN_CE)
2355 return 0;
2356
2357 if (inner_ecn_key != INET_ECN_CE) {
2358 /* Can't happen in
software, as packet ecn will be changed to CE after decap */
2359 NL_SET_ERR_MSG_MOD(extack,
2360 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2361 netdev_warn(priv->netdev,
2362 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2363 return -EOPNOTSUPP;
2364 }
2365
2366 /* outer ecn = CE, inner ecn = CE: as decap will change inner ecn to CE in any case,
2367 * drop the match on inner ecn
2368 */
2369 *match_inner_ecn = false;
2370
2371 return 0;
2372 }
2373
2374 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2375 struct mlx5e_tc_flow *flow,
2376 struct mlx5_flow_spec *spec,
2377 struct flow_cls_offload *f,
2378 struct net_device *filter_dev,
2379 u8 *match_level,
2380 bool *match_inner)
2381 {
2382 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2383 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2384 struct netlink_ext_ack *extack = f->common.extack;
2385 bool needs_mapping, sets_mapping;
2386 int err;
2387
2388 if (!mlx5e_is_eswitch_flow(flow)) {
2389 NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2390 return -EOPNOTSUPP;
2391 }
2392
2393 needs_mapping = !!flow->attr->chain;
2394 sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2395 *match_inner = !needs_mapping;
2396
2397 if ((needs_mapping || sets_mapping) &&
2398 !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2399 NL_SET_ERR_MSG_MOD(extack,
2400 "Chains on tunnel devices aren't supported without register loopback support");
2401 netdev_warn(priv->netdev,
2402 "Chains on tunnel devices aren't supported without register loopback support");
2403 return -EOPNOTSUPP;
2404 }
2405
2406 if (!flow->attr->chain) {
2407 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2408 match_level);
2409 if (err) {
2410 NL_SET_ERR_MSG_MOD(extack,
2411 "Failed to parse tunnel attributes");
2412 netdev_warn(priv->netdev,
2413 "Failed to parse tunnel attributes");
2414 return err;
2415 }
2416
2417 /* With MPLS over UDP we decapsulate using a packet reformat
2418 * object
2419 */
2420 if (!netif_is_bareudp(filter_dev))
2421 flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2422 err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2423 if (err)
2424 return err;
2425 } else if (tunnel) {
2426 struct mlx5_flow_spec *tmp_spec;
2427
2428 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2429 if (!tmp_spec) {
2430 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec");
2431 netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec");
2432 return -ENOMEM;
2433 }
2434 memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2435
2436 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2437 if (err) {
2438 kvfree(tmp_spec);
2439 NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2440 netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2441 return err;
2442 }
2443 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2444 kvfree(tmp_spec);
2445 if (err)
2446 return err;
2447 }
2448
2449 if (!needs_mapping && !sets_mapping)
2450 return 0;
2451
2452 return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2453 }
2454
2455 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2456 {
2457 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2458 inner_headers);
2459 }
2460
2461 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2462 {
2463 return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2464 inner_headers);
2465 }
2466
2467 static void
*get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) 2468 { 2469 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2470 outer_headers); 2471 } 2472 2473 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) 2474 { 2475 return MLX5_ADDR_OF(fte_match_param, spec->match_value, 2476 outer_headers); 2477 } 2478 2479 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) 2480 { 2481 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 2482 get_match_inner_headers_value(spec) : 2483 get_match_outer_headers_value(spec); 2484 } 2485 2486 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec) 2487 { 2488 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 2489 get_match_inner_headers_criteria(spec) : 2490 get_match_outer_headers_criteria(spec); 2491 } 2492 2493 static int mlx5e_flower_parse_meta(struct net_device *filter_dev, 2494 struct flow_cls_offload *f) 2495 { 2496 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2497 struct netlink_ext_ack *extack = f->common.extack; 2498 struct net_device *ingress_dev; 2499 struct flow_match_meta match; 2500 2501 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) 2502 return 0; 2503 2504 flow_rule_match_meta(rule, &match); 2505 if (!match.mask->ingress_ifindex) 2506 return 0; 2507 2508 if (match.mask->ingress_ifindex != 0xFFFFFFFF) { 2509 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); 2510 return -EOPNOTSUPP; 2511 } 2512 2513 ingress_dev = __dev_get_by_index(dev_net(filter_dev), 2514 match.key->ingress_ifindex); 2515 if (!ingress_dev) { 2516 NL_SET_ERR_MSG_MOD(extack, 2517 "Can't find the ingress port to match on"); 2518 return -ENOENT; 2519 } 2520 2521 if (ingress_dev != filter_dev) { 2522 NL_SET_ERR_MSG_MOD(extack, 2523 "Can't match on the ingress filter port"); 2524 return -EOPNOTSUPP; 2525 } 2526 2527 return 0; 2528 } 2529 2530 static bool skip_key_basic(struct net_device *filter_dev, 2531 struct flow_cls_offload *f) 2532 { 2533 /* When doing mpls over udp decap, the user needs to provide 2534 * MPLS_UC as the protocol in order to be able to match on mpls 2535 * label fields. However, the actual ethertype is IP so we want to 2536 * avoid matching on this, otherwise we'll fail the match. 
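* E.g. an mpls_uc flower filter on a bareudp device carries n_proto
 * ETH_P_MPLS_UC while the ethertype seen by the HW is the IP one, so the
 * basic key is skipped for chain 0 rules.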
2537 */ 2538 if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0) 2539 return true; 2540 2541 return false; 2542 } 2543 2544 static int __parse_cls_flower(struct mlx5e_priv *priv, 2545 struct mlx5e_tc_flow *flow, 2546 struct mlx5_flow_spec *spec, 2547 struct flow_cls_offload *f, 2548 struct net_device *filter_dev, 2549 u8 *inner_match_level, u8 *outer_match_level) 2550 { 2551 struct netlink_ext_ack *extack = f->common.extack; 2552 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2553 outer_headers); 2554 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2555 outer_headers); 2556 void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2557 misc_parameters); 2558 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2559 misc_parameters); 2560 void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2561 misc_parameters_3); 2562 void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2563 misc_parameters_3); 2564 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2565 struct flow_dissector *dissector = rule->match.dissector; 2566 enum fs_flow_table_type fs_type; 2567 bool match_inner_ecn = true; 2568 u16 addr_type = 0; 2569 u8 ip_proto = 0; 2570 u8 *match_level; 2571 int err; 2572 2573 fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX; 2574 match_level = outer_match_level; 2575 2576 if (dissector->used_keys & 2577 ~(BIT(FLOW_DISSECTOR_KEY_META) | 2578 BIT(FLOW_DISSECTOR_KEY_CONTROL) | 2579 BIT(FLOW_DISSECTOR_KEY_BASIC) | 2580 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | 2581 BIT(FLOW_DISSECTOR_KEY_VLAN) | 2582 BIT(FLOW_DISSECTOR_KEY_CVLAN) | 2583 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | 2584 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | 2585 BIT(FLOW_DISSECTOR_KEY_PORTS) | 2586 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | 2587 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | 2588 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | 2589 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | 2590 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | 2591 BIT(FLOW_DISSECTOR_KEY_TCP) | 2592 BIT(FLOW_DISSECTOR_KEY_IP) | 2593 BIT(FLOW_DISSECTOR_KEY_CT) | 2594 BIT(FLOW_DISSECTOR_KEY_ENC_IP) | 2595 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | 2596 BIT(FLOW_DISSECTOR_KEY_ICMP) | 2597 BIT(FLOW_DISSECTOR_KEY_MPLS))) { 2598 NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); 2599 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", 2600 dissector->used_keys); 2601 return -EOPNOTSUPP; 2602 } 2603 2604 if (mlx5e_get_tc_tun(filter_dev)) { 2605 bool match_inner = false; 2606 2607 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, 2608 outer_match_level, &match_inner); 2609 if (err) 2610 return err; 2611 2612 if (match_inner) { 2613 /* header pointers should point to the inner headers 2614 * if the packet was decapsulated already. 2615 * outer headers are set by parse_tunnel_attr. 
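* From this point match_level tracks the inner headers' match level.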
2616 */ 2617 match_level = inner_match_level; 2618 headers_c = get_match_inner_headers_criteria(spec); 2619 headers_v = get_match_inner_headers_value(spec); 2620 } 2621 2622 err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn); 2623 if (err) 2624 return err; 2625 } 2626 2627 err = mlx5e_flower_parse_meta(filter_dev, f); 2628 if (err) 2629 return err; 2630 2631 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) && 2632 !skip_key_basic(filter_dev, f)) { 2633 struct flow_match_basic match; 2634 2635 flow_rule_match_basic(rule, &match); 2636 mlx5e_tc_set_ethertype(priv->mdev, &match, 2637 match_level == outer_match_level, 2638 headers_c, headers_v); 2639 2640 if (match.mask->n_proto) 2641 *match_level = MLX5_MATCH_L2; 2642 } 2643 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) || 2644 is_vlan_dev(filter_dev)) { 2645 struct flow_dissector_key_vlan filter_dev_mask; 2646 struct flow_dissector_key_vlan filter_dev_key; 2647 struct flow_match_vlan match; 2648 2649 if (is_vlan_dev(filter_dev)) { 2650 match.key = &filter_dev_key; 2651 match.key->vlan_id = vlan_dev_vlan_id(filter_dev); 2652 match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev); 2653 match.key->vlan_priority = 0; 2654 match.mask = &filter_dev_mask; 2655 memset(match.mask, 0xff, sizeof(*match.mask)); 2656 match.mask->vlan_priority = 0; 2657 } else { 2658 flow_rule_match_vlan(rule, &match); 2659 } 2660 if (match.mask->vlan_id || 2661 match.mask->vlan_priority || 2662 match.mask->vlan_tpid) { 2663 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { 2664 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2665 svlan_tag, 1); 2666 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2667 svlan_tag, 1); 2668 } else { 2669 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2670 cvlan_tag, 1); 2671 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2672 cvlan_tag, 1); 2673 } 2674 2675 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, 2676 match.mask->vlan_id); 2677 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, 2678 match.key->vlan_id); 2679 2680 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, 2681 match.mask->vlan_priority); 2682 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, 2683 match.key->vlan_priority); 2684 2685 *match_level = MLX5_MATCH_L2; 2686 2687 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) && 2688 match.mask->vlan_eth_type && 2689 MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, 2690 ft_field_support.outer_second_vid, 2691 fs_type)) { 2692 MLX5_SET(fte_match_set_misc, misc_c, 2693 outer_second_cvlan_tag, 1); 2694 spec->match_criteria_enable |= 2695 MLX5_MATCH_MISC_PARAMETERS; 2696 } 2697 } 2698 } else if (*match_level != MLX5_MATCH_NONE) { 2699 /* cvlan_tag enabled in match criteria and 2700 * disabled in match value means both S & C tags 2701 * don't exist (untagged of both) 2702 */ 2703 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 2704 *match_level = MLX5_MATCH_L2; 2705 } 2706 2707 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { 2708 struct flow_match_vlan match; 2709 2710 flow_rule_match_cvlan(rule, &match); 2711 if (match.mask->vlan_id || 2712 match.mask->vlan_priority || 2713 match.mask->vlan_tpid) { 2714 if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid, 2715 fs_type)) { 2716 NL_SET_ERR_MSG_MOD(extack, 2717 "Matching on CVLAN is not supported"); 2718 return -EOPNOTSUPP; 2719 } 2720 2721 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { 2722 MLX5_SET(fte_match_set_misc, misc_c, 2723 outer_second_svlan_tag, 1); 2724 MLX5_SET(fte_match_set_misc, misc_v, 2725 
outer_second_svlan_tag, 1); 2726 } else { 2727 MLX5_SET(fte_match_set_misc, misc_c, 2728 outer_second_cvlan_tag, 1); 2729 MLX5_SET(fte_match_set_misc, misc_v, 2730 outer_second_cvlan_tag, 1); 2731 } 2732 2733 MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, 2734 match.mask->vlan_id); 2735 MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, 2736 match.key->vlan_id); 2737 MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, 2738 match.mask->vlan_priority); 2739 MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, 2740 match.key->vlan_priority); 2741 2742 *match_level = MLX5_MATCH_L2; 2743 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; 2744 } 2745 } 2746 2747 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 2748 struct flow_match_eth_addrs match; 2749 2750 flow_rule_match_eth_addrs(rule, &match); 2751 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2752 dmac_47_16), 2753 match.mask->dst); 2754 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2755 dmac_47_16), 2756 match.key->dst); 2757 2758 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2759 smac_47_16), 2760 match.mask->src); 2761 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2762 smac_47_16), 2763 match.key->src); 2764 2765 if (!is_zero_ether_addr(match.mask->src) || 2766 !is_zero_ether_addr(match.mask->dst)) 2767 *match_level = MLX5_MATCH_L2; 2768 } 2769 2770 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 2771 struct flow_match_control match; 2772 2773 flow_rule_match_control(rule, &match); 2774 addr_type = match.key->addr_type; 2775 2776 /* the HW doesn't support frag first/later */ 2777 if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { 2778 NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported"); 2779 return -EOPNOTSUPP; 2780 } 2781 2782 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { 2783 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); 2784 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 2785 match.key->flags & FLOW_DIS_IS_FRAGMENT); 2786 2787 /* the HW doesn't need L3 inline to match on frag=no */ 2788 if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) 2789 *match_level = MLX5_MATCH_L2; 2790 /* *** L2 attributes parsing up to here *** */ 2791 else 2792 *match_level = MLX5_MATCH_L3; 2793 } 2794 } 2795 2796 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 2797 struct flow_match_basic match; 2798 2799 flow_rule_match_basic(rule, &match); 2800 ip_proto = match.key->ip_proto; 2801 2802 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 2803 match.mask->ip_proto); 2804 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 2805 match.key->ip_proto); 2806 2807 if (match.mask->ip_proto) 2808 *match_level = MLX5_MATCH_L3; 2809 } 2810 2811 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 2812 struct flow_match_ipv4_addrs match; 2813 2814 flow_rule_match_ipv4_addrs(rule, &match); 2815 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2816 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2817 &match.mask->src, sizeof(match.mask->src)); 2818 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2819 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2820 &match.key->src, sizeof(match.key->src)); 2821 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2822 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2823 &match.mask->dst, sizeof(match.mask->dst)); 2824 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2825 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2826 &match.key->dst, sizeof(match.key->dst)); 2827 2828 if (match.mask->src || 
match.mask->dst) 2829 *match_level = MLX5_MATCH_L3; 2830 } 2831 2832 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 2833 struct flow_match_ipv6_addrs match; 2834 2835 flow_rule_match_ipv6_addrs(rule, &match); 2836 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2837 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2838 &match.mask->src, sizeof(match.mask->src)); 2839 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2840 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2841 &match.key->src, sizeof(match.key->src)); 2842 2843 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2844 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2845 &match.mask->dst, sizeof(match.mask->dst)); 2846 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2847 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2848 &match.key->dst, sizeof(match.key->dst)); 2849 2850 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || 2851 ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) 2852 *match_level = MLX5_MATCH_L3; 2853 } 2854 2855 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { 2856 struct flow_match_ip match; 2857 2858 flow_rule_match_ip(rule, &match); 2859 if (match_inner_ecn) { 2860 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, 2861 match.mask->tos & 0x3); 2862 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, 2863 match.key->tos & 0x3); 2864 } 2865 2866 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, 2867 match.mask->tos >> 2); 2868 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, 2869 match.key->tos >> 2); 2870 2871 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, 2872 match.mask->ttl); 2873 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, 2874 match.key->ttl); 2875 2876 if (match.mask->ttl && 2877 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, 2878 ft_field_support.outer_ipv4_ttl)) { 2879 NL_SET_ERR_MSG_MOD(extack, 2880 "Matching on TTL is not supported"); 2881 return -EOPNOTSUPP; 2882 } 2883 2884 if (match.mask->tos || match.mask->ttl) 2885 *match_level = MLX5_MATCH_L3; 2886 } 2887 2888 /* *** L3 attributes parsing up to here *** */ 2889 2890 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 2891 struct flow_match_ports match; 2892 2893 flow_rule_match_ports(rule, &match); 2894 switch (ip_proto) { 2895 case IPPROTO_TCP: 2896 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2897 tcp_sport, ntohs(match.mask->src)); 2898 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2899 tcp_sport, ntohs(match.key->src)); 2900 2901 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2902 tcp_dport, ntohs(match.mask->dst)); 2903 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2904 tcp_dport, ntohs(match.key->dst)); 2905 break; 2906 2907 case IPPROTO_UDP: 2908 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2909 udp_sport, ntohs(match.mask->src)); 2910 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2911 udp_sport, ntohs(match.key->src)); 2912 2913 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2914 udp_dport, ntohs(match.mask->dst)); 2915 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2916 udp_dport, ntohs(match.key->dst)); 2917 break; 2918 default: 2919 NL_SET_ERR_MSG_MOD(extack, 2920 "Only UDP and TCP transports are supported for L4 matching"); 2921 netdev_err(priv->netdev, 2922 "Only UDP and TCP transport are supported\n"); 2923 return -EINVAL; 2924 } 2925 2926 if (match.mask->src || match.mask->dst) 2927 *match_level = MLX5_MATCH_L4; 2928 } 2929 2930 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { 2931 struct flow_match_tcp match; 2932 2933 flow_rule_match_tcp(rule, &match); 2934 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, 2935 
ntohs(match.mask->flags)); 2936 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, 2937 ntohs(match.key->flags)); 2938 2939 if (match.mask->flags) 2940 *match_level = MLX5_MATCH_L4; 2941 } 2942 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) { 2943 struct flow_match_icmp match; 2944 2945 flow_rule_match_icmp(rule, &match); 2946 switch (ip_proto) { 2947 case IPPROTO_ICMP: 2948 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & 2949 MLX5_FLEX_PROTO_ICMP)) { 2950 NL_SET_ERR_MSG_MOD(extack, 2951 "Match on Flex protocols for ICMP is not supported"); 2952 return -EOPNOTSUPP; 2953 } 2954 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, 2955 match.mask->type); 2956 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, 2957 match.key->type); 2958 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code, 2959 match.mask->code); 2960 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code, 2961 match.key->code); 2962 break; 2963 case IPPROTO_ICMPV6: 2964 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & 2965 MLX5_FLEX_PROTO_ICMPV6)) { 2966 NL_SET_ERR_MSG_MOD(extack, 2967 "Match on Flex protocols for ICMPV6 is not supported"); 2968 return -EOPNOTSUPP; 2969 } 2970 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, 2971 match.mask->type); 2972 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, 2973 match.key->type); 2974 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code, 2975 match.mask->code); 2976 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code, 2977 match.key->code); 2978 break; 2979 default: 2980 NL_SET_ERR_MSG_MOD(extack, 2981 "Code and type matching only with ICMP and ICMPv6"); 2982 netdev_err(priv->netdev, 2983 "Code and type matching only with ICMP and ICMPv6\n"); 2984 return -EINVAL; 2985 } 2986 if (match.mask->code || match.mask->type) { 2987 *match_level = MLX5_MATCH_L4; 2988 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; 2989 } 2990 } 2991 /* Currently supported only for MPLS over UDP */ 2992 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && 2993 !netif_is_bareudp(filter_dev)) { 2994 NL_SET_ERR_MSG_MOD(extack, 2995 "Matching on MPLS is supported only for MPLS over UDP"); 2996 netdev_err(priv->netdev, 2997 "Matching on MPLS is supported only for MPLS over UDP\n"); 2998 return -EOPNOTSUPP; 2999 } 3000 3001 return 0; 3002 } 3003 3004 static int parse_cls_flower(struct mlx5e_priv *priv, 3005 struct mlx5e_tc_flow *flow, 3006 struct mlx5_flow_spec *spec, 3007 struct flow_cls_offload *f, 3008 struct net_device *filter_dev) 3009 { 3010 u8 inner_match_level, outer_match_level, non_tunnel_match_level; 3011 struct netlink_ext_ack *extack = f->common.extack; 3012 struct mlx5_core_dev *dev = priv->mdev; 3013 struct mlx5_eswitch *esw = dev->priv.eswitch; 3014 struct mlx5e_rep_priv *rpriv = priv->ppriv; 3015 struct mlx5_eswitch_rep *rep; 3016 bool is_eswitch_flow; 3017 int err; 3018 3019 inner_match_level = MLX5_MATCH_NONE; 3020 outer_match_level = MLX5_MATCH_NONE; 3021 3022 err = __parse_cls_flower(priv, flow, spec, f, filter_dev, 3023 &inner_match_level, &outer_match_level); 3024 non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? 
3025 outer_match_level : inner_match_level; 3026 3027 is_eswitch_flow = mlx5e_is_eswitch_flow(flow); 3028 if (!err && is_eswitch_flow) { 3029 rep = rpriv->rep; 3030 if (rep->vport != MLX5_VPORT_UPLINK && 3031 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && 3032 esw->offloads.inline_mode < non_tunnel_match_level)) { 3033 NL_SET_ERR_MSG_MOD(extack, 3034 "Flow is not offloaded due to min inline setting"); 3035 netdev_warn(priv->netdev, 3036 "Flow is not offloaded due to min inline setting, required %d actual %d\n", 3037 non_tunnel_match_level, esw->offloads.inline_mode); 3038 return -EOPNOTSUPP; 3039 } 3040 } 3041 3042 flow->attr->inner_match_level = inner_match_level; 3043 flow->attr->outer_match_level = outer_match_level; 3044 3045 3046 return err; 3047 } 3048 3049 struct mlx5_fields { 3050 u8 field; 3051 u8 field_bsize; 3052 u32 field_mask; 3053 u32 offset; 3054 u32 match_offset; 3055 }; 3056 3057 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \ 3058 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \ 3059 offsetof(struct pedit_headers, field) + (off), \ 3060 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} 3061 3062 /* masked values are the same and there are no rewrites that do not have a 3063 * match. 3064 */ 3065 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \ 3066 type matchmaskx = *(type *)(matchmaskp); \ 3067 type matchvalx = *(type *)(matchvalp); \ 3068 type maskx = *(type *)(maskp); \ 3069 type valx = *(type *)(valp); \ 3070 \ 3071 (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \ 3072 matchmaskx)); \ 3073 }) 3074 3075 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, 3076 void *matchmaskp, u8 bsize) 3077 { 3078 bool same = false; 3079 3080 switch (bsize) { 3081 case 8: 3082 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp); 3083 break; 3084 case 16: 3085 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp); 3086 break; 3087 case 32: 3088 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp); 3089 break; 3090 } 3091 3092 return same; 3093 } 3094 3095 static struct mlx5_fields fields[] = { 3096 OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16), 3097 OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0), 3098 OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16), 3099 OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0), 3100 OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype), 3101 OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid), 3102 3103 OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp), 3104 OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit), 3105 OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4), 3106 OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 3107 3108 OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0, 3109 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]), 3110 OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0, 3111 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]), 3112 OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0, 3113 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]), 3114 OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0, 3115 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]), 3116 OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0, 3117 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]), 3118 OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0, 3119 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]), 3120 
OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
3121 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
3122 OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
3123 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
3124 OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
3125 OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),
3126
3127 OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
3128 OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
3129 /* in the linux tcphdr, tcp_flags is 8 bits long */
3130 OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags),
3131
3132 OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
3133 OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
3134 };
3135
3136 static unsigned long mask_to_le(unsigned long mask, int size)
3137 {
3138 __be32 mask_be32;
3139 __be16 mask_be16;
3140
3141 if (size == 32) {
3142 mask_be32 = (__force __be32)(mask);
3143 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
3144 } else if (size == 16) {
3145 mask_be32 = (__force __be32)(mask);
3146 mask_be16 = *(__be16 *)&mask_be32;
3147 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
3148 }
3149
3150 return mask;
3151 }
3152
3153 static int offload_pedit_fields(struct mlx5e_priv *priv,
3154 int namespace,
3155 struct mlx5e_tc_flow_parse_attr *parse_attr,
3156 u32 *action_flags,
3157 struct netlink_ext_ack *extack)
3158 {
3159 struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
3160 struct pedit_headers_action *hdrs = parse_attr->hdrs;
3161 void *headers_c, *headers_v, *action, *vals_p;
3162 u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
3163 struct mlx5e_tc_mod_hdr_acts *mod_acts;
3164 unsigned long mask, field_mask;
3165 int i, first, last, next_z;
3166 struct mlx5_fields *f;
3167 u8 cmd;
3168
3169 mod_acts = &parse_attr->mod_hdr_acts;
3170 headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
3171 headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
3172
3173 set_masks = &hdrs[0].masks;
3174 add_masks = &hdrs[1].masks;
3175 set_vals = &hdrs[0].vals;
3176 add_vals = &hdrs[1].vals;
3177
3178 for (i = 0; i < ARRAY_SIZE(fields); i++) {
3179 bool skip;
3180
3181 f = &fields[i];
3182 /* avoid seeing bits set from previous iterations */
3183 s_mask = 0;
3184 a_mask = 0;
3185
3186 s_masks_p = (void *)set_masks + f->offset;
3187 a_masks_p = (void *)add_masks + f->offset;
3188
3189 s_mask = *s_masks_p & f->field_mask;
3190 a_mask = *a_masks_p & f->field_mask;
3191
3192 if (!s_mask && !a_mask) /* nothing to offload here */
3193 continue;
3194
3195 if (s_mask && a_mask) {
3196 NL_SET_ERR_MSG_MOD(extack,
3197 "can't set and add to the same HW field");
3198 netdev_warn(priv->netdev,
3199 "mlx5: can't set and add to the same HW field (%x)\n",
3200 f->field);
3201 return -EOPNOTSUPP;
3202 }
3203
3204 skip = false;
3205 if (s_mask) {
3206 void *match_mask = headers_c + f->match_offset;
3207 void *match_val = headers_v + f->match_offset;
3208
3209 cmd = MLX5_ACTION_TYPE_SET;
3210 mask = s_mask;
3211 vals_p = (void *)set_vals + f->offset;
3212 /* don't rewrite if we have a match on the same value */
3213 if (cmp_val_mask(vals_p, s_masks_p, match_val,
3214 match_mask, f->field_bsize))
3215 skip = true;
3216 /* clear to denote we consumed this field */
3217 *s_masks_p &= ~f->field_mask;
3218 } else {
3219 cmd = MLX5_ACTION_TYPE_ADD;
3220 mask = a_mask;
3221 vals_p = (void *)add_vals + f->offset;
3222 /* adding 0 is no change */
3223 if ((*(u32 *)vals_p & f->field_mask) == 0)
3224 skip = true;
3225 /* clear to denote we consumed this field */
3226 *a_masks_p &= ~f->field_mask;
3227 }
3228 if (skip)
3229 continue;
3230
3231 mask = mask_to_le(mask, f->field_bsize);
3232
3233 first = find_first_bit(&mask, f->field_bsize);
3234 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
3235 last = find_last_bit(&mask, f->field_bsize);
3236 if (first < next_z && next_z < last) {
3237 NL_SET_ERR_MSG_MOD(extack,
3238 "rewrite of non-consecutive sub-fields isn't supported");
3239 netdev_warn(priv->netdev,
3240 "mlx5: rewrite of non-consecutive sub-fields (mask %lx) isn't offloaded\n",
3241 mask);
3242 return -EOPNOTSUPP;
3243 }
3244
3245 action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
3246 if (IS_ERR(action)) {
3247 NL_SET_ERR_MSG_MOD(extack,
3248 "too many pedit actions, can't offload");
3249 mlx5_core_warn(priv->mdev,
3250 "mlx5: parsed %d pedit actions, can't do more\n",
3251 mod_acts->num_actions);
3252 return PTR_ERR(action);
3253 }
3254
3255 MLX5_SET(set_action_in, action, action_type, cmd);
3256 MLX5_SET(set_action_in, action, field, f->field);
3257
3258 if (cmd == MLX5_ACTION_TYPE_SET) {
3259 int start;
3260
3261 field_mask = mask_to_le(f->field_mask, f->field_bsize);
3262
3263 /* a bit-sized field might not start at the first bit */
3264 start = find_first_bit(&field_mask, f->field_bsize);
3265
3266 MLX5_SET(set_action_in, action, offset, first - start);
3267 /* length is the number of bits to be written; zero means 32 */
3268 MLX5_SET(set_action_in, action, length, (last - first + 1));
3269 }
3270
3271 if (f->field_bsize == 32)
3272 MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
3273 else if (f->field_bsize == 16)
3274 MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
3275 else if (f->field_bsize == 8)
3276 MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
3277
3278 ++mod_acts->num_actions;
3279 }
3280
3281 return 0;
3282 }
3283
3284 static const struct pedit_headers zero_masks = {};
3285
3286 static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
3287 struct mlx5e_tc_flow_parse_attr *parse_attr,
3288 struct netlink_ext_ack *extack)
3289 {
3290 struct pedit_headers *cmd_masks;
3291 u8 cmd;
3292
3293 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3294 cmd_masks = &parse_attr->hdrs[cmd].masks;
3295 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3296 NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
3297 netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3298 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3299 16, 1, cmd_masks, sizeof(zero_masks), true);
3300 return -EOPNOTSUPP;
3301 }
3302 }
3303
3304 return 0;
3305 }
3306
3307 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3308 struct mlx5e_tc_flow_parse_attr *parse_attr,
3309 u32 *action_flags,
3310 struct netlink_ext_ack *extack)
3311 {
3312 int err;
3313
3314 err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
3315 if (err)
3316 goto out_dealloc_parsed_actions;
3317
3318 err = verify_offload_pedit_fields(priv, parse_attr, extack);
3319 if (err)
3320 goto out_dealloc_parsed_actions;
3321
3322 return 0;
3323
3324 out_dealloc_parsed_actions:
3325 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3326 return err;
3327 }
3328
3329 struct ip_ttl_word {
3330 __u8 ttl;
3331 __u8 protocol;
3332 __sum16 check;
3333 };
3334
3335 struct ipv6_hoplimit_word {
3336 __be16 payload_len;
3337 __u8 nexthdr;
3338 __u8 hop_limit;
3339 };
3340
3341 static bool
3342 is_flow_action_modify_ip_header(struct flow_action *flow_action)
3343 {
3344 const struct flow_action_entry *act;
3345 u32 mask, offset;
3346 u8 htype;
3347 int i;
3348
3349 /* For the IPv4 and IPv6 headers, check the 4-byte word holding ttl/hop_limit
3350 * to determine whether any fields other than ttl & hop_limit
3351 * are modified.
3352 */
3353 flow_action_for_each(i, act, flow_action) {
3354 if (act->id != FLOW_ACTION_MANGLE &&
3355 act->id != FLOW_ACTION_ADD)
3356 continue;
3357
3358 htype = act->mangle.htype;
3359 offset = act->mangle.offset;
3360 mask = ~act->mangle.mask;
3361
3362 if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3363 struct ip_ttl_word *ttl_word =
3364 (struct ip_ttl_word *)&mask;
3365
3366 if (offset != offsetof(struct iphdr, ttl) ||
3367 ttl_word->protocol ||
3368 ttl_word->check)
3369 return true;
3370 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3371 struct ipv6_hoplimit_word *hoplimit_word =
3372 (struct ipv6_hoplimit_word *)&mask;
3373
3374 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3375 hoplimit_word->payload_len ||
3376 hoplimit_word->nexthdr)
3377 return true;
3378 }
3379 }
3380
3381 return false;
3382 }
3383
3384 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3385 struct mlx5_flow_spec *spec,
3386 struct flow_action *flow_action,
3387 u32 actions,
3388 struct netlink_ext_ack *extack)
3389 {
3390 bool modify_ip_header;
3391 void *headers_c;
3392 void *headers_v;
3393 u16 ethertype;
3394 u8 ip_proto;
3395
3396 headers_c = mlx5e_get_match_headers_criteria(actions, spec);
3397 headers_v = mlx5e_get_match_headers_value(actions, spec);
3398 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3399
3400 /* for non-IP we only re-write MACs, so we're okay */
3401 if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3402 ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3403 goto out_ok;
3404
3405 modify_ip_header = is_flow_action_modify_ip_header(flow_action);
3406 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3407 if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3408 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3409 NL_SET_ERR_MSG_MOD(extack,
3410 "can't offload re-write of non TCP/UDP/ICMP");
3411 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3412 ip_proto);
3413 return false;
3414 }
3415
3416 out_ok:
3417 return true;
3418 }
3419
3420 static bool
3421 actions_match_supported_fdb(struct mlx5e_priv *priv,
3422 struct mlx5e_tc_flow *flow,
3423 struct netlink_ext_ack *extack)
3424 {
3425 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3426
3427 if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3428 NL_SET_ERR_MSG_MOD(extack,
3429 "current firmware doesn't support split rule for port mirroring");
3430 netdev_warn_once(priv->netdev,
3431 "current firmware doesn't support split rule for port mirroring\n");
3432 return false;
3433 }
3434
3435 return true;
3436 }
3437
3438 static bool
3439 actions_match_supported(struct mlx5e_priv *priv,
3440 struct flow_action *flow_action,
3441 u32 actions,
3442 struct mlx5e_tc_flow_parse_attr *parse_attr,
3443 struct mlx5e_tc_flow *flow,
3444 struct netlink_ext_ack *extack)
3445 {
3446 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3447 !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions,
3448 extack))
3449 return false;
3450
3451 if (mlx5e_is_eswitch_flow(flow) &&
3452 !actions_match_supported_fdb(priv, flow, extack))
3453 return false;
3454
3455 return true;
3456 }
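/* A minimal sketch of the expected call pattern (illustrative only, the
 * action parsers below are the actual callers):
 *
 *	err = parse_tc_actions(parse_state, flow_action);
 *	if (err)
 *		return err;
 *	if (!actions_match_supported(priv, flow_action, attr->action,
 *				     parse_attr, flow, extack))
 *		return -EOPNOTSUPP;
 */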
3457 3458 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 3459 { 3460 return priv->mdev == peer_priv->mdev; 3461 } 3462 3463 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 3464 { 3465 struct mlx5_core_dev *fmdev, *pmdev; 3466 u64 fsystem_guid, psystem_guid; 3467 3468 fmdev = priv->mdev; 3469 pmdev = peer_priv->mdev; 3470 3471 fsystem_guid = mlx5_query_nic_system_image_guid(fmdev); 3472 psystem_guid = mlx5_query_nic_system_image_guid(pmdev); 3473 3474 return (fsystem_guid == psystem_guid); 3475 } 3476 3477 static int 3478 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, 3479 struct mlx5e_tc_flow *flow, 3480 struct mlx5_flow_attr *attr, 3481 struct netlink_ext_ack *extack) 3482 { 3483 struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; 3484 struct pedit_headers_action *hdrs = parse_attr->hdrs; 3485 enum mlx5_flow_namespace_type ns_type; 3486 int err; 3487 3488 if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits && 3489 !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) 3490 return 0; 3491 3492 ns_type = mlx5e_get_flow_namespace(flow); 3493 3494 err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack); 3495 if (err) 3496 return err; 3497 3498 if (parse_attr->mod_hdr_acts.num_actions > 0) 3499 return 0; 3500 3501 /* In case all pedit actions are skipped, remove the MOD_HDR flag. */ 3502 attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3503 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); 3504 3505 if (ns_type != MLX5_FLOW_NAMESPACE_FDB) 3506 return 0; 3507 3508 if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || 3509 (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) 3510 attr->esw_attr->split_count = 0; 3511 3512 return 0; 3513 } 3514 3515 static struct mlx5_flow_attr* 3516 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, 3517 enum mlx5_flow_namespace_type ns_type) 3518 { 3519 struct mlx5e_tc_flow_parse_attr *parse_attr; 3520 u32 attr_sz = ns_to_attr_sz(ns_type); 3521 struct mlx5_flow_attr *attr2; 3522 3523 attr2 = mlx5_alloc_flow_attr(ns_type); 3524 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); 3525 if (!attr2 || !parse_attr) { 3526 kvfree(parse_attr); 3527 kfree(attr2); 3528 return NULL; 3529 } 3530 3531 memcpy(attr2, attr, attr_sz); 3532 INIT_LIST_HEAD(&attr2->list); 3533 parse_attr->filter_dev = attr->parse_attr->filter_dev; 3534 attr2->action = 0; 3535 attr2->counter = NULL; 3536 attr2->tc_act_cookies_count = 0; 3537 attr2->flags = 0; 3538 attr2->parse_attr = parse_attr; 3539 attr2->dest_chain = 0; 3540 attr2->dest_ft = NULL; 3541 attr2->act_id_restore_rule = NULL; 3542 memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr)); 3543 3544 if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { 3545 attr2->esw_attr->out_count = 0; 3546 attr2->esw_attr->split_count = 0; 3547 } 3548 3549 attr2->branch_true = NULL; 3550 attr2->branch_false = NULL; 3551 attr2->jumping_attr = NULL; 3552 return attr2; 3553 } 3554 3555 struct mlx5_flow_attr * 3556 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow) 3557 { 3558 struct mlx5_esw_flow_attr *esw_attr; 3559 struct mlx5_flow_attr *attr; 3560 int i; 3561 3562 list_for_each_entry(attr, &flow->attrs, list) { 3563 esw_attr = attr->esw_attr; 3564 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { 3565 if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) 3566 return attr; 3567 } 3568 } 3569 3570 return NULL; 3571 } 3572 3573 void 3574 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow) 3575 { 3576 struct mlx5e_post_act *post_act = 
get_post_action(flow->priv);
3577 struct mlx5_flow_attr *attr;
3578
3579 list_for_each_entry(attr, &flow->attrs, list) {
3580 if (list_is_last(&attr->list, &flow->attrs))
3581 break;
3582
3583 mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
3584 }
3585 }
3586
3587 static void
3588 free_flow_post_acts(struct mlx5e_tc_flow *flow)
3589 {
3590 struct mlx5_flow_attr *attr, *tmp;
3591
3592 list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
3593 if (list_is_last(&attr->list, &flow->attrs))
3594 break;
3595
3596 mlx5_free_flow_attr_actions(flow, attr);
3597
3598 list_del(&attr->list);
3599 kvfree(attr->parse_attr);
3600 kfree(attr);
3601 }
3602 }
3603
3604 int
3605 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
3606 {
3607 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3608 struct mlx5_flow_attr *attr;
3609 int err = 0;
3610
3611 list_for_each_entry(attr, &flow->attrs, list) {
3612 if (list_is_last(&attr->list, &flow->attrs))
3613 break;
3614
3615 err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
3616 if (err)
3617 break;
3618 }
3619
3620 return err;
3621 }
3622
3623 /* TC filter rule HW translation:
3624 *
3625 * +---------------------+
3626 * + ft prio (tc chain) +
3627 * + original match +
3628 * +---------------------+
3629 * |
3630 * | if multi table action
3631 * |
3632 * v
3633 * +---------------------+
3634 * + post act ft |<----.
3635 * + match fte id | | split on multi table action
3636 * + do actions |-----'
3637 * +---------------------+
3638 * |
3639 * |
3640 * v
3641 * Do the rest of the actions after the last multi table action.
3642 */
3643 static int
3644 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
3645 {
3646 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3647 struct mlx5_flow_attr *attr, *next_attr = NULL;
3648 struct mlx5e_post_act_handle *handle;
3649 int err;
3650
3651 /* The list is walked in reverse order of the actions:
3652 * the first entry is the last attribute.
3653 */
3654 list_for_each_entry(attr, &flow->attrs, list) {
3655 if (!next_attr) {
3656 /* Set counter action on last post act rule. */
3657 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3658 }
3659
3660 if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) {
3661 err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
3662 if (err)
3663 goto out_free;
3664 }
3665
3666 /* Don't add a post_act rule for the first attr (last in the list);
3667 * it's being handled by the caller.
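* E.g. for a list [attr3, attr2, attr1], post act rules are created for
 * attr3 and attr2, while attr1 (the flow's base attribute, last in the
 * list) is offloaded by the caller.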
3668 */ 3669 if (list_is_last(&attr->list, &flow->attrs)) 3670 break; 3671 3672 err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); 3673 if (err) 3674 goto out_free; 3675 3676 err = post_process_attr(flow, attr, extack); 3677 if (err) 3678 goto out_free; 3679 3680 handle = mlx5e_tc_post_act_add(post_act, attr); 3681 if (IS_ERR(handle)) { 3682 err = PTR_ERR(handle); 3683 goto out_free; 3684 } 3685 3686 attr->post_act_handle = handle; 3687 3688 if (attr->jumping_attr) { 3689 err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr); 3690 if (err) 3691 goto out_free; 3692 } 3693 3694 next_attr = attr; 3695 } 3696 3697 if (flow_flag_test(flow, SLOW)) 3698 goto out; 3699 3700 err = mlx5e_tc_offload_flow_post_acts(flow); 3701 if (err) 3702 goto out_free; 3703 3704 out: 3705 return 0; 3706 3707 out_free: 3708 free_flow_post_acts(flow); 3709 return err; 3710 } 3711 3712 static int 3713 alloc_branch_attr(struct mlx5e_tc_flow *flow, 3714 struct mlx5e_tc_act_branch_ctrl *cond, 3715 struct mlx5_flow_attr **cond_attr, 3716 u32 *jump_count, 3717 struct netlink_ext_ack *extack) 3718 { 3719 struct mlx5_flow_attr *attr; 3720 int err = 0; 3721 3722 *cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, 3723 mlx5e_get_flow_namespace(flow)); 3724 if (!(*cond_attr)) 3725 return -ENOMEM; 3726 3727 attr = *cond_attr; 3728 3729 switch (cond->act_id) { 3730 case FLOW_ACTION_DROP: 3731 attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; 3732 break; 3733 case FLOW_ACTION_ACCEPT: 3734 case FLOW_ACTION_PIPE: 3735 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3736 attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); 3737 break; 3738 case FLOW_ACTION_JUMP: 3739 if (*jump_count) { 3740 NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps"); 3741 err = -EOPNOTSUPP; 3742 goto out_err; 3743 } 3744 *jump_count = cond->extval; 3745 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3746 attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); 3747 break; 3748 default: 3749 err = -EOPNOTSUPP; 3750 goto out_err; 3751 } 3752 3753 return err; 3754 out_err: 3755 kfree(*cond_attr); 3756 *cond_attr = NULL; 3757 return err; 3758 } 3759 3760 static void 3761 dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act, 3762 struct mlx5_flow_attr *attr, struct mlx5e_priv *priv, 3763 struct mlx5e_tc_jump_state *jump_state) 3764 { 3765 if (!jump_state->jump_count) 3766 return; 3767 3768 /* Single tc action can instantiate multiple offload actions (e.g. 
static void
dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
	       struct mlx5_flow_attr *attr, struct mlx5e_priv *priv,
	       struct mlx5e_tc_jump_state *jump_state)
{
	if (!jump_state->jump_count)
		return;

	/* Single tc action can instantiate multiple offload actions (e.g. pedit)
	 * Jump only over a tc action
	 */
	if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index)
		return;

	jump_state->last_id = act->id;
	jump_state->last_index = act->hw_index;

	/* nothing to do for intermediate actions */
	if (--jump_state->jump_count > 1)
		return;

	if (jump_state->jump_count == 1) { /* last action in the jump action list */

		/* create a new attribute after this action */
		jump_state->jump_target = true;

		if (tc_act->is_terminating_action) { /* the branch ends here */
			attr->flags |= MLX5_ATTR_FLAG_TERMINATING;
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
		} else { /* the branch continues executing the rest of the actions */
			struct mlx5e_post_act *post_act;

			attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			post_act = get_post_action(priv);
			attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act);
		}
	} else if (jump_state->jump_count == 0) { /* first attr after the jump action list */
		/* This is the post action for the jumping attribute (either red or green)
		 * Use the stored jumping_attr to set the post act id on the jumping attribute
		 */
		attr->jumping_attr = jump_state->jumping_attr;
	}
}

static int
parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
		  struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr,
		  struct mlx5e_tc_jump_state *jump_state,
		  struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_act_branch_ctrl cond_true, cond_false;
	u32 jump_count = jump_state->jump_count;
	int err;

	if (!tc_act->get_branch_ctrl)
		return 0;

	tc_act->get_branch_ctrl(act, &cond_true, &cond_false);

	err = alloc_branch_attr(flow, &cond_true,
				&attr->branch_true, &jump_count, extack);
	if (err)
		goto out_err;

	if (jump_count)
		jump_state->jumping_attr = attr->branch_true;

	err = alloc_branch_attr(flow, &cond_false,
				&attr->branch_false, &jump_count, extack);
	if (err)
		goto err_branch_false;

	if (jump_count && !jump_state->jumping_attr)
		jump_state->jumping_attr = attr->branch_false;

	jump_state->jump_count = jump_count;

	/* branching action requires its own counter */
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
	flow_flag_set(flow, USE_ACT_STATS);

	return 0;

err_branch_false:
	free_branch_attr(flow, attr->branch_true);
out_err:
	return err;
}
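/* Translate the flower action list into one or more mlx5_flow_attr's. Each
 * action is parsed by its mlx5e_tc_act handler; whenever a multi-table
 * action (or a branch jump target) is hit before the last action, the
 * current attribute is closed and a new one is started, to be chained later
 * through the post-action table by alloc_flow_post_acts().
 */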
static int
parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
		 struct flow_action *flow_action)
{
	struct netlink_ext_ack *extack = parse_state->extack;
	struct mlx5e_tc_flow *flow = parse_state->flow;
	struct mlx5e_tc_jump_state jump_state = {};
	struct mlx5_flow_attr *attr = flow->attr;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5e_priv *priv = flow->priv;
	struct mlx5_flow_attr *prev_attr;
	struct flow_action_entry *act;
	struct mlx5e_tc_act *tc_act;
	int err, i, i_split = 0;
	bool is_missable;

	ns_type = mlx5e_get_flow_namespace(flow);
	list_add(&attr->list, &flow->attrs);

	flow_action_for_each(i, act, flow_action) {
		jump_state.jump_target = false;
		is_missable = false;
		prev_attr = attr;

		tc_act = mlx5e_tc_act_get(act->id, ns_type);
		if (!tc_act) {
			NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
			err = -EOPNOTSUPP;
			goto out_free_post_acts;
		}

		if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) {
			err = -EOPNOTSUPP;
			goto out_free_post_acts;
		}

		err = tc_act->parse_action(parse_state, act, priv, attr);
		if (err)
			goto out_free_post_acts;

		dec_jump_count(act, tc_act, attr, priv, &jump_state);

		err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack);
		if (err)
			goto out_free_post_acts;

		parse_state->actions |= attr->action;

		/* Split attr for multi table act if not the last act. */
		if (jump_state.jump_target ||
		    (tc_act->is_multi_table_act &&
		     tc_act->is_multi_table_act(priv, act, attr) &&
		     i < flow_action->num_entries - 1)) {
			is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false;

			err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr,
						      ns_type);
			if (err)
				goto out_free_post_acts;

			attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
			if (!attr) {
				err = -ENOMEM;
				goto out_free_post_acts;
			}

			i_split = i + 1;
			list_add(&attr->list, &flow->attrs);
		}

		if (is_missable) {
			/* Add counter to prev, and assign act to new (next) attr */
			prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
			flow_flag_set(flow, USE_ACT_STATS);

			attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie;
		} else if (!tc_act->stats_action) {
			prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie;
		}
	}

	err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, ns_type);
	if (err)
		goto out_free_post_acts;

	err = alloc_flow_post_acts(flow, extack);
	if (err)
		goto out_free_post_acts;

	return 0;

out_free_post_acts:
	free_flow_post_acts(flow);

	return err;
}

static int
flow_action_supported(struct flow_action *flow_action,
		      struct netlink_ext_ack *extack)
{
	if (!flow_action_has_entries(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
		return -EINVAL;
	}

	if (!flow_action_hw_stats_check(flow_action, extack,
					FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
		NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
parse_tc_nic_actions(struct mlx5e_priv *priv,
		     struct flow_action *flow_action,
		     struct mlx5e_tc_flow *flow,
		     struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_act_parse_state *parse_state;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	int err;

	err = flow_action_supported(flow_action, extack);
	if (err)
		return err;

	attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
	parse_attr = attr->parse_attr;
	parse_state = &parse_attr->parse_state;
	mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
	parse_state->ct_priv = get_ct_priv(priv);

	err = parse_tc_actions(parse_state, flow_action);
	if (err)
		return err;

	err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
	if (err)
		return err;

	err = verify_attr_actions(attr->action, extack);
	if (err)
		return err;

	if (!actions_match_supported(priv, flow_action, parse_state->actions,
				     parse_attr, flow, extack))
		return -EOPNOTSUPP;

	return 0;
}
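/* The helpers below decide whether a forward destination netdev is reachable
 * from this eswitch: merged-eswitch VFs, representors on the same HW under
 * lag/multipath, multiport-eswitch peers, or representors on the same port.
 */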
static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
				  struct net_device *peer_netdev)
{
	struct mlx5e_priv *peer_priv;

	peer_priv = netdev_priv(peer_netdev);

	return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
		mlx5e_eswitch_vf_rep(priv->netdev) &&
		mlx5e_eswitch_vf_rep(peer_netdev) &&
		mlx5e_same_hw_devs(priv, peer_priv));
}

static bool same_hw_reps(struct mlx5e_priv *priv,
			 struct net_device *peer_netdev)
{
	struct mlx5e_priv *peer_priv;

	peer_priv = netdev_priv(peer_netdev);

	return mlx5e_eswitch_rep(priv->netdev) &&
	       mlx5e_eswitch_rep(peer_netdev) &&
	       mlx5e_same_hw_devs(priv, peer_priv);
}

static bool is_lag_dev(struct mlx5e_priv *priv,
		       struct net_device *peer_netdev)
{
	return ((mlx5_lag_is_sriov(priv->mdev) ||
		 mlx5_lag_is_multipath(priv->mdev)) &&
		same_hw_reps(priv, peer_netdev));
}

static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
{
	return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev);
}

bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
				    struct net_device *out_dev)
{
	if (is_merged_eswitch_vfs(priv, out_dev))
		return true;

	if (is_multiport_eligible(priv, out_dev))
		return true;

	if (is_lag_dev(priv, out_dev))
		return true;

	return mlx5e_eswitch_rep(out_dev) &&
	       same_port_devs(priv, netdev_priv(out_dev));
}

int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
				      struct mlx5_flow_attr *attr,
				      int ifindex,
				      enum mlx5e_tc_int_port_type type,
				      u32 *action,
				      int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5e_tc_int_port_priv *int_port_priv;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_int_port *dest_int_port;
	int err;

	parse_attr = attr->parse_attr;
	int_port_priv = mlx5e_get_int_port_priv(priv);

	dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
	if (IS_ERR(dest_int_port))
		return PTR_ERR(dest_int_port);

	err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
					MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
					mlx5e_tc_int_port_get_metadata(dest_int_port));
	if (err) {
		mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
		return err;
	}

	*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	esw_attr->dest_int_port = dest_int_port;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	esw_attr->split_count = out_index;

	/* Forward to root fdb for matching against the new source vport */
	attr->dest_chain = 0;

	return 0;
}
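/* Parse actions for an FDB (switchdev) flow and enforce the internal-port
 * restrictions: an internal-port destination must be the only destination,
 * and tunnel/internal-port sources cannot forward to an internal port.
 */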
static int
parse_tc_fdb_actions(struct mlx5e_priv *priv,
		     struct flow_action *flow_action,
		     struct mlx5e_tc_flow *flow,
		     struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_act_parse_state *parse_state;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct net_device *filter_dev;
	int err;

	err = flow_action_supported(flow_action, extack);
	if (err)
		return err;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	filter_dev = parse_attr->filter_dev;
	parse_state = &parse_attr->parse_state;
	mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
	parse_state->ct_priv = get_ct_priv(priv);

	err = parse_tc_actions(parse_state, flow_action);
	if (err)
		return err;

	/* Forward to/from internal port can only have 1 dest */
	if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) &&
	    esw_attr->out_count > 1) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Rules with internal port can have only one destination");
		return -EOPNOTSUPP;
	}

	/* Forward from tunnel/internal port to internal port is not supported */
	if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) &&
	    esw_attr->dest_int_port) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Forwarding from tunnel/internal port to internal port is not supported");
		return -EOPNOTSUPP;
	}

	err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
	if (err)
		return err;

	if (!actions_match_supported(priv, flow_action, parse_state->actions,
				     parse_attr, flow, extack))
		return -EOPNOTSUPP;

	return 0;
}

static void get_flags(int flags, unsigned long *flow_flags)
{
	unsigned long __flow_flags = 0;

	if (flags & MLX5_TC_FLAG(INGRESS))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
	if (flags & MLX5_TC_FLAG(EGRESS))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);

	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
	if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
	if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);

	*flow_flags = __flow_flags;
}

static const struct rhashtable_params tc_ht_params = {
	.head_offset = offsetof(struct mlx5e_tc_flow, node),
	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
	.key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
	.automatic_shrinking = true,
};

static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
				    unsigned long flags)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5e_rep_priv *rpriv;

	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
		rpriv = priv->ppriv;
		return &rpriv->tc_ht;
	} else /* NIC offload */
		return &tc->ht;
}

static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
		flow_flag_test(flow, INGRESS);
	bool act_is_encap = !!(attr->action &
			       MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
	bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
						MLX5_DEVCOM_ESW_OFFLOADS);

	if (!esw_paired)
		return false;

	if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
	     mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
	    (is_rep_ingress || act_is_encap))
		return true;

	if (mlx5_lag_is_mpesw(esw_attr->in_mdev))
		return true;

	return false;
}
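/* The namespace-specific attribute (esw or nic) is laid out immediately
 * after struct mlx5_flow_attr, so both are allocated in a single kzalloc.
 */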
struct mlx5_flow_attr *
mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
{
	u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ?
				sizeof(struct mlx5_esw_flow_attr) :
				sizeof(struct mlx5_nic_flow_attr);
	struct mlx5_flow_attr *attr;

	attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
	if (!attr)
		return attr;

	INIT_LIST_HEAD(&attr->list);
	return attr;
}

static void
mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
{
	struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);

	if (!attr)
		return;

	if (attr->post_act_handle)
		mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle);

	mlx5e_tc_tun_encap_dests_unset(flow->priv, flow, attr);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
		mlx5_fc_destroy(counter_dev, attr->counter);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
		mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
	}

	mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);

	free_branch_attr(flow, attr->branch_true);
	free_branch_attr(flow, attr->branch_false);
}

static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
		 struct flow_cls_offload *f, unsigned long flow_flags,
		 struct mlx5e_tc_flow_parse_attr **__parse_attr,
		 struct mlx5e_tc_flow **__flow)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr;
	struct mlx5e_tc_flow *flow;
	int err = -ENOMEM;
	int out_index;

	flow = kzalloc(sizeof(*flow), GFP_KERNEL);
	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
	if (!parse_attr || !flow)
		goto err_free;

	flow->flags = flow_flags;
	flow->cookie = f->cookie;
	flow->priv = priv;

	attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
	if (!attr)
		goto err_free;

	flow->attr = attr;

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
		INIT_LIST_HEAD(&flow->encaps[out_index].list);
	INIT_LIST_HEAD(&flow->hairpin);
	INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
	INIT_LIST_HEAD(&flow->attrs);
	refcount_set(&flow->refcnt, 1);
	init_completion(&flow->init_done);
	init_completion(&flow->del_hw_done);

	*__flow = flow;
	*__parse_attr = parse_attr;

	return 0;

err_free:
	kfree(flow);
	kvfree(parse_attr);
	return err;
}

static void
mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
		     struct mlx5e_tc_flow_parse_attr *parse_attr,
		     struct flow_cls_offload *f)
{
	attr->parse_attr = parse_attr;
	attr->chain = f->common.chain_index;
	attr->prio = f->common.prio;
}

static void
mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
			 struct mlx5e_priv *priv,
			 struct mlx5e_tc_flow_parse_attr *parse_attr,
			 struct flow_cls_offload *f,
			 struct mlx5_eswitch_rep *in_rep,
			 struct mlx5_core_dev *in_mdev)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;

	mlx5e_flow_attr_init(attr, parse_attr, f);

	esw_attr->in_rep = in_rep;
	esw_attr->in_mdev = in_mdev;

	if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
	    MLX5_COUNTER_SOURCE_ESWITCH)
		esw_attr->counter_dev = in_mdev;
	else
		esw_attr->counter_dev = priv->mdev;
}
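/* Allocate and offload a single FDB flow: parse the flower match and the CT
 * matches first (action validation depends on them), then the actions, and
 * finally push the rule to hardware. A multipath -ENETUNREACH is not fatal;
 * the flow is queued as unready and reoffloaded once a route is available.
 */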
static struct mlx5e_tc_flow *
__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		     struct flow_cls_offload *f,
		     unsigned long flow_flags,
		     struct net_device *filter_dev,
		     struct mlx5_eswitch_rep *in_rep,
		     struct mlx5_core_dev *in_mdev)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
	int attr_size, err;

	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
	attr_size = sizeof(struct mlx5_esw_flow_attr);
	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
			       &parse_attr, &flow);
	if (err)
		goto out;

	parse_attr->filter_dev = filter_dev;
	mlx5e_flow_esw_attr_init(flow->attr,
				 priv, parse_attr,
				 f, in_rep, in_mdev);

	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
			       f, filter_dev);
	if (err)
		goto err_free;

	/* actions validation depends on parsing the ct matches first */
	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
				   &flow->attr->ct_attr, extack);
	if (err)
		goto err_free;

	err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
	if (err)
		goto err_free;

	err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
	complete_all(&flow->init_done);
	if (err) {
		if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
			goto err_free;

		add_unready_flow(flow);
	}

	return flow;

err_free:
	mlx5e_flow_put(priv, flow);
out:
	return ERR_PTR(err);
}
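/* Duplicate the flow on the paired eswitch so traffic arriving on either
 * port of the lag is handled. The duplicate is tracked on the eswitch
 * peer_flows list and torn down together with the original flow.
 */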
static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
				      struct mlx5e_tc_flow *flow,
				      unsigned long flow_flags)
{
	struct mlx5e_priv *priv = flow->priv, *peer_priv;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_rep_priv *peer_urpriv;
	struct mlx5e_tc_flow *peer_flow;
	struct mlx5_core_dev *in_mdev;
	int err = 0;

	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		return -ENODEV;

	peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
	peer_priv = netdev_priv(peer_urpriv->netdev);

	/* in_mdev is the mdev the packet originated from.
	 * Packets redirected to the uplink therefore use the same mdev as the
	 * original flow, while packets redirected from the uplink use the
	 * peer mdev.
	 * Multiport eswitch is a special case in which we must keep the
	 * original mdev.
	 */
	if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev))
		in_mdev = peer_priv->mdev;
	else
		in_mdev = priv->mdev;

	parse_attr = flow->attr->parse_attr;
	peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
					 parse_attr->filter_dev,
					 attr->in_rep, in_mdev);
	if (IS_ERR(peer_flow)) {
		err = PTR_ERR(peer_flow);
		goto out;
	}

	flow->peer_flow = peer_flow;
	flow_flag_set(flow, DUP);
	mutex_lock(&esw->offloads.peer_mutex);
	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
	mutex_unlock(&esw->offloads.peer_mutex);

out:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return err;
}

static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
	struct mlx5_core_dev *in_mdev = priv->mdev;
	struct mlx5e_tc_flow *flow;
	int err;

	flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
				    in_mdev);
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (is_peer_flow_needed(flow)) {
		err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
		if (err) {
			mlx5e_tc_del_fdb_flow(priv, flow);
			goto out;
		}
	}

	*__flow = flow;

	return 0;

out:
	return err;
}

static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
	int attr_size, err;

	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
		if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
			return -EOPNOTSUPP;
	} else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
		return -EOPNOTSUPP;
	}

	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
	attr_size = sizeof(struct mlx5_nic_flow_attr);
	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
			       &parse_attr, &flow);
	if (err)
		goto out;

	parse_attr->filter_dev = filter_dev;
	mlx5e_flow_attr_init(flow->attr, parse_attr, f);

	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
			       f, filter_dev);
	if (err)
		goto err_free;

	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
				   &flow->attr->ct_attr, extack);
	if (err)
		goto err_free;

	err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
	if (err)
		goto err_free;

	err = mlx5e_tc_add_nic_flow(priv, flow, extack);
	if (err)
		goto err_free;

	flow_flag_set(flow, OFFLOADED);
	*__flow = flow;

	return 0;

err_free:
	flow_flag_set(flow, FAILED);
	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
	mlx5e_flow_put(priv, flow);
out:
	return err;
}
static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
		  struct flow_cls_offload *f,
		  unsigned long flags,
		  struct net_device *filter_dev,
		  struct mlx5e_tc_flow **flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long flow_flags;
	int err;

	get_flags(flags, &flow_flags);

	if (!tc_can_offload_extack(priv->netdev, f->common.extack))
		return -EOPNOTSUPP;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		err = mlx5e_add_fdb_flow(priv, f, flow_flags,
					 filter_dev, flow);
	else
		err = mlx5e_add_nic_flow(priv, f, flow_flags,
					 filter_dev, flow);

	return err;
}

static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
					   struct mlx5e_rep_priv *rpriv)
{
	/* An offloaded flow rule is allowed to be duplicated on a non-uplink
	 * representor sharing a tc block with other slaves of a lag device.
	 * rpriv can be NULL if this function is called from NIC mode.
	 */
	return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
}

int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
			   struct flow_cls_offload *f, unsigned long flags)
{
	struct netlink_ext_ack *extack = f->common.extack;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_tc_flow *flow;
	int err = 0;

	if (!mlx5_esw_hold(priv->mdev))
		return -EBUSY;

	mlx5_esw_get(priv->mdev);

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	if (flow) {
		/* Same flow rule offloaded to non-uplink representor sharing tc block,
		 * just return 0.
		 */
		if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
			goto rcu_unlock;

		NL_SET_ERR_MSG_MOD(extack,
				   "flow cookie already exists, ignoring");
		netdev_warn_once(priv->netdev,
				 "flow cookie %lx already exists, ignoring\n",
				 f->cookie);
		err = -EEXIST;
		goto rcu_unlock;
	}
rcu_unlock:
	rcu_read_unlock();
	if (flow)
		goto out;

	trace_mlx5e_configure_flower(f);
	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
	if (err)
		goto out;

	/* Flow rule offloaded to non-uplink representor sharing tc block,
	 * set the flow's owner dev.
	 */
	if (is_flow_rule_duplicate_allowed(dev, rpriv))
		flow->orig_dev = dev;

	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
	if (err)
		goto err_free;

	mlx5_esw_release(priv->mdev);
	return 0;

err_free:
	mlx5e_flow_put(priv, flow);
out:
	mlx5_esw_put(priv->mdev);
	mlx5_esw_release(priv->mdev);
	return err;
}

static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
{
	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));

	return flow_flag_test(flow, INGRESS) == dir_ingress &&
	       flow_flag_test(flow, EGRESS) == dir_egress;
}
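/* The DELETED flag serializes concurrent delete requests: only the caller
 * that sets it first proceeds to remove the flow from the hashtable and
 * drop its reference.
 */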
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
			struct flow_cls_offload *f, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_tc_flow *flow;
	int err;

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	if (!flow || !same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	/* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
	 * set.
	 */
	if (flow_flag_test_and_set(flow, DELETED)) {
		err = -EINVAL;
		goto errout;
	}
	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
	rcu_read_unlock();

	trace_mlx5e_delete_flower(f);
	mlx5e_flow_put(priv, flow);

	mlx5_esw_put(priv->mdev);
	return 0;

errout:
	rcu_read_unlock();
	return err;
}

int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv,
			       struct flow_offload_action *fl_act)
{
	return mlx5e_tc_act_stats_fill_stats(get_act_stats_handle(priv), fl_act);
}

int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
		       struct flow_cls_offload *f, unsigned long flags)
{
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5_eswitch *peer_esw;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	u64 lastuse = 0;
	u64 packets = 0;
	u64 bytes = 0;
	int err = 0;

	rcu_read_lock();
	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
						tc_ht_params));
	rcu_read_unlock();
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (!same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	if (mlx5e_is_offloaded_flow(flow)) {
		if (flow_flag_test(flow, USE_ACT_STATS)) {
			f->use_act_stats = true;
		} else {
			counter = mlx5e_tc_get_counter(flow);
			if (!counter)
				goto errout;

			mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
		}
	}

	/* Under multipath it's possible for one rule to be currently
	 * un-offloaded while the other rule is offloaded.
	 */
	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		goto out;

	if (flow_flag_test(flow, DUP) &&
	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
		u64 bytes2;
		u64 packets2;
		u64 lastuse2;

		if (flow_flag_test(flow, USE_ACT_STATS)) {
			f->use_act_stats = true;
		} else {
			counter = mlx5e_tc_get_counter(flow->peer_flow);
			if (!counter)
				goto no_peer_counter;
			mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);

			bytes += bytes2;
			packets += packets2;
			lastuse = max_t(u64, lastuse, lastuse2);
		}
	}

no_peer_counter:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out:
	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
			  FLOW_ACTION_HW_STATS_DELAYED);
	trace_mlx5e_stats_flower(f);
errout:
	mlx5e_flow_put(priv, flow);
	return err;
}
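/* Convert the policer rate to mbit/s and apply it as the vport ingress rate
 * limit. For example, a tc police rate of 125000000 bytes/s is 10^9 bits/s,
 * which rounds to rate_mbps = 1000.
 */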
static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
			       struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch *esw;
	u32 rate_mbps = 0;
	u16 vport_num;
	int err;

	vport_num = rpriv->rep->vport;
	if (vport_num >= MLX5_VPORT_ECPF) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Ingress rate limit is supported only for Eswitch ports connected to VFs");
		return -EOPNOTSUPP;
	}

	esw = priv->mdev->priv.eswitch;
	/* rate is given in bytes/sec.
	 * First convert to bits/sec and then round to the nearest mbit/sec,
	 * where mbit means a million bits.
	 * Moreover, if rate is non zero we choose to configure to a minimum of
	 * 1 mbit/sec.
	 */
	if (rate) {
		rate = (rate * BITS_PER_BYTE) + 500000;
		do_div(rate, 1000000);
		rate_mbps = max_t(u32, rate, 1);
	}

	err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");

	return err;
}

static int
tc_matchall_police_validate(const struct flow_action *action,
			    const struct flow_action_entry *act,
			    struct netlink_ext_ack *extack)
{
	if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when conform action is not continue");
		return -EOPNOTSUPP;
	}

	if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when exceed action is not drop");
		return -EOPNOTSUPP;
	}

	if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
	    !flow_action_is_last_entry(action, act)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when conform action is ok, but action is not last");
		return -EOPNOTSUPP;
	}

	if (act->police.peakrate_bytes_ps ||
	    act->police.avrate || act->police.overhead) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when peakrate/avrate/overhead is configured");
		return -EOPNOTSUPP;
	}

	return 0;
}

static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
					struct flow_action *flow_action,
					struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	const struct flow_action_entry *act;
	int err;
	int i;

	if (!flow_action_has_entries(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
		return -EINVAL;
	}

	if (!flow_offload_has_one_action(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
		return -EOPNOTSUPP;
	}

	if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
		NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
		return -EOPNOTSUPP;
	}

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_POLICE:
			err = tc_matchall_police_validate(flow_action, act, extack);
			if (err)
				return err;

			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
			if (err)
				return err;

			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
				struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	if (ma->common.prio != 1) {
		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
		return -EINVAL;
	}

	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
}

int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	return apply_police_params(priv, 0, extack);
}
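/* Matchall stats are derived from the VF vport counters: report the delta
 * since the previous readout and remember the current snapshot.
 */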
void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct rtnl_link_stats64 cur_stats;
	u64 dbytes;
	u64 dpkts;

	cur_stats = priv->stats.vf_vport;
	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
	rpriv->prev_vf_vport_stats = cur_stats;
	flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
			  FLOW_ACTION_HW_STATS_DELAYED);
}

static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
					      struct mlx5e_priv *peer_priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
	struct mlx5e_hairpin_entry *hpe, *tmp;
	LIST_HEAD(init_wait_list);
	u16 peer_vhca_id;
	int bkt;

	if (!mlx5e_same_hw_devs(priv, peer_priv))
		return;

	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);

	mutex_lock(&tc->hairpin_tbl_lock);
	hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
		if (refcount_inc_not_zero(&hpe->refcnt))
			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
	mutex_unlock(&tc->hairpin_tbl_lock);

	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
		wait_for_completion(&hpe->res_ready);
		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
			mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);

		mlx5e_hairpin_put(priv, hpe);
	}
}

static int mlx5e_tc_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_priv *peer_priv;
	struct mlx5e_tc_table *tc;
	struct mlx5e_priv *priv;

	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
	    event != NETDEV_UNREGISTER ||
	    ndev->reg_state == NETREG_REGISTERED)
		return NOTIFY_DONE;

	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
	priv = tc->priv;
	peer_priv = netdev_priv(ndev);
	if (priv == peer_priv ||
	    !(priv->netdev->features & NETIF_F_HW_TC))
		return NOTIFY_DONE;

	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);

	return NOTIFY_DONE;
}

static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_flow_table **ft = &tc->miss_t;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	int err = 0;

	ft_attr.max_fte = 1;
	ft_attr.autogroup.max_num_groups = 1;
	ft_attr.level = MLX5E_TC_MISS_LEVEL;
	ft_attr.prio = 0;
	ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);

	*ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(*ft)) {
		err = PTR_ERR(*ft);
		netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
	}

	return err;
}

static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);

	mlx5_destroy_flow_table(tc->miss_t);
}
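/* NIC TC init builds the offload infrastructure bottom-up: flow hashtable,
 * chain mapping, miss table, chains, post-action table, CT, netdev notifier
 * (for hairpin peer removal) and action stats. The error path unwinds in
 * reverse order.
 */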
int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_core_dev *dev = priv->mdev;
	struct mapping_ctx *chains_mapping;
	struct mlx5_chains_attr attr = {};
	u64 mapping_id;
	int err;

	mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
	mutex_init(&tc->t_lock);
	mutex_init(&tc->hairpin_tbl_lock);
	hash_init(tc->hairpin_tbl);
	tc->priv = priv;

	err = rhashtable_init(&tc->ht, &tc_ht_params);
	if (err)
		return err;

	lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
	lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);

	mapping_id = mlx5_query_nic_system_image_guid(dev);

	chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
					       sizeof(struct mlx5_mapped_obj),
					       MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);

	if (IS_ERR(chains_mapping)) {
		err = PTR_ERR(chains_mapping);
		goto err_mapping;
	}
	tc->mapping = chains_mapping;

	err = mlx5e_tc_nic_create_miss_table(priv);
	if (err)
		goto err_chains;

	if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
		attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
			     MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
	attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
	attr.default_ft = tc->miss_t;
	attr.mapping = chains_mapping;
	attr.fs_base_prio = MLX5E_TC_PRIO;

	tc->chains = mlx5_chains_create(dev, &attr);
	if (IS_ERR(tc->chains)) {
		err = PTR_ERR(tc->chains);
		goto err_miss;
	}

	mlx5_chains_print_info(tc->chains);

	tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
				 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);

	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
	err = register_netdevice_notifier_dev_net(priv->netdev,
						  &tc->netdevice_nb,
						  &tc->netdevice_nn);
	if (err) {
		tc->netdevice_nb.notifier_call = NULL;
		mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
		goto err_reg;
	}

	mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs));

	tc->action_stats_handle = mlx5e_tc_act_stats_create();
	if (IS_ERR(tc->action_stats_handle)) {
		err = PTR_ERR(tc->action_stats_handle);
		goto err_act_stats;
	}

	return 0;

err_act_stats:
	unregister_netdevice_notifier_dev_net(priv->netdev,
					      &tc->netdevice_nb,
					      &tc->netdevice_nn);
err_reg:
	mlx5_tc_ct_clean(tc->ct);
	mlx5e_tc_post_act_destroy(tc->post_act);
	mlx5_chains_destroy(tc->chains);
err_miss:
	mlx5e_tc_nic_destroy_miss_table(priv);
err_chains:
	mapping_destroy(chains_mapping);
err_mapping:
	rhashtable_destroy(&tc->ht);
	return err;
}

static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = flow->priv;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}

void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);

	debugfs_remove_recursive(tc->dfs_root);

	if (tc->netdevice_nb.notifier_call)
		unregister_netdevice_notifier_dev_net(priv->netdev,
						      &tc->netdevice_nb,
						      &tc->netdevice_nn);

	mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
	mutex_destroy(&tc->hairpin_tbl_lock);

	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
		tc->t = NULL;
	}
	mutex_destroy(&tc->t_lock);

	mlx5_tc_ct_clean(tc->ct);
	mlx5e_tc_post_act_destroy(tc->post_act);
	mapping_destroy(tc->mapping);
	mlx5_chains_destroy(tc->chains);
	mlx5e_tc_nic_destroy_miss_table(priv);
	mlx5e_tc_act_stats_free(tc->action_stats_handle);
}

int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
{
	int err;

	err = rhashtable_init(tc_ht, &tc_ht_params);
	if (err)
		return err;

	lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
	lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);

	return 0;
}

void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
{
	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}

int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
{
	const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
	struct mlx5e_rep_priv *rpriv;
	struct mapping_ctx *mapping;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;
	u64 mapping_id;
	int err = 0;

	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
	priv = netdev_priv(rpriv->netdev);
	esw = priv->mdev->priv.eswitch;

	uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
						       MLX5_FLOW_NAMESPACE_FDB);
	uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
					       esw_chains(esw),
					       &esw->offloads.mod_hdr,
					       MLX5_FLOW_NAMESPACE_FDB,
					       uplink_priv->post_act);

	uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));

	uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);

	mapping_id = mlx5_query_nic_system_image_guid(esw->dev);

	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
					sizeof(struct tunnel_match_key),
					TUNNEL_INFO_BITS_MASK, true);

	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_tun_mapping;
	}
	uplink_priv->tunnel_mapping = mapping;

	/* The last two values are reserved for the stack-devices slow-path
	 * table mark and the bridge ingress push mark.
	 */
	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
					sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_enc_opts_mapping;
	}
	uplink_priv->tunnel_enc_opts_mapping = mapping;

	uplink_priv->encap = mlx5e_tc_tun_init(priv);
	if (IS_ERR(uplink_priv->encap)) {
		err = PTR_ERR(uplink_priv->encap);
		goto err_register_fib_notifier;
	}

	uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create();
	if (IS_ERR(uplink_priv->action_stats_handle)) {
		err = PTR_ERR(uplink_priv->action_stats_handle);
		goto err_action_counter;
	}

	mlx5_esw_offloads_devcom_init(esw);

	return 0;

err_action_counter:
	mlx5e_tc_tun_cleanup(uplink_priv->encap);
err_register_fib_notifier:
	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
	mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
	netdev_warn(priv->netdev,
		    "Failed to initialize tc (eswitch), err: %d", err);
	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
	return err;
}

void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
{
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;

	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
	priv = netdev_priv(rpriv->netdev);
	esw = priv->mdev->priv.eswitch;

	mlx5_esw_offloads_devcom_cleanup(esw);

	mlx5e_tc_tun_cleanup(uplink_priv->encap);

	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
	mapping_destroy(uplink_priv->tunnel_mapping);

	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
	mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
	mlx5e_tc_act_stats_free(uplink_priv->action_stats_handle);
}

int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);

	return atomic_read(&tc_ht->nelems);
}

void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
		__mlx5e_tc_del_fdb_peer_flow(flow);
}

void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
	struct mlx5_rep_uplink_priv *rpriv =
		container_of(work, struct mlx5_rep_uplink_priv,
			     reoffload_flows_work);
	struct mlx5e_tc_flow *flow, *tmp;

	mutex_lock(&rpriv->unready_flows_lock);
	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
			unready_flow_del(flow);
	}
	mutex_unlock(&rpriv->unready_flows_lock);
}
static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
				     struct flow_cls_offload *cls_flower,
				     unsigned long flags)
{
	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
					      flags);
	case FLOW_CLS_DESTROY:
		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
					   flags);
	case FLOW_CLS_STATS:
		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
					  flags);
	default:
		return -EOPNOTSUPP;
	}
}

int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
			    void *cb_priv)
{
	unsigned long flags = MLX5_TC_FLAG(INGRESS);
	struct mlx5e_priv *priv = cb_priv;

	if (!priv->netdev || !netif_device_present(priv->netdev))
		return -EOPNOTSUPP;

	if (mlx5e_is_uplink_rep(priv))
		flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
	else
		flags |= MLX5_TC_FLAG(NIC_OFFLOAD);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
	default:
		return -EOPNOTSUPP;
	}
}

static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
				    struct mlx5e_tc_update_priv *tc_priv,
				    u32 tunnel_id)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct tunnel_match_enc_opts enc_opts = {};
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct metadata_dst *tun_dst;
	struct tunnel_match_key key;
	u32 tun_id, enc_opts_id;
	struct net_device *dev;
	int err;

	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
	tun_id = tunnel_id >> ENC_OPTS_BITS;

	if (!tun_id)
		return true;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel for tun_id: %d, err: %d\n",
			   tun_id, err);
		return false;
	}

	if (enc_opts_id) {
		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
				   enc_opts_id, &enc_opts);
		if (err) {
			netdev_dbg(priv->netdev,
				   "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
				   enc_opts_id, err);
			return false;
		}
	}

	switch (key.enc_control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
					   key.enc_ip.tos, key.enc_ip.ttl,
					   key.enc_tp.dst, TUNNEL_KEY,
					   key32_to_tunnel_id(key.enc_key_id.keyid),
					   enc_opts.key.len);
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
					     key.enc_ip.tos, key.enc_ip.ttl,
					     key.enc_tp.dst, 0, TUNNEL_KEY,
					     key32_to_tunnel_id(key.enc_key_id.keyid),
					     enc_opts.key.len);
		break;
	default:
		netdev_dbg(priv->netdev,
			   "Couldn't restore tunnel, unsupported addr_type: %d\n",
			   key.enc_control.addr_type);
		return false;
	}

	if (!tun_dst) {
		netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
		return false;
	}

	tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;

	if (enc_opts.key.len)
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					enc_opts.key.data,
					enc_opts.key.len,
					enc_opts.key.dst_opt_type);

	skb_dst_set(skb, (struct dst_entry *)tun_dst);
	dev = dev_get_by_index(&init_net, key.filter_ifindex);
	if (!dev) {
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel device with ifindex: %d\n",
			   key.filter_ifindex);
		return false;
	}

	/* Set fwd_dev so we do dev_put() after datapath */
	tc_priv->fwd_dev = dev;

	skb->dev = dev;

	return true;
}
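/* Restore tc metadata on a missed packet: CT state for the zone, then the
 * chain id or act-miss cookie in the tc_skb_ext so the tc datapath can
 * resume where hardware left off, and finally any tunnel info.
 */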
static bool mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv,
					 struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id,
					 u32 tunnel_id, struct mlx5e_tc_update_priv *tc_priv)
{
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	struct tc_skb_ext *tc_skb_ext;
	u64 act_miss_cookie;
	u32 chain;

	chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0;
	act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ?
			  mapped_obj->act_miss_cookie : 0;
	if (chain || act_miss_cookie) {
		if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id))
			return false;

		tc_skb_ext = tc_skb_ext_alloc(skb);
		if (!tc_skb_ext) {
			WARN_ON(1);
			return false;
		}

		if (act_miss_cookie) {
			tc_skb_ext->act_miss_cookie = act_miss_cookie;
			tc_skb_ext->act_miss = 1;
		} else {
			tc_skb_ext->chain = chain;
		}
	}

	if (tc_priv)
		return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);

	return true;
}

static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
					struct mlx5_mapped_obj *mapped_obj,
					struct mlx5e_tc_update_priv *tc_priv)
{
	if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
		netdev_dbg(priv->netdev,
			   "Failed to restore tunnel info for sampled packet\n");
		return;
	}
	mlx5e_tc_sample_skb(skb, mapped_obj);
}

static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
					  struct mlx5_mapped_obj *mapped_obj,
					  struct mlx5e_tc_update_priv *tc_priv,
					  u32 tunnel_id)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	bool forward_tx = false;

	/* Tunnel restore takes precedence over int port restore */
	if (tunnel_id)
		return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
				      mapped_obj->int_port_metadata, &forward_tx)) {
		/* Set fwd_dev for future dev_put */
		tc_priv->fwd_dev = skb->dev;
		tc_priv->forward_tx = forward_tx;

		return true;
	}

	return false;
}

bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
			 struct mapping_ctx *mapping_ctx, u32 mapped_obj_id,
			 struct mlx5_tc_ct_priv *ct_priv,
			 u32 zone_restore_id, u32 tunnel_id,
			 struct mlx5e_tc_update_priv *tc_priv)
{
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	struct mlx5_mapped_obj mapped_obj;
	int err;

	err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj);
	if (err) {
		netdev_dbg(skb->dev,
			   "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n",
			   mapped_obj_id, err);
		return false;
	}

	switch (mapped_obj.type) {
	case MLX5_MAPPED_OBJ_CHAIN:
	case MLX5_MAPPED_OBJ_ACT_MISS:
		return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id,
						    tunnel_id, tc_priv);
	case MLX5_MAPPED_OBJ_SAMPLE:
		mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv);
		tc_priv->skb_done = true;
		return true;
	case MLX5_MAPPED_OBJ_INT_PORT_METADATA:
		return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, tc_priv, tunnel_id);
	default:
		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
		return false;
	}

	return false;
}
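/* NIC datapath wrapper: reg_b, delivered through the CQE ft_metadata field,
 * carries the mapped object id in its low bits and the zone restore id at
 * the NIC_ZONE_RESTORE_TO_REG offset.
 */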
bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
{
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	u32 mapped_obj_id, reg_b, zone_restore_id;
	struct mlx5_tc_ct_priv *ct_priv;
	struct mapping_ctx *mapping_ctx;
	struct mlx5e_tc_table *tc;

	reg_b = be32_to_cpu(cqe->ft_metadata);
	tc = mlx5e_fs_get_tc(priv->fs);
	mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
	zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
			  ESW_ZONE_ID_MASK;
	ct_priv = tc->ct;
	mapping_ctx = tc->mapping;

	return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id,
				   0, NULL);
}

static struct mapping_ctx *
mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc;
	struct mlx5_eswitch *esw;
	struct mapping_ctx *ctx;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		esw = priv->mdev->priv.eswitch;
		ctx = esw->offloads.reg_c0_obj_pool;
	} else {
		tc = mlx5e_fs_get_tc(priv->fs);
		ctx = tc->mapping;
	}

	return ctx;
}

int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
				     u64 act_miss_cookie, u32 *act_miss_mapping)
{
	struct mlx5_mapped_obj mapped_obj = {};
	struct mlx5_eswitch *esw;
	struct mapping_ctx *ctx;
	int err;

	ctx = mlx5e_get_priv_obj_mapping(priv);
	mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
	mapped_obj.act_miss_cookie = act_miss_cookie;
	err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
	if (err)
		return err;

	if (!is_mdev_switchdev_mode(priv->mdev))
		return 0;

	esw = priv->mdev->priv.eswitch;
	attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
	if (IS_ERR(attr->act_id_restore_rule)) {
		err = PTR_ERR(attr->act_id_restore_rule);
		goto err_rule;
	}

	return 0;

err_rule:
	mapping_remove(ctx, *act_miss_mapping);
	return err;
}

void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
				      u32 act_miss_mapping)
{
	struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv);

	if (is_mdev_switchdev_mode(priv->mdev))
		mlx5_del_flow_rules(attr->act_id_restore_rule);
	mapping_remove(ctx, act_miss_mapping);
}