1 /* 2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <net/flow_dissector.h> 34 #include <net/flow_offload.h> 35 #include <net/sch_generic.h> 36 #include <net/pkt_cls.h> 37 #include <linux/mlx5/fs.h> 38 #include <linux/mlx5/device.h> 39 #include <linux/rhashtable.h> 40 #include <linux/refcount.h> 41 #include <linux/completion.h> 42 #include <net/arp.h> 43 #include <net/ipv6_stubs.h> 44 #include <net/bareudp.h> 45 #include <net/bonding.h> 46 #include <net/dst_metadata.h> 47 #include "devlink.h" 48 #include "en.h" 49 #include "en/tc/post_act.h" 50 #include "en/tc/act_stats.h" 51 #include "en_rep.h" 52 #include "en/rep/tc.h" 53 #include "en/rep/neigh.h" 54 #include "en_tc.h" 55 #include "eswitch.h" 56 #include "fs_core.h" 57 #include "en/port.h" 58 #include "en/tc_tun.h" 59 #include "en/mapping.h" 60 #include "en/tc_ct.h" 61 #include "en/mod_hdr.h" 62 #include "en/tc_tun_encap.h" 63 #include "en/tc/sample.h" 64 #include "en/tc/act/act.h" 65 #include "en/tc/post_meter.h" 66 #include "lib/devcom.h" 67 #include "lib/geneve.h" 68 #include "lib/fs_chains.h" 69 #include "diag/en_tc_tracepoint.h" 70 #include <asm/div64.h> 71 #include "lag/lag.h" 72 #include "lag/mp.h" 73 74 #define MLX5E_TC_TABLE_NUM_GROUPS 4 75 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) 76 77 struct mlx5e_tc_table { 78 /* Protects the dynamic assignment of the t parameter 79 * which is the nic tc root table. 80 */ 81 struct mutex t_lock; 82 struct mlx5e_priv *priv; 83 struct mlx5_flow_table *t; 84 struct mlx5_flow_table *miss_t; 85 struct mlx5_fs_chains *chains; 86 struct mlx5e_post_act *post_act; 87 88 struct rhashtable ht; 89 90 struct mod_hdr_tbl mod_hdr; 91 struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */ 92 DECLARE_HASHTABLE(hairpin_tbl, 8); 93 94 struct notifier_block netdevice_nb; 95 struct netdev_net_notifier netdevice_nn; 96 97 struct mlx5_tc_ct_priv *ct; 98 struct mapping_ctx *mapping; 99 struct dentry *dfs_root; 100 101 /* tc action stats */ 102 struct mlx5e_tc_act_stats_handle *action_stats_handle; 103 }; 104 105 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { 106 [MAPPED_OBJ_TO_REG] = { 107 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, 108 .moffset = 0, 109 .mlen = 16, 110 }, 111 [VPORT_TO_REG] = { 112 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, 113 .moffset = 16, 114 .mlen = 16, 115 }, 116 [TUNNEL_TO_REG] = { 117 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, 118 .moffset = 8, 119 .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS, 120 .soffset = MLX5_BYTE_OFF(fte_match_param, 121 misc_parameters_2.metadata_reg_c_1), 122 }, 123 [ZONE_TO_REG] = zone_to_reg_ct, 124 [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct, 125 [CTSTATE_TO_REG] = ctstate_to_reg_ct, 126 [MARK_TO_REG] = mark_to_reg_ct, 127 [LABELS_TO_REG] = labels_to_reg_ct, 128 [FTEID_TO_REG] = fteid_to_reg_ct, 129 /* For NIC rules we store the restore metadata directly 130 * into reg_b that is passed to SW since we don't 131 * jump between steering domains. 132 */ 133 [NIC_MAPPED_OBJ_TO_REG] = { 134 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B, 135 .moffset = 0, 136 .mlen = 16, 137 }, 138 [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct, 139 [PACKET_COLOR_TO_REG] = packet_color_to_reg, 140 }; 141 142 struct mlx5e_tc_jump_state { 143 u32 jump_count; 144 bool jump_target; 145 struct mlx5_flow_attr *jumping_attr; 146 147 enum flow_action_id last_id; 148 u32 last_index; 149 }; 150 151 struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) 152 { 153 struct mlx5e_tc_table *tc; 154 155 tc = kvzalloc(sizeof(*tc), GFP_KERNEL); 156 return tc ? tc : ERR_PTR(-ENOMEM); 157 } 158 159 void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) 160 { 161 kvfree(tc); 162 } 163 164 struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc) 165 { 166 return tc->chains; 167 } 168 169 /* To avoid false lock dependency warning set the tc_ht lock 170 * class different than the lock class of the ht being used when deleting 171 * last flow from a group and then deleting a group, we get into del_sw_flow_group() 172 * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but 173 * it's different than the ht->mutex here. 174 */ 175 static struct lock_class_key tc_ht_lock_key; 176 static struct lock_class_key tc_ht_wq_key; 177 178 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); 179 static void free_flow_post_acts(struct mlx5e_tc_flow *flow); 180 static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, 181 struct mlx5_flow_attr *attr); 182 183 void 184 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, 185 enum mlx5e_tc_attr_to_reg type, 186 u32 val, 187 u32 mask) 188 { 189 void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval; 190 int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; 191 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; 192 int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; 193 u32 max_mask = GENMASK(match_len - 1, 0); 194 __be32 curr_mask_be, curr_val_be; 195 u32 curr_mask, curr_val; 196 197 fmask = headers_c + soffset; 198 fval = headers_v + soffset; 199 200 memcpy(&curr_mask_be, fmask, 4); 201 memcpy(&curr_val_be, fval, 4); 202 203 curr_mask = be32_to_cpu(curr_mask_be); 204 curr_val = be32_to_cpu(curr_val_be); 205 206 //move to correct offset 207 WARN_ON(mask > max_mask); 208 mask <<= moffset; 209 val <<= moffset; 210 max_mask <<= moffset; 211 212 //zero val and mask 213 curr_mask &= ~max_mask; 214 curr_val &= ~max_mask; 215 216 //add current to mask 217 curr_mask |= mask; 218 curr_val |= val; 219 220 //back to be32 and write 221 curr_mask_be = cpu_to_be32(curr_mask); 222 curr_val_be = cpu_to_be32(curr_val); 223 224 memcpy(fmask, &curr_mask_be, 4); 225 memcpy(fval, &curr_val_be, 4); 226 227 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; 228 } 229 230 void 231 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, 232 enum mlx5e_tc_attr_to_reg type, 233 u32 *val, 234 u32 *mask) 235 { 236 void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval; 237 int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; 238 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; 239 int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; 240 u32 max_mask = GENMASK(match_len - 1, 0); 241 __be32 curr_mask_be, curr_val_be; 242 u32 curr_mask, curr_val; 243 244 fmask = headers_c + soffset; 245 fval = headers_v + soffset; 246 247 memcpy(&curr_mask_be, fmask, 4); 248 memcpy(&curr_val_be, fval, 4); 249 250 curr_mask = be32_to_cpu(curr_mask_be); 251 curr_val = be32_to_cpu(curr_val_be); 252 253 *mask = (curr_mask >> moffset) & max_mask; 254 *val = (curr_val >> moffset) & max_mask; 255 } 256 257 int 258 mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, 259 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, 260 enum mlx5_flow_namespace_type ns, 261 enum mlx5e_tc_attr_to_reg type, 262 u32 data) 263 { 264 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; 265 int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; 266 int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; 267 char *modact; 268 int err; 269 270 modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts); 271 if (IS_ERR(modact)) 272 return PTR_ERR(modact); 273 274 /* Firmware has 5bit length field and 0 means 32bits */ 275 if (mlen == 32) 276 mlen = 0; 277 278 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); 279 MLX5_SET(set_action_in, modact, field, mfield); 280 MLX5_SET(set_action_in, modact, offset, moffset); 281 MLX5_SET(set_action_in, modact, length, mlen); 282 MLX5_SET(set_action_in, modact, data, data); 283 err = mod_hdr_acts->num_actions; 284 mod_hdr_acts->num_actions++; 285 286 return err; 287 } 288 289 static struct mlx5e_tc_act_stats_handle * 290 get_act_stats_handle(struct mlx5e_priv *priv) 291 { 292 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 293 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 294 struct mlx5_rep_uplink_priv *uplink_priv; 295 struct mlx5e_rep_priv *uplink_rpriv; 296 297 if (is_mdev_switchdev_mode(priv->mdev)) { 298 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 299 uplink_priv = &uplink_rpriv->uplink_priv; 300 301 return uplink_priv->action_stats_handle; 302 } 303 304 return tc->action_stats_handle; 305 } 306 307 struct mlx5e_tc_int_port_priv * 308 mlx5e_get_int_port_priv(struct mlx5e_priv *priv) 309 { 310 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 311 struct mlx5_rep_uplink_priv *uplink_priv; 312 struct mlx5e_rep_priv *uplink_rpriv; 313 314 if (is_mdev_switchdev_mode(priv->mdev)) { 315 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 316 uplink_priv = &uplink_rpriv->uplink_priv; 317 318 return uplink_priv->int_port_priv; 319 } 320 321 return NULL; 322 } 323 324 struct mlx5e_flow_meters * 325 mlx5e_get_flow_meters(struct mlx5_core_dev *dev) 326 { 327 struct mlx5_eswitch *esw = dev->priv.eswitch; 328 struct mlx5_rep_uplink_priv *uplink_priv; 329 struct mlx5e_rep_priv *uplink_rpriv; 330 struct mlx5e_priv *priv; 331 332 if (is_mdev_switchdev_mode(dev)) { 333 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 334 uplink_priv = &uplink_rpriv->uplink_priv; 335 priv = netdev_priv(uplink_rpriv->netdev); 336 if (!uplink_priv->flow_meters) 337 uplink_priv->flow_meters = 338 mlx5e_flow_meters_init(priv, 339 MLX5_FLOW_NAMESPACE_FDB, 340 uplink_priv->post_act); 341 if (!IS_ERR(uplink_priv->flow_meters)) 342 return uplink_priv->flow_meters; 343 } 344 345 return NULL; 346 } 347 348 static struct mlx5_tc_ct_priv * 349 get_ct_priv(struct mlx5e_priv *priv) 350 { 351 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 352 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 353 struct mlx5_rep_uplink_priv *uplink_priv; 354 struct mlx5e_rep_priv *uplink_rpriv; 355 356 if (is_mdev_switchdev_mode(priv->mdev)) { 357 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 358 uplink_priv = &uplink_rpriv->uplink_priv; 359 360 return uplink_priv->ct_priv; 361 } 362 363 return tc->ct; 364 } 365 366 static struct mlx5e_tc_psample * 367 get_sample_priv(struct mlx5e_priv *priv) 368 { 369 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 370 struct mlx5_rep_uplink_priv *uplink_priv; 371 struct mlx5e_rep_priv *uplink_rpriv; 372 373 if (is_mdev_switchdev_mode(priv->mdev)) { 374 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 375 uplink_priv = &uplink_rpriv->uplink_priv; 376 377 return uplink_priv->tc_psample; 378 } 379 380 return NULL; 381 } 382 383 static struct mlx5e_post_act * 384 get_post_action(struct mlx5e_priv *priv) 385 { 386 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 387 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 388 struct mlx5_rep_uplink_priv *uplink_priv; 389 struct mlx5e_rep_priv *uplink_rpriv; 390 391 if (is_mdev_switchdev_mode(priv->mdev)) { 392 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 393 uplink_priv = &uplink_rpriv->uplink_priv; 394 395 return uplink_priv->post_act; 396 } 397 398 return tc->post_act; 399 } 400 401 struct mlx5_flow_handle * 402 mlx5_tc_rule_insert(struct mlx5e_priv *priv, 403 struct mlx5_flow_spec *spec, 404 struct mlx5_flow_attr *attr) 405 { 406 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 407 408 if (is_mdev_switchdev_mode(priv->mdev)) 409 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); 410 411 return mlx5e_add_offloaded_nic_rule(priv, spec, attr); 412 } 413 414 void 415 mlx5_tc_rule_delete(struct mlx5e_priv *priv, 416 struct mlx5_flow_handle *rule, 417 struct mlx5_flow_attr *attr) 418 { 419 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 420 421 if (is_mdev_switchdev_mode(priv->mdev)) { 422 mlx5_eswitch_del_offloaded_rule(esw, rule, attr); 423 return; 424 } 425 426 mlx5e_del_offloaded_nic_rule(priv, rule, attr); 427 } 428 429 static bool 430 is_flow_meter_action(struct mlx5_flow_attr *attr) 431 { 432 return (((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) && 433 (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) || 434 attr->flags & MLX5_ATTR_FLAG_MTU); 435 } 436 437 static int 438 mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv, 439 struct mlx5_flow_attr *attr) 440 { 441 struct mlx5e_post_act *post_act = get_post_action(priv); 442 struct mlx5e_post_meter_priv *post_meter; 443 enum mlx5_flow_namespace_type ns_type; 444 struct mlx5e_flow_meter_handle *meter; 445 enum mlx5e_post_meter_type type; 446 447 meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params); 448 if (IS_ERR(meter)) { 449 mlx5_core_err(priv->mdev, "Failed to get flow meter\n"); 450 return PTR_ERR(meter); 451 } 452 453 ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters); 454 type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE; 455 post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, 456 type, 457 meter->act_counter, meter->drop_counter, 458 attr->branch_true, attr->branch_false); 459 if (IS_ERR(post_meter)) { 460 mlx5_core_err(priv->mdev, "Failed to init post meter\n"); 461 goto err_meter_init; 462 } 463 464 attr->meter_attr.meter = meter; 465 attr->meter_attr.post_meter = post_meter; 466 attr->dest_ft = mlx5e_post_meter_get_ft(post_meter); 467 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 468 469 return 0; 470 471 err_meter_init: 472 mlx5e_tc_meter_put(meter); 473 return PTR_ERR(post_meter); 474 } 475 476 static void 477 mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) 478 { 479 mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter); 480 mlx5e_tc_meter_put(attr->meter_attr.meter); 481 } 482 483 struct mlx5_flow_handle * 484 mlx5e_tc_rule_offload(struct mlx5e_priv *priv, 485 struct mlx5_flow_spec *spec, 486 struct mlx5_flow_attr *attr) 487 { 488 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 489 int err; 490 491 if (!is_mdev_switchdev_mode(priv->mdev)) 492 return mlx5e_add_offloaded_nic_rule(priv, spec, attr); 493 494 if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) 495 return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr); 496 497 if (is_flow_meter_action(attr)) { 498 err = mlx5e_tc_add_flow_meter(priv, attr); 499 if (err) 500 return ERR_PTR(err); 501 } 502 503 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); 504 } 505 506 void 507 mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, 508 struct mlx5_flow_handle *rule, 509 struct mlx5_flow_attr *attr) 510 { 511 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 512 513 if (!is_mdev_switchdev_mode(priv->mdev)) { 514 mlx5e_del_offloaded_nic_rule(priv, rule, attr); 515 return; 516 } 517 518 if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) { 519 mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr); 520 return; 521 } 522 523 mlx5_eswitch_del_offloaded_rule(esw, rule, attr); 524 525 if (attr->meter_attr.meter) 526 mlx5e_tc_del_flow_meter(esw, attr); 527 } 528 529 int 530 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, 531 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, 532 enum mlx5_flow_namespace_type ns, 533 enum mlx5e_tc_attr_to_reg type, 534 u32 data) 535 { 536 int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data); 537 538 return ret < 0 ? ret : 0; 539 } 540 541 void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, 542 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, 543 enum mlx5e_tc_attr_to_reg type, 544 int act_id, u32 data) 545 { 546 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; 547 int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; 548 int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; 549 char *modact; 550 551 modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id); 552 553 /* Firmware has 5bit length field and 0 means 32bits */ 554 if (mlen == 32) 555 mlen = 0; 556 557 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); 558 MLX5_SET(set_action_in, modact, field, mfield); 559 MLX5_SET(set_action_in, modact, offset, moffset); 560 MLX5_SET(set_action_in, modact, length, mlen); 561 MLX5_SET(set_action_in, modact, data, data); 562 } 563 564 struct mlx5e_hairpin { 565 struct mlx5_hairpin *pair; 566 567 struct mlx5_core_dev *func_mdev; 568 struct mlx5e_priv *func_priv; 569 u32 tdn; 570 struct mlx5e_tir direct_tir; 571 572 int num_channels; 573 u8 log_num_packets; 574 struct mlx5e_rqt indir_rqt; 575 struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; 576 struct mlx5_ttc_table *ttc; 577 }; 578 579 struct mlx5e_hairpin_entry { 580 /* a node of a hash table which keeps all the hairpin entries */ 581 struct hlist_node hairpin_hlist; 582 583 /* protects flows list */ 584 spinlock_t flows_lock; 585 /* flows sharing the same hairpin */ 586 struct list_head flows; 587 /* hpe's that were not fully initialized when dead peer update event 588 * function traversed them. 589 */ 590 struct list_head dead_peer_wait_list; 591 592 u16 peer_vhca_id; 593 u8 prio; 594 struct mlx5e_hairpin *hp; 595 refcount_t refcnt; 596 struct completion res_ready; 597 }; 598 599 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, 600 struct mlx5e_tc_flow *flow); 601 602 struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow) 603 { 604 if (!flow || !refcount_inc_not_zero(&flow->refcnt)) 605 return ERR_PTR(-EINVAL); 606 return flow; 607 } 608 609 void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) 610 { 611 if (refcount_dec_and_test(&flow->refcnt)) { 612 mlx5e_tc_del_flow(priv, flow); 613 kfree_rcu(flow, rcu_head); 614 } 615 } 616 617 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) 618 { 619 return flow_flag_test(flow, ESWITCH); 620 } 621 622 bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) 623 { 624 return flow_flag_test(flow, FT); 625 } 626 627 bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) 628 { 629 return flow_flag_test(flow, OFFLOADED); 630 } 631 632 int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow) 633 { 634 return mlx5e_is_eswitch_flow(flow) ? 635 MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; 636 } 637 638 static struct mlx5_core_dev * 639 get_flow_counter_dev(struct mlx5e_tc_flow *flow) 640 { 641 return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev; 642 } 643 644 static struct mod_hdr_tbl * 645 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) 646 { 647 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 648 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 649 650 return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ? 651 &esw->offloads.mod_hdr : 652 &tc->mod_hdr; 653 } 654 655 int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv, 656 struct mlx5e_tc_flow *flow, 657 struct mlx5_flow_attr *attr) 658 { 659 struct mlx5e_mod_hdr_handle *mh; 660 661 mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow), 662 mlx5e_get_flow_namespace(flow), 663 &attr->parse_attr->mod_hdr_acts); 664 if (IS_ERR(mh)) 665 return PTR_ERR(mh); 666 667 WARN_ON(attr->modify_hdr); 668 attr->modify_hdr = mlx5e_mod_hdr_get(mh); 669 attr->mh = mh; 670 671 return 0; 672 } 673 674 void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv, 675 struct mlx5e_tc_flow *flow, 676 struct mlx5_flow_attr *attr) 677 { 678 /* flow wasn't fully initialized */ 679 if (!attr->mh) 680 return; 681 682 mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow), 683 attr->mh); 684 attr->mh = NULL; 685 } 686 687 static 688 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) 689 { 690 struct mlx5_core_dev *mdev; 691 struct net_device *netdev; 692 struct mlx5e_priv *priv; 693 694 netdev = dev_get_by_index(net, ifindex); 695 if (!netdev) 696 return ERR_PTR(-ENODEV); 697 698 priv = netdev_priv(netdev); 699 mdev = priv->mdev; 700 dev_put(netdev); 701 702 /* Mirred tc action holds a refcount on the ifindex net_device (see 703 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev 704 * after dev_put(netdev), while we're in the context of adding a tc flow. 705 * 706 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then 707 * stored in a hairpin object, which exists until all flows, that refer to it, get 708 * removed. 709 * 710 * On the other hand, after a hairpin object has been created, the peer net_device may 711 * be removed/unbound while there are still some hairpin flows that are using it. This 712 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to 713 * NETDEV_UNREGISTER event of the peer net_device. 714 */ 715 return mdev; 716 } 717 718 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) 719 { 720 struct mlx5e_tir_builder *builder; 721 int err; 722 723 builder = mlx5e_tir_builder_alloc(false); 724 if (!builder) 725 return -ENOMEM; 726 727 err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn); 728 if (err) 729 goto out; 730 731 mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]); 732 err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false); 733 if (err) 734 goto create_tir_err; 735 736 out: 737 mlx5e_tir_builder_free(builder); 738 return err; 739 740 create_tir_err: 741 mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); 742 743 goto out; 744 } 745 746 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) 747 { 748 mlx5e_tir_destroy(&hp->direct_tir); 749 mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); 750 } 751 752 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) 753 { 754 struct mlx5e_priv *priv = hp->func_priv; 755 struct mlx5_core_dev *mdev = priv->mdev; 756 struct mlx5e_rss_params_indir *indir; 757 int err; 758 759 indir = kvmalloc(sizeof(*indir), GFP_KERNEL); 760 if (!indir) 761 return -ENOMEM; 762 763 mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels); 764 err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels, 765 mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc, 766 indir); 767 768 kvfree(indir); 769 return err; 770 } 771 772 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) 773 { 774 struct mlx5e_priv *priv = hp->func_priv; 775 struct mlx5e_rss_params_hash rss_hash; 776 enum mlx5_traffic_types tt, max_tt; 777 struct mlx5e_tir_builder *builder; 778 int err = 0; 779 780 builder = mlx5e_tir_builder_alloc(false); 781 if (!builder) 782 return -ENOMEM; 783 784 rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res); 785 786 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { 787 struct mlx5e_rss_params_traffic_type rss_tt; 788 789 rss_tt = mlx5e_rss_get_default_tt_config(tt); 790 791 mlx5e_tir_builder_build_rqt(builder, hp->tdn, 792 mlx5e_rqt_get_rqtn(&hp->indir_rqt), 793 false); 794 mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false); 795 796 err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false); 797 if (err) { 798 mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); 799 goto err_destroy_tirs; 800 } 801 802 mlx5e_tir_builder_clear(builder); 803 } 804 805 out: 806 mlx5e_tir_builder_free(builder); 807 return err; 808 809 err_destroy_tirs: 810 max_tt = tt; 811 for (tt = 0; tt < max_tt; tt++) 812 mlx5e_tir_destroy(&hp->indir_tir[tt]); 813 814 goto out; 815 } 816 817 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) 818 { 819 int tt; 820 821 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) 822 mlx5e_tir_destroy(&hp->indir_tir[tt]); 823 } 824 825 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, 826 struct ttc_params *ttc_params) 827 { 828 struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; 829 int tt; 830 831 memset(ttc_params, 0, sizeof(*ttc_params)); 832 833 ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev, 834 MLX5_FLOW_NAMESPACE_KERNEL); 835 for (tt = 0; tt < MLX5_NUM_TT; tt++) { 836 ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 837 ttc_params->dests[tt].tir_num = 838 tt == MLX5_TT_ANY ? 839 mlx5e_tir_get_tirn(&hp->direct_tir) : 840 mlx5e_tir_get_tirn(&hp->indir_tir[tt]); 841 } 842 843 ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; 844 ft_attr->prio = MLX5E_TC_PRIO; 845 } 846 847 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) 848 { 849 struct mlx5e_priv *priv = hp->func_priv; 850 struct ttc_params ttc_params; 851 struct mlx5_ttc_table *ttc; 852 int err; 853 854 err = mlx5e_hairpin_create_indirect_rqt(hp); 855 if (err) 856 return err; 857 858 err = mlx5e_hairpin_create_indirect_tirs(hp); 859 if (err) 860 goto err_create_indirect_tirs; 861 862 mlx5e_hairpin_set_ttc_params(hp, &ttc_params); 863 hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); 864 if (IS_ERR(hp->ttc)) { 865 err = PTR_ERR(hp->ttc); 866 goto err_create_ttc_table; 867 } 868 869 ttc = mlx5e_fs_get_ttc(priv->fs, false); 870 netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", 871 hp->num_channels, 872 mlx5_get_ttc_flow_table(ttc)->id); 873 874 return 0; 875 876 err_create_ttc_table: 877 mlx5e_hairpin_destroy_indirect_tirs(hp); 878 err_create_indirect_tirs: 879 mlx5e_rqt_destroy(&hp->indir_rqt); 880 881 return err; 882 } 883 884 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) 885 { 886 mlx5_destroy_ttc_table(hp->ttc); 887 mlx5e_hairpin_destroy_indirect_tirs(hp); 888 mlx5e_rqt_destroy(&hp->indir_rqt); 889 } 890 891 static struct mlx5e_hairpin * 892 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params, 893 int peer_ifindex) 894 { 895 struct mlx5_core_dev *func_mdev, *peer_mdev; 896 struct mlx5e_hairpin *hp; 897 struct mlx5_hairpin *pair; 898 int err; 899 900 hp = kzalloc(sizeof(*hp), GFP_KERNEL); 901 if (!hp) 902 return ERR_PTR(-ENOMEM); 903 904 func_mdev = priv->mdev; 905 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); 906 if (IS_ERR(peer_mdev)) { 907 err = PTR_ERR(peer_mdev); 908 goto create_pair_err; 909 } 910 911 pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params); 912 if (IS_ERR(pair)) { 913 err = PTR_ERR(pair); 914 goto create_pair_err; 915 } 916 hp->pair = pair; 917 hp->func_mdev = func_mdev; 918 hp->func_priv = priv; 919 hp->num_channels = params->num_channels; 920 hp->log_num_packets = params->log_num_packets; 921 922 err = mlx5e_hairpin_create_transport(hp); 923 if (err) 924 goto create_transport_err; 925 926 if (hp->num_channels > 1) { 927 err = mlx5e_hairpin_rss_init(hp); 928 if (err) 929 goto rss_init_err; 930 } 931 932 return hp; 933 934 rss_init_err: 935 mlx5e_hairpin_destroy_transport(hp); 936 create_transport_err: 937 mlx5_core_hairpin_destroy(hp->pair); 938 create_pair_err: 939 kfree(hp); 940 return ERR_PTR(err); 941 } 942 943 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp) 944 { 945 if (hp->num_channels > 1) 946 mlx5e_hairpin_rss_cleanup(hp); 947 mlx5e_hairpin_destroy_transport(hp); 948 mlx5_core_hairpin_destroy(hp->pair); 949 kvfree(hp); 950 } 951 952 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio) 953 { 954 return (peer_vhca_id << 16 | prio); 955 } 956 957 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, 958 u16 peer_vhca_id, u8 prio) 959 { 960 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 961 struct mlx5e_hairpin_entry *hpe; 962 u32 hash_key = hash_hairpin_info(peer_vhca_id, prio); 963 964 hash_for_each_possible(tc->hairpin_tbl, hpe, 965 hairpin_hlist, hash_key) { 966 if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) { 967 refcount_inc(&hpe->refcnt); 968 return hpe; 969 } 970 } 971 972 return NULL; 973 } 974 975 static void mlx5e_hairpin_put(struct mlx5e_priv *priv, 976 struct mlx5e_hairpin_entry *hpe) 977 { 978 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 979 /* no more hairpin flows for us, release the hairpin pair */ 980 if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock)) 981 return; 982 hash_del(&hpe->hairpin_hlist); 983 mutex_unlock(&tc->hairpin_tbl_lock); 984 985 if (!IS_ERR_OR_NULL(hpe->hp)) { 986 netdev_dbg(priv->netdev, "del hairpin: peer %s\n", 987 dev_name(hpe->hp->pair->peer_mdev->device)); 988 989 mlx5e_hairpin_destroy(hpe->hp); 990 } 991 992 WARN_ON(!list_empty(&hpe->flows)); 993 kfree(hpe); 994 } 995 996 #define UNKNOWN_MATCH_PRIO 8 997 998 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, 999 struct mlx5_flow_spec *spec, u8 *match_prio, 1000 struct netlink_ext_ack *extack) 1001 { 1002 void *headers_c, *headers_v; 1003 u8 prio_val, prio_mask = 0; 1004 bool vlan_present; 1005 1006 #ifdef CONFIG_MLX5_CORE_EN_DCB 1007 if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) { 1008 NL_SET_ERR_MSG_MOD(extack, 1009 "only PCP trust state supported for hairpin"); 1010 return -EOPNOTSUPP; 1011 } 1012 #endif 1013 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1014 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1015 1016 vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag); 1017 if (vlan_present) { 1018 prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); 1019 prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); 1020 } 1021 1022 if (!vlan_present || !prio_mask) { 1023 prio_val = UNKNOWN_MATCH_PRIO; 1024 } else if (prio_mask != 0x7) { 1025 NL_SET_ERR_MSG_MOD(extack, 1026 "masked priority match not supported for hairpin"); 1027 return -EOPNOTSUPP; 1028 } 1029 1030 *match_prio = prio_val; 1031 return 0; 1032 } 1033 1034 static int debugfs_hairpin_num_active_get(void *data, u64 *val) 1035 { 1036 struct mlx5e_tc_table *tc = data; 1037 struct mlx5e_hairpin_entry *hpe; 1038 u32 cnt = 0; 1039 u32 bkt; 1040 1041 mutex_lock(&tc->hairpin_tbl_lock); 1042 hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) 1043 cnt++; 1044 mutex_unlock(&tc->hairpin_tbl_lock); 1045 1046 *val = cnt; 1047 1048 return 0; 1049 } 1050 DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active, 1051 debugfs_hairpin_num_active_get, NULL, "%llu\n"); 1052 1053 static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv) 1054 1055 { 1056 struct mlx5e_tc_table *tc = file->private; 1057 struct mlx5e_hairpin_entry *hpe; 1058 u32 bkt; 1059 1060 mutex_lock(&tc->hairpin_tbl_lock); 1061 hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) 1062 seq_printf(file, 1063 "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n", 1064 hpe->peer_vhca_id, hpe->prio, 1065 refcount_read(&hpe->refcnt), hpe->hp->num_channels, 1066 BIT(hpe->hp->log_num_packets)); 1067 mutex_unlock(&tc->hairpin_tbl_lock); 1068 1069 return 0; 1070 } 1071 DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump); 1072 1073 static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc, 1074 struct dentry *dfs_root) 1075 { 1076 if (IS_ERR_OR_NULL(dfs_root)) 1077 return; 1078 1079 tc->dfs_root = debugfs_create_dir("tc", dfs_root); 1080 1081 debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc, 1082 &fops_hairpin_num_active); 1083 debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc, 1084 &debugfs_hairpin_table_dump_fops); 1085 } 1086 1087 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, 1088 struct mlx5e_tc_flow *flow, 1089 struct mlx5e_tc_flow_parse_attr *parse_attr, 1090 struct netlink_ext_ack *extack) 1091 { 1092 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 1093 struct devlink *devlink = priv_to_devlink(priv->mdev); 1094 int peer_ifindex = parse_attr->mirred_ifindex[0]; 1095 union devlink_param_value val = {}; 1096 struct mlx5_hairpin_params params; 1097 struct mlx5_core_dev *peer_mdev; 1098 struct mlx5e_hairpin_entry *hpe; 1099 struct mlx5e_hairpin *hp; 1100 u8 match_prio; 1101 u16 peer_id; 1102 int err; 1103 1104 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); 1105 if (IS_ERR(peer_mdev)) { 1106 NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device"); 1107 return PTR_ERR(peer_mdev); 1108 } 1109 1110 if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { 1111 NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported"); 1112 return -EOPNOTSUPP; 1113 } 1114 1115 peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id); 1116 err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio, 1117 extack); 1118 if (err) 1119 return err; 1120 1121 mutex_lock(&tc->hairpin_tbl_lock); 1122 hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); 1123 if (hpe) { 1124 mutex_unlock(&tc->hairpin_tbl_lock); 1125 wait_for_completion(&hpe->res_ready); 1126 1127 if (IS_ERR(hpe->hp)) { 1128 err = -EREMOTEIO; 1129 goto out_err; 1130 } 1131 goto attach_flow; 1132 } 1133 1134 hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); 1135 if (!hpe) { 1136 mutex_unlock(&tc->hairpin_tbl_lock); 1137 return -ENOMEM; 1138 } 1139 1140 spin_lock_init(&hpe->flows_lock); 1141 INIT_LIST_HEAD(&hpe->flows); 1142 INIT_LIST_HEAD(&hpe->dead_peer_wait_list); 1143 hpe->peer_vhca_id = peer_id; 1144 hpe->prio = match_prio; 1145 refcount_set(&hpe->refcnt, 1); 1146 init_completion(&hpe->res_ready); 1147 1148 hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist, 1149 hash_hairpin_info(peer_id, match_prio)); 1150 mutex_unlock(&tc->hairpin_tbl_lock); 1151 1152 err = devl_param_driverinit_value_get( 1153 devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val); 1154 if (err) { 1155 err = -ENOMEM; 1156 goto out_err; 1157 } 1158 1159 params.log_num_packets = ilog2(val.vu32); 1160 params.log_data_size = 1161 clamp_t(u32, 1162 params.log_num_packets + 1163 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev), 1164 MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz), 1165 MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); 1166 1167 params.q_counter = priv->q_counter; 1168 err = devl_param_driverinit_value_get( 1169 devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val); 1170 if (err) { 1171 err = -ENOMEM; 1172 goto out_err; 1173 } 1174 1175 params.num_channels = val.vu32; 1176 1177 hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); 1178 hpe->hp = hp; 1179 complete_all(&hpe->res_ready); 1180 if (IS_ERR(hp)) { 1181 err = PTR_ERR(hp); 1182 goto out_err; 1183 } 1184 1185 netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", 1186 mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0], 1187 dev_name(hp->pair->peer_mdev->device), 1188 hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); 1189 1190 attach_flow: 1191 if (hpe->hp->num_channels > 1) { 1192 flow_flag_set(flow, HAIRPIN_RSS); 1193 flow->attr->nic_attr->hairpin_ft = 1194 mlx5_get_ttc_flow_table(hpe->hp->ttc); 1195 } else { 1196 flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir); 1197 } 1198 1199 flow->hpe = hpe; 1200 spin_lock(&hpe->flows_lock); 1201 list_add(&flow->hairpin, &hpe->flows); 1202 spin_unlock(&hpe->flows_lock); 1203 1204 return 0; 1205 1206 out_err: 1207 mlx5e_hairpin_put(priv, hpe); 1208 return err; 1209 } 1210 1211 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, 1212 struct mlx5e_tc_flow *flow) 1213 { 1214 /* flow wasn't fully initialized */ 1215 if (!flow->hpe) 1216 return; 1217 1218 spin_lock(&flow->hpe->flows_lock); 1219 list_del(&flow->hairpin); 1220 spin_unlock(&flow->hpe->flows_lock); 1221 1222 mlx5e_hairpin_put(priv, flow->hpe); 1223 flow->hpe = NULL; 1224 } 1225 1226 struct mlx5_flow_handle * 1227 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, 1228 struct mlx5_flow_spec *spec, 1229 struct mlx5_flow_attr *attr) 1230 { 1231 struct mlx5_flow_context *flow_context = &spec->flow_context; 1232 struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs); 1233 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 1234 struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr; 1235 struct mlx5_flow_destination dest[2] = {}; 1236 struct mlx5_fs_chains *nic_chains; 1237 struct mlx5_flow_act flow_act = { 1238 .action = attr->action, 1239 .flags = FLOW_ACT_NO_APPEND, 1240 }; 1241 struct mlx5_flow_handle *rule; 1242 struct mlx5_flow_table *ft; 1243 int dest_ix = 0; 1244 1245 nic_chains = mlx5e_nic_chains(tc); 1246 flow_context->flags |= FLOW_CONTEXT_HAS_TAG; 1247 flow_context->flow_tag = nic_attr->flow_tag; 1248 1249 if (attr->dest_ft) { 1250 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 1251 dest[dest_ix].ft = attr->dest_ft; 1252 dest_ix++; 1253 } else if (nic_attr->hairpin_ft) { 1254 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 1255 dest[dest_ix].ft = nic_attr->hairpin_ft; 1256 dest_ix++; 1257 } else if (nic_attr->hairpin_tirn) { 1258 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1259 dest[dest_ix].tir_num = nic_attr->hairpin_tirn; 1260 dest_ix++; 1261 } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { 1262 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 1263 if (attr->dest_chain) { 1264 dest[dest_ix].ft = mlx5_chains_get_table(nic_chains, 1265 attr->dest_chain, 1, 1266 MLX5E_TC_FT_LEVEL); 1267 if (IS_ERR(dest[dest_ix].ft)) 1268 return ERR_CAST(dest[dest_ix].ft); 1269 } else { 1270 dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan); 1271 } 1272 dest_ix++; 1273 } 1274 1275 if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && 1276 MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) 1277 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; 1278 1279 if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { 1280 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 1281 dest[dest_ix].counter_id = mlx5_fc_id(attr->counter); 1282 dest_ix++; 1283 } 1284 1285 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) 1286 flow_act.modify_hdr = attr->modify_hdr; 1287 1288 mutex_lock(&tc->t_lock); 1289 if (IS_ERR_OR_NULL(tc->t)) { 1290 /* Create the root table here if doesn't exist yet */ 1291 tc->t = 1292 mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL); 1293 1294 if (IS_ERR(tc->t)) { 1295 mutex_unlock(&tc->t_lock); 1296 netdev_err(priv->netdev, 1297 "Failed to create tc offload table\n"); 1298 rule = ERR_CAST(tc->t); 1299 goto err_ft_get; 1300 } 1301 } 1302 mutex_unlock(&tc->t_lock); 1303 1304 if (attr->chain || attr->prio) 1305 ft = mlx5_chains_get_table(nic_chains, 1306 attr->chain, attr->prio, 1307 MLX5E_TC_FT_LEVEL); 1308 else 1309 ft = attr->ft; 1310 1311 if (IS_ERR(ft)) { 1312 rule = ERR_CAST(ft); 1313 goto err_ft_get; 1314 } 1315 1316 if (attr->outer_match_level != MLX5_MATCH_NONE) 1317 spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; 1318 1319 rule = mlx5_add_flow_rules(ft, spec, 1320 &flow_act, dest, dest_ix); 1321 if (IS_ERR(rule)) 1322 goto err_rule; 1323 1324 return rule; 1325 1326 err_rule: 1327 if (attr->chain || attr->prio) 1328 mlx5_chains_put_table(nic_chains, 1329 attr->chain, attr->prio, 1330 MLX5E_TC_FT_LEVEL); 1331 err_ft_get: 1332 if (attr->dest_chain) 1333 mlx5_chains_put_table(nic_chains, 1334 attr->dest_chain, 1, 1335 MLX5E_TC_FT_LEVEL); 1336 1337 return ERR_CAST(rule); 1338 } 1339 1340 static int 1341 alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev, 1342 struct mlx5_flow_attr *attr) 1343 1344 { 1345 struct mlx5_fc *counter; 1346 1347 counter = mlx5_fc_create(counter_dev, true); 1348 if (IS_ERR(counter)) 1349 return PTR_ERR(counter); 1350 1351 attr->counter = counter; 1352 return 0; 1353 } 1354 1355 static int 1356 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, 1357 struct mlx5e_tc_flow *flow, 1358 struct netlink_ext_ack *extack) 1359 { 1360 struct mlx5e_tc_flow_parse_attr *parse_attr; 1361 struct mlx5_flow_attr *attr = flow->attr; 1362 struct mlx5_core_dev *dev = priv->mdev; 1363 int err; 1364 1365 parse_attr = attr->parse_attr; 1366 1367 if (flow_flag_test(flow, HAIRPIN)) { 1368 err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); 1369 if (err) 1370 return err; 1371 } 1372 1373 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { 1374 err = alloc_flow_attr_counter(dev, attr); 1375 if (err) 1376 return err; 1377 } 1378 1379 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { 1380 err = mlx5e_tc_attach_mod_hdr(priv, flow, attr); 1381 if (err) 1382 return err; 1383 } 1384 1385 flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr); 1386 return PTR_ERR_OR_ZERO(flow->rule[0]); 1387 } 1388 1389 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, 1390 struct mlx5_flow_handle *rule, 1391 struct mlx5_flow_attr *attr) 1392 { 1393 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 1394 struct mlx5_fs_chains *nic_chains; 1395 1396 nic_chains = mlx5e_nic_chains(tc); 1397 mlx5_del_flow_rules(rule); 1398 1399 if (attr->chain || attr->prio) 1400 mlx5_chains_put_table(nic_chains, attr->chain, attr->prio, 1401 MLX5E_TC_FT_LEVEL); 1402 1403 if (attr->dest_chain) 1404 mlx5_chains_put_table(nic_chains, attr->dest_chain, 1, 1405 MLX5E_TC_FT_LEVEL); 1406 } 1407 1408 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, 1409 struct mlx5e_tc_flow *flow) 1410 { 1411 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 1412 struct mlx5_flow_attr *attr = flow->attr; 1413 1414 flow_flag_clear(flow, OFFLOADED); 1415 1416 if (!IS_ERR_OR_NULL(flow->rule[0])) 1417 mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr); 1418 1419 /* Remove root table if no rules are left to avoid 1420 * extra steering hops. 1421 */ 1422 mutex_lock(&tc->t_lock); 1423 if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && 1424 !IS_ERR_OR_NULL(tc->t)) { 1425 mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL); 1426 tc->t = NULL; 1427 } 1428 mutex_unlock(&tc->t_lock); 1429 1430 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { 1431 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); 1432 mlx5e_tc_detach_mod_hdr(priv, flow, attr); 1433 } 1434 1435 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) 1436 mlx5_fc_destroy(priv->mdev, attr->counter); 1437 1438 if (flow_flag_test(flow, HAIRPIN)) 1439 mlx5e_hairpin_flow_del(priv, flow); 1440 1441 free_flow_post_acts(flow); 1442 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); 1443 1444 kvfree(attr->parse_attr); 1445 kfree(flow->attr); 1446 } 1447 1448 struct mlx5_flow_handle * 1449 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, 1450 struct mlx5e_tc_flow *flow, 1451 struct mlx5_flow_spec *spec, 1452 struct mlx5_flow_attr *attr) 1453 { 1454 struct mlx5_flow_handle *rule; 1455 1456 if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) 1457 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); 1458 1459 rule = mlx5e_tc_rule_offload(flow->priv, spec, attr); 1460 1461 if (IS_ERR(rule)) 1462 return rule; 1463 1464 if (attr->esw_attr->split_count) { 1465 flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); 1466 if (IS_ERR(flow->rule[1])) 1467 goto err_rule1; 1468 } 1469 1470 return rule; 1471 1472 err_rule1: 1473 mlx5e_tc_rule_unoffload(flow->priv, rule, attr); 1474 return flow->rule[1]; 1475 } 1476 1477 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, 1478 struct mlx5e_tc_flow *flow, 1479 struct mlx5_flow_attr *attr) 1480 { 1481 flow_flag_clear(flow, OFFLOADED); 1482 1483 if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) 1484 return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); 1485 1486 if (attr->esw_attr->split_count) 1487 mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); 1488 1489 mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr); 1490 } 1491 1492 struct mlx5_flow_handle * 1493 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, 1494 struct mlx5e_tc_flow *flow, 1495 struct mlx5_flow_spec *spec) 1496 { 1497 struct mlx5e_tc_mod_hdr_acts mod_acts = {}; 1498 struct mlx5e_mod_hdr_handle *mh = NULL; 1499 struct mlx5_flow_attr *slow_attr; 1500 struct mlx5_flow_handle *rule; 1501 bool fwd_and_modify_cap; 1502 u32 chain_mapping = 0; 1503 int err; 1504 1505 slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); 1506 if (!slow_attr) 1507 return ERR_PTR(-ENOMEM); 1508 1509 memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); 1510 slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1511 slow_attr->esw_attr->split_count = 0; 1512 slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; 1513 1514 fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table); 1515 if (!fwd_and_modify_cap) 1516 goto skip_restore; 1517 1518 err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping); 1519 if (err) 1520 goto err_get_chain; 1521 1522 err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, 1523 MAPPED_OBJ_TO_REG, chain_mapping); 1524 if (err) 1525 goto err_reg_set; 1526 1527 mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow), 1528 MLX5_FLOW_NAMESPACE_FDB, &mod_acts); 1529 if (IS_ERR(mh)) { 1530 err = PTR_ERR(mh); 1531 goto err_attach; 1532 } 1533 1534 slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1535 slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh); 1536 1537 skip_restore: 1538 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); 1539 if (IS_ERR(rule)) { 1540 err = PTR_ERR(rule); 1541 goto err_offload; 1542 } 1543 1544 flow->attr->slow_mh = mh; 1545 flow->chain_mapping = chain_mapping; 1546 flow_flag_set(flow, SLOW); 1547 1548 mlx5e_mod_hdr_dealloc(&mod_acts); 1549 kfree(slow_attr); 1550 1551 return rule; 1552 1553 err_offload: 1554 if (fwd_and_modify_cap) 1555 mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh); 1556 err_attach: 1557 err_reg_set: 1558 if (fwd_and_modify_cap) 1559 mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping); 1560 err_get_chain: 1561 mlx5e_mod_hdr_dealloc(&mod_acts); 1562 kfree(slow_attr); 1563 return ERR_PTR(err); 1564 } 1565 1566 void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, 1567 struct mlx5e_tc_flow *flow) 1568 { 1569 struct mlx5e_mod_hdr_handle *slow_mh = flow->attr->slow_mh; 1570 struct mlx5_flow_attr *slow_attr; 1571 1572 slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); 1573 if (!slow_attr) { 1574 mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n"); 1575 return; 1576 } 1577 1578 memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); 1579 slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1580 slow_attr->esw_attr->split_count = 0; 1581 slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; 1582 if (slow_mh) { 1583 slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1584 slow_attr->modify_hdr = mlx5e_mod_hdr_get(slow_mh); 1585 } 1586 mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); 1587 if (slow_mh) { 1588 mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), slow_mh); 1589 mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping); 1590 flow->chain_mapping = 0; 1591 flow->attr->slow_mh = NULL; 1592 } 1593 flow_flag_clear(flow, SLOW); 1594 kfree(slow_attr); 1595 } 1596 1597 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this 1598 * function. 1599 */ 1600 static void unready_flow_add(struct mlx5e_tc_flow *flow, 1601 struct list_head *unready_flows) 1602 { 1603 flow_flag_set(flow, NOT_READY); 1604 list_add_tail(&flow->unready, unready_flows); 1605 } 1606 1607 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this 1608 * function. 1609 */ 1610 static void unready_flow_del(struct mlx5e_tc_flow *flow) 1611 { 1612 list_del(&flow->unready); 1613 flow_flag_clear(flow, NOT_READY); 1614 } 1615 1616 static void add_unready_flow(struct mlx5e_tc_flow *flow) 1617 { 1618 struct mlx5_rep_uplink_priv *uplink_priv; 1619 struct mlx5e_rep_priv *rpriv; 1620 struct mlx5_eswitch *esw; 1621 1622 esw = flow->priv->mdev->priv.eswitch; 1623 rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 1624 uplink_priv = &rpriv->uplink_priv; 1625 1626 mutex_lock(&uplink_priv->unready_flows_lock); 1627 unready_flow_add(flow, &uplink_priv->unready_flows); 1628 mutex_unlock(&uplink_priv->unready_flows_lock); 1629 } 1630 1631 static void remove_unready_flow(struct mlx5e_tc_flow *flow) 1632 { 1633 struct mlx5_rep_uplink_priv *uplink_priv; 1634 struct mlx5e_rep_priv *rpriv; 1635 struct mlx5_eswitch *esw; 1636 1637 esw = flow->priv->mdev->priv.eswitch; 1638 rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 1639 uplink_priv = &rpriv->uplink_priv; 1640 1641 mutex_lock(&uplink_priv->unready_flows_lock); 1642 if (flow_flag_test(flow, NOT_READY)) 1643 unready_flow_del(flow); 1644 mutex_unlock(&uplink_priv->unready_flows_lock); 1645 } 1646 1647 bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev) 1648 { 1649 struct mlx5_core_dev *out_mdev, *route_mdev; 1650 struct mlx5e_priv *out_priv, *route_priv; 1651 1652 out_priv = netdev_priv(out_dev); 1653 out_mdev = out_priv->mdev; 1654 route_priv = netdev_priv(route_dev); 1655 route_mdev = route_priv->mdev; 1656 1657 if (out_mdev->coredev_type != MLX5_COREDEV_PF) 1658 return false; 1659 1660 if (route_mdev->coredev_type != MLX5_COREDEV_VF && 1661 route_mdev->coredev_type != MLX5_COREDEV_SF) 1662 return false; 1663 1664 return mlx5e_same_hw_devs(out_priv, route_priv); 1665 } 1666 1667 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport) 1668 { 1669 struct mlx5e_priv *out_priv, *route_priv; 1670 struct mlx5_core_dev *route_mdev; 1671 struct mlx5_devcom *devcom; 1672 struct mlx5_eswitch *esw; 1673 u16 vhca_id; 1674 int err; 1675 int i; 1676 1677 out_priv = netdev_priv(out_dev); 1678 esw = out_priv->mdev->priv.eswitch; 1679 route_priv = netdev_priv(route_dev); 1680 route_mdev = route_priv->mdev; 1681 1682 vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id); 1683 err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); 1684 if (!err) 1685 return err; 1686 1687 if (!mlx5_lag_is_active(out_priv->mdev)) 1688 return err; 1689 1690 rcu_read_lock(); 1691 devcom = out_priv->mdev->priv.devcom; 1692 err = -ENODEV; 1693 mlx5_devcom_for_each_peer_entry_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS, 1694 esw, i) { 1695 err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); 1696 if (!err) 1697 break; 1698 } 1699 rcu_read_unlock(); 1700 1701 return err; 1702 } 1703 1704 static int 1705 verify_attr_actions(u32 actions, struct netlink_ext_ack *extack) 1706 { 1707 if (!(actions & 1708 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { 1709 NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action"); 1710 return -EOPNOTSUPP; 1711 } 1712 1713 if (!(~actions & 1714 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { 1715 NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); 1716 return -EOPNOTSUPP; 1717 } 1718 1719 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && 1720 actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { 1721 NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); 1722 return -EOPNOTSUPP; 1723 } 1724 1725 return 0; 1726 } 1727 1728 static int 1729 post_process_attr(struct mlx5e_tc_flow *flow, 1730 struct mlx5_flow_attr *attr, 1731 struct netlink_ext_ack *extack) 1732 { 1733 bool vf_tun; 1734 int err = 0; 1735 1736 err = verify_attr_actions(attr->action, extack); 1737 if (err) 1738 goto err_out; 1739 1740 err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); 1741 if (err) 1742 goto err_out; 1743 1744 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { 1745 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr); 1746 if (err) 1747 goto err_out; 1748 } 1749 1750 if (attr->branch_true && 1751 attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { 1752 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_true); 1753 if (err) 1754 goto err_out; 1755 } 1756 1757 if (attr->branch_false && 1758 attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { 1759 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_false); 1760 if (err) 1761 goto err_out; 1762 } 1763 1764 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { 1765 err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr); 1766 if (err) 1767 goto err_out; 1768 } 1769 1770 err_out: 1771 return err; 1772 } 1773 1774 static int 1775 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, 1776 struct mlx5e_tc_flow *flow, 1777 struct netlink_ext_ack *extack) 1778 { 1779 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1780 struct mlx5e_tc_flow_parse_attr *parse_attr; 1781 struct mlx5_flow_attr *attr = flow->attr; 1782 struct mlx5_esw_flow_attr *esw_attr; 1783 u32 max_prio, max_chain; 1784 int err = 0; 1785 1786 parse_attr = attr->parse_attr; 1787 esw_attr = attr->esw_attr; 1788 1789 /* We check chain range only for tc flows. 1790 * For ft flows, we checked attr->chain was originally 0 and set it to 1791 * FDB_FT_CHAIN which is outside tc range. 1792 * See mlx5e_rep_setup_ft_cb(). 1793 */ 1794 max_chain = mlx5_chains_get_chain_range(esw_chains(esw)); 1795 if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { 1796 NL_SET_ERR_MSG_MOD(extack, 1797 "Requested chain is out of supported range"); 1798 err = -EOPNOTSUPP; 1799 goto err_out; 1800 } 1801 1802 max_prio = mlx5_chains_get_prio_range(esw_chains(esw)); 1803 if (attr->prio > max_prio) { 1804 NL_SET_ERR_MSG_MOD(extack, 1805 "Requested priority is out of supported range"); 1806 err = -EOPNOTSUPP; 1807 goto err_out; 1808 } 1809 1810 if (flow_flag_test(flow, TUN_RX)) { 1811 err = mlx5e_attach_decap_route(priv, flow); 1812 if (err) 1813 goto err_out; 1814 1815 if (!attr->chain && esw_attr->int_port && 1816 attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { 1817 /* If decap route device is internal port, change the 1818 * source vport value in reg_c0 back to uplink just in 1819 * case the rule performs goto chain > 0. If we have a miss 1820 * on chain > 0 we want the metadata regs to hold the 1821 * chain id so SW will resume handling of this packet 1822 * from the proper chain. 1823 */ 1824 u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw, 1825 esw_attr->in_rep->vport); 1826 1827 err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, 1828 MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, 1829 metadata); 1830 if (err) 1831 goto err_out; 1832 1833 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1834 } 1835 } 1836 1837 if (flow_flag_test(flow, L3_TO_L2_DECAP)) { 1838 err = mlx5e_attach_decap(priv, flow, extack); 1839 if (err) 1840 goto err_out; 1841 } 1842 1843 if (netif_is_ovs_master(parse_attr->filter_dev)) { 1844 struct mlx5e_tc_int_port *int_port; 1845 1846 if (attr->chain) { 1847 NL_SET_ERR_MSG_MOD(extack, 1848 "Internal port rule is only supported on chain 0"); 1849 err = -EOPNOTSUPP; 1850 goto err_out; 1851 } 1852 1853 if (attr->dest_chain) { 1854 NL_SET_ERR_MSG_MOD(extack, 1855 "Internal port rule offload doesn't support goto action"); 1856 err = -EOPNOTSUPP; 1857 goto err_out; 1858 } 1859 1860 int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), 1861 parse_attr->filter_dev->ifindex, 1862 flow_flag_test(flow, EGRESS) ? 1863 MLX5E_TC_INT_PORT_EGRESS : 1864 MLX5E_TC_INT_PORT_INGRESS); 1865 if (IS_ERR(int_port)) { 1866 err = PTR_ERR(int_port); 1867 goto err_out; 1868 } 1869 1870 esw_attr->int_port = int_port; 1871 } 1872 1873 err = post_process_attr(flow, attr, extack); 1874 if (err) 1875 goto err_out; 1876 1877 err = mlx5e_tc_act_stats_add_flow(get_act_stats_handle(priv), flow); 1878 if (err) 1879 goto err_out; 1880 1881 /* we get here if one of the following takes place: 1882 * (1) there's no error 1883 * (2) there's an encap action and we don't have valid neigh 1884 */ 1885 if (flow_flag_test(flow, SLOW)) 1886 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); 1887 else 1888 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); 1889 1890 if (IS_ERR(flow->rule[0])) { 1891 err = PTR_ERR(flow->rule[0]); 1892 goto err_out; 1893 } 1894 flow_flag_set(flow, OFFLOADED); 1895 1896 return 0; 1897 1898 err_out: 1899 flow_flag_set(flow, FAILED); 1900 return err; 1901 } 1902 1903 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) 1904 { 1905 struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; 1906 void *headers_v = MLX5_ADDR_OF(fte_match_param, 1907 spec->match_value, 1908 misc_parameters_3); 1909 u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3, 1910 headers_v, 1911 geneve_tlv_option_0_data); 1912 1913 return !!geneve_tlv_opt_0_data; 1914 } 1915 1916 static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) 1917 { 1918 if (!attr) 1919 return; 1920 1921 mlx5_free_flow_attr_actions(flow, attr); 1922 kvfree(attr->parse_attr); 1923 kfree(attr); 1924 } 1925 1926 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, 1927 struct mlx5e_tc_flow *flow) 1928 { 1929 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 1930 struct mlx5_flow_attr *attr = flow->attr; 1931 struct mlx5_esw_flow_attr *esw_attr; 1932 1933 esw_attr = attr->esw_attr; 1934 mlx5e_put_flow_tunnel_id(flow); 1935 1936 remove_unready_flow(flow); 1937 1938 if (mlx5e_is_offloaded_flow(flow)) { 1939 if (flow_flag_test(flow, SLOW)) 1940 mlx5e_tc_unoffload_from_slow_path(esw, flow); 1941 else 1942 mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); 1943 } 1944 complete_all(&flow->del_hw_done); 1945 1946 if (mlx5_flow_has_geneve_opt(flow)) 1947 mlx5_geneve_tlv_option_del(priv->mdev->geneve); 1948 1949 if (flow->decap_route) 1950 mlx5e_detach_decap_route(priv, flow); 1951 1952 mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); 1953 1954 if (esw_attr->int_port) 1955 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port); 1956 1957 if (esw_attr->dest_int_port) 1958 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port); 1959 1960 if (flow_flag_test(flow, L3_TO_L2_DECAP)) 1961 mlx5e_detach_decap(priv, flow); 1962 1963 mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow); 1964 1965 free_flow_post_acts(flow); 1966 mlx5_free_flow_attr_actions(flow, attr); 1967 1968 kvfree(attr->esw_attr->rx_tun_attr); 1969 kvfree(attr->parse_attr); 1970 kfree(flow->attr); 1971 } 1972 1973 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) 1974 { 1975 struct mlx5_flow_attr *attr; 1976 1977 attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list); 1978 return attr->counter; 1979 } 1980 1981 /* Iterate over tmp_list of flows attached to flow_list head. */ 1982 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) 1983 { 1984 struct mlx5e_tc_flow *flow, *tmp; 1985 1986 list_for_each_entry_safe(flow, tmp, flow_list, tmp_list) 1987 mlx5e_flow_put(priv, flow); 1988 } 1989 1990 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow, 1991 int peer_index) 1992 { 1993 struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; 1994 struct mlx5e_tc_flow *peer_flow; 1995 struct mlx5e_tc_flow *tmp; 1996 1997 if (!flow_flag_test(flow, ESWITCH) || 1998 !flow_flag_test(flow, DUP)) 1999 return; 2000 2001 mutex_lock(&esw->offloads.peer_mutex); 2002 list_del(&flow->peer[peer_index]); 2003 mutex_unlock(&esw->offloads.peer_mutex); 2004 2005 list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { 2006 if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev)) 2007 continue; 2008 if (refcount_dec_and_test(&peer_flow->refcnt)) { 2009 mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow); 2010 list_del(&peer_flow->peer_flows); 2011 kfree(peer_flow); 2012 } 2013 } 2014 2015 if (list_empty(&flow->peer_flows)) 2016 flow_flag_clear(flow, DUP); 2017 } 2018 2019 static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow) 2020 { 2021 int i; 2022 2023 for (i = 0; i < MLX5_MAX_PORTS; i++) { 2024 if (i == mlx5_get_dev_index(flow->priv->mdev)) 2025 continue; 2026 mlx5e_tc_del_fdb_peer_flow(flow, i); 2027 } 2028 } 2029 2030 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, 2031 struct mlx5e_tc_flow *flow) 2032 { 2033 if (mlx5e_is_eswitch_flow(flow)) { 2034 struct mlx5_devcom *devcom = flow->priv->mdev->priv.devcom; 2035 2036 if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) { 2037 mlx5e_tc_del_fdb_flow(priv, flow); 2038 return; 2039 } 2040 2041 mlx5e_tc_del_fdb_peers_flow(flow); 2042 mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 2043 mlx5e_tc_del_fdb_flow(priv, flow); 2044 } else { 2045 mlx5e_tc_del_nic_flow(priv, flow); 2046 } 2047 } 2048 2049 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f) 2050 { 2051 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2052 struct flow_action *flow_action = &rule->action; 2053 const struct flow_action_entry *act; 2054 int i; 2055 2056 if (chain) 2057 return false; 2058 2059 flow_action_for_each(i, act, flow_action) { 2060 switch (act->id) { 2061 case FLOW_ACTION_GOTO: 2062 return true; 2063 case FLOW_ACTION_SAMPLE: 2064 return true; 2065 default: 2066 continue; 2067 } 2068 } 2069 2070 return false; 2071 } 2072 2073 static int 2074 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, 2075 struct flow_dissector_key_enc_opts *opts, 2076 struct netlink_ext_ack *extack, 2077 bool *dont_care) 2078 { 2079 struct geneve_opt *opt; 2080 int off = 0; 2081 2082 *dont_care = true; 2083 2084 while (opts->len > off) { 2085 opt = (struct geneve_opt *)&opts->data[off]; 2086 2087 if (!(*dont_care) || opt->opt_class || opt->type || 2088 memchr_inv(opt->opt_data, 0, opt->length * 4)) { 2089 *dont_care = false; 2090 2091 if (opt->opt_class != htons(U16_MAX) || 2092 opt->type != U8_MAX) { 2093 NL_SET_ERR_MSG_MOD(extack, 2094 "Partial match of tunnel options in chain > 0 isn't supported"); 2095 netdev_warn(priv->netdev, 2096 "Partial match of tunnel options in chain > 0 isn't supported"); 2097 return -EOPNOTSUPP; 2098 } 2099 } 2100 2101 off += sizeof(struct geneve_opt) + opt->length * 4; 2102 } 2103 2104 return 0; 2105 } 2106 2107 #define COPY_DISSECTOR(rule, diss_key, dst)\ 2108 ({ \ 2109 struct flow_rule *__rule = (rule);\ 2110 typeof(dst) __dst = dst;\ 2111 \ 2112 memcpy(__dst,\ 2113 skb_flow_dissector_target(__rule->match.dissector,\ 2114 diss_key,\ 2115 __rule->match.key),\ 2116 sizeof(*__dst));\ 2117 }) 2118 2119 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, 2120 struct mlx5e_tc_flow *flow, 2121 struct flow_cls_offload *f, 2122 struct net_device *filter_dev) 2123 { 2124 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2125 struct netlink_ext_ack *extack = f->common.extack; 2126 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; 2127 struct flow_match_enc_opts enc_opts_match; 2128 struct tunnel_match_enc_opts tun_enc_opts; 2129 struct mlx5_rep_uplink_priv *uplink_priv; 2130 struct mlx5_flow_attr *attr = flow->attr; 2131 struct mlx5e_rep_priv *uplink_rpriv; 2132 struct tunnel_match_key tunnel_key; 2133 bool enc_opts_is_dont_care = true; 2134 u32 tun_id, enc_opts_id = 0; 2135 struct mlx5_eswitch *esw; 2136 u32 value, mask; 2137 int err; 2138 2139 esw = priv->mdev->priv.eswitch; 2140 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 2141 uplink_priv = &uplink_rpriv->uplink_priv; 2142 2143 memset(&tunnel_key, 0, sizeof(tunnel_key)); 2144 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, 2145 &tunnel_key.enc_control); 2146 if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) 2147 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, 2148 &tunnel_key.enc_ipv4); 2149 else 2150 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, 2151 &tunnel_key.enc_ipv6); 2152 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); 2153 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, 2154 &tunnel_key.enc_tp); 2155 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, 2156 &tunnel_key.enc_key_id); 2157 tunnel_key.filter_ifindex = filter_dev->ifindex; 2158 2159 err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); 2160 if (err) 2161 return err; 2162 2163 flow_rule_match_enc_opts(rule, &enc_opts_match); 2164 err = enc_opts_is_dont_care_or_full_match(priv, 2165 enc_opts_match.mask, 2166 extack, 2167 &enc_opts_is_dont_care); 2168 if (err) 2169 goto err_enc_opts; 2170 2171 if (!enc_opts_is_dont_care) { 2172 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts)); 2173 memcpy(&tun_enc_opts.key, enc_opts_match.key, 2174 sizeof(*enc_opts_match.key)); 2175 memcpy(&tun_enc_opts.mask, enc_opts_match.mask, 2176 sizeof(*enc_opts_match.mask)); 2177 2178 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, 2179 &tun_enc_opts, &enc_opts_id); 2180 if (err) 2181 goto err_enc_opts; 2182 } 2183 2184 value = tun_id << ENC_OPTS_BITS | enc_opts_id; 2185 mask = enc_opts_id ? TUNNEL_ID_MASK : 2186 (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); 2187 2188 if (attr->chain) { 2189 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, 2190 TUNNEL_TO_REG, value, mask); 2191 } else { 2192 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; 2193 err = mlx5e_tc_match_to_reg_set(priv->mdev, 2194 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB, 2195 TUNNEL_TO_REG, value); 2196 if (err) 2197 goto err_set; 2198 2199 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 2200 } 2201 2202 flow->attr->tunnel_id = value; 2203 return 0; 2204 2205 err_set: 2206 if (enc_opts_id) 2207 mapping_remove(uplink_priv->tunnel_enc_opts_mapping, 2208 enc_opts_id); 2209 err_enc_opts: 2210 mapping_remove(uplink_priv->tunnel_mapping, tun_id); 2211 return err; 2212 } 2213 2214 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) 2215 { 2216 u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK; 2217 u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS; 2218 struct mlx5_rep_uplink_priv *uplink_priv; 2219 struct mlx5e_rep_priv *uplink_rpriv; 2220 struct mlx5_eswitch *esw; 2221 2222 esw = flow->priv->mdev->priv.eswitch; 2223 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 2224 uplink_priv = &uplink_rpriv->uplink_priv; 2225 2226 if (tun_id) 2227 mapping_remove(uplink_priv->tunnel_mapping, tun_id); 2228 if (enc_opts_id) 2229 mapping_remove(uplink_priv->tunnel_enc_opts_mapping, 2230 enc_opts_id); 2231 } 2232 2233 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, 2234 struct flow_match_basic *match, bool outer, 2235 void *headers_c, void *headers_v) 2236 { 2237 bool ip_version_cap; 2238 2239 ip_version_cap = outer ? 2240 MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2241 ft_field_support.outer_ip_version) : 2242 MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2243 ft_field_support.inner_ip_version); 2244 2245 if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) && 2246 (match->key->n_proto == htons(ETH_P_IP) || 2247 match->key->n_proto == htons(ETH_P_IPV6))) { 2248 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); 2249 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 2250 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6); 2251 } else { 2252 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 2253 ntohs(match->mask->n_proto)); 2254 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, 2255 ntohs(match->key->n_proto)); 2256 } 2257 } 2258 2259 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer) 2260 { 2261 void *headers_v; 2262 u16 ethertype; 2263 u8 ip_version; 2264 2265 if (outer) 2266 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 2267 else 2268 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); 2269 2270 ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version); 2271 /* Return ip_version converted from ethertype anyway */ 2272 if (!ip_version) { 2273 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); 2274 if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP) 2275 ip_version = 4; 2276 else if (ethertype == ETH_P_IPV6) 2277 ip_version = 6; 2278 } 2279 return ip_version; 2280 } 2281 2282 /* Tunnel device follows RFC 6040, see include/net/inet_ecn.h. 2283 * And changes inner ip_ecn depending on inner and outer ip_ecn as follows: 2284 * +---------+----------------------------------------+ 2285 * |Arriving | Arriving Outer Header | 2286 * | Inner +---------+---------+---------+----------+ 2287 * | Header | Not-ECT | ECT(0) | ECT(1) | CE | 2288 * +---------+---------+---------+---------+----------+ 2289 * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> | 2290 * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* | 2291 * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* | 2292 * | CE | CE | CE | CE | CE | 2293 * +---------+---------+---------+---------+----------+ 2294 * 2295 * Tc matches on inner after decapsulation on tunnel device, but hw offload matches 2296 * the inner ip_ecn value before hardware decap action. 2297 * 2298 * Cells marked are changed from original inner packet ip_ecn value during decap, and 2299 * so matching those values on inner ip_ecn before decap will fail. 2300 * 2301 * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn, 2302 * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE, 2303 * and such we can drop the inner ip_ecn=CE match. 2304 */ 2305 2306 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv, 2307 struct flow_cls_offload *f, 2308 bool *match_inner_ecn) 2309 { 2310 u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0; 2311 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2312 struct netlink_ext_ack *extack = f->common.extack; 2313 struct flow_match_ip match; 2314 2315 *match_inner_ecn = true; 2316 2317 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { 2318 flow_rule_match_enc_ip(rule, &match); 2319 outer_ecn_key = match.key->tos & INET_ECN_MASK; 2320 outer_ecn_mask = match.mask->tos & INET_ECN_MASK; 2321 } 2322 2323 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { 2324 flow_rule_match_ip(rule, &match); 2325 inner_ecn_key = match.key->tos & INET_ECN_MASK; 2326 inner_ecn_mask = match.mask->tos & INET_ECN_MASK; 2327 } 2328 2329 if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) { 2330 NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported"); 2331 netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported"); 2332 return -EOPNOTSUPP; 2333 } 2334 2335 if (!outer_ecn_mask) { 2336 if (!inner_ecn_mask) 2337 return 0; 2338 2339 NL_SET_ERR_MSG_MOD(extack, 2340 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); 2341 netdev_warn(priv->netdev, 2342 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); 2343 return -EOPNOTSUPP; 2344 } 2345 2346 if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) { 2347 NL_SET_ERR_MSG_MOD(extack, 2348 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); 2349 netdev_warn(priv->netdev, 2350 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); 2351 return -EOPNOTSUPP; 2352 } 2353 2354 if (!inner_ecn_mask) 2355 return 0; 2356 2357 /* Both inner and outer have full mask on ecn */ 2358 2359 if (outer_ecn_key == INET_ECN_ECT_1) { 2360 /* inner ecn might change by DECAP action */ 2361 2362 NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported"); 2363 netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported"); 2364 return -EOPNOTSUPP; 2365 } 2366 2367 if (outer_ecn_key != INET_ECN_CE) 2368 return 0; 2369 2370 if (inner_ecn_key != INET_ECN_CE) { 2371 /* Can't happen in software, as packet ecn will be changed to CE after decap */ 2372 NL_SET_ERR_MSG_MOD(extack, 2373 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); 2374 netdev_warn(priv->netdev, 2375 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); 2376 return -EOPNOTSUPP; 2377 } 2378 2379 /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase, 2380 * drop match on inner ecn 2381 */ 2382 *match_inner_ecn = false; 2383 2384 return 0; 2385 } 2386 2387 static int parse_tunnel_attr(struct mlx5e_priv *priv, 2388 struct mlx5e_tc_flow *flow, 2389 struct mlx5_flow_spec *spec, 2390 struct flow_cls_offload *f, 2391 struct net_device *filter_dev, 2392 u8 *match_level, 2393 bool *match_inner) 2394 { 2395 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); 2396 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 2397 struct netlink_ext_ack *extack = f->common.extack; 2398 bool needs_mapping, sets_mapping; 2399 int err; 2400 2401 if (!mlx5e_is_eswitch_flow(flow)) { 2402 NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported"); 2403 return -EOPNOTSUPP; 2404 } 2405 2406 needs_mapping = !!flow->attr->chain; 2407 sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f); 2408 *match_inner = !needs_mapping; 2409 2410 if ((needs_mapping || sets_mapping) && 2411 !mlx5_eswitch_reg_c1_loopback_enabled(esw)) { 2412 NL_SET_ERR_MSG_MOD(extack, 2413 "Chains on tunnel devices isn't supported without register loopback support"); 2414 netdev_warn(priv->netdev, 2415 "Chains on tunnel devices isn't supported without register loopback support"); 2416 return -EOPNOTSUPP; 2417 } 2418 2419 if (!flow->attr->chain) { 2420 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, 2421 match_level); 2422 if (err) { 2423 NL_SET_ERR_MSG_MOD(extack, 2424 "Failed to parse tunnel attributes"); 2425 netdev_warn(priv->netdev, 2426 "Failed to parse tunnel attributes"); 2427 return err; 2428 } 2429 2430 /* With mpls over udp we decapsulate using packet reformat 2431 * object 2432 */ 2433 if (!netif_is_bareudp(filter_dev)) 2434 flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; 2435 err = mlx5e_tc_set_attr_rx_tun(flow, spec); 2436 if (err) 2437 return err; 2438 } else if (tunnel) { 2439 struct mlx5_flow_spec *tmp_spec; 2440 2441 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL); 2442 if (!tmp_spec) { 2443 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec"); 2444 netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec"); 2445 return -ENOMEM; 2446 } 2447 memcpy(tmp_spec, spec, sizeof(*tmp_spec)); 2448 2449 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); 2450 if (err) { 2451 kvfree(tmp_spec); 2452 NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); 2453 netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); 2454 return err; 2455 } 2456 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); 2457 kvfree(tmp_spec); 2458 if (err) 2459 return err; 2460 } 2461 2462 if (!needs_mapping && !sets_mapping) 2463 return 0; 2464 2465 return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev); 2466 } 2467 2468 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec) 2469 { 2470 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2471 inner_headers); 2472 } 2473 2474 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec) 2475 { 2476 return MLX5_ADDR_OF(fte_match_param, spec->match_value, 2477 inner_headers); 2478 } 2479 2480 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) 2481 { 2482 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2483 outer_headers); 2484 } 2485 2486 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) 2487 { 2488 return MLX5_ADDR_OF(fte_match_param, spec->match_value, 2489 outer_headers); 2490 } 2491 2492 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) 2493 { 2494 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 2495 get_match_inner_headers_value(spec) : 2496 get_match_outer_headers_value(spec); 2497 } 2498 2499 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec) 2500 { 2501 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 2502 get_match_inner_headers_criteria(spec) : 2503 get_match_outer_headers_criteria(spec); 2504 } 2505 2506 static int mlx5e_flower_parse_meta(struct net_device *filter_dev, 2507 struct flow_cls_offload *f) 2508 { 2509 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2510 struct netlink_ext_ack *extack = f->common.extack; 2511 struct net_device *ingress_dev; 2512 struct flow_match_meta match; 2513 2514 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) 2515 return 0; 2516 2517 flow_rule_match_meta(rule, &match); 2518 2519 if (match.mask->l2_miss) { 2520 NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\""); 2521 return -EOPNOTSUPP; 2522 } 2523 2524 if (!match.mask->ingress_ifindex) 2525 return 0; 2526 2527 if (match.mask->ingress_ifindex != 0xFFFFFFFF) { 2528 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); 2529 return -EOPNOTSUPP; 2530 } 2531 2532 ingress_dev = __dev_get_by_index(dev_net(filter_dev), 2533 match.key->ingress_ifindex); 2534 if (!ingress_dev) { 2535 NL_SET_ERR_MSG_MOD(extack, 2536 "Can't find the ingress port to match on"); 2537 return -ENOENT; 2538 } 2539 2540 if (ingress_dev != filter_dev) { 2541 NL_SET_ERR_MSG_MOD(extack, 2542 "Can't match on the ingress filter port"); 2543 return -EOPNOTSUPP; 2544 } 2545 2546 return 0; 2547 } 2548 2549 static bool skip_key_basic(struct net_device *filter_dev, 2550 struct flow_cls_offload *f) 2551 { 2552 /* When doing mpls over udp decap, the user needs to provide 2553 * MPLS_UC as the protocol in order to be able to match on mpls 2554 * label fields. However, the actual ethertype is IP so we want to 2555 * avoid matching on this, otherwise we'll fail the match. 2556 */ 2557 if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0) 2558 return true; 2559 2560 return false; 2561 } 2562 2563 static int __parse_cls_flower(struct mlx5e_priv *priv, 2564 struct mlx5e_tc_flow *flow, 2565 struct mlx5_flow_spec *spec, 2566 struct flow_cls_offload *f, 2567 struct net_device *filter_dev, 2568 u8 *inner_match_level, u8 *outer_match_level) 2569 { 2570 struct netlink_ext_ack *extack = f->common.extack; 2571 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2572 outer_headers); 2573 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2574 outer_headers); 2575 void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2576 misc_parameters); 2577 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2578 misc_parameters); 2579 void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2580 misc_parameters_3); 2581 void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2582 misc_parameters_3); 2583 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2584 struct flow_dissector *dissector = rule->match.dissector; 2585 enum fs_flow_table_type fs_type; 2586 bool match_inner_ecn = true; 2587 u16 addr_type = 0; 2588 u8 ip_proto = 0; 2589 u8 *match_level; 2590 int err; 2591 2592 fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX; 2593 match_level = outer_match_level; 2594 2595 if (dissector->used_keys & 2596 ~(BIT(FLOW_DISSECTOR_KEY_META) | 2597 BIT(FLOW_DISSECTOR_KEY_CONTROL) | 2598 BIT(FLOW_DISSECTOR_KEY_BASIC) | 2599 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | 2600 BIT(FLOW_DISSECTOR_KEY_VLAN) | 2601 BIT(FLOW_DISSECTOR_KEY_CVLAN) | 2602 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | 2603 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | 2604 BIT(FLOW_DISSECTOR_KEY_PORTS) | 2605 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | 2606 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | 2607 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | 2608 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | 2609 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | 2610 BIT(FLOW_DISSECTOR_KEY_TCP) | 2611 BIT(FLOW_DISSECTOR_KEY_IP) | 2612 BIT(FLOW_DISSECTOR_KEY_CT) | 2613 BIT(FLOW_DISSECTOR_KEY_ENC_IP) | 2614 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | 2615 BIT(FLOW_DISSECTOR_KEY_ICMP) | 2616 BIT(FLOW_DISSECTOR_KEY_MPLS))) { 2617 NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); 2618 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", 2619 dissector->used_keys); 2620 return -EOPNOTSUPP; 2621 } 2622 2623 if (mlx5e_get_tc_tun(filter_dev)) { 2624 bool match_inner = false; 2625 2626 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, 2627 outer_match_level, &match_inner); 2628 if (err) 2629 return err; 2630 2631 if (match_inner) { 2632 /* header pointers should point to the inner headers 2633 * if the packet was decapsulated already. 2634 * outer headers are set by parse_tunnel_attr. 2635 */ 2636 match_level = inner_match_level; 2637 headers_c = get_match_inner_headers_criteria(spec); 2638 headers_v = get_match_inner_headers_value(spec); 2639 } 2640 2641 err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn); 2642 if (err) 2643 return err; 2644 } 2645 2646 err = mlx5e_flower_parse_meta(filter_dev, f); 2647 if (err) 2648 return err; 2649 2650 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) && 2651 !skip_key_basic(filter_dev, f)) { 2652 struct flow_match_basic match; 2653 2654 flow_rule_match_basic(rule, &match); 2655 mlx5e_tc_set_ethertype(priv->mdev, &match, 2656 match_level == outer_match_level, 2657 headers_c, headers_v); 2658 2659 if (match.mask->n_proto) 2660 *match_level = MLX5_MATCH_L2; 2661 } 2662 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) || 2663 is_vlan_dev(filter_dev)) { 2664 struct flow_dissector_key_vlan filter_dev_mask; 2665 struct flow_dissector_key_vlan filter_dev_key; 2666 struct flow_match_vlan match; 2667 2668 if (is_vlan_dev(filter_dev)) { 2669 match.key = &filter_dev_key; 2670 match.key->vlan_id = vlan_dev_vlan_id(filter_dev); 2671 match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev); 2672 match.key->vlan_priority = 0; 2673 match.mask = &filter_dev_mask; 2674 memset(match.mask, 0xff, sizeof(*match.mask)); 2675 match.mask->vlan_priority = 0; 2676 } else { 2677 flow_rule_match_vlan(rule, &match); 2678 } 2679 if (match.mask->vlan_id || 2680 match.mask->vlan_priority || 2681 match.mask->vlan_tpid) { 2682 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { 2683 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2684 svlan_tag, 1); 2685 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2686 svlan_tag, 1); 2687 } else { 2688 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2689 cvlan_tag, 1); 2690 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2691 cvlan_tag, 1); 2692 } 2693 2694 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, 2695 match.mask->vlan_id); 2696 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, 2697 match.key->vlan_id); 2698 2699 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, 2700 match.mask->vlan_priority); 2701 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, 2702 match.key->vlan_priority); 2703 2704 *match_level = MLX5_MATCH_L2; 2705 2706 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) && 2707 match.mask->vlan_eth_type && 2708 MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, 2709 ft_field_support.outer_second_vid, 2710 fs_type)) { 2711 MLX5_SET(fte_match_set_misc, misc_c, 2712 outer_second_cvlan_tag, 1); 2713 spec->match_criteria_enable |= 2714 MLX5_MATCH_MISC_PARAMETERS; 2715 } 2716 } 2717 } else if (*match_level != MLX5_MATCH_NONE) { 2718 /* cvlan_tag enabled in match criteria and 2719 * disabled in match value means both S & C tags 2720 * don't exist (untagged of both) 2721 */ 2722 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 2723 *match_level = MLX5_MATCH_L2; 2724 } 2725 2726 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { 2727 struct flow_match_vlan match; 2728 2729 flow_rule_match_cvlan(rule, &match); 2730 if (match.mask->vlan_id || 2731 match.mask->vlan_priority || 2732 match.mask->vlan_tpid) { 2733 if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid, 2734 fs_type)) { 2735 NL_SET_ERR_MSG_MOD(extack, 2736 "Matching on CVLAN is not supported"); 2737 return -EOPNOTSUPP; 2738 } 2739 2740 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { 2741 MLX5_SET(fte_match_set_misc, misc_c, 2742 outer_second_svlan_tag, 1); 2743 MLX5_SET(fte_match_set_misc, misc_v, 2744 outer_second_svlan_tag, 1); 2745 } else { 2746 MLX5_SET(fte_match_set_misc, misc_c, 2747 outer_second_cvlan_tag, 1); 2748 MLX5_SET(fte_match_set_misc, misc_v, 2749 outer_second_cvlan_tag, 1); 2750 } 2751 2752 MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, 2753 match.mask->vlan_id); 2754 MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, 2755 match.key->vlan_id); 2756 MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, 2757 match.mask->vlan_priority); 2758 MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, 2759 match.key->vlan_priority); 2760 2761 *match_level = MLX5_MATCH_L2; 2762 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; 2763 } 2764 } 2765 2766 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 2767 struct flow_match_eth_addrs match; 2768 2769 flow_rule_match_eth_addrs(rule, &match); 2770 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2771 dmac_47_16), 2772 match.mask->dst); 2773 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2774 dmac_47_16), 2775 match.key->dst); 2776 2777 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2778 smac_47_16), 2779 match.mask->src); 2780 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2781 smac_47_16), 2782 match.key->src); 2783 2784 if (!is_zero_ether_addr(match.mask->src) || 2785 !is_zero_ether_addr(match.mask->dst)) 2786 *match_level = MLX5_MATCH_L2; 2787 } 2788 2789 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 2790 struct flow_match_control match; 2791 2792 flow_rule_match_control(rule, &match); 2793 addr_type = match.key->addr_type; 2794 2795 /* the HW doesn't support frag first/later */ 2796 if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { 2797 NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported"); 2798 return -EOPNOTSUPP; 2799 } 2800 2801 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { 2802 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); 2803 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 2804 match.key->flags & FLOW_DIS_IS_FRAGMENT); 2805 2806 /* the HW doesn't need L3 inline to match on frag=no */ 2807 if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) 2808 *match_level = MLX5_MATCH_L2; 2809 /* *** L2 attributes parsing up to here *** */ 2810 else 2811 *match_level = MLX5_MATCH_L3; 2812 } 2813 } 2814 2815 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 2816 struct flow_match_basic match; 2817 2818 flow_rule_match_basic(rule, &match); 2819 ip_proto = match.key->ip_proto; 2820 2821 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 2822 match.mask->ip_proto); 2823 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 2824 match.key->ip_proto); 2825 2826 if (match.mask->ip_proto) 2827 *match_level = MLX5_MATCH_L3; 2828 } 2829 2830 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 2831 struct flow_match_ipv4_addrs match; 2832 2833 flow_rule_match_ipv4_addrs(rule, &match); 2834 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2835 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2836 &match.mask->src, sizeof(match.mask->src)); 2837 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2838 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2839 &match.key->src, sizeof(match.key->src)); 2840 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2841 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2842 &match.mask->dst, sizeof(match.mask->dst)); 2843 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2844 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2845 &match.key->dst, sizeof(match.key->dst)); 2846 2847 if (match.mask->src || match.mask->dst) 2848 *match_level = MLX5_MATCH_L3; 2849 } 2850 2851 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 2852 struct flow_match_ipv6_addrs match; 2853 2854 flow_rule_match_ipv6_addrs(rule, &match); 2855 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2856 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2857 &match.mask->src, sizeof(match.mask->src)); 2858 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2859 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2860 &match.key->src, sizeof(match.key->src)); 2861 2862 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2863 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2864 &match.mask->dst, sizeof(match.mask->dst)); 2865 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2866 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2867 &match.key->dst, sizeof(match.key->dst)); 2868 2869 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || 2870 ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) 2871 *match_level = MLX5_MATCH_L3; 2872 } 2873 2874 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { 2875 struct flow_match_ip match; 2876 2877 flow_rule_match_ip(rule, &match); 2878 if (match_inner_ecn) { 2879 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, 2880 match.mask->tos & 0x3); 2881 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, 2882 match.key->tos & 0x3); 2883 } 2884 2885 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, 2886 match.mask->tos >> 2); 2887 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, 2888 match.key->tos >> 2); 2889 2890 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, 2891 match.mask->ttl); 2892 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, 2893 match.key->ttl); 2894 2895 if (match.mask->ttl && 2896 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, 2897 ft_field_support.outer_ipv4_ttl)) { 2898 NL_SET_ERR_MSG_MOD(extack, 2899 "Matching on TTL is not supported"); 2900 return -EOPNOTSUPP; 2901 } 2902 2903 if (match.mask->tos || match.mask->ttl) 2904 *match_level = MLX5_MATCH_L3; 2905 } 2906 2907 /* *** L3 attributes parsing up to here *** */ 2908 2909 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 2910 struct flow_match_ports match; 2911 2912 flow_rule_match_ports(rule, &match); 2913 switch (ip_proto) { 2914 case IPPROTO_TCP: 2915 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2916 tcp_sport, ntohs(match.mask->src)); 2917 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2918 tcp_sport, ntohs(match.key->src)); 2919 2920 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2921 tcp_dport, ntohs(match.mask->dst)); 2922 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2923 tcp_dport, ntohs(match.key->dst)); 2924 break; 2925 2926 case IPPROTO_UDP: 2927 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2928 udp_sport, ntohs(match.mask->src)); 2929 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2930 udp_sport, ntohs(match.key->src)); 2931 2932 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2933 udp_dport, ntohs(match.mask->dst)); 2934 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2935 udp_dport, ntohs(match.key->dst)); 2936 break; 2937 default: 2938 NL_SET_ERR_MSG_MOD(extack, 2939 "Only UDP and TCP transports are supported for L4 matching"); 2940 netdev_err(priv->netdev, 2941 "Only UDP and TCP transport are supported\n"); 2942 return -EINVAL; 2943 } 2944 2945 if (match.mask->src || match.mask->dst) 2946 *match_level = MLX5_MATCH_L4; 2947 } 2948 2949 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { 2950 struct flow_match_tcp match; 2951 2952 flow_rule_match_tcp(rule, &match); 2953 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, 2954 ntohs(match.mask->flags)); 2955 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, 2956 ntohs(match.key->flags)); 2957 2958 if (match.mask->flags) 2959 *match_level = MLX5_MATCH_L4; 2960 } 2961 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) { 2962 struct flow_match_icmp match; 2963 2964 flow_rule_match_icmp(rule, &match); 2965 switch (ip_proto) { 2966 case IPPROTO_ICMP: 2967 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & 2968 MLX5_FLEX_PROTO_ICMP)) { 2969 NL_SET_ERR_MSG_MOD(extack, 2970 "Match on Flex protocols for ICMP is not supported"); 2971 return -EOPNOTSUPP; 2972 } 2973 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, 2974 match.mask->type); 2975 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, 2976 match.key->type); 2977 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code, 2978 match.mask->code); 2979 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code, 2980 match.key->code); 2981 break; 2982 case IPPROTO_ICMPV6: 2983 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & 2984 MLX5_FLEX_PROTO_ICMPV6)) { 2985 NL_SET_ERR_MSG_MOD(extack, 2986 "Match on Flex protocols for ICMPV6 is not supported"); 2987 return -EOPNOTSUPP; 2988 } 2989 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, 2990 match.mask->type); 2991 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, 2992 match.key->type); 2993 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code, 2994 match.mask->code); 2995 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code, 2996 match.key->code); 2997 break; 2998 default: 2999 NL_SET_ERR_MSG_MOD(extack, 3000 "Code and type matching only with ICMP and ICMPv6"); 3001 netdev_err(priv->netdev, 3002 "Code and type matching only with ICMP and ICMPv6\n"); 3003 return -EINVAL; 3004 } 3005 if (match.mask->code || match.mask->type) { 3006 *match_level = MLX5_MATCH_L4; 3007 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; 3008 } 3009 } 3010 /* Currently supported only for MPLS over UDP */ 3011 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && 3012 !netif_is_bareudp(filter_dev)) { 3013 NL_SET_ERR_MSG_MOD(extack, 3014 "Matching on MPLS is supported only for MPLS over UDP"); 3015 netdev_err(priv->netdev, 3016 "Matching on MPLS is supported only for MPLS over UDP\n"); 3017 return -EOPNOTSUPP; 3018 } 3019 3020 return 0; 3021 } 3022 3023 static int parse_cls_flower(struct mlx5e_priv *priv, 3024 struct mlx5e_tc_flow *flow, 3025 struct mlx5_flow_spec *spec, 3026 struct flow_cls_offload *f, 3027 struct net_device *filter_dev) 3028 { 3029 u8 inner_match_level, outer_match_level, non_tunnel_match_level; 3030 struct netlink_ext_ack *extack = f->common.extack; 3031 struct mlx5_core_dev *dev = priv->mdev; 3032 struct mlx5_eswitch *esw = dev->priv.eswitch; 3033 struct mlx5e_rep_priv *rpriv = priv->ppriv; 3034 struct mlx5_eswitch_rep *rep; 3035 bool is_eswitch_flow; 3036 int err; 3037 3038 inner_match_level = MLX5_MATCH_NONE; 3039 outer_match_level = MLX5_MATCH_NONE; 3040 3041 err = __parse_cls_flower(priv, flow, spec, f, filter_dev, 3042 &inner_match_level, &outer_match_level); 3043 non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? 3044 outer_match_level : inner_match_level; 3045 3046 is_eswitch_flow = mlx5e_is_eswitch_flow(flow); 3047 if (!err && is_eswitch_flow) { 3048 rep = rpriv->rep; 3049 if (rep->vport != MLX5_VPORT_UPLINK && 3050 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && 3051 esw->offloads.inline_mode < non_tunnel_match_level)) { 3052 NL_SET_ERR_MSG_MOD(extack, 3053 "Flow is not offloaded due to min inline setting"); 3054 netdev_warn(priv->netdev, 3055 "Flow is not offloaded due to min inline setting, required %d actual %d\n", 3056 non_tunnel_match_level, esw->offloads.inline_mode); 3057 return -EOPNOTSUPP; 3058 } 3059 } 3060 3061 flow->attr->inner_match_level = inner_match_level; 3062 flow->attr->outer_match_level = outer_match_level; 3063 3064 3065 return err; 3066 } 3067 3068 struct mlx5_fields { 3069 u8 field; 3070 u8 field_bsize; 3071 u32 field_mask; 3072 u32 offset; 3073 u32 match_offset; 3074 }; 3075 3076 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \ 3077 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \ 3078 offsetof(struct pedit_headers, field) + (off), \ 3079 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} 3080 3081 /* masked values are the same and there are no rewrites that do not have a 3082 * match. 3083 */ 3084 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \ 3085 type matchmaskx = *(type *)(matchmaskp); \ 3086 type matchvalx = *(type *)(matchvalp); \ 3087 type maskx = *(type *)(maskp); \ 3088 type valx = *(type *)(valp); \ 3089 \ 3090 (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \ 3091 matchmaskx)); \ 3092 }) 3093 3094 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, 3095 void *matchmaskp, u8 bsize) 3096 { 3097 bool same = false; 3098 3099 switch (bsize) { 3100 case 8: 3101 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp); 3102 break; 3103 case 16: 3104 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp); 3105 break; 3106 case 32: 3107 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp); 3108 break; 3109 } 3110 3111 return same; 3112 } 3113 3114 static struct mlx5_fields fields[] = { 3115 OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16), 3116 OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0), 3117 OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16), 3118 OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0), 3119 OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype), 3120 OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid), 3121 3122 OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp), 3123 OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit), 3124 OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4), 3125 OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 3126 3127 OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0, 3128 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]), 3129 OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0, 3130 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]), 3131 OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0, 3132 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]), 3133 OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0, 3134 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]), 3135 OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0, 3136 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]), 3137 OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0, 3138 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]), 3139 OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0, 3140 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]), 3141 OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, 3142 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), 3143 OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), 3144 OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp), 3145 3146 OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), 3147 OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), 3148 /* in linux iphdr tcp_flags is 8 bits long */ 3149 OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags), 3150 3151 OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport), 3152 OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), 3153 }; 3154 3155 static unsigned long mask_to_le(unsigned long mask, int size) 3156 { 3157 __be32 mask_be32; 3158 __be16 mask_be16; 3159 3160 if (size == 32) { 3161 mask_be32 = (__force __be32)(mask); 3162 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); 3163 } else if (size == 16) { 3164 mask_be32 = (__force __be32)(mask); 3165 mask_be16 = *(__be16 *)&mask_be32; 3166 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); 3167 } 3168 3169 return mask; 3170 } 3171 3172 static int offload_pedit_fields(struct mlx5e_priv *priv, 3173 int namespace, 3174 struct mlx5e_tc_flow_parse_attr *parse_attr, 3175 u32 *action_flags, 3176 struct netlink_ext_ack *extack) 3177 { 3178 struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; 3179 struct pedit_headers_action *hdrs = parse_attr->hdrs; 3180 void *headers_c, *headers_v, *action, *vals_p; 3181 u32 *s_masks_p, *a_masks_p, s_mask, a_mask; 3182 struct mlx5e_tc_mod_hdr_acts *mod_acts; 3183 unsigned long mask, field_mask; 3184 int i, first, last, next_z; 3185 struct mlx5_fields *f; 3186 u8 cmd; 3187 3188 mod_acts = &parse_attr->mod_hdr_acts; 3189 headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec); 3190 headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec); 3191 3192 set_masks = &hdrs[0].masks; 3193 add_masks = &hdrs[1].masks; 3194 set_vals = &hdrs[0].vals; 3195 add_vals = &hdrs[1].vals; 3196 3197 for (i = 0; i < ARRAY_SIZE(fields); i++) { 3198 bool skip; 3199 3200 f = &fields[i]; 3201 /* avoid seeing bits set from previous iterations */ 3202 s_mask = 0; 3203 a_mask = 0; 3204 3205 s_masks_p = (void *)set_masks + f->offset; 3206 a_masks_p = (void *)add_masks + f->offset; 3207 3208 s_mask = *s_masks_p & f->field_mask; 3209 a_mask = *a_masks_p & f->field_mask; 3210 3211 if (!s_mask && !a_mask) /* nothing to offload here */ 3212 continue; 3213 3214 if (s_mask && a_mask) { 3215 NL_SET_ERR_MSG_MOD(extack, 3216 "can't set and add to the same HW field"); 3217 netdev_warn(priv->netdev, 3218 "mlx5: can't set and add to the same HW field (%x)\n", 3219 f->field); 3220 return -EOPNOTSUPP; 3221 } 3222 3223 skip = false; 3224 if (s_mask) { 3225 void *match_mask = headers_c + f->match_offset; 3226 void *match_val = headers_v + f->match_offset; 3227 3228 cmd = MLX5_ACTION_TYPE_SET; 3229 mask = s_mask; 3230 vals_p = (void *)set_vals + f->offset; 3231 /* don't rewrite if we have a match on the same value */ 3232 if (cmp_val_mask(vals_p, s_masks_p, match_val, 3233 match_mask, f->field_bsize)) 3234 skip = true; 3235 /* clear to denote we consumed this field */ 3236 *s_masks_p &= ~f->field_mask; 3237 } else { 3238 cmd = MLX5_ACTION_TYPE_ADD; 3239 mask = a_mask; 3240 vals_p = (void *)add_vals + f->offset; 3241 /* add 0 is no change */ 3242 if ((*(u32 *)vals_p & f->field_mask) == 0) 3243 skip = true; 3244 /* clear to denote we consumed this field */ 3245 *a_masks_p &= ~f->field_mask; 3246 } 3247 if (skip) 3248 continue; 3249 3250 mask = mask_to_le(mask, f->field_bsize); 3251 3252 first = find_first_bit(&mask, f->field_bsize); 3253 next_z = find_next_zero_bit(&mask, f->field_bsize, first); 3254 last = find_last_bit(&mask, f->field_bsize); 3255 if (first < next_z && next_z < last) { 3256 NL_SET_ERR_MSG_MOD(extack, 3257 "rewrite of few sub-fields isn't supported"); 3258 netdev_warn(priv->netdev, 3259 "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", 3260 mask); 3261 return -EOPNOTSUPP; 3262 } 3263 3264 action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts); 3265 if (IS_ERR(action)) { 3266 NL_SET_ERR_MSG_MOD(extack, 3267 "too many pedit actions, can't offload"); 3268 mlx5_core_warn(priv->mdev, 3269 "mlx5: parsed %d pedit actions, can't do more\n", 3270 mod_acts->num_actions); 3271 return PTR_ERR(action); 3272 } 3273 3274 MLX5_SET(set_action_in, action, action_type, cmd); 3275 MLX5_SET(set_action_in, action, field, f->field); 3276 3277 if (cmd == MLX5_ACTION_TYPE_SET) { 3278 int start; 3279 3280 field_mask = mask_to_le(f->field_mask, f->field_bsize); 3281 3282 /* if field is bit sized it can start not from first bit */ 3283 start = find_first_bit(&field_mask, f->field_bsize); 3284 3285 MLX5_SET(set_action_in, action, offset, first - start); 3286 /* length is num of bits to be written, zero means length of 32 */ 3287 MLX5_SET(set_action_in, action, length, (last - first + 1)); 3288 } 3289 3290 if (f->field_bsize == 32) 3291 MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first); 3292 else if (f->field_bsize == 16) 3293 MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first); 3294 else if (f->field_bsize == 8) 3295 MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); 3296 3297 ++mod_acts->num_actions; 3298 } 3299 3300 return 0; 3301 } 3302 3303 static const struct pedit_headers zero_masks = {}; 3304 3305 static int verify_offload_pedit_fields(struct mlx5e_priv *priv, 3306 struct mlx5e_tc_flow_parse_attr *parse_attr, 3307 struct netlink_ext_ack *extack) 3308 { 3309 struct pedit_headers *cmd_masks; 3310 u8 cmd; 3311 3312 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { 3313 cmd_masks = &parse_attr->hdrs[cmd].masks; 3314 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { 3315 NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); 3316 netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); 3317 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, 3318 16, 1, cmd_masks, sizeof(zero_masks), true); 3319 return -EOPNOTSUPP; 3320 } 3321 } 3322 3323 return 0; 3324 } 3325 3326 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, 3327 struct mlx5e_tc_flow_parse_attr *parse_attr, 3328 u32 *action_flags, 3329 struct netlink_ext_ack *extack) 3330 { 3331 int err; 3332 3333 err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack); 3334 if (err) 3335 goto out_dealloc_parsed_actions; 3336 3337 err = verify_offload_pedit_fields(priv, parse_attr, extack); 3338 if (err) 3339 goto out_dealloc_parsed_actions; 3340 3341 return 0; 3342 3343 out_dealloc_parsed_actions: 3344 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); 3345 return err; 3346 } 3347 3348 struct ip_ttl_word { 3349 __u8 ttl; 3350 __u8 protocol; 3351 __sum16 check; 3352 }; 3353 3354 struct ipv6_hoplimit_word { 3355 __be16 payload_len; 3356 __u8 nexthdr; 3357 __u8 hop_limit; 3358 }; 3359 3360 static bool 3361 is_flow_action_modify_ip_header(struct flow_action *flow_action) 3362 { 3363 const struct flow_action_entry *act; 3364 u32 mask, offset; 3365 u8 htype; 3366 int i; 3367 3368 /* For IPv4 & IPv6 header check 4 byte word, 3369 * to determine that modified fields 3370 * are NOT ttl & hop_limit only. 3371 */ 3372 flow_action_for_each(i, act, flow_action) { 3373 if (act->id != FLOW_ACTION_MANGLE && 3374 act->id != FLOW_ACTION_ADD) 3375 continue; 3376 3377 htype = act->mangle.htype; 3378 offset = act->mangle.offset; 3379 mask = ~act->mangle.mask; 3380 3381 if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { 3382 struct ip_ttl_word *ttl_word = 3383 (struct ip_ttl_word *)&mask; 3384 3385 if (offset != offsetof(struct iphdr, ttl) || 3386 ttl_word->protocol || 3387 ttl_word->check) 3388 return true; 3389 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { 3390 struct ipv6_hoplimit_word *hoplimit_word = 3391 (struct ipv6_hoplimit_word *)&mask; 3392 3393 if (offset != offsetof(struct ipv6hdr, payload_len) || 3394 hoplimit_word->payload_len || 3395 hoplimit_word->nexthdr) 3396 return true; 3397 } 3398 } 3399 3400 return false; 3401 } 3402 3403 static bool modify_header_match_supported(struct mlx5e_priv *priv, 3404 struct mlx5_flow_spec *spec, 3405 struct flow_action *flow_action, 3406 u32 actions, 3407 struct netlink_ext_ack *extack) 3408 { 3409 bool modify_ip_header; 3410 void *headers_c; 3411 void *headers_v; 3412 u16 ethertype; 3413 u8 ip_proto; 3414 3415 headers_c = mlx5e_get_match_headers_criteria(actions, spec); 3416 headers_v = mlx5e_get_match_headers_value(actions, spec); 3417 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); 3418 3419 /* for non-IP we only re-write MACs, so we're okay */ 3420 if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 && 3421 ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) 3422 goto out_ok; 3423 3424 modify_ip_header = is_flow_action_modify_ip_header(flow_action); 3425 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); 3426 if (modify_ip_header && ip_proto != IPPROTO_TCP && 3427 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { 3428 NL_SET_ERR_MSG_MOD(extack, 3429 "can't offload re-write of non TCP/UDP"); 3430 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n", 3431 ip_proto); 3432 return false; 3433 } 3434 3435 out_ok: 3436 return true; 3437 } 3438 3439 static bool 3440 actions_match_supported_fdb(struct mlx5e_priv *priv, 3441 struct mlx5e_tc_flow *flow, 3442 struct netlink_ext_ack *extack) 3443 { 3444 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; 3445 3446 if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { 3447 NL_SET_ERR_MSG_MOD(extack, 3448 "current firmware doesn't support split rule for port mirroring"); 3449 netdev_warn_once(priv->netdev, 3450 "current firmware doesn't support split rule for port mirroring\n"); 3451 return false; 3452 } 3453 3454 return true; 3455 } 3456 3457 static bool 3458 actions_match_supported(struct mlx5e_priv *priv, 3459 struct flow_action *flow_action, 3460 u32 actions, 3461 struct mlx5e_tc_flow_parse_attr *parse_attr, 3462 struct mlx5e_tc_flow *flow, 3463 struct netlink_ext_ack *extack) 3464 { 3465 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && 3466 !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions, 3467 extack)) 3468 return false; 3469 3470 if (mlx5e_is_eswitch_flow(flow) && 3471 !actions_match_supported_fdb(priv, flow, extack)) 3472 return false; 3473 3474 return true; 3475 } 3476 3477 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 3478 { 3479 return priv->mdev == peer_priv->mdev; 3480 } 3481 3482 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 3483 { 3484 struct mlx5_core_dev *fmdev, *pmdev; 3485 u64 fsystem_guid, psystem_guid; 3486 3487 fmdev = priv->mdev; 3488 pmdev = peer_priv->mdev; 3489 3490 fsystem_guid = mlx5_query_nic_system_image_guid(fmdev); 3491 psystem_guid = mlx5_query_nic_system_image_guid(pmdev); 3492 3493 return (fsystem_guid == psystem_guid); 3494 } 3495 3496 static int 3497 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, 3498 struct mlx5e_tc_flow *flow, 3499 struct mlx5_flow_attr *attr, 3500 struct netlink_ext_ack *extack) 3501 { 3502 struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; 3503 struct pedit_headers_action *hdrs = parse_attr->hdrs; 3504 enum mlx5_flow_namespace_type ns_type; 3505 int err; 3506 3507 if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits && 3508 !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) 3509 return 0; 3510 3511 ns_type = mlx5e_get_flow_namespace(flow); 3512 3513 err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack); 3514 if (err) 3515 return err; 3516 3517 if (parse_attr->mod_hdr_acts.num_actions > 0) 3518 return 0; 3519 3520 /* In case all pedit actions are skipped, remove the MOD_HDR flag. */ 3521 attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3522 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); 3523 3524 if (ns_type != MLX5_FLOW_NAMESPACE_FDB) 3525 return 0; 3526 3527 if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || 3528 (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) 3529 attr->esw_attr->split_count = 0; 3530 3531 return 0; 3532 } 3533 3534 static struct mlx5_flow_attr* 3535 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, 3536 enum mlx5_flow_namespace_type ns_type) 3537 { 3538 struct mlx5e_tc_flow_parse_attr *parse_attr; 3539 u32 attr_sz = ns_to_attr_sz(ns_type); 3540 struct mlx5_flow_attr *attr2; 3541 3542 attr2 = mlx5_alloc_flow_attr(ns_type); 3543 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); 3544 if (!attr2 || !parse_attr) { 3545 kvfree(parse_attr); 3546 kfree(attr2); 3547 return NULL; 3548 } 3549 3550 memcpy(attr2, attr, attr_sz); 3551 INIT_LIST_HEAD(&attr2->list); 3552 parse_attr->filter_dev = attr->parse_attr->filter_dev; 3553 attr2->action = 0; 3554 attr2->counter = NULL; 3555 attr2->tc_act_cookies_count = 0; 3556 attr2->flags = 0; 3557 attr2->parse_attr = parse_attr; 3558 attr2->dest_chain = 0; 3559 attr2->dest_ft = NULL; 3560 attr2->act_id_restore_rule = NULL; 3561 memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr)); 3562 3563 if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { 3564 attr2->esw_attr->out_count = 0; 3565 attr2->esw_attr->split_count = 0; 3566 } 3567 3568 attr2->branch_true = NULL; 3569 attr2->branch_false = NULL; 3570 attr2->jumping_attr = NULL; 3571 return attr2; 3572 } 3573 3574 struct mlx5_flow_attr * 3575 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow) 3576 { 3577 struct mlx5_esw_flow_attr *esw_attr; 3578 struct mlx5_flow_attr *attr; 3579 int i; 3580 3581 list_for_each_entry(attr, &flow->attrs, list) { 3582 esw_attr = attr->esw_attr; 3583 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { 3584 if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) 3585 return attr; 3586 } 3587 } 3588 3589 return NULL; 3590 } 3591 3592 void 3593 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow) 3594 { 3595 struct mlx5e_post_act *post_act = get_post_action(flow->priv); 3596 struct mlx5_flow_attr *attr; 3597 3598 list_for_each_entry(attr, &flow->attrs, list) { 3599 if (list_is_last(&attr->list, &flow->attrs)) 3600 break; 3601 3602 mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle); 3603 } 3604 } 3605 3606 static void 3607 free_flow_post_acts(struct mlx5e_tc_flow *flow) 3608 { 3609 struct mlx5_flow_attr *attr, *tmp; 3610 3611 list_for_each_entry_safe(attr, tmp, &flow->attrs, list) { 3612 if (list_is_last(&attr->list, &flow->attrs)) 3613 break; 3614 3615 mlx5_free_flow_attr_actions(flow, attr); 3616 3617 list_del(&attr->list); 3618 kvfree(attr->parse_attr); 3619 kfree(attr); 3620 } 3621 } 3622 3623 int 3624 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow) 3625 { 3626 struct mlx5e_post_act *post_act = get_post_action(flow->priv); 3627 struct mlx5_flow_attr *attr; 3628 int err = 0; 3629 3630 list_for_each_entry(attr, &flow->attrs, list) { 3631 if (list_is_last(&attr->list, &flow->attrs)) 3632 break; 3633 3634 err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle); 3635 if (err) 3636 break; 3637 } 3638 3639 return err; 3640 } 3641 3642 /* TC filter rule HW translation: 3643 * 3644 * +---------------------+ 3645 * + ft prio (tc chain) + 3646 * + original match + 3647 * +---------------------+ 3648 * | 3649 * | if multi table action 3650 * | 3651 * v 3652 * +---------------------+ 3653 * + post act ft |<----. 3654 * + match fte id | | split on multi table action 3655 * + do actions |-----' 3656 * +---------------------+ 3657 * | 3658 * | 3659 * v 3660 * Do rest of the actions after last multi table action. 3661 */ 3662 static int 3663 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) 3664 { 3665 struct mlx5e_post_act *post_act = get_post_action(flow->priv); 3666 struct mlx5_flow_attr *attr, *next_attr = NULL; 3667 struct mlx5e_post_act_handle *handle; 3668 int err; 3669 3670 /* This is going in reverse order as needed. 3671 * The first entry is the last attribute. 3672 */ 3673 list_for_each_entry(attr, &flow->attrs, list) { 3674 if (!next_attr) { 3675 /* Set counter action on last post act rule. */ 3676 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3677 } 3678 3679 if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) { 3680 err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr); 3681 if (err) 3682 goto out_free; 3683 } 3684 3685 /* Don't add post_act rule for first attr (last in the list). 3686 * It's being handled by the caller. 3687 */ 3688 if (list_is_last(&attr->list, &flow->attrs)) 3689 break; 3690 3691 err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); 3692 if (err) 3693 goto out_free; 3694 3695 err = post_process_attr(flow, attr, extack); 3696 if (err) 3697 goto out_free; 3698 3699 handle = mlx5e_tc_post_act_add(post_act, attr); 3700 if (IS_ERR(handle)) { 3701 err = PTR_ERR(handle); 3702 goto out_free; 3703 } 3704 3705 attr->post_act_handle = handle; 3706 3707 if (attr->jumping_attr) { 3708 err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr); 3709 if (err) 3710 goto out_free; 3711 } 3712 3713 next_attr = attr; 3714 } 3715 3716 if (flow_flag_test(flow, SLOW)) 3717 goto out; 3718 3719 err = mlx5e_tc_offload_flow_post_acts(flow); 3720 if (err) 3721 goto out_free; 3722 3723 out: 3724 return 0; 3725 3726 out_free: 3727 free_flow_post_acts(flow); 3728 return err; 3729 } 3730 3731 static int 3732 alloc_branch_attr(struct mlx5e_tc_flow *flow, 3733 struct mlx5e_tc_act_branch_ctrl *cond, 3734 struct mlx5_flow_attr **cond_attr, 3735 u32 *jump_count, 3736 struct netlink_ext_ack *extack) 3737 { 3738 struct mlx5_flow_attr *attr; 3739 int err = 0; 3740 3741 *cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, 3742 mlx5e_get_flow_namespace(flow)); 3743 if (!(*cond_attr)) 3744 return -ENOMEM; 3745 3746 attr = *cond_attr; 3747 3748 switch (cond->act_id) { 3749 case FLOW_ACTION_DROP: 3750 attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; 3751 break; 3752 case FLOW_ACTION_ACCEPT: 3753 case FLOW_ACTION_PIPE: 3754 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3755 attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); 3756 break; 3757 case FLOW_ACTION_JUMP: 3758 if (*jump_count) { 3759 NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps"); 3760 err = -EOPNOTSUPP; 3761 goto out_err; 3762 } 3763 *jump_count = cond->extval; 3764 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3765 attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); 3766 break; 3767 default: 3768 err = -EOPNOTSUPP; 3769 goto out_err; 3770 } 3771 3772 return err; 3773 out_err: 3774 kfree(*cond_attr); 3775 *cond_attr = NULL; 3776 return err; 3777 } 3778 3779 static void 3780 dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act, 3781 struct mlx5_flow_attr *attr, struct mlx5e_priv *priv, 3782 struct mlx5e_tc_jump_state *jump_state) 3783 { 3784 if (!jump_state->jump_count) 3785 return; 3786 3787 /* Single tc action can instantiate multiple offload actions (e.g. pedit) 3788 * Jump only over a tc action 3789 */ 3790 if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index) 3791 return; 3792 3793 jump_state->last_id = act->id; 3794 jump_state->last_index = act->hw_index; 3795 3796 /* nothing to do for intermediate actions */ 3797 if (--jump_state->jump_count > 1) 3798 return; 3799 3800 if (jump_state->jump_count == 1) { /* last action in the jump action list */ 3801 3802 /* create a new attribute after this action */ 3803 jump_state->jump_target = true; 3804 3805 if (tc_act->is_terminating_action) { /* the branch ends here */ 3806 attr->flags |= MLX5_ATTR_FLAG_TERMINATING; 3807 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3808 } else { /* the branch continues executing the rest of the actions */ 3809 struct mlx5e_post_act *post_act; 3810 3811 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3812 post_act = get_post_action(priv); 3813 attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act); 3814 } 3815 } else if (jump_state->jump_count == 0) { /* first attr after the jump action list */ 3816 /* This is the post action for the jumping attribute (either red or green) 3817 * Use the stored jumping_attr to set the post act id on the jumping attribute 3818 */ 3819 attr->jumping_attr = jump_state->jumping_attr; 3820 } 3821 } 3822 3823 static int 3824 parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act, 3825 struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, 3826 struct mlx5e_tc_jump_state *jump_state, 3827 struct netlink_ext_ack *extack) 3828 { 3829 struct mlx5e_tc_act_branch_ctrl cond_true, cond_false; 3830 u32 jump_count = jump_state->jump_count; 3831 int err; 3832 3833 if (!tc_act->get_branch_ctrl) 3834 return 0; 3835 3836 tc_act->get_branch_ctrl(act, &cond_true, &cond_false); 3837 3838 err = alloc_branch_attr(flow, &cond_true, 3839 &attr->branch_true, &jump_count, extack); 3840 if (err) 3841 goto out_err; 3842 3843 if (jump_count) 3844 jump_state->jumping_attr = attr->branch_true; 3845 3846 err = alloc_branch_attr(flow, &cond_false, 3847 &attr->branch_false, &jump_count, extack); 3848 if (err) 3849 goto err_branch_false; 3850 3851 if (jump_count && !jump_state->jumping_attr) 3852 jump_state->jumping_attr = attr->branch_false; 3853 3854 jump_state->jump_count = jump_count; 3855 3856 /* branching action requires its own counter */ 3857 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3858 flow_flag_set(flow, USE_ACT_STATS); 3859 3860 return 0; 3861 3862 err_branch_false: 3863 free_branch_attr(flow, attr->branch_true); 3864 out_err: 3865 return err; 3866 } 3867 3868 static int 3869 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, 3870 struct flow_action *flow_action) 3871 { 3872 struct netlink_ext_ack *extack = parse_state->extack; 3873 struct mlx5e_tc_flow *flow = parse_state->flow; 3874 struct mlx5e_tc_jump_state jump_state = {}; 3875 struct mlx5_flow_attr *attr = flow->attr; 3876 enum mlx5_flow_namespace_type ns_type; 3877 struct mlx5e_priv *priv = flow->priv; 3878 struct mlx5_flow_attr *prev_attr; 3879 struct flow_action_entry *act; 3880 struct mlx5e_tc_act *tc_act; 3881 int err, i, i_split = 0; 3882 bool is_missable; 3883 3884 ns_type = mlx5e_get_flow_namespace(flow); 3885 list_add(&attr->list, &flow->attrs); 3886 3887 flow_action_for_each(i, act, flow_action) { 3888 jump_state.jump_target = false; 3889 is_missable = false; 3890 prev_attr = attr; 3891 3892 tc_act = mlx5e_tc_act_get(act->id, ns_type); 3893 if (!tc_act) { 3894 NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action"); 3895 err = -EOPNOTSUPP; 3896 goto out_free_post_acts; 3897 } 3898 3899 if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) { 3900 err = -EOPNOTSUPP; 3901 goto out_free_post_acts; 3902 } 3903 3904 err = tc_act->parse_action(parse_state, act, priv, attr); 3905 if (err) 3906 goto out_free_post_acts; 3907 3908 dec_jump_count(act, tc_act, attr, priv, &jump_state); 3909 3910 err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack); 3911 if (err) 3912 goto out_free_post_acts; 3913 3914 parse_state->actions |= attr->action; 3915 3916 /* Split attr for multi table act if not the last act. */ 3917 if (jump_state.jump_target || 3918 (tc_act->is_multi_table_act && 3919 tc_act->is_multi_table_act(priv, act, attr) && 3920 i < flow_action->num_entries - 1)) { 3921 is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false; 3922 3923 err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, 3924 ns_type); 3925 if (err) 3926 goto out_free_post_acts; 3927 3928 attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type); 3929 if (!attr) { 3930 err = -ENOMEM; 3931 goto out_free_post_acts; 3932 } 3933 3934 i_split = i + 1; 3935 list_add(&attr->list, &flow->attrs); 3936 } 3937 3938 if (is_missable) { 3939 /* Add counter to prev, and assign act to new (next) attr */ 3940 prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3941 flow_flag_set(flow, USE_ACT_STATS); 3942 3943 attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie; 3944 } else if (!tc_act->stats_action) { 3945 prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie; 3946 } 3947 } 3948 3949 err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, ns_type); 3950 if (err) 3951 goto out_free_post_acts; 3952 3953 err = alloc_flow_post_acts(flow, extack); 3954 if (err) 3955 goto out_free_post_acts; 3956 3957 return 0; 3958 3959 out_free_post_acts: 3960 free_flow_post_acts(flow); 3961 3962 return err; 3963 } 3964 3965 static int 3966 flow_action_supported(struct flow_action *flow_action, 3967 struct netlink_ext_ack *extack) 3968 { 3969 if (!flow_action_has_entries(flow_action)) { 3970 NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); 3971 return -EINVAL; 3972 } 3973 3974 if (!flow_action_hw_stats_check(flow_action, extack, 3975 FLOW_ACTION_HW_STATS_DELAYED_BIT)) { 3976 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); 3977 return -EOPNOTSUPP; 3978 } 3979 3980 return 0; 3981 } 3982 3983 static int 3984 parse_tc_nic_actions(struct mlx5e_priv *priv, 3985 struct flow_action *flow_action, 3986 struct mlx5e_tc_flow *flow, 3987 struct netlink_ext_ack *extack) 3988 { 3989 struct mlx5e_tc_act_parse_state *parse_state; 3990 struct mlx5e_tc_flow_parse_attr *parse_attr; 3991 struct mlx5_flow_attr *attr = flow->attr; 3992 int err; 3993 3994 err = flow_action_supported(flow_action, extack); 3995 if (err) 3996 return err; 3997 3998 attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; 3999 parse_attr = attr->parse_attr; 4000 parse_state = &parse_attr->parse_state; 4001 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); 4002 parse_state->ct_priv = get_ct_priv(priv); 4003 4004 err = parse_tc_actions(parse_state, flow_action); 4005 if (err) 4006 return err; 4007 4008 err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); 4009 if (err) 4010 return err; 4011 4012 err = verify_attr_actions(attr->action, extack); 4013 if (err) 4014 return err; 4015 4016 if (!actions_match_supported(priv, flow_action, parse_state->actions, 4017 parse_attr, flow, extack)) 4018 return -EOPNOTSUPP; 4019 4020 return 0; 4021 } 4022 4023 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, 4024 struct net_device *peer_netdev) 4025 { 4026 struct mlx5e_priv *peer_priv; 4027 4028 peer_priv = netdev_priv(peer_netdev); 4029 4030 return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && 4031 mlx5e_eswitch_vf_rep(priv->netdev) && 4032 mlx5e_eswitch_vf_rep(peer_netdev) && 4033 mlx5e_same_hw_devs(priv, peer_priv)); 4034 } 4035 4036 static bool same_hw_reps(struct mlx5e_priv *priv, 4037 struct net_device *peer_netdev) 4038 { 4039 struct mlx5e_priv *peer_priv; 4040 4041 peer_priv = netdev_priv(peer_netdev); 4042 4043 return mlx5e_eswitch_rep(priv->netdev) && 4044 mlx5e_eswitch_rep(peer_netdev) && 4045 mlx5e_same_hw_devs(priv, peer_priv); 4046 } 4047 4048 static bool is_lag_dev(struct mlx5e_priv *priv, 4049 struct net_device *peer_netdev) 4050 { 4051 return ((mlx5_lag_is_sriov(priv->mdev) || 4052 mlx5_lag_is_multipath(priv->mdev)) && 4053 same_hw_reps(priv, peer_netdev)); 4054 } 4055 4056 static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) 4057 { 4058 return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev); 4059 } 4060 4061 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, 4062 struct net_device *out_dev) 4063 { 4064 if (is_merged_eswitch_vfs(priv, out_dev)) 4065 return true; 4066 4067 if (is_multiport_eligible(priv, out_dev)) 4068 return true; 4069 4070 if (is_lag_dev(priv, out_dev)) 4071 return true; 4072 4073 return mlx5e_eswitch_rep(out_dev) && 4074 same_port_devs(priv, netdev_priv(out_dev)); 4075 } 4076 4077 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, 4078 struct mlx5_flow_attr *attr, 4079 int ifindex, 4080 enum mlx5e_tc_int_port_type type, 4081 u32 *action, 4082 int out_index) 4083 { 4084 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; 4085 struct mlx5e_tc_int_port_priv *int_port_priv; 4086 struct mlx5e_tc_flow_parse_attr *parse_attr; 4087 struct mlx5e_tc_int_port *dest_int_port; 4088 int err; 4089 4090 parse_attr = attr->parse_attr; 4091 int_port_priv = mlx5e_get_int_port_priv(priv); 4092 4093 dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type); 4094 if (IS_ERR(dest_int_port)) 4095 return PTR_ERR(dest_int_port); 4096 4097 err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, 4098 MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, 4099 mlx5e_tc_int_port_get_metadata(dest_int_port)); 4100 if (err) { 4101 mlx5e_tc_int_port_put(int_port_priv, dest_int_port); 4102 return err; 4103 } 4104 4105 *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 4106 4107 esw_attr->dest_int_port = dest_int_port; 4108 esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; 4109 esw_attr->split_count = out_index; 4110 4111 /* Forward to root fdb for matching against the new source vport */ 4112 attr->dest_chain = 0; 4113 4114 return 0; 4115 } 4116 4117 static int 4118 parse_tc_fdb_actions(struct mlx5e_priv *priv, 4119 struct flow_action *flow_action, 4120 struct mlx5e_tc_flow *flow, 4121 struct netlink_ext_ack *extack) 4122 { 4123 struct mlx5e_tc_act_parse_state *parse_state; 4124 struct mlx5e_tc_flow_parse_attr *parse_attr; 4125 struct mlx5_flow_attr *attr = flow->attr; 4126 struct mlx5_esw_flow_attr *esw_attr; 4127 struct net_device *filter_dev; 4128 int err; 4129 4130 err = flow_action_supported(flow_action, extack); 4131 if (err) 4132 return err; 4133 4134 esw_attr = attr->esw_attr; 4135 parse_attr = attr->parse_attr; 4136 filter_dev = parse_attr->filter_dev; 4137 parse_state = &parse_attr->parse_state; 4138 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); 4139 parse_state->ct_priv = get_ct_priv(priv); 4140 4141 err = parse_tc_actions(parse_state, flow_action); 4142 if (err) 4143 return err; 4144 4145 /* Forward to/from internal port can only have 1 dest */ 4146 if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) && 4147 esw_attr->out_count > 1) { 4148 NL_SET_ERR_MSG_MOD(extack, 4149 "Rules with internal port can have only one destination"); 4150 return -EOPNOTSUPP; 4151 } 4152 4153 /* Forward from tunnel/internal port to internal port is not supported */ 4154 if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) && 4155 esw_attr->dest_int_port) { 4156 NL_SET_ERR_MSG_MOD(extack, 4157 "Forwarding from tunnel/internal port to internal port is not supported"); 4158 return -EOPNOTSUPP; 4159 } 4160 4161 err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); 4162 if (err) 4163 return err; 4164 4165 if (!actions_match_supported(priv, flow_action, parse_state->actions, 4166 parse_attr, flow, extack)) 4167 return -EOPNOTSUPP; 4168 4169 return 0; 4170 } 4171 4172 static void get_flags(int flags, unsigned long *flow_flags) 4173 { 4174 unsigned long __flow_flags = 0; 4175 4176 if (flags & MLX5_TC_FLAG(INGRESS)) 4177 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); 4178 if (flags & MLX5_TC_FLAG(EGRESS)) 4179 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS); 4180 4181 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) 4182 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); 4183 if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) 4184 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); 4185 if (flags & MLX5_TC_FLAG(FT_OFFLOAD)) 4186 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT); 4187 4188 *flow_flags = __flow_flags; 4189 } 4190 4191 static const struct rhashtable_params tc_ht_params = { 4192 .head_offset = offsetof(struct mlx5e_tc_flow, node), 4193 .key_offset = offsetof(struct mlx5e_tc_flow, cookie), 4194 .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), 4195 .automatic_shrinking = true, 4196 }; 4197 4198 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, 4199 unsigned long flags) 4200 { 4201 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 4202 struct mlx5e_rep_priv *rpriv; 4203 4204 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { 4205 rpriv = priv->ppriv; 4206 return &rpriv->tc_ht; 4207 } else /* NIC offload */ 4208 return &tc->ht; 4209 } 4210 4211 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) 4212 { 4213 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; 4214 struct mlx5_flow_attr *attr = flow->attr; 4215 bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK && 4216 flow_flag_test(flow, INGRESS); 4217 bool act_is_encap = !!(attr->action & 4218 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); 4219 bool esw_paired = mlx5_devcom_comp_is_ready(esw_attr->in_mdev->priv.devcom, 4220 MLX5_DEVCOM_ESW_OFFLOADS); 4221 4222 if (!esw_paired) 4223 return false; 4224 4225 if ((mlx5_lag_is_sriov(esw_attr->in_mdev) || 4226 mlx5_lag_is_multipath(esw_attr->in_mdev)) && 4227 (is_rep_ingress || act_is_encap)) 4228 return true; 4229 4230 if (mlx5_lag_is_mpesw(esw_attr->in_mdev)) 4231 return true; 4232 4233 return false; 4234 } 4235 4236 struct mlx5_flow_attr * 4237 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) 4238 { 4239 u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ? 4240 sizeof(struct mlx5_esw_flow_attr) : 4241 sizeof(struct mlx5_nic_flow_attr); 4242 struct mlx5_flow_attr *attr; 4243 4244 attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); 4245 if (!attr) 4246 return attr; 4247 4248 INIT_LIST_HEAD(&attr->list); 4249 return attr; 4250 } 4251 4252 static void 4253 mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) 4254 { 4255 struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); 4256 4257 if (!attr) 4258 return; 4259 4260 if (attr->post_act_handle) 4261 mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle); 4262 4263 mlx5e_tc_tun_encap_dests_unset(flow->priv, flow, attr); 4264 4265 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) 4266 mlx5_fc_destroy(counter_dev, attr->counter); 4267 4268 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { 4269 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); 4270 mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr); 4271 } 4272 4273 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); 4274 4275 free_branch_attr(flow, attr->branch_true); 4276 free_branch_attr(flow, attr->branch_false); 4277 } 4278 4279 static int 4280 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, 4281 struct flow_cls_offload *f, unsigned long flow_flags, 4282 struct mlx5e_tc_flow_parse_attr **__parse_attr, 4283 struct mlx5e_tc_flow **__flow) 4284 { 4285 struct mlx5e_tc_flow_parse_attr *parse_attr; 4286 struct mlx5_flow_attr *attr; 4287 struct mlx5e_tc_flow *flow; 4288 int err = -ENOMEM; 4289 int out_index; 4290 4291 flow = kzalloc(sizeof(*flow), GFP_KERNEL); 4292 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); 4293 if (!parse_attr || !flow) 4294 goto err_free; 4295 4296 flow->flags = flow_flags; 4297 flow->cookie = f->cookie; 4298 flow->priv = priv; 4299 4300 attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow)); 4301 if (!attr) 4302 goto err_free; 4303 4304 flow->attr = attr; 4305 4306 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) 4307 INIT_LIST_HEAD(&flow->encaps[out_index].list); 4308 INIT_LIST_HEAD(&flow->hairpin); 4309 INIT_LIST_HEAD(&flow->l3_to_l2_reformat); 4310 INIT_LIST_HEAD(&flow->attrs); 4311 INIT_LIST_HEAD(&flow->peer_flows); 4312 refcount_set(&flow->refcnt, 1); 4313 init_completion(&flow->init_done); 4314 init_completion(&flow->del_hw_done); 4315 4316 *__flow = flow; 4317 *__parse_attr = parse_attr; 4318 4319 return 0; 4320 4321 err_free: 4322 kfree(flow); 4323 kvfree(parse_attr); 4324 return err; 4325 } 4326 4327 static void 4328 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr, 4329 struct mlx5e_tc_flow_parse_attr *parse_attr, 4330 struct flow_cls_offload *f) 4331 { 4332 attr->parse_attr = parse_attr; 4333 attr->chain = f->common.chain_index; 4334 attr->prio = f->common.prio; 4335 } 4336 4337 static void 4338 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr, 4339 struct mlx5e_priv *priv, 4340 struct mlx5e_tc_flow_parse_attr *parse_attr, 4341 struct flow_cls_offload *f, 4342 struct mlx5_eswitch_rep *in_rep, 4343 struct mlx5_core_dev *in_mdev) 4344 { 4345 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 4346 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; 4347 4348 mlx5e_flow_attr_init(attr, parse_attr, f); 4349 4350 esw_attr->in_rep = in_rep; 4351 esw_attr->in_mdev = in_mdev; 4352 4353 if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == 4354 MLX5_COUNTER_SOURCE_ESWITCH) 4355 esw_attr->counter_dev = in_mdev; 4356 else 4357 esw_attr->counter_dev = priv->mdev; 4358 } 4359 4360 static struct mlx5e_tc_flow * 4361 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, 4362 struct flow_cls_offload *f, 4363 unsigned long flow_flags, 4364 struct net_device *filter_dev, 4365 struct mlx5_eswitch_rep *in_rep, 4366 struct mlx5_core_dev *in_mdev) 4367 { 4368 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 4369 struct netlink_ext_ack *extack = f->common.extack; 4370 struct mlx5e_tc_flow_parse_attr *parse_attr; 4371 struct mlx5e_tc_flow *flow; 4372 int attr_size, err; 4373 4374 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); 4375 attr_size = sizeof(struct mlx5_esw_flow_attr); 4376 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, 4377 &parse_attr, &flow); 4378 if (err) 4379 goto out; 4380 4381 parse_attr->filter_dev = filter_dev; 4382 mlx5e_flow_esw_attr_init(flow->attr, 4383 priv, parse_attr, 4384 f, in_rep, in_mdev); 4385 4386 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, 4387 f, filter_dev); 4388 if (err) 4389 goto err_free; 4390 4391 /* actions validation depends on parsing the ct matches first */ 4392 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, 4393 &flow->attr->ct_attr, extack); 4394 if (err) 4395 goto err_free; 4396 4397 err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); 4398 if (err) 4399 goto err_free; 4400 4401 err = mlx5e_tc_add_fdb_flow(priv, flow, extack); 4402 complete_all(&flow->init_done); 4403 if (err) { 4404 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) 4405 goto err_free; 4406 4407 add_unready_flow(flow); 4408 } 4409 4410 return flow; 4411 4412 err_free: 4413 mlx5e_flow_put(priv, flow); 4414 out: 4415 return ERR_PTR(err); 4416 } 4417 4418 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, 4419 struct mlx5e_tc_flow *flow, 4420 unsigned long flow_flags, 4421 struct mlx5_eswitch *peer_esw) 4422 { 4423 struct mlx5e_priv *priv = flow->priv, *peer_priv; 4424 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 4425 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; 4426 struct mlx5e_tc_flow_parse_attr *parse_attr; 4427 int i = mlx5_get_dev_index(peer_esw->dev); 4428 struct mlx5e_rep_priv *peer_urpriv; 4429 struct mlx5e_tc_flow *peer_flow; 4430 struct mlx5_core_dev *in_mdev; 4431 int err = 0; 4432 4433 peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); 4434 peer_priv = netdev_priv(peer_urpriv->netdev); 4435 4436 /* in_mdev is assigned of which the packet originated from. 4437 * So packets redirected to uplink use the same mdev of the 4438 * original flow and packets redirected from uplink use the 4439 * peer mdev. 4440 * In multiport eswitch it's a special case that we need to 4441 * keep the original mdev. 4442 */ 4443 if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev)) 4444 in_mdev = peer_priv->mdev; 4445 else 4446 in_mdev = priv->mdev; 4447 4448 parse_attr = flow->attr->parse_attr; 4449 peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags, 4450 parse_attr->filter_dev, 4451 attr->in_rep, in_mdev); 4452 if (IS_ERR(peer_flow)) { 4453 err = PTR_ERR(peer_flow); 4454 goto out; 4455 } 4456 4457 list_add_tail(&peer_flow->peer_flows, &flow->peer_flows); 4458 flow_flag_set(flow, DUP); 4459 mutex_lock(&esw->offloads.peer_mutex); 4460 list_add_tail(&flow->peer[i], &esw->offloads.peer_flows[i]); 4461 mutex_unlock(&esw->offloads.peer_mutex); 4462 4463 out: 4464 return err; 4465 } 4466 4467 static int 4468 mlx5e_add_fdb_flow(struct mlx5e_priv *priv, 4469 struct flow_cls_offload *f, 4470 unsigned long flow_flags, 4471 struct net_device *filter_dev, 4472 struct mlx5e_tc_flow **__flow) 4473 { 4474 struct mlx5_devcom *devcom = priv->mdev->priv.devcom; 4475 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4476 struct mlx5_eswitch_rep *in_rep = rpriv->rep; 4477 struct mlx5_core_dev *in_mdev = priv->mdev; 4478 struct mlx5_eswitch *peer_esw; 4479 struct mlx5e_tc_flow *flow; 4480 int err; 4481 int i; 4482 4483 flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, 4484 in_mdev); 4485 if (IS_ERR(flow)) 4486 return PTR_ERR(flow); 4487 4488 if (!is_peer_flow_needed(flow)) { 4489 *__flow = flow; 4490 return 0; 4491 } 4492 4493 if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) { 4494 err = -ENODEV; 4495 goto clean_flow; 4496 } 4497 4498 mlx5_devcom_for_each_peer_entry(devcom, 4499 MLX5_DEVCOM_ESW_OFFLOADS, 4500 peer_esw, i) { 4501 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw); 4502 if (err) 4503 goto peer_clean; 4504 } 4505 4506 mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4507 4508 *__flow = flow; 4509 return 0; 4510 4511 peer_clean: 4512 mlx5e_tc_del_fdb_peers_flow(flow); 4513 mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4514 clean_flow: 4515 mlx5e_tc_del_fdb_flow(priv, flow); 4516 return err; 4517 } 4518 4519 static int 4520 mlx5e_add_nic_flow(struct mlx5e_priv *priv, 4521 struct flow_cls_offload *f, 4522 unsigned long flow_flags, 4523 struct net_device *filter_dev, 4524 struct mlx5e_tc_flow **__flow) 4525 { 4526 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 4527 struct netlink_ext_ack *extack = f->common.extack; 4528 struct mlx5e_tc_flow_parse_attr *parse_attr; 4529 struct mlx5e_tc_flow *flow; 4530 int attr_size, err; 4531 4532 if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { 4533 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) 4534 return -EOPNOTSUPP; 4535 } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) { 4536 return -EOPNOTSUPP; 4537 } 4538 4539 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); 4540 attr_size = sizeof(struct mlx5_nic_flow_attr); 4541 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, 4542 &parse_attr, &flow); 4543 if (err) 4544 goto out; 4545 4546 parse_attr->filter_dev = filter_dev; 4547 mlx5e_flow_attr_init(flow->attr, parse_attr, f); 4548 4549 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, 4550 f, filter_dev); 4551 if (err) 4552 goto err_free; 4553 4554 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, 4555 &flow->attr->ct_attr, extack); 4556 if (err) 4557 goto err_free; 4558 4559 err = parse_tc_nic_actions(priv, &rule->action, flow, extack); 4560 if (err) 4561 goto err_free; 4562 4563 err = mlx5e_tc_add_nic_flow(priv, flow, extack); 4564 if (err) 4565 goto err_free; 4566 4567 flow_flag_set(flow, OFFLOADED); 4568 *__flow = flow; 4569 4570 return 0; 4571 4572 err_free: 4573 flow_flag_set(flow, FAILED); 4574 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); 4575 mlx5e_flow_put(priv, flow); 4576 out: 4577 return err; 4578 } 4579 4580 static int 4581 mlx5e_tc_add_flow(struct mlx5e_priv *priv, 4582 struct flow_cls_offload *f, 4583 unsigned long flags, 4584 struct net_device *filter_dev, 4585 struct mlx5e_tc_flow **flow) 4586 { 4587 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 4588 unsigned long flow_flags; 4589 int err; 4590 4591 get_flags(flags, &flow_flags); 4592 4593 if (!tc_can_offload_extack(priv->netdev, f->common.extack)) 4594 return -EOPNOTSUPP; 4595 4596 if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) 4597 err = mlx5e_add_fdb_flow(priv, f, flow_flags, 4598 filter_dev, flow); 4599 else 4600 err = mlx5e_add_nic_flow(priv, f, flow_flags, 4601 filter_dev, flow); 4602 4603 return err; 4604 } 4605 4606 static bool is_flow_rule_duplicate_allowed(struct net_device *dev, 4607 struct mlx5e_rep_priv *rpriv) 4608 { 4609 /* Offloaded flow rule is allowed to duplicate on non-uplink representor 4610 * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this 4611 * function is called from NIC mode. 4612 */ 4613 return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK; 4614 } 4615 4616 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, 4617 struct flow_cls_offload *f, unsigned long flags) 4618 { 4619 struct netlink_ext_ack *extack = f->common.extack; 4620 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 4621 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4622 struct mlx5e_tc_flow *flow; 4623 int err = 0; 4624 4625 if (!mlx5_esw_hold(priv->mdev)) 4626 return -EBUSY; 4627 4628 mlx5_esw_get(priv->mdev); 4629 4630 rcu_read_lock(); 4631 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); 4632 if (flow) { 4633 /* Same flow rule offloaded to non-uplink representor sharing tc block, 4634 * just return 0. 4635 */ 4636 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev) 4637 goto rcu_unlock; 4638 4639 NL_SET_ERR_MSG_MOD(extack, 4640 "flow cookie already exists, ignoring"); 4641 netdev_warn_once(priv->netdev, 4642 "flow cookie %lx already exists, ignoring\n", 4643 f->cookie); 4644 err = -EEXIST; 4645 goto rcu_unlock; 4646 } 4647 rcu_unlock: 4648 rcu_read_unlock(); 4649 if (flow) 4650 goto out; 4651 4652 trace_mlx5e_configure_flower(f); 4653 err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); 4654 if (err) 4655 goto out; 4656 4657 /* Flow rule offloaded to non-uplink representor sharing tc block, 4658 * set the flow's owner dev. 4659 */ 4660 if (is_flow_rule_duplicate_allowed(dev, rpriv)) 4661 flow->orig_dev = dev; 4662 4663 err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); 4664 if (err) 4665 goto err_free; 4666 4667 mlx5_esw_release(priv->mdev); 4668 return 0; 4669 4670 err_free: 4671 mlx5e_flow_put(priv, flow); 4672 out: 4673 mlx5_esw_put(priv->mdev); 4674 mlx5_esw_release(priv->mdev); 4675 return err; 4676 } 4677 4678 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) 4679 { 4680 bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS)); 4681 bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS)); 4682 4683 return flow_flag_test(flow, INGRESS) == dir_ingress && 4684 flow_flag_test(flow, EGRESS) == dir_egress; 4685 } 4686 4687 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, 4688 struct flow_cls_offload *f, unsigned long flags) 4689 { 4690 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 4691 struct mlx5e_tc_flow *flow; 4692 int err; 4693 4694 rcu_read_lock(); 4695 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); 4696 if (!flow || !same_flow_direction(flow, flags)) { 4697 err = -EINVAL; 4698 goto errout; 4699 } 4700 4701 /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag 4702 * set. 4703 */ 4704 if (flow_flag_test_and_set(flow, DELETED)) { 4705 err = -EINVAL; 4706 goto errout; 4707 } 4708 rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); 4709 rcu_read_unlock(); 4710 4711 trace_mlx5e_delete_flower(f); 4712 mlx5e_flow_put(priv, flow); 4713 4714 mlx5_esw_put(priv->mdev); 4715 return 0; 4716 4717 errout: 4718 rcu_read_unlock(); 4719 return err; 4720 } 4721 4722 int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv, 4723 struct flow_offload_action *fl_act) 4724 { 4725 return mlx5e_tc_act_stats_fill_stats(get_act_stats_handle(priv), fl_act); 4726 } 4727 4728 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, 4729 struct flow_cls_offload *f, unsigned long flags) 4730 { 4731 struct mlx5_devcom *devcom = priv->mdev->priv.devcom; 4732 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 4733 struct mlx5e_tc_flow *flow; 4734 struct mlx5_fc *counter; 4735 u64 lastuse = 0; 4736 u64 packets = 0; 4737 u64 bytes = 0; 4738 int err = 0; 4739 4740 rcu_read_lock(); 4741 flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie, 4742 tc_ht_params)); 4743 rcu_read_unlock(); 4744 if (IS_ERR(flow)) 4745 return PTR_ERR(flow); 4746 4747 if (!same_flow_direction(flow, flags)) { 4748 err = -EINVAL; 4749 goto errout; 4750 } 4751 4752 if (mlx5e_is_offloaded_flow(flow)) { 4753 if (flow_flag_test(flow, USE_ACT_STATS)) { 4754 f->use_act_stats = true; 4755 } else { 4756 counter = mlx5e_tc_get_counter(flow); 4757 if (!counter) 4758 goto errout; 4759 4760 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); 4761 } 4762 } 4763 4764 /* Under multipath it's possible for one rule to be currently 4765 * un-offloaded while the other rule is offloaded. 4766 */ 4767 if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) 4768 goto out; 4769 4770 if (flow_flag_test(flow, DUP)) { 4771 struct mlx5e_tc_flow *peer_flow; 4772 4773 list_for_each_entry(peer_flow, &flow->peer_flows, peer_flows) { 4774 u64 packets2; 4775 u64 lastuse2; 4776 u64 bytes2; 4777 4778 if (!flow_flag_test(peer_flow, OFFLOADED)) 4779 continue; 4780 if (flow_flag_test(flow, USE_ACT_STATS)) { 4781 f->use_act_stats = true; 4782 break; 4783 } 4784 4785 counter = mlx5e_tc_get_counter(peer_flow); 4786 if (!counter) 4787 goto no_peer_counter; 4788 mlx5_fc_query_cached(counter, &bytes2, &packets2, 4789 &lastuse2); 4790 4791 bytes += bytes2; 4792 packets += packets2; 4793 lastuse = max_t(u64, lastuse, lastuse2); 4794 } 4795 } 4796 4797 no_peer_counter: 4798 mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4799 out: 4800 flow_stats_update(&f->stats, bytes, packets, 0, lastuse, 4801 FLOW_ACTION_HW_STATS_DELAYED); 4802 trace_mlx5e_stats_flower(f); 4803 errout: 4804 mlx5e_flow_put(priv, flow); 4805 return err; 4806 } 4807 4808 static int apply_police_params(struct mlx5e_priv *priv, u64 rate, 4809 struct netlink_ext_ack *extack) 4810 { 4811 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4812 struct mlx5_eswitch *esw; 4813 u32 rate_mbps = 0; 4814 u16 vport_num; 4815 int err; 4816 4817 vport_num = rpriv->rep->vport; 4818 if (vport_num >= MLX5_VPORT_ECPF) { 4819 NL_SET_ERR_MSG_MOD(extack, 4820 "Ingress rate limit is supported only for Eswitch ports connected to VFs"); 4821 return -EOPNOTSUPP; 4822 } 4823 4824 esw = priv->mdev->priv.eswitch; 4825 /* rate is given in bytes/sec. 4826 * First convert to bits/sec and then round to the nearest mbit/secs. 4827 * mbit means million bits. 4828 * Moreover, if rate is non zero we choose to configure to a minimum of 4829 * 1 mbit/sec. 4830 */ 4831 if (rate) { 4832 rate = (rate * BITS_PER_BYTE) + 500000; 4833 do_div(rate, 1000000); 4834 rate_mbps = max_t(u32, rate, 1); 4835 } 4836 4837 err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps); 4838 if (err) 4839 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); 4840 4841 return err; 4842 } 4843 4844 static int 4845 tc_matchall_police_validate(const struct flow_action *action, 4846 const struct flow_action_entry *act, 4847 struct netlink_ext_ack *extack) 4848 { 4849 if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) { 4850 NL_SET_ERR_MSG_MOD(extack, 4851 "Offload not supported when conform action is not continue"); 4852 return -EOPNOTSUPP; 4853 } 4854 4855 if (act->police.exceed.act_id != FLOW_ACTION_DROP) { 4856 NL_SET_ERR_MSG_MOD(extack, 4857 "Offload not supported when exceed action is not drop"); 4858 return -EOPNOTSUPP; 4859 } 4860 4861 if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && 4862 !flow_action_is_last_entry(action, act)) { 4863 NL_SET_ERR_MSG_MOD(extack, 4864 "Offload not supported when conform action is ok, but action is not last"); 4865 return -EOPNOTSUPP; 4866 } 4867 4868 if (act->police.peakrate_bytes_ps || 4869 act->police.avrate || act->police.overhead) { 4870 NL_SET_ERR_MSG_MOD(extack, 4871 "Offload not supported when peakrate/avrate/overhead is configured"); 4872 return -EOPNOTSUPP; 4873 } 4874 4875 return 0; 4876 } 4877 4878 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, 4879 struct flow_action *flow_action, 4880 struct netlink_ext_ack *extack) 4881 { 4882 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4883 const struct flow_action_entry *act; 4884 int err; 4885 int i; 4886 4887 if (!flow_action_has_entries(flow_action)) { 4888 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); 4889 return -EINVAL; 4890 } 4891 4892 if (!flow_offload_has_one_action(flow_action)) { 4893 NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); 4894 return -EOPNOTSUPP; 4895 } 4896 4897 if (!flow_action_basic_hw_stats_check(flow_action, extack)) { 4898 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); 4899 return -EOPNOTSUPP; 4900 } 4901 4902 flow_action_for_each(i, act, flow_action) { 4903 switch (act->id) { 4904 case FLOW_ACTION_POLICE: 4905 err = tc_matchall_police_validate(flow_action, act, extack); 4906 if (err) 4907 return err; 4908 4909 err = apply_police_params(priv, act->police.rate_bytes_ps, extack); 4910 if (err) 4911 return err; 4912 4913 rpriv->prev_vf_vport_stats = priv->stats.vf_vport; 4914 break; 4915 default: 4916 NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); 4917 return -EOPNOTSUPP; 4918 } 4919 } 4920 4921 return 0; 4922 } 4923 4924 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, 4925 struct tc_cls_matchall_offload *ma) 4926 { 4927 struct netlink_ext_ack *extack = ma->common.extack; 4928 4929 if (ma->common.prio != 1) { 4930 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); 4931 return -EINVAL; 4932 } 4933 4934 return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); 4935 } 4936 4937 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, 4938 struct tc_cls_matchall_offload *ma) 4939 { 4940 struct netlink_ext_ack *extack = ma->common.extack; 4941 4942 return apply_police_params(priv, 0, extack); 4943 } 4944 4945 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, 4946 struct tc_cls_matchall_offload *ma) 4947 { 4948 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4949 struct rtnl_link_stats64 cur_stats; 4950 u64 dbytes; 4951 u64 dpkts; 4952 4953 cur_stats = priv->stats.vf_vport; 4954 dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; 4955 dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; 4956 rpriv->prev_vf_vport_stats = cur_stats; 4957 flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies, 4958 FLOW_ACTION_HW_STATS_DELAYED); 4959 } 4960 4961 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, 4962 struct mlx5e_priv *peer_priv) 4963 { 4964 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 4965 struct mlx5_core_dev *peer_mdev = peer_priv->mdev; 4966 struct mlx5e_hairpin_entry *hpe, *tmp; 4967 LIST_HEAD(init_wait_list); 4968 u16 peer_vhca_id; 4969 int bkt; 4970 4971 if (!mlx5e_same_hw_devs(priv, peer_priv)) 4972 return; 4973 4974 peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); 4975 4976 mutex_lock(&tc->hairpin_tbl_lock); 4977 hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) 4978 if (refcount_inc_not_zero(&hpe->refcnt)) 4979 list_add(&hpe->dead_peer_wait_list, &init_wait_list); 4980 mutex_unlock(&tc->hairpin_tbl_lock); 4981 4982 list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { 4983 wait_for_completion(&hpe->res_ready); 4984 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) 4985 mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair); 4986 4987 mlx5e_hairpin_put(priv, hpe); 4988 } 4989 } 4990 4991 static int mlx5e_tc_netdev_event(struct notifier_block *this, 4992 unsigned long event, void *ptr) 4993 { 4994 struct net_device *ndev = netdev_notifier_info_to_dev(ptr); 4995 struct mlx5e_priv *peer_priv; 4996 struct mlx5e_tc_table *tc; 4997 struct mlx5e_priv *priv; 4998 4999 if (ndev->netdev_ops != &mlx5e_netdev_ops || 5000 event != NETDEV_UNREGISTER || 5001 ndev->reg_state == NETREG_REGISTERED) 5002 return NOTIFY_DONE; 5003 5004 tc = container_of(this, struct mlx5e_tc_table, netdevice_nb); 5005 priv = tc->priv; 5006 peer_priv = netdev_priv(ndev); 5007 if (priv == peer_priv || 5008 !(priv->netdev->features & NETIF_F_HW_TC)) 5009 return NOTIFY_DONE; 5010 5011 mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv); 5012 5013 return NOTIFY_DONE; 5014 } 5015 5016 static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv) 5017 { 5018 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 5019 struct mlx5_flow_table **ft = &tc->miss_t; 5020 struct mlx5_flow_table_attr ft_attr = {}; 5021 struct mlx5_flow_namespace *ns; 5022 int err = 0; 5023 5024 ft_attr.max_fte = 1; 5025 ft_attr.autogroup.max_num_groups = 1; 5026 ft_attr.level = MLX5E_TC_MISS_LEVEL; 5027 ft_attr.prio = 0; 5028 ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); 5029 5030 *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 5031 if (IS_ERR(*ft)) { 5032 err = PTR_ERR(*ft); 5033 netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err); 5034 } 5035 5036 return err; 5037 } 5038 5039 static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv) 5040 { 5041 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 5042 5043 mlx5_destroy_flow_table(tc->miss_t); 5044 } 5045 5046 int mlx5e_tc_nic_init(struct mlx5e_priv *priv) 5047 { 5048 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 5049 struct mlx5_core_dev *dev = priv->mdev; 5050 struct mapping_ctx *chains_mapping; 5051 struct mlx5_chains_attr attr = {}; 5052 u64 mapping_id; 5053 int err; 5054 5055 mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); 5056 mutex_init(&tc->t_lock); 5057 mutex_init(&tc->hairpin_tbl_lock); 5058 hash_init(tc->hairpin_tbl); 5059 tc->priv = priv; 5060 5061 err = rhashtable_init(&tc->ht, &tc_ht_params); 5062 if (err) 5063 return err; 5064 5065 lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); 5066 lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0); 5067 5068 mapping_id = mlx5_query_nic_system_image_guid(dev); 5069 5070 chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, 5071 sizeof(struct mlx5_mapped_obj), 5072 MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); 5073 5074 if (IS_ERR(chains_mapping)) { 5075 err = PTR_ERR(chains_mapping); 5076 goto err_mapping; 5077 } 5078 tc->mapping = chains_mapping; 5079 5080 err = mlx5e_tc_nic_create_miss_table(priv); 5081 if (err) 5082 goto err_chains; 5083 5084 if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) 5085 attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | 5086 MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; 5087 attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; 5088 attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; 5089 attr.default_ft = tc->miss_t; 5090 attr.mapping = chains_mapping; 5091 attr.fs_base_prio = MLX5E_TC_PRIO; 5092 5093 tc->chains = mlx5_chains_create(dev, &attr); 5094 if (IS_ERR(tc->chains)) { 5095 err = PTR_ERR(tc->chains); 5096 goto err_miss; 5097 } 5098 5099 mlx5_chains_print_info(tc->chains); 5100 5101 tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); 5102 tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr, 5103 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); 5104 5105 tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; 5106 err = register_netdevice_notifier_dev_net(priv->netdev, 5107 &tc->netdevice_nb, 5108 &tc->netdevice_nn); 5109 if (err) { 5110 tc->netdevice_nb.notifier_call = NULL; 5111 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); 5112 goto err_reg; 5113 } 5114 5115 mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs)); 5116 5117 tc->action_stats_handle = mlx5e_tc_act_stats_create(); 5118 if (IS_ERR(tc->action_stats_handle)) { 5119 err = PTR_ERR(tc->action_stats_handle); 5120 goto err_act_stats; 5121 } 5122 5123 return 0; 5124 5125 err_act_stats: 5126 unregister_netdevice_notifier_dev_net(priv->netdev, 5127 &tc->netdevice_nb, 5128 &tc->netdevice_nn); 5129 err_reg: 5130 mlx5_tc_ct_clean(tc->ct); 5131 mlx5e_tc_post_act_destroy(tc->post_act); 5132 mlx5_chains_destroy(tc->chains); 5133 err_miss: 5134 mlx5e_tc_nic_destroy_miss_table(priv); 5135 err_chains: 5136 mapping_destroy(chains_mapping); 5137 err_mapping: 5138 rhashtable_destroy(&tc->ht); 5139 return err; 5140 } 5141 5142 static void _mlx5e_tc_del_flow(void *ptr, void *arg) 5143 { 5144 struct mlx5e_tc_flow *flow = ptr; 5145 struct mlx5e_priv *priv = flow->priv; 5146 5147 mlx5e_tc_del_flow(priv, flow); 5148 kfree(flow); 5149 } 5150 5151 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) 5152 { 5153 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 5154 5155 debugfs_remove_recursive(tc->dfs_root); 5156 5157 if (tc->netdevice_nb.notifier_call) 5158 unregister_netdevice_notifier_dev_net(priv->netdev, 5159 &tc->netdevice_nb, 5160 &tc->netdevice_nn); 5161 5162 mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr); 5163 mutex_destroy(&tc->hairpin_tbl_lock); 5164 5165 rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); 5166 5167 if (!IS_ERR_OR_NULL(tc->t)) { 5168 mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL); 5169 tc->t = NULL; 5170 } 5171 mutex_destroy(&tc->t_lock); 5172 5173 mlx5_tc_ct_clean(tc->ct); 5174 mlx5e_tc_post_act_destroy(tc->post_act); 5175 mapping_destroy(tc->mapping); 5176 mlx5_chains_destroy(tc->chains); 5177 mlx5e_tc_nic_destroy_miss_table(priv); 5178 mlx5e_tc_act_stats_free(tc->action_stats_handle); 5179 } 5180 5181 int mlx5e_tc_ht_init(struct rhashtable *tc_ht) 5182 { 5183 int err; 5184 5185 err = rhashtable_init(tc_ht, &tc_ht_params); 5186 if (err) 5187 return err; 5188 5189 lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); 5190 lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0); 5191 5192 return 0; 5193 } 5194 5195 void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) 5196 { 5197 rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); 5198 } 5199 5200 int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) 5201 { 5202 const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); 5203 struct mlx5e_rep_priv *rpriv; 5204 struct mapping_ctx *mapping; 5205 struct mlx5_eswitch *esw; 5206 struct mlx5e_priv *priv; 5207 u64 mapping_id; 5208 int err = 0; 5209 5210 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); 5211 priv = netdev_priv(rpriv->netdev); 5212 esw = priv->mdev->priv.eswitch; 5213 5214 uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw), 5215 MLX5_FLOW_NAMESPACE_FDB); 5216 uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev), 5217 esw_chains(esw), 5218 &esw->offloads.mod_hdr, 5219 MLX5_FLOW_NAMESPACE_FDB, 5220 uplink_priv->post_act); 5221 5222 uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev)); 5223 5224 uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act); 5225 5226 mapping_id = mlx5_query_nic_system_image_guid(esw->dev); 5227 5228 mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL, 5229 sizeof(struct tunnel_match_key), 5230 TUNNEL_INFO_BITS_MASK, true); 5231 5232 if (IS_ERR(mapping)) { 5233 err = PTR_ERR(mapping); 5234 goto err_tun_mapping; 5235 } 5236 uplink_priv->tunnel_mapping = mapping; 5237 5238 /* Two last values are reserved for stack devices slow path table mark 5239 * and bridge ingress push mark. 5240 */ 5241 mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS, 5242 sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true); 5243 if (IS_ERR(mapping)) { 5244 err = PTR_ERR(mapping); 5245 goto err_enc_opts_mapping; 5246 } 5247 uplink_priv->tunnel_enc_opts_mapping = mapping; 5248 5249 uplink_priv->encap = mlx5e_tc_tun_init(priv); 5250 if (IS_ERR(uplink_priv->encap)) { 5251 err = PTR_ERR(uplink_priv->encap); 5252 goto err_register_fib_notifier; 5253 } 5254 5255 uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create(); 5256 if (IS_ERR(uplink_priv->action_stats_handle)) { 5257 err = PTR_ERR(uplink_priv->action_stats_handle); 5258 goto err_action_counter; 5259 } 5260 5261 mlx5_esw_offloads_devcom_init(esw); 5262 5263 return 0; 5264 5265 err_action_counter: 5266 mlx5e_tc_tun_cleanup(uplink_priv->encap); 5267 err_register_fib_notifier: 5268 mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); 5269 err_enc_opts_mapping: 5270 mapping_destroy(uplink_priv->tunnel_mapping); 5271 err_tun_mapping: 5272 mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); 5273 mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); 5274 mlx5_tc_ct_clean(uplink_priv->ct_priv); 5275 netdev_warn(priv->netdev, 5276 "Failed to initialize tc (eswitch), err: %d", err); 5277 mlx5e_tc_post_act_destroy(uplink_priv->post_act); 5278 return err; 5279 } 5280 5281 void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv) 5282 { 5283 struct mlx5e_rep_priv *rpriv; 5284 struct mlx5_eswitch *esw; 5285 struct mlx5e_priv *priv; 5286 5287 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); 5288 priv = netdev_priv(rpriv->netdev); 5289 esw = priv->mdev->priv.eswitch; 5290 5291 mlx5_esw_offloads_devcom_cleanup(esw); 5292 5293 mlx5e_tc_tun_cleanup(uplink_priv->encap); 5294 5295 mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); 5296 mapping_destroy(uplink_priv->tunnel_mapping); 5297 5298 mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); 5299 mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); 5300 mlx5_tc_ct_clean(uplink_priv->ct_priv); 5301 mlx5e_flow_meters_cleanup(uplink_priv->flow_meters); 5302 mlx5e_tc_post_act_destroy(uplink_priv->post_act); 5303 mlx5e_tc_act_stats_free(uplink_priv->action_stats_handle); 5304 } 5305 5306 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) 5307 { 5308 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 5309 5310 return atomic_read(&tc_ht->nelems); 5311 } 5312 5313 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) 5314 { 5315 struct mlx5e_tc_flow *flow, *tmp; 5316 int i; 5317 5318 for (i = 0; i < MLX5_MAX_PORTS; i++) { 5319 if (i == mlx5_get_dev_index(esw->dev)) 5320 continue; 5321 list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i]) 5322 mlx5e_tc_del_fdb_peers_flow(flow); 5323 } 5324 } 5325 5326 void mlx5e_tc_reoffload_flows_work(struct work_struct *work) 5327 { 5328 struct mlx5_rep_uplink_priv *rpriv = 5329 container_of(work, struct mlx5_rep_uplink_priv, 5330 reoffload_flows_work); 5331 struct mlx5e_tc_flow *flow, *tmp; 5332 5333 mutex_lock(&rpriv->unready_flows_lock); 5334 list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { 5335 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) 5336 unready_flow_del(flow); 5337 } 5338 mutex_unlock(&rpriv->unready_flows_lock); 5339 } 5340 5341 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, 5342 struct flow_cls_offload *cls_flower, 5343 unsigned long flags) 5344 { 5345 switch (cls_flower->command) { 5346 case FLOW_CLS_REPLACE: 5347 return mlx5e_configure_flower(priv->netdev, priv, cls_flower, 5348 flags); 5349 case FLOW_CLS_DESTROY: 5350 return mlx5e_delete_flower(priv->netdev, priv, cls_flower, 5351 flags); 5352 case FLOW_CLS_STATS: 5353 return mlx5e_stats_flower(priv->netdev, priv, cls_flower, 5354 flags); 5355 default: 5356 return -EOPNOTSUPP; 5357 } 5358 } 5359 5360 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, 5361 void *cb_priv) 5362 { 5363 unsigned long flags = MLX5_TC_FLAG(INGRESS); 5364 struct mlx5e_priv *priv = cb_priv; 5365 5366 if (!priv->netdev || !netif_device_present(priv->netdev)) 5367 return -EOPNOTSUPP; 5368 5369 if (mlx5e_is_uplink_rep(priv)) 5370 flags |= MLX5_TC_FLAG(ESW_OFFLOAD); 5371 else 5372 flags |= MLX5_TC_FLAG(NIC_OFFLOAD); 5373 5374 switch (type) { 5375 case TC_SETUP_CLSFLOWER: 5376 return mlx5e_setup_tc_cls_flower(priv, type_data, flags); 5377 default: 5378 return -EOPNOTSUPP; 5379 } 5380 } 5381 5382 static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, 5383 struct mlx5e_tc_update_priv *tc_priv, 5384 u32 tunnel_id) 5385 { 5386 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 5387 struct tunnel_match_enc_opts enc_opts = {}; 5388 struct mlx5_rep_uplink_priv *uplink_priv; 5389 struct mlx5e_rep_priv *uplink_rpriv; 5390 struct metadata_dst *tun_dst; 5391 struct tunnel_match_key key; 5392 u32 tun_id, enc_opts_id; 5393 struct net_device *dev; 5394 int err; 5395 5396 enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; 5397 tun_id = tunnel_id >> ENC_OPTS_BITS; 5398 5399 if (!tun_id) 5400 return true; 5401 5402 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 5403 uplink_priv = &uplink_rpriv->uplink_priv; 5404 5405 err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); 5406 if (err) { 5407 netdev_dbg(priv->netdev, 5408 "Couldn't find tunnel for tun_id: %d, err: %d\n", 5409 tun_id, err); 5410 return false; 5411 } 5412 5413 if (enc_opts_id) { 5414 err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, 5415 enc_opts_id, &enc_opts); 5416 if (err) { 5417 netdev_dbg(priv->netdev, 5418 "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", 5419 enc_opts_id, err); 5420 return false; 5421 } 5422 } 5423 5424 switch (key.enc_control.addr_type) { 5425 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 5426 tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, 5427 key.enc_ip.tos, key.enc_ip.ttl, 5428 key.enc_tp.dst, TUNNEL_KEY, 5429 key32_to_tunnel_id(key.enc_key_id.keyid), 5430 enc_opts.key.len); 5431 break; 5432 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 5433 tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, 5434 key.enc_ip.tos, key.enc_ip.ttl, 5435 key.enc_tp.dst, 0, TUNNEL_KEY, 5436 key32_to_tunnel_id(key.enc_key_id.keyid), 5437 enc_opts.key.len); 5438 break; 5439 default: 5440 netdev_dbg(priv->netdev, 5441 "Couldn't restore tunnel, unsupported addr_type: %d\n", 5442 key.enc_control.addr_type); 5443 return false; 5444 } 5445 5446 if (!tun_dst) { 5447 netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n"); 5448 return false; 5449 } 5450 5451 tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; 5452 5453 if (enc_opts.key.len) 5454 ip_tunnel_info_opts_set(&tun_dst->u.tun_info, 5455 enc_opts.key.data, 5456 enc_opts.key.len, 5457 enc_opts.key.dst_opt_type); 5458 5459 skb_dst_set(skb, (struct dst_entry *)tun_dst); 5460 dev = dev_get_by_index(&init_net, key.filter_ifindex); 5461 if (!dev) { 5462 netdev_dbg(priv->netdev, 5463 "Couldn't find tunnel device with ifindex: %d\n", 5464 key.filter_ifindex); 5465 return false; 5466 } 5467 5468 /* Set fwd_dev so we do dev_put() after datapath */ 5469 tc_priv->fwd_dev = dev; 5470 5471 skb->dev = dev; 5472 5473 return true; 5474 } 5475 5476 static bool mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv, 5477 struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id, 5478 u32 tunnel_id, struct mlx5e_tc_update_priv *tc_priv) 5479 { 5480 struct mlx5e_priv *priv = netdev_priv(skb->dev); 5481 struct tc_skb_ext *tc_skb_ext; 5482 u64 act_miss_cookie; 5483 u32 chain; 5484 5485 chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0; 5486 act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ? 5487 mapped_obj->act_miss_cookie : 0; 5488 if (chain || act_miss_cookie) { 5489 if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id)) 5490 return false; 5491 5492 tc_skb_ext = tc_skb_ext_alloc(skb); 5493 if (!tc_skb_ext) { 5494 WARN_ON(1); 5495 return false; 5496 } 5497 5498 if (act_miss_cookie) { 5499 tc_skb_ext->act_miss_cookie = act_miss_cookie; 5500 tc_skb_ext->act_miss = 1; 5501 } else { 5502 tc_skb_ext->chain = chain; 5503 } 5504 } 5505 5506 if (tc_priv) 5507 return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id); 5508 5509 return true; 5510 } 5511 5512 static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, 5513 struct mlx5_mapped_obj *mapped_obj, 5514 struct mlx5e_tc_update_priv *tc_priv) 5515 { 5516 if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) { 5517 netdev_dbg(priv->netdev, 5518 "Failed to restore tunnel info for sampled packet\n"); 5519 return; 5520 } 5521 mlx5e_tc_sample_skb(skb, mapped_obj); 5522 } 5523 5524 static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb, 5525 struct mlx5_mapped_obj *mapped_obj, 5526 struct mlx5e_tc_update_priv *tc_priv, 5527 u32 tunnel_id) 5528 { 5529 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 5530 struct mlx5_rep_uplink_priv *uplink_priv; 5531 struct mlx5e_rep_priv *uplink_rpriv; 5532 bool forward_tx = false; 5533 5534 /* Tunnel restore takes precedence over int port restore */ 5535 if (tunnel_id) 5536 return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id); 5537 5538 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 5539 uplink_priv = &uplink_rpriv->uplink_priv; 5540 5541 if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb, 5542 mapped_obj->int_port_metadata, &forward_tx)) { 5543 /* Set fwd_dev for future dev_put */ 5544 tc_priv->fwd_dev = skb->dev; 5545 tc_priv->forward_tx = forward_tx; 5546 5547 return true; 5548 } 5549 5550 return false; 5551 } 5552 5553 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, 5554 struct mapping_ctx *mapping_ctx, u32 mapped_obj_id, 5555 struct mlx5_tc_ct_priv *ct_priv, 5556 u32 zone_restore_id, u32 tunnel_id, 5557 struct mlx5e_tc_update_priv *tc_priv) 5558 { 5559 struct mlx5e_priv *priv = netdev_priv(skb->dev); 5560 struct mlx5_mapped_obj mapped_obj; 5561 int err; 5562 5563 err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj); 5564 if (err) { 5565 netdev_dbg(skb->dev, 5566 "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n", 5567 mapped_obj_id, err); 5568 return false; 5569 } 5570 5571 switch (mapped_obj.type) { 5572 case MLX5_MAPPED_OBJ_CHAIN: 5573 case MLX5_MAPPED_OBJ_ACT_MISS: 5574 return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id, 5575 tunnel_id, tc_priv); 5576 case MLX5_MAPPED_OBJ_SAMPLE: 5577 mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv); 5578 tc_priv->skb_done = true; 5579 return true; 5580 case MLX5_MAPPED_OBJ_INT_PORT_METADATA: 5581 return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, tc_priv, tunnel_id); 5582 default: 5583 netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); 5584 return false; 5585 } 5586 5587 return false; 5588 } 5589 5590 bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb) 5591 { 5592 struct mlx5e_priv *priv = netdev_priv(skb->dev); 5593 u32 mapped_obj_id, reg_b, zone_restore_id; 5594 struct mlx5_tc_ct_priv *ct_priv; 5595 struct mapping_ctx *mapping_ctx; 5596 struct mlx5e_tc_table *tc; 5597 5598 reg_b = be32_to_cpu(cqe->ft_metadata); 5599 tc = mlx5e_fs_get_tc(priv->fs); 5600 mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; 5601 zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & 5602 ESW_ZONE_ID_MASK; 5603 ct_priv = tc->ct; 5604 mapping_ctx = tc->mapping; 5605 5606 return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id, 5607 0, NULL); 5608 } 5609 5610 static struct mapping_ctx * 5611 mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv) 5612 { 5613 struct mlx5e_tc_table *tc; 5614 struct mlx5_eswitch *esw; 5615 struct mapping_ctx *ctx; 5616 5617 if (is_mdev_switchdev_mode(priv->mdev)) { 5618 esw = priv->mdev->priv.eswitch; 5619 ctx = esw->offloads.reg_c0_obj_pool; 5620 } else { 5621 tc = mlx5e_fs_get_tc(priv->fs); 5622 ctx = tc->mapping; 5623 } 5624 5625 return ctx; 5626 } 5627 5628 int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, 5629 u64 act_miss_cookie, u32 *act_miss_mapping) 5630 { 5631 struct mlx5_mapped_obj mapped_obj = {}; 5632 struct mlx5_eswitch *esw; 5633 struct mapping_ctx *ctx; 5634 int err; 5635 5636 ctx = mlx5e_get_priv_obj_mapping(priv); 5637 mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS; 5638 mapped_obj.act_miss_cookie = act_miss_cookie; 5639 err = mapping_add(ctx, &mapped_obj, act_miss_mapping); 5640 if (err) 5641 return err; 5642 5643 if (!is_mdev_switchdev_mode(priv->mdev)) 5644 return 0; 5645 5646 esw = priv->mdev->priv.eswitch; 5647 attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping); 5648 if (IS_ERR(attr->act_id_restore_rule)) 5649 goto err_rule; 5650 5651 return 0; 5652 5653 err_rule: 5654 mapping_remove(ctx, *act_miss_mapping); 5655 return err; 5656 } 5657 5658 void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, 5659 u32 act_miss_mapping) 5660 { 5661 struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv); 5662 5663 if (is_mdev_switchdev_mode(priv->mdev)) 5664 mlx5_del_flow_rules(attr->act_id_restore_rule); 5665 mapping_remove(ctx, act_miss_mapping); 5666 } 5667