/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
#include <net/bonding.h>
#include <net/dst_metadata.h>
#include "devlink.h"
#include "en.h"
#include "en/tc/post_act.h"
#include "en/tc/act_stats.h"
#include "en_rep.h"
#include "en/rep/tc.h"
#include "en/rep/neigh.h"
#include "en_tc.h"
#include "eswitch.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "en/mapping.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/tc_tun_encap.h"
#include "en/tc/sample.h"
#include "en/tc/act/act.h"
#include "en/tc/post_meter.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
#include "diag/en_tc_tracepoint.h"
#include <asm/div64.h>
#include "lag/lag.h"
#include "lag/mp.h"

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)

struct mlx5e_tc_table {
	/* Protects the dynamic assignment of the t parameter
	 * which is the nic tc root table.
	 */
	struct mutex t_lock;
	struct mlx5e_priv *priv;
	struct mlx5_flow_table *t;
	struct mlx5_flow_table *miss_t;
	struct mlx5_fs_chains *chains;
	struct mlx5e_post_act *post_act;

	struct rhashtable ht;

	struct mod_hdr_tbl mod_hdr;
	struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
	DECLARE_HASHTABLE(hairpin_tbl, 8);

	struct notifier_block netdevice_nb;
	struct netdev_net_notifier netdevice_nn;

	struct mlx5_tc_ct_priv *ct;
	struct mapping_ctx *mapping;
	struct dentry *dfs_root;

	/* tc action stats */
	struct mlx5e_tc_act_stats_handle *action_stats_handle;
};

struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
	[MAPPED_OBJ_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
		.moffset = 0,
		.mlen = 16,
	},
	[VPORT_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
		.moffset = 16,
		.mlen = 16,
	},
	[TUNNEL_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
		.moffset = 8,
		.mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
		.soffset = MLX5_BYTE_OFF(fte_match_param,
					 misc_parameters_2.metadata_reg_c_1),
	},
	[ZONE_TO_REG] = zone_to_reg_ct,
	[ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
	[CTSTATE_TO_REG] = ctstate_to_reg_ct,
	[MARK_TO_REG] = mark_to_reg_ct,
	[LABELS_TO_REG] = labels_to_reg_ct,
	[FTEID_TO_REG] = fteid_to_reg_ct,
	/* For NIC rules we store the restore metadata directly
	 * into reg_b that is passed to SW since we don't
	 * jump between steering domains.
	 */
	[NIC_MAPPED_OBJ_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
		.moffset = 0,
		.mlen = 16,
	},
	[NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
	[PACKET_COLOR_TO_REG] = packet_color_to_reg,
};
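/* Illustrative summary, derived only from the moffset/mlen values in the
 * table above (not from the hardware spec): in FDB mode reg_c_0 is split
 * into bits 0..15 for the mapped object id (chain/restore mapping) and
 * bits 16..31 for the source vport metadata, while reg_c_1 carries the
 * tunnel id and options starting at bit 8. NIC rules use reg_b instead
 * of reg_c_0 because they never cross steering domains.
 */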
struct mlx5e_tc_jump_state {
	u32 jump_count;
	bool jump_target;
	struct mlx5_flow_attr *jumping_attr;

	enum flow_action_id last_id;
	u32 last_index;
};

struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
{
	struct mlx5e_tc_table *tc;

	tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
	return tc ? tc : ERR_PTR(-ENOMEM);
}

void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
{
	kvfree(tc);
}

struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
{
	return tc->chains;
}

/* To avoid a false lock dependency warning, set the tc_ht lock class
 * apart from the lock class of the ht used inside flow groups: when the
 * last flow is deleted from a group and the group is then deleted, we
 * get into del_sw_flow_group(), which calls rhashtable_destroy() on
 * fg->ftes_hash. That takes an ht->mutex, but a different one than the
 * ht->mutex here.
 */
static struct lock_class_key tc_ht_lock_key;
static struct lock_class_key tc_ht_wq_key;

static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow,
					struct mlx5_flow_attr *attr);

void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
			    enum mlx5e_tc_attr_to_reg type,
			    u32 val,
			    u32 mask)
{
	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	u32 max_mask = GENMASK(match_len - 1, 0);
	__be32 curr_mask_be, curr_val_be;
	u32 curr_mask, curr_val;

	fmask = headers_c + soffset;
	fval = headers_v + soffset;

	memcpy(&curr_mask_be, fmask, 4);
	memcpy(&curr_val_be, fval, 4);

	curr_mask = be32_to_cpu(curr_mask_be);
	curr_val = be32_to_cpu(curr_val_be);

	/* move to the correct offset */
	WARN_ON(mask > max_mask);
	mask <<= moffset;
	val <<= moffset;
	max_mask <<= moffset;

	/* zero out the current val and mask */
	curr_mask &= ~max_mask;
	curr_val &= ~max_mask;

	/* add the new match to the mask and value */
	curr_mask |= mask;
	curr_val |= val;

	/* back to be32 and write out */
	curr_mask_be = cpu_to_be32(curr_mask);
	curr_val_be = cpu_to_be32(curr_val);

	memcpy(fmask, &curr_mask_be, 4);
	memcpy(fval, &curr_val_be, 4);

	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
}
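/* Worked example (illustrative, not additional driver logic): matching
 * VPORT_TO_REG with val = 0x5 and mask = 0xffff. match_len is 16, so
 * max_mask = 0xffff; moffset is 16, so after the shifts mask becomes
 * 0xffff0000 and val becomes 0x00050000. Bits 16..31 of the current
 * reg_c_0 criteria/value are cleared and OR-ed with the new match,
 * leaving any mapped object id in bits 0..15 untouched.
 */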
void
mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
				enum mlx5e_tc_attr_to_reg type,
				u32 *val,
				u32 *mask)
{
	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	u32 max_mask = GENMASK(match_len - 1, 0);
	__be32 curr_mask_be, curr_val_be;
	u32 curr_mask, curr_val;

	fmask = headers_c + soffset;
	fval = headers_v + soffset;

	memcpy(&curr_mask_be, fmask, 4);
	memcpy(&curr_val_be, fval, 4);

	curr_mask = be32_to_cpu(curr_mask_be);
	curr_val = be32_to_cpu(curr_val_be);

	*mask = (curr_mask >> moffset) & max_mask;
	*val = (curr_val >> moffset) & max_mask;
}

int
mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
				     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				     enum mlx5_flow_namespace_type ns,
				     enum mlx5e_tc_attr_to_reg type,
				     u32 data)
{
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	char *modact;
	int err;

	modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
	if (IS_ERR(modact))
		return PTR_ERR(modact);

	/* Firmware has a 5-bit length field, and 0 means 32 bits */
	if (mlen == 32)
		mlen = 0;

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, field, mfield);
	MLX5_SET(set_action_in, modact, offset, moffset);
	MLX5_SET(set_action_in, modact, length, mlen);
	MLX5_SET(set_action_in, modact, data, data);
	err = mod_hdr_acts->num_actions;
	mod_hdr_acts->num_actions++;

	return err;
}

static struct mlx5e_tc_act_stats_handle *
get_act_stats_handle(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->action_stats_handle;
	}

	return tc->action_stats_handle;
}

struct mlx5e_tc_int_port_priv *
mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->int_port_priv;
	}

	return NULL;
}

struct mlx5e_flow_meters *
mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
{
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_priv *priv;

	if (is_mdev_switchdev_mode(dev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;
		priv = netdev_priv(uplink_rpriv->netdev);
		if (!uplink_priv->flow_meters)
			uplink_priv->flow_meters =
				mlx5e_flow_meters_init(priv,
						       MLX5_FLOW_NAMESPACE_FDB,
						       uplink_priv->post_act);
		if (!IS_ERR(uplink_priv->flow_meters))
			return uplink_priv->flow_meters;
	}

	return NULL;
}

static struct mlx5_tc_ct_priv *
get_ct_priv(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->ct_priv;
	}

	return tc->ct;
}

static struct mlx5e_tc_psample *
get_sample_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->tc_psample;
	}

	return NULL;
}

static struct mlx5e_post_act *
get_post_action(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->post_act;
	}

	return tc->post_act;
}

struct mlx5_flow_handle *
mlx5_tc_rule_insert(struct mlx5e_priv *priv,
		    struct mlx5_flow_spec *spec,
		    struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (is_mdev_switchdev_mode(priv->mdev))
		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

	return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
}

void
mlx5_tc_rule_delete(struct mlx5e_priv *priv,
		    struct mlx5_flow_handle *rule,
		    struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
		return;
	}

	mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}

static bool
is_flow_meter_action(struct mlx5_flow_attr *attr)
{
	return (((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
		 (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) ||
		attr->flags & MLX5_ATTR_FLAG_MTU);
}

static int
mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
			struct mlx5_flow_attr *attr)
{
	struct mlx5e_post_act *post_act = get_post_action(priv);
	struct mlx5e_post_meter_priv *post_meter;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5e_flow_meter_handle *meter;
	enum mlx5e_post_meter_type type;

	meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
	if (IS_ERR(meter)) {
		mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
		return PTR_ERR(meter);
	}

	ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
	type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE;
	post_meter = mlx5e_post_meter_init(priv, ns_type, post_act,
					   type,
					   meter->act_counter, meter->drop_counter,
					   attr->branch_true, attr->branch_false);
	if (IS_ERR(post_meter)) {
		mlx5_core_err(priv->mdev, "Failed to init post meter\n");
		goto err_meter_init;
	}

	attr->meter_attr.meter = meter;
	attr->meter_attr.post_meter = post_meter;
	attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;

	return 0;

err_meter_init:
	mlx5e_tc_meter_put(meter);
	return PTR_ERR(post_meter);
}

static void
mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
{
	mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter);
	mlx5e_tc_meter_put(attr->meter_attr.meter);
}

struct mlx5_flow_handle *
mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
		      struct mlx5_flow_spec *spec,
		      struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	int err;

	if (!is_mdev_switchdev_mode(priv->mdev))
		return mlx5e_add_offloaded_nic_rule(priv, spec, attr);

	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
		return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);

	if (is_flow_meter_action(attr)) {
		err = mlx5e_tc_add_flow_meter(priv, attr);
		if (err)
			return ERR_PTR(err);
	}

	return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
}

void
mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
			struct mlx5_flow_handle *rule,
			struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!is_mdev_switchdev_mode(priv->mdev)) {
		mlx5e_del_offloaded_nic_rule(priv, rule, attr);
		return;
	}

	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
		mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
		return;
	}

	mlx5_eswitch_del_offloaded_rule(esw, rule, attr);

	if (attr->meter_attr.meter)
		mlx5e_tc_del_flow_meter(esw, attr);
}
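/* Dispatch summary (descriptive only, mirrors the two functions above):
 * mlx5e_tc_rule_offload()/mlx5e_tc_rule_unoffload() pick, in order, the
 * NIC tables when not in switchdev mode, the sampler when the attr has
 * MLX5_ATTR_FLAG_SAMPLE, and otherwise the eswitch FDB, attaching a flow
 * meter first when the attr carries an ASO flow meter or an MTU check.
 */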
int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
			  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			  enum mlx5_flow_namespace_type ns,
			  enum mlx5e_tc_attr_to_reg type,
			  u32 data)
{
	int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);

	return ret < 0 ? ret : 0;
}

void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
					  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
					  enum mlx5e_tc_attr_to_reg type,
					  int act_id, u32 data)
{
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	char *modact;

	modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);

	/* Firmware has a 5-bit length field, and 0 means 32 bits */
	if (mlen == 32)
		mlen = 0;

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, field, mfield);
	MLX5_SET(set_action_in, modact, offset, moffset);
	MLX5_SET(set_action_in, modact, length, mlen);
	MLX5_SET(set_action_in, modact, data, data);
}
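/* Usage pattern (illustrative): callers that need to patch a mapping
 * later keep the action index returned by
 * mlx5e_tc_match_to_reg_set_and_get_id() and hand it back to
 * mlx5e_tc_match_to_reg_mod_hdr_change() once the final data (e.g. a
 * mapping allocated afterwards) is known; plain
 * mlx5e_tc_match_to_reg_set() is the fire-and-forget variant that
 * discards the index.
 */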
struct mlx5e_hairpin {
	struct mlx5_hairpin *pair;

	struct mlx5_core_dev *func_mdev;
	struct mlx5e_priv *func_priv;
	u32 tdn;
	struct mlx5e_tir direct_tir;

	int num_channels;
	u8 log_num_packets;
	struct mlx5e_rqt indir_rqt;
	struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
	struct mlx5_ttc_table *ttc;
};

struct mlx5e_hairpin_entry {
	/* a node of a hash table which keeps all the hairpin entries */
	struct hlist_node hairpin_hlist;

	/* protects flows list */
	spinlock_t flows_lock;
	/* flows sharing the same hairpin */
	struct list_head flows;
	/* hpe's that were not fully initialized when the dead peer update
	 * event function traversed them.
	 */
	struct list_head dead_peer_wait_list;

	u16 peer_vhca_id;
	u8 prio;
	struct mlx5e_hairpin *hp;
	refcount_t refcnt;
	struct completion res_ready;
};

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow);

struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
		return ERR_PTR(-EINVAL);
	return flow;
}

void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
	if (refcount_dec_and_test(&flow->refcnt)) {
		mlx5e_tc_del_flow(priv, flow);
		kfree_rcu(flow, rcu_head);
	}
}

bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, ESWITCH);
}

bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, FT);
}

bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, OFFLOADED);
}

int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
{
	return mlx5e_is_eswitch_flow(flow) ?
		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}

static struct mlx5_core_dev *
get_flow_counter_dev(struct mlx5e_tc_flow *flow)
{
	return mlx5e_is_eswitch_flow(flow) ?
		flow->attr->esw_attr->counter_dev : flow->priv->mdev;
}

static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
		&esw->offloads.mod_hdr :
		&tc->mod_hdr;
}

int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv,
			    struct mlx5e_tc_flow *flow,
			    struct mlx5_flow_attr *attr)
{
	struct mlx5e_mod_hdr_handle *mh;

	mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
				  mlx5e_get_flow_namespace(flow),
				  &attr->parse_attr->mod_hdr_acts);
	if (IS_ERR(mh))
		return PTR_ERR(mh);

	WARN_ON(attr->modify_hdr);
	attr->modify_hdr = mlx5e_mod_hdr_get(mh);
	attr->mh = mh;

	return 0;
}

void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_attr *attr)
{
	/* flow wasn't fully initialized */
	if (!attr->mh)
		return;

	mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
			     attr->mh);
	attr->mh = NULL;
}

static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
	struct mlx5_core_dev *mdev;
	struct net_device *netdev;
	struct mlx5e_priv *priv;

	netdev = dev_get_by_index(net, ifindex);
	if (!netdev)
		return ERR_PTR(-ENODEV);

	priv = netdev_priv(netdev);
	mdev = priv->mdev;
	dev_put(netdev);

	/* Mirred tc action holds a refcount on the ifindex net_device (see
	 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
	 * after dev_put(netdev), while we're in the context of adding a tc flow.
	 *
	 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
	 * stored in a hairpin object, which exists until all flows, that refer to it, get
	 * removed.
	 *
	 * On the other hand, after a hairpin object has been created, the peer net_device may
	 * be removed/unbound while there are still some hairpin flows that are using it. This
	 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
	 * NETDEV_UNREGISTER event of the peer net_device.
	 */
	return mdev;
}

static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
	struct mlx5e_tir_builder *builder;
	int err;

	builder = mlx5e_tir_builder_alloc(false);
	if (!builder)
		return -ENOMEM;

	err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
	if (err)
		goto out;

	mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
	err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
	if (err)
		goto create_tir_err;

out:
	mlx5e_tir_builder_free(builder);
	return err;

create_tir_err:
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);

	goto out;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
	mlx5e_tir_destroy(&hp->direct_tir);
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_rss_params_indir *indir;
	int err;

	indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
	if (!indir)
		return -ENOMEM;

	mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
	err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
				   mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
				   indir);

	kvfree(indir);
	return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5e_rss_params_hash rss_hash;
	enum mlx5_traffic_types tt, max_tt;
	struct mlx5e_tir_builder *builder;
	int err = 0;

	builder = mlx5e_tir_builder_alloc(false);
	if (!builder)
		return -ENOMEM;

	rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
		struct mlx5e_rss_params_traffic_type rss_tt;

		rss_tt = mlx5e_rss_get_default_tt_config(tt);

		mlx5e_tir_builder_build_rqt(builder, hp->tdn,
					    mlx5e_rqt_get_rqtn(&hp->indir_rqt),
					    false);
		mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);

		err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
		if (err) {
			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
			goto err_destroy_tirs;
		}

		mlx5e_tir_builder_clear(builder);
	}

out:
	mlx5e_tir_builder_free(builder);
	return err;

err_destroy_tirs:
	max_tt = tt;
	for (tt = 0; tt < max_tt; tt++)
		mlx5e_tir_destroy(&hp->indir_tir[tt]);

	goto out;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
	int tt;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
		mlx5e_tir_destroy(&hp->indir_tir[tt]);
}

static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
					 struct ttc_params *ttc_params)
{
	struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
	int tt;

	memset(ttc_params, 0, sizeof(*ttc_params));

	ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
						 MLX5_FLOW_NAMESPACE_KERNEL);
	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
		ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		ttc_params->dests[tt].tir_num =
			tt == MLX5_TT_ANY ?
				mlx5e_tir_get_tirn(&hp->direct_tir) :
				mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
	}

	ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
	ft_attr->prio = MLX5E_TC_PRIO;
}

static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct ttc_params ttc_params;
	struct mlx5_ttc_table *ttc;
	int err;

	err = mlx5e_hairpin_create_indirect_rqt(hp);
	if (err)
		return err;

	err = mlx5e_hairpin_create_indirect_tirs(hp);
	if (err)
		goto err_create_indirect_tirs;

	mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
	hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
	if (IS_ERR(hp->ttc)) {
		err = PTR_ERR(hp->ttc);
		goto err_create_ttc_table;
	}

	ttc = mlx5e_fs_get_ttc(priv->fs, false);
	netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
		   hp->num_channels,
		   mlx5_get_ttc_flow_table(ttc)->id);

	return 0;

err_create_ttc_table:
	mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
	mlx5e_rqt_destroy(&hp->indir_rqt);

	return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
	mlx5_destroy_ttc_table(hp->ttc);
	mlx5e_hairpin_destroy_indirect_tirs(hp);
	mlx5e_rqt_destroy(&hp->indir_rqt);
}

static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
		     int peer_ifindex)
{
	struct mlx5_core_dev *func_mdev, *peer_mdev;
	struct mlx5e_hairpin *hp;
	struct mlx5_hairpin *pair;
	int err;

	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
	if (!hp)
		return ERR_PTR(-ENOMEM);

	func_mdev = priv->mdev;
	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (IS_ERR(peer_mdev)) {
		err = PTR_ERR(peer_mdev);
		goto create_pair_err;
	}

	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
	if (IS_ERR(pair)) {
		err = PTR_ERR(pair);
		goto create_pair_err;
	}
	hp->pair = pair;
	hp->func_mdev = func_mdev;
	hp->func_priv = priv;
	hp->num_channels = params->num_channels;
	hp->log_num_packets = params->log_num_packets;

	err = mlx5e_hairpin_create_transport(hp);
	if (err)
		goto create_transport_err;

	if (hp->num_channels > 1) {
		err = mlx5e_hairpin_rss_init(hp);
		if (err)
			goto rss_init_err;
	}

	return hp;

rss_init_err:
	mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
	mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
	kfree(hp);
	return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
	if (hp->num_channels > 1)
		mlx5e_hairpin_rss_cleanup(hp);
	mlx5e_hairpin_destroy_transport(hp);
	mlx5_core_hairpin_destroy(hp->pair);
	kvfree(hp);
}

static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
	return (peer_vhca_id << 16 | prio);
}
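/* Key layout (derived from the shift above): the 32-bit hash key packs
 * the peer vhca_id into bits 16..31 and the matched PCP prio (or
 * UNKNOWN_MATCH_PRIO, defined below) into the low bits, so one hairpin
 * pair is shared per (peer device, prio) tuple.
 */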
static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
						     u16 peer_vhca_id, u8 prio)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5e_hairpin_entry *hpe;
	u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

	hash_for_each_possible(tc->hairpin_tbl, hpe,
			       hairpin_hlist, hash_key) {
		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
			refcount_inc(&hpe->refcnt);
			return hpe;
		}
	}

	return NULL;
}

static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
			      struct mlx5e_hairpin_entry *hpe)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	/* no more hairpin flows for us, release the hairpin pair */
	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
		return;
	hash_del(&hpe->hairpin_hlist);
	mutex_unlock(&tc->hairpin_tbl_lock);

	if (!IS_ERR_OR_NULL(hpe->hp)) {
		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
			   dev_name(hpe->hp->pair->peer_mdev->device));

		mlx5e_hairpin_destroy(hpe->hp);
	}

	WARN_ON(!list_empty(&hpe->flows));
	kfree(hpe);
}

#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
				  struct mlx5_flow_spec *spec, u8 *match_prio,
				  struct netlink_ext_ack *extack)
{
	void *headers_c, *headers_v;
	u8 prio_val, prio_mask = 0;
	bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only PCP trust state supported for hairpin");
		return -EOPNOTSUPP;
	}
#endif
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

	vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
	if (vlan_present) {
		prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
		prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
	}

	if (!vlan_present || !prio_mask) {
		prio_val = UNKNOWN_MATCH_PRIO;
	} else if (prio_mask != 0x7) {
		NL_SET_ERR_MSG_MOD(extack,
				   "masked priority match not supported for hairpin");
		return -EOPNOTSUPP;
	}

	*match_prio = prio_val;
	return 0;
}

static int debugfs_hairpin_num_active_get(void *data, u64 *val)
{
	struct mlx5e_tc_table *tc = data;
	struct mlx5e_hairpin_entry *hpe;
	u32 cnt = 0;
	u32 bkt;

	mutex_lock(&tc->hairpin_tbl_lock);
	hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
		cnt++;
	mutex_unlock(&tc->hairpin_tbl_lock);

	*val = cnt;

	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active,
			 debugfs_hairpin_num_active_get, NULL, "%llu\n");

static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv)
{
	struct mlx5e_tc_table *tc = file->private;
	struct mlx5e_hairpin_entry *hpe;
	u32 bkt;

	mutex_lock(&tc->hairpin_tbl_lock);
	hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
		seq_printf(file,
			   "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n",
			   hpe->peer_vhca_id, hpe->prio,
			   refcount_read(&hpe->refcnt), hpe->hp->num_channels,
			   BIT(hpe->hp->log_num_packets));
	mutex_unlock(&tc->hairpin_tbl_lock);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump);

static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc,
				  struct dentry *dfs_root)
{
	if (IS_ERR_OR_NULL(dfs_root))
		return;

	tc->dfs_root = debugfs_create_dir("tc", dfs_root);

	debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc,
			    &fops_hairpin_num_active);
	debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc,
			    &debugfs_hairpin_table_dump_fops);
}

static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5e_tc_flow_parse_attr *parse_attr,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct devlink *devlink = priv_to_devlink(priv->mdev);
	int peer_ifindex = parse_attr->mirred_ifindex[0];
	union devlink_param_value val = {};
	struct mlx5_hairpin_params params;
	struct mlx5_core_dev *peer_mdev;
	struct mlx5e_hairpin_entry *hpe;
	struct mlx5e_hairpin *hp;
	u8 match_prio;
	u16 peer_id;
	int err;

	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (IS_ERR(peer_mdev)) {
		NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
		return PTR_ERR(peer_mdev);
	}

	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
		return -EOPNOTSUPP;
	}

	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
				     extack);
	if (err)
		return err;

	mutex_lock(&tc->hairpin_tbl_lock);
	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
	if (hpe) {
		mutex_unlock(&tc->hairpin_tbl_lock);
		wait_for_completion(&hpe->res_ready);

		if (IS_ERR(hpe->hp)) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
	if (!hpe) {
		mutex_unlock(&tc->hairpin_tbl_lock);
		return -ENOMEM;
	}

	spin_lock_init(&hpe->flows_lock);
	INIT_LIST_HEAD(&hpe->flows);
	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
	hpe->peer_vhca_id = peer_id;
	hpe->prio = match_prio;
	refcount_set(&hpe->refcnt, 1);
	init_completion(&hpe->res_ready);

	hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
		 hash_hairpin_info(peer_id, match_prio));
	mutex_unlock(&tc->hairpin_tbl_lock);

	err = devl_param_driverinit_value_get(
		devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val);
	if (err) {
		err = -ENOMEM;
		goto out_err;
	}

	params.log_num_packets = ilog2(val.vu32);
	params.log_data_size =
		clamp_t(u32,
			params.log_num_packets +
				MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev),
			MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz),
			MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));

	params.q_counter = priv->q_counter;
	err = devl_param_driverinit_value_get(
		devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val);
	if (err) {
		err = -ENOMEM;
		goto out_err;
	}

	params.num_channels = val.vu32;

	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
	hpe->hp = hp;
	complete_all(&hpe->res_ready);
	if (IS_ERR(hp)) {
		err = PTR_ERR(hp);
		goto out_err;
	}

	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
		   mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
		   dev_name(hp->pair->peer_mdev->device),
		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
	if (hpe->hp->num_channels > 1) {
		flow_flag_set(flow, HAIRPIN_RSS);
		flow->attr->nic_attr->hairpin_ft =
			mlx5_get_ttc_flow_table(hpe->hp->ttc);
	} else {
		flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
	}

	flow->hpe = hpe;
	spin_lock(&hpe->flows_lock);
	list_add(&flow->hairpin, &hpe->flows);
	spin_unlock(&hpe->flows_lock);

	return 0;

out_err:
	mlx5e_hairpin_put(priv, hpe);
	return err;
}
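/* Sizing example for mlx5e_hairpin_flow_add() above (illustrative
 * numbers, caps vary per device): with the devlink hairpin queue size
 * param set to 1024 packets, log_num_packets = ilog2(1024) = 10, and
 * log_data_size becomes 10 + MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) clamped
 * into [log_min_hairpin_wq_data_sz, log_max_hairpin_wq_data_sz] from
 * the HCA caps.
 */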
static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
				   struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->hpe)
		return;

	spin_lock(&flow->hpe->flows_lock);
	list_del(&flow->hairpin);
	spin_unlock(&flow->hpe->flows_lock);

	mlx5e_hairpin_put(priv, flow->hpe);
	flow->hpe = NULL;
}

struct mlx5_flow_handle *
mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_context *flow_context = &spec->flow_context;
	struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_fs_chains *nic_chains;
	struct mlx5_flow_act flow_act = {
		.action = attr->action,
		.flags = FLOW_ACT_NO_APPEND,
	};
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_table *ft;
	int dest_ix = 0;

	nic_chains = mlx5e_nic_chains(tc);
	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
	flow_context->flow_tag = nic_attr->flow_tag;

	if (attr->dest_ft) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = attr->dest_ft;
		dest_ix++;
	} else if (nic_attr->hairpin_ft) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = nic_attr->hairpin_ft;
		dest_ix++;
	} else if (nic_attr->hairpin_tirn) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
		dest_ix++;
	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		if (attr->dest_chain) {
			dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
								 attr->dest_chain, 1,
								 MLX5E_TC_FT_LEVEL);
			if (IS_ERR(dest[dest_ix].ft))
				return ERR_CAST(dest[dest_ix].ft);
		} else {
			dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
		}
		dest_ix++;
	}

	if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
	    MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
		flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;

	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
		dest_ix++;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		flow_act.modify_hdr = attr->modify_hdr;

	mutex_lock(&tc->t_lock);
	if (IS_ERR_OR_NULL(tc->t)) {
		/* Create the root table here if it doesn't exist yet */
		tc->t =
			mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);

		if (IS_ERR(tc->t)) {
			mutex_unlock(&tc->t_lock);
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			rule = ERR_CAST(tc->t);
			goto err_ft_get;
		}
	}
	mutex_unlock(&tc->t_lock);

	if (attr->chain || attr->prio)
		ft = mlx5_chains_get_table(nic_chains,
					   attr->chain, attr->prio,
					   MLX5E_TC_FT_LEVEL);
	else
		ft = attr->ft;

	if (IS_ERR(ft)) {
		rule = ERR_CAST(ft);
		goto err_ft_get;
	}

	if (attr->outer_match_level != MLX5_MATCH_NONE)
		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

	rule = mlx5_add_flow_rules(ft, spec,
				   &flow_act, dest, dest_ix);
	if (IS_ERR(rule))
		goto err_rule;

	return rule;

err_rule:
	if (attr->chain || attr->prio)
		mlx5_chains_put_table(nic_chains,
				      attr->chain, attr->prio,
				      MLX5E_TC_FT_LEVEL);
err_ft_get:
	if (attr->dest_chain)
		mlx5_chains_put_table(nic_chains,
				      attr->dest_chain, 1,
				      MLX5E_TC_FT_LEVEL);

	return ERR_CAST(rule);
}

static int
alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
			struct mlx5_flow_attr *attr)
{
	struct mlx5_fc *counter;

	counter = mlx5_fc_create(counter_dev, true);
	if (IS_ERR(counter))
		return PTR_ERR(counter);

	attr->counter = counter;
	return 0;
}

static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_core_dev *dev = priv->mdev;
	int err;

	parse_attr = attr->parse_attr;

	if (flow_flag_test(flow, HAIRPIN)) {
		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
		if (err)
			return err;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		err = alloc_flow_attr_counter(dev, attr);
		if (err)
			return err;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
		if (err)
			return err;
	}

	flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr);
	return PTR_ERR_OR_ZERO(flow->rule[0]);
}

void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
				  struct mlx5_flow_handle *rule,
				  struct mlx5_flow_attr *attr)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_fs_chains *nic_chains;

	nic_chains = mlx5e_nic_chains(tc);
	mlx5_del_flow_rules(rule);

	if (attr->chain || attr->prio)
		mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
				      MLX5E_TC_FT_LEVEL);

	if (attr->dest_chain)
		mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
				      MLX5E_TC_FT_LEVEL);
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_flow_attr *attr = flow->attr;

	flow_flag_clear(flow, OFFLOADED);

	if (!IS_ERR_OR_NULL(flow->rule[0]))
		mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);

	/* Remove root table if no rules are left to avoid
	 * extra steering hops.
	 */
	mutex_lock(&tc->t_lock);
	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
	    !IS_ERR_OR_NULL(tc->t)) {
		mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
		tc->t = NULL;
	}
	mutex_unlock(&tc->t_lock);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
		mlx5_fc_destroy(priv->mdev, attr->counter);

	if (flow_flag_test(flow, HAIRPIN))
		mlx5e_hairpin_flow_del(priv, flow);

	free_flow_post_acts(flow);

	kvfree(attr->parse_attr);
	kfree(flow->attr);
}

struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
			   struct mlx5e_tc_flow *flow,
			   struct mlx5_flow_spec *spec,
			   struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_handle *rule;

	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

	rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);

	if (IS_ERR(rule))
		return rule;

	if (attr->esw_attr->split_count) {
		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
		if (IS_ERR(flow->rule[1]))
			goto err_rule1;
	}

	return rule;

err_rule1:
	mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
	return flow->rule[1];
}

void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5_flow_attr *attr)
{
	flow_flag_clear(flow, OFFLOADED);

	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
		return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);

	if (attr->esw_attr->split_count)
		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

	mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
}
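/* Background for the slow path below (descriptive): a flow whose encap
 * action has no valid neighbour yet is offloaded to the slow table
 * instead, and, when the FW supports fwd+modify
 * (fdb_modify_header_fwd_to_table), the chain id mapping is written
 * into MAPPED_OBJ_TO_REG first so that a later miss resumes software
 * processing in the right chain.
 */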
struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
			      struct mlx5e_tc_flow *flow,
			      struct mlx5_flow_spec *spec)
{
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct mlx5e_mod_hdr_handle *mh = NULL;
	struct mlx5_flow_attr *slow_attr;
	struct mlx5_flow_handle *rule;
	bool fwd_and_modify_cap;
	u32 chain_mapping = 0;
	int err;

	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
	if (!slow_attr)
		return ERR_PTR(-ENOMEM);

	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->esw_attr->split_count = 0;
	slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;

	fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
	if (!fwd_and_modify_cap)
		goto skip_restore;

	err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
	if (err)
		goto err_get_chain;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
					MAPPED_OBJ_TO_REG, chain_mapping);
	if (err)
		goto err_reg_set;

	mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
				  MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
	if (IS_ERR(mh)) {
		err = PTR_ERR(mh);
		goto err_attach;
	}

	slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);

skip_restore:
	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		goto err_offload;
	}

	flow->attr->slow_mh = mh;
	flow->chain_mapping = chain_mapping;
	flow_flag_set(flow, SLOW);

	mlx5e_mod_hdr_dealloc(&mod_acts);
	kfree(slow_attr);

	return rule;

err_offload:
	if (fwd_and_modify_cap)
		mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
err_attach:
err_reg_set:
	if (fwd_and_modify_cap)
		mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
err_get_chain:
	mlx5e_mod_hdr_dealloc(&mod_acts);
	kfree(slow_attr);
	return ERR_PTR(err);
}

void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
				       struct mlx5e_tc_flow *flow)
{
	struct mlx5e_mod_hdr_handle *slow_mh = flow->attr->slow_mh;
	struct mlx5_flow_attr *slow_attr;

	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
	if (!slow_attr) {
		mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
		return;
	}

	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->esw_attr->split_count = 0;
	slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
	if (slow_mh) {
		slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
		slow_attr->modify_hdr = mlx5e_mod_hdr_get(slow_mh);
	}
	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
	if (slow_mh) {
		mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), slow_mh);
		mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
		flow->chain_mapping = 0;
		flow->attr->slow_mh = NULL;
	}
	flow_flag_clear(flow, SLOW);
	kfree(slow_attr);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
			     struct list_head *unready_flows)
{
	flow_flag_set(flow, NOT_READY);
	list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
	list_del(&flow->unready);
	flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_add(flow, &uplink_priv->unready_flows);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_del(flow);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
{
	struct mlx5_core_dev *out_mdev, *route_mdev;
	struct mlx5e_priv *out_priv, *route_priv;

	out_priv = netdev_priv(out_dev);
	out_mdev = out_priv->mdev;
	route_priv = netdev_priv(route_dev);
	route_mdev = route_priv->mdev;

	if (out_mdev->coredev_type != MLX5_COREDEV_PF)
		return false;

	if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
	    route_mdev->coredev_type != MLX5_COREDEV_SF)
		return false;

	return mlx5e_same_hw_devs(out_priv, route_priv);
}

int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
	struct mlx5e_priv *out_priv, *route_priv;
	struct mlx5_devcom *devcom = NULL;
	struct mlx5_core_dev *route_mdev;
	struct mlx5_eswitch *esw;
	u16 vhca_id;
	int err;

	out_priv = netdev_priv(out_dev);
	esw = out_priv->mdev->priv.eswitch;
	route_priv = netdev_priv(route_dev);
	route_mdev = route_priv->mdev;

	vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
	if (mlx5_lag_is_active(out_priv->mdev)) {
		/* In the LAG case we may get devices from different eswitch
		 * instances. If we fail to get the vport num here, it most
		 * likely means we are on the wrong eswitch.
		 */
		err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
		if (err != -ENOENT)
			return err;

		devcom = out_priv->mdev->priv.devcom;
		esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
		if (!esw)
			return -ENODEV;
	}

	err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
	if (devcom)
		mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return err;
}

static int
set_encap_dests(struct mlx5e_priv *priv,
		struct mlx5e_tc_flow *flow,
		struct mlx5_flow_attr *attr,
		struct netlink_ext_ack *extack,
		bool *vf_tun)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct net_device *encap_dev = NULL;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *out_priv;
	int out_index;
	int err = 0;

	if (!mlx5e_is_eswitch_flow(flow))
		return 0;

	parse_attr = attr->parse_attr;
	esw_attr = attr->esw_attr;
	*vf_tun = false;

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		struct net_device *out_dev;
		int mirred_ifindex;

		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
		if (!out_dev) {
			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
			err = -ENODEV;
			goto out;
		}
		err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
					 extack, &encap_dev);
		dev_put(out_dev);
		if (err)
			goto out;

		if (esw_attr->dests[out_index].flags &
		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
		    !esw_attr->dest_int_port)
			*vf_tun = true;

		out_priv = netdev_priv(encap_dev);
		rpriv = out_priv->ppriv;
		esw_attr->dests[out_index].rep = rpriv->rep;
		esw_attr->dests[out_index].mdev = out_priv->mdev;
	}

	if (*vf_tun && esw_attr->out_count > 1) {
		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
		err = -EOPNOTSUPP;
		goto out;
	}

out:
	return err;
}

static void
clean_encap_dests(struct mlx5e_priv *priv,
		  struct mlx5e_tc_flow *flow,
		  struct mlx5_flow_attr *attr)
{
	struct mlx5_esw_flow_attr *esw_attr;
	int out_index;

	if (!mlx5e_is_eswitch_flow(flow))
		return;

	esw_attr = attr->esw_attr;

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mlx5e_detach_encap(priv, flow, attr, out_index);
		kfree(attr->parse_attr->tun_info[out_index]);
	}
}

static int
verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
{
	if (!(actions &
	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
		NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
		return -EOPNOTSUPP;
	}

	if (!(~actions &
	      (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
		NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
		return -EOPNOTSUPP;
	}

	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
	    actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
		NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
		return -EOPNOTSUPP;
	}

	return 0;
}
1905 */ 1906 u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw, 1907 esw_attr->in_rep->vport); 1908 1909 err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, 1910 MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, 1911 metadata); 1912 if (err) 1913 goto err_out; 1914 1915 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1916 } 1917 } 1918 1919 if (flow_flag_test(flow, L3_TO_L2_DECAP)) { 1920 err = mlx5e_attach_decap(priv, flow, extack); 1921 if (err) 1922 goto err_out; 1923 } 1924 1925 if (netif_is_ovs_master(parse_attr->filter_dev)) { 1926 struct mlx5e_tc_int_port *int_port; 1927 1928 if (attr->chain) { 1929 NL_SET_ERR_MSG_MOD(extack, 1930 "Internal port rule is only supported on chain 0"); 1931 err = -EOPNOTSUPP; 1932 goto err_out; 1933 } 1934 1935 if (attr->dest_chain) { 1936 NL_SET_ERR_MSG_MOD(extack, 1937 "Internal port rule offload doesn't support goto action"); 1938 err = -EOPNOTSUPP; 1939 goto err_out; 1940 } 1941 1942 int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), 1943 parse_attr->filter_dev->ifindex, 1944 flow_flag_test(flow, EGRESS) ? 1945 MLX5E_TC_INT_PORT_EGRESS : 1946 MLX5E_TC_INT_PORT_INGRESS); 1947 if (IS_ERR(int_port)) { 1948 err = PTR_ERR(int_port); 1949 goto err_out; 1950 } 1951 1952 esw_attr->int_port = int_port; 1953 } 1954 1955 err = post_process_attr(flow, attr, extack); 1956 if (err) 1957 goto err_out; 1958 1959 err = mlx5e_tc_act_stats_add_flow(get_act_stats_handle(priv), flow); 1960 if (err) 1961 goto err_out; 1962 1963 /* we get here if one of the following takes place: 1964 * (1) there's no error 1965 * (2) there's an encap action and we don't have valid neigh 1966 */ 1967 if (flow_flag_test(flow, SLOW)) 1968 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); 1969 else 1970 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); 1971 1972 if (IS_ERR(flow->rule[0])) { 1973 err = PTR_ERR(flow->rule[0]); 1974 goto err_out; 1975 } 1976 flow_flag_set(flow, OFFLOADED); 1977 1978 return 0; 1979 1980 err_out: 1981 flow_flag_set(flow, FAILED); 1982 return err; 1983 } 1984 1985 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) 1986 { 1987 struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; 1988 void *headers_v = MLX5_ADDR_OF(fte_match_param, 1989 spec->match_value, 1990 misc_parameters_3); 1991 u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3, 1992 headers_v, 1993 geneve_tlv_option_0_data); 1994 1995 return !!geneve_tlv_opt_0_data; 1996 } 1997 1998 static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) 1999 { 2000 if (!attr) 2001 return; 2002 2003 mlx5_free_flow_attr_actions(flow, attr); 2004 kvfree(attr->parse_attr); 2005 kfree(attr); 2006 } 2007 2008 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, 2009 struct mlx5e_tc_flow *flow) 2010 { 2011 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 2012 struct mlx5_flow_attr *attr = flow->attr; 2013 struct mlx5_esw_flow_attr *esw_attr; 2014 2015 esw_attr = attr->esw_attr; 2016 mlx5e_put_flow_tunnel_id(flow); 2017 2018 if (flow_flag_test(flow, NOT_READY)) 2019 remove_unready_flow(flow); 2020 2021 if (mlx5e_is_offloaded_flow(flow)) { 2022 if (flow_flag_test(flow, SLOW)) 2023 mlx5e_tc_unoffload_from_slow_path(esw, flow); 2024 else 2025 mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); 2026 } 2027 complete_all(&flow->del_hw_done); 2028 2029 if (mlx5_flow_has_geneve_opt(flow)) 2030 mlx5_geneve_tlv_option_del(priv->mdev->geneve); 2031 2032 if (flow->decap_route) 2033 
mlx5e_detach_decap_route(priv, flow); 2034 2035 mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); 2036 2037 if (esw_attr->int_port) 2038 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port); 2039 2040 if (esw_attr->dest_int_port) 2041 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port); 2042 2043 if (flow_flag_test(flow, L3_TO_L2_DECAP)) 2044 mlx5e_detach_decap(priv, flow); 2045 2046 mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow); 2047 2048 free_flow_post_acts(flow); 2049 mlx5_free_flow_attr_actions(flow, attr); 2050 2051 kvfree(attr->esw_attr->rx_tun_attr); 2052 kvfree(attr->parse_attr); 2053 kfree(flow->attr); 2054 } 2055 2056 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) 2057 { 2058 struct mlx5_flow_attr *attr; 2059 2060 attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list); 2061 return attr->counter; 2062 } 2063 2064 /* Iterate over tmp_list of flows attached to flow_list head. */ 2065 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) 2066 { 2067 struct mlx5e_tc_flow *flow, *tmp; 2068 2069 list_for_each_entry_safe(flow, tmp, flow_list, tmp_list) 2070 mlx5e_flow_put(priv, flow); 2071 } 2072 2073 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) 2074 { 2075 struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; 2076 2077 if (!flow_flag_test(flow, ESWITCH) || 2078 !flow_flag_test(flow, DUP)) 2079 return; 2080 2081 mutex_lock(&esw->offloads.peer_mutex); 2082 list_del(&flow->peer); 2083 mutex_unlock(&esw->offloads.peer_mutex); 2084 2085 flow_flag_clear(flow, DUP); 2086 2087 if (refcount_dec_and_test(&flow->peer_flow->refcnt)) { 2088 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); 2089 kfree(flow->peer_flow); 2090 } 2091 2092 flow->peer_flow = NULL; 2093 } 2094 2095 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) 2096 { 2097 struct mlx5_core_dev *dev = flow->priv->mdev; 2098 struct mlx5_devcom *devcom = dev->priv.devcom; 2099 struct mlx5_eswitch *peer_esw; 2100 2101 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 2102 if (!peer_esw) 2103 return; 2104 2105 __mlx5e_tc_del_fdb_peer_flow(flow); 2106 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 2107 } 2108 2109 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, 2110 struct mlx5e_tc_flow *flow) 2111 { 2112 if (mlx5e_is_eswitch_flow(flow)) { 2113 mlx5e_tc_del_fdb_peer_flow(flow); 2114 mlx5e_tc_del_fdb_flow(priv, flow); 2115 } else { 2116 mlx5e_tc_del_nic_flow(priv, flow); 2117 } 2118 } 2119 2120 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f) 2121 { 2122 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2123 struct flow_action *flow_action = &rule->action; 2124 const struct flow_action_entry *act; 2125 int i; 2126 2127 if (chain) 2128 return false; 2129 2130 flow_action_for_each(i, act, flow_action) { 2131 switch (act->id) { 2132 case FLOW_ACTION_GOTO: 2133 return true; 2134 case FLOW_ACTION_SAMPLE: 2135 return true; 2136 default: 2137 continue; 2138 } 2139 } 2140 2141 return false; 2142 } 2143 2144 static int 2145 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, 2146 struct flow_dissector_key_enc_opts *opts, 2147 struct netlink_ext_ack *extack, 2148 bool *dont_care) 2149 { 2150 struct geneve_opt *opt; 2151 int off = 0; 2152 2153 *dont_care = true; 2154 2155 while (opts->len > off) { 2156 opt = (struct geneve_opt *)&opts->data[off]; 2157 2158 if (!(*dont_care) || 
opt->opt_class || opt->type || 2159 memchr_inv(opt->opt_data, 0, opt->length * 4)) { 2160 *dont_care = false; 2161 2162 if (opt->opt_class != htons(U16_MAX) || 2163 opt->type != U8_MAX) { 2164 NL_SET_ERR_MSG_MOD(extack, 2165 "Partial match of tunnel options in chain > 0 isn't supported"); 2166 netdev_warn(priv->netdev, 2167 "Partial match of tunnel options in chain > 0 isn't supported"); 2168 return -EOPNOTSUPP; 2169 } 2170 } 2171 2172 off += sizeof(struct geneve_opt) + opt->length * 4; 2173 } 2174 2175 return 0; 2176 } 2177 2178 #define COPY_DISSECTOR(rule, diss_key, dst)\ 2179 ({ \ 2180 struct flow_rule *__rule = (rule);\ 2181 typeof(dst) __dst = dst;\ 2182 \ 2183 memcpy(__dst,\ 2184 skb_flow_dissector_target(__rule->match.dissector,\ 2185 diss_key,\ 2186 __rule->match.key),\ 2187 sizeof(*__dst));\ 2188 }) 2189 2190 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, 2191 struct mlx5e_tc_flow *flow, 2192 struct flow_cls_offload *f, 2193 struct net_device *filter_dev) 2194 { 2195 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2196 struct netlink_ext_ack *extack = f->common.extack; 2197 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; 2198 struct flow_match_enc_opts enc_opts_match; 2199 struct tunnel_match_enc_opts tun_enc_opts; 2200 struct mlx5_rep_uplink_priv *uplink_priv; 2201 struct mlx5_flow_attr *attr = flow->attr; 2202 struct mlx5e_rep_priv *uplink_rpriv; 2203 struct tunnel_match_key tunnel_key; 2204 bool enc_opts_is_dont_care = true; 2205 u32 tun_id, enc_opts_id = 0; 2206 struct mlx5_eswitch *esw; 2207 u32 value, mask; 2208 int err; 2209 2210 esw = priv->mdev->priv.eswitch; 2211 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 2212 uplink_priv = &uplink_rpriv->uplink_priv; 2213 2214 memset(&tunnel_key, 0, sizeof(tunnel_key)); 2215 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, 2216 &tunnel_key.enc_control); 2217 if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) 2218 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, 2219 &tunnel_key.enc_ipv4); 2220 else 2221 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, 2222 &tunnel_key.enc_ipv6); 2223 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); 2224 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, 2225 &tunnel_key.enc_tp); 2226 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, 2227 &tunnel_key.enc_key_id); 2228 tunnel_key.filter_ifindex = filter_dev->ifindex; 2229 2230 err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); 2231 if (err) 2232 return err; 2233 2234 flow_rule_match_enc_opts(rule, &enc_opts_match); 2235 err = enc_opts_is_dont_care_or_full_match(priv, 2236 enc_opts_match.mask, 2237 extack, 2238 &enc_opts_is_dont_care); 2239 if (err) 2240 goto err_enc_opts; 2241 2242 if (!enc_opts_is_dont_care) { 2243 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts)); 2244 memcpy(&tun_enc_opts.key, enc_opts_match.key, 2245 sizeof(*enc_opts_match.key)); 2246 memcpy(&tun_enc_opts.mask, enc_opts_match.mask, 2247 sizeof(*enc_opts_match.mask)); 2248 2249 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, 2250 &tun_enc_opts, &enc_opts_id); 2251 if (err) 2252 goto err_enc_opts; 2253 } 2254 2255 value = tun_id << ENC_OPTS_BITS | enc_opts_id; 2256 mask = enc_opts_id ? 
TUNNEL_ID_MASK : 2257 (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); 2258 2259 if (attr->chain) { 2260 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, 2261 TUNNEL_TO_REG, value, mask); 2262 } else { 2263 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; 2264 err = mlx5e_tc_match_to_reg_set(priv->mdev, 2265 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB, 2266 TUNNEL_TO_REG, value); 2267 if (err) 2268 goto err_set; 2269 2270 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 2271 } 2272 2273 flow->attr->tunnel_id = value; 2274 return 0; 2275 2276 err_set: 2277 if (enc_opts_id) 2278 mapping_remove(uplink_priv->tunnel_enc_opts_mapping, 2279 enc_opts_id); 2280 err_enc_opts: 2281 mapping_remove(uplink_priv->tunnel_mapping, tun_id); 2282 return err; 2283 } 2284 2285 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) 2286 { 2287 u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK; 2288 u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS; 2289 struct mlx5_rep_uplink_priv *uplink_priv; 2290 struct mlx5e_rep_priv *uplink_rpriv; 2291 struct mlx5_eswitch *esw; 2292 2293 esw = flow->priv->mdev->priv.eswitch; 2294 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 2295 uplink_priv = &uplink_rpriv->uplink_priv; 2296 2297 if (tun_id) 2298 mapping_remove(uplink_priv->tunnel_mapping, tun_id); 2299 if (enc_opts_id) 2300 mapping_remove(uplink_priv->tunnel_enc_opts_mapping, 2301 enc_opts_id); 2302 } 2303 2304 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, 2305 struct flow_match_basic *match, bool outer, 2306 void *headers_c, void *headers_v) 2307 { 2308 bool ip_version_cap; 2309 2310 ip_version_cap = outer ? 2311 MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2312 ft_field_support.outer_ip_version) : 2313 MLX5_CAP_FLOWTABLE_NIC_RX(mdev, 2314 ft_field_support.inner_ip_version); 2315 2316 if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) && 2317 (match->key->n_proto == htons(ETH_P_IP) || 2318 match->key->n_proto == htons(ETH_P_IPV6))) { 2319 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); 2320 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 2321 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6); 2322 } else { 2323 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 2324 ntohs(match->mask->n_proto)); 2325 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, 2326 ntohs(match->key->n_proto)); 2327 } 2328 } 2329 2330 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer) 2331 { 2332 void *headers_v; 2333 u16 ethertype; 2334 u8 ip_version; 2335 2336 if (outer) 2337 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 2338 else 2339 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); 2340 2341 ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version); 2342 /* Return ip_version converted from ethertype anyway */ 2343 if (!ip_version) { 2344 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); 2345 if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP) 2346 ip_version = 4; 2347 else if (ethertype == ETH_P_IPV6) 2348 ip_version = 6; 2349 } 2350 return ip_version; 2351 } 2352 2353 /* Tunnel device follows RFC 6040, see include/net/inet_ecn.h. 
2354 * It changes the inner ip_ecn depending on the inner and outer ip_ecn as follows:
2355 * +---------+----------------------------------------+
2356 * |Arriving | Arriving Outer Header |
2357 * | Inner +---------+---------+---------+----------+
2358 * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
2359 * +---------+---------+---------+---------+----------+
2360 * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> |
2361 * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* |
2362 * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* |
2363 * | CE | CE | CE | CE | CE |
2364 * +---------+---------+---------+---------+----------+
2365 *
2366 * TC matches on the inner header after decapsulation on the tunnel device, but HW offload matches
2367 * the inner ip_ecn value before the hardware decap action.
2368 *
2369 * Cells marked with * are changed from the original inner packet ip_ecn value during decap, and
2370 * so matching those values on inner ip_ecn before decap will fail.
2371 *
2372 * The following helper allows offload when the inner ip_ecn won't be changed by the outer ip_ecn,
2373 * except for outer ip_ecn = CE, where in all cases the inner ip_ecn will be changed to CE,
2374 * and as such we can drop the inner ip_ecn=CE match.
2375 */
2376
2377 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
2378 struct flow_cls_offload *f,
2379 bool *match_inner_ecn)
2380 {
2381 u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
2382 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2383 struct netlink_ext_ack *extack = f->common.extack;
2384 struct flow_match_ip match;
2385
2386 *match_inner_ecn = true;
2387
2388 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
2389 flow_rule_match_enc_ip(rule, &match);
2390 outer_ecn_key = match.key->tos & INET_ECN_MASK;
2391 outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
2392 }
2393
2394 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2395 flow_rule_match_ip(rule, &match);
2396 inner_ecn_key = match.key->tos & INET_ECN_MASK;
2397 inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
2398 }
2399
2400 if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
2401 NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
2402 netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
2403 return -EOPNOTSUPP;
2404 }
2405
2406 if (!outer_ecn_mask) {
2407 if (!inner_ecn_mask)
2408 return 0;
2409
2410 NL_SET_ERR_MSG_MOD(extack,
2411 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2412 netdev_warn(priv->netdev,
2413 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2414 return -EOPNOTSUPP;
2415 }
2416
2417 if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
2418 NL_SET_ERR_MSG_MOD(extack,
2419 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2420 netdev_warn(priv->netdev,
2421 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2422 return -EOPNOTSUPP;
2423 }
2424
2425 if (!inner_ecn_mask)
2426 return 0;
2427
2428 /* Both inner and outer have full mask on ecn */
2429
2430 if (outer_ecn_key == INET_ECN_ECT_1) {
2431 /* inner ecn might be changed by the DECAP action */
2432
2433 NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
2434 netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
2435 return -EOPNOTSUPP;
2436 }
2437
2438 if (outer_ecn_key != INET_ECN_CE)
2439 return 0;
2440
2441 if (inner_ecn_key != INET_ECN_CE) {
2442 /* Can't happen in
software, as packet ecn will be changed to CE after decap */
2443 NL_SET_ERR_MSG_MOD(extack,
2444 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2445 netdev_warn(priv->netdev,
2446 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2447 return -EOPNOTSUPP;
2448 }
2449
2450 /* outer ecn = CE, inner ecn = CE; as decap will change the inner ecn to CE in any case,
2451 * drop the match on inner ecn
2452 */
2453 *match_inner_ecn = false;
2454
2455 return 0;
2456 }
2457
2458 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2459 struct mlx5e_tc_flow *flow,
2460 struct mlx5_flow_spec *spec,
2461 struct flow_cls_offload *f,
2462 struct net_device *filter_dev,
2463 u8 *match_level,
2464 bool *match_inner)
2465 {
2466 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2467 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2468 struct netlink_ext_ack *extack = f->common.extack;
2469 bool needs_mapping, sets_mapping;
2470 int err;
2471
2472 if (!mlx5e_is_eswitch_flow(flow)) {
2473 NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2474 return -EOPNOTSUPP;
2475 }
2476
2477 needs_mapping = !!flow->attr->chain;
2478 sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2479 *match_inner = !needs_mapping;
2480
2481 if ((needs_mapping || sets_mapping) &&
2482 !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2483 NL_SET_ERR_MSG_MOD(extack,
2484 "Chains on tunnel devices aren't supported without register loopback support");
2485 netdev_warn(priv->netdev,
2486 "Chains on tunnel devices aren't supported without register loopback support");
2487 return -EOPNOTSUPP;
2488 }
2489
2490 if (!flow->attr->chain) {
2491 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2492 match_level);
2493 if (err) {
2494 NL_SET_ERR_MSG_MOD(extack,
2495 "Failed to parse tunnel attributes");
2496 netdev_warn(priv->netdev,
2497 "Failed to parse tunnel attributes");
2498 return err;
2499 }
2500
2501 /* With mpls over udp we decapsulate using a packet reformat
2502 * object
2503 */
2504 if (!netif_is_bareudp(filter_dev))
2505 flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2506 err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2507 if (err)
2508 return err;
2509 } else if (tunnel) {
2510 struct mlx5_flow_spec *tmp_spec;
2511
2512 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2513 if (!tmp_spec) {
2514 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec");
2515 netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec");
2516 return -ENOMEM;
2517 }
2518 memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2519
2520 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2521 if (err) {
2522 kvfree(tmp_spec);
2523 NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2524 netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2525 return err;
2526 }
2527 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2528 kvfree(tmp_spec);
2529 if (err)
2530 return err;
2531 }
2532
2533 if (!needs_mapping && !sets_mapping)
2534 return 0;
2535
2536 return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2537 }
2538
2539 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2540 {
2541 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2542 inner_headers);
2543 }
2544
2545 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2546 {
2547 return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2548 inner_headers);
2549 }
2550
2551 static void
*get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) 2552 { 2553 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2554 outer_headers); 2555 } 2556 2557 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) 2558 { 2559 return MLX5_ADDR_OF(fte_match_param, spec->match_value, 2560 outer_headers); 2561 } 2562 2563 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) 2564 { 2565 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 2566 get_match_inner_headers_value(spec) : 2567 get_match_outer_headers_value(spec); 2568 } 2569 2570 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec) 2571 { 2572 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? 2573 get_match_inner_headers_criteria(spec) : 2574 get_match_outer_headers_criteria(spec); 2575 } 2576 2577 static int mlx5e_flower_parse_meta(struct net_device *filter_dev, 2578 struct flow_cls_offload *f) 2579 { 2580 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2581 struct netlink_ext_ack *extack = f->common.extack; 2582 struct net_device *ingress_dev; 2583 struct flow_match_meta match; 2584 2585 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) 2586 return 0; 2587 2588 flow_rule_match_meta(rule, &match); 2589 if (!match.mask->ingress_ifindex) 2590 return 0; 2591 2592 if (match.mask->ingress_ifindex != 0xFFFFFFFF) { 2593 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); 2594 return -EOPNOTSUPP; 2595 } 2596 2597 ingress_dev = __dev_get_by_index(dev_net(filter_dev), 2598 match.key->ingress_ifindex); 2599 if (!ingress_dev) { 2600 NL_SET_ERR_MSG_MOD(extack, 2601 "Can't find the ingress port to match on"); 2602 return -ENOENT; 2603 } 2604 2605 if (ingress_dev != filter_dev) { 2606 NL_SET_ERR_MSG_MOD(extack, 2607 "Can't match on the ingress filter port"); 2608 return -EOPNOTSUPP; 2609 } 2610 2611 return 0; 2612 } 2613 2614 static bool skip_key_basic(struct net_device *filter_dev, 2615 struct flow_cls_offload *f) 2616 { 2617 /* When doing mpls over udp decap, the user needs to provide 2618 * MPLS_UC as the protocol in order to be able to match on mpls 2619 * label fields. However, the actual ethertype is IP so we want to 2620 * avoid matching on this, otherwise we'll fail the match. 
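 * (Illustrative example, not taken from the code: a filter along the lines of
 * 'tc filter add dev bareudp0 ... protocol mpls_uc flower mpls_label 100'
 * carries ETH_P_MPLS_UC in the basic key, while the ethertype the device
 * actually parses is IP.)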
2621 */ 2622 if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0) 2623 return true; 2624 2625 return false; 2626 } 2627 2628 static int __parse_cls_flower(struct mlx5e_priv *priv, 2629 struct mlx5e_tc_flow *flow, 2630 struct mlx5_flow_spec *spec, 2631 struct flow_cls_offload *f, 2632 struct net_device *filter_dev, 2633 u8 *inner_match_level, u8 *outer_match_level) 2634 { 2635 struct netlink_ext_ack *extack = f->common.extack; 2636 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2637 outer_headers); 2638 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2639 outer_headers); 2640 void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2641 misc_parameters); 2642 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2643 misc_parameters); 2644 void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 2645 misc_parameters_3); 2646 void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value, 2647 misc_parameters_3); 2648 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 2649 struct flow_dissector *dissector = rule->match.dissector; 2650 enum fs_flow_table_type fs_type; 2651 bool match_inner_ecn = true; 2652 u16 addr_type = 0; 2653 u8 ip_proto = 0; 2654 u8 *match_level; 2655 int err; 2656 2657 fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX; 2658 match_level = outer_match_level; 2659 2660 if (dissector->used_keys & 2661 ~(BIT(FLOW_DISSECTOR_KEY_META) | 2662 BIT(FLOW_DISSECTOR_KEY_CONTROL) | 2663 BIT(FLOW_DISSECTOR_KEY_BASIC) | 2664 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | 2665 BIT(FLOW_DISSECTOR_KEY_VLAN) | 2666 BIT(FLOW_DISSECTOR_KEY_CVLAN) | 2667 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | 2668 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | 2669 BIT(FLOW_DISSECTOR_KEY_PORTS) | 2670 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | 2671 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | 2672 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | 2673 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | 2674 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | 2675 BIT(FLOW_DISSECTOR_KEY_TCP) | 2676 BIT(FLOW_DISSECTOR_KEY_IP) | 2677 BIT(FLOW_DISSECTOR_KEY_CT) | 2678 BIT(FLOW_DISSECTOR_KEY_ENC_IP) | 2679 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | 2680 BIT(FLOW_DISSECTOR_KEY_ICMP) | 2681 BIT(FLOW_DISSECTOR_KEY_MPLS))) { 2682 NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); 2683 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", 2684 dissector->used_keys); 2685 return -EOPNOTSUPP; 2686 } 2687 2688 if (mlx5e_get_tc_tun(filter_dev)) { 2689 bool match_inner = false; 2690 2691 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, 2692 outer_match_level, &match_inner); 2693 if (err) 2694 return err; 2695 2696 if (match_inner) { 2697 /* header pointers should point to the inner headers 2698 * if the packet was decapsulated already. 2699 * outer headers are set by parse_tunnel_attr. 
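 * (That is, headers_c/headers_v are re-pointed below at the inner_headers
 * fields of the match param instead of outer_headers.)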
2700 */ 2701 match_level = inner_match_level; 2702 headers_c = get_match_inner_headers_criteria(spec); 2703 headers_v = get_match_inner_headers_value(spec); 2704 } 2705 2706 err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn); 2707 if (err) 2708 return err; 2709 } 2710 2711 err = mlx5e_flower_parse_meta(filter_dev, f); 2712 if (err) 2713 return err; 2714 2715 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) && 2716 !skip_key_basic(filter_dev, f)) { 2717 struct flow_match_basic match; 2718 2719 flow_rule_match_basic(rule, &match); 2720 mlx5e_tc_set_ethertype(priv->mdev, &match, 2721 match_level == outer_match_level, 2722 headers_c, headers_v); 2723 2724 if (match.mask->n_proto) 2725 *match_level = MLX5_MATCH_L2; 2726 } 2727 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) || 2728 is_vlan_dev(filter_dev)) { 2729 struct flow_dissector_key_vlan filter_dev_mask; 2730 struct flow_dissector_key_vlan filter_dev_key; 2731 struct flow_match_vlan match; 2732 2733 if (is_vlan_dev(filter_dev)) { 2734 match.key = &filter_dev_key; 2735 match.key->vlan_id = vlan_dev_vlan_id(filter_dev); 2736 match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev); 2737 match.key->vlan_priority = 0; 2738 match.mask = &filter_dev_mask; 2739 memset(match.mask, 0xff, sizeof(*match.mask)); 2740 match.mask->vlan_priority = 0; 2741 } else { 2742 flow_rule_match_vlan(rule, &match); 2743 } 2744 if (match.mask->vlan_id || 2745 match.mask->vlan_priority || 2746 match.mask->vlan_tpid) { 2747 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { 2748 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2749 svlan_tag, 1); 2750 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2751 svlan_tag, 1); 2752 } else { 2753 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2754 cvlan_tag, 1); 2755 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2756 cvlan_tag, 1); 2757 } 2758 2759 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, 2760 match.mask->vlan_id); 2761 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, 2762 match.key->vlan_id); 2763 2764 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, 2765 match.mask->vlan_priority); 2766 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, 2767 match.key->vlan_priority); 2768 2769 *match_level = MLX5_MATCH_L2; 2770 2771 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) && 2772 match.mask->vlan_eth_type && 2773 MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, 2774 ft_field_support.outer_second_vid, 2775 fs_type)) { 2776 MLX5_SET(fte_match_set_misc, misc_c, 2777 outer_second_cvlan_tag, 1); 2778 spec->match_criteria_enable |= 2779 MLX5_MATCH_MISC_PARAMETERS; 2780 } 2781 } 2782 } else if (*match_level != MLX5_MATCH_NONE) { 2783 /* cvlan_tag enabled in match criteria and 2784 * disabled in match value means both S & C tags 2785 * don't exist (untagged of both) 2786 */ 2787 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 2788 *match_level = MLX5_MATCH_L2; 2789 } 2790 2791 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { 2792 struct flow_match_vlan match; 2793 2794 flow_rule_match_cvlan(rule, &match); 2795 if (match.mask->vlan_id || 2796 match.mask->vlan_priority || 2797 match.mask->vlan_tpid) { 2798 if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid, 2799 fs_type)) { 2800 NL_SET_ERR_MSG_MOD(extack, 2801 "Matching on CVLAN is not supported"); 2802 return -EOPNOTSUPP; 2803 } 2804 2805 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { 2806 MLX5_SET(fte_match_set_misc, misc_c, 2807 outer_second_svlan_tag, 1); 2808 MLX5_SET(fte_match_set_misc, misc_v, 2809 
outer_second_svlan_tag, 1); 2810 } else { 2811 MLX5_SET(fte_match_set_misc, misc_c, 2812 outer_second_cvlan_tag, 1); 2813 MLX5_SET(fte_match_set_misc, misc_v, 2814 outer_second_cvlan_tag, 1); 2815 } 2816 2817 MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, 2818 match.mask->vlan_id); 2819 MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, 2820 match.key->vlan_id); 2821 MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, 2822 match.mask->vlan_priority); 2823 MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, 2824 match.key->vlan_priority); 2825 2826 *match_level = MLX5_MATCH_L2; 2827 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; 2828 } 2829 } 2830 2831 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 2832 struct flow_match_eth_addrs match; 2833 2834 flow_rule_match_eth_addrs(rule, &match); 2835 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2836 dmac_47_16), 2837 match.mask->dst); 2838 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2839 dmac_47_16), 2840 match.key->dst); 2841 2842 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2843 smac_47_16), 2844 match.mask->src); 2845 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2846 smac_47_16), 2847 match.key->src); 2848 2849 if (!is_zero_ether_addr(match.mask->src) || 2850 !is_zero_ether_addr(match.mask->dst)) 2851 *match_level = MLX5_MATCH_L2; 2852 } 2853 2854 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 2855 struct flow_match_control match; 2856 2857 flow_rule_match_control(rule, &match); 2858 addr_type = match.key->addr_type; 2859 2860 /* the HW doesn't support frag first/later */ 2861 if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { 2862 NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported"); 2863 return -EOPNOTSUPP; 2864 } 2865 2866 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { 2867 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); 2868 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 2869 match.key->flags & FLOW_DIS_IS_FRAGMENT); 2870 2871 /* the HW doesn't need L3 inline to match on frag=no */ 2872 if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) 2873 *match_level = MLX5_MATCH_L2; 2874 /* *** L2 attributes parsing up to here *** */ 2875 else 2876 *match_level = MLX5_MATCH_L3; 2877 } 2878 } 2879 2880 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 2881 struct flow_match_basic match; 2882 2883 flow_rule_match_basic(rule, &match); 2884 ip_proto = match.key->ip_proto; 2885 2886 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 2887 match.mask->ip_proto); 2888 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 2889 match.key->ip_proto); 2890 2891 if (match.mask->ip_proto) 2892 *match_level = MLX5_MATCH_L3; 2893 } 2894 2895 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 2896 struct flow_match_ipv4_addrs match; 2897 2898 flow_rule_match_ipv4_addrs(rule, &match); 2899 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2900 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2901 &match.mask->src, sizeof(match.mask->src)); 2902 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2903 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2904 &match.key->src, sizeof(match.key->src)); 2905 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2906 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2907 &match.mask->dst, sizeof(match.mask->dst)); 2908 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2909 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2910 &match.key->dst, sizeof(match.key->dst)); 2911 2912 if (match.mask->src || 
match.mask->dst) 2913 *match_level = MLX5_MATCH_L3; 2914 } 2915 2916 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 2917 struct flow_match_ipv6_addrs match; 2918 2919 flow_rule_match_ipv6_addrs(rule, &match); 2920 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2921 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2922 &match.mask->src, sizeof(match.mask->src)); 2923 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2924 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2925 &match.key->src, sizeof(match.key->src)); 2926 2927 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2928 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2929 &match.mask->dst, sizeof(match.mask->dst)); 2930 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2931 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2932 &match.key->dst, sizeof(match.key->dst)); 2933 2934 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || 2935 ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) 2936 *match_level = MLX5_MATCH_L3; 2937 } 2938 2939 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { 2940 struct flow_match_ip match; 2941 2942 flow_rule_match_ip(rule, &match); 2943 if (match_inner_ecn) { 2944 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, 2945 match.mask->tos & 0x3); 2946 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, 2947 match.key->tos & 0x3); 2948 } 2949 2950 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, 2951 match.mask->tos >> 2); 2952 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, 2953 match.key->tos >> 2); 2954 2955 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, 2956 match.mask->ttl); 2957 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, 2958 match.key->ttl); 2959 2960 if (match.mask->ttl && 2961 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, 2962 ft_field_support.outer_ipv4_ttl)) { 2963 NL_SET_ERR_MSG_MOD(extack, 2964 "Matching on TTL is not supported"); 2965 return -EOPNOTSUPP; 2966 } 2967 2968 if (match.mask->tos || match.mask->ttl) 2969 *match_level = MLX5_MATCH_L3; 2970 } 2971 2972 /* *** L3 attributes parsing up to here *** */ 2973 2974 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 2975 struct flow_match_ports match; 2976 2977 flow_rule_match_ports(rule, &match); 2978 switch (ip_proto) { 2979 case IPPROTO_TCP: 2980 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2981 tcp_sport, ntohs(match.mask->src)); 2982 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2983 tcp_sport, ntohs(match.key->src)); 2984 2985 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2986 tcp_dport, ntohs(match.mask->dst)); 2987 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2988 tcp_dport, ntohs(match.key->dst)); 2989 break; 2990 2991 case IPPROTO_UDP: 2992 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2993 udp_sport, ntohs(match.mask->src)); 2994 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 2995 udp_sport, ntohs(match.key->src)); 2996 2997 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 2998 udp_dport, ntohs(match.mask->dst)); 2999 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 3000 udp_dport, ntohs(match.key->dst)); 3001 break; 3002 default: 3003 NL_SET_ERR_MSG_MOD(extack, 3004 "Only UDP and TCP transports are supported for L4 matching"); 3005 netdev_err(priv->netdev, 3006 "Only UDP and TCP transport are supported\n"); 3007 return -EINVAL; 3008 } 3009 3010 if (match.mask->src || match.mask->dst) 3011 *match_level = MLX5_MATCH_L4; 3012 } 3013 3014 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { 3015 struct flow_match_tcp match; 3016 3017 flow_rule_match_tcp(rule, &match); 3018 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, 3019 
ntohs(match.mask->flags)); 3020 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, 3021 ntohs(match.key->flags)); 3022 3023 if (match.mask->flags) 3024 *match_level = MLX5_MATCH_L4; 3025 } 3026 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) { 3027 struct flow_match_icmp match; 3028 3029 flow_rule_match_icmp(rule, &match); 3030 switch (ip_proto) { 3031 case IPPROTO_ICMP: 3032 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & 3033 MLX5_FLEX_PROTO_ICMP)) { 3034 NL_SET_ERR_MSG_MOD(extack, 3035 "Match on Flex protocols for ICMP is not supported"); 3036 return -EOPNOTSUPP; 3037 } 3038 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, 3039 match.mask->type); 3040 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, 3041 match.key->type); 3042 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code, 3043 match.mask->code); 3044 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code, 3045 match.key->code); 3046 break; 3047 case IPPROTO_ICMPV6: 3048 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & 3049 MLX5_FLEX_PROTO_ICMPV6)) { 3050 NL_SET_ERR_MSG_MOD(extack, 3051 "Match on Flex protocols for ICMPV6 is not supported"); 3052 return -EOPNOTSUPP; 3053 } 3054 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, 3055 match.mask->type); 3056 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, 3057 match.key->type); 3058 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code, 3059 match.mask->code); 3060 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code, 3061 match.key->code); 3062 break; 3063 default: 3064 NL_SET_ERR_MSG_MOD(extack, 3065 "Code and type matching only with ICMP and ICMPv6"); 3066 netdev_err(priv->netdev, 3067 "Code and type matching only with ICMP and ICMPv6\n"); 3068 return -EINVAL; 3069 } 3070 if (match.mask->code || match.mask->type) { 3071 *match_level = MLX5_MATCH_L4; 3072 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; 3073 } 3074 } 3075 /* Currently supported only for MPLS over UDP */ 3076 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && 3077 !netif_is_bareudp(filter_dev)) { 3078 NL_SET_ERR_MSG_MOD(extack, 3079 "Matching on MPLS is supported only for MPLS over UDP"); 3080 netdev_err(priv->netdev, 3081 "Matching on MPLS is supported only for MPLS over UDP\n"); 3082 return -EOPNOTSUPP; 3083 } 3084 3085 return 0; 3086 } 3087 3088 static int parse_cls_flower(struct mlx5e_priv *priv, 3089 struct mlx5e_tc_flow *flow, 3090 struct mlx5_flow_spec *spec, 3091 struct flow_cls_offload *f, 3092 struct net_device *filter_dev) 3093 { 3094 u8 inner_match_level, outer_match_level, non_tunnel_match_level; 3095 struct netlink_ext_ack *extack = f->common.extack; 3096 struct mlx5_core_dev *dev = priv->mdev; 3097 struct mlx5_eswitch *esw = dev->priv.eswitch; 3098 struct mlx5e_rep_priv *rpriv = priv->ppriv; 3099 struct mlx5_eswitch_rep *rep; 3100 bool is_eswitch_flow; 3101 int err; 3102 3103 inner_match_level = MLX5_MATCH_NONE; 3104 outer_match_level = MLX5_MATCH_NONE; 3105 3106 err = __parse_cls_flower(priv, flow, spec, f, filter_dev, 3107 &inner_match_level, &outer_match_level); 3108 non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? 
3109 outer_match_level : inner_match_level; 3110 3111 is_eswitch_flow = mlx5e_is_eswitch_flow(flow); 3112 if (!err && is_eswitch_flow) { 3113 rep = rpriv->rep; 3114 if (rep->vport != MLX5_VPORT_UPLINK && 3115 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && 3116 esw->offloads.inline_mode < non_tunnel_match_level)) { 3117 NL_SET_ERR_MSG_MOD(extack, 3118 "Flow is not offloaded due to min inline setting"); 3119 netdev_warn(priv->netdev, 3120 "Flow is not offloaded due to min inline setting, required %d actual %d\n", 3121 non_tunnel_match_level, esw->offloads.inline_mode); 3122 return -EOPNOTSUPP; 3123 } 3124 } 3125 3126 flow->attr->inner_match_level = inner_match_level; 3127 flow->attr->outer_match_level = outer_match_level; 3128 3129 3130 return err; 3131 } 3132 3133 struct mlx5_fields { 3134 u8 field; 3135 u8 field_bsize; 3136 u32 field_mask; 3137 u32 offset; 3138 u32 match_offset; 3139 }; 3140 3141 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \ 3142 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \ 3143 offsetof(struct pedit_headers, field) + (off), \ 3144 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} 3145 3146 /* masked values are the same and there are no rewrites that do not have a 3147 * match. 3148 */ 3149 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \ 3150 type matchmaskx = *(type *)(matchmaskp); \ 3151 type matchvalx = *(type *)(matchvalp); \ 3152 type maskx = *(type *)(maskp); \ 3153 type valx = *(type *)(valp); \ 3154 \ 3155 (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \ 3156 matchmaskx)); \ 3157 }) 3158 3159 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, 3160 void *matchmaskp, u8 bsize) 3161 { 3162 bool same = false; 3163 3164 switch (bsize) { 3165 case 8: 3166 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp); 3167 break; 3168 case 16: 3169 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp); 3170 break; 3171 case 32: 3172 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp); 3173 break; 3174 } 3175 3176 return same; 3177 } 3178 3179 static struct mlx5_fields fields[] = { 3180 OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16), 3181 OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0), 3182 OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16), 3183 OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0), 3184 OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype), 3185 OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid), 3186 3187 OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp), 3188 OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit), 3189 OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4), 3190 OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 3191 3192 OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0, 3193 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]), 3194 OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0, 3195 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]), 3196 OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0, 3197 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]), 3198 OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0, 3199 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]), 3200 OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0, 3201 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]), 3202 OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0, 3203 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]), 3204 
OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
3205 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
3206 OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
3207 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
3208 OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
3209 OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),
3210
3211 OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
3212 OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
3213 /* in the linux tcphdr, tcp_flags is 8 bits long */
3214 OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags),
3215
3216 OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
3217 OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
3218 };
3219
3220 static unsigned long mask_to_le(unsigned long mask, int size)
3221 {
3222 __be32 mask_be32;
3223 __be16 mask_be16;
3224
3225 if (size == 32) {
3226 mask_be32 = (__force __be32)(mask);
3227 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
3228 } else if (size == 16) {
3229 mask_be32 = (__force __be32)(mask);
3230 mask_be16 = *(__be16 *)&mask_be32;
3231 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
3232 }
3233
3234 return mask;
3235 }
3236
3237 static int offload_pedit_fields(struct mlx5e_priv *priv,
3238 int namespace,
3239 struct mlx5e_tc_flow_parse_attr *parse_attr,
3240 u32 *action_flags,
3241 struct netlink_ext_ack *extack)
3242 {
3243 struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
3244 struct pedit_headers_action *hdrs = parse_attr->hdrs;
3245 void *headers_c, *headers_v, *action, *vals_p;
3246 u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
3247 struct mlx5e_tc_mod_hdr_acts *mod_acts;
3248 unsigned long mask, field_mask;
3249 int i, first, last, next_z;
3250 struct mlx5_fields *f;
3251 u8 cmd;
3252
3253 mod_acts = &parse_attr->mod_hdr_acts;
3254 headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
3255 headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
3256
3257 set_masks = &hdrs[0].masks;
3258 add_masks = &hdrs[1].masks;
3259 set_vals = &hdrs[0].vals;
3260 add_vals = &hdrs[1].vals;
3261
3262 for (i = 0; i < ARRAY_SIZE(fields); i++) {
3263 bool skip;
3264
3265 f = &fields[i];
3266 /* avoid seeing bits set from previous iterations */
3267 s_mask = 0;
3268 a_mask = 0;
3269
3270 s_masks_p = (void *)set_masks + f->offset;
3271 a_masks_p = (void *)add_masks + f->offset;
3272
3273 s_mask = *s_masks_p & f->field_mask;
3274 a_mask = *a_masks_p & f->field_mask;
3275
3276 if (!s_mask && !a_mask) /* nothing to offload here */
3277 continue;
3278
3279 if (s_mask && a_mask) {
3280 NL_SET_ERR_MSG_MOD(extack,
3281 "can't set and add to the same HW field");
3282 netdev_warn(priv->netdev,
3283 "mlx5: can't set and add to the same HW field (%x)\n",
3284 f->field);
3285 return -EOPNOTSUPP;
3286 }
3287
3288 skip = false;
3289 if (s_mask) {
3290 void *match_mask = headers_c + f->match_offset;
3291 void *match_val = headers_v + f->match_offset;
3292
3293 cmd = MLX5_ACTION_TYPE_SET;
3294 mask = s_mask;
3295 vals_p = (void *)set_vals + f->offset;
3296 /* don't rewrite if we have a match on the same value */
3297 if (cmp_val_mask(vals_p, s_masks_p, match_val,
3298 match_mask, f->field_bsize))
3299 skip = true;
3300 /* clear to denote we consumed this field */
3301 *s_masks_p &= ~f->field_mask;
3302 } else {
3303 cmd = MLX5_ACTION_TYPE_ADD;
3304 mask = a_mask;
3305 vals_p = (void *)add_vals + f->offset;
3306 /* add 0 is no change */
3307 if ((*(u32 *)vals_p & f->field_mask) == 0)
3308 skip = true; 3309 /* clear to denote we consumed this field */ 3310 *a_masks_p &= ~f->field_mask; 3311 } 3312 if (skip) 3313 continue; 3314 3315 mask = mask_to_le(mask, f->field_bsize); 3316 3317 first = find_first_bit(&mask, f->field_bsize); 3318 next_z = find_next_zero_bit(&mask, f->field_bsize, first); 3319 last = find_last_bit(&mask, f->field_bsize); 3320 if (first < next_z && next_z < last) { 3321 NL_SET_ERR_MSG_MOD(extack, 3322 "rewrite of few sub-fields isn't supported"); 3323 netdev_warn(priv->netdev, 3324 "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", 3325 mask); 3326 return -EOPNOTSUPP; 3327 } 3328 3329 action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts); 3330 if (IS_ERR(action)) { 3331 NL_SET_ERR_MSG_MOD(extack, 3332 "too many pedit actions, can't offload"); 3333 mlx5_core_warn(priv->mdev, 3334 "mlx5: parsed %d pedit actions, can't do more\n", 3335 mod_acts->num_actions); 3336 return PTR_ERR(action); 3337 } 3338 3339 MLX5_SET(set_action_in, action, action_type, cmd); 3340 MLX5_SET(set_action_in, action, field, f->field); 3341 3342 if (cmd == MLX5_ACTION_TYPE_SET) { 3343 int start; 3344 3345 field_mask = mask_to_le(f->field_mask, f->field_bsize); 3346 3347 /* if field is bit sized it can start not from first bit */ 3348 start = find_first_bit(&field_mask, f->field_bsize); 3349 3350 MLX5_SET(set_action_in, action, offset, first - start); 3351 /* length is num of bits to be written, zero means length of 32 */ 3352 MLX5_SET(set_action_in, action, length, (last - first + 1)); 3353 } 3354 3355 if (f->field_bsize == 32) 3356 MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first); 3357 else if (f->field_bsize == 16) 3358 MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first); 3359 else if (f->field_bsize == 8) 3360 MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); 3361 3362 ++mod_acts->num_actions; 3363 } 3364 3365 return 0; 3366 } 3367 3368 static const struct pedit_headers zero_masks = {}; 3369 3370 static int verify_offload_pedit_fields(struct mlx5e_priv *priv, 3371 struct mlx5e_tc_flow_parse_attr *parse_attr, 3372 struct netlink_ext_ack *extack) 3373 { 3374 struct pedit_headers *cmd_masks; 3375 u8 cmd; 3376 3377 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { 3378 cmd_masks = &parse_attr->hdrs[cmd].masks; 3379 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { 3380 NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); 3381 netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); 3382 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, 3383 16, 1, cmd_masks, sizeof(zero_masks), true); 3384 return -EOPNOTSUPP; 3385 } 3386 } 3387 3388 return 0; 3389 } 3390 3391 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, 3392 struct mlx5e_tc_flow_parse_attr *parse_attr, 3393 u32 *action_flags, 3394 struct netlink_ext_ack *extack) 3395 { 3396 int err; 3397 3398 err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack); 3399 if (err) 3400 goto out_dealloc_parsed_actions; 3401 3402 err = verify_offload_pedit_fields(priv, parse_attr, extack); 3403 if (err) 3404 goto out_dealloc_parsed_actions; 3405 3406 return 0; 3407 3408 out_dealloc_parsed_actions: 3409 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); 3410 return err; 3411 } 3412 3413 struct ip_ttl_word { 3414 __u8 ttl; 3415 __u8 protocol; 3416 __sum16 check; 3417 }; 3418 3419 struct ipv6_hoplimit_word { 3420 __be16 payload_len; 3421 __u8 nexthdr; 3422 __u8 hop_limit; 
3423 };
3424
3425 static bool
3426 is_flow_action_modify_ip_header(struct flow_action *flow_action)
3427 {
3428 const struct flow_action_entry *act;
3429 u32 mask, offset;
3430 u8 htype;
3431 int i;
3432
3433 /* For the IPv4 & IPv6 headers, check the 4-byte word holding
3434 * ttl/hop_limit to determine whether the modified fields
3435 * are ttl & hop_limit only, or include other fields as well.
3436 */
3437 flow_action_for_each(i, act, flow_action) {
3438 if (act->id != FLOW_ACTION_MANGLE &&
3439 act->id != FLOW_ACTION_ADD)
3440 continue;
3441
3442 htype = act->mangle.htype;
3443 offset = act->mangle.offset;
3444 mask = ~act->mangle.mask;
3445
3446 if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3447 struct ip_ttl_word *ttl_word =
3448 (struct ip_ttl_word *)&mask;
3449
3450 if (offset != offsetof(struct iphdr, ttl) ||
3451 ttl_word->protocol ||
3452 ttl_word->check)
3453 return true;
3454 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3455 struct ipv6_hoplimit_word *hoplimit_word =
3456 (struct ipv6_hoplimit_word *)&mask;
3457
3458 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3459 hoplimit_word->payload_len ||
3460 hoplimit_word->nexthdr)
3461 return true;
3462 }
3463 }
3464
3465 return false;
3466 }
3467
3468 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3469 struct mlx5_flow_spec *spec,
3470 struct flow_action *flow_action,
3471 u32 actions,
3472 struct netlink_ext_ack *extack)
3473 {
3474 bool modify_ip_header;
3475 void *headers_c;
3476 void *headers_v;
3477 u16 ethertype;
3478 u8 ip_proto;
3479
3480 headers_c = mlx5e_get_match_headers_criteria(actions, spec);
3481 headers_v = mlx5e_get_match_headers_value(actions, spec);
3482 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3483
3484 /* for non-IP we only re-write MACs, so we're okay */
3485 if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3486 ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3487 goto out_ok;
3488
3489 modify_ip_header = is_flow_action_modify_ip_header(flow_action);
3490 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3491 if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3492 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3493 NL_SET_ERR_MSG_MOD(extack,
3494 "can't offload re-write of non TCP/UDP/ICMP");
3495 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3496 ip_proto);
3497 return false;
3498 }
3499
3500 out_ok:
3501 return true;
3502 }
3503
3504 static bool
3505 actions_match_supported_fdb(struct mlx5e_priv *priv,
3506 struct mlx5e_tc_flow *flow,
3507 struct netlink_ext_ack *extack)
3508 {
3509 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3510
3511 if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3512 NL_SET_ERR_MSG_MOD(extack,
3513 "current firmware doesn't support split rule for port mirroring");
3514 netdev_warn_once(priv->netdev,
3515 "current firmware doesn't support split rule for port mirroring\n");
3516 return false;
3517 }
3518
3519 return true;
3520 }
3521
3522 static bool
3523 actions_match_supported(struct mlx5e_priv *priv,
3524 struct flow_action *flow_action,
3525 u32 actions,
3526 struct mlx5e_tc_flow_parse_attr *parse_attr,
3527 struct mlx5e_tc_flow *flow,
3528 struct netlink_ext_ack *extack)
3529 {
3530 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3531 !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions,
3532 extack))
3533 return false;
3534
3535 if (mlx5e_is_eswitch_flow(flow) &&
3536 !actions_match_supported_fdb(priv, flow, extack))
3537 return false;
3538
3539 return true;
3540 }
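/* Illustrative walk-through (annotation, not driver code): a pedit action such
 * as 'munge ip ttl set 63' arrives as FLOW_ACT_MANGLE_HDR_TYPE_IP4 with
 * offset == offsetof(struct iphdr, ttl) and an inverted mask covering only the
 * ttl byte, so ip_ttl_word's protocol/check members are zero and
 * is_flow_action_modify_ip_header() returns false, allowing the rewrite for
 * any ip_proto. Rewriting e.g. an IPv4 address instead makes it return true,
 * and modify_header_match_supported() then restricts the flow to TCP/UDP/ICMP.
 */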
3541 3542 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 3543 { 3544 return priv->mdev == peer_priv->mdev; 3545 } 3546 3547 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 3548 { 3549 struct mlx5_core_dev *fmdev, *pmdev; 3550 u64 fsystem_guid, psystem_guid; 3551 3552 fmdev = priv->mdev; 3553 pmdev = peer_priv->mdev; 3554 3555 fsystem_guid = mlx5_query_nic_system_image_guid(fmdev); 3556 psystem_guid = mlx5_query_nic_system_image_guid(pmdev); 3557 3558 return (fsystem_guid == psystem_guid); 3559 } 3560 3561 static int 3562 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, 3563 struct mlx5e_tc_flow *flow, 3564 struct mlx5_flow_attr *attr, 3565 struct netlink_ext_ack *extack) 3566 { 3567 struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; 3568 struct pedit_headers_action *hdrs = parse_attr->hdrs; 3569 enum mlx5_flow_namespace_type ns_type; 3570 int err; 3571 3572 if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits && 3573 !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) 3574 return 0; 3575 3576 ns_type = mlx5e_get_flow_namespace(flow); 3577 3578 err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack); 3579 if (err) 3580 return err; 3581 3582 if (parse_attr->mod_hdr_acts.num_actions > 0) 3583 return 0; 3584 3585 /* In case all pedit actions are skipped, remove the MOD_HDR flag. */ 3586 attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3587 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); 3588 3589 if (ns_type != MLX5_FLOW_NAMESPACE_FDB) 3590 return 0; 3591 3592 if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || 3593 (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) 3594 attr->esw_attr->split_count = 0; 3595 3596 return 0; 3597 } 3598 3599 static struct mlx5_flow_attr* 3600 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, 3601 enum mlx5_flow_namespace_type ns_type) 3602 { 3603 struct mlx5e_tc_flow_parse_attr *parse_attr; 3604 u32 attr_sz = ns_to_attr_sz(ns_type); 3605 struct mlx5_flow_attr *attr2; 3606 3607 attr2 = mlx5_alloc_flow_attr(ns_type); 3608 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); 3609 if (!attr2 || !parse_attr) { 3610 kvfree(parse_attr); 3611 kfree(attr2); 3612 return NULL; 3613 } 3614 3615 memcpy(attr2, attr, attr_sz); 3616 INIT_LIST_HEAD(&attr2->list); 3617 parse_attr->filter_dev = attr->parse_attr->filter_dev; 3618 attr2->action = 0; 3619 attr2->counter = NULL; 3620 attr2->tc_act_cookies_count = 0; 3621 attr2->flags = 0; 3622 attr2->parse_attr = parse_attr; 3623 attr2->dest_chain = 0; 3624 attr2->dest_ft = NULL; 3625 attr2->act_id_restore_rule = NULL; 3626 memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr)); 3627 3628 if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { 3629 attr2->esw_attr->out_count = 0; 3630 attr2->esw_attr->split_count = 0; 3631 } 3632 3633 attr2->branch_true = NULL; 3634 attr2->branch_false = NULL; 3635 attr2->jumping_attr = NULL; 3636 return attr2; 3637 } 3638 3639 struct mlx5_flow_attr * 3640 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow) 3641 { 3642 struct mlx5_esw_flow_attr *esw_attr; 3643 struct mlx5_flow_attr *attr; 3644 int i; 3645 3646 list_for_each_entry(attr, &flow->attrs, list) { 3647 esw_attr = attr->esw_attr; 3648 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { 3649 if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) 3650 return attr; 3651 } 3652 } 3653 3654 return NULL; 3655 } 3656 3657 void 3658 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow) 3659 { 3660 struct mlx5e_post_act *post_act = 
get_post_action(flow->priv); 3661 struct mlx5_flow_attr *attr; 3662 3663 list_for_each_entry(attr, &flow->attrs, list) { 3664 if (list_is_last(&attr->list, &flow->attrs)) 3665 break; 3666 3667 mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle); 3668 } 3669 } 3670 3671 static void 3672 free_flow_post_acts(struct mlx5e_tc_flow *flow) 3673 { 3674 struct mlx5_flow_attr *attr, *tmp; 3675 3676 list_for_each_entry_safe(attr, tmp, &flow->attrs, list) { 3677 if (list_is_last(&attr->list, &flow->attrs)) 3678 break; 3679 3680 mlx5_free_flow_attr_actions(flow, attr); 3681 3682 list_del(&attr->list); 3683 kvfree(attr->parse_attr); 3684 kfree(attr); 3685 } 3686 } 3687 3688 int 3689 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow) 3690 { 3691 struct mlx5e_post_act *post_act = get_post_action(flow->priv); 3692 struct mlx5_flow_attr *attr; 3693 int err = 0; 3694 3695 list_for_each_entry(attr, &flow->attrs, list) { 3696 if (list_is_last(&attr->list, &flow->attrs)) 3697 break; 3698 3699 err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle); 3700 if (err) 3701 break; 3702 } 3703 3704 return err; 3705 } 3706 3707 /* TC filter rule HW translation: 3708 * 3709 * +---------------------+ 3710 * + ft prio (tc chain) + 3711 * + original match + 3712 * +---------------------+ 3713 * | 3714 * | if multi table action 3715 * | 3716 * v 3717 * +---------------------+ 3718 * + post act ft |<----. 3719 * + match fte id | | split on multi table action 3720 * + do actions |-----' 3721 * +---------------------+ 3722 * | 3723 * | 3724 * v 3725 * Do rest of the actions after last multi table action. 3726 */ 3727 static int 3728 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) 3729 { 3730 struct mlx5e_post_act *post_act = get_post_action(flow->priv); 3731 struct mlx5_flow_attr *attr, *next_attr = NULL; 3732 struct mlx5e_post_act_handle *handle; 3733 int err; 3734 3735 /* This is going in reverse order as needed. 3736 * The first entry is the last attribute. 3737 */ 3738 list_for_each_entry(attr, &flow->attrs, list) { 3739 if (!next_attr) { 3740 /* Set counter action on last post act rule. */ 3741 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3742 } 3743 3744 if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) { 3745 err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr); 3746 if (err) 3747 goto out_free; 3748 } 3749 3750 /* Don't add post_act rule for first attr (last in the list). 3751 * It's being handled by the caller. 
3752 */ 3753 if (list_is_last(&attr->list, &flow->attrs)) 3754 break; 3755 3756 err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); 3757 if (err) 3758 goto out_free; 3759 3760 err = post_process_attr(flow, attr, extack); 3761 if (err) 3762 goto out_free; 3763 3764 handle = mlx5e_tc_post_act_add(post_act, attr); 3765 if (IS_ERR(handle)) { 3766 err = PTR_ERR(handle); 3767 goto out_free; 3768 } 3769 3770 attr->post_act_handle = handle; 3771 3772 if (attr->jumping_attr) { 3773 err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr); 3774 if (err) 3775 goto out_free; 3776 } 3777 3778 next_attr = attr; 3779 } 3780 3781 if (flow_flag_test(flow, SLOW)) 3782 goto out; 3783 3784 err = mlx5e_tc_offload_flow_post_acts(flow); 3785 if (err) 3786 goto out_free; 3787 3788 out: 3789 return 0; 3790 3791 out_free: 3792 free_flow_post_acts(flow); 3793 return err; 3794 } 3795 3796 static int 3797 alloc_branch_attr(struct mlx5e_tc_flow *flow, 3798 struct mlx5e_tc_act_branch_ctrl *cond, 3799 struct mlx5_flow_attr **cond_attr, 3800 u32 *jump_count, 3801 struct netlink_ext_ack *extack) 3802 { 3803 struct mlx5_flow_attr *attr; 3804 int err = 0; 3805 3806 *cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, 3807 mlx5e_get_flow_namespace(flow)); 3808 if (!(*cond_attr)) 3809 return -ENOMEM; 3810 3811 attr = *cond_attr; 3812 3813 switch (cond->act_id) { 3814 case FLOW_ACTION_DROP: 3815 attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; 3816 break; 3817 case FLOW_ACTION_ACCEPT: 3818 case FLOW_ACTION_PIPE: 3819 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3820 attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); 3821 break; 3822 case FLOW_ACTION_JUMP: 3823 if (*jump_count) { 3824 NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps"); 3825 err = -EOPNOTSUPP; 3826 goto out_err; 3827 } 3828 *jump_count = cond->extval; 3829 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3830 attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); 3831 break; 3832 default: 3833 err = -EOPNOTSUPP; 3834 goto out_err; 3835 } 3836 3837 return err; 3838 out_err: 3839 kfree(*cond_attr); 3840 *cond_attr = NULL; 3841 return err; 3842 } 3843 3844 static void 3845 dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act, 3846 struct mlx5_flow_attr *attr, struct mlx5e_priv *priv, 3847 struct mlx5e_tc_jump_state *jump_state) 3848 { 3849 if (!jump_state->jump_count) 3850 return; 3851 3852 /* Single tc action can instantiate multiple offload actions (e.g. 
pedit) 3853 * Jump only over a tc action 3854 */ 3855 if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index) 3856 return; 3857 3858 jump_state->last_id = act->id; 3859 jump_state->last_index = act->hw_index; 3860 3861 /* nothing to do for intermediate actions */ 3862 if (--jump_state->jump_count > 1) 3863 return; 3864 3865 if (jump_state->jump_count == 1) { /* last action in the jump action list */ 3866 3867 /* create a new attribute after this action */ 3868 jump_state->jump_target = true; 3869 3870 if (tc_act->is_terminating_action) { /* the branch ends here */ 3871 attr->flags |= MLX5_ATTR_FLAG_TERMINATING; 3872 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3873 } else { /* the branch continues executing the rest of the actions */ 3874 struct mlx5e_post_act *post_act; 3875 3876 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3877 post_act = get_post_action(priv); 3878 attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act); 3879 } 3880 } else if (jump_state->jump_count == 0) { /* first attr after the jump action list */ 3881 /* This is the post action for the jumping attribute (either red or green) 3882 * Use the stored jumping_attr to set the post act id on the jumping attribute 3883 */ 3884 attr->jumping_attr = jump_state->jumping_attr; 3885 } 3886 } 3887 3888 static int 3889 parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act, 3890 struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, 3891 struct mlx5e_tc_jump_state *jump_state, 3892 struct netlink_ext_ack *extack) 3893 { 3894 struct mlx5e_tc_act_branch_ctrl cond_true, cond_false; 3895 u32 jump_count = jump_state->jump_count; 3896 int err; 3897 3898 if (!tc_act->get_branch_ctrl) 3899 return 0; 3900 3901 tc_act->get_branch_ctrl(act, &cond_true, &cond_false); 3902 3903 err = alloc_branch_attr(flow, &cond_true, 3904 &attr->branch_true, &jump_count, extack); 3905 if (err) 3906 goto out_err; 3907 3908 if (jump_count) 3909 jump_state->jumping_attr = attr->branch_true; 3910 3911 err = alloc_branch_attr(flow, &cond_false, 3912 &attr->branch_false, &jump_count, extack); 3913 if (err) 3914 goto err_branch_false; 3915 3916 if (jump_count && !jump_state->jumping_attr) 3917 jump_state->jumping_attr = attr->branch_false; 3918 3919 jump_state->jump_count = jump_count; 3920 3921 /* branching action requires its own counter */ 3922 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3923 flow_flag_set(flow, USE_ACT_STATS); 3924 3925 return 0; 3926 3927 err_branch_false: 3928 free_branch_attr(flow, attr->branch_true); 3929 out_err: 3930 return err; 3931 } 3932 3933 static int 3934 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, 3935 struct flow_action *flow_action) 3936 { 3937 struct netlink_ext_ack *extack = parse_state->extack; 3938 struct mlx5e_tc_flow *flow = parse_state->flow; 3939 struct mlx5e_tc_jump_state jump_state = {}; 3940 struct mlx5_flow_attr *attr = flow->attr; 3941 enum mlx5_flow_namespace_type ns_type; 3942 struct mlx5e_priv *priv = flow->priv; 3943 struct mlx5_flow_attr *prev_attr; 3944 struct flow_action_entry *act; 3945 struct mlx5e_tc_act *tc_act; 3946 bool is_missable; 3947 int err, i; 3948 3949 ns_type = mlx5e_get_flow_namespace(flow); 3950 list_add(&attr->list, &flow->attrs); 3951 3952 flow_action_for_each(i, act, flow_action) { 3953 jump_state.jump_target = false; 3954 is_missable = false; 3955 prev_attr = attr; 3956 3957 tc_act = mlx5e_tc_act_get(act->id, ns_type); 3958 if (!tc_act) { 3959 NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action"); 3960 err = 
-EOPNOTSUPP; 3961 goto out_free_post_acts; 3962 } 3963 3964 if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) { 3965 err = -EOPNOTSUPP; 3966 goto out_free_post_acts; 3967 } 3968 3969 err = tc_act->parse_action(parse_state, act, priv, attr); 3970 if (err) 3971 goto out_free_post_acts; 3972 3973 dec_jump_count(act, tc_act, attr, priv, &jump_state); 3974 3975 err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack); 3976 if (err) 3977 goto out_free_post_acts; 3978 3979 parse_state->actions |= attr->action; 3980 3981 /* Split attr for multi table act if not the last act. */ 3982 if (jump_state.jump_target || 3983 (tc_act->is_multi_table_act && 3984 tc_act->is_multi_table_act(priv, act, attr) && 3985 i < flow_action->num_entries - 1)) { 3986 is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false; 3987 3988 err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); 3989 if (err) 3990 goto out_free_post_acts; 3991 3992 attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type); 3993 if (!attr) { 3994 err = -ENOMEM; 3995 goto out_free_post_acts; 3996 } 3997 3998 list_add(&attr->list, &flow->attrs); 3999 } 4000 4001 if (is_missable) { 4002 /* Add counter to prev, and assign act to new (next) attr */ 4003 prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 4004 flow_flag_set(flow, USE_ACT_STATS); 4005 4006 attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie; 4007 } else if (!tc_act->stats_action) { 4008 prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie; 4009 } 4010 } 4011 4012 err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); 4013 if (err) 4014 goto out_free_post_acts; 4015 4016 err = alloc_flow_post_acts(flow, extack); 4017 if (err) 4018 goto out_free_post_acts; 4019 4020 return 0; 4021 4022 out_free_post_acts: 4023 free_flow_post_acts(flow); 4024 4025 return err; 4026 } 4027 4028 static int 4029 flow_action_supported(struct flow_action *flow_action, 4030 struct netlink_ext_ack *extack) 4031 { 4032 if (!flow_action_has_entries(flow_action)) { 4033 NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); 4034 return -EINVAL; 4035 } 4036 4037 if (!flow_action_hw_stats_check(flow_action, extack, 4038 FLOW_ACTION_HW_STATS_DELAYED_BIT)) { 4039 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); 4040 return -EOPNOTSUPP; 4041 } 4042 4043 return 0; 4044 } 4045 4046 static int 4047 parse_tc_nic_actions(struct mlx5e_priv *priv, 4048 struct flow_action *flow_action, 4049 struct mlx5e_tc_flow *flow, 4050 struct netlink_ext_ack *extack) 4051 { 4052 struct mlx5e_tc_act_parse_state *parse_state; 4053 struct mlx5e_tc_flow_parse_attr *parse_attr; 4054 struct mlx5_flow_attr *attr = flow->attr; 4055 int err; 4056 4057 err = flow_action_supported(flow_action, extack); 4058 if (err) 4059 return err; 4060 4061 attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; 4062 parse_attr = attr->parse_attr; 4063 parse_state = &parse_attr->parse_state; 4064 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); 4065 parse_state->ct_priv = get_ct_priv(priv); 4066 4067 err = parse_tc_actions(parse_state, flow_action); 4068 if (err) 4069 return err; 4070 4071 err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); 4072 if (err) 4073 return err; 4074 4075 err = verify_attr_actions(attr->action, extack); 4076 if (err) 4077 return err; 4078 4079 if (!actions_match_supported(priv, flow_action, parse_state->actions, 4080 parse_attr, flow, extack)) 4081 
return -EOPNOTSUPP; 4082 4083 return 0; 4084 } 4085 4086 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, 4087 struct net_device *peer_netdev) 4088 { 4089 struct mlx5e_priv *peer_priv; 4090 4091 peer_priv = netdev_priv(peer_netdev); 4092 4093 return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && 4094 mlx5e_eswitch_vf_rep(priv->netdev) && 4095 mlx5e_eswitch_vf_rep(peer_netdev) && 4096 mlx5e_same_hw_devs(priv, peer_priv)); 4097 } 4098 4099 static bool same_hw_reps(struct mlx5e_priv *priv, 4100 struct net_device *peer_netdev) 4101 { 4102 struct mlx5e_priv *peer_priv; 4103 4104 peer_priv = netdev_priv(peer_netdev); 4105 4106 return mlx5e_eswitch_rep(priv->netdev) && 4107 mlx5e_eswitch_rep(peer_netdev) && 4108 mlx5e_same_hw_devs(priv, peer_priv); 4109 } 4110 4111 static bool is_lag_dev(struct mlx5e_priv *priv, 4112 struct net_device *peer_netdev) 4113 { 4114 return ((mlx5_lag_is_sriov(priv->mdev) || 4115 mlx5_lag_is_multipath(priv->mdev)) && 4116 same_hw_reps(priv, peer_netdev)); 4117 } 4118 4119 static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) 4120 { 4121 return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev); 4122 } 4123 4124 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, 4125 struct net_device *out_dev) 4126 { 4127 if (is_merged_eswitch_vfs(priv, out_dev)) 4128 return true; 4129 4130 if (is_multiport_eligible(priv, out_dev)) 4131 return true; 4132 4133 if (is_lag_dev(priv, out_dev)) 4134 return true; 4135 4136 return mlx5e_eswitch_rep(out_dev) && 4137 same_port_devs(priv, netdev_priv(out_dev)); 4138 } 4139 4140 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, 4141 struct mlx5_flow_attr *attr, 4142 int ifindex, 4143 enum mlx5e_tc_int_port_type type, 4144 u32 *action, 4145 int out_index) 4146 { 4147 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; 4148 struct mlx5e_tc_int_port_priv *int_port_priv; 4149 struct mlx5e_tc_flow_parse_attr *parse_attr; 4150 struct mlx5e_tc_int_port *dest_int_port; 4151 int err; 4152 4153 parse_attr = attr->parse_attr; 4154 int_port_priv = mlx5e_get_int_port_priv(priv); 4155 4156 dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type); 4157 if (IS_ERR(dest_int_port)) 4158 return PTR_ERR(dest_int_port); 4159 4160 err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, 4161 MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, 4162 mlx5e_tc_int_port_get_metadata(dest_int_port)); 4163 if (err) { 4164 mlx5e_tc_int_port_put(int_port_priv, dest_int_port); 4165 return err; 4166 } 4167 4168 *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 4169 4170 esw_attr->dest_int_port = dest_int_port; 4171 esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; 4172 esw_attr->split_count = out_index; 4173 4174 /* Forward to root fdb for matching against the new source vport */ 4175 attr->dest_chain = 0; 4176 4177 return 0; 4178 } 4179 4180 static int 4181 parse_tc_fdb_actions(struct mlx5e_priv *priv, 4182 struct flow_action *flow_action, 4183 struct mlx5e_tc_flow *flow, 4184 struct netlink_ext_ack *extack) 4185 { 4186 struct mlx5e_tc_act_parse_state *parse_state; 4187 struct mlx5e_tc_flow_parse_attr *parse_attr; 4188 struct mlx5_flow_attr *attr = flow->attr; 4189 struct mlx5_esw_flow_attr *esw_attr; 4190 struct net_device *filter_dev; 4191 int err; 4192 4193 err = flow_action_supported(flow_action, extack); 4194 if (err) 4195 return err; 4196 4197 esw_attr = attr->esw_attr; 4198 parse_attr = attr->parse_attr; 4199 filter_dev = parse_attr->filter_dev; 4200 parse_state 
= &parse_attr->parse_state; 4201 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); 4202 parse_state->ct_priv = get_ct_priv(priv); 4203 4204 err = parse_tc_actions(parse_state, flow_action); 4205 if (err) 4206 return err; 4207 4208 /* Forward to/from internal port can only have 1 dest */ 4209 if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) && 4210 esw_attr->out_count > 1) { 4211 NL_SET_ERR_MSG_MOD(extack, 4212 "Rules with internal port can have only one destination"); 4213 return -EOPNOTSUPP; 4214 } 4215 4216 /* Forward from tunnel/internal port to internal port is not supported */ 4217 if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) && 4218 esw_attr->dest_int_port) { 4219 NL_SET_ERR_MSG_MOD(extack, 4220 "Forwarding from tunnel/internal port to internal port is not supported"); 4221 return -EOPNOTSUPP; 4222 } 4223 4224 err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); 4225 if (err) 4226 return err; 4227 4228 if (!actions_match_supported(priv, flow_action, parse_state->actions, 4229 parse_attr, flow, extack)) 4230 return -EOPNOTSUPP; 4231 4232 return 0; 4233 } 4234 4235 static void get_flags(int flags, unsigned long *flow_flags) 4236 { 4237 unsigned long __flow_flags = 0; 4238 4239 if (flags & MLX5_TC_FLAG(INGRESS)) 4240 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); 4241 if (flags & MLX5_TC_FLAG(EGRESS)) 4242 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS); 4243 4244 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) 4245 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); 4246 if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) 4247 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); 4248 if (flags & MLX5_TC_FLAG(FT_OFFLOAD)) 4249 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT); 4250 4251 *flow_flags = __flow_flags; 4252 } 4253 4254 static const struct rhashtable_params tc_ht_params = { 4255 .head_offset = offsetof(struct mlx5e_tc_flow, node), 4256 .key_offset = offsetof(struct mlx5e_tc_flow, cookie), 4257 .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), 4258 .automatic_shrinking = true, 4259 }; 4260 4261 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, 4262 unsigned long flags) 4263 { 4264 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 4265 struct mlx5e_rep_priv *rpriv; 4266 4267 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { 4268 rpriv = priv->ppriv; 4269 return &rpriv->tc_ht; 4270 } else /* NIC offload */ 4271 return &tc->ht; 4272 } 4273 4274 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) 4275 { 4276 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; 4277 struct mlx5_flow_attr *attr = flow->attr; 4278 bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK && 4279 flow_flag_test(flow, INGRESS); 4280 bool act_is_encap = !!(attr->action & 4281 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); 4282 bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom, 4283 MLX5_DEVCOM_ESW_OFFLOADS); 4284 4285 if (!esw_paired) 4286 return false; 4287 4288 if ((mlx5_lag_is_sriov(esw_attr->in_mdev) || 4289 mlx5_lag_is_multipath(esw_attr->in_mdev)) && 4290 (is_rep_ingress || act_is_encap)) 4291 return true; 4292 4293 if (mlx5_lag_is_mpesw(esw_attr->in_mdev)) 4294 return true; 4295 4296 return false; 4297 } 4298 4299 struct mlx5_flow_attr * 4300 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) 4301 { 4302 u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ? 
4303 sizeof(struct mlx5_esw_flow_attr) : 4304 sizeof(struct mlx5_nic_flow_attr); 4305 struct mlx5_flow_attr *attr; 4306 4307 attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); 4308 if (!attr) 4309 return attr; 4310 4311 INIT_LIST_HEAD(&attr->list); 4312 return attr; 4313 } 4314 4315 static void 4316 mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) 4317 { 4318 struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); 4319 4320 if (!attr) 4321 return; 4322 4323 if (attr->post_act_handle) 4324 mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle); 4325 4326 clean_encap_dests(flow->priv, flow, attr); 4327 4328 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) 4329 mlx5_fc_destroy(counter_dev, attr->counter); 4330 4331 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { 4332 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); 4333 mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr); 4334 } 4335 4336 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); 4337 4338 free_branch_attr(flow, attr->branch_true); 4339 free_branch_attr(flow, attr->branch_false); 4340 } 4341 4342 static int 4343 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, 4344 struct flow_cls_offload *f, unsigned long flow_flags, 4345 struct mlx5e_tc_flow_parse_attr **__parse_attr, 4346 struct mlx5e_tc_flow **__flow) 4347 { 4348 struct mlx5e_tc_flow_parse_attr *parse_attr; 4349 struct mlx5_flow_attr *attr; 4350 struct mlx5e_tc_flow *flow; 4351 int err = -ENOMEM; 4352 int out_index; 4353 4354 flow = kzalloc(sizeof(*flow), GFP_KERNEL); 4355 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); 4356 if (!parse_attr || !flow) 4357 goto err_free; 4358 4359 flow->flags = flow_flags; 4360 flow->cookie = f->cookie; 4361 flow->priv = priv; 4362 4363 attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow)); 4364 if (!attr) 4365 goto err_free; 4366 4367 flow->attr = attr; 4368 4369 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) 4370 INIT_LIST_HEAD(&flow->encaps[out_index].list); 4371 INIT_LIST_HEAD(&flow->hairpin); 4372 INIT_LIST_HEAD(&flow->l3_to_l2_reformat); 4373 INIT_LIST_HEAD(&flow->attrs); 4374 refcount_set(&flow->refcnt, 1); 4375 init_completion(&flow->init_done); 4376 init_completion(&flow->del_hw_done); 4377 4378 *__flow = flow; 4379 *__parse_attr = parse_attr; 4380 4381 return 0; 4382 4383 err_free: 4384 kfree(flow); 4385 kvfree(parse_attr); 4386 return err; 4387 } 4388 4389 static void 4390 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr, 4391 struct mlx5e_tc_flow_parse_attr *parse_attr, 4392 struct flow_cls_offload *f) 4393 { 4394 attr->parse_attr = parse_attr; 4395 attr->chain = f->common.chain_index; 4396 attr->prio = f->common.prio; 4397 } 4398 4399 static void 4400 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr, 4401 struct mlx5e_priv *priv, 4402 struct mlx5e_tc_flow_parse_attr *parse_attr, 4403 struct flow_cls_offload *f, 4404 struct mlx5_eswitch_rep *in_rep, 4405 struct mlx5_core_dev *in_mdev) 4406 { 4407 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 4408 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; 4409 4410 mlx5e_flow_attr_init(attr, parse_attr, f); 4411 4412 esw_attr->in_rep = in_rep; 4413 esw_attr->in_mdev = in_mdev; 4414 4415 if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == 4416 MLX5_COUNTER_SOURCE_ESWITCH) 4417 esw_attr->counter_dev = in_mdev; 4418 else 4419 esw_attr->counter_dev = priv->mdev; 4420 } 4421 4422 static struct mlx5e_tc_flow * 4423 __mlx5e_add_fdb_flow(struct mlx5e_priv 
*priv, 4424 struct flow_cls_offload *f, 4425 unsigned long flow_flags, 4426 struct net_device *filter_dev, 4427 struct mlx5_eswitch_rep *in_rep, 4428 struct mlx5_core_dev *in_mdev) 4429 { 4430 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 4431 struct netlink_ext_ack *extack = f->common.extack; 4432 struct mlx5e_tc_flow_parse_attr *parse_attr; 4433 struct mlx5e_tc_flow *flow; 4434 int attr_size, err; 4435 4436 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); 4437 attr_size = sizeof(struct mlx5_esw_flow_attr); 4438 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, 4439 &parse_attr, &flow); 4440 if (err) 4441 goto out; 4442 4443 parse_attr->filter_dev = filter_dev; 4444 mlx5e_flow_esw_attr_init(flow->attr, 4445 priv, parse_attr, 4446 f, in_rep, in_mdev); 4447 4448 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, 4449 f, filter_dev); 4450 if (err) 4451 goto err_free; 4452 4453 /* actions validation depends on parsing the ct matches first */ 4454 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, 4455 &flow->attr->ct_attr, extack); 4456 if (err) 4457 goto err_free; 4458 4459 err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); 4460 if (err) 4461 goto err_free; 4462 4463 err = mlx5e_tc_add_fdb_flow(priv, flow, extack); 4464 complete_all(&flow->init_done); 4465 if (err) { 4466 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) 4467 goto err_free; 4468 4469 add_unready_flow(flow); 4470 } 4471 4472 return flow; 4473 4474 err_free: 4475 mlx5e_flow_put(priv, flow); 4476 out: 4477 return ERR_PTR(err); 4478 } 4479 4480 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, 4481 struct mlx5e_tc_flow *flow, 4482 unsigned long flow_flags) 4483 { 4484 struct mlx5e_priv *priv = flow->priv, *peer_priv; 4485 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; 4486 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; 4487 struct mlx5_devcom *devcom = priv->mdev->priv.devcom; 4488 struct mlx5e_tc_flow_parse_attr *parse_attr; 4489 struct mlx5e_rep_priv *peer_urpriv; 4490 struct mlx5e_tc_flow *peer_flow; 4491 struct mlx5_core_dev *in_mdev; 4492 int err = 0; 4493 4494 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4495 if (!peer_esw) 4496 return -ENODEV; 4497 4498 peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); 4499 peer_priv = netdev_priv(peer_urpriv->netdev); 4500 4501 /* in_mdev is assigned the mdev from which the packet originated. 4502 * Packets redirected to the uplink therefore use the mdev of the 4503 * original flow, while packets redirected from the uplink use the 4504 * peer mdev. 4505 * Multiport eswitch is the special case where we need to 4506 * keep the original mdev.
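 * (Under LAG multiport eswitch both the original and the peer rule
 * therefore keep the mdev the filter was originally added on.)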
4507 */ 4508 if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev)) 4509 in_mdev = peer_priv->mdev; 4510 else 4511 in_mdev = priv->mdev; 4512 4513 parse_attr = flow->attr->parse_attr; 4514 peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags, 4515 parse_attr->filter_dev, 4516 attr->in_rep, in_mdev); 4517 if (IS_ERR(peer_flow)) { 4518 err = PTR_ERR(peer_flow); 4519 goto out; 4520 } 4521 4522 flow->peer_flow = peer_flow; 4523 flow_flag_set(flow, DUP); 4524 mutex_lock(&esw->offloads.peer_mutex); 4525 list_add_tail(&flow->peer, &esw->offloads.peer_flows); 4526 mutex_unlock(&esw->offloads.peer_mutex); 4527 4528 out: 4529 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4530 return err; 4531 } 4532 4533 static int 4534 mlx5e_add_fdb_flow(struct mlx5e_priv *priv, 4535 struct flow_cls_offload *f, 4536 unsigned long flow_flags, 4537 struct net_device *filter_dev, 4538 struct mlx5e_tc_flow **__flow) 4539 { 4540 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4541 struct mlx5_eswitch_rep *in_rep = rpriv->rep; 4542 struct mlx5_core_dev *in_mdev = priv->mdev; 4543 struct mlx5e_tc_flow *flow; 4544 int err; 4545 4546 flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, 4547 in_mdev); 4548 if (IS_ERR(flow)) 4549 return PTR_ERR(flow); 4550 4551 if (is_peer_flow_needed(flow)) { 4552 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags); 4553 if (err) { 4554 mlx5e_tc_del_fdb_flow(priv, flow); 4555 goto out; 4556 } 4557 } 4558 4559 *__flow = flow; 4560 4561 return 0; 4562 4563 out: 4564 return err; 4565 } 4566 4567 static int 4568 mlx5e_add_nic_flow(struct mlx5e_priv *priv, 4569 struct flow_cls_offload *f, 4570 unsigned long flow_flags, 4571 struct net_device *filter_dev, 4572 struct mlx5e_tc_flow **__flow) 4573 { 4574 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 4575 struct netlink_ext_ack *extack = f->common.extack; 4576 struct mlx5e_tc_flow_parse_attr *parse_attr; 4577 struct mlx5e_tc_flow *flow; 4578 int attr_size, err; 4579 4580 if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { 4581 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) 4582 return -EOPNOTSUPP; 4583 } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) { 4584 return -EOPNOTSUPP; 4585 } 4586 4587 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); 4588 attr_size = sizeof(struct mlx5_nic_flow_attr); 4589 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, 4590 &parse_attr, &flow); 4591 if (err) 4592 goto out; 4593 4594 parse_attr->filter_dev = filter_dev; 4595 mlx5e_flow_attr_init(flow->attr, parse_attr, f); 4596 4597 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, 4598 f, filter_dev); 4599 if (err) 4600 goto err_free; 4601 4602 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, 4603 &flow->attr->ct_attr, extack); 4604 if (err) 4605 goto err_free; 4606 4607 err = parse_tc_nic_actions(priv, &rule->action, flow, extack); 4608 if (err) 4609 goto err_free; 4610 4611 err = mlx5e_tc_add_nic_flow(priv, flow, extack); 4612 if (err) 4613 goto err_free; 4614 4615 flow_flag_set(flow, OFFLOADED); 4616 *__flow = flow; 4617 4618 return 0; 4619 4620 err_free: 4621 flow_flag_set(flow, FAILED); 4622 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); 4623 mlx5e_flow_put(priv, flow); 4624 out: 4625 return err; 4626 } 4627 4628 static int 4629 mlx5e_tc_add_flow(struct mlx5e_priv *priv, 4630 struct flow_cls_offload *f, 4631 unsigned long flags, 4632 struct net_device *filter_dev, 4633 struct mlx5e_tc_flow **flow) 4634 { 4635 struct 
mlx5_eswitch *esw = priv->mdev->priv.eswitch; 4636 unsigned long flow_flags; 4637 int err; 4638 4639 get_flags(flags, &flow_flags); 4640 4641 if (!tc_can_offload_extack(priv->netdev, f->common.extack)) 4642 return -EOPNOTSUPP; 4643 4644 if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) 4645 err = mlx5e_add_fdb_flow(priv, f, flow_flags, 4646 filter_dev, flow); 4647 else 4648 err = mlx5e_add_nic_flow(priv, f, flow_flags, 4649 filter_dev, flow); 4650 4651 return err; 4652 } 4653 4654 static bool is_flow_rule_duplicate_allowed(struct net_device *dev, 4655 struct mlx5e_rep_priv *rpriv) 4656 { 4657 /* Offloaded flow rule is allowed to duplicate on non-uplink representor 4658 * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this 4659 * function is called from NIC mode. 4660 */ 4661 return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK; 4662 } 4663 4664 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, 4665 struct flow_cls_offload *f, unsigned long flags) 4666 { 4667 struct netlink_ext_ack *extack = f->common.extack; 4668 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 4669 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4670 struct mlx5e_tc_flow *flow; 4671 int err = 0; 4672 4673 if (!mlx5_esw_hold(priv->mdev)) 4674 return -EBUSY; 4675 4676 mlx5_esw_get(priv->mdev); 4677 4678 rcu_read_lock(); 4679 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); 4680 if (flow) { 4681 /* Same flow rule offloaded to non-uplink representor sharing tc block, 4682 * just return 0. 4683 */ 4684 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev) 4685 goto rcu_unlock; 4686 4687 NL_SET_ERR_MSG_MOD(extack, 4688 "flow cookie already exists, ignoring"); 4689 netdev_warn_once(priv->netdev, 4690 "flow cookie %lx already exists, ignoring\n", 4691 f->cookie); 4692 err = -EEXIST; 4693 goto rcu_unlock; 4694 } 4695 rcu_unlock: 4696 rcu_read_unlock(); 4697 if (flow) 4698 goto out; 4699 4700 trace_mlx5e_configure_flower(f); 4701 err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); 4702 if (err) 4703 goto out; 4704 4705 /* Flow rule offloaded to non-uplink representor sharing tc block, 4706 * set the flow's owner dev. 4707 */ 4708 if (is_flow_rule_duplicate_allowed(dev, rpriv)) 4709 flow->orig_dev = dev; 4710 4711 err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); 4712 if (err) 4713 goto err_free; 4714 4715 mlx5_esw_release(priv->mdev); 4716 return 0; 4717 4718 err_free: 4719 mlx5e_flow_put(priv, flow); 4720 out: 4721 mlx5_esw_put(priv->mdev); 4722 mlx5_esw_release(priv->mdev); 4723 return err; 4724 } 4725 4726 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) 4727 { 4728 bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS)); 4729 bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS)); 4730 4731 return flow_flag_test(flow, INGRESS) == dir_ingress && 4732 flow_flag_test(flow, EGRESS) == dir_egress; 4733 } 4734 4735 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, 4736 struct flow_cls_offload *f, unsigned long flags) 4737 { 4738 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 4739 struct mlx5e_tc_flow *flow; 4740 int err; 4741 4742 rcu_read_lock(); 4743 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); 4744 if (!flow || !same_flow_direction(flow, flags)) { 4745 err = -EINVAL; 4746 goto errout; 4747 } 4748 4749 /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag 4750 * set. 
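 * A concurrent deleter that loses the flow_flag_test_and_set() race
 * below returns -EINVAL instead of removing the flow twice.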
4751 */ 4752 if (flow_flag_test_and_set(flow, DELETED)) { 4753 err = -EINVAL; 4754 goto errout; 4755 } 4756 rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); 4757 rcu_read_unlock(); 4758 4759 trace_mlx5e_delete_flower(f); 4760 mlx5e_flow_put(priv, flow); 4761 4762 mlx5_esw_put(priv->mdev); 4763 return 0; 4764 4765 errout: 4766 rcu_read_unlock(); 4767 return err; 4768 } 4769 4770 int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv, 4771 struct flow_offload_action *fl_act) 4772 { 4773 return mlx5e_tc_act_stats_fill_stats(get_act_stats_handle(priv), fl_act); 4774 } 4775 4776 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, 4777 struct flow_cls_offload *f, unsigned long flags) 4778 { 4779 struct mlx5_devcom *devcom = priv->mdev->priv.devcom; 4780 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 4781 struct mlx5_eswitch *peer_esw; 4782 struct mlx5e_tc_flow *flow; 4783 struct mlx5_fc *counter; 4784 u64 lastuse = 0; 4785 u64 packets = 0; 4786 u64 bytes = 0; 4787 int err = 0; 4788 4789 rcu_read_lock(); 4790 flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie, 4791 tc_ht_params)); 4792 rcu_read_unlock(); 4793 if (IS_ERR(flow)) 4794 return PTR_ERR(flow); 4795 4796 if (!same_flow_direction(flow, flags)) { 4797 err = -EINVAL; 4798 goto errout; 4799 } 4800 4801 if (mlx5e_is_offloaded_flow(flow)) { 4802 if (flow_flag_test(flow, USE_ACT_STATS)) { 4803 f->use_act_stats = true; 4804 } else { 4805 counter = mlx5e_tc_get_counter(flow); 4806 if (!counter) 4807 goto errout; 4808 4809 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); 4810 } 4811 } 4812 4813 /* Under multipath it's possible for one rule to be currently 4814 * un-offloaded while the other rule is offloaded. 4815 */ 4816 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4817 if (!peer_esw) 4818 goto out; 4819 4820 if (flow_flag_test(flow, DUP) && 4821 flow_flag_test(flow->peer_flow, OFFLOADED)) { 4822 u64 bytes2; 4823 u64 packets2; 4824 u64 lastuse2; 4825 4826 if (flow_flag_test(flow, USE_ACT_STATS)) { 4827 f->use_act_stats = true; 4828 } else { 4829 counter = mlx5e_tc_get_counter(flow->peer_flow); 4830 if (!counter) 4831 goto no_peer_counter; 4832 mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); 4833 4834 bytes += bytes2; 4835 packets += packets2; 4836 lastuse = max_t(u64, lastuse, lastuse2); 4837 } 4838 } 4839 4840 no_peer_counter: 4841 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); 4842 out: 4843 flow_stats_update(&f->stats, bytes, packets, 0, lastuse, 4844 FLOW_ACTION_HW_STATS_DELAYED); 4845 trace_mlx5e_stats_flower(f); 4846 errout: 4847 mlx5e_flow_put(priv, flow); 4848 return err; 4849 } 4850 4851 static int apply_police_params(struct mlx5e_priv *priv, u64 rate, 4852 struct netlink_ext_ack *extack) 4853 { 4854 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4855 struct mlx5_eswitch *esw; 4856 u32 rate_mbps = 0; 4857 u16 vport_num; 4858 int err; 4859 4860 vport_num = rpriv->rep->vport; 4861 if (vport_num >= MLX5_VPORT_ECPF) { 4862 NL_SET_ERR_MSG_MOD(extack, 4863 "Ingress rate limit is supported only for Eswitch ports connected to VFs"); 4864 return -EOPNOTSUPP; 4865 } 4866 4867 esw = priv->mdev->priv.eswitch; 4868 /* rate is given in bytes/sec. 4869 * First convert to bits/sec and then round to the nearest Mbit/sec, 4870 * where Mbit means a million bits. 4871 * Moreover, if the rate is non-zero we choose to configure a minimum of 4872 * 1 Mbit/sec.
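 * Worked example: rate = 1562500 bytes/sec is 12.5 Mbit/sec;
 * (1562500 * 8 + 500000) / 1000000 = 13, so rate_mbps becomes 13.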
4873 */ 4874 if (rate) { 4875 rate = (rate * BITS_PER_BYTE) + 500000; 4876 do_div(rate, 1000000); 4877 rate_mbps = max_t(u32, rate, 1); 4878 } 4879 4880 err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps); 4881 if (err) 4882 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); 4883 4884 return err; 4885 } 4886 4887 static int 4888 tc_matchall_police_validate(const struct flow_action *action, 4889 const struct flow_action_entry *act, 4890 struct netlink_ext_ack *extack) 4891 { 4892 if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) { 4893 NL_SET_ERR_MSG_MOD(extack, 4894 "Offload not supported when conform action is not continue"); 4895 return -EOPNOTSUPP; 4896 } 4897 4898 if (act->police.exceed.act_id != FLOW_ACTION_DROP) { 4899 NL_SET_ERR_MSG_MOD(extack, 4900 "Offload not supported when exceed action is not drop"); 4901 return -EOPNOTSUPP; 4902 } 4903 4904 if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && 4905 !flow_action_is_last_entry(action, act)) { 4906 NL_SET_ERR_MSG_MOD(extack, 4907 "Offload not supported when conform action is ok, but action is not last"); 4908 return -EOPNOTSUPP; 4909 } 4910 4911 if (act->police.peakrate_bytes_ps || 4912 act->police.avrate || act->police.overhead) { 4913 NL_SET_ERR_MSG_MOD(extack, 4914 "Offload not supported when peakrate/avrate/overhead is configured"); 4915 return -EOPNOTSUPP; 4916 } 4917 4918 return 0; 4919 } 4920 4921 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, 4922 struct flow_action *flow_action, 4923 struct netlink_ext_ack *extack) 4924 { 4925 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4926 const struct flow_action_entry *act; 4927 int err; 4928 int i; 4929 4930 if (!flow_action_has_entries(flow_action)) { 4931 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); 4932 return -EINVAL; 4933 } 4934 4935 if (!flow_offload_has_one_action(flow_action)) { 4936 NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); 4937 return -EOPNOTSUPP; 4938 } 4939 4940 if (!flow_action_basic_hw_stats_check(flow_action, extack)) { 4941 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); 4942 return -EOPNOTSUPP; 4943 } 4944 4945 flow_action_for_each(i, act, flow_action) { 4946 switch (act->id) { 4947 case FLOW_ACTION_POLICE: 4948 err = tc_matchall_police_validate(flow_action, act, extack); 4949 if (err) 4950 return err; 4951 4952 err = apply_police_params(priv, act->police.rate_bytes_ps, extack); 4953 if (err) 4954 return err; 4955 4956 rpriv->prev_vf_vport_stats = priv->stats.vf_vport; 4957 break; 4958 default: 4959 NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); 4960 return -EOPNOTSUPP; 4961 } 4962 } 4963 4964 return 0; 4965 } 4966 4967 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, 4968 struct tc_cls_matchall_offload *ma) 4969 { 4970 struct netlink_ext_ack *extack = ma->common.extack; 4971 4972 if (ma->common.prio != 1) { 4973 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); 4974 return -EINVAL; 4975 } 4976 4977 return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); 4978 } 4979 4980 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, 4981 struct tc_cls_matchall_offload *ma) 4982 { 4983 struct netlink_ext_ack *extack = ma->common.extack; 4984 4985 return apply_police_params(priv, 0, extack); 4986 } 4987 4988 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, 4989 struct tc_cls_matchall_offload *ma) 4990 { 4991 struct mlx5e_rep_priv *rpriv = priv->ppriv; 4992 struct 
rtnl_link_stats64 cur_stats; 4993 u64 dbytes; 4994 u64 dpkts; 4995 4996 cur_stats = priv->stats.vf_vport; 4997 dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; 4998 dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; 4999 rpriv->prev_vf_vport_stats = cur_stats; 5000 flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies, 5001 FLOW_ACTION_HW_STATS_DELAYED); 5002 } 5003 5004 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, 5005 struct mlx5e_priv *peer_priv) 5006 { 5007 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 5008 struct mlx5_core_dev *peer_mdev = peer_priv->mdev; 5009 struct mlx5e_hairpin_entry *hpe, *tmp; 5010 LIST_HEAD(init_wait_list); 5011 u16 peer_vhca_id; 5012 int bkt; 5013 5014 if (!mlx5e_same_hw_devs(priv, peer_priv)) 5015 return; 5016 5017 peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); 5018 5019 mutex_lock(&tc->hairpin_tbl_lock); 5020 hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) 5021 if (refcount_inc_not_zero(&hpe->refcnt)) 5022 list_add(&hpe->dead_peer_wait_list, &init_wait_list); 5023 mutex_unlock(&tc->hairpin_tbl_lock); 5024 5025 list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { 5026 wait_for_completion(&hpe->res_ready); 5027 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) 5028 mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair); 5029 5030 mlx5e_hairpin_put(priv, hpe); 5031 } 5032 } 5033 5034 static int mlx5e_tc_netdev_event(struct notifier_block *this, 5035 unsigned long event, void *ptr) 5036 { 5037 struct net_device *ndev = netdev_notifier_info_to_dev(ptr); 5038 struct mlx5e_priv *peer_priv; 5039 struct mlx5e_tc_table *tc; 5040 struct mlx5e_priv *priv; 5041 5042 if (ndev->netdev_ops != &mlx5e_netdev_ops || 5043 event != NETDEV_UNREGISTER || 5044 ndev->reg_state == NETREG_REGISTERED) 5045 return NOTIFY_DONE; 5046 5047 tc = container_of(this, struct mlx5e_tc_table, netdevice_nb); 5048 priv = tc->priv; 5049 peer_priv = netdev_priv(ndev); 5050 if (priv == peer_priv || 5051 !(priv->netdev->features & NETIF_F_HW_TC)) 5052 return NOTIFY_DONE; 5053 5054 mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv); 5055 5056 return NOTIFY_DONE; 5057 } 5058 5059 static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv) 5060 { 5061 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 5062 struct mlx5_flow_table **ft = &tc->miss_t; 5063 struct mlx5_flow_table_attr ft_attr = {}; 5064 struct mlx5_flow_namespace *ns; 5065 int err = 0; 5066 5067 ft_attr.max_fte = 1; 5068 ft_attr.autogroup.max_num_groups = 1; 5069 ft_attr.level = MLX5E_TC_MISS_LEVEL; 5070 ft_attr.prio = 0; 5071 ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); 5072 5073 *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 5074 if (IS_ERR(*ft)) { 5075 err = PTR_ERR(*ft); 5076 netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err); 5077 } 5078 5079 return err; 5080 } 5081 5082 static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv) 5083 { 5084 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 5085 5086 mlx5_destroy_flow_table(tc->miss_t); 5087 } 5088 5089 int mlx5e_tc_nic_init(struct mlx5e_priv *priv) 5090 { 5091 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 5092 struct mlx5_core_dev *dev = priv->mdev; 5093 struct mapping_ctx *chains_mapping; 5094 struct mlx5_chains_attr attr = {}; 5095 u64 mapping_id; 5096 int err; 5097 5098 mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); 5099 mutex_init(&tc->t_lock); 5100 mutex_init(&tc->hairpin_tbl_lock); 
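/* hairpin_tbl entries are refcounted and may be published before their
 * resources are ready; lookups take a reference and wait on
 * hpe->res_ready, as mlx5e_tc_hairpin_update_dead_peer() above does.
 */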
5101 hash_init(tc->hairpin_tbl); 5102 tc->priv = priv; 5103 5104 err = rhashtable_init(&tc->ht, &tc_ht_params); 5105 if (err) 5106 return err; 5107 5108 lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); 5109 lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0); 5110 5111 mapping_id = mlx5_query_nic_system_image_guid(dev); 5112 5113 chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, 5114 sizeof(struct mlx5_mapped_obj), 5115 MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); 5116 5117 if (IS_ERR(chains_mapping)) { 5118 err = PTR_ERR(chains_mapping); 5119 goto err_mapping; 5120 } 5121 tc->mapping = chains_mapping; 5122 5123 err = mlx5e_tc_nic_create_miss_table(priv); 5124 if (err) 5125 goto err_chains; 5126 5127 if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) 5128 attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | 5129 MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; 5130 attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; 5131 attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; 5132 attr.default_ft = tc->miss_t; 5133 attr.mapping = chains_mapping; 5134 attr.fs_base_prio = MLX5E_TC_PRIO; 5135 5136 tc->chains = mlx5_chains_create(dev, &attr); 5137 if (IS_ERR(tc->chains)) { 5138 err = PTR_ERR(tc->chains); 5139 goto err_miss; 5140 } 5141 5142 mlx5_chains_print_info(tc->chains); 5143 5144 tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); 5145 tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr, 5146 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); 5147 5148 tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; 5149 err = register_netdevice_notifier_dev_net(priv->netdev, 5150 &tc->netdevice_nb, 5151 &tc->netdevice_nn); 5152 if (err) { 5153 tc->netdevice_nb.notifier_call = NULL; 5154 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); 5155 goto err_reg; 5156 } 5157 5158 mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs)); 5159 5160 tc->action_stats_handle = mlx5e_tc_act_stats_create(); 5161 if (IS_ERR(tc->action_stats_handle)) { 5162 err = PTR_ERR(tc->action_stats_handle); 5163 goto err_act_stats; 5164 } 5165 5166 return 0; 5167 5168 err_act_stats: 5169 unregister_netdevice_notifier_dev_net(priv->netdev, 5170 &tc->netdevice_nb, 5171 &tc->netdevice_nn); 5172 err_reg: 5173 mlx5_tc_ct_clean(tc->ct); 5174 mlx5e_tc_post_act_destroy(tc->post_act); 5175 mlx5_chains_destroy(tc->chains); 5176 err_miss: 5177 mlx5e_tc_nic_destroy_miss_table(priv); 5178 err_chains: 5179 mapping_destroy(chains_mapping); 5180 err_mapping: 5181 rhashtable_destroy(&tc->ht); 5182 return err; 5183 } 5184 5185 static void _mlx5e_tc_del_flow(void *ptr, void *arg) 5186 { 5187 struct mlx5e_tc_flow *flow = ptr; 5188 struct mlx5e_priv *priv = flow->priv; 5189 5190 mlx5e_tc_del_flow(priv, flow); 5191 kfree(flow); 5192 } 5193 5194 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) 5195 { 5196 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 5197 5198 debugfs_remove_recursive(tc->dfs_root); 5199 5200 if (tc->netdevice_nb.notifier_call) 5201 unregister_netdevice_notifier_dev_net(priv->netdev, 5202 &tc->netdevice_nb, 5203 &tc->netdevice_nn); 5204 5205 mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr); 5206 mutex_destroy(&tc->hairpin_tbl_lock); 5207 5208 rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); 5209 5210 if (!IS_ERR_OR_NULL(tc->t)) { 5211 mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL); 5212 tc->t = NULL; 5213 } 5214 mutex_destroy(&tc->t_lock); 5215 5216 mlx5_tc_ct_clean(tc->ct); 5217 mlx5e_tc_post_act_destroy(tc->post_act); 5218 
mapping_destroy(tc->mapping); 5219 mlx5_chains_destroy(tc->chains); 5220 mlx5e_tc_nic_destroy_miss_table(priv); 5221 mlx5e_tc_act_stats_free(tc->action_stats_handle); 5222 } 5223 5224 int mlx5e_tc_ht_init(struct rhashtable *tc_ht) 5225 { 5226 int err; 5227 5228 err = rhashtable_init(tc_ht, &tc_ht_params); 5229 if (err) 5230 return err; 5231 5232 lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); 5233 lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0); 5234 5235 return 0; 5236 } 5237 5238 void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) 5239 { 5240 rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); 5241 } 5242 5243 int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) 5244 { 5245 const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); 5246 struct mlx5e_rep_priv *rpriv; 5247 struct mapping_ctx *mapping; 5248 struct mlx5_eswitch *esw; 5249 struct mlx5e_priv *priv; 5250 u64 mapping_id; 5251 int err = 0; 5252 5253 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); 5254 priv = netdev_priv(rpriv->netdev); 5255 esw = priv->mdev->priv.eswitch; 5256 5257 uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw), 5258 MLX5_FLOW_NAMESPACE_FDB); 5259 uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev), 5260 esw_chains(esw), 5261 &esw->offloads.mod_hdr, 5262 MLX5_FLOW_NAMESPACE_FDB, 5263 uplink_priv->post_act); 5264 5265 uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev)); 5266 5267 uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act); 5268 5269 mapping_id = mlx5_query_nic_system_image_guid(esw->dev); 5270 5271 mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL, 5272 sizeof(struct tunnel_match_key), 5273 TUNNEL_INFO_BITS_MASK, true); 5274 5275 if (IS_ERR(mapping)) { 5276 err = PTR_ERR(mapping); 5277 goto err_tun_mapping; 5278 } 5279 uplink_priv->tunnel_mapping = mapping; 5280 5281 /* The last two values are reserved for stack devices slow path table mark 5282 * and bridge ingress push mark.
5283 */ 5284 mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS, 5285 sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true); 5286 if (IS_ERR(mapping)) { 5287 err = PTR_ERR(mapping); 5288 goto err_enc_opts_mapping; 5289 } 5290 uplink_priv->tunnel_enc_opts_mapping = mapping; 5291 5292 uplink_priv->encap = mlx5e_tc_tun_init(priv); 5293 if (IS_ERR(uplink_priv->encap)) { 5294 err = PTR_ERR(uplink_priv->encap); 5295 goto err_register_fib_notifier; 5296 } 5297 5298 uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create(); 5299 if (IS_ERR(uplink_priv->action_stats_handle)) { 5300 err = PTR_ERR(uplink_priv->action_stats_handle); 5301 goto err_action_counter; 5302 } 5303 5304 return 0; 5305 5306 err_action_counter: 5307 mlx5e_tc_tun_cleanup(uplink_priv->encap); 5308 err_register_fib_notifier: 5309 mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); 5310 err_enc_opts_mapping: 5311 mapping_destroy(uplink_priv->tunnel_mapping); 5312 err_tun_mapping: 5313 mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); 5314 mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); 5315 mlx5_tc_ct_clean(uplink_priv->ct_priv); 5316 netdev_warn(priv->netdev, 5317 "Failed to initialize tc (eswitch), err: %d", err); 5318 mlx5e_tc_post_act_destroy(uplink_priv->post_act); 5319 return err; 5320 } 5321 5322 void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv) 5323 { 5324 struct mlx5e_rep_priv *rpriv; 5325 struct mlx5_eswitch *esw; 5326 struct mlx5e_priv *priv; 5327 5328 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); 5329 priv = netdev_priv(rpriv->netdev); 5330 esw = priv->mdev->priv.eswitch; 5331 5332 mlx5e_tc_clean_fdb_peer_flows(esw); 5333 5334 mlx5e_tc_tun_cleanup(uplink_priv->encap); 5335 5336 mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); 5337 mapping_destroy(uplink_priv->tunnel_mapping); 5338 5339 mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); 5340 mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); 5341 mlx5_tc_ct_clean(uplink_priv->ct_priv); 5342 mlx5e_flow_meters_cleanup(uplink_priv->flow_meters); 5343 mlx5e_tc_post_act_destroy(uplink_priv->post_act); 5344 mlx5e_tc_act_stats_free(uplink_priv->action_stats_handle); 5345 } 5346 5347 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) 5348 { 5349 struct rhashtable *tc_ht = get_tc_ht(priv, flags); 5350 5351 return atomic_read(&tc_ht->nelems); 5352 } 5353 5354 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) 5355 { 5356 struct mlx5e_tc_flow *flow, *tmp; 5357 5358 list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) 5359 __mlx5e_tc_del_fdb_peer_flow(flow); 5360 } 5361 5362 void mlx5e_tc_reoffload_flows_work(struct work_struct *work) 5363 { 5364 struct mlx5_rep_uplink_priv *rpriv = 5365 container_of(work, struct mlx5_rep_uplink_priv, 5366 reoffload_flows_work); 5367 struct mlx5e_tc_flow *flow, *tmp; 5368 5369 mutex_lock(&rpriv->unready_flows_lock); 5370 list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { 5371 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) 5372 unready_flow_del(flow); 5373 } 5374 mutex_unlock(&rpriv->unready_flows_lock); 5375 } 5376 5377 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, 5378 struct flow_cls_offload *cls_flower, 5379 unsigned long flags) 5380 { 5381 switch (cls_flower->command) { 5382 case FLOW_CLS_REPLACE: 5383 return mlx5e_configure_flower(priv->netdev, priv, cls_flower, 5384 flags); 5385 case FLOW_CLS_DESTROY: 5386 return mlx5e_delete_flower(priv->netdev, priv, cls_flower, 5387 flags); 5388 
case FLOW_CLS_STATS: 5389 return mlx5e_stats_flower(priv->netdev, priv, cls_flower, 5390 flags); 5391 default: 5392 return -EOPNOTSUPP; 5393 } 5394 } 5395 5396 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, 5397 void *cb_priv) 5398 { 5399 unsigned long flags = MLX5_TC_FLAG(INGRESS); 5400 struct mlx5e_priv *priv = cb_priv; 5401 5402 if (!priv->netdev || !netif_device_present(priv->netdev)) 5403 return -EOPNOTSUPP; 5404 5405 if (mlx5e_is_uplink_rep(priv)) 5406 flags |= MLX5_TC_FLAG(ESW_OFFLOAD); 5407 else 5408 flags |= MLX5_TC_FLAG(NIC_OFFLOAD); 5409 5410 switch (type) { 5411 case TC_SETUP_CLSFLOWER: 5412 return mlx5e_setup_tc_cls_flower(priv, type_data, flags); 5413 default: 5414 return -EOPNOTSUPP; 5415 } 5416 } 5417 5418 static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, 5419 struct mlx5e_tc_update_priv *tc_priv, 5420 u32 tunnel_id) 5421 { 5422 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 5423 struct tunnel_match_enc_opts enc_opts = {}; 5424 struct mlx5_rep_uplink_priv *uplink_priv; 5425 struct mlx5e_rep_priv *uplink_rpriv; 5426 struct metadata_dst *tun_dst; 5427 struct tunnel_match_key key; 5428 u32 tun_id, enc_opts_id; 5429 struct net_device *dev; 5430 int err; 5431 5432 enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; 5433 tun_id = tunnel_id >> ENC_OPTS_BITS; 5434 5435 if (!tun_id) 5436 return true; 5437 5438 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 5439 uplink_priv = &uplink_rpriv->uplink_priv; 5440 5441 err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); 5442 if (err) { 5443 netdev_dbg(priv->netdev, 5444 "Couldn't find tunnel for tun_id: %d, err: %d\n", 5445 tun_id, err); 5446 return false; 5447 } 5448 5449 if (enc_opts_id) { 5450 err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, 5451 enc_opts_id, &enc_opts); 5452 if (err) { 5453 netdev_dbg(priv->netdev, 5454 "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", 5455 enc_opts_id, err); 5456 return false; 5457 } 5458 } 5459 5460 switch (key.enc_control.addr_type) { 5461 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 5462 tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, 5463 key.enc_ip.tos, key.enc_ip.ttl, 5464 key.enc_tp.dst, TUNNEL_KEY, 5465 key32_to_tunnel_id(key.enc_key_id.keyid), 5466 enc_opts.key.len); 5467 break; 5468 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 5469 tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, 5470 key.enc_ip.tos, key.enc_ip.ttl, 5471 key.enc_tp.dst, 0, TUNNEL_KEY, 5472 key32_to_tunnel_id(key.enc_key_id.keyid), 5473 enc_opts.key.len); 5474 break; 5475 default: 5476 netdev_dbg(priv->netdev, 5477 "Couldn't restore tunnel, unsupported addr_type: %d\n", 5478 key.enc_control.addr_type); 5479 return false; 5480 } 5481 5482 if (!tun_dst) { 5483 netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n"); 5484 return false; 5485 } 5486 5487 tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; 5488 5489 if (enc_opts.key.len) 5490 ip_tunnel_info_opts_set(&tun_dst->u.tun_info, 5491 enc_opts.key.data, 5492 enc_opts.key.len, 5493 enc_opts.key.dst_opt_type); 5494 5495 skb_dst_set(skb, (struct dst_entry *)tun_dst); 5496 dev = dev_get_by_index(&init_net, key.filter_ifindex); 5497 if (!dev) { 5498 netdev_dbg(priv->netdev, 5499 "Couldn't find tunnel device with ifindex: %d\n", 5500 key.filter_ifindex); 5501 return false; 5502 } 5503 5504 /* Set fwd_dev so we do dev_put() after datapath */ 5505 tc_priv->fwd_dev = dev; 5506 5507 skb->dev = dev; 5508 5509 return true; 5510 } 5511 5512 static bool 
mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv, 5513 struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id, 5514 u32 tunnel_id, struct mlx5e_tc_update_priv *tc_priv) 5515 { 5516 struct mlx5e_priv *priv = netdev_priv(skb->dev); 5517 struct tc_skb_ext *tc_skb_ext; 5518 u64 act_miss_cookie; 5519 u32 chain; 5520 5521 chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0; 5522 act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ? 5523 mapped_obj->act_miss_cookie : 0; 5524 if (chain || act_miss_cookie) { 5525 if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id)) 5526 return false; 5527 5528 tc_skb_ext = tc_skb_ext_alloc(skb); 5529 if (!tc_skb_ext) { 5530 WARN_ON(1); 5531 return false; 5532 } 5533 5534 if (act_miss_cookie) { 5535 tc_skb_ext->act_miss_cookie = act_miss_cookie; 5536 tc_skb_ext->act_miss = 1; 5537 } else { 5538 tc_skb_ext->chain = chain; 5539 } 5540 } 5541 5542 if (tc_priv) 5543 return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id); 5544 5545 return true; 5546 } 5547 5548 static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, 5549 struct mlx5_mapped_obj *mapped_obj, 5550 struct mlx5e_tc_update_priv *tc_priv) 5551 { 5552 if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) { 5553 netdev_dbg(priv->netdev, 5554 "Failed to restore tunnel info for sampled packet\n"); 5555 return; 5556 } 5557 mlx5e_tc_sample_skb(skb, mapped_obj); 5558 } 5559 5560 static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb, 5561 struct mlx5_mapped_obj *mapped_obj, 5562 struct mlx5e_tc_update_priv *tc_priv, 5563 u32 tunnel_id) 5564 { 5565 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 5566 struct mlx5_rep_uplink_priv *uplink_priv; 5567 struct mlx5e_rep_priv *uplink_rpriv; 5568 bool forward_tx = false; 5569 5570 /* Tunnel restore takes precedence over int port restore */ 5571 if (tunnel_id) 5572 return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id); 5573 5574 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 5575 uplink_priv = &uplink_rpriv->uplink_priv; 5576 5577 if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb, 5578 mapped_obj->int_port_metadata, &forward_tx)) { 5579 /* Set fwd_dev for future dev_put */ 5580 tc_priv->fwd_dev = skb->dev; 5581 tc_priv->forward_tx = forward_tx; 5582 5583 return true; 5584 } 5585 5586 return false; 5587 } 5588 5589 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, 5590 struct mapping_ctx *mapping_ctx, u32 mapped_obj_id, 5591 struct mlx5_tc_ct_priv *ct_priv, 5592 u32 zone_restore_id, u32 tunnel_id, 5593 struct mlx5e_tc_update_priv *tc_priv) 5594 { 5595 struct mlx5e_priv *priv = netdev_priv(skb->dev); 5596 struct mlx5_mapped_obj mapped_obj; 5597 int err; 5598 5599 err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj); 5600 if (err) { 5601 netdev_dbg(skb->dev, 5602 "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n", 5603 mapped_obj_id, err); 5604 return false; 5605 } 5606 5607 switch (mapped_obj.type) { 5608 case MLX5_MAPPED_OBJ_CHAIN: 5609 case MLX5_MAPPED_OBJ_ACT_MISS: 5610 return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id, 5611 tunnel_id, tc_priv); 5612 case MLX5_MAPPED_OBJ_SAMPLE: 5613 mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv); 5614 tc_priv->skb_done = true; 5615 return true; 5616 case MLX5_MAPPED_OBJ_INT_PORT_METADATA: 5617 return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, 
tc_priv, tunnel_id); 5618 default: 5619 netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); 5620 return false; 5621 } 5622 5623 return false; 5624 } 5625 5626 bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb) 5627 { 5628 struct mlx5e_priv *priv = netdev_priv(skb->dev); 5629 u32 mapped_obj_id, reg_b, zone_restore_id; 5630 struct mlx5_tc_ct_priv *ct_priv; 5631 struct mapping_ctx *mapping_ctx; 5632 struct mlx5e_tc_table *tc; 5633 5634 reg_b = be32_to_cpu(cqe->ft_metadata); 5635 tc = mlx5e_fs_get_tc(priv->fs); 5636 mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; 5637 zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & 5638 ESW_ZONE_ID_MASK; 5639 ct_priv = tc->ct; 5640 mapping_ctx = tc->mapping; 5641 5642 return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id, 5643 0, NULL); 5644 } 5645 5646 int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, 5647 u64 act_miss_cookie, u32 *act_miss_mapping) 5648 { 5649 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 5650 struct mlx5_mapped_obj mapped_obj = {}; 5651 struct mapping_ctx *ctx; 5652 int err; 5653 5654 ctx = esw->offloads.reg_c0_obj_pool; 5655 5656 mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS; 5657 mapped_obj.act_miss_cookie = act_miss_cookie; 5658 err = mapping_add(ctx, &mapped_obj, act_miss_mapping); 5659 if (err) 5660 return err; 5661 5662 attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping); 5663 if (IS_ERR(attr->act_id_restore_rule)) 5664 goto err_rule; 5665 5666 return 0; 5667 5668 err_rule: 5669 mapping_remove(ctx, *act_miss_mapping); 5670 return err; 5671 } 5672 5673 void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, 5674 u32 act_miss_mapping) 5675 { 5676 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 5677 struct mapping_ctx *ctx; 5678 5679 ctx = esw->offloads.reg_c0_obj_pool; 5680 mlx5_del_flow_rules(attr->act_id_restore_rule); 5681 mapping_remove(ctx, act_miss_mapping); 5682 } 5683
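/* Worked example (illustrative): mlx5e_tc_update_skb_nic() above splits
 * reg_b from the CQE into a mapped-object tag and a zone-restore id.
 * Assuming a 16-bit MLX5E_TC_TABLE_CHAIN_TAG_MASK, an 8-bit
 * ESW_ZONE_ID_MASK and MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)
 * == 16, reg_b == 0x00ab1234 decodes to mapped_obj_id == 0x1234 and
 * zone_restore_id == 0xab.
 */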