/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "diag/en_tc_tracepoint.h"

struct mlx5_nic_flow_attr {
	u32 action;
	u32 flow_tag;
	struct mlx5_modify_hdr *modify_hdr;
	u32 hairpin_tirn;
	u8 match_level;
	struct mlx5_flow_table *hairpin_ft;
	struct mlx5_fc *counter;
};

#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)

enum {
	MLX5E_TC_FLOW_FLAG_INGRESS	= MLX5E_TC_FLAG_INGRESS_BIT,
	MLX5E_TC_FLOW_FLAG_EGRESS	= MLX5E_TC_FLAG_EGRESS_BIT,
	MLX5E_TC_FLOW_FLAG_ESWITCH	= MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
	MLX5E_TC_FLOW_FLAG_FT		= MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
	MLX5E_TC_FLOW_FLAG_NIC		= MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
	MLX5E_TC_FLOW_FLAG_OFFLOADED	= MLX5E_TC_FLOW_BASE,
	MLX5E_TC_FLOW_FLAG_HAIRPIN	= MLX5E_TC_FLOW_BASE + 1,
	MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS	= MLX5E_TC_FLOW_BASE + 2,
	MLX5E_TC_FLOW_FLAG_SLOW		= MLX5E_TC_FLOW_BASE + 3,
	MLX5E_TC_FLOW_FLAG_DUP		= MLX5E_TC_FLOW_BASE + 4,
	MLX5E_TC_FLOW_FLAG_NOT_READY	= MLX5E_TC_FLOW_BASE + 5,
	MLX5E_TC_FLOW_FLAG_DELETED	= MLX5E_TC_FLOW_BASE + 6,
};

#define MLX5E_TC_MAX_SPLITS 1

/* Helper struct for accessing a struct containing list_head array.
 * Containing struct
 *       |- Helper array
 *           [0] Helper item 0
 *               |- list_head item 0
 *               |- index (0)
 *           [1] Helper item 1
 *               |- list_head item 1
 *               |- index (1)
 * To access the containing struct from one of the list_head items:
 * 1. Get the helper item from the list_head item using
 *    helper item =
 *        container_of(list_head item, helper struct type, list_head field)
 * 2. Get the containing struct from the helper item and its index in the array:
 *    containing struct =
 *        container_of(helper item, containing struct type, helper field[index])
 */
struct encap_flow_item {
	struct mlx5e_encap_entry *e; /* attached encap instance */
	struct list_head list;
	int index;
};
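
/* Illustrative sketch (comment only, not part of the original code): given a
 * list_head embedded in encaps[], the owning flow is recovered with the
 * two-step container_of() described above, exactly as done later in
 * mlx5e_take_all_encap_flows():
 *
 *	struct encap_flow_item *efi =
 *		container_of(item, struct encap_flow_item, list);
 *	struct mlx5e_tc_flow *flow =
 *		container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 */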

struct mlx5e_tc_flow {
	struct rhash_head	node;
	struct mlx5e_priv	*priv;
	u64			cookie;
	unsigned long		flags;
	struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
	/* Flow can be associated with multiple encap IDs.
	 * The number of encaps is bounded by the number of supported
	 * destinations.
	 */
	struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
	struct mlx5e_tc_flow	*peer_flow;
	struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
	struct list_head	mod_hdr; /* flows sharing the same mod hdr ID */
	struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
	struct list_head	hairpin; /* flows sharing the same hairpin */
	struct list_head	peer;    /* flows with peer flow */
	struct list_head	unready; /* flows not ready to be offloaded (e.g. due to missing route) */
	int			tmp_efi_index;
	struct list_head	tmp_list; /* temporary flow list used by neigh update */
	refcount_t		refcnt;
	struct rcu_head		rcu_head;
	struct completion	init_done;
	union {
		struct mlx5_esw_flow_attr esw_attr[0];
		struct mlx5_nic_flow_attr nic_attr[0];
	};
};

struct mlx5e_tc_flow_parse_attr {
	const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
	struct net_device *filter_dev;
	struct mlx5_flow_spec spec;
	int num_mod_hdr_actions;
	int max_mod_hdr_actions;
	void *mod_hdr_actions;
	int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
};

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)

struct mlx5e_hairpin {
	struct mlx5_hairpin *pair;

	struct mlx5_core_dev *func_mdev;
	struct mlx5e_priv *func_priv;
	u32 tdn;
	u32 tirn;

	int num_channels;
	struct mlx5e_rqt indir_rqt;
	u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
	struct mlx5e_ttc_table ttc;
};

struct mlx5e_hairpin_entry {
	/* a node of a hash table which keeps all the hairpin entries */
	struct hlist_node hairpin_hlist;

	/* protects flows list */
	spinlock_t flows_lock;
	/* flows sharing the same hairpin */
	struct list_head flows;
	/* hpe's that were not fully initialized when dead peer update event
	 * function traversed them.
	 */
	struct list_head dead_peer_wait_list;

	u16 peer_vhca_id;
	u8 prio;
	struct mlx5e_hairpin *hp;
	refcount_t refcnt;
	struct completion res_ready;
};

struct mod_hdr_key {
	int num_actions;
	void *actions;
};

struct mlx5e_mod_hdr_entry {
	/* a node of a hash table which keeps all the mod_hdr entries */
	struct hlist_node mod_hdr_hlist;

	/* protects flows list */
	spinlock_t flows_lock;
	/* flows sharing the same mod_hdr entry */
	struct list_head flows;

	struct mod_hdr_key key;

	struct mlx5_modify_hdr *modify_hdr;

	refcount_t refcnt;
	struct completion res_ready;
	int compl_result;
};

#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow);

static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
		return ERR_PTR(-EINVAL);
	return flow;
}

static void mlx5e_flow_put(struct mlx5e_priv *priv,
			   struct mlx5e_tc_flow *flow)
{
	if (refcount_dec_and_test(&flow->refcnt)) {
		mlx5e_tc_del_flow(priv, flow);
		kfree_rcu(flow, rcu_head);
	}
}

static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
{
	/* Complete all memory stores before setting bit. */
	smp_mb__before_atomic();
	set_bit(flag, &flow->flags);
}

#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)

static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
				     unsigned long flag)
{
	/* test_and_set_bit() provides all necessary barriers */
	return test_and_set_bit(flag, &flow->flags);
}

#define flow_flag_test_and_set(flow, flag)			\
	__flow_flag_test_and_set(flow,				\
				 MLX5E_TC_FLOW_FLAG_##flag)

static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
{
	/* Complete all memory stores before clearing bit. */
	smp_mb__before_atomic();
	clear_bit(flag, &flow->flags);
}

#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
						      MLX5E_TC_FLOW_FLAG_##flag)

static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
{
	bool ret = test_bit(flag, &flow->flags);

	/* Read fields of flow structure only after checking flags. */
	smp_mb__after_atomic();
	return ret;
}

#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
						    MLX5E_TC_FLOW_FLAG_##flag)
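
/* Illustrative pairing (comment only, not part of the original code): the
 * barriers above let a flag publish prior stores. A writer such as
 * mlx5e_tc_add_fdb_flow() effectively does
 *
 *	flow->rule[0] = rule;		// payload stores...
 *	flow_flag_set(flow, OFFLOADED);	// smp_mb__before_atomic() + set_bit()
 *
 * and a reader that observes the flag may then safely dereference the
 * payload:
 *
 *	if (flow_flag_test(flow, OFFLOADED))	// test_bit() + smp_mb__after_atomic()
 *		use(flow->rule[0]);
 */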

static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, ESWITCH);
}

static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, FT);
}

static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, OFFLOADED);
}

static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
{
	return jhash(key->actions,
		     key->num_actions * MLX5_MH_ACT_SZ, 0);
}

static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
				   struct mod_hdr_key *b)
{
	if (a->num_actions != b->num_actions)
		return 1;

	return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
}

static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
		&priv->fs.tc.mod_hdr;
}

static struct mlx5e_mod_hdr_entry *
mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
{
	struct mlx5e_mod_hdr_entry *mh, *found = NULL;

	hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
		if (!cmp_mod_hdr_info(&mh->key, key)) {
			refcount_inc(&mh->refcnt);
			found = mh;
			break;
		}
	}

	return found;
}

static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
			      struct mlx5e_mod_hdr_entry *mh,
			      int namespace)
{
	struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);

	if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
		return;
	hash_del(&mh->mod_hdr_hlist);
	mutex_unlock(&tbl->lock);

	WARN_ON(!list_empty(&mh->flows));
	if (mh->compl_result > 0)
		mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);

	kfree(mh);
}

static int get_flow_name_space(struct mlx5e_tc_flow *flow)
{
	return mlx5e_is_eswitch_flow(flow) ?
		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}
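
/* Overview (comment only, not part of the original code): the attach path
 * below is a lookup-or-create under tbl->lock. On a hit the lock is dropped
 * and the caller sleeps on mh->res_ready, since another flow may still be
 * allocating the device object; mh->compl_result then tells whether that
 * allocation succeeded (> 0) or failed (< 0). On a miss the entry is hashed
 * first and the completion fired only after mlx5_modify_header_alloc(), so
 * concurrent waiters never see a half-initialized entry.
 */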

static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
				struct mlx5e_tc_flow *flow,
				struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	int num_actions, actions_size, namespace, err;
	struct mlx5e_mod_hdr_entry *mh;
	struct mod_hdr_tbl *tbl;
	struct mod_hdr_key key;
	u32 hash_key;

	num_actions = parse_attr->num_mod_hdr_actions;
	actions_size = MLX5_MH_ACT_SZ * num_actions;

	key.actions = parse_attr->mod_hdr_actions;
	key.num_actions = num_actions;

	hash_key = hash_mod_hdr_info(&key);

	namespace = get_flow_name_space(flow);
	tbl = get_mod_hdr_table(priv, namespace);

	mutex_lock(&tbl->lock);
	mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
	if (mh) {
		mutex_unlock(&tbl->lock);
		wait_for_completion(&mh->res_ready);

		if (mh->compl_result < 0) {
			err = -EREMOTEIO;
			goto attach_header_err;
		}
		goto attach_flow;
	}

	mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
	if (!mh) {
		mutex_unlock(&tbl->lock);
		return -ENOMEM;
	}

	mh->key.actions = (void *)mh + sizeof(*mh);
	memcpy(mh->key.actions, key.actions, actions_size);
	mh->key.num_actions = num_actions;
	spin_lock_init(&mh->flows_lock);
	INIT_LIST_HEAD(&mh->flows);
	refcount_set(&mh->refcnt, 1);
	init_completion(&mh->res_ready);

	hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
	mutex_unlock(&tbl->lock);

	mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
						  mh->key.num_actions,
						  mh->key.actions);
	if (IS_ERR(mh->modify_hdr)) {
		err = PTR_ERR(mh->modify_hdr);
		mh->compl_result = err;
		goto alloc_header_err;
	}
	mh->compl_result = 1;
	complete_all(&mh->res_ready);

attach_flow:
	flow->mh = mh;
	spin_lock(&mh->flows_lock);
	list_add(&flow->mod_hdr, &mh->flows);
	spin_unlock(&mh->flows_lock);
	if (mlx5e_is_eswitch_flow(flow))
		flow->esw_attr->modify_hdr = mh->modify_hdr;
	else
		flow->nic_attr->modify_hdr = mh->modify_hdr;

	return 0;

alloc_header_err:
	complete_all(&mh->res_ready);
attach_header_err:
	mlx5e_mod_hdr_put(priv, mh, namespace);
	return err;
}

static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
				 struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->mh)
		return;

	spin_lock(&flow->mh->flows_lock);
	list_del(&flow->mod_hdr);
	spin_unlock(&flow->mh->flows_lock);

	mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
	flow->mh = NULL;
}

static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
	struct net_device *netdev;
	struct mlx5e_priv *priv;

	netdev = __dev_get_by_index(net, ifindex);
	priv = netdev_priv(netdev);
	return priv->mdev;
}

static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
	u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
	void *tirc;
	int err;

	err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
	if (err)
		goto alloc_tdn_err;

	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
	MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
	MLX5_SET(tirc, tirc, transport_domain, hp->tdn);

	err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
	if (err)
		goto create_tir_err;

	return 0;

create_tir_err:
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
alloc_tdn_err:
	return err;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
	mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
{
	u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
	struct mlx5e_priv *priv = hp->func_priv;
	int i, ix, sz = MLX5E_INDIR_RQT_SIZE;

	mlx5e_build_default_indir_rqt(indirection_rqt, sz,
				      hp->num_channels);

	for (i = 0; i < sz; i++) {
		ix = i;
		if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
			ix = mlx5e_bits_invert(i, ilog2(sz));
		ix = indirection_rqt[ix];
		rqn = hp->pair->rqn[ix];
		MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
	}
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
	int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	void *rqtc;
	u32 *in;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

	mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);

	err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
	if (!err)
		hp->indir_rqt.enabled = true;

	kvfree(in);
	return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	u32 in[MLX5_ST_SZ_DW(create_tir_in)];
	int tt, i, err;
	void *tirc;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
		struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);

		memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

		MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
		MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
		MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
		mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);

		err = mlx5_core_create_tir(hp->func_mdev, in,
					   MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
		if (err) {
			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
			goto err_destroy_tirs;
		}
	}
	return 0;

err_destroy_tirs:
	for (i = 0; i < tt; i++)
		mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
	return err;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
	int tt;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
		mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
}

static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
					 struct ttc_params *ttc_params)
{
	struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
	int tt;

	memset(ttc_params, 0, sizeof(*ttc_params));

	ttc_params->any_tt_tirn = hp->tirn;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
		ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];

	ft_attr->max_fte = MLX5E_NUM_TT;
	ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
	ft_attr->prio = MLX5E_TC_PRIO;
}
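
/* Overview (comment only, not part of the original code): hairpin RSS below
 * mirrors the regular RX pipeline in miniature: an indirection RQT spreads
 * traffic over the hairpin pair's RQs (with bit-reversed indices for the
 * XOR hash, via mlx5e_bits_invert() above), one TIR per traffic type hashes
 * into that RQT, and a TTC steering table dispatches packets by L3/L4 type
 * into those TIRs.
 */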

static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct ttc_params ttc_params;
	int err;

	err = mlx5e_hairpin_create_indirect_rqt(hp);
	if (err)
		return err;

	err = mlx5e_hairpin_create_indirect_tirs(hp);
	if (err)
		goto err_create_indirect_tirs;

	mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
	err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
	if (err)
		goto err_create_ttc_table;

	netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
		   hp->num_channels, hp->ttc.ft.t->id);

	return 0;

err_create_ttc_table:
	mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
	mlx5e_destroy_rqt(priv, &hp->indir_rqt);

	return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;

	mlx5e_destroy_ttc_table(priv, &hp->ttc);
	mlx5e_hairpin_destroy_indirect_tirs(hp);
	mlx5e_destroy_rqt(priv, &hp->indir_rqt);
}

static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
		     int peer_ifindex)
{
	struct mlx5_core_dev *func_mdev, *peer_mdev;
	struct mlx5e_hairpin *hp;
	struct mlx5_hairpin *pair;
	int err;

	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
	if (!hp)
		return ERR_PTR(-ENOMEM);

	func_mdev = priv->mdev;
	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);

	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
	if (IS_ERR(pair)) {
		err = PTR_ERR(pair);
		goto create_pair_err;
	}
	hp->pair = pair;
	hp->func_mdev = func_mdev;
	hp->func_priv = priv;
	hp->num_channels = params->num_channels;

	err = mlx5e_hairpin_create_transport(hp);
	if (err)
		goto create_transport_err;

	if (hp->num_channels > 1) {
		err = mlx5e_hairpin_rss_init(hp);
		if (err)
			goto rss_init_err;
	}

	return hp;

rss_init_err:
	mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
	mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
	kfree(hp);
	return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
	if (hp->num_channels > 1)
		mlx5e_hairpin_rss_cleanup(hp);
	mlx5e_hairpin_destroy_transport(hp);
	mlx5_core_hairpin_destroy(hp->pair);
	kvfree(hp);
}

static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
	return (peer_vhca_id << 16 | prio);
}

static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
						     u16 peer_vhca_id, u8 prio)
{
	struct mlx5e_hairpin_entry *hpe;
	u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

	hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
			       hairpin_hlist, hash_key) {
		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
			refcount_inc(&hpe->refcnt);
			return hpe;
		}
	}

	return NULL;
}

static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
			      struct mlx5e_hairpin_entry *hpe)
{
	/* no more hairpin flows for us, release the hairpin pair */
	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
		return;
	hash_del(&hpe->hairpin_hlist);
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	if (!IS_ERR_OR_NULL(hpe->hp)) {
		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
			   dev_name(hpe->hp->pair->peer_mdev->device));

		mlx5e_hairpin_destroy(hpe->hp);
	}

	WARN_ON(!list_empty(&hpe->flows));
	kfree(hpe);
}

#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
				  struct mlx5_flow_spec *spec, u8 *match_prio,
				  struct netlink_ext_ack *extack)
{
	void *headers_c, *headers_v;
	u8 prio_val, prio_mask = 0;
	bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only PCP trust state supported for hairpin");
		return -EOPNOTSUPP;
	}
#endif
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

	vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
	if (vlan_present) {
		prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
		prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
	}

	if (!vlan_present || !prio_mask) {
		prio_val = UNKNOWN_MATCH_PRIO;
	} else if (prio_mask != 0x7) {
		NL_SET_ERR_MSG_MOD(extack,
				   "masked priority match not supported for hairpin");
		return -EOPNOTSUPP;
	}

	*match_prio = prio_val;
	return 0;
}

static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5e_tc_flow_parse_attr *parse_attr,
				  struct netlink_ext_ack *extack)
{
	int peer_ifindex = parse_attr->mirred_ifindex[0];
	struct mlx5_hairpin_params params;
	struct mlx5_core_dev *peer_mdev;
	struct mlx5e_hairpin_entry *hpe;
	struct mlx5e_hairpin *hp;
	u64 link_speed64;
	u32 link_speed;
	u8 match_prio;
	u16 peer_id;
	int err;

	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
		return -EOPNOTSUPP;
	}

	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
				     extack);
	if (err)
		return err;

	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
	if (hpe) {
		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
		wait_for_completion(&hpe->res_ready);

		if (IS_ERR(hpe->hp)) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
	if (!hpe) {
		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
		return -ENOMEM;
	}

	spin_lock_init(&hpe->flows_lock);
	INIT_LIST_HEAD(&hpe->flows);
	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
	hpe->peer_vhca_id = peer_id;
	hpe->prio = match_prio;
	refcount_set(&hpe->refcnt, 1);
	init_completion(&hpe->res_ready);

	hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
		 hash_hairpin_info(peer_id, match_prio));
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	params.log_data_size = 15;
	params.log_data_size = min_t(u8, params.log_data_size,
				     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
	params.log_data_size = max_t(u8, params.log_data_size,
				     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

	params.log_num_packets = params.log_data_size -
				 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
	params.log_num_packets = min_t(u8, params.log_num_packets,
				       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

	params.q_counter = priv->q_counter;
	/* set one hairpin pair for each 50Gbps share of the link */
	mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
	link_speed = max_t(u32, link_speed, 50000);
	link_speed64 = link_speed;
	do_div(link_speed64, 50000);
	params.num_channels = link_speed64;
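	/* Worked example (comment only, not part of the original code): on a
	 * 100Gbps port, max(100000, 50000) / 50000 = 2 hairpin channels;
	 * anything at or below 50Gbps is clamped to a single channel.
	 */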

	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
	hpe->hp = hp;
	complete_all(&hpe->res_ready);
	if (IS_ERR(hp)) {
		err = PTR_ERR(hp);
		goto out_err;
	}

	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
		   hp->tirn, hp->pair->rqn[0],
		   dev_name(hp->pair->peer_mdev->device),
		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
	if (hpe->hp->num_channels > 1) {
		flow_flag_set(flow, HAIRPIN_RSS);
		flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
	} else {
		flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
	}

	flow->hpe = hpe;
	spin_lock(&hpe->flows_lock);
	list_add(&flow->hairpin, &hpe->flows);
	spin_unlock(&hpe->flows_lock);

	return 0;

out_err:
	mlx5e_hairpin_put(priv, hpe);
	return err;
}

static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
				   struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->hpe)
		return;

	spin_lock(&flow->hpe->flows_lock);
	list_del(&flow->hairpin);
	spin_unlock(&flow->hpe->flows_lock);

	mlx5e_hairpin_put(priv, flow->hpe);
	flow->hpe = NULL;
}

static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow_parse_attr *parse_attr,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_act flow_act = {
		.action = attr->action,
		.flags = FLOW_ACT_NO_APPEND,
	};
	struct mlx5_fc *counter = NULL;
	int err, dest_ix = 0;

	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
	flow_context->flow_tag = attr->flow_tag;

	if (flow_flag_test(flow, HAIRPIN)) {
		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
		if (err)
			return err;

		if (flow_flag_test(flow, HAIRPIN_RSS)) {
			dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
			dest[dest_ix].ft = attr->hairpin_ft;
		} else {
			dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
			dest[dest_ix].tir_num = attr->hairpin_tirn;
		}
		dest_ix++;
	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = priv->fs.vlan.ft.t;
		dest_ix++;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(dev, true);
		if (IS_ERR(counter))
			return PTR_ERR(counter);

		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest[dest_ix].counter_id = mlx5_fc_id(counter);
		dest_ix++;
		attr->counter = counter;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
		flow_act.modify_hdr = attr->modify_hdr;
		kfree(parse_attr->mod_hdr_actions);
		if (err)
			return err;
	}

	mutex_lock(&priv->fs.tc.t_lock);
	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
		int tc_grp_size, tc_tbl_size;
		u32 max_flow_counter;

		max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
				    MLX5_CAP_GEN(dev, max_flow_counter_15_0);

		tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);

		tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
				    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));

		priv->fs.tc.t =
			mlx5_create_auto_grouped_flow_table(priv->fs.ns,
							    MLX5E_TC_PRIO,
							    tc_tbl_size,
							    MLX5E_TC_TABLE_NUM_GROUPS,
							    MLX5E_TC_FT_LEVEL, 0);
		if (IS_ERR(priv->fs.tc.t)) {
			mutex_unlock(&priv->fs.tc.t_lock);
			NL_SET_ERR_MSG_MOD(extack,
					   "Failed to create tc offload table\n");
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			return PTR_ERR(priv->fs.tc.t);
		}
	}

	if (attr->match_level != MLX5_MATCH_NONE)
		parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

	flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
					    &flow_act, dest, dest_ix);
	mutex_unlock(&priv->fs.tc.t_lock);

	return PTR_ERR_OR_ZERO(flow->rule[0]);
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	struct mlx5_fc *counter = NULL;

	counter = attr->counter;
	if (!IS_ERR_OR_NULL(flow->rule[0]))
		mlx5_del_flow_rules(flow->rule[0]);
	mlx5_fc_destroy(priv->mdev, counter);

	mutex_lock(&priv->fs.tc.t_lock);
	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
	mutex_unlock(&priv->fs.tc.t_lock);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5e_detach_mod_hdr(priv, flow);

	if (flow_flag_test(flow, HAIRPIN))
		mlx5e_hairpin_flow_del(priv, flow);
}

static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow, int out_index);

static int mlx5e_attach_encap(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow,
			      struct net_device *mirred_dev,
			      int out_index,
			      struct netlink_ext_ack *extack,
			      struct net_device **encap_dev,
			      bool *encap_valid);

static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
			   struct mlx5e_tc_flow *flow,
			   struct mlx5_flow_spec *spec,
			   struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_flow_handle *rule;

	rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
	if (IS_ERR(rule))
		return rule;

	if (attr->split_count) {
		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
		if (IS_ERR(flow->rule[1])) {
			mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
			return flow->rule[1];
		}
	}

	return rule;
}

static void
mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
			     struct mlx5e_tc_flow *flow,
			     struct mlx5_esw_flow_attr *attr)
{
	flow_flag_clear(flow, OFFLOADED);

	if (attr->split_count)
		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

	mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}

static struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
			      struct mlx5e_tc_flow *flow,
			      struct mlx5_flow_spec *spec,
			      struct mlx5_esw_flow_attr *slow_attr)
{
	struct mlx5_flow_handle *rule;

	memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->split_count = 0;
	slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN;

	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
	if (!IS_ERR(rule))
		flow_flag_set(flow, SLOW);

	return rule;
}

static void
mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5_esw_flow_attr *slow_attr)
{
	memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->split_count = 0;
	slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN;
	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
	flow_flag_clear(flow, SLOW);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
			     struct list_head *unready_flows)
{
	flow_flag_set(flow, NOT_READY);
	list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
	list_del(&flow->unready);
	flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_add(flow, &uplink_priv->unready_flows);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_del(flow);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	u32 max_chain = mlx5_eswitch_get_chain_range(esw);
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
	u16 max_prio = mlx5_eswitch_get_prio_range(esw);
	struct net_device *out_dev, *encap_dev = NULL;
	struct mlx5_fc *counter = NULL;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *out_priv;
	bool encap_valid = true;
	int err = 0;
	int out_index;

	if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) {
		NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
		return -EOPNOTSUPP;
	}

	/* We check chain range only for tc flows.
	 * For ft flows, we checked attr->chain was originally 0 and set it to
	 * FDB_FT_CHAIN which is outside tc range.
	 * See mlx5e_rep_setup_ft_cb().
	 */
	if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
		NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
		return -EOPNOTSUPP;
	}

	if (attr->prio > max_prio) {
		NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
		return -EOPNOTSUPP;
	}

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
		int mirred_ifindex;

		if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
			continue;

		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
		out_dev = __dev_get_by_index(dev_net(priv->netdev),
					     mirred_ifindex);
		err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
					 extack, &encap_dev, &encap_valid);
		if (err)
			return err;

		out_priv = netdev_priv(encap_dev);
		rpriv = out_priv->ppriv;
		attr->dests[out_index].rep = rpriv->rep;
		attr->dests[out_index].mdev = out_priv->mdev;
	}

	err = mlx5_eswitch_add_vlan_action(esw, attr);
	if (err)
		return err;

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
		kfree(parse_attr->mod_hdr_actions);
		if (err)
			return err;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(attr->counter_dev, true);
		if (IS_ERR(counter))
			return PTR_ERR(counter);

		attr->counter = counter;
	}

	/* we get here if one of the following takes place:
	 * (1) there's no error
	 * (2) there's an encap action and we don't have valid neigh
	 */
	if (!encap_valid) {
		/* continue with goto slow path rule instead */
		struct mlx5_esw_flow_attr slow_attr;

		flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr);
	} else {
		flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
	}

	if (IS_ERR(flow->rule[0]))
		return PTR_ERR(flow->rule[0]);
	else
		flow_flag_set(flow, OFFLOADED);

	return 0;
}

static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
	void *headers_v = MLX5_ADDR_OF(fte_match_param,
				       spec->match_value,
				       misc_parameters_3);
	u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
					     headers_v,
					     geneve_tlv_option_0_data);

	return !!geneve_tlv_opt_0_data;
}

static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5_esw_flow_attr slow_attr;
	int out_index;

	if (flow_flag_test(flow, NOT_READY)) {
		remove_unready_flow(flow);
		kvfree(attr->parse_attr);
		return;
	}

	if (mlx5e_is_offloaded_flow(flow)) {
		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
	}

	if (mlx5_flow_has_geneve_opt(flow))
		mlx5_geneve_tlv_option_del(priv->mdev->geneve);

	mlx5_eswitch_del_vlan_action(esw, attr);

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
		if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
			mlx5e_detach_encap(priv, flow, out_index);
			kfree(attr->parse_attr->tun_info[out_index]);
		}
	kvfree(attr->parse_attr);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5e_detach_mod_hdr(priv, flow);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
		mlx5_fc_destroy(attr->counter_dev, attr->counter);
}

void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr slow_attr, *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     e->reformat_type,
						     e->encap_size, e->encap_header,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		bool all_flow_encaps_valid = true;
		int i;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = flow->esw_attr;
		spec = &esw_attr->parse_attr->spec;

		esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		/* Flow can be associated with multiple encap entries.
		 * Before offloading the flow verify that all of them have
		 * a valid neighbour.
		 */
		for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
			if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
				continue;
			if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
				all_flow_encaps_valid = false;
				break;
			}
		}
		/* Do not offload flows with unresolved neighbors */
		if (!all_flow_encaps_valid)
			continue;
		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}
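
/* Note (comment only, not part of the original code): both
 * mlx5e_tc_encap_flows_add() above and mlx5e_tc_encap_flows_del() below swap
 * rules make-before-break: the replacement rule is installed first and the
 * old one is torn down only afterwards, so matching traffic is never left
 * without a rule during the transition.
 */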

void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr slow_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		spec = &flow->esw_attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
		/* mark the flow's encap dest as non-valid */
		flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}

static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
	if (mlx5e_is_eswitch_flow(flow))
		return flow->esw_attr->counter;
	else
		return flow->nic_attr->counter;
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		if (IS_ERR(mlx5e_flow_get(flow)))
			continue;
		wait_for_completion(&flow->init_done);

		flow->tmp_efi_index = efi->index;
		list_add(&flow->tmp_list, flow_list);
	}
}

/* Iterate over tmp_list of flows attached to flow_list head. */
void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
		mlx5e_flow_put(priv, flow);
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
		e = next;
		goto retry;
	}

	return next;
}

void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_encap_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow, int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;

	if (!flow_flag_test(flow, ESWITCH) ||
	    !flow_flag_test(flow, DUP))
		return;

	mutex_lock(&esw->offloads.peer_mutex);
	list_del(&flow->peer);
	mutex_unlock(&esw->offloads.peer_mutex);

	flow_flag_clear(flow, DUP);

	if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
		mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
		kfree(flow->peer_flow);
	}

	flow->peer_flow = NULL;
}

static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_core_dev *dev = flow->priv->mdev;
	struct mlx5_devcom *devcom = dev->priv.devcom;
	struct mlx5_eswitch *peer_esw;

	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		return;

	__mlx5e_tc_del_fdb_peer_flow(flow);
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	if (mlx5e_is_eswitch_flow(flow)) {
		mlx5e_tc_del_fdb_peer_flow(flow);
		mlx5e_tc_del_fdb_flow(priv, flow);
	} else {
		mlx5e_tc_del_nic_flow(priv, flow);
	}
}


static int parse_tunnel_attr(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct flow_cls_offload *f,
			     struct net_device *filter_dev, u8 *match_level)
{
	struct netlink_ext_ack *extack = f->common.extack;
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	int err;

	err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
				 headers_c, headers_v, match_level);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "failed to parse tunnel attributes");
		return err;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control match;
		u16 addr_type;

		flow_rule_match_enc_control(rule, &match);
		addr_type = match.key->addr_type;

		/* For tunnels, addr_type uses the same dissector key IDs as
		 * the non-tunnel match.
		 */
		if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
			struct flow_match_ipv4_addrs match;

			flow_rule_match_enc_ipv4_addrs(rule, &match);
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
				 ntohl(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
				 ntohl(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
				 ntohl(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
				 ntohl(match.key->dst));

			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
					 ethertype);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
				 ETH_P_IP);
		} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
			struct flow_match_ipv6_addrs match;

			flow_rule_match_enc_ipv6_addrs(rule, &match);
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
			       &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
								   ipv6));
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
			       &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
								  ipv6));

			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
			       &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
								   ipv6));
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
			       &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
								  ipv6));

			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
					 ethertype);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
				 ETH_P_IPV6);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
		struct flow_match_ip match;

		flow_rule_match_enc_ip(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
			 match.mask->tos & 0x3);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
			 match.key->tos & 0x3);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
			 match.mask->tos >> 2);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
			 match.key->tos >> 2);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
			 match.mask->ttl);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
			 match.key->ttl);

		if (match.mask->ttl &&
		    !MLX5_CAP_ESW_FLOWTABLE_FDB
			(priv->mdev,
			 ft_field_support.outer_ipv4_ttl)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Matching on TTL is not supported");
			return -EOPNOTSUPP;
		}

	}
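
	/* Worked example (comment only, not part of the original code) for the
	 * ip_ecn/ip_dscp split above: tos = 0xb8 (DSCP 46 "EF", ECN 0) yields
	 * ip_ecn = 0xb8 & 0x3 = 0 and ip_dscp = 0xb8 >> 2 = 46.
	 */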

	/* Enforce DMAC when offloading incoming tunneled flows.
	 * Flow counters require a match on the DMAC.
	 */
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
	ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				     dmac_47_16), priv->netdev->dev_addr);

	/* let software handle IP fragments */
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

	return 0;
}

static void *get_match_headers_criteria(u32 flags,
					struct mlx5_flow_spec *spec)
{
	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
		MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
			     inner_headers) :
		MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
			     outer_headers);
}

static void *get_match_headers_value(u32 flags,
				     struct mlx5_flow_spec *spec)
{
	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
		MLX5_ADDR_OF(fte_match_param, spec->match_value,
			     inner_headers) :
		MLX5_ADDR_OF(fte_match_param, spec->match_value,
			     outer_headers);
}

static int __parse_cls_flower(struct mlx5e_priv *priv,
			      struct mlx5_flow_spec *spec,
			      struct flow_cls_offload *f,
			      struct net_device *filter_dev,
			      u8 *inner_match_level, u8 *outer_match_level)
{
	struct netlink_ext_ack *extack = f->common.extack;
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters);
	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters);
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_dissector *dissector = rule->match.dissector;
	u16 addr_type = 0;
	u8 ip_proto = 0;
	u8 *match_level;

	match_level = outer_match_level;

	if (dissector->used_keys &
	    ~(BIT(FLOW_DISSECTOR_KEY_META) |
	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT(FLOW_DISSECTOR_KEY_CVLAN) |
	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_TCP) |
	      BIT(FLOW_DISSECTOR_KEY_IP) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
		netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
			    dissector->used_keys);
		return -EOPNOTSUPP;
	}

	if (mlx5e_get_tc_tun(filter_dev)) {
		if (parse_tunnel_attr(priv, spec, f, filter_dev,
				      outer_match_level))
			return -EOPNOTSUPP;

		/* At this point, header pointers should point to the inner
		 * headers; outer headers were already set by parse_tunnel_attr.
		 */
		match_level = inner_match_level;
		headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
						       spec);
		headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
						    spec);
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
			 ntohs(match.mask->n_proto));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
			 ntohs(match.key->n_proto));

		if (match.mask->n_proto)
			*match_level = MLX5_MATCH_L2;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
	    is_vlan_dev(filter_dev)) {
		struct flow_dissector_key_vlan filter_dev_mask;
		struct flow_dissector_key_vlan filter_dev_key;
		struct flow_match_vlan match;

		if (is_vlan_dev(filter_dev)) {
			match.key = &filter_dev_key;
			match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
			match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
			match.key->vlan_priority = 0;
			match.mask = &filter_dev_mask;
			memset(match.mask, 0xff, sizeof(*match.mask));
			match.mask->vlan_priority = 0;
		} else {
			flow_rule_match_vlan(rule, &match);
		}
		if (match.mask->vlan_id ||
		    match.mask->vlan_priority ||
		    match.mask->vlan_tpid) {
			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
					 svlan_tag, 1);
				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
					 svlan_tag, 1);
			} else {
				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
					 cvlan_tag, 1);
				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
					 cvlan_tag, 1);
			}

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
				 match.mask->vlan_id);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
				 match.key->vlan_id);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
				 match.mask->vlan_priority);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
				 match.key->vlan_priority);

			*match_level = MLX5_MATCH_L2;
		}
	} else if (*match_level != MLX5_MATCH_NONE) {
		/* cvlan_tag enabled in match criteria and
		 * disabled in match value means both S & C tags
		 * don't exist (untagged on both)
		 */
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
		*match_level = MLX5_MATCH_L2;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
		struct flow_match_vlan match;

		flow_rule_match_cvlan(rule, &match);
		if (match.mask->vlan_id ||
		    match.mask->vlan_priority ||
		    match.mask->vlan_tpid) {
			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
				MLX5_SET(fte_match_set_misc, misc_c,
					 outer_second_svlan_tag, 1);
				MLX5_SET(fte_match_set_misc, misc_v,
					 outer_second_svlan_tag, 1);
			} else {
				MLX5_SET(fte_match_set_misc, misc_c,
					 outer_second_cvlan_tag, 1);
				MLX5_SET(fte_match_set_misc, misc_v,
					 outer_second_cvlan_tag, 1);
			}

			MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
				 match.mask->vlan_id);
			MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
				 match.key->vlan_id);
			MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
				 match.mask->vlan_priority);
			MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
				 match.key->vlan_priority);

			*match_level = MLX5_MATCH_L2;
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs match;

		flow_rule_match_eth_addrs(rule, &match);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     dmac_47_16),
				match.mask->dst);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     dmac_47_16),
				match.key->dst);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     smac_47_16),
				match.mask->src);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     smac_47_16),
				match.key->src);

		if (!is_zero_ether_addr(match.mask->src) ||
		    !is_zero_ether_addr(match.mask->dst))
			*match_level = MLX5_MATCH_L2;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match;

		flow_rule_match_control(rule, &match);
		addr_type = match.key->addr_type;

		/* the HW doesn't support frag first/later */
		if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
			return -EOPNOTSUPP;

		if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
				 match.key->flags & FLOW_DIS_IS_FRAGMENT);

			/* the HW doesn't need L3 inline to match on frag=no
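			 * (matching frag=no only confirms the packet is not a
			 * fragment), so the match level can stay at L2; a
			 * frag=yes match needs the L3 header and is bumped to
			 * MLX5_MATCH_L3 below.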
*/ 2007 if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) 2008 *match_level = MLX5_MATCH_L2; 2009 /* *** L2 attributes parsing up to here *** */ 2010 else 2011 *match_level = MLX5_MATCH_L3; 2012 } 2013 } 2014 2015 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 2016 struct flow_match_basic match; 2017 2018 flow_rule_match_basic(rule, &match); 2019 ip_proto = match.key->ip_proto; 2020 2021 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 2022 match.mask->ip_proto); 2023 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 2024 match.key->ip_proto); 2025 2026 if (match.mask->ip_proto) 2027 *match_level = MLX5_MATCH_L3; 2028 } 2029 2030 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 2031 struct flow_match_ipv4_addrs match; 2032 2033 flow_rule_match_ipv4_addrs(rule, &match); 2034 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2035 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2036 &match.mask->src, sizeof(match.mask->src)); 2037 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2038 src_ipv4_src_ipv6.ipv4_layout.ipv4), 2039 &match.key->src, sizeof(match.key->src)); 2040 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2041 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2042 &match.mask->dst, sizeof(match.mask->dst)); 2043 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2044 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 2045 &match.key->dst, sizeof(match.key->dst)); 2046 2047 if (match.mask->src || match.mask->dst) 2048 *match_level = MLX5_MATCH_L3; 2049 } 2050 2051 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 2052 struct flow_match_ipv6_addrs match; 2053 2054 flow_rule_match_ipv6_addrs(rule, &match); 2055 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2056 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2057 &match.mask->src, sizeof(match.mask->src)); 2058 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2059 src_ipv4_src_ipv6.ipv6_layout.ipv6), 2060 &match.key->src, sizeof(match.key->src)); 2061 2062 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 2063 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2064 &match.mask->dst, sizeof(match.mask->dst)); 2065 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 2066 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 2067 &match.key->dst, sizeof(match.key->dst)); 2068 2069 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || 2070 ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) 2071 *match_level = MLX5_MATCH_L3; 2072 } 2073 2074 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { 2075 struct flow_match_ip match; 2076 2077 flow_rule_match_ip(rule, &match); 2078 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, 2079 match.mask->tos & 0x3); 2080 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, 2081 match.key->tos & 0x3); 2082 2083 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, 2084 match.mask->tos >> 2); 2085 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, 2086 match.key->tos >> 2); 2087 2088 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, 2089 match.mask->ttl); 2090 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, 2091 match.key->ttl); 2092 2093 if (match.mask->ttl && 2094 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, 2095 ft_field_support.outer_ipv4_ttl)) { 2096 NL_SET_ERR_MSG_MOD(extack, 2097 "Matching on TTL is not supported"); 2098 return -EOPNOTSUPP; 2099 } 2100 2101 if (match.mask->tos || match.mask->ttl) 2102 *match_level = MLX5_MATCH_L3; 2103 } 2104 2105 /* *** L3 attributes parsing up to here *** */ 2106 2107 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 2108 struct flow_match_ports match; 2109 
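		/* L4 ports land in protocol-specific match fields, so this
		 * relies on ip_proto parsed from FLOW_DISSECTOR_KEY_BASIC
		 * above; anything other than TCP/UDP is rejected below.
		 */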
		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack,
					   "Only UDP and TCP transports are supported for L4 matching");
			netdev_err(priv->netdev,
				   "Only UDP and TCP transports are supported\n");
			return -EINVAL;
		}

		if (match.mask->src || match.mask->dst)
			*match_level = MLX5_MATCH_L4;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));

		if (match.mask->flags)
			*match_level = MLX5_MATCH_L4;
	}

	return 0;
}

static int parse_cls_flower(struct mlx5e_priv *priv,
			    struct mlx5e_tc_flow *flow,
			    struct mlx5_flow_spec *spec,
			    struct flow_cls_offload *f,
			    struct net_device *filter_dev)
{
	u8 inner_match_level, outer_match_level, non_tunnel_match_level;
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *rep;
	bool is_eswitch_flow;
	int err;

	inner_match_level = MLX5_MATCH_NONE;
	outer_match_level = MLX5_MATCH_NONE;

	err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
				 &outer_match_level);
	non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2184 outer_match_level : inner_match_level; 2185 2186 is_eswitch_flow = mlx5e_is_eswitch_flow(flow); 2187 if (!err && is_eswitch_flow) { 2188 rep = rpriv->rep; 2189 if (rep->vport != MLX5_VPORT_UPLINK && 2190 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && 2191 esw->offloads.inline_mode < non_tunnel_match_level)) { 2192 NL_SET_ERR_MSG_MOD(extack, 2193 "Flow is not offloaded due to min inline setting"); 2194 netdev_warn(priv->netdev, 2195 "Flow is not offloaded due to min inline setting, required %d actual %d\n", 2196 non_tunnel_match_level, esw->offloads.inline_mode); 2197 return -EOPNOTSUPP; 2198 } 2199 } 2200 2201 if (is_eswitch_flow) { 2202 flow->esw_attr->inner_match_level = inner_match_level; 2203 flow->esw_attr->outer_match_level = outer_match_level; 2204 } else { 2205 flow->nic_attr->match_level = non_tunnel_match_level; 2206 } 2207 2208 return err; 2209 } 2210 2211 struct pedit_headers { 2212 struct ethhdr eth; 2213 struct vlan_hdr vlan; 2214 struct iphdr ip4; 2215 struct ipv6hdr ip6; 2216 struct tcphdr tcp; 2217 struct udphdr udp; 2218 }; 2219 2220 struct pedit_headers_action { 2221 struct pedit_headers vals; 2222 struct pedit_headers masks; 2223 u32 pedits; 2224 }; 2225 2226 static int pedit_header_offsets[] = { 2227 [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), 2228 [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), 2229 [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), 2230 [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), 2231 [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), 2232 }; 2233 2234 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) 2235 2236 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, 2237 struct pedit_headers_action *hdrs) 2238 { 2239 u32 *curr_pmask, *curr_pval; 2240 2241 curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); 2242 curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); 2243 2244 if (*curr_pmask & mask) /* disallow acting twice on the same location */ 2245 goto out_err; 2246 2247 *curr_pmask |= mask; 2248 *curr_pval |= (val & mask); 2249 2250 return 0; 2251 2252 out_err: 2253 return -EOPNOTSUPP; 2254 } 2255 2256 struct mlx5_fields { 2257 u8 field; 2258 u8 field_bsize; 2259 u32 field_mask; 2260 u32 offset; 2261 u32 match_offset; 2262 }; 2263 2264 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \ 2265 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \ 2266 offsetof(struct pedit_headers, field) + (off), \ 2267 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} 2268 2269 /* masked values are the same and there are no rewrites that do not have a 2270 * match. 
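 * I.e. a rewrite can be skipped when the rule already matches exactly the
 * value being written: the masked values are equal, and every bit the
 * rewrite touches is also covered by the match mask.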
 */
#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
	type matchmaskx = *(type *)(matchmaskp); \
	type matchvalx = *(type *)(matchvalp); \
	type maskx = *(type *)(maskp); \
	type valx = *(type *)(valp); \
	\
	(valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
								 matchmaskx)); \
})

static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
			 void *matchmaskp, u8 bsize)
{
	bool same = false;

	switch (bsize) {
	case 8:
		same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
		break;
	case 16:
		same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
		break;
	case 32:
		same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
		break;
	}

	return same;
}

static struct mlx5_fields fields[] = {
	OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
	OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
	OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
	OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
	OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
	OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),

	OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
	OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
	OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
	OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),

	OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
	OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
	OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
	OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
	OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
	OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
	OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
	OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
	OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),

	OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
	OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,   0, tcp_dport),
	/* in the linux tcphdr, the tcp_flags byte is 8 bits long, 5 bytes past ack_seq */
	OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags),

	OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
	OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
};

/* On input attr->max_mod_hdr_actions tells how many HW actions can be
 * parsed at most from the SW pedit action. On success,
 * attr->num_mod_hdr_actions says how many HW actions were actually parsed.
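 * Each offloaded field becomes one set/add HW action. Rewrite masks with a
 * hole of zero bits between set bits (e.g. 0xff00ff) can't be expressed as
 * a single offset/length pair and are rejected in the loop below.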
 */
static int offload_pedit_fields(struct pedit_headers_action *hdrs,
				struct mlx5e_tc_flow_parse_attr *parse_attr,
				u32 *action_flags,
				struct netlink_ext_ack *extack)
{
	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
	int i, action_size, nactions, max_actions, first, last, next_z;
	void *headers_c, *headers_v, *action, *vals_p;
	u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
	struct mlx5_fields *f;
	unsigned long mask;
	__be32 mask_be32;
	__be16 mask_be16;
	u8 cmd;

	headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
	headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);

	set_masks = &hdrs[0].masks;
	add_masks = &hdrs[1].masks;
	set_vals = &hdrs[0].vals;
	add_vals = &hdrs[1].vals;

	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
	action = parse_attr->mod_hdr_actions +
		 parse_attr->num_mod_hdr_actions * action_size;

	max_actions = parse_attr->max_mod_hdr_actions;
	nactions = parse_attr->num_mod_hdr_actions;

	for (i = 0; i < ARRAY_SIZE(fields); i++) {
		bool skip;

		f = &fields[i];
		/* avoid seeing bits set from previous iterations */
		s_mask = 0;
		a_mask = 0;

		s_masks_p = (void *)set_masks + f->offset;
		a_masks_p = (void *)add_masks + f->offset;

		s_mask = *s_masks_p & f->field_mask;
		a_mask = *a_masks_p & f->field_mask;

		if (!s_mask && !a_mask) /* nothing to offload here */
			continue;

		if (s_mask && a_mask) {
			NL_SET_ERR_MSG_MOD(extack,
					   "can't set and add to the same HW field");
			printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
			return -EOPNOTSUPP;
		}

		if (nactions == max_actions) {
			NL_SET_ERR_MSG_MOD(extack,
					   "too many pedit actions, can't offload");
			printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
			return -EOPNOTSUPP;
		}

		skip = false;
		if (s_mask) {
			void *match_mask = headers_c + f->match_offset;
			void *match_val = headers_v + f->match_offset;

			cmd  = MLX5_ACTION_TYPE_SET;
			mask = s_mask;
			vals_p = (void *)set_vals + f->offset;
			/* don't rewrite if we have a match on the same value */
			if (cmp_val_mask(vals_p, s_masks_p, match_val,
					 match_mask, f->field_bsize))
				skip = true;
			/* clear to denote we consumed this field */
			*s_masks_p &= ~f->field_mask;
		} else {
			cmd  = MLX5_ACTION_TYPE_ADD;
			mask = a_mask;
			vals_p = (void *)add_vals + f->offset;
			/* add 0 is no change */
			if ((*(u32 *)vals_p & f->field_mask) == 0)
				skip = true;
			/* clear to denote we consumed this field */
			*a_masks_p &= ~f->field_mask;
		}
		if (skip)
			continue;

		if (f->field_bsize == 32) {
			mask_be32 = *(__be32 *)&mask;
			mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
		} else if (f->field_bsize == 16) {
			mask_be16 = *(__be16 *)&mask;
			mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
		}

		first = find_first_bit(&mask, f->field_bsize);
		next_z = find_next_zero_bit(&mask, f->field_bsize, first);
		last  = find_last_bit(&mask, f->field_bsize);
		if (first < next_z && next_z < last) {
			NL_SET_ERR_MSG_MOD(extack,
					   "rewrite of a few sub-fields isn't supported");
			printk(KERN_WARNING "mlx5: rewrite of a few sub-fields (mask %lx) isn't offloaded\n",
			       mask);
			return -EOPNOTSUPP;
		}

		MLX5_SET(set_action_in, action, action_type, cmd);
		MLX5_SET(set_action_in, action, field, f->field);

		if (cmd == MLX5_ACTION_TYPE_SET) {
			int start;

			/* if field is bit sized it can start not from first bit */
			start = find_first_bit((unsigned long *)&f->field_mask,
					       f->field_bsize);

			MLX5_SET(set_action_in, action, offset, first - start);
			/* length is num of bits to be written, zero means length of 32 */
			MLX5_SET(set_action_in, action, length, (last - first + 1));
		}

		if (f->field_bsize == 32)
			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
		else if (f->field_bsize == 16)
			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
		else if (f->field_bsize == 8)
			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);

		action += action_size;
		nactions++;
	}

	parse_attr->num_mod_hdr_actions = nactions;
	return 0;
}

static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
						  int namespace)
{
	if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
		return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
	else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
		return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
}

static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
				 struct pedit_headers_action *hdrs,
				 int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	int nkeys, action_size, max_actions;

	nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits +
		hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);

	max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
	/* one 32-bit pedit SW key can map to as many as 16 HW actions */
	max_actions = min(max_actions, nkeys * 16);

	parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
	if (!parse_attr->mod_hdr_actions)
		return -ENOMEM;

	parse_attr->max_mod_hdr_actions = max_actions;
	return 0;
}

static const struct pedit_headers zero_masks = {};

static int parse_tc_pedit_action(struct mlx5e_priv *priv,
				 const struct flow_action_entry *act, int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr,
				 struct pedit_headers_action *hdrs,
				 struct netlink_ext_ack *extack)
{
	u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? TCA_PEDIT_KEY_EX_CMD_SET :
						   TCA_PEDIT_KEY_EX_CMD_ADD;
	int err = -EOPNOTSUPP; /* can't be all optimistic */
	u32 mask, val, offset;
	u8 htype;

	htype = act->mangle.htype;

	if (htype == FLOW_ACT_MANGLE_UNSPEC) {
		NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
		goto out_err;
	}

	if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "The pedit offload action is not supported");
		goto out_err;
	}

	mask = act->mangle.mask;
	val = act->mangle.val;
	offset = act->mangle.offset;

	err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
	if (err)
		goto out_err;

	hdrs[cmd].pedits++;

	return 0;
out_err:
	return err;
}

static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr,
				 struct pedit_headers_action *hdrs,
				 u32 *action_flags,
				 struct netlink_ext_ack *extack)
{
	struct pedit_headers *cmd_masks;
	int err;
	u8 cmd;

	if (!parse_attr->mod_hdr_actions) {
		err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
		if (err)
			goto out_err;
	}

	err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
	if (err < 0)
		goto out_dealloc_parsed_actions;

	for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
		cmd_masks = &hdrs[cmd].masks;
		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
			NL_SET_ERR_MSG_MOD(extack,
					   "attempt to offload an unsupported field");
			netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
			print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
				       16, 1, cmd_masks, sizeof(zero_masks), true);
			err = -EOPNOTSUPP;
			goto out_dealloc_parsed_actions;
		}
	}

	return 0;

out_dealloc_parsed_actions:
	kfree(parse_attr->mod_hdr_actions);
out_err:
	return err;
}

static bool csum_offload_supported(struct mlx5e_priv *priv,
				   u32 action,
				   u32 update_flags,
				   struct netlink_ext_ack *extack)
{
	u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
			 TCA_CSUM_UPDATE_FLAG_UDP;

	/* The HW recalcs checksums only if re-writing headers */
	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "TC csum action is only offloaded with pedit");
		netdev_warn(priv->netdev,
			    "TC csum action is only offloaded with pedit\n");
		return false;
	}

	if (update_flags & ~prot_flags) {
		NL_SET_ERR_MSG_MOD(extack,
				   "can't offload TC csum action for some headers");
		netdev_warn(priv->netdev,
			    "can't offload TC csum action for some headers - flags %#x\n",
			    update_flags);
		return false;
	}

	return true;
}

struct ip_ttl_word {
	__u8	ttl;
	__u8	protocol;
	__sum16	check;
};

struct ipv6_hoplimit_word {
	__be16	payload_len;
	__u8	nexthdr;
	__u8	hop_limit;
};

static bool is_action_keys_supported(const struct flow_action_entry *act)
{
	u32 mask, offset;
	u8 htype;

	htype = act->mangle.htype;
	offset = act->mangle.offset;
	mask = ~act->mangle.mask;
	/* For IPv4 & IPv6 headers, check the 4-byte word to determine
	 * that the modified fields are NOT ttl & hop_limit only.
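	 * The inverted pedit mask is overlaid on a 4-byte window of the header
	 * (struct ip_ttl_word / struct ipv6_hoplimit_word); if any neighbouring
	 * field is touched as well, the action counts as a real IP header
	 * modification.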
2650 */ 2651 if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { 2652 struct ip_ttl_word *ttl_word = 2653 (struct ip_ttl_word *)&mask; 2654 2655 if (offset != offsetof(struct iphdr, ttl) || 2656 ttl_word->protocol || 2657 ttl_word->check) { 2658 return true; 2659 } 2660 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { 2661 struct ipv6_hoplimit_word *hoplimit_word = 2662 (struct ipv6_hoplimit_word *)&mask; 2663 2664 if (offset != offsetof(struct ipv6hdr, payload_len) || 2665 hoplimit_word->payload_len || 2666 hoplimit_word->nexthdr) { 2667 return true; 2668 } 2669 } 2670 return false; 2671 } 2672 2673 static bool modify_header_match_supported(struct mlx5_flow_spec *spec, 2674 struct flow_action *flow_action, 2675 u32 actions, 2676 struct netlink_ext_ack *extack) 2677 { 2678 const struct flow_action_entry *act; 2679 bool modify_ip_header; 2680 void *headers_v; 2681 u16 ethertype; 2682 u8 ip_proto; 2683 int i; 2684 2685 headers_v = get_match_headers_value(actions, spec); 2686 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); 2687 2688 /* for non-IP we only re-write MACs, so we're okay */ 2689 if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) 2690 goto out_ok; 2691 2692 modify_ip_header = false; 2693 flow_action_for_each(i, act, flow_action) { 2694 if (act->id != FLOW_ACTION_MANGLE && 2695 act->id != FLOW_ACTION_ADD) 2696 continue; 2697 2698 if (is_action_keys_supported(act)) { 2699 modify_ip_header = true; 2700 break; 2701 } 2702 } 2703 2704 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); 2705 if (modify_ip_header && ip_proto != IPPROTO_TCP && 2706 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { 2707 NL_SET_ERR_MSG_MOD(extack, 2708 "can't offload re-write of non TCP/UDP"); 2709 pr_info("can't offload re-write of ip proto %d\n", ip_proto); 2710 return false; 2711 } 2712 2713 out_ok: 2714 return true; 2715 } 2716 2717 static bool actions_match_supported(struct mlx5e_priv *priv, 2718 struct flow_action *flow_action, 2719 struct mlx5e_tc_flow_parse_attr *parse_attr, 2720 struct mlx5e_tc_flow *flow, 2721 struct netlink_ext_ack *extack) 2722 { 2723 u32 actions; 2724 2725 if (mlx5e_is_eswitch_flow(flow)) 2726 actions = flow->esw_attr->action; 2727 else 2728 actions = flow->nic_attr->action; 2729 2730 if (flow_flag_test(flow, EGRESS) && 2731 !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || 2732 (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || 2733 (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) 2734 return false; 2735 2736 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) 2737 return modify_header_match_supported(&parse_attr->spec, 2738 flow_action, actions, 2739 extack); 2740 2741 return true; 2742 } 2743 2744 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) 2745 { 2746 struct mlx5_core_dev *fmdev, *pmdev; 2747 u64 fsystem_guid, psystem_guid; 2748 2749 fmdev = priv->mdev; 2750 pmdev = peer_priv->mdev; 2751 2752 fsystem_guid = mlx5_query_nic_system_image_guid(fmdev); 2753 psystem_guid = mlx5_query_nic_system_image_guid(pmdev); 2754 2755 return (fsystem_guid == psystem_guid); 2756 } 2757 2758 static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, 2759 const struct flow_action_entry *act, 2760 struct mlx5e_tc_flow_parse_attr *parse_attr, 2761 struct pedit_headers_action *hdrs, 2762 u32 *action, struct netlink_ext_ack *extack) 2763 { 2764 u16 mask16 = VLAN_VID_MASK; 2765 u16 val16 = act->vlan.vid & VLAN_VID_MASK; 2766 const struct flow_action_entry pedit_act = { 2767 .id = FLOW_ACTION_MANGLE, 2768 .mangle.htype = 
FLOW_ACT_MANGLE_HDR_TYPE_ETH, 2769 .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI), 2770 .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16), 2771 .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16), 2772 }; 2773 u8 match_prio_mask, match_prio_val; 2774 void *headers_c, *headers_v; 2775 int err; 2776 2777 headers_c = get_match_headers_criteria(*action, &parse_attr->spec); 2778 headers_v = get_match_headers_value(*action, &parse_attr->spec); 2779 2780 if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) && 2781 MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) { 2782 NL_SET_ERR_MSG_MOD(extack, 2783 "VLAN rewrite action must have VLAN protocol match"); 2784 return -EOPNOTSUPP; 2785 } 2786 2787 match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); 2788 match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); 2789 if (act->vlan.prio != (match_prio_val & match_prio_mask)) { 2790 NL_SET_ERR_MSG_MOD(extack, 2791 "Changing VLAN prio is not supported"); 2792 return -EOPNOTSUPP; 2793 } 2794 2795 err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, 2796 hdrs, NULL); 2797 *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 2798 2799 return err; 2800 } 2801 2802 static int 2803 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, 2804 struct mlx5e_tc_flow_parse_attr *parse_attr, 2805 struct pedit_headers_action *hdrs, 2806 u32 *action, struct netlink_ext_ack *extack) 2807 { 2808 const struct flow_action_entry prio_tag_act = { 2809 .vlan.vid = 0, 2810 .vlan.prio = 2811 MLX5_GET(fte_match_set_lyr_2_4, 2812 get_match_headers_value(*action, 2813 &parse_attr->spec), 2814 first_prio) & 2815 MLX5_GET(fte_match_set_lyr_2_4, 2816 get_match_headers_criteria(*action, 2817 &parse_attr->spec), 2818 first_prio), 2819 }; 2820 2821 return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, 2822 &prio_tag_act, parse_attr, hdrs, action, 2823 extack); 2824 } 2825 2826 static int parse_tc_nic_actions(struct mlx5e_priv *priv, 2827 struct flow_action *flow_action, 2828 struct mlx5e_tc_flow_parse_attr *parse_attr, 2829 struct mlx5e_tc_flow *flow, 2830 struct netlink_ext_ack *extack) 2831 { 2832 struct mlx5_nic_flow_attr *attr = flow->nic_attr; 2833 struct pedit_headers_action hdrs[2] = {}; 2834 const struct flow_action_entry *act; 2835 u32 action = 0; 2836 int err, i; 2837 2838 if (!flow_action_has_entries(flow_action)) 2839 return -EINVAL; 2840 2841 attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; 2842 2843 flow_action_for_each(i, act, flow_action) { 2844 switch (act->id) { 2845 case FLOW_ACTION_DROP: 2846 action |= MLX5_FLOW_CONTEXT_ACTION_DROP; 2847 if (MLX5_CAP_FLOWTABLE(priv->mdev, 2848 flow_table_properties_nic_receive.flow_counter)) 2849 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 2850 break; 2851 case FLOW_ACTION_MANGLE: 2852 case FLOW_ACTION_ADD: 2853 err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, 2854 parse_attr, hdrs, extack); 2855 if (err) 2856 return err; 2857 2858 action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | 2859 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 2860 break; 2861 case FLOW_ACTION_VLAN_MANGLE: 2862 err = add_vlan_rewrite_action(priv, 2863 MLX5_FLOW_NAMESPACE_KERNEL, 2864 act, parse_attr, hdrs, 2865 &action, extack); 2866 if (err) 2867 return err; 2868 2869 break; 2870 case FLOW_ACTION_CSUM: 2871 if (csum_offload_supported(priv, action, 2872 act->csum_flags, 2873 extack)) 2874 break; 2875 2876 return -EOPNOTSUPP; 2877 case FLOW_ACTION_REDIRECT: { 2878 struct net_device *peer_dev = act->dev; 2879 2880 if 
(priv->netdev->netdev_ops == peer_dev->netdev_ops && 2881 same_hw_devs(priv, netdev_priv(peer_dev))) { 2882 parse_attr->mirred_ifindex[0] = peer_dev->ifindex; 2883 flow_flag_set(flow, HAIRPIN); 2884 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 2885 MLX5_FLOW_CONTEXT_ACTION_COUNT; 2886 } else { 2887 NL_SET_ERR_MSG_MOD(extack, 2888 "device is not on same HW, can't offload"); 2889 netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n", 2890 peer_dev->name); 2891 return -EINVAL; 2892 } 2893 } 2894 break; 2895 case FLOW_ACTION_MARK: { 2896 u32 mark = act->mark; 2897 2898 if (mark & ~MLX5E_TC_FLOW_ID_MASK) { 2899 NL_SET_ERR_MSG_MOD(extack, 2900 "Bad flow mark - only 16 bit is supported"); 2901 return -EINVAL; 2902 } 2903 2904 attr->flow_tag = mark; 2905 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 2906 } 2907 break; 2908 default: 2909 NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); 2910 return -EOPNOTSUPP; 2911 } 2912 } 2913 2914 if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || 2915 hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { 2916 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, 2917 parse_attr, hdrs, &action, extack); 2918 if (err) 2919 return err; 2920 /* in case all pedit actions are skipped, remove the MOD_HDR 2921 * flag. 2922 */ 2923 if (parse_attr->num_mod_hdr_actions == 0) { 2924 action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 2925 kfree(parse_attr->mod_hdr_actions); 2926 } 2927 } 2928 2929 attr->action = action; 2930 if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) 2931 return -EOPNOTSUPP; 2932 2933 return 0; 2934 } 2935 2936 struct encap_key { 2937 const struct ip_tunnel_key *ip_tun_key; 2938 struct mlx5e_tc_tunnel *tc_tunnel; 2939 }; 2940 2941 static inline int cmp_encap_info(struct encap_key *a, 2942 struct encap_key *b) 2943 { 2944 return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || 2945 a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; 2946 } 2947 2948 static inline int hash_encap_info(struct encap_key *key) 2949 { 2950 return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), 2951 key->tc_tunnel->tunnel_type); 2952 } 2953 2954 2955 static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, 2956 struct net_device *peer_netdev) 2957 { 2958 struct mlx5e_priv *peer_priv; 2959 2960 peer_priv = netdev_priv(peer_netdev); 2961 2962 return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && 2963 mlx5e_eswitch_rep(priv->netdev) && 2964 mlx5e_eswitch_rep(peer_netdev) && 2965 same_hw_devs(priv, peer_priv)); 2966 } 2967 2968 2969 2970 bool mlx5e_encap_take(struct mlx5e_encap_entry *e) 2971 { 2972 return refcount_inc_not_zero(&e->refcnt); 2973 } 2974 2975 static struct mlx5e_encap_entry * 2976 mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, 2977 uintptr_t hash_key) 2978 { 2979 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 2980 struct mlx5e_encap_entry *e; 2981 struct encap_key e_key; 2982 2983 hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, 2984 encap_hlist, hash_key) { 2985 e_key.ip_tun_key = &e->tun_info->key; 2986 e_key.tc_tunnel = e->tunnel; 2987 if (!cmp_encap_info(&e_key, key) && 2988 mlx5e_encap_take(e)) 2989 return e; 2990 } 2991 2992 return NULL; 2993 } 2994 2995 static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info) 2996 { 2997 size_t tun_size = sizeof(*tun_info) + tun_info->options_len; 2998 2999 return kmemdup(tun_info, tun_size, GFP_KERNEL); 3000 } 3001 3002 static int mlx5e_attach_encap(struct mlx5e_priv *priv, 3003 struct mlx5e_tc_flow *flow, 
3004 struct net_device *mirred_dev, 3005 int out_index, 3006 struct netlink_ext_ack *extack, 3007 struct net_device **encap_dev, 3008 bool *encap_valid) 3009 { 3010 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 3011 struct mlx5_esw_flow_attr *attr = flow->esw_attr; 3012 struct mlx5e_tc_flow_parse_attr *parse_attr; 3013 const struct ip_tunnel_info *tun_info; 3014 struct encap_key key; 3015 struct mlx5e_encap_entry *e; 3016 unsigned short family; 3017 uintptr_t hash_key; 3018 int err = 0; 3019 3020 parse_attr = attr->parse_attr; 3021 tun_info = parse_attr->tun_info[out_index]; 3022 family = ip_tunnel_info_af(tun_info); 3023 key.ip_tun_key = &tun_info->key; 3024 key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); 3025 if (!key.tc_tunnel) { 3026 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel"); 3027 return -EOPNOTSUPP; 3028 } 3029 3030 hash_key = hash_encap_info(&key); 3031 3032 mutex_lock(&esw->offloads.encap_tbl_lock); 3033 e = mlx5e_encap_get(priv, &key, hash_key); 3034 3035 /* must verify if encap is valid or not */ 3036 if (e) { 3037 mutex_unlock(&esw->offloads.encap_tbl_lock); 3038 wait_for_completion(&e->res_ready); 3039 3040 /* Protect against concurrent neigh update. */ 3041 mutex_lock(&esw->offloads.encap_tbl_lock); 3042 if (e->compl_result < 0) { 3043 err = -EREMOTEIO; 3044 goto out_err; 3045 } 3046 goto attach_flow; 3047 } 3048 3049 e = kzalloc(sizeof(*e), GFP_KERNEL); 3050 if (!e) { 3051 err = -ENOMEM; 3052 goto out_err; 3053 } 3054 3055 refcount_set(&e->refcnt, 1); 3056 init_completion(&e->res_ready); 3057 3058 tun_info = dup_tun_info(tun_info); 3059 if (!tun_info) { 3060 err = -ENOMEM; 3061 goto out_err_init; 3062 } 3063 e->tun_info = tun_info; 3064 err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); 3065 if (err) 3066 goto out_err_init; 3067 3068 INIT_LIST_HEAD(&e->flows); 3069 hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); 3070 mutex_unlock(&esw->offloads.encap_tbl_lock); 3071 3072 if (family == AF_INET) 3073 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); 3074 else if (family == AF_INET6) 3075 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); 3076 3077 /* Protect against concurrent neigh update. 
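	 * The table lock is re-taken before the completion is signalled below,
	 * so a concurrent neigh update cannot observe a half-initialized entry.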
*/ 3078 mutex_lock(&esw->offloads.encap_tbl_lock); 3079 complete_all(&e->res_ready); 3080 if (err) { 3081 e->compl_result = err; 3082 goto out_err; 3083 } 3084 e->compl_result = 1; 3085 3086 attach_flow: 3087 flow->encaps[out_index].e = e; 3088 list_add(&flow->encaps[out_index].list, &e->flows); 3089 flow->encaps[out_index].index = out_index; 3090 *encap_dev = e->out_dev; 3091 if (e->flags & MLX5_ENCAP_ENTRY_VALID) { 3092 attr->dests[out_index].pkt_reformat = e->pkt_reformat; 3093 attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; 3094 *encap_valid = true; 3095 } else { 3096 *encap_valid = false; 3097 } 3098 mutex_unlock(&esw->offloads.encap_tbl_lock); 3099 3100 return err; 3101 3102 out_err: 3103 mutex_unlock(&esw->offloads.encap_tbl_lock); 3104 if (e) 3105 mlx5e_encap_put(priv, e); 3106 return err; 3107 3108 out_err_init: 3109 mutex_unlock(&esw->offloads.encap_tbl_lock); 3110 kfree(tun_info); 3111 kfree(e); 3112 return err; 3113 } 3114 3115 static int parse_tc_vlan_action(struct mlx5e_priv *priv, 3116 const struct flow_action_entry *act, 3117 struct mlx5_esw_flow_attr *attr, 3118 u32 *action) 3119 { 3120 u8 vlan_idx = attr->total_vlan; 3121 3122 if (vlan_idx >= MLX5_FS_VLAN_DEPTH) 3123 return -EOPNOTSUPP; 3124 3125 switch (act->id) { 3126 case FLOW_ACTION_VLAN_POP: 3127 if (vlan_idx) { 3128 if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 3129 MLX5_FS_VLAN_DEPTH)) 3130 return -EOPNOTSUPP; 3131 3132 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; 3133 } else { 3134 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; 3135 } 3136 break; 3137 case FLOW_ACTION_VLAN_PUSH: 3138 attr->vlan_vid[vlan_idx] = act->vlan.vid; 3139 attr->vlan_prio[vlan_idx] = act->vlan.prio; 3140 attr->vlan_proto[vlan_idx] = act->vlan.proto; 3141 if (!attr->vlan_proto[vlan_idx]) 3142 attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); 3143 3144 if (vlan_idx) { 3145 if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 3146 MLX5_FS_VLAN_DEPTH)) 3147 return -EOPNOTSUPP; 3148 3149 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; 3150 } else { 3151 if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && 3152 (act->vlan.proto != htons(ETH_P_8021Q) || 3153 act->vlan.prio)) 3154 return -EOPNOTSUPP; 3155 3156 *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; 3157 } 3158 break; 3159 default: 3160 return -EINVAL; 3161 } 3162 3163 attr->total_vlan = vlan_idx + 1; 3164 3165 return 0; 3166 } 3167 3168 static int add_vlan_push_action(struct mlx5e_priv *priv, 3169 struct mlx5_esw_flow_attr *attr, 3170 struct net_device **out_dev, 3171 u32 *action) 3172 { 3173 struct net_device *vlan_dev = *out_dev; 3174 struct flow_action_entry vlan_act = { 3175 .id = FLOW_ACTION_VLAN_PUSH, 3176 .vlan.vid = vlan_dev_vlan_id(vlan_dev), 3177 .vlan.proto = vlan_dev_vlan_proto(vlan_dev), 3178 .vlan.prio = 0, 3179 }; 3180 int err; 3181 3182 err = parse_tc_vlan_action(priv, &vlan_act, attr, action); 3183 if (err) 3184 return err; 3185 3186 *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), 3187 dev_get_iflink(vlan_dev)); 3188 if (is_vlan_dev(*out_dev)) 3189 err = add_vlan_push_action(priv, attr, out_dev, action); 3190 3191 return err; 3192 } 3193 3194 static int add_vlan_pop_action(struct mlx5e_priv *priv, 3195 struct mlx5_esw_flow_attr *attr, 3196 u32 *action) 3197 { 3198 int nest_level = attr->parse_attr->filter_dev->lower_level; 3199 struct flow_action_entry vlan_act = { 3200 .id = FLOW_ACTION_VLAN_POP, 3201 }; 3202 int err = 0; 3203 3204 while (nest_level--) { 3205 err = parse_tc_vlan_action(priv, &vlan_act, attr, action); 3206 if (err) 3207 return 
err; 3208 } 3209 3210 return err; 3211 } 3212 3213 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, 3214 struct net_device *out_dev) 3215 { 3216 if (is_merged_eswitch_dev(priv, out_dev)) 3217 return true; 3218 3219 return mlx5e_eswitch_rep(out_dev) && 3220 same_hw_devs(priv, netdev_priv(out_dev)); 3221 } 3222 3223 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, 3224 struct flow_action *flow_action, 3225 struct mlx5e_tc_flow *flow, 3226 struct netlink_ext_ack *extack) 3227 { 3228 struct pedit_headers_action hdrs[2] = {}; 3229 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 3230 struct mlx5_esw_flow_attr *attr = flow->esw_attr; 3231 struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; 3232 struct mlx5e_rep_priv *rpriv = priv->ppriv; 3233 const struct ip_tunnel_info *info = NULL; 3234 bool ft_flow = mlx5e_is_ft_flow(flow); 3235 const struct flow_action_entry *act; 3236 bool encap = false; 3237 u32 action = 0; 3238 int err, i; 3239 3240 if (!flow_action_has_entries(flow_action)) 3241 return -EINVAL; 3242 3243 flow_action_for_each(i, act, flow_action) { 3244 switch (act->id) { 3245 case FLOW_ACTION_DROP: 3246 action |= MLX5_FLOW_CONTEXT_ACTION_DROP | 3247 MLX5_FLOW_CONTEXT_ACTION_COUNT; 3248 break; 3249 case FLOW_ACTION_MANGLE: 3250 case FLOW_ACTION_ADD: 3251 err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, 3252 parse_attr, hdrs, extack); 3253 if (err) 3254 return err; 3255 3256 action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3257 attr->split_count = attr->out_count; 3258 break; 3259 case FLOW_ACTION_CSUM: 3260 if (csum_offload_supported(priv, action, 3261 act->csum_flags, extack)) 3262 break; 3263 3264 return -EOPNOTSUPP; 3265 case FLOW_ACTION_REDIRECT: 3266 case FLOW_ACTION_MIRRED: { 3267 struct mlx5e_priv *out_priv; 3268 struct net_device *out_dev; 3269 3270 out_dev = act->dev; 3271 if (!out_dev) { 3272 /* out_dev is NULL when filters with 3273 * non-existing mirred device are replayed to 3274 * the driver. 3275 */ 3276 return -EINVAL; 3277 } 3278 3279 if (ft_flow && out_dev == priv->netdev) { 3280 /* Ignore forward to self rules generated 3281 * by adding both mlx5 devs to the flow table 3282 * block on a normal nft offload setup. 
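				 * Returning -EOPNOTSUPP (rather than a hard
				 * error) leaves such a rule to be handled in
				 * software without failing the whole block
				 * setup.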
3283 */ 3284 return -EOPNOTSUPP; 3285 } 3286 3287 if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { 3288 NL_SET_ERR_MSG_MOD(extack, 3289 "can't support more output ports, can't offload forwarding"); 3290 pr_err("can't support more than %d output ports, can't offload forwarding\n", 3291 attr->out_count); 3292 return -EOPNOTSUPP; 3293 } 3294 3295 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 3296 MLX5_FLOW_CONTEXT_ACTION_COUNT; 3297 if (encap) { 3298 parse_attr->mirred_ifindex[attr->out_count] = 3299 out_dev->ifindex; 3300 parse_attr->tun_info[attr->out_count] = dup_tun_info(info); 3301 if (!parse_attr->tun_info[attr->out_count]) 3302 return -ENOMEM; 3303 encap = false; 3304 attr->dests[attr->out_count].flags |= 3305 MLX5_ESW_DEST_ENCAP; 3306 attr->out_count++; 3307 /* attr->dests[].rep is resolved when we 3308 * handle encap 3309 */ 3310 } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) { 3311 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 3312 struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); 3313 struct net_device *uplink_upper; 3314 3315 rcu_read_lock(); 3316 uplink_upper = 3317 netdev_master_upper_dev_get_rcu(uplink_dev); 3318 if (uplink_upper && 3319 netif_is_lag_master(uplink_upper) && 3320 uplink_upper == out_dev) 3321 out_dev = uplink_dev; 3322 rcu_read_unlock(); 3323 3324 if (is_vlan_dev(out_dev)) { 3325 err = add_vlan_push_action(priv, attr, 3326 &out_dev, 3327 &action); 3328 if (err) 3329 return err; 3330 } 3331 3332 if (is_vlan_dev(parse_attr->filter_dev)) { 3333 err = add_vlan_pop_action(priv, attr, 3334 &action); 3335 if (err) 3336 return err; 3337 } 3338 3339 if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { 3340 NL_SET_ERR_MSG_MOD(extack, 3341 "devices are not on same switch HW, can't offload forwarding"); 3342 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", 3343 priv->netdev->name, out_dev->name); 3344 return -EOPNOTSUPP; 3345 } 3346 3347 out_priv = netdev_priv(out_dev); 3348 rpriv = out_priv->ppriv; 3349 attr->dests[attr->out_count].rep = rpriv->rep; 3350 attr->dests[attr->out_count].mdev = out_priv->mdev; 3351 attr->out_count++; 3352 } else if (parse_attr->filter_dev != priv->netdev) { 3353 /* All mlx5 devices are called to configure 3354 * high level device filters. 
Therefore, the 3355 * *attempt* to install a filter on invalid 3356 * eswitch should not trigger an explicit error 3357 */ 3358 return -EINVAL; 3359 } else { 3360 NL_SET_ERR_MSG_MOD(extack, 3361 "devices are not on same switch HW, can't offload forwarding"); 3362 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", 3363 priv->netdev->name, out_dev->name); 3364 return -EINVAL; 3365 } 3366 } 3367 break; 3368 case FLOW_ACTION_TUNNEL_ENCAP: 3369 info = act->tunnel; 3370 if (info) 3371 encap = true; 3372 else 3373 return -EOPNOTSUPP; 3374 3375 break; 3376 case FLOW_ACTION_VLAN_PUSH: 3377 case FLOW_ACTION_VLAN_POP: 3378 if (act->id == FLOW_ACTION_VLAN_PUSH && 3379 (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) { 3380 /* Replace vlan pop+push with vlan modify */ 3381 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; 3382 err = add_vlan_rewrite_action(priv, 3383 MLX5_FLOW_NAMESPACE_FDB, 3384 act, parse_attr, hdrs, 3385 &action, extack); 3386 } else { 3387 err = parse_tc_vlan_action(priv, act, attr, &action); 3388 } 3389 if (err) 3390 return err; 3391 3392 attr->split_count = attr->out_count; 3393 break; 3394 case FLOW_ACTION_VLAN_MANGLE: 3395 err = add_vlan_rewrite_action(priv, 3396 MLX5_FLOW_NAMESPACE_FDB, 3397 act, parse_attr, hdrs, 3398 &action, extack); 3399 if (err) 3400 return err; 3401 3402 attr->split_count = attr->out_count; 3403 break; 3404 case FLOW_ACTION_TUNNEL_DECAP: 3405 action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; 3406 break; 3407 case FLOW_ACTION_GOTO: { 3408 u32 dest_chain = act->chain_index; 3409 u32 max_chain = mlx5_eswitch_get_chain_range(esw); 3410 3411 if (ft_flow) { 3412 NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); 3413 return -EOPNOTSUPP; 3414 } 3415 if (dest_chain <= attr->chain) { 3416 NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported"); 3417 return -EOPNOTSUPP; 3418 } 3419 if (dest_chain > max_chain) { 3420 NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range"); 3421 return -EOPNOTSUPP; 3422 } 3423 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 3424 attr->dest_chain = dest_chain; 3425 break; 3426 } 3427 default: 3428 NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); 3429 return -EOPNOTSUPP; 3430 } 3431 } 3432 3433 if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && 3434 action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { 3435 /* For prio tag mode, replace vlan pop with rewrite vlan prio 3436 * tag rewrite. 3437 */ 3438 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; 3439 err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs, 3440 &action, extack); 3441 if (err) 3442 return err; 3443 } 3444 3445 if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || 3446 hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { 3447 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB, 3448 parse_attr, hdrs, &action, extack); 3449 if (err) 3450 return err; 3451 /* in case all pedit actions are skipped, remove the MOD_HDR 3452 * flag. we might have set split_count either by pedit or 3453 * pop/push. if there is no pop/push either, reset it too. 
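		 * (All pedit actions end up skipped when every rewrite already
		 * matches the packet; see offload_pedit_fields().)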
3454 */ 3455 if (parse_attr->num_mod_hdr_actions == 0) { 3456 action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 3457 kfree(parse_attr->mod_hdr_actions); 3458 if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || 3459 (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) 3460 attr->split_count = 0; 3461 } 3462 } 3463 3464 attr->action = action; 3465 if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) 3466 return -EOPNOTSUPP; 3467 3468 if (attr->dest_chain) { 3469 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { 3470 NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); 3471 return -EOPNOTSUPP; 3472 } 3473 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 3474 } 3475 3476 if (!(attr->action & 3477 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { 3478 NL_SET_ERR_MSG(extack, "Rule must have at least one forward/drop action"); 3479 return -EOPNOTSUPP; 3480 } 3481 3482 if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { 3483 NL_SET_ERR_MSG_MOD(extack, 3484 "current firmware doesn't support split rule for port mirroring"); 3485 netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n"); 3486 return -EOPNOTSUPP; 3487 } 3488 3489 return 0; 3490 } 3491 3492 static void get_flags(int flags, unsigned long *flow_flags) 3493 { 3494 unsigned long __flow_flags = 0; 3495 3496 if (flags & MLX5_TC_FLAG(INGRESS)) 3497 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); 3498 if (flags & MLX5_TC_FLAG(EGRESS)) 3499 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS); 3500 3501 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) 3502 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); 3503 if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) 3504 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); 3505 if (flags & MLX5_TC_FLAG(FT_OFFLOAD)) 3506 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT); 3507 3508 *flow_flags = __flow_flags; 3509 } 3510 3511 static const struct rhashtable_params tc_ht_params = { 3512 .head_offset = offsetof(struct mlx5e_tc_flow, node), 3513 .key_offset = offsetof(struct mlx5e_tc_flow, cookie), 3514 .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), 3515 .automatic_shrinking = true, 3516 }; 3517 3518 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, 3519 unsigned long flags) 3520 { 3521 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 3522 struct mlx5e_rep_priv *uplink_rpriv; 3523 3524 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { 3525 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 3526 return &uplink_rpriv->uplink_priv.tc_ht; 3527 } else /* NIC offload */ 3528 return &priv->fs.tc.ht; 3529 } 3530 3531 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) 3532 { 3533 struct mlx5_esw_flow_attr *attr = flow->esw_attr; 3534 bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && 3535 flow_flag_test(flow, INGRESS); 3536 bool act_is_encap = !!(attr->action & 3537 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); 3538 bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, 3539 MLX5_DEVCOM_ESW_OFFLOADS); 3540 3541 if (!esw_paired) 3542 return false; 3543 3544 if ((mlx5_lag_is_sriov(attr->in_mdev) || 3545 mlx5_lag_is_multipath(attr->in_mdev)) && 3546 (is_rep_ingress || act_is_encap)) 3547 return true; 3548 3549 return false; 3550 } 3551 3552 static int 3553 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, 3554 struct flow_cls_offload *f, unsigned long flow_flags, 3555 struct mlx5e_tc_flow_parse_attr **__parse_attr, 3556 struct mlx5e_tc_flow **__flow) 3557 { 3558 struct 
mlx5e_tc_flow_parse_attr *parse_attr; 3559 struct mlx5e_tc_flow *flow; 3560 int out_index, err; 3561 3562 flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); 3563 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); 3564 if (!parse_attr || !flow) { 3565 err = -ENOMEM; 3566 goto err_free; 3567 } 3568 3569 flow->cookie = f->cookie; 3570 flow->flags = flow_flags; 3571 flow->priv = priv; 3572 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) 3573 INIT_LIST_HEAD(&flow->encaps[out_index].list); 3574 INIT_LIST_HEAD(&flow->mod_hdr); 3575 INIT_LIST_HEAD(&flow->hairpin); 3576 refcount_set(&flow->refcnt, 1); 3577 init_completion(&flow->init_done); 3578 3579 *__flow = flow; 3580 *__parse_attr = parse_attr; 3581 3582 return 0; 3583 3584 err_free: 3585 kfree(flow); 3586 kvfree(parse_attr); 3587 return err; 3588 } 3589 3590 static void 3591 mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, 3592 struct mlx5e_priv *priv, 3593 struct mlx5e_tc_flow_parse_attr *parse_attr, 3594 struct flow_cls_offload *f, 3595 struct mlx5_eswitch_rep *in_rep, 3596 struct mlx5_core_dev *in_mdev) 3597 { 3598 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 3599 3600 esw_attr->parse_attr = parse_attr; 3601 esw_attr->chain = f->common.chain_index; 3602 esw_attr->prio = f->common.prio; 3603 3604 esw_attr->in_rep = in_rep; 3605 esw_attr->in_mdev = in_mdev; 3606 3607 if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == 3608 MLX5_COUNTER_SOURCE_ESWITCH) 3609 esw_attr->counter_dev = in_mdev; 3610 else 3611 esw_attr->counter_dev = priv->mdev; 3612 } 3613 3614 static struct mlx5e_tc_flow * 3615 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, 3616 struct flow_cls_offload *f, 3617 unsigned long flow_flags, 3618 struct net_device *filter_dev, 3619 struct mlx5_eswitch_rep *in_rep, 3620 struct mlx5_core_dev *in_mdev) 3621 { 3622 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 3623 struct netlink_ext_ack *extack = f->common.extack; 3624 struct mlx5e_tc_flow_parse_attr *parse_attr; 3625 struct mlx5e_tc_flow *flow; 3626 int attr_size, err; 3627 3628 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); 3629 attr_size = sizeof(struct mlx5_esw_flow_attr); 3630 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, 3631 &parse_attr, &flow); 3632 if (err) 3633 goto out; 3634 3635 parse_attr->filter_dev = filter_dev; 3636 mlx5e_flow_esw_attr_init(flow->esw_attr, 3637 priv, parse_attr, 3638 f, in_rep, in_mdev); 3639 3640 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, 3641 f, filter_dev); 3642 if (err) 3643 goto err_free; 3644 3645 err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); 3646 if (err) 3647 goto err_free; 3648 3649 err = mlx5e_tc_add_fdb_flow(priv, flow, extack); 3650 complete_all(&flow->init_done); 3651 if (err) { 3652 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) 3653 goto err_free; 3654 3655 add_unready_flow(flow); 3656 } 3657 3658 return flow; 3659 3660 err_free: 3661 mlx5e_flow_put(priv, flow); 3662 out: 3663 return ERR_PTR(err); 3664 } 3665 3666 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, 3667 struct mlx5e_tc_flow *flow, 3668 unsigned long flow_flags) 3669 { 3670 struct mlx5e_priv *priv = flow->priv, *peer_priv; 3671 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; 3672 struct mlx5_devcom *devcom = priv->mdev->priv.devcom; 3673 struct mlx5e_tc_flow_parse_attr *parse_attr; 3674 struct mlx5e_rep_priv *peer_urpriv; 3675 struct mlx5e_tc_flow *peer_flow; 3676 struct mlx5_core_dev *in_mdev; 3677 int err = 0; 3678 3679 
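	/* Take a reference on the peer eswitch's private data via devcom; it
	 * is released below once the peer flow has been added (or on failure).
	 */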
	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		return -ENODEV;

	peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
	peer_priv = netdev_priv(peer_urpriv->netdev);

	/* in_mdev is assigned the mdev from which the packet originated, so
	 * packets redirected to the uplink use the same mdev as the original
	 * flow and packets redirected from the uplink use the peer mdev.
	 */
	if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
		in_mdev = peer_priv->mdev;
	else
		in_mdev = priv->mdev;

	parse_attr = flow->esw_attr->parse_attr;
	peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
					 parse_attr->filter_dev,
					 flow->esw_attr->in_rep, in_mdev);
	if (IS_ERR(peer_flow)) {
		err = PTR_ERR(peer_flow);
		goto out;
	}

	flow->peer_flow = peer_flow;
	flow_flag_set(flow, DUP);
	mutex_lock(&esw->offloads.peer_mutex);
	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
	mutex_unlock(&esw->offloads.peer_mutex);

out:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return err;
}

static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
	struct mlx5_core_dev *in_mdev = priv->mdev;
	struct mlx5e_tc_flow *flow;
	int err;

	flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
				    in_mdev);
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (is_peer_flow_needed(flow)) {
		err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
		if (err) {
			mlx5e_tc_del_fdb_flow(priv, flow);
			goto out;
		}
	}

	*__flow = flow;

	return 0;

out:
	return err;
}

static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
	int attr_size, err;

	/* multi-chain not supported for NIC rules */
	if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
		return -EOPNOTSUPP;

	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
	attr_size  = sizeof(struct mlx5_nic_flow_attr);
	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
			       &parse_attr, &flow);
	if (err)
		goto out;

	parse_attr->filter_dev = filter_dev;
	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
			       f, filter_dev);
	if (err)
		goto err_free;

	err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
	if (err)
		goto err_free;

	err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
	if (err)
		goto err_free;

	flow_flag_set(flow, OFFLOADED);
	kvfree(parse_attr);
	*__flow = flow;

	return 0;

err_free:
	mlx5e_flow_put(priv, flow);
	kvfree(parse_attr);
out:
	return err;
}

static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
		  struct flow_cls_offload *f,
		  unsigned long
static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
		  struct flow_cls_offload *f,
		  unsigned long flags,
		  struct net_device *filter_dev,
		  struct mlx5e_tc_flow **flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long flow_flags;
	int err;

	get_flags(flags, &flow_flags);

	if (!tc_can_offload_extack(priv->netdev, f->common.extack))
		return -EOPNOTSUPP;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		err = mlx5e_add_fdb_flow(priv, f, flow_flags,
					 filter_dev, flow);
	else
		err = mlx5e_add_nic_flow(priv, f, flow_flags,
					 filter_dev, flow);

	return err;
}

int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
			   struct flow_cls_offload *f, unsigned long flags)
{
	struct netlink_ext_ack *extack = f->common.extack;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_tc_flow *flow;
	int err = 0;

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	rcu_read_unlock();
	if (flow) {
		NL_SET_ERR_MSG_MOD(extack,
				   "flow cookie already exists, ignoring");
		netdev_warn_once(priv->netdev,
				 "flow cookie %lx already exists, ignoring\n",
				 f->cookie);
		err = -EEXIST;
		goto out;
	}

	trace_mlx5e_configure_flower(f);
	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
	if (err)
		goto out;

	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
	if (err)
		goto err_free;

	return 0;

err_free:
	mlx5e_flow_put(priv, flow);
out:
	return err;
}

static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
{
	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));

	return flow_flag_test(flow, INGRESS) == dir_ingress &&
		flow_flag_test(flow, EGRESS) == dir_egress;
}

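/* Editor's note: a delete request can race with another delete for the same
 * cookie. The flow is looked up under RCU, and only the caller that wins
 * the test-and-set on the DELETED flag removes the rhashtable entry and
 * drops the flow reference.
 */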
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
			struct flow_cls_offload *f, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_tc_flow *flow;
	int err;

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	if (!flow || !same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	/* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
	 * set.
	 */
	if (flow_flag_test_and_set(flow, DELETED)) {
		err = -EINVAL;
		goto errout;
	}
	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
	rcu_read_unlock();

	trace_mlx5e_delete_flower(f);
	mlx5e_flow_put(priv, flow);

	return 0;

errout:
	rcu_read_unlock();
	return err;
}

int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
		       struct flow_cls_offload *f, unsigned long flags)
{
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5_eswitch *peer_esw;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	u64 lastuse = 0;
	u64 packets = 0;
	u64 bytes = 0;
	int err = 0;

	rcu_read_lock();
	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
						tc_ht_params));
	rcu_read_unlock();
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (!same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	if (mlx5e_is_offloaded_flow(flow)) {
		counter = mlx5e_tc_get_counter(flow);
		if (!counter)
			goto errout;

		mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
	}

	/* Under multipath it's possible for one rule to be currently
	 * un-offloaded while the other rule is offloaded.
	 */
	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		goto out;

	if (flow_flag_test(flow, DUP) &&
	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
		u64 bytes2;
		u64 packets2;
		u64 lastuse2;

		counter = mlx5e_tc_get_counter(flow->peer_flow);
		if (!counter)
			goto no_peer_counter;
		mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);

		bytes += bytes2;
		packets += packets2;
		lastuse = max_t(u64, lastuse, lastuse2);
	}

no_peer_counter:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out:
	flow_stats_update(&f->stats, bytes, packets, lastuse);
	trace_mlx5e_stats_flower(f);
errout:
	mlx5e_flow_put(priv, flow);
	return err;
}

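/* Editor's note, worked example for the rate conversion below: a police
 * rate of 1,000,000 bytes/sec is 8,000,000 bits/sec and maps to 8
 * mbit/sec, while 10,000 bytes/sec (80,000 bits/sec) rounds to 0 and is
 * clamped to the 1 mbit/sec minimum. Note that rate * 8 is a 32-bit
 * multiply, so rates above U32_MAX / 8 (~536 MB/sec) would wrap.
 */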
static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
			       struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch *esw;
	u16 vport_num;
	u32 rate_mbps;
	int err;

	esw = priv->mdev->priv.eswitch;
	/* rate is given in bytes/sec.
	 * First convert to bits/sec and then round to the nearest mbit/sec
	 * (mbit means million bits). Moreover, if rate is non-zero we choose
	 * to configure a minimum of 1 mbit/sec.
	 */
	rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
	vport_num = rpriv->rep->vport;

	err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");

	return err;
}

static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
					struct flow_action *flow_action,
					struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	const struct flow_action_entry *act;
	int err;
	int i;

	if (!flow_action_has_entries(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
		return -EINVAL;
	}

	if (!flow_offload_has_one_action(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
		return -EOPNOTSUPP;
	}

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_POLICE:
			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
			if (err)
				return err;

			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
				struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	if (ma->common.prio != 1) {
		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
		return -EINVAL;
	}

	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
}

int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	return apply_police_params(priv, 0, extack);
}

void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct rtnl_link_stats64 cur_stats;
	u64 dbytes;
	u64 dpkts;

	cur_stats = priv->stats.vf_vport;
	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
	rpriv->prev_vf_vport_stats = cur_stats;
	flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
}

static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
					      struct mlx5e_priv *peer_priv)
{
	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
	struct mlx5e_hairpin_entry *hpe, *tmp;
	LIST_HEAD(init_wait_list);
	u16 peer_vhca_id;
	int bkt;

	if (!same_hw_devs(priv, peer_priv))
		return;

	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);

	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
		if (refcount_inc_not_zero(&hpe->refcnt))
			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
		wait_for_completion(&hpe->res_ready);
		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
			hpe->hp->pair->peer_gone = true;

		mlx5e_hairpin_put(priv, hpe);
	}
}

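/* Editor's note: netdevice notifier callback. When another mlx5e device on
 * the same HW is unregistered, the hairpin pairs towards it are marked as
 * having a dead peer (see mlx5e_tc_hairpin_update_dead_peer() above) so
 * their resources can be torn down safely.
 */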
static int mlx5e_tc_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_flow_steering *fs;
	struct mlx5e_priv *peer_priv;
	struct mlx5e_tc_table *tc;
	struct mlx5e_priv *priv;

	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
	    event != NETDEV_UNREGISTER ||
	    ndev->reg_state == NETREG_REGISTERED)
		return NOTIFY_DONE;

	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
	fs = container_of(tc, struct mlx5e_flow_steering, tc);
	priv = container_of(fs, struct mlx5e_priv, fs);
	peer_priv = netdev_priv(ndev);
	if (priv == peer_priv ||
	    !(priv->netdev->features & NETIF_F_HW_TC))
		return NOTIFY_DONE;

	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);

	return NOTIFY_DONE;
}

int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	int err;

	mutex_init(&tc->t_lock);
	mutex_init(&tc->mod_hdr.lock);
	hash_init(tc->mod_hdr.hlist);
	mutex_init(&tc->hairpin_tbl_lock);
	hash_init(tc->hairpin_tbl);

	err = rhashtable_init(&tc->ht, &tc_ht_params);
	if (err)
		return err;

	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
	if (register_netdevice_notifier(&tc->netdevice_nb)) {
		tc->netdevice_nb.notifier_call = NULL;
		mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
	}

	return err;
}

static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = flow->priv;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}

void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	if (tc->netdevice_nb.notifier_call)
		unregister_netdevice_notifier(&tc->netdevice_nb);

	mutex_destroy(&tc->mod_hdr.lock);
	mutex_destroy(&tc->hairpin_tbl_lock);

	rhashtable_destroy(&tc->ht);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_destroy_flow_table(tc->t);
		tc->t = NULL;
	}
	mutex_destroy(&tc->t_lock);
}

int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
{
	return rhashtable_init(tc_ht, &tc_ht_params);
}

void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
{
	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}

int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);

	return atomic_read(&tc_ht->nelems);
}

void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
		__mlx5e_tc_del_fdb_peer_flow(flow);
}

void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
	struct mlx5_rep_uplink_priv *rpriv =
		container_of(work, struct mlx5_rep_uplink_priv,
			     reoffload_flows_work);
	struct mlx5e_tc_flow *flow, *tmp;

	mutex_lock(&rpriv->unready_flows_lock);
	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
			unready_flow_del(flow);
	}
	mutex_unlock(&rpriv->unready_flows_lock);
}