// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>

#include "esw/chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"

/* Width/mask of the register carrying the ct zone, derived from the
 * register mapping table so it stays in sync with the mapping length.
 */
#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
/* ct_state bits encoded into the CTSTATE register (BIT(0) unused here) */
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)

/* fte_id register width/mask, derived the same way */
#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX

/* ct labels are 128 bits; only a mapped id of this width fits a register */
#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

/* NOTE: relies on a local 'ct_priv' variable being in scope at the call site */
#define ct_dbg(fmt, args...)\
	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

/* Per-uplink ct offload state shared by all zones/flows. */
struct mlx5_tc_ct_priv {
	struct mlx5_eswitch *esw;
	const struct net_device *netdev;
	struct idr fte_ids;		/* allocator for per-flow fte ids */
	struct xarray tuple_ids;
	struct rhashtable zone_ht;	/* zone -> struct mlx5_ct_ft */
	struct rhashtable ct_tuples_ht;		/* offloaded original tuples */
	struct rhashtable ct_tuples_nat_ht;	/* offloaded post-NAT tuples */
	struct mlx5_flow_table *ct;	/* table holding non-NAT ct rules */
	struct mlx5_flow_table *ct_nat;	/* table holding NAT ct rules */
	struct mlx5_flow_table *post_ct; /* table rules jump to after ct */
	struct mutex control_lock; /* guards parallel adds/dels */
	struct mapping_ctx *zone_mapping;
	struct mapping_ctx *labels_mapping; /* 128-bit labels <-> small id */
};

/* State for one tc flow doing a ct action (pre/post ct rules + table ref). */
struct mlx5_ct_flow {
	struct mlx5_esw_flow_attr pre_ct_attr;
	struct mlx5_esw_flow_attr post_ct_attr;
	struct mlx5_flow_handle *pre_ct_rule;
	struct mlx5_flow_handle *post_ct_rule;
	struct mlx5_ct_ft *ft;
	u32 fte_id;
	u32 chain_mapping;
};

/* One offloaded rule for a ct entry, either the plain or the NAT variant. */
struct mlx5_ct_zone_rule {
	struct mlx5_flow_handle *rule;
	struct mlx5e_mod_hdr_handle *mh;
	struct mlx5_esw_flow_attr attr;
	bool nat;
};

/* Pre-ct table: one "hit" rule (tracked traffic of this zone) and one
 * catch-all miss rule, plus the modify header that sets the zone register.
 */
struct mlx5_tc_ct_pre {
	struct mlx5_flow_table *fdb;
	struct mlx5_flow_group *flow_grp;
	struct mlx5_flow_group *miss_grp;
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_handle *miss_rule;
	struct mlx5_modify_hdr *modify_hdr;
};

/* Per-zone flow table; refcounted, keyed by zone in zone_ht. */
struct mlx5_ct_ft {
	struct rhash_head node;
	u16 zone;
	u32 zone_restore_id;
	refcount_t refcount;
	struct nf_flowtable *nf_ft;
	struct mlx5_tc_ct_priv *ct_priv;
	struct rhashtable ct_entries_ht;	/* cookie -> mlx5_ct_entry */
	struct mlx5_tc_ct_pre pre_ct;
	struct mlx5_tc_ct_pre pre_ct_nat;
};

/* 5-tuple + zone; used byte-wise as an rhashtable key, so the layout of
 * ip.src_v6/dst_v6 (dst immediately after src) is relied upon below.
 */
struct mlx5_ct_tuple {
	u16 addr_type;
	__be16 n_proto;
	u8 ip_proto;
	struct {
		union {
			__be32 src_v4;
			struct in6_addr src_v6;
		};
		union {
			__be32 dst_v4;
			struct in6_addr dst_v6;
		};
	} ip;
	struct {
		__be16 src;
		__be16 dst;
	} port;

	u16 zone;
};

/* One offloaded conntrack entry: hashed by cookie, by original tuple and
 * (when it differs) by the post-NAT tuple. zone_rules[] is indexed by the
 * 'nat' bool: [0] = plain rule, [1] = NAT rule.
 */
struct mlx5_ct_entry {
	struct rhash_head node;
	struct rhash_head tuple_node;
	struct rhash_head tuple_nat_node;
	struct mlx5_fc *counter;
	unsigned long cookie;
	unsigned long restore_cookie;
	struct mlx5_ct_tuple tuple;
	struct mlx5_ct_tuple tuple_nat;
	struct mlx5_ct_zone_rule zone_rules[2];
};

static const struct rhashtable_params cts_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, node),
	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
	.head_offset = offsetof(struct mlx5_ct_ft, node),
	.key_offset = offsetof(struct mlx5_ct_ft, zone),
	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
	.automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

/* Fetch the ct offload state hanging off the uplink representor. */
static struct mlx5_tc_ct_priv *
mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	return uplink_priv->ct_priv;
}

/* Extract the conntrack 5-tuple from a flow_rule match into @tuple.
 * Returns -EOPNOTSUPP for anything other than IPv4/IPv6 + TCP/UDP with
 * ports present.
 */
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
	struct flow_match_control control;
	struct flow_match_basic basic;

	flow_rule_match_basic(rule, &basic);
	flow_rule_match_control(rule, &control);

	tuple->n_proto = basic.key->n_proto;
	tuple->ip_proto = basic.key->ip_proto;
	tuple->addr_type = control.key->addr_type;

	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		tuple->ip.src_v4 = match.key->src;
		tuple->ip.dst_v4 = match.key->dst;
	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		tuple->ip.src_v6 = match.key->src;
		tuple->ip.dst_v6 = match.key->dst;
	} else {
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (tuple->ip_proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
			tuple->port.src = match.key->src;
			tuple->port.dst = match.key->dst;
			break;
		default:
			return -EOPNOTSUPP;
		}
	} else {
		return -EOPNOTSUPP;
	}

	return 0;
}

/* Apply the rule's NAT mangle actions on top of @tuple (pre-copied from the
 * original tuple) to produce the post-NAT tuple.
 */
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
			     struct flow_rule *rule)
{
	struct flow_action *flow_action = &rule->action;
	struct flow_action_entry *act;
	u32 offset, val, ip6_offset;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id != FLOW_ACTION_MANGLE)
			continue;

		offset = act->mangle.offset;
		val = act->mangle.val;
		switch (act->mangle.htype) {
		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
			if (offset == offsetof(struct iphdr, saddr))
				tuple->ip.src_v4 = cpu_to_be32(val);
			else if (offset == offsetof(struct iphdr, daddr))
				tuple->ip.dst_v4 = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
			/* 32-bit word index from the start of saddr; indices
			 * 4..7 intentionally land in dst_v6, which immediately
			 * follows src_v6 in struct mlx5_ct_tuple.
			 */
			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
			ip6_offset /= 4;
			if (ip6_offset < 8)
				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
			if (offset == offsetof(struct tcphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct tcphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
			if (offset == offsetof(struct udphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct udphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

/* Translate the flow_rule match keys into the device match spec's outer
 * headers (ethertype, ip proto, v4/v6 addresses, L4 ports, tcp flags).
 * Always returns 0.
 */
static int
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
			   struct flow_rule *rule)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);

		mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
				       headers_v);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 match.mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 match.key->ip_proto);

		ip_proto = match.key->ip_proto;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match;

		flow_rule_match_control(rule, &match);
		addr_type = match.key->addr_type;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.key->src, sizeof(match.key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.key->src, sizeof(match.key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			break;
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));
	}

	return 0;
}
396 397 static void 398 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, 399 struct mlx5_ct_entry *entry, 400 bool nat) 401 { 402 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 403 struct mlx5_esw_flow_attr *attr = &zone_rule->attr; 404 struct mlx5_eswitch *esw = ct_priv->esw; 405 406 ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); 407 408 mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr); 409 mlx5e_mod_hdr_detach(ct_priv->esw->dev, 410 &esw->offloads.mod_hdr, zone_rule->mh); 411 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); 412 } 413 414 static void 415 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, 416 struct mlx5_ct_entry *entry) 417 { 418 mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); 419 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 420 421 mlx5_fc_destroy(ct_priv->esw->dev, entry->counter); 422 } 423 424 static struct flow_action_entry * 425 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) 426 { 427 struct flow_action *flow_action = &flow_rule->action; 428 struct flow_action_entry *act; 429 int i; 430 431 flow_action_for_each(i, act, flow_action) { 432 if (act->id == FLOW_ACTION_CT_METADATA) 433 return act; 434 } 435 436 return NULL; 437 } 438 439 static int 440 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, 441 struct mlx5e_tc_mod_hdr_acts *mod_acts, 442 u8 ct_state, 443 u32 mark, 444 u32 labels_id, 445 u8 zone_restore_id) 446 { 447 struct mlx5_eswitch *esw = ct_priv->esw; 448 int err; 449 450 err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, 451 CTSTATE_TO_REG, ct_state); 452 if (err) 453 return err; 454 455 err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, 456 MARK_TO_REG, mark); 457 if (err) 458 return err; 459 460 err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, 461 LABELS_TO_REG, labels_id); 462 if (err) 463 return err; 464 465 err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, 466 ZONE_RESTORE_TO_REG, zone_restore_id); 
467 if (err) 468 return err; 469 470 return 0; 471 } 472 473 static int 474 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, 475 char *modact) 476 { 477 u32 offset = act->mangle.offset, field; 478 479 switch (act->mangle.htype) { 480 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 481 MLX5_SET(set_action_in, modact, length, 0); 482 if (offset == offsetof(struct iphdr, saddr)) 483 field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; 484 else if (offset == offsetof(struct iphdr, daddr)) 485 field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; 486 else 487 return -EOPNOTSUPP; 488 break; 489 490 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 491 MLX5_SET(set_action_in, modact, length, 0); 492 if (offset == offsetof(struct ipv6hdr, saddr) + 12) 493 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; 494 else if (offset == offsetof(struct ipv6hdr, saddr) + 8) 495 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; 496 else if (offset == offsetof(struct ipv6hdr, saddr) + 4) 497 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; 498 else if (offset == offsetof(struct ipv6hdr, saddr)) 499 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; 500 else if (offset == offsetof(struct ipv6hdr, daddr) + 12) 501 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; 502 else if (offset == offsetof(struct ipv6hdr, daddr) + 8) 503 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; 504 else if (offset == offsetof(struct ipv6hdr, daddr) + 4) 505 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; 506 else if (offset == offsetof(struct ipv6hdr, daddr)) 507 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; 508 else 509 return -EOPNOTSUPP; 510 break; 511 512 case FLOW_ACT_MANGLE_HDR_TYPE_TCP: 513 MLX5_SET(set_action_in, modact, length, 16); 514 if (offset == offsetof(struct tcphdr, source)) 515 field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; 516 else if (offset == offsetof(struct tcphdr, dest)) 517 field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; 518 else 519 return -EOPNOTSUPP; 520 break; 521 522 case FLOW_ACT_MANGLE_HDR_TYPE_UDP: 523 MLX5_SET(set_action_in, modact, length, 
16); 524 if (offset == offsetof(struct udphdr, source)) 525 field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; 526 else if (offset == offsetof(struct udphdr, dest)) 527 field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; 528 else 529 return -EOPNOTSUPP; 530 break; 531 532 default: 533 return -EOPNOTSUPP; 534 } 535 536 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); 537 MLX5_SET(set_action_in, modact, offset, 0); 538 MLX5_SET(set_action_in, modact, field, field); 539 MLX5_SET(set_action_in, modact, data, act->mangle.val); 540 541 return 0; 542 } 543 544 static int 545 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, 546 struct flow_rule *flow_rule, 547 struct mlx5e_tc_mod_hdr_acts *mod_acts) 548 { 549 struct flow_action *flow_action = &flow_rule->action; 550 struct mlx5_core_dev *mdev = ct_priv->esw->dev; 551 struct flow_action_entry *act; 552 size_t action_size; 553 char *modact; 554 int err, i; 555 556 action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); 557 558 flow_action_for_each(i, act, flow_action) { 559 switch (act->id) { 560 case FLOW_ACTION_MANGLE: { 561 err = alloc_mod_hdr_actions(mdev, 562 MLX5_FLOW_NAMESPACE_FDB, 563 mod_acts); 564 if (err) 565 return err; 566 567 modact = mod_acts->actions + 568 mod_acts->num_actions * action_size; 569 570 err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); 571 if (err) 572 return err; 573 574 mod_acts->num_actions++; 575 } 576 break; 577 578 case FLOW_ACTION_CT_METADATA: 579 /* Handled earlier */ 580 continue; 581 default: 582 return -EOPNOTSUPP; 583 } 584 } 585 586 return 0; 587 } 588 589 static int 590 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 591 struct mlx5_esw_flow_attr *attr, 592 struct flow_rule *flow_rule, 593 struct mlx5e_mod_hdr_handle **mh, 594 u8 zone_restore_id, bool nat) 595 { 596 struct mlx5e_tc_mod_hdr_acts mod_acts = {}; 597 struct flow_action_entry *meta; 598 u16 ct_state = 0; 599 int err; 600 601 meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 
602 if (!meta) 603 return -EOPNOTSUPP; 604 605 err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels, 606 &attr->ct_attr.ct_labels_id); 607 if (err) 608 return -EOPNOTSUPP; 609 if (nat) { 610 err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, 611 &mod_acts); 612 if (err) 613 goto err_mapping; 614 615 ct_state |= MLX5_CT_STATE_NAT_BIT; 616 } 617 618 ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; 619 err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, 620 ct_state, 621 meta->ct_metadata.mark, 622 attr->ct_attr.ct_labels_id, 623 zone_restore_id); 624 if (err) 625 goto err_mapping; 626 627 *mh = mlx5e_mod_hdr_attach(ct_priv->esw->dev, 628 &ct_priv->esw->offloads.mod_hdr, 629 MLX5_FLOW_NAMESPACE_FDB, 630 &mod_acts); 631 if (IS_ERR(*mh)) { 632 err = PTR_ERR(*mh); 633 goto err_mapping; 634 } 635 attr->modify_hdr = mlx5e_mod_hdr_get(*mh); 636 637 dealloc_mod_hdr_actions(&mod_acts); 638 return 0; 639 640 err_mapping: 641 dealloc_mod_hdr_actions(&mod_acts); 642 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); 643 return err; 644 } 645 646 static int 647 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, 648 struct flow_rule *flow_rule, 649 struct mlx5_ct_entry *entry, 650 bool nat, u8 zone_restore_id) 651 { 652 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 653 struct mlx5_esw_flow_attr *attr = &zone_rule->attr; 654 struct mlx5_eswitch *esw = ct_priv->esw; 655 struct mlx5_flow_spec *spec = NULL; 656 int err; 657 658 zone_rule->nat = nat; 659 660 spec = kzalloc(sizeof(*spec), GFP_KERNEL); 661 if (!spec) 662 return -ENOMEM; 663 664 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, 665 &zone_rule->mh, 666 zone_restore_id, nat); 667 if (err) { 668 ct_dbg("Failed to create ct entry mod hdr"); 669 goto err_mod_hdr; 670 } 671 672 attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | 673 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 674 MLX5_FLOW_CONTEXT_ACTION_COUNT; 675 attr->dest_chain = 0; 676 
attr->dest_ft = ct_priv->post_ct; 677 attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct; 678 attr->outer_match_level = MLX5_MATCH_L4; 679 attr->counter = entry->counter; 680 attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; 681 682 mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); 683 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 684 entry->tuple.zone & MLX5_CT_ZONE_MASK, 685 MLX5_CT_ZONE_MASK); 686 687 zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); 688 if (IS_ERR(zone_rule->rule)) { 689 err = PTR_ERR(zone_rule->rule); 690 ct_dbg("Failed to add ct entry rule, nat: %d", nat); 691 goto err_rule; 692 } 693 694 kfree(spec); 695 ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); 696 697 return 0; 698 699 err_rule: 700 mlx5e_mod_hdr_detach(ct_priv->esw->dev, 701 &esw->offloads.mod_hdr, zone_rule->mh); 702 err_mod_hdr: 703 kfree(spec); 704 return err; 705 } 706 707 static int 708 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, 709 struct flow_rule *flow_rule, 710 struct mlx5_ct_entry *entry, 711 u8 zone_restore_id) 712 { 713 struct mlx5_eswitch *esw = ct_priv->esw; 714 int err; 715 716 entry->counter = mlx5_fc_create(esw->dev, true); 717 if (IS_ERR(entry->counter)) { 718 err = PTR_ERR(entry->counter); 719 ct_dbg("Failed to create counter for ct entry"); 720 return err; 721 } 722 723 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false, 724 zone_restore_id); 725 if (err) 726 goto err_orig; 727 728 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true, 729 zone_restore_id); 730 if (err) 731 goto err_nat; 732 733 return 0; 734 735 err_nat: 736 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 737 err_orig: 738 mlx5_fc_destroy(esw->dev, entry->counter); 739 return err; 740 } 741 742 static int 743 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, 744 struct flow_cls_offload *flow) 745 { 746 struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); 747 struct mlx5_tc_ct_priv 
*ct_priv = ft->ct_priv; 748 struct flow_action_entry *meta_action; 749 unsigned long cookie = flow->cookie; 750 struct mlx5_ct_entry *entry; 751 int err; 752 753 meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 754 if (!meta_action) 755 return -EOPNOTSUPP; 756 757 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, 758 cts_ht_params); 759 if (entry) 760 return 0; 761 762 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 763 if (!entry) 764 return -ENOMEM; 765 766 entry->tuple.zone = ft->zone; 767 entry->cookie = flow->cookie; 768 entry->restore_cookie = meta_action->ct_metadata.cookie; 769 770 err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); 771 if (err) 772 goto err_set; 773 774 memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple)); 775 err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule); 776 if (err) 777 goto err_set; 778 779 err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht, 780 &entry->tuple_node, 781 tuples_ht_params); 782 if (err) 783 goto err_tuple; 784 785 if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { 786 err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht, 787 &entry->tuple_nat_node, 788 tuples_nat_ht_params); 789 if (err) 790 goto err_tuple_nat; 791 } 792 793 err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, 794 ft->zone_restore_id); 795 if (err) 796 goto err_rules; 797 798 err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node, 799 cts_ht_params); 800 if (err) 801 goto err_insert; 802 803 return 0; 804 805 err_insert: 806 mlx5_tc_ct_entry_del_rules(ct_priv, entry); 807 err_rules: 808 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 809 &entry->tuple_nat_node, tuples_nat_ht_params); 810 err_tuple_nat: 811 if (entry->tuple_node.next) 812 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, 813 &entry->tuple_node, 814 tuples_ht_params); 815 err_tuple: 816 err_set: 817 kfree(entry); 818 netdev_warn(ct_priv->netdev, 819 "Failed to offload ct entry, err: %d\n", err); 820 
return err; 821 } 822 823 static void 824 mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, 825 struct mlx5_ct_entry *entry) 826 { 827 mlx5_tc_ct_entry_del_rules(ct_priv, entry); 828 if (entry->tuple_node.next) 829 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 830 &entry->tuple_nat_node, 831 tuples_nat_ht_params); 832 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, 833 tuples_ht_params); 834 } 835 836 static int 837 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, 838 struct flow_cls_offload *flow) 839 { 840 unsigned long cookie = flow->cookie; 841 struct mlx5_ct_entry *entry; 842 843 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, 844 cts_ht_params); 845 if (!entry) 846 return -ENOENT; 847 848 mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry); 849 WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, 850 &entry->node, 851 cts_ht_params)); 852 kfree(entry); 853 854 return 0; 855 } 856 857 static int 858 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, 859 struct flow_cls_offload *f) 860 { 861 unsigned long cookie = f->cookie; 862 struct mlx5_ct_entry *entry; 863 u64 lastuse, packets, bytes; 864 865 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, 866 cts_ht_params); 867 if (!entry) 868 return -ENOENT; 869 870 mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse); 871 flow_stats_update(&f->stats, bytes, packets, 0, lastuse, 872 FLOW_ACTION_HW_STATS_DELAYED); 873 874 return 0; 875 } 876 877 static int 878 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, 879 void *cb_priv) 880 { 881 struct flow_cls_offload *f = type_data; 882 struct mlx5_ct_ft *ft = cb_priv; 883 884 if (type != TC_SETUP_CLSFLOWER) 885 return -EOPNOTSUPP; 886 887 switch (f->command) { 888 case FLOW_CLS_REPLACE: 889 return mlx5_tc_ct_block_flow_offload_add(ft, f); 890 case FLOW_CLS_DESTROY: 891 return mlx5_tc_ct_block_flow_offload_del(ft, f); 892 case FLOW_CLS_STATS: 893 return 
mlx5_tc_ct_block_flow_offload_stats(ft, f); 894 default: 895 break; 896 } 897 898 return -EOPNOTSUPP; 899 } 900 901 static bool 902 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, 903 u16 zone) 904 { 905 struct flow_keys flow_keys; 906 907 skb_reset_network_header(skb); 908 skb_flow_dissect_flow_keys(skb, &flow_keys, 0); 909 910 tuple->zone = zone; 911 912 if (flow_keys.basic.ip_proto != IPPROTO_TCP && 913 flow_keys.basic.ip_proto != IPPROTO_UDP) 914 return false; 915 916 tuple->port.src = flow_keys.ports.src; 917 tuple->port.dst = flow_keys.ports.dst; 918 tuple->n_proto = flow_keys.basic.n_proto; 919 tuple->ip_proto = flow_keys.basic.ip_proto; 920 921 switch (flow_keys.basic.n_proto) { 922 case htons(ETH_P_IP): 923 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 924 tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src; 925 tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst; 926 break; 927 928 case htons(ETH_P_IPV6): 929 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 930 tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src; 931 tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst; 932 break; 933 default: 934 goto out; 935 } 936 937 return true; 938 939 out: 940 return false; 941 } 942 943 int 944 mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, 945 struct mlx5_flow_spec *spec) 946 { 947 u32 ctstate = 0, ctstate_mask = 0; 948 949 mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, 950 &ctstate, &ctstate_mask); 951 if (ctstate_mask) 952 return -EOPNOTSUPP; 953 954 ctstate_mask |= MLX5_CT_STATE_TRK_BIT; 955 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 956 ctstate, ctstate_mask); 957 958 return 0; 959 } 960 961 int 962 mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, 963 struct mlx5_flow_spec *spec, 964 struct flow_cls_offload *f, 965 struct mlx5_ct_attr *ct_attr, 966 struct netlink_ext_ack *extack) 967 { 968 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 969 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 970 struct 
flow_dissector_key_ct *mask, *key; 971 bool trk, est, untrk, unest, new; 972 u32 ctstate = 0, ctstate_mask = 0; 973 u16 ct_state_on, ct_state_off; 974 u16 ct_state, ct_state_mask; 975 struct flow_match_ct match; 976 u32 ct_labels[4]; 977 978 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) 979 return 0; 980 981 if (!ct_priv) { 982 NL_SET_ERR_MSG_MOD(extack, 983 "offload of ct matching isn't available"); 984 return -EOPNOTSUPP; 985 } 986 987 flow_rule_match_ct(rule, &match); 988 989 key = match.key; 990 mask = match.mask; 991 992 ct_state = key->ct_state; 993 ct_state_mask = mask->ct_state; 994 995 if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | 996 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | 997 TCA_FLOWER_KEY_CT_FLAGS_NEW)) { 998 NL_SET_ERR_MSG_MOD(extack, 999 "only ct_state trk, est and new are supported for offload"); 1000 return -EOPNOTSUPP; 1001 } 1002 1003 ct_state_on = ct_state & ct_state_mask; 1004 ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; 1005 trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1006 new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; 1007 est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1008 untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1009 unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1010 1011 ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; 1012 ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1013 ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; 1014 ctstate_mask |= (unest || est) ? 
MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1015 1016 if (new) { 1017 NL_SET_ERR_MSG_MOD(extack, 1018 "matching on ct_state +new isn't supported"); 1019 return -EOPNOTSUPP; 1020 } 1021 1022 if (mask->ct_zone) 1023 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1024 key->ct_zone, MLX5_CT_ZONE_MASK); 1025 if (ctstate_mask) 1026 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 1027 ctstate, ctstate_mask); 1028 if (mask->ct_mark) 1029 mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, 1030 key->ct_mark, mask->ct_mark); 1031 if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] || 1032 mask->ct_labels[3]) { 1033 ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0]; 1034 ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; 1035 ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; 1036 ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; 1037 if (mapping_add(ct_priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) 1038 return -EOPNOTSUPP; 1039 mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, 1040 MLX5_CT_LABELS_MASK); 1041 } 1042 1043 return 0; 1044 } 1045 1046 int 1047 mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, 1048 struct mlx5_esw_flow_attr *attr, 1049 const struct flow_action_entry *act, 1050 struct netlink_ext_ack *extack) 1051 { 1052 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 1053 1054 if (!ct_priv) { 1055 NL_SET_ERR_MSG_MOD(extack, 1056 "offload of ct action isn't available"); 1057 return -EOPNOTSUPP; 1058 } 1059 1060 attr->ct_attr.zone = act->ct.zone; 1061 attr->ct_attr.ct_action = act->ct.action; 1062 attr->ct_attr.nf_ft = act->ct.flow_table; 1063 1064 return 0; 1065 } 1066 1067 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, 1068 struct mlx5_tc_ct_pre *pre_ct, 1069 bool nat) 1070 { 1071 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1072 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; 1073 struct mlx5_core_dev *dev = ct_priv->esw->dev; 1074 struct mlx5_flow_table *fdb = pre_ct->fdb; 1075 
struct mlx5_flow_destination dest = {}; 1076 struct mlx5_flow_act flow_act = {}; 1077 struct mlx5_modify_hdr *mod_hdr; 1078 struct mlx5_flow_handle *rule; 1079 struct mlx5_flow_spec *spec; 1080 u32 ctstate; 1081 u16 zone; 1082 int err; 1083 1084 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1085 if (!spec) 1086 return -ENOMEM; 1087 1088 zone = ct_ft->zone & MLX5_CT_ZONE_MASK; 1089 err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone); 1090 if (err) { 1091 ct_dbg("Failed to set zone register mapping"); 1092 goto err_mapping; 1093 } 1094 1095 mod_hdr = mlx5_modify_header_alloc(dev, 1096 MLX5_FLOW_NAMESPACE_FDB, 1097 pre_mod_acts.num_actions, 1098 pre_mod_acts.actions); 1099 1100 if (IS_ERR(mod_hdr)) { 1101 err = PTR_ERR(mod_hdr); 1102 ct_dbg("Failed to create pre ct mod hdr"); 1103 goto err_mapping; 1104 } 1105 pre_ct->modify_hdr = mod_hdr; 1106 1107 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1108 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1109 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; 1110 flow_act.modify_hdr = mod_hdr; 1111 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 1112 1113 /* add flow rule */ 1114 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1115 zone, MLX5_CT_ZONE_MASK); 1116 ctstate = MLX5_CT_STATE_TRK_BIT; 1117 if (nat) 1118 ctstate |= MLX5_CT_STATE_NAT_BIT; 1119 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); 1120 1121 dest.ft = ct_priv->post_ct; 1122 rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); 1123 if (IS_ERR(rule)) { 1124 err = PTR_ERR(rule); 1125 ct_dbg("Failed to add pre ct flow rule zone %d", zone); 1126 goto err_flow_rule; 1127 } 1128 pre_ct->flow_rule = rule; 1129 1130 /* add miss rule */ 1131 memset(spec, 0, sizeof(*spec)); 1132 dest.ft = nat ? 
ct_priv->ct_nat : ct_priv->ct; 1133 rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); 1134 if (IS_ERR(rule)) { 1135 err = PTR_ERR(rule); 1136 ct_dbg("Failed to add pre ct miss rule zone %d", zone); 1137 goto err_miss_rule; 1138 } 1139 pre_ct->miss_rule = rule; 1140 1141 dealloc_mod_hdr_actions(&pre_mod_acts); 1142 kvfree(spec); 1143 return 0; 1144 1145 err_miss_rule: 1146 mlx5_del_flow_rules(pre_ct->flow_rule); 1147 err_flow_rule: 1148 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1149 err_mapping: 1150 dealloc_mod_hdr_actions(&pre_mod_acts); 1151 kvfree(spec); 1152 return err; 1153 } 1154 1155 static void 1156 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, 1157 struct mlx5_tc_ct_pre *pre_ct) 1158 { 1159 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1160 struct mlx5_core_dev *dev = ct_priv->esw->dev; 1161 1162 mlx5_del_flow_rules(pre_ct->flow_rule); 1163 mlx5_del_flow_rules(pre_ct->miss_rule); 1164 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1165 } 1166 1167 static int 1168 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, 1169 struct mlx5_tc_ct_pre *pre_ct, 1170 bool nat) 1171 { 1172 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); 1173 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1174 struct mlx5_core_dev *dev = ct_priv->esw->dev; 1175 struct mlx5_flow_table_attr ft_attr = {}; 1176 struct mlx5_flow_namespace *ns; 1177 struct mlx5_flow_table *ft; 1178 struct mlx5_flow_group *g; 1179 u32 metadata_reg_c_2_mask; 1180 u32 *flow_group_in; 1181 void *misc; 1182 int err; 1183 1184 ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); 1185 if (!ns) { 1186 err = -EOPNOTSUPP; 1187 ct_dbg("Failed to get FDB flow namespace"); 1188 return err; 1189 } 1190 1191 flow_group_in = kvzalloc(inlen, GFP_KERNEL); 1192 if (!flow_group_in) 1193 return -ENOMEM; 1194 1195 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; 1196 ft_attr.prio = FDB_TC_OFFLOAD; 1197 ft_attr.max_fte = 2; 1198 ft_attr.level = 1; 1199 ft = mlx5_create_flow_table(ns, 
&ft_attr); 1200 if (IS_ERR(ft)) { 1201 err = PTR_ERR(ft); 1202 ct_dbg("Failed to create pre ct table"); 1203 goto out_free; 1204 } 1205 pre_ct->fdb = ft; 1206 1207 /* create flow group */ 1208 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); 1209 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); 1210 MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 1211 MLX5_MATCH_MISC_PARAMETERS_2); 1212 1213 misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, 1214 match_criteria.misc_parameters_2); 1215 1216 metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; 1217 metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); 1218 if (nat) 1219 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); 1220 1221 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, 1222 metadata_reg_c_2_mask); 1223 1224 g = mlx5_create_flow_group(ft, flow_group_in); 1225 if (IS_ERR(g)) { 1226 err = PTR_ERR(g); 1227 ct_dbg("Failed to create pre ct group"); 1228 goto err_flow_grp; 1229 } 1230 pre_ct->flow_grp = g; 1231 1232 /* create miss group */ 1233 memset(flow_group_in, 0, inlen); 1234 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); 1235 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); 1236 g = mlx5_create_flow_group(ft, flow_group_in); 1237 if (IS_ERR(g)) { 1238 err = PTR_ERR(g); 1239 ct_dbg("Failed to create pre ct miss group"); 1240 goto err_miss_grp; 1241 } 1242 pre_ct->miss_grp = g; 1243 1244 err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); 1245 if (err) 1246 goto err_add_rules; 1247 1248 kvfree(flow_group_in); 1249 return 0; 1250 1251 err_add_rules: 1252 mlx5_destroy_flow_group(pre_ct->miss_grp); 1253 err_miss_grp: 1254 mlx5_destroy_flow_group(pre_ct->flow_grp); 1255 err_flow_grp: 1256 mlx5_destroy_flow_table(ft); 1257 out_free: 1258 kvfree(flow_group_in); 1259 return err; 1260 } 1261 1262 static void 1263 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, 1264 struct mlx5_tc_ct_pre *pre_ct) 1265 { 1266 
tc_ct_pre_ct_del_rules(ct_ft, pre_ct); 1267 mlx5_destroy_flow_group(pre_ct->miss_grp); 1268 mlx5_destroy_flow_group(pre_ct->flow_grp); 1269 mlx5_destroy_flow_table(pre_ct->fdb); 1270 } 1271 1272 static int 1273 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) 1274 { 1275 int err; 1276 1277 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); 1278 if (err) 1279 return err; 1280 1281 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); 1282 if (err) 1283 goto err_pre_ct_nat; 1284 1285 return 0; 1286 1287 err_pre_ct_nat: 1288 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1289 return err; 1290 } 1291 1292 static void 1293 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) 1294 { 1295 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); 1296 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1297 } 1298 1299 static struct mlx5_ct_ft * 1300 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, 1301 struct nf_flowtable *nf_ft) 1302 { 1303 struct mlx5_ct_ft *ft; 1304 int err; 1305 1306 ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); 1307 if (ft) { 1308 refcount_inc(&ft->refcount); 1309 return ft; 1310 } 1311 1312 ft = kzalloc(sizeof(*ft), GFP_KERNEL); 1313 if (!ft) 1314 return ERR_PTR(-ENOMEM); 1315 1316 err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id); 1317 if (err) 1318 goto err_mapping; 1319 1320 ft->zone = zone; 1321 ft->nf_ft = nf_ft; 1322 ft->ct_priv = ct_priv; 1323 refcount_set(&ft->refcount, 1); 1324 1325 err = mlx5_tc_ct_alloc_pre_ct_tables(ft); 1326 if (err) 1327 goto err_alloc_pre_ct; 1328 1329 err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); 1330 if (err) 1331 goto err_init; 1332 1333 err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, 1334 zone_params); 1335 if (err) 1336 goto err_insert; 1337 1338 err = nf_flow_table_offload_add_cb(ft->nf_ft, 1339 mlx5_tc_ct_block_flow_offload, ft); 1340 if (err) 1341 goto err_add_cb; 1342 1343 return ft; 1344 1345 err_add_cb: 1346 
rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1347 err_insert: 1348 rhashtable_destroy(&ft->ct_entries_ht); 1349 err_init: 1350 mlx5_tc_ct_free_pre_ct_tables(ft); 1351 err_alloc_pre_ct: 1352 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1353 err_mapping: 1354 kfree(ft); 1355 return ERR_PTR(err); 1356 } 1357 1358 static void 1359 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) 1360 { 1361 struct mlx5_tc_ct_priv *ct_priv = arg; 1362 struct mlx5_ct_entry *entry = ptr; 1363 1364 mlx5_tc_ct_del_ft_entry(ct_priv, entry); 1365 kfree(entry); 1366 } 1367 1368 static void 1369 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) 1370 { 1371 if (!refcount_dec_and_test(&ft->refcount)) 1372 return; 1373 1374 nf_flow_table_offload_del_cb(ft->nf_ft, 1375 mlx5_tc_ct_block_flow_offload, ft); 1376 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1377 rhashtable_free_and_destroy(&ft->ct_entries_ht, 1378 mlx5_tc_ct_flush_ft_entry, 1379 ct_priv); 1380 mlx5_tc_ct_free_pre_ct_tables(ft); 1381 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1382 kfree(ft); 1383 } 1384 1385 /* We translate the tc filter with CT action to the following HW model: 1386 * 1387 * +---------------------+ 1388 * + fdb prio (tc chain) + 1389 * + original match + 1390 * +---------------------+ 1391 * | set chain miss mapping 1392 * | set fte_id 1393 * | set tunnel_id 1394 * | do decap 1395 * v 1396 * +---------------------+ 1397 * + pre_ct/pre_ct_nat + if matches +---------------------+ 1398 * + zone+nat match +---------------->+ post_ct (see below) + 1399 * +---------------------+ set zone +---------------------+ 1400 * | set zone 1401 * v 1402 * +--------------------+ 1403 * + CT (nat or no nat) + 1404 * + tuple + zone match + 1405 * +--------------------+ 1406 * | set mark 1407 * | set labels_id 1408 * | set established 1409 * | set zone_restore 1410 * | do nat (if needed) 1411 * v 1412 * +--------------+ 1413 * + post_ct + 
original filter actions 1414 * + fte_id match +------------------------> 1415 * +--------------+ 1416 */ 1417 static struct mlx5_flow_handle * 1418 __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, 1419 struct mlx5e_tc_flow *flow, 1420 struct mlx5_flow_spec *orig_spec, 1421 struct mlx5_esw_flow_attr *attr) 1422 { 1423 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 1424 bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; 1425 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; 1426 struct mlx5_flow_spec *post_ct_spec = NULL; 1427 struct mlx5_eswitch *esw = ct_priv->esw; 1428 struct mlx5_esw_flow_attr *pre_ct_attr; 1429 struct mlx5_modify_hdr *mod_hdr; 1430 struct mlx5_flow_handle *rule; 1431 struct mlx5_ct_flow *ct_flow; 1432 int chain_mapping = 0, err; 1433 struct mlx5_ct_ft *ft; 1434 u32 fte_id = 1; 1435 1436 post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); 1437 ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); 1438 if (!post_ct_spec || !ct_flow) { 1439 kfree(post_ct_spec); 1440 kfree(ct_flow); 1441 return ERR_PTR(-ENOMEM); 1442 } 1443 1444 /* Register for CT established events */ 1445 ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, 1446 attr->ct_attr.nf_ft); 1447 if (IS_ERR(ft)) { 1448 err = PTR_ERR(ft); 1449 ct_dbg("Failed to register to ft callback"); 1450 goto err_ft; 1451 } 1452 ct_flow->ft = ft; 1453 1454 err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id, 1455 MLX5_FTE_ID_MAX, GFP_KERNEL); 1456 if (err) { 1457 netdev_warn(priv->netdev, 1458 "Failed to allocate fte id, err: %d\n", err); 1459 goto err_idr; 1460 } 1461 ct_flow->fte_id = fte_id; 1462 1463 /* Base esw attributes of both rules on original rule attribute */ 1464 pre_ct_attr = &ct_flow->pre_ct_attr; 1465 memcpy(pre_ct_attr, attr, sizeof(*attr)); 1466 memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr)); 1467 1468 /* Modify the original rule's action to fwd and modify, leave decap */ 1469 pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; 1470 
pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1471 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1472 1473 /* Write chain miss tag for miss in ct table as we 1474 * don't go though all prios of this chain as normal tc rules 1475 * miss. 1476 */ 1477 err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain, 1478 &chain_mapping); 1479 if (err) { 1480 ct_dbg("Failed to get chain register mapping for chain"); 1481 goto err_get_chain; 1482 } 1483 ct_flow->chain_mapping = chain_mapping; 1484 1485 err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, 1486 CHAIN_TO_REG, chain_mapping); 1487 if (err) { 1488 ct_dbg("Failed to set chain register mapping"); 1489 goto err_mapping; 1490 } 1491 1492 err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, 1493 FTEID_TO_REG, fte_id); 1494 if (err) { 1495 ct_dbg("Failed to set fte_id register mapping"); 1496 goto err_mapping; 1497 } 1498 1499 /* If original flow is decap, we do it before going into ct table 1500 * so add a rewrite for the tunnel match_id. 
1501 */ 1502 if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && 1503 attr->chain == 0) { 1504 u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); 1505 1506 err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, 1507 TUNNEL_TO_REG, 1508 tun_id); 1509 if (err) { 1510 ct_dbg("Failed to set tunnel register mapping"); 1511 goto err_mapping; 1512 } 1513 } 1514 1515 mod_hdr = mlx5_modify_header_alloc(esw->dev, 1516 MLX5_FLOW_NAMESPACE_FDB, 1517 pre_mod_acts.num_actions, 1518 pre_mod_acts.actions); 1519 if (IS_ERR(mod_hdr)) { 1520 err = PTR_ERR(mod_hdr); 1521 ct_dbg("Failed to create pre ct mod hdr"); 1522 goto err_mapping; 1523 } 1524 pre_ct_attr->modify_hdr = mod_hdr; 1525 1526 /* Post ct rule matches on fte_id and executes original rule's 1527 * tc rule action 1528 */ 1529 mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, 1530 fte_id, MLX5_FTE_ID_MASK); 1531 1532 /* Put post_ct rule on post_ct fdb */ 1533 ct_flow->post_ct_attr.chain = 0; 1534 ct_flow->post_ct_attr.prio = 0; 1535 ct_flow->post_ct_attr.fdb = ct_priv->post_ct; 1536 1537 ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE; 1538 ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE; 1539 ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); 1540 rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec, 1541 &ct_flow->post_ct_attr); 1542 ct_flow->post_ct_rule = rule; 1543 if (IS_ERR(ct_flow->post_ct_rule)) { 1544 err = PTR_ERR(ct_flow->post_ct_rule); 1545 ct_dbg("Failed to add post ct rule"); 1546 goto err_insert_post_ct; 1547 } 1548 1549 /* Change original rule point to ct table */ 1550 pre_ct_attr->dest_chain = 0; 1551 pre_ct_attr->dest_ft = nat ? 
ft->pre_ct_nat.fdb : ft->pre_ct.fdb; 1552 ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, 1553 orig_spec, 1554 pre_ct_attr); 1555 if (IS_ERR(ct_flow->pre_ct_rule)) { 1556 err = PTR_ERR(ct_flow->pre_ct_rule); 1557 ct_dbg("Failed to add pre ct rule"); 1558 goto err_insert_orig; 1559 } 1560 1561 attr->ct_attr.ct_flow = ct_flow; 1562 dealloc_mod_hdr_actions(&pre_mod_acts); 1563 kfree(post_ct_spec); 1564 1565 return rule; 1566 1567 err_insert_orig: 1568 mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule, 1569 &ct_flow->post_ct_attr); 1570 err_insert_post_ct: 1571 mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); 1572 err_mapping: 1573 dealloc_mod_hdr_actions(&pre_mod_acts); 1574 mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); 1575 err_get_chain: 1576 idr_remove(&ct_priv->fte_ids, fte_id); 1577 err_idr: 1578 mlx5_tc_ct_del_ft_cb(ct_priv, ft); 1579 err_ft: 1580 kfree(post_ct_spec); 1581 kfree(ct_flow); 1582 netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); 1583 return ERR_PTR(err); 1584 } 1585 1586 static struct mlx5_flow_handle * 1587 __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, 1588 struct mlx5_flow_spec *orig_spec, 1589 struct mlx5_esw_flow_attr *attr, 1590 struct mlx5e_tc_mod_hdr_acts *mod_acts) 1591 { 1592 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 1593 struct mlx5_eswitch *esw = ct_priv->esw; 1594 struct mlx5_esw_flow_attr *pre_ct_attr; 1595 struct mlx5_modify_hdr *mod_hdr; 1596 struct mlx5_flow_handle *rule; 1597 struct mlx5_ct_flow *ct_flow; 1598 int err; 1599 1600 ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); 1601 if (!ct_flow) 1602 return ERR_PTR(-ENOMEM); 1603 1604 /* Base esw attributes on original rule attribute */ 1605 pre_ct_attr = &ct_flow->pre_ct_attr; 1606 memcpy(pre_ct_attr, attr, sizeof(*attr)); 1607 1608 err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0); 1609 if (err) { 1610 ct_dbg("Failed to set register for ct 
clear"); 1611 goto err_set_registers; 1612 } 1613 1614 mod_hdr = mlx5_modify_header_alloc(esw->dev, 1615 MLX5_FLOW_NAMESPACE_FDB, 1616 mod_acts->num_actions, 1617 mod_acts->actions); 1618 if (IS_ERR(mod_hdr)) { 1619 err = PTR_ERR(mod_hdr); 1620 ct_dbg("Failed to add create ct clear mod hdr"); 1621 goto err_set_registers; 1622 } 1623 1624 dealloc_mod_hdr_actions(mod_acts); 1625 pre_ct_attr->modify_hdr = mod_hdr; 1626 pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1627 1628 rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr); 1629 if (IS_ERR(rule)) { 1630 err = PTR_ERR(rule); 1631 ct_dbg("Failed to add ct clear rule"); 1632 goto err_insert; 1633 } 1634 1635 attr->ct_attr.ct_flow = ct_flow; 1636 ct_flow->pre_ct_rule = rule; 1637 return rule; 1638 1639 err_insert: 1640 mlx5_modify_header_dealloc(priv->mdev, mod_hdr); 1641 err_set_registers: 1642 netdev_warn(priv->netdev, 1643 "Failed to offload ct clear flow, err %d\n", err); 1644 return ERR_PTR(err); 1645 } 1646 1647 struct mlx5_flow_handle * 1648 mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, 1649 struct mlx5e_tc_flow *flow, 1650 struct mlx5_flow_spec *spec, 1651 struct mlx5_esw_flow_attr *attr, 1652 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) 1653 { 1654 bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; 1655 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 1656 struct mlx5_flow_handle *rule; 1657 1658 if (!ct_priv) 1659 return ERR_PTR(-EOPNOTSUPP); 1660 1661 mutex_lock(&ct_priv->control_lock); 1662 1663 if (clear_action) 1664 rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts); 1665 else 1666 rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr); 1667 mutex_unlock(&ct_priv->control_lock); 1668 1669 return rule; 1670 } 1671 1672 static void 1673 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, 1674 struct mlx5_ct_flow *ct_flow) 1675 { 1676 struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr; 1677 struct mlx5_eswitch 
*esw = ct_priv->esw; 1678 1679 mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule, 1680 pre_ct_attr); 1681 mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr); 1682 1683 if (ct_flow->post_ct_rule) { 1684 mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule, 1685 &ct_flow->post_ct_attr); 1686 mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); 1687 idr_remove(&ct_priv->fte_ids, ct_flow->fte_id); 1688 mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); 1689 } 1690 1691 kfree(ct_flow); 1692 } 1693 1694 void 1695 mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, 1696 struct mlx5_esw_flow_attr *attr) 1697 { 1698 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 1699 struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; 1700 1701 /* We are called on error to clean up stuff from parsing 1702 * but we don't have anything for now 1703 */ 1704 if (!ct_flow) 1705 return; 1706 1707 mutex_lock(&ct_priv->control_lock); 1708 __mlx5_tc_ct_delete_flow(ct_priv, ct_flow); 1709 mutex_unlock(&ct_priv->control_lock); 1710 } 1711 1712 static int 1713 mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw, 1714 const char **err_msg) 1715 { 1716 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) 1717 /* cannot restore chain ID on HW miss */ 1718 1719 *err_msg = "tc skb extension missing"; 1720 return -EOPNOTSUPP; 1721 #endif 1722 1723 if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) { 1724 *err_msg = "firmware level support is missing"; 1725 return -EOPNOTSUPP; 1726 } 1727 1728 if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { 1729 /* vlan workaround should be avoided for multi chain rules. 
1730 * This is just a sanity check as pop vlan action should 1731 * be supported by any FW that supports ignore_flow_level 1732 */ 1733 1734 *err_msg = "firmware vlan actions support is missing"; 1735 return -EOPNOTSUPP; 1736 } 1737 1738 if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, 1739 fdb_modify_header_fwd_to_table)) { 1740 /* CT always writes to registers which are mod header actions. 1741 * Therefore, mod header and goto is required 1742 */ 1743 1744 *err_msg = "firmware fwd and modify support is missing"; 1745 return -EOPNOTSUPP; 1746 } 1747 1748 if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { 1749 *err_msg = "register loopback isn't supported"; 1750 return -EOPNOTSUPP; 1751 } 1752 1753 return 0; 1754 } 1755 1756 static void 1757 mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err) 1758 { 1759 if (msg) 1760 netdev_warn(rpriv->netdev, 1761 "tc ct offload not supported, %s, err: %d\n", 1762 msg, err); 1763 else 1764 netdev_warn(rpriv->netdev, 1765 "tc ct offload not supported, err: %d\n", 1766 err); 1767 } 1768 1769 int 1770 mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) 1771 { 1772 struct mlx5_tc_ct_priv *ct_priv; 1773 struct mlx5e_rep_priv *rpriv; 1774 struct mlx5_eswitch *esw; 1775 struct mlx5e_priv *priv; 1776 const char *msg; 1777 int err; 1778 1779 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); 1780 priv = netdev_priv(rpriv->netdev); 1781 esw = priv->mdev->priv.eswitch; 1782 1783 err = mlx5_tc_ct_init_check_support(esw, &msg); 1784 if (err) { 1785 mlx5_tc_ct_init_err(rpriv, msg, err); 1786 goto err_support; 1787 } 1788 1789 ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); 1790 if (!ct_priv) { 1791 mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM); 1792 goto err_alloc; 1793 } 1794 1795 ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true); 1796 if (IS_ERR(ct_priv->zone_mapping)) { 1797 err = PTR_ERR(ct_priv->zone_mapping); 1798 goto err_mapping_zone; 1799 } 1800 1801 ct_priv->labels_mapping = 
mapping_create(sizeof(u32) * 4, 0, true); 1802 if (IS_ERR(ct_priv->labels_mapping)) { 1803 err = PTR_ERR(ct_priv->labels_mapping); 1804 goto err_mapping_labels; 1805 } 1806 1807 ct_priv->esw = esw; 1808 ct_priv->netdev = rpriv->netdev; 1809 ct_priv->ct = mlx5_esw_chains_create_global_table(esw); 1810 if (IS_ERR(ct_priv->ct)) { 1811 err = PTR_ERR(ct_priv->ct); 1812 mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err); 1813 goto err_ct_tbl; 1814 } 1815 1816 ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw); 1817 if (IS_ERR(ct_priv->ct_nat)) { 1818 err = PTR_ERR(ct_priv->ct_nat); 1819 mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table", 1820 err); 1821 goto err_ct_nat_tbl; 1822 } 1823 1824 ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw); 1825 if (IS_ERR(ct_priv->post_ct)) { 1826 err = PTR_ERR(ct_priv->post_ct); 1827 mlx5_tc_ct_init_err(rpriv, "failed to create post ct table", 1828 err); 1829 goto err_post_ct_tbl; 1830 } 1831 1832 idr_init(&ct_priv->fte_ids); 1833 mutex_init(&ct_priv->control_lock); 1834 rhashtable_init(&ct_priv->zone_ht, &zone_params); 1835 rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); 1836 rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params); 1837 1838 /* Done, set ct_priv to know it initializted */ 1839 uplink_priv->ct_priv = ct_priv; 1840 1841 return 0; 1842 1843 err_post_ct_tbl: 1844 mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat); 1845 err_ct_nat_tbl: 1846 mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct); 1847 err_ct_tbl: 1848 mapping_destroy(ct_priv->labels_mapping); 1849 err_mapping_labels: 1850 mapping_destroy(ct_priv->zone_mapping); 1851 err_mapping_zone: 1852 kfree(ct_priv); 1853 err_alloc: 1854 err_support: 1855 1856 return 0; 1857 } 1858 1859 void 1860 mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) 1861 { 1862 struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; 1863 1864 if (!ct_priv) 1865 return; 1866 1867 
mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct); 1868 mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat); 1869 mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct); 1870 mapping_destroy(ct_priv->zone_mapping); 1871 mapping_destroy(ct_priv->labels_mapping); 1872 1873 rhashtable_destroy(&ct_priv->ct_tuples_ht); 1874 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 1875 rhashtable_destroy(&ct_priv->zone_ht); 1876 mutex_destroy(&ct_priv->control_lock); 1877 idr_destroy(&ct_priv->fte_ids); 1878 kfree(ct_priv); 1879 1880 uplink_priv->ct_priv = NULL; 1881 } 1882 1883 bool 1884 mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, 1885 struct sk_buff *skb, u8 zone_restore_id) 1886 { 1887 struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; 1888 struct mlx5_ct_tuple tuple = {}; 1889 struct mlx5_ct_entry *entry; 1890 u16 zone; 1891 1892 if (!ct_priv || !zone_restore_id) 1893 return true; 1894 1895 if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone)) 1896 return false; 1897 1898 if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) 1899 return false; 1900 1901 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple, 1902 tuples_ht_params); 1903 if (!entry) 1904 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, 1905 &tuple, tuples_nat_ht_params); 1906 if (!entry) 1907 return false; 1908 1909 tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); 1910 return true; 1911 } 1912