// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>

#include "esw/chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"

/*
 * Width (in bits) and mask of the register field that carries the ct zone,
 * derived from the byte length recorded in mlx5e_tc_attr_to_reg_mappings[].
 */
#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
/* Bits of the ct state value written to / matched on CTSTATE_TO_REG. */
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)

/* Width, maximum value and mask of the FTEID_TO_REG register field. */
#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX

/* Width and mask of the LABELS_TO_REG register field (holds a labels id). */
#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

/*
 * Debug print helper. Expands to a use of a variable named "ct_priv", so it
 * can only be used where such a struct mlx5_tc_ct_priv pointer is in scope.
 */
#define ct_dbg(fmt, args...)\
	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

/* Driver-wide state of the connection tracking offload. */
struct mlx5_tc_ct_priv {
	struct mlx5_eswitch *esw;
	const struct net_device *netdev; /* used for ct_dbg()/netdev_warn() */
	struct idr fte_ids;
	struct xarray tuple_ids;
	struct rhashtable zone_ht;
	struct rhashtable ct_tuples_ht;     /* entries keyed by their tuple */
	struct rhashtable ct_tuples_nat_ht; /* entries keyed by their tuple_nat */
	struct mlx5_flow_table *ct;      /* table holding non-nat entry rules */
	struct mlx5_flow_table *ct_nat;  /* table holding nat entry rules */
	struct mlx5_flow_table *post_ct; /* destination of the entry rules */
	struct mutex control_lock; /* guards parallel adds/dels */
	struct mapping_ctx *zone_mapping;
	struct mapping_ctx *labels_mapping; /* ct labels <-> labels id */
};

struct mlx5_ct_flow { 58 struct mlx5_esw_flow_attr pre_ct_attr; 59 struct mlx5_esw_flow_attr post_ct_attr; 60 struct mlx5_flow_handle *pre_ct_rule; 61 struct mlx5_flow_handle *post_ct_rule; 62 struct mlx5_ct_ft *ft; 63 u32 fte_id; 64 u32 chain_mapping; 65 }; 66 67 struct mlx5_ct_zone_rule { 68 struct mlx5_flow_handle *rule; 69 struct mlx5e_mod_hdr_handle *mh; 70 struct mlx5_esw_flow_attr attr; 71 bool nat; 72 }; 73 74 struct mlx5_tc_ct_pre { 75 struct mlx5_flow_table *fdb; 76 struct mlx5_flow_group *flow_grp; 77 struct mlx5_flow_group *miss_grp; 78 struct mlx5_flow_handle *flow_rule; 79 struct mlx5_flow_handle *miss_rule; 80 struct mlx5_modify_hdr *modify_hdr; 81 }; 82 83 struct mlx5_ct_ft { 84 struct rhash_head node; 85 u16 zone; 86 u32 zone_restore_id; 87 refcount_t refcount; 88 struct nf_flowtable *nf_ft; 89 struct mlx5_tc_ct_priv *ct_priv; 90 struct rhashtable ct_entries_ht; 91 struct mlx5_tc_ct_pre pre_ct; 92 struct mlx5_tc_ct_pre pre_ct_nat; 93 }; 94 95 struct mlx5_ct_tuple { 96 u16 addr_type; 97 __be16 n_proto; 98 u8 ip_proto; 99 struct { 100 union { 101 __be32 src_v4; 102 struct in6_addr src_v6; 103 }; 104 union { 105 __be32 dst_v4; 106 struct in6_addr dst_v6; 107 }; 108 } ip; 109 struct { 110 __be16 src; 111 __be16 dst; 112 } port; 113 114 u16 zone; 115 }; 116 117 struct mlx5_ct_entry { 118 struct rhash_head node; 119 struct rhash_head tuple_node; 120 struct rhash_head tuple_nat_node; 121 struct mlx5_fc *counter; 122 unsigned long cookie; 123 unsigned long restore_cookie; 124 struct mlx5_ct_tuple tuple; 125 struct mlx5_ct_tuple tuple_nat; 126 struct mlx5_ct_zone_rule zone_rules[2]; 127 }; 128 129 static const struct rhashtable_params cts_ht_params = { 130 .head_offset = offsetof(struct mlx5_ct_entry, node), 131 .key_offset = offsetof(struct mlx5_ct_entry, cookie), 132 .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie), 133 .automatic_shrinking = true, 134 .min_size = 16 * 1024, 135 }; 136 137 static const struct rhashtable_params zone_params = { 138 
.head_offset = offsetof(struct mlx5_ct_ft, node), 139 .key_offset = offsetof(struct mlx5_ct_ft, zone), 140 .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone), 141 .automatic_shrinking = true, 142 }; 143 144 static const struct rhashtable_params tuples_ht_params = { 145 .head_offset = offsetof(struct mlx5_ct_entry, tuple_node), 146 .key_offset = offsetof(struct mlx5_ct_entry, tuple), 147 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple), 148 .automatic_shrinking = true, 149 .min_size = 16 * 1024, 150 }; 151 152 static const struct rhashtable_params tuples_nat_ht_params = { 153 .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node), 154 .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat), 155 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat), 156 .automatic_shrinking = true, 157 .min_size = 16 * 1024, 158 }; 159 160 static struct mlx5_tc_ct_priv * 161 mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv) 162 { 163 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 164 struct mlx5_rep_uplink_priv *uplink_priv; 165 struct mlx5e_rep_priv *uplink_rpriv; 166 167 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 168 uplink_priv = &uplink_rpriv->uplink_priv; 169 return uplink_priv->ct_priv; 170 } 171 172 static int 173 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) 174 { 175 struct flow_match_control control; 176 struct flow_match_basic basic; 177 178 flow_rule_match_basic(rule, &basic); 179 flow_rule_match_control(rule, &control); 180 181 tuple->n_proto = basic.key->n_proto; 182 tuple->ip_proto = basic.key->ip_proto; 183 tuple->addr_type = control.key->addr_type; 184 185 if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 186 struct flow_match_ipv4_addrs match; 187 188 flow_rule_match_ipv4_addrs(rule, &match); 189 tuple->ip.src_v4 = match.key->src; 190 tuple->ip.dst_v4 = match.key->dst; 191 } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 192 struct flow_match_ipv6_addrs match; 193 194 
flow_rule_match_ipv6_addrs(rule, &match); 195 tuple->ip.src_v6 = match.key->src; 196 tuple->ip.dst_v6 = match.key->dst; 197 } else { 198 return -EOPNOTSUPP; 199 } 200 201 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 202 struct flow_match_ports match; 203 204 flow_rule_match_ports(rule, &match); 205 switch (tuple->ip_proto) { 206 case IPPROTO_TCP: 207 case IPPROTO_UDP: 208 tuple->port.src = match.key->src; 209 tuple->port.dst = match.key->dst; 210 break; 211 default: 212 return -EOPNOTSUPP; 213 } 214 } else { 215 return -EOPNOTSUPP; 216 } 217 218 return 0; 219 } 220 221 static int 222 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, 223 struct flow_rule *rule) 224 { 225 struct flow_action *flow_action = &rule->action; 226 struct flow_action_entry *act; 227 u32 offset, val, ip6_offset; 228 int i; 229 230 flow_action_for_each(i, act, flow_action) { 231 if (act->id != FLOW_ACTION_MANGLE) 232 continue; 233 234 offset = act->mangle.offset; 235 val = act->mangle.val; 236 switch (act->mangle.htype) { 237 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 238 if (offset == offsetof(struct iphdr, saddr)) 239 tuple->ip.src_v4 = cpu_to_be32(val); 240 else if (offset == offsetof(struct iphdr, daddr)) 241 tuple->ip.dst_v4 = cpu_to_be32(val); 242 else 243 return -EOPNOTSUPP; 244 break; 245 246 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 247 ip6_offset = (offset - offsetof(struct ipv6hdr, saddr)); 248 ip6_offset /= 4; 249 if (ip6_offset < 4) 250 tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val); 251 else if (ip6_offset < 8) 252 tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val); 253 else 254 return -EOPNOTSUPP; 255 break; 256 257 case FLOW_ACT_MANGLE_HDR_TYPE_TCP: 258 if (offset == offsetof(struct tcphdr, source)) 259 tuple->port.src = cpu_to_be16(val); 260 else if (offset == offsetof(struct tcphdr, dest)) 261 tuple->port.dst = cpu_to_be16(val); 262 else 263 return -EOPNOTSUPP; 264 break; 265 266 case FLOW_ACT_MANGLE_HDR_TYPE_UDP: 267 if (offset == offsetof(struct 
udphdr, source)) 268 tuple->port.src = cpu_to_be16(val); 269 else if (offset == offsetof(struct udphdr, dest)) 270 tuple->port.dst = cpu_to_be16(val); 271 else 272 return -EOPNOTSUPP; 273 break; 274 275 default: 276 return -EOPNOTSUPP; 277 } 278 } 279 280 return 0; 281 } 282 283 static int 284 mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, 285 struct flow_rule *rule) 286 { 287 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 288 outer_headers); 289 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 290 outer_headers); 291 u16 addr_type = 0; 292 u8 ip_proto = 0; 293 294 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 295 struct flow_match_basic match; 296 297 flow_rule_match_basic(rule, &match); 298 299 mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c, 300 headers_v); 301 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 302 match.mask->ip_proto); 303 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 304 match.key->ip_proto); 305 306 ip_proto = match.key->ip_proto; 307 } 308 309 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 310 struct flow_match_control match; 311 312 flow_rule_match_control(rule, &match); 313 addr_type = match.key->addr_type; 314 } 315 316 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 317 struct flow_match_ipv4_addrs match; 318 319 flow_rule_match_ipv4_addrs(rule, &match); 320 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 321 src_ipv4_src_ipv6.ipv4_layout.ipv4), 322 &match.mask->src, sizeof(match.mask->src)); 323 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 324 src_ipv4_src_ipv6.ipv4_layout.ipv4), 325 &match.key->src, sizeof(match.key->src)); 326 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 327 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 328 &match.mask->dst, sizeof(match.mask->dst)); 329 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 330 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 331 &match.key->dst, 
sizeof(match.key->dst)); 332 } 333 334 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 335 struct flow_match_ipv6_addrs match; 336 337 flow_rule_match_ipv6_addrs(rule, &match); 338 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 339 src_ipv4_src_ipv6.ipv6_layout.ipv6), 340 &match.mask->src, sizeof(match.mask->src)); 341 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 342 src_ipv4_src_ipv6.ipv6_layout.ipv6), 343 &match.key->src, sizeof(match.key->src)); 344 345 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 346 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 347 &match.mask->dst, sizeof(match.mask->dst)); 348 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 349 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 350 &match.key->dst, sizeof(match.key->dst)); 351 } 352 353 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 354 struct flow_match_ports match; 355 356 flow_rule_match_ports(rule, &match); 357 switch (ip_proto) { 358 case IPPROTO_TCP: 359 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 360 tcp_sport, ntohs(match.mask->src)); 361 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 362 tcp_sport, ntohs(match.key->src)); 363 364 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 365 tcp_dport, ntohs(match.mask->dst)); 366 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 367 tcp_dport, ntohs(match.key->dst)); 368 break; 369 370 case IPPROTO_UDP: 371 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 372 udp_sport, ntohs(match.mask->src)); 373 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 374 udp_sport, ntohs(match.key->src)); 375 376 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 377 udp_dport, ntohs(match.mask->dst)); 378 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 379 udp_dport, ntohs(match.key->dst)); 380 break; 381 default: 382 break; 383 } 384 } 385 386 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { 387 struct flow_match_tcp match; 388 389 flow_rule_match_tcp(rule, &match); 390 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, 391 ntohs(match.mask->flags)); 392 
MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, 393 ntohs(match.key->flags)); 394 } 395 396 return 0; 397 } 398 399 static void 400 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, 401 struct mlx5_ct_entry *entry, 402 bool nat) 403 { 404 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 405 struct mlx5_esw_flow_attr *attr = &zone_rule->attr; 406 struct mlx5_eswitch *esw = ct_priv->esw; 407 408 ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); 409 410 mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr); 411 mlx5e_mod_hdr_detach(ct_priv->esw->dev, 412 &esw->offloads.mod_hdr, zone_rule->mh); 413 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); 414 } 415 416 static void 417 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, 418 struct mlx5_ct_entry *entry) 419 { 420 mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); 421 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 422 423 mlx5_fc_destroy(ct_priv->esw->dev, entry->counter); 424 } 425 426 static struct flow_action_entry * 427 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) 428 { 429 struct flow_action *flow_action = &flow_rule->action; 430 struct flow_action_entry *act; 431 int i; 432 433 flow_action_for_each(i, act, flow_action) { 434 if (act->id == FLOW_ACTION_CT_METADATA) 435 return act; 436 } 437 438 return NULL; 439 } 440 441 static int 442 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, 443 struct mlx5e_tc_mod_hdr_acts *mod_acts, 444 u8 ct_state, 445 u32 mark, 446 u32 labels_id, 447 u8 zone_restore_id) 448 { 449 struct mlx5_eswitch *esw = ct_priv->esw; 450 int err; 451 452 err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, 453 CTSTATE_TO_REG, ct_state); 454 if (err) 455 return err; 456 457 err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, 458 MARK_TO_REG, mark); 459 if (err) 460 return err; 461 462 err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, 463 LABELS_TO_REG, labels_id); 464 if (err) 465 
return err; 466 467 err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, 468 ZONE_RESTORE_TO_REG, zone_restore_id); 469 if (err) 470 return err; 471 472 return 0; 473 } 474 475 static int 476 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, 477 char *modact) 478 { 479 u32 offset = act->mangle.offset, field; 480 481 switch (act->mangle.htype) { 482 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 483 MLX5_SET(set_action_in, modact, length, 0); 484 if (offset == offsetof(struct iphdr, saddr)) 485 field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; 486 else if (offset == offsetof(struct iphdr, daddr)) 487 field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; 488 else 489 return -EOPNOTSUPP; 490 break; 491 492 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 493 MLX5_SET(set_action_in, modact, length, 0); 494 if (offset == offsetof(struct ipv6hdr, saddr) + 12) 495 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; 496 else if (offset == offsetof(struct ipv6hdr, saddr) + 8) 497 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; 498 else if (offset == offsetof(struct ipv6hdr, saddr) + 4) 499 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; 500 else if (offset == offsetof(struct ipv6hdr, saddr)) 501 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; 502 else if (offset == offsetof(struct ipv6hdr, daddr) + 12) 503 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; 504 else if (offset == offsetof(struct ipv6hdr, daddr) + 8) 505 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; 506 else if (offset == offsetof(struct ipv6hdr, daddr) + 4) 507 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; 508 else if (offset == offsetof(struct ipv6hdr, daddr)) 509 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; 510 else 511 return -EOPNOTSUPP; 512 break; 513 514 case FLOW_ACT_MANGLE_HDR_TYPE_TCP: 515 MLX5_SET(set_action_in, modact, length, 16); 516 if (offset == offsetof(struct tcphdr, source)) 517 field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; 518 else if (offset == offsetof(struct tcphdr, dest)) 519 field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; 520 else 521 
return -EOPNOTSUPP; 522 break; 523 524 case FLOW_ACT_MANGLE_HDR_TYPE_UDP: 525 MLX5_SET(set_action_in, modact, length, 16); 526 if (offset == offsetof(struct udphdr, source)) 527 field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; 528 else if (offset == offsetof(struct udphdr, dest)) 529 field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; 530 else 531 return -EOPNOTSUPP; 532 break; 533 534 default: 535 return -EOPNOTSUPP; 536 } 537 538 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); 539 MLX5_SET(set_action_in, modact, offset, 0); 540 MLX5_SET(set_action_in, modact, field, field); 541 MLX5_SET(set_action_in, modact, data, act->mangle.val); 542 543 return 0; 544 } 545 546 static int 547 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, 548 struct flow_rule *flow_rule, 549 struct mlx5e_tc_mod_hdr_acts *mod_acts) 550 { 551 struct flow_action *flow_action = &flow_rule->action; 552 struct mlx5_core_dev *mdev = ct_priv->esw->dev; 553 struct flow_action_entry *act; 554 size_t action_size; 555 char *modact; 556 int err, i; 557 558 action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); 559 560 flow_action_for_each(i, act, flow_action) { 561 switch (act->id) { 562 case FLOW_ACTION_MANGLE: { 563 err = alloc_mod_hdr_actions(mdev, 564 MLX5_FLOW_NAMESPACE_FDB, 565 mod_acts); 566 if (err) 567 return err; 568 569 modact = mod_acts->actions + 570 mod_acts->num_actions * action_size; 571 572 err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); 573 if (err) 574 return err; 575 576 mod_acts->num_actions++; 577 } 578 break; 579 580 case FLOW_ACTION_CT_METADATA: 581 /* Handled earlier */ 582 continue; 583 default: 584 return -EOPNOTSUPP; 585 } 586 } 587 588 return 0; 589 } 590 591 static int 592 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 593 struct mlx5_esw_flow_attr *attr, 594 struct flow_rule *flow_rule, 595 struct mlx5e_mod_hdr_handle **mh, 596 u8 zone_restore_id, bool nat) 597 { 598 struct mlx5e_tc_mod_hdr_acts mod_acts = {}; 599 struct 
flow_action_entry *meta; 600 u16 ct_state = 0; 601 int err; 602 603 meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 604 if (!meta) 605 return -EOPNOTSUPP; 606 607 err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels, 608 &attr->ct_attr.ct_labels_id); 609 if (err) 610 return -EOPNOTSUPP; 611 if (nat) { 612 err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, 613 &mod_acts); 614 if (err) 615 goto err_mapping; 616 617 ct_state |= MLX5_CT_STATE_NAT_BIT; 618 } 619 620 ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; 621 err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, 622 ct_state, 623 meta->ct_metadata.mark, 624 attr->ct_attr.ct_labels_id, 625 zone_restore_id); 626 if (err) 627 goto err_mapping; 628 629 *mh = mlx5e_mod_hdr_attach(ct_priv->esw->dev, 630 &ct_priv->esw->offloads.mod_hdr, 631 MLX5_FLOW_NAMESPACE_FDB, 632 &mod_acts); 633 if (IS_ERR(*mh)) { 634 err = PTR_ERR(*mh); 635 goto err_mapping; 636 } 637 attr->modify_hdr = mlx5e_mod_hdr_get(*mh); 638 639 dealloc_mod_hdr_actions(&mod_acts); 640 return 0; 641 642 err_mapping: 643 dealloc_mod_hdr_actions(&mod_acts); 644 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); 645 return err; 646 } 647 648 static int 649 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, 650 struct flow_rule *flow_rule, 651 struct mlx5_ct_entry *entry, 652 bool nat, u8 zone_restore_id) 653 { 654 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 655 struct mlx5_esw_flow_attr *attr = &zone_rule->attr; 656 struct mlx5_eswitch *esw = ct_priv->esw; 657 struct mlx5_flow_spec *spec = NULL; 658 int err; 659 660 zone_rule->nat = nat; 661 662 spec = kzalloc(sizeof(*spec), GFP_KERNEL); 663 if (!spec) 664 return -ENOMEM; 665 666 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, 667 &zone_rule->mh, 668 zone_restore_id, nat); 669 if (err) { 670 ct_dbg("Failed to create ct entry mod hdr"); 671 goto err_mod_hdr; 672 } 673 674 attr->action = 
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | 675 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 676 MLX5_FLOW_CONTEXT_ACTION_COUNT; 677 attr->dest_chain = 0; 678 attr->dest_ft = ct_priv->post_ct; 679 attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct; 680 attr->outer_match_level = MLX5_MATCH_L4; 681 attr->counter = entry->counter; 682 attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; 683 684 mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); 685 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 686 entry->tuple.zone & MLX5_CT_ZONE_MASK, 687 MLX5_CT_ZONE_MASK); 688 689 zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); 690 if (IS_ERR(zone_rule->rule)) { 691 err = PTR_ERR(zone_rule->rule); 692 ct_dbg("Failed to add ct entry rule, nat: %d", nat); 693 goto err_rule; 694 } 695 696 kfree(spec); 697 ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); 698 699 return 0; 700 701 err_rule: 702 mlx5e_mod_hdr_detach(ct_priv->esw->dev, 703 &esw->offloads.mod_hdr, zone_rule->mh); 704 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); 705 err_mod_hdr: 706 kfree(spec); 707 return err; 708 } 709 710 static int 711 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, 712 struct flow_rule *flow_rule, 713 struct mlx5_ct_entry *entry, 714 u8 zone_restore_id) 715 { 716 struct mlx5_eswitch *esw = ct_priv->esw; 717 int err; 718 719 entry->counter = mlx5_fc_create(esw->dev, true); 720 if (IS_ERR(entry->counter)) { 721 err = PTR_ERR(entry->counter); 722 ct_dbg("Failed to create counter for ct entry"); 723 return err; 724 } 725 726 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false, 727 zone_restore_id); 728 if (err) 729 goto err_orig; 730 731 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true, 732 zone_restore_id); 733 if (err) 734 goto err_nat; 735 736 return 0; 737 738 err_nat: 739 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 740 err_orig: 741 mlx5_fc_destroy(esw->dev, entry->counter); 742 return err; 743 } 744 
745 static int 746 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, 747 struct flow_cls_offload *flow) 748 { 749 struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); 750 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 751 struct flow_action_entry *meta_action; 752 unsigned long cookie = flow->cookie; 753 struct mlx5_ct_entry *entry; 754 int err; 755 756 meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 757 if (!meta_action) 758 return -EOPNOTSUPP; 759 760 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, 761 cts_ht_params); 762 if (entry) 763 return 0; 764 765 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 766 if (!entry) 767 return -ENOMEM; 768 769 entry->tuple.zone = ft->zone; 770 entry->cookie = flow->cookie; 771 entry->restore_cookie = meta_action->ct_metadata.cookie; 772 773 err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); 774 if (err) 775 goto err_set; 776 777 memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple)); 778 err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule); 779 if (err) 780 goto err_set; 781 782 err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht, 783 &entry->tuple_node, 784 tuples_ht_params); 785 if (err) 786 goto err_tuple; 787 788 if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { 789 err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht, 790 &entry->tuple_nat_node, 791 tuples_nat_ht_params); 792 if (err) 793 goto err_tuple_nat; 794 } 795 796 err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, 797 ft->zone_restore_id); 798 if (err) 799 goto err_rules; 800 801 err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node, 802 cts_ht_params); 803 if (err) 804 goto err_insert; 805 806 return 0; 807 808 err_insert: 809 mlx5_tc_ct_entry_del_rules(ct_priv, entry); 810 err_rules: 811 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 812 &entry->tuple_nat_node, tuples_nat_ht_params); 813 err_tuple_nat: 814 if (entry->tuple_node.next) 815 
rhashtable_remove_fast(&ct_priv->ct_tuples_ht, 816 &entry->tuple_node, 817 tuples_ht_params); 818 err_tuple: 819 err_set: 820 kfree(entry); 821 netdev_warn(ct_priv->netdev, 822 "Failed to offload ct entry, err: %d\n", err); 823 return err; 824 } 825 826 static void 827 mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, 828 struct mlx5_ct_entry *entry) 829 { 830 mlx5_tc_ct_entry_del_rules(ct_priv, entry); 831 if (entry->tuple_node.next) 832 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 833 &entry->tuple_nat_node, 834 tuples_nat_ht_params); 835 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, 836 tuples_ht_params); 837 } 838 839 static int 840 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, 841 struct flow_cls_offload *flow) 842 { 843 unsigned long cookie = flow->cookie; 844 struct mlx5_ct_entry *entry; 845 846 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, 847 cts_ht_params); 848 if (!entry) 849 return -ENOENT; 850 851 mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry); 852 WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, 853 &entry->node, 854 cts_ht_params)); 855 kfree(entry); 856 857 return 0; 858 } 859 860 static int 861 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, 862 struct flow_cls_offload *f) 863 { 864 unsigned long cookie = f->cookie; 865 struct mlx5_ct_entry *entry; 866 u64 lastuse, packets, bytes; 867 868 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, 869 cts_ht_params); 870 if (!entry) 871 return -ENOENT; 872 873 mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse); 874 flow_stats_update(&f->stats, bytes, packets, 0, lastuse, 875 FLOW_ACTION_HW_STATS_DELAYED); 876 877 return 0; 878 } 879 880 static int 881 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, 882 void *cb_priv) 883 { 884 struct flow_cls_offload *f = type_data; 885 struct mlx5_ct_ft *ft = cb_priv; 886 887 if (type != TC_SETUP_CLSFLOWER) 888 return -EOPNOTSUPP; 889 890 switch (f->command) 
{ 891 case FLOW_CLS_REPLACE: 892 return mlx5_tc_ct_block_flow_offload_add(ft, f); 893 case FLOW_CLS_DESTROY: 894 return mlx5_tc_ct_block_flow_offload_del(ft, f); 895 case FLOW_CLS_STATS: 896 return mlx5_tc_ct_block_flow_offload_stats(ft, f); 897 default: 898 break; 899 } 900 901 return -EOPNOTSUPP; 902 } 903 904 static bool 905 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, 906 u16 zone) 907 { 908 struct flow_keys flow_keys; 909 910 skb_reset_network_header(skb); 911 skb_flow_dissect_flow_keys(skb, &flow_keys, 0); 912 913 tuple->zone = zone; 914 915 if (flow_keys.basic.ip_proto != IPPROTO_TCP && 916 flow_keys.basic.ip_proto != IPPROTO_UDP) 917 return false; 918 919 tuple->port.src = flow_keys.ports.src; 920 tuple->port.dst = flow_keys.ports.dst; 921 tuple->n_proto = flow_keys.basic.n_proto; 922 tuple->ip_proto = flow_keys.basic.ip_proto; 923 924 switch (flow_keys.basic.n_proto) { 925 case htons(ETH_P_IP): 926 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 927 tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src; 928 tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst; 929 break; 930 931 case htons(ETH_P_IPV6): 932 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 933 tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src; 934 tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst; 935 break; 936 default: 937 goto out; 938 } 939 940 return true; 941 942 out: 943 return false; 944 } 945 946 int 947 mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, 948 struct mlx5_flow_spec *spec) 949 { 950 u32 ctstate = 0, ctstate_mask = 0; 951 952 mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, 953 &ctstate, &ctstate_mask); 954 if (ctstate_mask) 955 return -EOPNOTSUPP; 956 957 ctstate_mask |= MLX5_CT_STATE_TRK_BIT; 958 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 959 ctstate, ctstate_mask); 960 961 return 0; 962 } 963 964 void mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr) 965 { 966 struct mlx5_tc_ct_priv *ct_priv = 
mlx5_tc_ct_get_ct_priv(priv); 967 968 if (!ct_priv || !ct_attr->ct_labels_id) 969 return; 970 971 mapping_remove(ct_priv->labels_mapping, ct_attr->ct_labels_id); 972 } 973 974 int 975 mlx5_tc_ct_match_add(struct mlx5e_priv *priv, 976 struct mlx5_flow_spec *spec, 977 struct flow_cls_offload *f, 978 struct mlx5_ct_attr *ct_attr, 979 struct netlink_ext_ack *extack) 980 { 981 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 982 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 983 struct flow_dissector_key_ct *mask, *key; 984 bool trk, est, untrk, unest, new; 985 u32 ctstate = 0, ctstate_mask = 0; 986 u16 ct_state_on, ct_state_off; 987 u16 ct_state, ct_state_mask; 988 struct flow_match_ct match; 989 u32 ct_labels[4]; 990 991 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) 992 return 0; 993 994 if (!ct_priv) { 995 NL_SET_ERR_MSG_MOD(extack, 996 "offload of ct matching isn't available"); 997 return -EOPNOTSUPP; 998 } 999 1000 flow_rule_match_ct(rule, &match); 1001 1002 key = match.key; 1003 mask = match.mask; 1004 1005 ct_state = key->ct_state; 1006 ct_state_mask = mask->ct_state; 1007 1008 if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | 1009 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | 1010 TCA_FLOWER_KEY_CT_FLAGS_NEW)) { 1011 NL_SET_ERR_MSG_MOD(extack, 1012 "only ct_state trk, est and new are supported for offload"); 1013 return -EOPNOTSUPP; 1014 } 1015 1016 ct_state_on = ct_state & ct_state_mask; 1017 ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; 1018 trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1019 new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; 1020 est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1021 untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1022 unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1023 1024 ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; 1025 ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1026 ctstate_mask |= (untrk || trk) ? 
MLX5_CT_STATE_TRK_BIT : 0; 1027 ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1028 1029 if (new) { 1030 NL_SET_ERR_MSG_MOD(extack, 1031 "matching on ct_state +new isn't supported"); 1032 return -EOPNOTSUPP; 1033 } 1034 1035 if (mask->ct_zone) 1036 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1037 key->ct_zone, MLX5_CT_ZONE_MASK); 1038 if (ctstate_mask) 1039 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 1040 ctstate, ctstate_mask); 1041 if (mask->ct_mark) 1042 mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, 1043 key->ct_mark, mask->ct_mark); 1044 if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] || 1045 mask->ct_labels[3]) { 1046 ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0]; 1047 ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; 1048 ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; 1049 ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; 1050 if (mapping_add(ct_priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) 1051 return -EOPNOTSUPP; 1052 mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, 1053 MLX5_CT_LABELS_MASK); 1054 } 1055 1056 return 0; 1057 } 1058 1059 int 1060 mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, 1061 struct mlx5_esw_flow_attr *attr, 1062 const struct flow_action_entry *act, 1063 struct netlink_ext_ack *extack) 1064 { 1065 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 1066 1067 if (!ct_priv) { 1068 NL_SET_ERR_MSG_MOD(extack, 1069 "offload of ct action isn't available"); 1070 return -EOPNOTSUPP; 1071 } 1072 1073 attr->ct_attr.zone = act->ct.zone; 1074 attr->ct_attr.ct_action = act->ct.action; 1075 attr->ct_attr.nf_ft = act->ct.flow_table; 1076 1077 return 0; 1078 } 1079 1080 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, 1081 struct mlx5_tc_ct_pre *pre_ct, 1082 bool nat) 1083 { 1084 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1085 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; 1086 struct mlx5_core_dev *dev = 
ct_priv->esw->dev; 1087 struct mlx5_flow_table *fdb = pre_ct->fdb; 1088 struct mlx5_flow_destination dest = {}; 1089 struct mlx5_flow_act flow_act = {}; 1090 struct mlx5_modify_hdr *mod_hdr; 1091 struct mlx5_flow_handle *rule; 1092 struct mlx5_flow_spec *spec; 1093 u32 ctstate; 1094 u16 zone; 1095 int err; 1096 1097 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1098 if (!spec) 1099 return -ENOMEM; 1100 1101 zone = ct_ft->zone & MLX5_CT_ZONE_MASK; 1102 err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone); 1103 if (err) { 1104 ct_dbg("Failed to set zone register mapping"); 1105 goto err_mapping; 1106 } 1107 1108 mod_hdr = mlx5_modify_header_alloc(dev, 1109 MLX5_FLOW_NAMESPACE_FDB, 1110 pre_mod_acts.num_actions, 1111 pre_mod_acts.actions); 1112 1113 if (IS_ERR(mod_hdr)) { 1114 err = PTR_ERR(mod_hdr); 1115 ct_dbg("Failed to create pre ct mod hdr"); 1116 goto err_mapping; 1117 } 1118 pre_ct->modify_hdr = mod_hdr; 1119 1120 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1121 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1122 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; 1123 flow_act.modify_hdr = mod_hdr; 1124 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 1125 1126 /* add flow rule */ 1127 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1128 zone, MLX5_CT_ZONE_MASK); 1129 ctstate = MLX5_CT_STATE_TRK_BIT; 1130 if (nat) 1131 ctstate |= MLX5_CT_STATE_NAT_BIT; 1132 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); 1133 1134 dest.ft = ct_priv->post_ct; 1135 rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); 1136 if (IS_ERR(rule)) { 1137 err = PTR_ERR(rule); 1138 ct_dbg("Failed to add pre ct flow rule zone %d", zone); 1139 goto err_flow_rule; 1140 } 1141 pre_ct->flow_rule = rule; 1142 1143 /* add miss rule */ 1144 memset(spec, 0, sizeof(*spec)); 1145 dest.ft = nat ? 
ct_priv->ct_nat : ct_priv->ct; 1146 rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); 1147 if (IS_ERR(rule)) { 1148 err = PTR_ERR(rule); 1149 ct_dbg("Failed to add pre ct miss rule zone %d", zone); 1150 goto err_miss_rule; 1151 } 1152 pre_ct->miss_rule = rule; 1153 1154 dealloc_mod_hdr_actions(&pre_mod_acts); 1155 kvfree(spec); 1156 return 0; 1157 1158 err_miss_rule: 1159 mlx5_del_flow_rules(pre_ct->flow_rule); 1160 err_flow_rule: 1161 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1162 err_mapping: 1163 dealloc_mod_hdr_actions(&pre_mod_acts); 1164 kvfree(spec); 1165 return err; 1166 } 1167 1168 static void 1169 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, 1170 struct mlx5_tc_ct_pre *pre_ct) 1171 { 1172 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1173 struct mlx5_core_dev *dev = ct_priv->esw->dev; 1174 1175 mlx5_del_flow_rules(pre_ct->flow_rule); 1176 mlx5_del_flow_rules(pre_ct->miss_rule); 1177 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1178 } 1179 1180 static int 1181 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, 1182 struct mlx5_tc_ct_pre *pre_ct, 1183 bool nat) 1184 { 1185 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); 1186 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1187 struct mlx5_core_dev *dev = ct_priv->esw->dev; 1188 struct mlx5_flow_table_attr ft_attr = {}; 1189 struct mlx5_flow_namespace *ns; 1190 struct mlx5_flow_table *ft; 1191 struct mlx5_flow_group *g; 1192 u32 metadata_reg_c_2_mask; 1193 u32 *flow_group_in; 1194 void *misc; 1195 int err; 1196 1197 ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); 1198 if (!ns) { 1199 err = -EOPNOTSUPP; 1200 ct_dbg("Failed to get FDB flow namespace"); 1201 return err; 1202 } 1203 1204 flow_group_in = kvzalloc(inlen, GFP_KERNEL); 1205 if (!flow_group_in) 1206 return -ENOMEM; 1207 1208 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; 1209 ft_attr.prio = FDB_TC_OFFLOAD; 1210 ft_attr.max_fte = 2; 1211 ft_attr.level = 1; 1212 ft = mlx5_create_flow_table(ns, 
&ft_attr); 1213 if (IS_ERR(ft)) { 1214 err = PTR_ERR(ft); 1215 ct_dbg("Failed to create pre ct table"); 1216 goto out_free; 1217 } 1218 pre_ct->fdb = ft; 1219 1220 /* create flow group */ 1221 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); 1222 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); 1223 MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 1224 MLX5_MATCH_MISC_PARAMETERS_2); 1225 1226 misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, 1227 match_criteria.misc_parameters_2); 1228 1229 metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; 1230 metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); 1231 if (nat) 1232 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); 1233 1234 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, 1235 metadata_reg_c_2_mask); 1236 1237 g = mlx5_create_flow_group(ft, flow_group_in); 1238 if (IS_ERR(g)) { 1239 err = PTR_ERR(g); 1240 ct_dbg("Failed to create pre ct group"); 1241 goto err_flow_grp; 1242 } 1243 pre_ct->flow_grp = g; 1244 1245 /* create miss group */ 1246 memset(flow_group_in, 0, inlen); 1247 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); 1248 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); 1249 g = mlx5_create_flow_group(ft, flow_group_in); 1250 if (IS_ERR(g)) { 1251 err = PTR_ERR(g); 1252 ct_dbg("Failed to create pre ct miss group"); 1253 goto err_miss_grp; 1254 } 1255 pre_ct->miss_grp = g; 1256 1257 err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); 1258 if (err) 1259 goto err_add_rules; 1260 1261 kvfree(flow_group_in); 1262 return 0; 1263 1264 err_add_rules: 1265 mlx5_destroy_flow_group(pre_ct->miss_grp); 1266 err_miss_grp: 1267 mlx5_destroy_flow_group(pre_ct->flow_grp); 1268 err_flow_grp: 1269 mlx5_destroy_flow_table(ft); 1270 out_free: 1271 kvfree(flow_group_in); 1272 return err; 1273 } 1274 1275 static void 1276 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, 1277 struct mlx5_tc_ct_pre *pre_ct) 1278 { 1279 
tc_ct_pre_ct_del_rules(ct_ft, pre_ct); 1280 mlx5_destroy_flow_group(pre_ct->miss_grp); 1281 mlx5_destroy_flow_group(pre_ct->flow_grp); 1282 mlx5_destroy_flow_table(pre_ct->fdb); 1283 } 1284 1285 static int 1286 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) 1287 { 1288 int err; 1289 1290 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); 1291 if (err) 1292 return err; 1293 1294 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); 1295 if (err) 1296 goto err_pre_ct_nat; 1297 1298 return 0; 1299 1300 err_pre_ct_nat: 1301 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1302 return err; 1303 } 1304 1305 static void 1306 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) 1307 { 1308 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); 1309 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1310 } 1311 1312 static struct mlx5_ct_ft * 1313 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, 1314 struct nf_flowtable *nf_ft) 1315 { 1316 struct mlx5_ct_ft *ft; 1317 int err; 1318 1319 ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); 1320 if (ft) { 1321 refcount_inc(&ft->refcount); 1322 return ft; 1323 } 1324 1325 ft = kzalloc(sizeof(*ft), GFP_KERNEL); 1326 if (!ft) 1327 return ERR_PTR(-ENOMEM); 1328 1329 err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id); 1330 if (err) 1331 goto err_mapping; 1332 1333 ft->zone = zone; 1334 ft->nf_ft = nf_ft; 1335 ft->ct_priv = ct_priv; 1336 refcount_set(&ft->refcount, 1); 1337 1338 err = mlx5_tc_ct_alloc_pre_ct_tables(ft); 1339 if (err) 1340 goto err_alloc_pre_ct; 1341 1342 err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); 1343 if (err) 1344 goto err_init; 1345 1346 err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, 1347 zone_params); 1348 if (err) 1349 goto err_insert; 1350 1351 err = nf_flow_table_offload_add_cb(ft->nf_ft, 1352 mlx5_tc_ct_block_flow_offload, ft); 1353 if (err) 1354 goto err_add_cb; 1355 1356 return ft; 1357 1358 err_add_cb: 1359 
rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1360 err_insert: 1361 rhashtable_destroy(&ft->ct_entries_ht); 1362 err_init: 1363 mlx5_tc_ct_free_pre_ct_tables(ft); 1364 err_alloc_pre_ct: 1365 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1366 err_mapping: 1367 kfree(ft); 1368 return ERR_PTR(err); 1369 } 1370 1371 static void 1372 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) 1373 { 1374 struct mlx5_tc_ct_priv *ct_priv = arg; 1375 struct mlx5_ct_entry *entry = ptr; 1376 1377 mlx5_tc_ct_del_ft_entry(ct_priv, entry); 1378 kfree(entry); 1379 } 1380 1381 static void 1382 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) 1383 { 1384 if (!refcount_dec_and_test(&ft->refcount)) 1385 return; 1386 1387 nf_flow_table_offload_del_cb(ft->nf_ft, 1388 mlx5_tc_ct_block_flow_offload, ft); 1389 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1390 rhashtable_free_and_destroy(&ft->ct_entries_ht, 1391 mlx5_tc_ct_flush_ft_entry, 1392 ct_priv); 1393 mlx5_tc_ct_free_pre_ct_tables(ft); 1394 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1395 kfree(ft); 1396 } 1397 1398 /* We translate the tc filter with CT action to the following HW model: 1399 * 1400 * +---------------------+ 1401 * + fdb prio (tc chain) + 1402 * + original match + 1403 * +---------------------+ 1404 * | set chain miss mapping 1405 * | set fte_id 1406 * | set tunnel_id 1407 * | do decap 1408 * v 1409 * +---------------------+ 1410 * + pre_ct/pre_ct_nat + if matches +---------------------+ 1411 * + zone+nat match +---------------->+ post_ct (see below) + 1412 * +---------------------+ set zone +---------------------+ 1413 * | set zone 1414 * v 1415 * +--------------------+ 1416 * + CT (nat or no nat) + 1417 * + tuple + zone match + 1418 * +--------------------+ 1419 * | set mark 1420 * | set labels_id 1421 * | set established 1422 * | set zone_restore 1423 * | do nat (if needed) 1424 * v 1425 * +--------------+ 1426 * + post_ct + 
original filter actions 1427 * + fte_id match +------------------------> 1428 * +--------------+ 1429 */ 1430 static struct mlx5_flow_handle * 1431 __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, 1432 struct mlx5e_tc_flow *flow, 1433 struct mlx5_flow_spec *orig_spec, 1434 struct mlx5_esw_flow_attr *attr) 1435 { 1436 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 1437 bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; 1438 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; 1439 struct mlx5_flow_spec *post_ct_spec = NULL; 1440 struct mlx5_eswitch *esw = ct_priv->esw; 1441 struct mlx5_esw_flow_attr *pre_ct_attr; 1442 struct mlx5_modify_hdr *mod_hdr; 1443 struct mlx5_flow_handle *rule; 1444 struct mlx5_ct_flow *ct_flow; 1445 int chain_mapping = 0, err; 1446 struct mlx5_ct_ft *ft; 1447 u32 fte_id = 1; 1448 1449 post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); 1450 ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); 1451 if (!post_ct_spec || !ct_flow) { 1452 kfree(post_ct_spec); 1453 kfree(ct_flow); 1454 return ERR_PTR(-ENOMEM); 1455 } 1456 1457 /* Register for CT established events */ 1458 ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, 1459 attr->ct_attr.nf_ft); 1460 if (IS_ERR(ft)) { 1461 err = PTR_ERR(ft); 1462 ct_dbg("Failed to register to ft callback"); 1463 goto err_ft; 1464 } 1465 ct_flow->ft = ft; 1466 1467 err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id, 1468 MLX5_FTE_ID_MAX, GFP_KERNEL); 1469 if (err) { 1470 netdev_warn(priv->netdev, 1471 "Failed to allocate fte id, err: %d\n", err); 1472 goto err_idr; 1473 } 1474 ct_flow->fte_id = fte_id; 1475 1476 /* Base esw attributes of both rules on original rule attribute */ 1477 pre_ct_attr = &ct_flow->pre_ct_attr; 1478 memcpy(pre_ct_attr, attr, sizeof(*attr)); 1479 memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr)); 1480 1481 /* Modify the original rule's action to fwd and modify, leave decap */ 1482 pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; 1483 
pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1484 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1485 1486 /* Write chain miss tag for miss in ct table as we 1487 * don't go though all prios of this chain as normal tc rules 1488 * miss. 1489 */ 1490 err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain, 1491 &chain_mapping); 1492 if (err) { 1493 ct_dbg("Failed to get chain register mapping for chain"); 1494 goto err_get_chain; 1495 } 1496 ct_flow->chain_mapping = chain_mapping; 1497 1498 err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, 1499 CHAIN_TO_REG, chain_mapping); 1500 if (err) { 1501 ct_dbg("Failed to set chain register mapping"); 1502 goto err_mapping; 1503 } 1504 1505 err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, 1506 FTEID_TO_REG, fte_id); 1507 if (err) { 1508 ct_dbg("Failed to set fte_id register mapping"); 1509 goto err_mapping; 1510 } 1511 1512 /* If original flow is decap, we do it before going into ct table 1513 * so add a rewrite for the tunnel match_id. 
1514 */ 1515 if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && 1516 attr->chain == 0) { 1517 u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); 1518 1519 err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, 1520 TUNNEL_TO_REG, 1521 tun_id); 1522 if (err) { 1523 ct_dbg("Failed to set tunnel register mapping"); 1524 goto err_mapping; 1525 } 1526 } 1527 1528 mod_hdr = mlx5_modify_header_alloc(esw->dev, 1529 MLX5_FLOW_NAMESPACE_FDB, 1530 pre_mod_acts.num_actions, 1531 pre_mod_acts.actions); 1532 if (IS_ERR(mod_hdr)) { 1533 err = PTR_ERR(mod_hdr); 1534 ct_dbg("Failed to create pre ct mod hdr"); 1535 goto err_mapping; 1536 } 1537 pre_ct_attr->modify_hdr = mod_hdr; 1538 1539 /* Post ct rule matches on fte_id and executes original rule's 1540 * tc rule action 1541 */ 1542 mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, 1543 fte_id, MLX5_FTE_ID_MASK); 1544 1545 /* Put post_ct rule on post_ct fdb */ 1546 ct_flow->post_ct_attr.chain = 0; 1547 ct_flow->post_ct_attr.prio = 0; 1548 ct_flow->post_ct_attr.fdb = ct_priv->post_ct; 1549 1550 ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE; 1551 ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE; 1552 ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); 1553 rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec, 1554 &ct_flow->post_ct_attr); 1555 ct_flow->post_ct_rule = rule; 1556 if (IS_ERR(ct_flow->post_ct_rule)) { 1557 err = PTR_ERR(ct_flow->post_ct_rule); 1558 ct_dbg("Failed to add post ct rule"); 1559 goto err_insert_post_ct; 1560 } 1561 1562 /* Change original rule point to ct table */ 1563 pre_ct_attr->dest_chain = 0; 1564 pre_ct_attr->dest_ft = nat ? 
ft->pre_ct_nat.fdb : ft->pre_ct.fdb; 1565 ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, 1566 orig_spec, 1567 pre_ct_attr); 1568 if (IS_ERR(ct_flow->pre_ct_rule)) { 1569 err = PTR_ERR(ct_flow->pre_ct_rule); 1570 ct_dbg("Failed to add pre ct rule"); 1571 goto err_insert_orig; 1572 } 1573 1574 attr->ct_attr.ct_flow = ct_flow; 1575 dealloc_mod_hdr_actions(&pre_mod_acts); 1576 kfree(post_ct_spec); 1577 1578 return rule; 1579 1580 err_insert_orig: 1581 mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule, 1582 &ct_flow->post_ct_attr); 1583 err_insert_post_ct: 1584 mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); 1585 err_mapping: 1586 dealloc_mod_hdr_actions(&pre_mod_acts); 1587 mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); 1588 err_get_chain: 1589 idr_remove(&ct_priv->fte_ids, fte_id); 1590 err_idr: 1591 mlx5_tc_ct_del_ft_cb(ct_priv, ft); 1592 err_ft: 1593 kfree(post_ct_spec); 1594 kfree(ct_flow); 1595 netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); 1596 return ERR_PTR(err); 1597 } 1598 1599 static struct mlx5_flow_handle * 1600 __mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv, 1601 struct mlx5_flow_spec *orig_spec, 1602 struct mlx5_esw_flow_attr *attr, 1603 struct mlx5e_tc_mod_hdr_acts *mod_acts) 1604 { 1605 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 1606 struct mlx5_eswitch *esw = ct_priv->esw; 1607 struct mlx5_esw_flow_attr *pre_ct_attr; 1608 struct mlx5_modify_hdr *mod_hdr; 1609 struct mlx5_flow_handle *rule; 1610 struct mlx5_ct_flow *ct_flow; 1611 int err; 1612 1613 ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); 1614 if (!ct_flow) 1615 return ERR_PTR(-ENOMEM); 1616 1617 /* Base esw attributes on original rule attribute */ 1618 pre_ct_attr = &ct_flow->pre_ct_attr; 1619 memcpy(pre_ct_attr, attr, sizeof(*attr)); 1620 1621 err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0); 1622 if (err) { 1623 ct_dbg("Failed to set register for ct 
clear"); 1624 goto err_set_registers; 1625 } 1626 1627 mod_hdr = mlx5_modify_header_alloc(esw->dev, 1628 MLX5_FLOW_NAMESPACE_FDB, 1629 mod_acts->num_actions, 1630 mod_acts->actions); 1631 if (IS_ERR(mod_hdr)) { 1632 err = PTR_ERR(mod_hdr); 1633 ct_dbg("Failed to add create ct clear mod hdr"); 1634 goto err_set_registers; 1635 } 1636 1637 dealloc_mod_hdr_actions(mod_acts); 1638 pre_ct_attr->modify_hdr = mod_hdr; 1639 pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1640 1641 rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr); 1642 if (IS_ERR(rule)) { 1643 err = PTR_ERR(rule); 1644 ct_dbg("Failed to add ct clear rule"); 1645 goto err_insert; 1646 } 1647 1648 attr->ct_attr.ct_flow = ct_flow; 1649 ct_flow->pre_ct_rule = rule; 1650 return rule; 1651 1652 err_insert: 1653 mlx5_modify_header_dealloc(priv->mdev, mod_hdr); 1654 err_set_registers: 1655 netdev_warn(priv->netdev, 1656 "Failed to offload ct clear flow, err %d\n", err); 1657 return ERR_PTR(err); 1658 } 1659 1660 struct mlx5_flow_handle * 1661 mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, 1662 struct mlx5e_tc_flow *flow, 1663 struct mlx5_flow_spec *spec, 1664 struct mlx5_esw_flow_attr *attr, 1665 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) 1666 { 1667 bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; 1668 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 1669 struct mlx5_flow_handle *rule; 1670 1671 if (!ct_priv) 1672 return ERR_PTR(-EOPNOTSUPP); 1673 1674 mutex_lock(&ct_priv->control_lock); 1675 1676 if (clear_action) 1677 rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts); 1678 else 1679 rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr); 1680 mutex_unlock(&ct_priv->control_lock); 1681 1682 return rule; 1683 } 1684 1685 static void 1686 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, 1687 struct mlx5_ct_flow *ct_flow) 1688 { 1689 struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr; 1690 struct mlx5_eswitch 
*esw = ct_priv->esw; 1691 1692 mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule, 1693 pre_ct_attr); 1694 mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr); 1695 1696 if (ct_flow->post_ct_rule) { 1697 mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule, 1698 &ct_flow->post_ct_attr); 1699 mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping); 1700 idr_remove(&ct_priv->fte_ids, ct_flow->fte_id); 1701 mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); 1702 } 1703 1704 kfree(ct_flow); 1705 } 1706 1707 void 1708 mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, 1709 struct mlx5_esw_flow_attr *attr) 1710 { 1711 struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); 1712 struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; 1713 1714 /* We are called on error to clean up stuff from parsing 1715 * but we don't have anything for now 1716 */ 1717 if (!ct_flow) 1718 return; 1719 1720 mutex_lock(&ct_priv->control_lock); 1721 __mlx5_tc_ct_delete_flow(ct_priv, ct_flow); 1722 mutex_unlock(&ct_priv->control_lock); 1723 } 1724 1725 static int 1726 mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw, 1727 const char **err_msg) 1728 { 1729 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) 1730 /* cannot restore chain ID on HW miss */ 1731 1732 *err_msg = "tc skb extension missing"; 1733 return -EOPNOTSUPP; 1734 #endif 1735 1736 if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) { 1737 *err_msg = "firmware level support is missing"; 1738 return -EOPNOTSUPP; 1739 } 1740 1741 if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { 1742 /* vlan workaround should be avoided for multi chain rules. 
1743 * This is just a sanity check as pop vlan action should 1744 * be supported by any FW that supports ignore_flow_level 1745 */ 1746 1747 *err_msg = "firmware vlan actions support is missing"; 1748 return -EOPNOTSUPP; 1749 } 1750 1751 if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, 1752 fdb_modify_header_fwd_to_table)) { 1753 /* CT always writes to registers which are mod header actions. 1754 * Therefore, mod header and goto is required 1755 */ 1756 1757 *err_msg = "firmware fwd and modify support is missing"; 1758 return -EOPNOTSUPP; 1759 } 1760 1761 if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { 1762 *err_msg = "register loopback isn't supported"; 1763 return -EOPNOTSUPP; 1764 } 1765 1766 return 0; 1767 } 1768 1769 static void 1770 mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err) 1771 { 1772 if (msg) 1773 netdev_warn(rpriv->netdev, 1774 "tc ct offload not supported, %s, err: %d\n", 1775 msg, err); 1776 else 1777 netdev_warn(rpriv->netdev, 1778 "tc ct offload not supported, err: %d\n", 1779 err); 1780 } 1781 1782 int 1783 mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) 1784 { 1785 struct mlx5_tc_ct_priv *ct_priv; 1786 struct mlx5e_rep_priv *rpriv; 1787 struct mlx5_eswitch *esw; 1788 struct mlx5e_priv *priv; 1789 const char *msg; 1790 int err; 1791 1792 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); 1793 priv = netdev_priv(rpriv->netdev); 1794 esw = priv->mdev->priv.eswitch; 1795 1796 err = mlx5_tc_ct_init_check_support(esw, &msg); 1797 if (err) { 1798 mlx5_tc_ct_init_err(rpriv, msg, err); 1799 goto err_support; 1800 } 1801 1802 ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); 1803 if (!ct_priv) { 1804 mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM); 1805 goto err_alloc; 1806 } 1807 1808 ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true); 1809 if (IS_ERR(ct_priv->zone_mapping)) { 1810 err = PTR_ERR(ct_priv->zone_mapping); 1811 goto err_mapping_zone; 1812 } 1813 1814 ct_priv->labels_mapping = 
mapping_create(sizeof(u32) * 4, 0, true); 1815 if (IS_ERR(ct_priv->labels_mapping)) { 1816 err = PTR_ERR(ct_priv->labels_mapping); 1817 goto err_mapping_labels; 1818 } 1819 1820 ct_priv->esw = esw; 1821 ct_priv->netdev = rpriv->netdev; 1822 ct_priv->ct = mlx5_esw_chains_create_global_table(esw); 1823 if (IS_ERR(ct_priv->ct)) { 1824 err = PTR_ERR(ct_priv->ct); 1825 mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err); 1826 goto err_ct_tbl; 1827 } 1828 1829 ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw); 1830 if (IS_ERR(ct_priv->ct_nat)) { 1831 err = PTR_ERR(ct_priv->ct_nat); 1832 mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table", 1833 err); 1834 goto err_ct_nat_tbl; 1835 } 1836 1837 ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw); 1838 if (IS_ERR(ct_priv->post_ct)) { 1839 err = PTR_ERR(ct_priv->post_ct); 1840 mlx5_tc_ct_init_err(rpriv, "failed to create post ct table", 1841 err); 1842 goto err_post_ct_tbl; 1843 } 1844 1845 idr_init(&ct_priv->fte_ids); 1846 mutex_init(&ct_priv->control_lock); 1847 rhashtable_init(&ct_priv->zone_ht, &zone_params); 1848 rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); 1849 rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params); 1850 1851 /* Done, set ct_priv to know it initializted */ 1852 uplink_priv->ct_priv = ct_priv; 1853 1854 return 0; 1855 1856 err_post_ct_tbl: 1857 mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat); 1858 err_ct_nat_tbl: 1859 mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct); 1860 err_ct_tbl: 1861 mapping_destroy(ct_priv->labels_mapping); 1862 err_mapping_labels: 1863 mapping_destroy(ct_priv->zone_mapping); 1864 err_mapping_zone: 1865 kfree(ct_priv); 1866 err_alloc: 1867 err_support: 1868 1869 return 0; 1870 } 1871 1872 void 1873 mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) 1874 { 1875 struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; 1876 1877 if (!ct_priv) 1878 return; 1879 1880 
mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct); 1881 mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat); 1882 mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct); 1883 mapping_destroy(ct_priv->zone_mapping); 1884 mapping_destroy(ct_priv->labels_mapping); 1885 1886 rhashtable_destroy(&ct_priv->ct_tuples_ht); 1887 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 1888 rhashtable_destroy(&ct_priv->zone_ht); 1889 mutex_destroy(&ct_priv->control_lock); 1890 idr_destroy(&ct_priv->fte_ids); 1891 kfree(ct_priv); 1892 1893 uplink_priv->ct_priv = NULL; 1894 } 1895 1896 bool 1897 mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, 1898 struct sk_buff *skb, u8 zone_restore_id) 1899 { 1900 struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; 1901 struct mlx5_ct_tuple tuple = {}; 1902 struct mlx5_ct_entry *entry; 1903 u16 zone; 1904 1905 if (!ct_priv || !zone_restore_id) 1906 return true; 1907 1908 if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone)) 1909 return false; 1910 1911 if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) 1912 return false; 1913 1914 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple, 1915 tuples_ht_params); 1916 if (!entry) 1917 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, 1918 &tuple, tuples_nat_ht_params); 1919 if (!entry) 1920 return false; 1921 1922 tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); 1923 return true; 1924 } 1925