1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2019 Mellanox Technologies. */ 3 4 #include <net/netfilter/nf_conntrack.h> 5 #include <net/netfilter/nf_conntrack_core.h> 6 #include <net/netfilter/nf_conntrack_zones.h> 7 #include <net/netfilter/nf_conntrack_labels.h> 8 #include <net/netfilter/nf_conntrack_helper.h> 9 #include <net/netfilter/nf_conntrack_acct.h> 10 #include <uapi/linux/tc_act/tc_pedit.h> 11 #include <net/tc_act/tc_ct.h> 12 #include <net/flow_offload.h> 13 #include <net/netfilter/nf_flow_table.h> 14 #include <linux/workqueue.h> 15 #include <linux/refcount.h> 16 #include <linux/xarray.h> 17 #include <linux/if_macvlan.h> 18 19 #include "lib/fs_chains.h" 20 #include "en/tc_ct.h" 21 #include "en/tc/ct_fs.h" 22 #include "en/tc_priv.h" 23 #include "en/mod_hdr.h" 24 #include "en/mapping.h" 25 #include "en/tc/post_act.h" 26 #include "en.h" 27 #include "en_tc.h" 28 #include "en_rep.h" 29 #include "fs_core.h" 30 31 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) 32 #define MLX5_CT_STATE_TRK_BIT BIT(2) 33 #define MLX5_CT_STATE_NAT_BIT BIT(3) 34 #define MLX5_CT_STATE_REPLY_BIT BIT(4) 35 #define MLX5_CT_STATE_RELATED_BIT BIT(5) 36 #define MLX5_CT_STATE_INVALID_BIT BIT(6) 37 38 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen) 39 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0) 40 41 /* Statically allocate modify actions for 42 * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10. 43 * This will be increased dynamically if needed (for the ipv6 snat + dnat). 
 */
#define MLX5_CT_MIN_MOD_ACTS 10

/* Debug print helper; expects a local variable named 'ct_priv' in scope. */
#define ct_dbg(fmt, args...)\
	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

/* Per-device conntrack offload context (one per flow namespace). */
struct mlx5_tc_ct_priv {
	struct mlx5_core_dev *dev;
	const struct net_device *netdev;
	struct mod_hdr_tbl *mod_hdr_tbl;
	struct xarray tuple_ids;
	struct rhashtable zone_ht;	    /* zone -> struct mlx5_ct_ft (zone_params) */
	struct rhashtable ct_tuples_ht;     /* tuple -> entry (tuples_ht_params) */
	struct rhashtable ct_tuples_nat_ht; /* post-NAT tuple -> entry (tuples_nat_ht_params) */
	struct mlx5_flow_table *ct;
	struct mlx5_flow_table *ct_nat;
	struct mlx5e_post_act *post_act;
	struct mutex control_lock; /* guards parallel adds/dels */
	struct mapping_ctx *zone_mapping;
	struct mapping_ctx *labels_mapping;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5_fs_chains *chains;
	struct mlx5_ct_fs *fs;
	struct mlx5_ct_fs_ops *fs_ops;
	spinlock_t ht_lock; /* protects ft entries */
};

/* Pre-CT part of one offloaded ct() TC flow. */
struct mlx5_ct_flow {
	struct mlx5_flow_attr *pre_ct_attr;
	struct mlx5_flow_handle *pre_ct_rule;
	struct mlx5_ct_ft *ft;
	u32 chain_mapping;
};

/* One offloaded rule for a single direction (plain or NAT) of a ct entry. */
struct mlx5_ct_zone_rule {
	struct mlx5_ct_fs_rule *rule;
	struct mlx5e_mod_hdr_handle *mh;
	struct mlx5_flow_attr *attr;
	bool nat; /* also the index into mlx5_ct_entry::zone_rules */
};

struct mlx5_tc_ct_pre {
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *flow_grp;
	struct mlx5_flow_group *miss_grp;
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_handle *miss_rule;
	struct mlx5_modify_hdr *modify_hdr;
};

/* Per-zone state; hashed in mlx5_tc_ct_priv::zone_ht keyed by 'zone'. */
struct mlx5_ct_ft {
	struct rhash_head node;
	u16 zone;
	u32 zone_restore_id;
	refcount_t refcount;
	struct nf_flowtable *nf_ft;
	struct mlx5_tc_ct_priv *ct_priv;
	struct rhashtable ct_entries_ht; /* cookie -> struct mlx5_ct_entry */
	struct mlx5_tc_ct_pre pre_ct;
	struct mlx5_tc_ct_pre pre_ct_nat;
};

/* Connection 5-tuple (+zone); used verbatim as the rhashtable key. */
struct mlx5_ct_tuple {
	u16 addr_type;
	__be16 n_proto;
	u8 ip_proto;
	struct {
		union {
			__be32 src_v4;
			struct in6_addr src_v6;
		};
		union {
			__be32 dst_v4;
			struct in6_addr dst_v6;
		};
	} ip;
	struct {
		__be16 src;
		__be16 dst;
	} port;

	u16 zone;
};

/* Flow counter, optionally shared with the reverse-direction entry. */
struct mlx5_ct_counter {
	struct mlx5_fc *counter;
	refcount_t refcount; /* only meaningful when is_shared */
	bool is_shared;
};

enum {
	MLX5_CT_ENTRY_FLAG_VALID,
};

/* One offloaded conntrack connection (holds both zone rules). */
struct mlx5_ct_entry {
	struct rhash_head node;           /* ft->ct_entries_ht, keyed by cookie */
	struct rhash_head tuple_node;     /* ct_priv->ct_tuples_ht */
	struct rhash_head tuple_nat_node; /* ct_priv->ct_tuples_nat_ht */
	struct mlx5_ct_counter *counter;
	unsigned long cookie;
	unsigned long restore_cookie;
	struct mlx5_ct_tuple tuple;
	struct mlx5_ct_tuple tuple_nat;
	struct mlx5_ct_zone_rule zone_rules[2]; /* indexed by the 'nat' bool */

	struct mlx5_tc_ct_priv *ct_priv;
	struct work_struct work; /* deferred deletion, see __mlx5_tc_ct_entry_put() */

	refcount_t refcnt;
	unsigned long flags; /* MLX5_CT_ENTRY_FLAG_* */
};

static void
mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				 struct mlx5_flow_attr *attr,
				 struct mlx5e_mod_hdr_handle *mh);

static const struct rhashtable_params cts_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, node),
	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
	.head_offset = offsetof(struct mlx5_ct_ft, node),
	.key_offset = offsetof(struct mlx5_ct_ft, zone),
	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
	.automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

/* True iff the entry was inserted into ct_tuples_nat_ht: insertion links
 * tuple_nat_node.next. NOTE(review): this peeks at rhashtable internals —
 * confirm the invariant still holds if the rhashtable implementation
 * changes.
 */
static bool
mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
{
	return !!(entry->tuple_nat_node.next);
}

/* Map the 4x32-bit ct label vector to a compact id. All-zero labels map
 * to the reserved id 0 without consuming a mapping entry.
 */
static int
mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
		       u32 *labels, u32 *id)
{
	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
		*id = 0;
		return 0;
	}

	if (mapping_add(ct_priv->labels_mapping, labels, id))
		return -EOPNOTSUPP;

	return 0;
}

/* Release a label mapping id; 0 is the "no labels" sentinel and is never
 * removed.
 */
static void
mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
{
	if (id)
		mapping_remove(ct_priv->labels_mapping, id);
}

/* Extract the connection tuple from a flow_cls match. Only IPv4/IPv6 with
 * TCP/UDP ports — or port-less GRE — are offloadable; anything else is
 * rejected with -EOPNOTSUPP.
 */
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
	struct flow_match_control control;
	struct flow_match_basic basic;

	flow_rule_match_basic(rule, &basic);
	flow_rule_match_control(rule, &control);

	tuple->n_proto = basic.key->n_proto;
	tuple->ip_proto = basic.key->ip_proto;
	tuple->addr_type = control.key->addr_type;

	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		tuple->ip.src_v4 = match.key->src;
		tuple->ip.dst_v4 = match.key->dst;
	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		tuple->ip.src_v6 = match.key->src;
		tuple->ip.dst_v6 = match.key->dst;
	} else {
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (tuple->ip_proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
			tuple->port.src = match.key->src;
			tuple->port.dst = match.key->dst;
			break;
		default:
			return -EOPNOTSUPP;
		}
	} else {
		if (tuple->ip_proto != IPPROTO_GRE)
			return -EOPNOTSUPP;
	}

return 0; 268 } 269 270 static int 271 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, 272 struct flow_rule *rule) 273 { 274 struct flow_action *flow_action = &rule->action; 275 struct flow_action_entry *act; 276 u32 offset, val, ip6_offset; 277 int i; 278 279 flow_action_for_each(i, act, flow_action) { 280 if (act->id != FLOW_ACTION_MANGLE) 281 continue; 282 283 offset = act->mangle.offset; 284 val = act->mangle.val; 285 switch (act->mangle.htype) { 286 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 287 if (offset == offsetof(struct iphdr, saddr)) 288 tuple->ip.src_v4 = cpu_to_be32(val); 289 else if (offset == offsetof(struct iphdr, daddr)) 290 tuple->ip.dst_v4 = cpu_to_be32(val); 291 else 292 return -EOPNOTSUPP; 293 break; 294 295 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 296 ip6_offset = (offset - offsetof(struct ipv6hdr, saddr)); 297 ip6_offset /= 4; 298 if (ip6_offset < 4) 299 tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val); 300 else if (ip6_offset < 8) 301 tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val); 302 else 303 return -EOPNOTSUPP; 304 break; 305 306 case FLOW_ACT_MANGLE_HDR_TYPE_TCP: 307 if (offset == offsetof(struct tcphdr, source)) 308 tuple->port.src = cpu_to_be16(val); 309 else if (offset == offsetof(struct tcphdr, dest)) 310 tuple->port.dst = cpu_to_be16(val); 311 else 312 return -EOPNOTSUPP; 313 break; 314 315 case FLOW_ACT_MANGLE_HDR_TYPE_UDP: 316 if (offset == offsetof(struct udphdr, source)) 317 tuple->port.src = cpu_to_be16(val); 318 else if (offset == offsetof(struct udphdr, dest)) 319 tuple->port.dst = cpu_to_be16(val); 320 else 321 return -EOPNOTSUPP; 322 break; 323 324 default: 325 return -EOPNOTSUPP; 326 } 327 } 328 329 return 0; 330 } 331 332 static int 333 mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv, 334 struct net_device *ndev) 335 { 336 struct mlx5e_priv *other_priv = netdev_priv(ndev); 337 struct mlx5_core_dev *mdev = ct_priv->dev; 338 bool vf_rep, uplink_rep; 339 340 vf_rep = 
mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); 341 uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); 342 343 if (vf_rep) 344 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; 345 if (uplink_rep) 346 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; 347 if (is_vlan_dev(ndev)) 348 return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev)); 349 if (netif_is_macvlan(ndev)) 350 return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev)); 351 if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev)) 352 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; 353 354 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT; 355 } 356 357 static int 358 mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv, 359 struct mlx5_flow_spec *spec, 360 struct flow_rule *rule) 361 { 362 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 363 outer_headers); 364 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 365 outer_headers); 366 u16 addr_type = 0; 367 u8 ip_proto = 0; 368 369 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 370 struct flow_match_basic match; 371 372 flow_rule_match_basic(rule, &match); 373 374 mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v); 375 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 376 match.mask->ip_proto); 377 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 378 match.key->ip_proto); 379 380 ip_proto = match.key->ip_proto; 381 } 382 383 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 384 struct flow_match_control match; 385 386 flow_rule_match_control(rule, &match); 387 addr_type = match.key->addr_type; 388 } 389 390 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 391 struct flow_match_ipv4_addrs match; 392 393 flow_rule_match_ipv4_addrs(rule, &match); 394 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 395 src_ipv4_src_ipv6.ipv4_layout.ipv4), 396 &match.mask->src, 
sizeof(match.mask->src)); 397 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 398 src_ipv4_src_ipv6.ipv4_layout.ipv4), 399 &match.key->src, sizeof(match.key->src)); 400 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 401 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 402 &match.mask->dst, sizeof(match.mask->dst)); 403 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 404 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 405 &match.key->dst, sizeof(match.key->dst)); 406 } 407 408 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 409 struct flow_match_ipv6_addrs match; 410 411 flow_rule_match_ipv6_addrs(rule, &match); 412 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 413 src_ipv4_src_ipv6.ipv6_layout.ipv6), 414 &match.mask->src, sizeof(match.mask->src)); 415 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 416 src_ipv4_src_ipv6.ipv6_layout.ipv6), 417 &match.key->src, sizeof(match.key->src)); 418 419 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 420 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 421 &match.mask->dst, sizeof(match.mask->dst)); 422 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 423 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 424 &match.key->dst, sizeof(match.key->dst)); 425 } 426 427 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 428 struct flow_match_ports match; 429 430 flow_rule_match_ports(rule, &match); 431 switch (ip_proto) { 432 case IPPROTO_TCP: 433 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 434 tcp_sport, ntohs(match.mask->src)); 435 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 436 tcp_sport, ntohs(match.key->src)); 437 438 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 439 tcp_dport, ntohs(match.mask->dst)); 440 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 441 tcp_dport, ntohs(match.key->dst)); 442 break; 443 444 case IPPROTO_UDP: 445 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 446 udp_sport, ntohs(match.mask->src)); 447 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 448 udp_sport, ntohs(match.key->src)); 449 450 
MLX5_SET(fte_match_set_lyr_2_4, headers_c, 451 udp_dport, ntohs(match.mask->dst)); 452 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 453 udp_dport, ntohs(match.key->dst)); 454 break; 455 default: 456 break; 457 } 458 } 459 460 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { 461 struct flow_match_tcp match; 462 463 flow_rule_match_tcp(rule, &match); 464 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, 465 ntohs(match.mask->flags)); 466 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, 467 ntohs(match.key->flags)); 468 } 469 470 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { 471 struct flow_match_meta match; 472 473 flow_rule_match_meta(rule, &match); 474 475 if (match.key->ingress_ifindex & match.mask->ingress_ifindex) { 476 struct net_device *dev; 477 478 dev = dev_get_by_index(&init_net, match.key->ingress_ifindex); 479 if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source)) 480 spec->flow_context.flow_source = 481 mlx5_tc_ct_get_flow_source_match(ct_priv, dev); 482 483 dev_put(dev); 484 } 485 } 486 487 return 0; 488 } 489 490 static void 491 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) 492 { 493 if (entry->counter->is_shared && 494 !refcount_dec_and_test(&entry->counter->refcount)) 495 return; 496 497 mlx5_fc_destroy(ct_priv->dev, entry->counter->counter); 498 kfree(entry->counter); 499 } 500 501 static void 502 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, 503 struct mlx5_ct_entry *entry, 504 bool nat) 505 { 506 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 507 struct mlx5_flow_attr *attr = zone_rule->attr; 508 509 ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); 510 511 ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); 512 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); 513 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); 514 kfree(attr); 515 } 516 517 static void 518 
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct mlx5_ct_entry *entry)
{
	/* Tear down both directions: the NAT rule first, then the plain one. */
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
}

/* Find the CT_METADATA action attached to the offloaded rule, or NULL. */
static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct flow_action_entry *act;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id == FLOW_ACTION_CT_METADATA)
			return act;
	}

	return NULL;
}

/* Append modify-header actions that write ct state, mark, labels id and
 * zone restore id into their mapped registers after a CT table hit.
 */
static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
			       u8 ct_state,
			       u32 mark,
			       u32 labels_id,
			       u8 zone_restore_id)
{
	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
	struct mlx5_core_dev *dev = ct_priv->dev;
	int err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					CTSTATE_TO_REG, ct_state);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					MARK_TO_REG, mark);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					LABELS_TO_REG, labels_id);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					ZONE_RESTORE_TO_REG, zone_restore_id);
	if (err)
		return err;

	/* Make another copy of zone id in reg_b for
	 * NIC rx flows since we don't copy reg_c1 to
	 * reg_b upon miss.
	 */
	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
		if (err)
			return err;
	}
	return 0;
}

/* Translate one pedit mangle action into an mlx5 set_action_in 'modact'.
 * length 0 selects the whole field, 16 the low 16 bits (mlx5 ifc
 * convention); only IP addresses and TCP/UDP ports are supported.
 */
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
				   char *modact)
{
	u32 offset = act->mangle.offset, field;

	switch (act->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct iphdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
		else if (offset == offsetof(struct iphdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		MLX5_SET(set_action_in, modact, length, 0);
		/* Map each 32-bit word of the IPv6 address to its field. */
		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct tcphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
		else if (offset == offsetof(struct tcphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct udphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
		else if (offset == offsetof(struct udphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	default:
		return -EOPNOTSUPP;
	}

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, offset, 0);
	MLX5_SET(set_action_in, modact, field, field);
	MLX5_SET(set_action_in, modact, data, act->mangle.val);

	return 0;
}

/* Convert all mangle (NAT) actions of the rule into modify-header actions
 * appended to 'mod_acts'. CT_METADATA is consumed by the caller; any
 * other action id is unsupported.
 */
static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
			    struct flow_rule *flow_rule,
			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct mlx5_core_dev *mdev = ct_priv->dev;
	struct flow_action_entry *act;
	char *modact;
	int err, i;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_MANGLE: {
			modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
			if (IS_ERR(modact))
				return PTR_ERR(modact);

			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
			if (err)
				return err;

			mod_acts->num_actions++;
		}
		break;

		case FLOW_ACTION_CT_METADATA:
			/* Handled earlier */
			continue;
		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

/* Build the modify header that restores CT metadata (and applies NAT when
 * requested). On success attr->modify_hdr is set; *mh is the shared
 * handle in the non-NAT case and NULL in the NAT case.
 */
static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				struct mlx5_flow_attr *attr,
				struct flow_rule *flow_rule,
				struct mlx5e_mod_hdr_handle **mh,
				u8 zone_restore_id, bool nat)
{
	DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
	DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
	struct flow_action_entry *meta;
	u16 ct_state = 0;
	int
err; 705 706 meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 707 if (!meta) 708 return -EOPNOTSUPP; 709 710 err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, 711 &attr->ct_attr.ct_labels_id); 712 if (err) 713 return -EOPNOTSUPP; 714 if (nat) { 715 err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, 716 &mod_acts); 717 if (err) 718 goto err_mapping; 719 720 ct_state |= MLX5_CT_STATE_NAT_BIT; 721 } 722 723 ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; 724 ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT; 725 err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, 726 ct_state, 727 meta->ct_metadata.mark, 728 attr->ct_attr.ct_labels_id, 729 zone_restore_id); 730 if (err) 731 goto err_mapping; 732 733 if (nat) { 734 attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type, 735 mod_acts.num_actions, 736 mod_acts.actions); 737 if (IS_ERR(attr->modify_hdr)) { 738 err = PTR_ERR(attr->modify_hdr); 739 goto err_mapping; 740 } 741 742 *mh = NULL; 743 } else { 744 *mh = mlx5e_mod_hdr_attach(ct_priv->dev, 745 ct_priv->mod_hdr_tbl, 746 ct_priv->ns_type, 747 &mod_acts); 748 if (IS_ERR(*mh)) { 749 err = PTR_ERR(*mh); 750 goto err_mapping; 751 } 752 attr->modify_hdr = mlx5e_mod_hdr_get(*mh); 753 } 754 755 mlx5e_mod_hdr_dealloc(&mod_acts); 756 return 0; 757 758 err_mapping: 759 mlx5e_mod_hdr_dealloc(&mod_acts); 760 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); 761 return err; 762 } 763 764 static void 765 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 766 struct mlx5_flow_attr *attr, 767 struct mlx5e_mod_hdr_handle *mh) 768 { 769 if (mh) 770 mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh); 771 else 772 mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr); 773 } 774 775 static int 776 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, 777 struct flow_rule *flow_rule, 778 struct mlx5_ct_entry *entry, 779 bool nat, u8 zone_restore_id) 780 { 781 
struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 782 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 783 struct mlx5_flow_spec *spec = NULL; 784 struct mlx5_flow_attr *attr; 785 int err; 786 787 zone_rule->nat = nat; 788 789 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 790 if (!spec) 791 return -ENOMEM; 792 793 attr = mlx5_alloc_flow_attr(ct_priv->ns_type); 794 if (!attr) { 795 err = -ENOMEM; 796 goto err_attr; 797 } 798 799 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, 800 &zone_rule->mh, 801 zone_restore_id, nat); 802 if (err) { 803 ct_dbg("Failed to create ct entry mod hdr"); 804 goto err_mod_hdr; 805 } 806 807 attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | 808 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 809 MLX5_FLOW_CONTEXT_ACTION_COUNT; 810 attr->dest_chain = 0; 811 attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); 812 attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; 813 if (entry->tuple.ip_proto == IPPROTO_TCP || 814 entry->tuple.ip_proto == IPPROTO_UDP) 815 attr->outer_match_level = MLX5_MATCH_L4; 816 else 817 attr->outer_match_level = MLX5_MATCH_L3; 818 attr->counter = entry->counter->counter; 819 attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; 820 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) 821 attr->esw_attr->in_mdev = priv->mdev; 822 823 mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); 824 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); 825 826 zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); 827 if (IS_ERR(zone_rule->rule)) { 828 err = PTR_ERR(zone_rule->rule); 829 ct_dbg("Failed to add ct entry rule, nat: %d", nat); 830 goto err_rule; 831 } 832 833 zone_rule->attr = attr; 834 835 kvfree(spec); 836 ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); 837 838 return 0; 839 840 err_rule: 841 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); 842 mlx5_put_label_mapping(ct_priv, 
attr->ct_attr.ct_labels_id); 843 err_mod_hdr: 844 kfree(attr); 845 err_attr: 846 kvfree(spec); 847 return err; 848 } 849 850 static bool 851 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) 852 { 853 return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); 854 } 855 856 static struct mlx5_ct_entry * 857 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple) 858 { 859 struct mlx5_ct_entry *entry; 860 861 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple, 862 tuples_ht_params); 863 if (entry && mlx5_tc_ct_entry_valid(entry) && 864 refcount_inc_not_zero(&entry->refcnt)) { 865 return entry; 866 } else if (!entry) { 867 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, 868 tuple, tuples_nat_ht_params); 869 if (entry && mlx5_tc_ct_entry_valid(entry) && 870 refcount_inc_not_zero(&entry->refcnt)) 871 return entry; 872 } 873 874 return entry ? ERR_PTR(-EINVAL) : NULL; 875 } 876 877 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry) 878 { 879 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; 880 881 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 882 &entry->tuple_nat_node, 883 tuples_nat_ht_params); 884 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, 885 tuples_ht_params); 886 } 887 888 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry) 889 { 890 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; 891 892 mlx5_tc_ct_entry_del_rules(ct_priv, entry); 893 894 spin_lock_bh(&ct_priv->ht_lock); 895 mlx5_tc_ct_entry_remove_from_tuples(entry); 896 spin_unlock_bh(&ct_priv->ht_lock); 897 898 mlx5_tc_ct_counter_put(ct_priv, entry); 899 kfree(entry); 900 } 901 902 static void 903 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) 904 { 905 if (!refcount_dec_and_test(&entry->refcnt)) 906 return; 907 908 mlx5_tc_ct_entry_del(entry); 909 } 910 911 static void mlx5_tc_ct_entry_del_work(struct work_struct *work) 912 { 913 struct mlx5_ct_entry *entry = container_of(work, struct 
mlx5_ct_entry, work); 914 915 mlx5_tc_ct_entry_del(entry); 916 } 917 918 static void 919 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) 920 { 921 struct mlx5e_priv *priv; 922 923 if (!refcount_dec_and_test(&entry->refcnt)) 924 return; 925 926 priv = netdev_priv(entry->ct_priv->netdev); 927 INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); 928 queue_work(priv->wq, &entry->work); 929 } 930 931 static struct mlx5_ct_counter * 932 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) 933 { 934 struct mlx5_ct_counter *counter; 935 int ret; 936 937 counter = kzalloc(sizeof(*counter), GFP_KERNEL); 938 if (!counter) 939 return ERR_PTR(-ENOMEM); 940 941 counter->is_shared = false; 942 counter->counter = mlx5_fc_create_ex(ct_priv->dev, true); 943 if (IS_ERR(counter->counter)) { 944 ct_dbg("Failed to create counter for ct entry"); 945 ret = PTR_ERR(counter->counter); 946 kfree(counter); 947 return ERR_PTR(ret); 948 } 949 950 return counter; 951 } 952 953 static struct mlx5_ct_counter * 954 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, 955 struct mlx5_ct_entry *entry) 956 { 957 struct mlx5_ct_tuple rev_tuple = entry->tuple; 958 struct mlx5_ct_counter *shared_counter; 959 struct mlx5_ct_entry *rev_entry; 960 961 /* get the reversed tuple */ 962 swap(rev_tuple.port.src, rev_tuple.port.dst); 963 964 if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 965 __be32 tmp_addr = rev_tuple.ip.src_v4; 966 967 rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4; 968 rev_tuple.ip.dst_v4 = tmp_addr; 969 } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 970 struct in6_addr tmp_addr = rev_tuple.ip.src_v6; 971 972 rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6; 973 rev_tuple.ip.dst_v6 = tmp_addr; 974 } else { 975 return ERR_PTR(-EOPNOTSUPP); 976 } 977 978 /* Use the same counter as the reverse direction */ 979 spin_lock_bh(&ct_priv->ht_lock); 980 rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple); 981 982 if (IS_ERR(rev_entry)) { 983 
spin_unlock_bh(&ct_priv->ht_lock); 984 goto create_counter; 985 } 986 987 if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) { 988 ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry); 989 shared_counter = rev_entry->counter; 990 spin_unlock_bh(&ct_priv->ht_lock); 991 992 mlx5_tc_ct_entry_put(rev_entry); 993 return shared_counter; 994 } 995 996 spin_unlock_bh(&ct_priv->ht_lock); 997 998 create_counter: 999 1000 shared_counter = mlx5_tc_ct_counter_create(ct_priv); 1001 if (IS_ERR(shared_counter)) 1002 return shared_counter; 1003 1004 shared_counter->is_shared = true; 1005 refcount_set(&shared_counter->refcount, 1); 1006 return shared_counter; 1007 } 1008 1009 static int 1010 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, 1011 struct flow_rule *flow_rule, 1012 struct mlx5_ct_entry *entry, 1013 u8 zone_restore_id) 1014 { 1015 int err; 1016 1017 if (nf_ct_acct_enabled(dev_net(ct_priv->netdev))) 1018 entry->counter = mlx5_tc_ct_counter_create(ct_priv); 1019 else 1020 entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); 1021 1022 if (IS_ERR(entry->counter)) { 1023 err = PTR_ERR(entry->counter); 1024 return err; 1025 } 1026 1027 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false, 1028 zone_restore_id); 1029 if (err) 1030 goto err_orig; 1031 1032 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true, 1033 zone_restore_id); 1034 if (err) 1035 goto err_nat; 1036 1037 return 0; 1038 1039 err_nat: 1040 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 1041 err_orig: 1042 mlx5_tc_ct_counter_put(ct_priv, entry); 1043 return err; 1044 } 1045 1046 static int 1047 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, 1048 struct flow_cls_offload *flow) 1049 { 1050 struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); 1051 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1052 struct flow_action_entry *meta_action; 1053 unsigned long cookie = flow->cookie; 1054 struct mlx5_ct_entry *entry; 
	int err;

	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta_action)
		return -EOPNOTSUPP;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
		/* Already offloaded; drop the reference we just took. */
		spin_unlock_bh(&ct_priv->ht_lock);
		mlx5_tc_ct_entry_put(entry);
		return -EEXIST;
	}
	spin_unlock_bh(&ct_priv->ht_lock);

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->tuple.zone = ft->zone;
	entry->cookie = flow->cookie;
	entry->restore_cookie = meta_action->ct_metadata.cookie;
	/* One reference for the hashtables, one for this function. */
	refcount_set(&entry->refcnt, 2);
	entry->ct_priv = ct_priv;

	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
	if (err)
		goto err_set;

	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
	if (err)
		goto err_set;

	spin_lock_bh(&ct_priv->ht_lock);

	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
					    cts_ht_params);
	if (err)
		goto err_entries;

	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
					    &entry->tuple_node,
					    tuples_ht_params);
	if (err)
		goto err_tuple;

	/* Insert into the NAT table only when NAT actually rewrites the
	 * tuple.
	 */
	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
						    &entry->tuple_nat_node,
						    tuples_nat_ht_params);
		if (err)
			goto err_tuple_nat;
	}
	spin_unlock_bh(&ct_priv->ht_lock);

	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
					 ft->zone_restore_id);
	if (err)
		goto err_rules;

	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
	mlx5_tc_ct_entry_put(entry); /* this function reference */

	return 0;

err_rules:
	spin_lock_bh(&ct_priv->ht_lock);
	/* The NAT node exists only if it was inserted above. */
	if (mlx5_tc_ct_entry_has_nat(entry))
		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
				       &entry->tuple_nat_node, tuples_nat_ht_params);
err_tuple_nat:
	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
			       &entry->tuple_node,
			       tuples_ht_params);
err_tuple:
	rhashtable_remove_fast(&ft->ct_entries_ht,
			       &entry->node,
			       cts_ht_params);
err_entries:
	spin_unlock_bh(&ct_priv->ht_lock);
err_set:
	kfree(entry);
	if (err != -EEXIST)
		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
	return err;
}

/* FLOW_CLS_DESTROY callback: unhash the entry and drop the hashtable
 * reference; actual teardown happens when the last ref goes away.
 */
static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (!entry) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -ENOENT;
	}

	if (!mlx5_tc_ct_entry_valid(entry)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -EINVAL;
	}

	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_tc_ct_entry_put(entry);

	return 0;
}

/* FLOW_CLS_STATS callback: report the (cached) HW counter values back to
 * conntrack. Takes a reference so the entry cannot die mid-query.
 */
static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
				    struct flow_cls_offload *f)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	unsigned long cookie = f->cookie;
	struct mlx5_ct_entry *entry;
	u64 lastuse, packets, bytes;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (!entry) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -ENOENT;
	}

	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -EINVAL;
	}

	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
			  FLOW_ACTION_HW_STATS_DELAYED);

	mlx5_tc_ct_entry_put(entry);
	return 0;
}

/* nf flowtable block callback: dispatch add/del/stats for clsflower
 * commands.
 */
static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
			      void *cb_priv)
{
	struct flow_cls_offload *f = type_data;
	struct mlx5_ct_ft *ft = cb_priv;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (f->command) {
	case FLOW_CLS_REPLACE:
		return mlx5_tc_ct_block_flow_offload_add(ft, f);
	case FLOW_CLS_DESTROY:
		return mlx5_tc_ct_block_flow_offload_del(ft, f);
	case FLOW_CLS_STATS:
		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
	default:
		break;
	}

	return -EOPNOTSUPP;
}

/* Dissect an skb into a ct tuple for restore lookups. Returns false for
 * protocols the offload does not handle (non-TCP/UDP/GRE).
 */
static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
			u16 zone)
{
	struct flow_keys flow_keys;

	skb_reset_network_header(skb);
	skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);

	tuple->zone = zone;

	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
	    flow_keys.basic.ip_proto != IPPROTO_UDP &&
	    flow_keys.basic.ip_proto != IPPROTO_GRE)
		return false;

	if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
	    flow_keys.basic.ip_proto == IPPROTO_UDP) {
		tuple->port.src = flow_keys.ports.src;
		tuple->port.dst = flow_keys.ports.dst;
	}
	tuple->n_proto = flow_keys.basic.n_proto;
	tuple->ip_proto = flow_keys.basic.ip_proto;

	switch (flow_keys.basic.n_proto) {
	case htons(ETH_P_IP):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
		break;

	case htons(ETH_P_IPV6):
tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 1259 tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src; 1260 tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst; 1261 break; 1262 default: 1263 goto out; 1264 } 1265 1266 return true; 1267 1268 out: 1269 return false; 1270 } 1271 1272 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) 1273 { 1274 u32 ctstate = 0, ctstate_mask = 0; 1275 1276 mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, 1277 &ctstate, &ctstate_mask); 1278 1279 if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT) 1280 return -EOPNOTSUPP; 1281 1282 ctstate_mask |= MLX5_CT_STATE_TRK_BIT; 1283 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 1284 ctstate, ctstate_mask); 1285 1286 return 0; 1287 } 1288 1289 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) 1290 { 1291 if (!priv || !ct_attr->ct_labels_id) 1292 return; 1293 1294 mlx5_put_label_mapping(priv, ct_attr->ct_labels_id); 1295 } 1296 1297 int 1298 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, 1299 struct mlx5_flow_spec *spec, 1300 struct flow_cls_offload *f, 1301 struct mlx5_ct_attr *ct_attr, 1302 struct netlink_ext_ack *extack) 1303 { 1304 bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv; 1305 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 1306 struct flow_dissector_key_ct *mask, *key; 1307 u32 ctstate = 0, ctstate_mask = 0; 1308 u16 ct_state_on, ct_state_off; 1309 u16 ct_state, ct_state_mask; 1310 struct flow_match_ct match; 1311 u32 ct_labels[4]; 1312 1313 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) 1314 return 0; 1315 1316 if (!priv) { 1317 NL_SET_ERR_MSG_MOD(extack, 1318 "offload of ct matching isn't available"); 1319 return -EOPNOTSUPP; 1320 } 1321 1322 flow_rule_match_ct(rule, &match); 1323 1324 key = match.key; 1325 mask = match.mask; 1326 1327 ct_state = key->ct_state; 1328 ct_state_mask = mask->ct_state; 1329 1330 if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | 1331 
TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | 1332 TCA_FLOWER_KEY_CT_FLAGS_NEW | 1333 TCA_FLOWER_KEY_CT_FLAGS_REPLY | 1334 TCA_FLOWER_KEY_CT_FLAGS_RELATED | 1335 TCA_FLOWER_KEY_CT_FLAGS_INVALID)) { 1336 NL_SET_ERR_MSG_MOD(extack, 1337 "only ct_state trk, est, new and rpl are supported for offload"); 1338 return -EOPNOTSUPP; 1339 } 1340 1341 ct_state_on = ct_state & ct_state_mask; 1342 ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; 1343 trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1344 new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; 1345 est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1346 rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY; 1347 rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED; 1348 inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID; 1349 untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1350 unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1351 unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; 1352 unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED; 1353 uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID; 1354 1355 ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; 1356 ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1357 ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0; 1358 ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; 1359 ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1360 ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; 1361 ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0; 1362 ctstate_mask |= uninv ? 
MLX5_CT_STATE_INVALID_BIT : 0; 1363 1364 if (rel) { 1365 NL_SET_ERR_MSG_MOD(extack, 1366 "matching on ct_state +rel isn't supported"); 1367 return -EOPNOTSUPP; 1368 } 1369 1370 if (inv) { 1371 NL_SET_ERR_MSG_MOD(extack, 1372 "matching on ct_state +inv isn't supported"); 1373 return -EOPNOTSUPP; 1374 } 1375 1376 if (new) { 1377 NL_SET_ERR_MSG_MOD(extack, 1378 "matching on ct_state +new isn't supported"); 1379 return -EOPNOTSUPP; 1380 } 1381 1382 if (mask->ct_zone) 1383 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1384 key->ct_zone, MLX5_CT_ZONE_MASK); 1385 if (ctstate_mask) 1386 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 1387 ctstate, ctstate_mask); 1388 if (mask->ct_mark) 1389 mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, 1390 key->ct_mark, mask->ct_mark); 1391 if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] || 1392 mask->ct_labels[3]) { 1393 ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0]; 1394 ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; 1395 ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; 1396 ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; 1397 if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id)) 1398 return -EOPNOTSUPP; 1399 mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, 1400 MLX5_CT_LABELS_MASK); 1401 } 1402 1403 return 0; 1404 } 1405 1406 int 1407 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, 1408 struct mlx5_flow_attr *attr, 1409 struct mlx5e_tc_mod_hdr_acts *mod_acts, 1410 const struct flow_action_entry *act, 1411 struct netlink_ext_ack *extack) 1412 { 1413 bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; 1414 int err; 1415 1416 if (!priv) { 1417 NL_SET_ERR_MSG_MOD(extack, 1418 "offload of ct action isn't available"); 1419 return -EOPNOTSUPP; 1420 } 1421 1422 attr->ct_attr.zone = act->ct.zone; 1423 attr->ct_attr.ct_action = act->ct.action; 1424 attr->ct_attr.nf_ft = act->ct.flow_table; 1425 1426 if (!clear_action) 1427 goto out; 1428 1429 err = 
mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); 1430 if (err) { 1431 NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear"); 1432 return err; 1433 } 1434 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1435 1436 out: 1437 return 0; 1438 } 1439 1440 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, 1441 struct mlx5_tc_ct_pre *pre_ct, 1442 bool nat) 1443 { 1444 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1445 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; 1446 struct mlx5_core_dev *dev = ct_priv->dev; 1447 struct mlx5_flow_table *ft = pre_ct->ft; 1448 struct mlx5_flow_destination dest = {}; 1449 struct mlx5_flow_act flow_act = {}; 1450 struct mlx5_modify_hdr *mod_hdr; 1451 struct mlx5_flow_handle *rule; 1452 struct mlx5_flow_spec *spec; 1453 u32 ctstate; 1454 u16 zone; 1455 int err; 1456 1457 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1458 if (!spec) 1459 return -ENOMEM; 1460 1461 zone = ct_ft->zone & MLX5_CT_ZONE_MASK; 1462 err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type, 1463 ZONE_TO_REG, zone); 1464 if (err) { 1465 ct_dbg("Failed to set zone register mapping"); 1466 goto err_mapping; 1467 } 1468 1469 mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type, 1470 pre_mod_acts.num_actions, 1471 pre_mod_acts.actions); 1472 1473 if (IS_ERR(mod_hdr)) { 1474 err = PTR_ERR(mod_hdr); 1475 ct_dbg("Failed to create pre ct mod hdr"); 1476 goto err_mapping; 1477 } 1478 pre_ct->modify_hdr = mod_hdr; 1479 1480 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1481 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1482 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; 1483 flow_act.modify_hdr = mod_hdr; 1484 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 1485 1486 /* add flow rule */ 1487 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1488 zone, MLX5_CT_ZONE_MASK); 1489 ctstate = MLX5_CT_STATE_TRK_BIT; 1490 if (nat) 1491 ctstate |= MLX5_CT_STATE_NAT_BIT; 1492 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); 
1493 1494 dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); 1495 rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); 1496 if (IS_ERR(rule)) { 1497 err = PTR_ERR(rule); 1498 ct_dbg("Failed to add pre ct flow rule zone %d", zone); 1499 goto err_flow_rule; 1500 } 1501 pre_ct->flow_rule = rule; 1502 1503 /* add miss rule */ 1504 dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct; 1505 rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1); 1506 if (IS_ERR(rule)) { 1507 err = PTR_ERR(rule); 1508 ct_dbg("Failed to add pre ct miss rule zone %d", zone); 1509 goto err_miss_rule; 1510 } 1511 pre_ct->miss_rule = rule; 1512 1513 mlx5e_mod_hdr_dealloc(&pre_mod_acts); 1514 kvfree(spec); 1515 return 0; 1516 1517 err_miss_rule: 1518 mlx5_del_flow_rules(pre_ct->flow_rule); 1519 err_flow_rule: 1520 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1521 err_mapping: 1522 mlx5e_mod_hdr_dealloc(&pre_mod_acts); 1523 kvfree(spec); 1524 return err; 1525 } 1526 1527 static void 1528 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, 1529 struct mlx5_tc_ct_pre *pre_ct) 1530 { 1531 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1532 struct mlx5_core_dev *dev = ct_priv->dev; 1533 1534 mlx5_del_flow_rules(pre_ct->flow_rule); 1535 mlx5_del_flow_rules(pre_ct->miss_rule); 1536 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1537 } 1538 1539 static int 1540 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, 1541 struct mlx5_tc_ct_pre *pre_ct, 1542 bool nat) 1543 { 1544 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); 1545 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1546 struct mlx5_core_dev *dev = ct_priv->dev; 1547 struct mlx5_flow_table_attr ft_attr = {}; 1548 struct mlx5_flow_namespace *ns; 1549 struct mlx5_flow_table *ft; 1550 struct mlx5_flow_group *g; 1551 u32 metadata_reg_c_2_mask; 1552 u32 *flow_group_in; 1553 void *misc; 1554 int err; 1555 1556 ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type); 1557 if (!ns) { 1558 err = -EOPNOTSUPP; 1559 ct_dbg("Failed to get 
flow namespace"); 1560 return err; 1561 } 1562 1563 flow_group_in = kvzalloc(inlen, GFP_KERNEL); 1564 if (!flow_group_in) 1565 return -ENOMEM; 1566 1567 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; 1568 ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ? 1569 FDB_TC_OFFLOAD : MLX5E_TC_PRIO; 1570 ft_attr.max_fte = 2; 1571 ft_attr.level = 1; 1572 ft = mlx5_create_flow_table(ns, &ft_attr); 1573 if (IS_ERR(ft)) { 1574 err = PTR_ERR(ft); 1575 ct_dbg("Failed to create pre ct table"); 1576 goto out_free; 1577 } 1578 pre_ct->ft = ft; 1579 1580 /* create flow group */ 1581 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); 1582 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); 1583 MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 1584 MLX5_MATCH_MISC_PARAMETERS_2); 1585 1586 misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, 1587 match_criteria.misc_parameters_2); 1588 1589 metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; 1590 metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); 1591 if (nat) 1592 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); 1593 1594 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, 1595 metadata_reg_c_2_mask); 1596 1597 g = mlx5_create_flow_group(ft, flow_group_in); 1598 if (IS_ERR(g)) { 1599 err = PTR_ERR(g); 1600 ct_dbg("Failed to create pre ct group"); 1601 goto err_flow_grp; 1602 } 1603 pre_ct->flow_grp = g; 1604 1605 /* create miss group */ 1606 memset(flow_group_in, 0, inlen); 1607 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); 1608 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); 1609 g = mlx5_create_flow_group(ft, flow_group_in); 1610 if (IS_ERR(g)) { 1611 err = PTR_ERR(g); 1612 ct_dbg("Failed to create pre ct miss group"); 1613 goto err_miss_grp; 1614 } 1615 pre_ct->miss_grp = g; 1616 1617 err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); 1618 if (err) 1619 goto err_add_rules; 1620 1621 kvfree(flow_group_in); 1622 return 0; 1623 
1624 err_add_rules: 1625 mlx5_destroy_flow_group(pre_ct->miss_grp); 1626 err_miss_grp: 1627 mlx5_destroy_flow_group(pre_ct->flow_grp); 1628 err_flow_grp: 1629 mlx5_destroy_flow_table(ft); 1630 out_free: 1631 kvfree(flow_group_in); 1632 return err; 1633 } 1634 1635 static void 1636 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, 1637 struct mlx5_tc_ct_pre *pre_ct) 1638 { 1639 tc_ct_pre_ct_del_rules(ct_ft, pre_ct); 1640 mlx5_destroy_flow_group(pre_ct->miss_grp); 1641 mlx5_destroy_flow_group(pre_ct->flow_grp); 1642 mlx5_destroy_flow_table(pre_ct->ft); 1643 } 1644 1645 static int 1646 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) 1647 { 1648 int err; 1649 1650 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); 1651 if (err) 1652 return err; 1653 1654 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); 1655 if (err) 1656 goto err_pre_ct_nat; 1657 1658 return 0; 1659 1660 err_pre_ct_nat: 1661 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1662 return err; 1663 } 1664 1665 static void 1666 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) 1667 { 1668 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); 1669 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1670 } 1671 1672 /* To avoid false lock dependency warning set the ct_entries_ht lock 1673 * class different than the lock class of the ht being used when deleting 1674 * last flow from a group and then deleting a group, we get into del_sw_flow_group() 1675 * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but 1676 * it's different than the ht->mutex here. 
1677 */ 1678 static struct lock_class_key ct_entries_ht_lock_key; 1679 1680 static struct mlx5_ct_ft * 1681 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, 1682 struct nf_flowtable *nf_ft) 1683 { 1684 struct mlx5_ct_ft *ft; 1685 int err; 1686 1687 ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); 1688 if (ft) { 1689 refcount_inc(&ft->refcount); 1690 return ft; 1691 } 1692 1693 ft = kzalloc(sizeof(*ft), GFP_KERNEL); 1694 if (!ft) 1695 return ERR_PTR(-ENOMEM); 1696 1697 err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id); 1698 if (err) 1699 goto err_mapping; 1700 1701 ft->zone = zone; 1702 ft->nf_ft = nf_ft; 1703 ft->ct_priv = ct_priv; 1704 refcount_set(&ft->refcount, 1); 1705 1706 err = mlx5_tc_ct_alloc_pre_ct_tables(ft); 1707 if (err) 1708 goto err_alloc_pre_ct; 1709 1710 err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); 1711 if (err) 1712 goto err_init; 1713 1714 lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key); 1715 1716 err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, 1717 zone_params); 1718 if (err) 1719 goto err_insert; 1720 1721 err = nf_flow_table_offload_add_cb(ft->nf_ft, 1722 mlx5_tc_ct_block_flow_offload, ft); 1723 if (err) 1724 goto err_add_cb; 1725 1726 return ft; 1727 1728 err_add_cb: 1729 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1730 err_insert: 1731 rhashtable_destroy(&ft->ct_entries_ht); 1732 err_init: 1733 mlx5_tc_ct_free_pre_ct_tables(ft); 1734 err_alloc_pre_ct: 1735 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1736 err_mapping: 1737 kfree(ft); 1738 return ERR_PTR(err); 1739 } 1740 1741 static void 1742 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) 1743 { 1744 struct mlx5_ct_entry *entry = ptr; 1745 1746 mlx5_tc_ct_entry_put(entry); 1747 } 1748 1749 static void 1750 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) 1751 { 1752 if (!refcount_dec_and_test(&ft->refcount)) 1753 return; 1754 
1755 nf_flow_table_offload_del_cb(ft->nf_ft, 1756 mlx5_tc_ct_block_flow_offload, ft); 1757 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1758 rhashtable_free_and_destroy(&ft->ct_entries_ht, 1759 mlx5_tc_ct_flush_ft_entry, 1760 ct_priv); 1761 mlx5_tc_ct_free_pre_ct_tables(ft); 1762 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1763 kfree(ft); 1764 } 1765 1766 /* We translate the tc filter with CT action to the following HW model: 1767 * 1768 * +---------------------+ 1769 * + ft prio (tc chain) + 1770 * + original match + 1771 * +---------------------+ 1772 * | set chain miss mapping 1773 * | set fte_id 1774 * | set tunnel_id 1775 * | do decap 1776 * v 1777 * +---------------------+ 1778 * + pre_ct/pre_ct_nat + if matches +-------------------------+ 1779 * + zone+nat match +---------------->+ post_act (see below) + 1780 * +---------------------+ set zone +-------------------------+ 1781 * | set zone 1782 * v 1783 * +--------------------+ 1784 * + CT (nat or no nat) + 1785 * + tuple + zone match + 1786 * +--------------------+ 1787 * | set mark 1788 * | set labels_id 1789 * | set established 1790 * | set zone_restore 1791 * | do nat (if needed) 1792 * v 1793 * +--------------+ 1794 * + post_act + original filter actions 1795 * + fte_id match +------------------------> 1796 * +--------------+ 1797 */ 1798 static struct mlx5_flow_handle * 1799 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, 1800 struct mlx5_flow_spec *orig_spec, 1801 struct mlx5_flow_attr *attr) 1802 { 1803 bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; 1804 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 1805 struct mlx5e_tc_mod_hdr_acts *pre_mod_acts; 1806 u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); 1807 struct mlx5_flow_attr *pre_ct_attr; 1808 struct mlx5_modify_hdr *mod_hdr; 1809 struct mlx5_ct_flow *ct_flow; 1810 int chain_mapping = 0, err; 1811 struct mlx5_ct_ft *ft; 1812 1813 ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); 1814 if 
(!ct_flow) { 1815 kfree(ct_flow); 1816 return ERR_PTR(-ENOMEM); 1817 } 1818 1819 /* Register for CT established events */ 1820 ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, 1821 attr->ct_attr.nf_ft); 1822 if (IS_ERR(ft)) { 1823 err = PTR_ERR(ft); 1824 ct_dbg("Failed to register to ft callback"); 1825 goto err_ft; 1826 } 1827 ct_flow->ft = ft; 1828 1829 /* Base flow attributes of both rules on original rule attribute */ 1830 ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); 1831 if (!ct_flow->pre_ct_attr) { 1832 err = -ENOMEM; 1833 goto err_alloc_pre; 1834 } 1835 1836 pre_ct_attr = ct_flow->pre_ct_attr; 1837 memcpy(pre_ct_attr, attr, attr_sz); 1838 pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts; 1839 1840 /* Modify the original rule's action to fwd and modify, leave decap */ 1841 pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; 1842 pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1843 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1844 1845 /* Write chain miss tag for miss in ct table as we 1846 * don't go though all prios of this chain as normal tc rules 1847 * miss. 1848 */ 1849 err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain, 1850 &chain_mapping); 1851 if (err) { 1852 ct_dbg("Failed to get chain register mapping for chain"); 1853 goto err_get_chain; 1854 } 1855 ct_flow->chain_mapping = chain_mapping; 1856 1857 err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, 1858 CHAIN_TO_REG, chain_mapping); 1859 if (err) { 1860 ct_dbg("Failed to set chain register mapping"); 1861 goto err_mapping; 1862 } 1863 1864 /* If original flow is decap, we do it before going into ct table 1865 * so add a rewrite for the tunnel match_id. 
1866 */ 1867 if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && 1868 attr->chain == 0) { 1869 err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, 1870 ct_priv->ns_type, 1871 TUNNEL_TO_REG, 1872 attr->tunnel_id); 1873 if (err) { 1874 ct_dbg("Failed to set tunnel register mapping"); 1875 goto err_mapping; 1876 } 1877 } 1878 1879 mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, 1880 pre_mod_acts->num_actions, 1881 pre_mod_acts->actions); 1882 if (IS_ERR(mod_hdr)) { 1883 err = PTR_ERR(mod_hdr); 1884 ct_dbg("Failed to create pre ct mod hdr"); 1885 goto err_mapping; 1886 } 1887 pre_ct_attr->modify_hdr = mod_hdr; 1888 1889 /* Change original rule point to ct table */ 1890 pre_ct_attr->dest_chain = 0; 1891 pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft; 1892 ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec, 1893 pre_ct_attr); 1894 if (IS_ERR(ct_flow->pre_ct_rule)) { 1895 err = PTR_ERR(ct_flow->pre_ct_rule); 1896 ct_dbg("Failed to add pre ct rule"); 1897 goto err_insert_orig; 1898 } 1899 1900 attr->ct_attr.ct_flow = ct_flow; 1901 mlx5e_mod_hdr_dealloc(pre_mod_acts); 1902 1903 return ct_flow->pre_ct_rule; 1904 1905 err_insert_orig: 1906 mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); 1907 err_mapping: 1908 mlx5e_mod_hdr_dealloc(pre_mod_acts); 1909 mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); 1910 err_get_chain: 1911 kfree(ct_flow->pre_ct_attr); 1912 err_alloc_pre: 1913 mlx5_tc_ct_del_ft_cb(ct_priv, ft); 1914 err_ft: 1915 kfree(ct_flow); 1916 netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); 1917 return ERR_PTR(err); 1918 } 1919 1920 struct mlx5_flow_handle * 1921 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, 1922 struct mlx5_flow_spec *spec, 1923 struct mlx5_flow_attr *attr, 1924 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) 1925 { 1926 struct mlx5_flow_handle *rule; 1927 1928 if (!priv) 1929 return ERR_PTR(-EOPNOTSUPP); 1930 1931 
mutex_lock(&priv->control_lock); 1932 rule = __mlx5_tc_ct_flow_offload(priv, spec, attr); 1933 mutex_unlock(&priv->control_lock); 1934 1935 return rule; 1936 } 1937 1938 static void 1939 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, 1940 struct mlx5_ct_flow *ct_flow, 1941 struct mlx5_flow_attr *attr) 1942 { 1943 struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; 1944 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 1945 1946 mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr); 1947 mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); 1948 1949 mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); 1950 mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); 1951 1952 kfree(ct_flow->pre_ct_attr); 1953 kfree(ct_flow); 1954 } 1955 1956 void 1957 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, 1958 struct mlx5_flow_attr *attr) 1959 { 1960 struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; 1961 1962 /* We are called on error to clean up stuff from parsing 1963 * but we don't have anything for now 1964 */ 1965 if (!ct_flow) 1966 return; 1967 1968 mutex_lock(&priv->control_lock); 1969 __mlx5_tc_ct_delete_flow(priv, ct_flow, attr); 1970 mutex_unlock(&priv->control_lock); 1971 } 1972 1973 static int 1974 mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv) 1975 { 1976 struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act); 1977 struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get(); 1978 int err; 1979 1980 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB && 1981 ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { 1982 ct_dbg("Using SMFS ct flow steering provider"); 1983 fs_ops = mlx5_ct_fs_smfs_ops_get(); 1984 } 1985 1986 ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL); 1987 if (!ct_priv->fs) 1988 return -ENOMEM; 1989 1990 ct_priv->fs->netdev = ct_priv->netdev; 1991 ct_priv->fs->dev = ct_priv->dev; 1992 ct_priv->fs_ops = fs_ops; 1993 1994 err = 
ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct); 1995 if (err) 1996 goto err_init; 1997 1998 return 0; 1999 2000 err_init: 2001 kfree(ct_priv->fs); 2002 return err; 2003 } 2004 2005 static int 2006 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, 2007 const char **err_msg) 2008 { 2009 if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { 2010 /* vlan workaround should be avoided for multi chain rules. 2011 * This is just a sanity check as pop vlan action should 2012 * be supported by any FW that supports ignore_flow_level 2013 */ 2014 2015 *err_msg = "firmware vlan actions support is missing"; 2016 return -EOPNOTSUPP; 2017 } 2018 2019 if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, 2020 fdb_modify_header_fwd_to_table)) { 2021 /* CT always writes to registers which are mod header actions. 2022 * Therefore, mod header and goto is required 2023 */ 2024 2025 *err_msg = "firmware fwd and modify support is missing"; 2026 return -EOPNOTSUPP; 2027 } 2028 2029 if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { 2030 *err_msg = "register loopback isn't supported"; 2031 return -EOPNOTSUPP; 2032 } 2033 2034 return 0; 2035 } 2036 2037 static int 2038 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, 2039 enum mlx5_flow_namespace_type ns_type, 2040 struct mlx5e_post_act *post_act) 2041 { 2042 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 2043 const char *err_msg = NULL; 2044 int err = 0; 2045 2046 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) 2047 /* cannot restore chain ID on HW miss */ 2048 2049 err_msg = "tc skb extension missing"; 2050 err = -EOPNOTSUPP; 2051 goto out_err; 2052 #endif 2053 if (IS_ERR_OR_NULL(post_act)) { 2054 /* Ignore_flow_level support isn't supported by default for VFs and so post_act 2055 * won't be supported. Skip showing error msg. 
2056 */ 2057 if (priv->mdev->coredev_type != MLX5_COREDEV_VF) 2058 err_msg = "post action is missing"; 2059 err = -EOPNOTSUPP; 2060 goto out_err; 2061 } 2062 2063 if (ns_type == MLX5_FLOW_NAMESPACE_FDB) 2064 err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg); 2065 2066 out_err: 2067 if (err && err_msg) 2068 netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg); 2069 return err; 2070 } 2071 2072 #define INIT_ERR_PREFIX "tc ct offload init failed" 2073 2074 struct mlx5_tc_ct_priv * 2075 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, 2076 struct mod_hdr_tbl *mod_hdr, 2077 enum mlx5_flow_namespace_type ns_type, 2078 struct mlx5e_post_act *post_act) 2079 { 2080 struct mlx5_tc_ct_priv *ct_priv; 2081 struct mlx5_core_dev *dev; 2082 u64 mapping_id; 2083 int err; 2084 2085 dev = priv->mdev; 2086 err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act); 2087 if (err) 2088 goto err_support; 2089 2090 ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); 2091 if (!ct_priv) 2092 goto err_alloc; 2093 2094 mapping_id = mlx5_query_nic_system_image_guid(dev); 2095 2096 ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE, 2097 sizeof(u16), 0, true); 2098 if (IS_ERR(ct_priv->zone_mapping)) { 2099 err = PTR_ERR(ct_priv->zone_mapping); 2100 goto err_mapping_zone; 2101 } 2102 2103 ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS, 2104 sizeof(u32) * 4, 0, true); 2105 if (IS_ERR(ct_priv->labels_mapping)) { 2106 err = PTR_ERR(ct_priv->labels_mapping); 2107 goto err_mapping_labels; 2108 } 2109 2110 spin_lock_init(&ct_priv->ht_lock); 2111 ct_priv->ns_type = ns_type; 2112 ct_priv->chains = chains; 2113 ct_priv->netdev = priv->netdev; 2114 ct_priv->dev = priv->mdev; 2115 ct_priv->mod_hdr_tbl = mod_hdr; 2116 ct_priv->ct = mlx5_chains_create_global_table(chains); 2117 if (IS_ERR(ct_priv->ct)) { 2118 err = PTR_ERR(ct_priv->ct); 2119 mlx5_core_warn(dev, 2120 "%s, failed to create ct table err: 
%d\n", 2121 INIT_ERR_PREFIX, err); 2122 goto err_ct_tbl; 2123 } 2124 2125 ct_priv->ct_nat = mlx5_chains_create_global_table(chains); 2126 if (IS_ERR(ct_priv->ct_nat)) { 2127 err = PTR_ERR(ct_priv->ct_nat); 2128 mlx5_core_warn(dev, 2129 "%s, failed to create ct nat table err: %d\n", 2130 INIT_ERR_PREFIX, err); 2131 goto err_ct_nat_tbl; 2132 } 2133 2134 ct_priv->post_act = post_act; 2135 mutex_init(&ct_priv->control_lock); 2136 if (rhashtable_init(&ct_priv->zone_ht, &zone_params)) 2137 goto err_ct_zone_ht; 2138 if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params)) 2139 goto err_ct_tuples_ht; 2140 if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) 2141 goto err_ct_tuples_nat_ht; 2142 2143 err = mlx5_tc_ct_fs_init(ct_priv); 2144 if (err) 2145 goto err_init_fs; 2146 2147 return ct_priv; 2148 2149 err_init_fs: 2150 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 2151 err_ct_tuples_nat_ht: 2152 rhashtable_destroy(&ct_priv->ct_tuples_ht); 2153 err_ct_tuples_ht: 2154 rhashtable_destroy(&ct_priv->zone_ht); 2155 err_ct_zone_ht: 2156 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); 2157 err_ct_nat_tbl: 2158 mlx5_chains_destroy_global_table(chains, ct_priv->ct); 2159 err_ct_tbl: 2160 mapping_destroy(ct_priv->labels_mapping); 2161 err_mapping_labels: 2162 mapping_destroy(ct_priv->zone_mapping); 2163 err_mapping_zone: 2164 kfree(ct_priv); 2165 err_alloc: 2166 err_support: 2167 2168 return NULL; 2169 } 2170 2171 void 2172 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) 2173 { 2174 struct mlx5_fs_chains *chains; 2175 2176 if (!ct_priv) 2177 return; 2178 2179 chains = ct_priv->chains; 2180 2181 ct_priv->fs_ops->destroy(ct_priv->fs); 2182 kfree(ct_priv->fs); 2183 2184 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); 2185 mlx5_chains_destroy_global_table(chains, ct_priv->ct); 2186 mapping_destroy(ct_priv->zone_mapping); 2187 mapping_destroy(ct_priv->labels_mapping); 2188 2189 rhashtable_destroy(&ct_priv->ct_tuples_ht); 2190 
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 2191 rhashtable_destroy(&ct_priv->zone_ht); 2192 mutex_destroy(&ct_priv->control_lock); 2193 kfree(ct_priv); 2194 } 2195 2196 bool 2197 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, 2198 struct sk_buff *skb, u8 zone_restore_id) 2199 { 2200 struct mlx5_ct_tuple tuple = {}; 2201 struct mlx5_ct_entry *entry; 2202 u16 zone; 2203 2204 if (!ct_priv || !zone_restore_id) 2205 return true; 2206 2207 if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone)) 2208 return false; 2209 2210 if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) 2211 return false; 2212 2213 spin_lock(&ct_priv->ht_lock); 2214 2215 entry = mlx5_tc_ct_entry_get(ct_priv, &tuple); 2216 if (!entry) { 2217 spin_unlock(&ct_priv->ht_lock); 2218 return false; 2219 } 2220 2221 if (IS_ERR(entry)) { 2222 spin_unlock(&ct_priv->ht_lock); 2223 return false; 2224 } 2225 spin_unlock(&ct_priv->ht_lock); 2226 2227 tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); 2228 __mlx5_tc_ct_entry_put(entry); 2229 2230 return true; 2231 } 2232