// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>
#include <linux/if_macvlan.h>
#include <linux/debugfs.h>

#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/tc/ct_fs.h"
#include "en/tc_priv.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en/tc/post_act.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"
#include "fs_core.h"

#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
#define MLX5_CT_STATE_REPLY_BIT BIT(4)
#define MLX5_CT_STATE_RELATED_BIT BIT(5)
#define MLX5_CT_STATE_INVALID_BIT BIT(6)

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

/* Statically allocate modify actions for
 * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
 * This will be increased dynamically if needed (for the ipv6 snat + dnat).
 */
#define MLX5_CT_MIN_MOD_ACTS 10

#define ct_dbg(fmt, args...)\
	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

struct mlx5_tc_ct_debugfs {
	struct {
		atomic_t offloaded;
		atomic_t rx_dropped;
	} stats;

	struct dentry *root;
};

struct mlx5_tc_ct_priv {
	struct mlx5_core_dev *dev;
	const struct net_device *netdev;
	struct mod_hdr_tbl *mod_hdr_tbl;
	struct xarray tuple_ids;
	struct rhashtable zone_ht;
	struct rhashtable ct_tuples_ht;
	struct rhashtable ct_tuples_nat_ht;
	struct mlx5_flow_table *ct;
	struct mlx5_flow_table *ct_nat;
	struct mlx5e_post_act *post_act;
	struct mutex control_lock; /* guards parallel adds/dels */
	struct mapping_ctx *zone_mapping;
	struct mapping_ctx *labels_mapping;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5_fs_chains *chains;
	struct mlx5_ct_fs *fs;
	struct mlx5_ct_fs_ops *fs_ops;
	spinlock_t ht_lock; /* protects ft entries */

	struct mlx5_tc_ct_debugfs debugfs;
};

struct mlx5_ct_flow {
	struct mlx5_flow_attr *pre_ct_attr;
	struct mlx5_flow_handle *pre_ct_rule;
	struct mlx5_ct_ft *ft;
	u32 chain_mapping;
};

struct mlx5_ct_zone_rule {
	struct mlx5_ct_fs_rule *rule;
	struct mlx5e_mod_hdr_handle *mh;
	struct mlx5_flow_attr *attr;
	bool nat;
};

struct mlx5_tc_ct_pre {
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *flow_grp;
	struct mlx5_flow_group *miss_grp;
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_handle *miss_rule;
	struct mlx5_modify_hdr *modify_hdr;
};

struct mlx5_ct_ft {
	struct rhash_head node;
	u16 zone;
	u32 zone_restore_id;
	refcount_t refcount;
	struct nf_flowtable *nf_ft;
	struct mlx5_tc_ct_priv *ct_priv;
	struct rhashtable ct_entries_ht;
	struct mlx5_tc_ct_pre pre_ct;
	struct mlx5_tc_ct_pre pre_ct_nat;
};

struct mlx5_ct_tuple {
	u16 addr_type;
	__be16 n_proto;
	u8 ip_proto;
	struct {
		union {
			__be32 src_v4;
			struct in6_addr src_v6;
		};
		union {
			__be32 dst_v4;
			struct in6_addr dst_v6;
		};
	} ip;
	struct {
		__be16 src;
		__be16 dst;
	} port;

	u16 zone;
};
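/* A counter can be shared by the two directions of a connection when
 * conntrack accounting is disabled: is_shared marks that case and refcount
 * covers both owning entries (see mlx5_tc_ct_shared_counter_get() below).
 */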
struct mlx5_ct_counter {
	struct mlx5_fc *counter;
	refcount_t refcount;
	bool is_shared;
};

enum {
	MLX5_CT_ENTRY_FLAG_VALID,
};

struct mlx5_ct_entry {
	struct rhash_head node;
	struct rhash_head tuple_node;
	struct rhash_head tuple_nat_node;
	struct mlx5_ct_counter *counter;
	unsigned long cookie;
	unsigned long restore_cookie;
	struct mlx5_ct_tuple tuple;
	struct mlx5_ct_tuple tuple_nat;
	struct mlx5_ct_zone_rule zone_rules[2];

	struct mlx5_tc_ct_priv *ct_priv;
	struct work_struct work;

	refcount_t refcnt;
	unsigned long flags;
};

static void
mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				 struct mlx5_flow_attr *attr,
				 struct mlx5e_mod_hdr_handle *mh);

static const struct rhashtable_params cts_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, node),
	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
	.head_offset = offsetof(struct mlx5_ct_ft, node),
	.key_offset = offsetof(struct mlx5_ct_ft, zone),
	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
	.automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static bool
mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
{
	return !!(entry->tuple_nat_node.next);
}

static int
mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
		       u32 *labels, u32 *id)
{
	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
		*id = 0;
		return 0;
	}

	if (mapping_add(ct_priv->labels_mapping, labels, id))
		return -EOPNOTSUPP;

	return 0;
}

static void
mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
{
	if (id)
		mapping_remove(ct_priv->labels_mapping, id);
}

static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
	struct flow_match_control control;
	struct flow_match_basic basic;

	flow_rule_match_basic(rule, &basic);
	flow_rule_match_control(rule, &control);

	tuple->n_proto = basic.key->n_proto;
	tuple->ip_proto = basic.key->ip_proto;
	tuple->addr_type = control.key->addr_type;

	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		tuple->ip.src_v4 = match.key->src;
		tuple->ip.dst_v4 = match.key->dst;
	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		tuple->ip.src_v6 = match.key->src;
		tuple->ip.dst_v6 = match.key->dst;
	} else {
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (tuple->ip_proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
			tuple->port.src = match.key->src;
			tuple->port.dst = match.key->dst;
			break;
		default:
			return -EOPNOTSUPP;
		}
	} else {
		if (tuple->ip_proto != IPPROTO_GRE)
			return -EOPNOTSUPP;
	}

	return 0;
}
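/* Derive the post-NAT tuple from the rule's mangle actions. Illustrative
 * example: a FLOW_ACT_MANGLE_HDR_TYPE_IP6 mangle at offset
 * offsetof(struct ipv6hdr, saddr) + 8 rewrites 32-bit word 2 of the source
 * address, i.e. tuple->ip.src_v6.s6_addr32[2].
 */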
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
			     struct flow_rule *rule)
{
	struct flow_action *flow_action = &rule->action;
	struct flow_action_entry *act;
	u32 offset, val, ip6_offset;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id != FLOW_ACTION_MANGLE)
			continue;

		offset = act->mangle.offset;
		val = act->mangle.val;
		switch (act->mangle.htype) {
		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
			if (offset == offsetof(struct iphdr, saddr))
				tuple->ip.src_v4 = cpu_to_be32(val);
			else if (offset == offsetof(struct iphdr, daddr))
				tuple->ip.dst_v4 = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
			ip6_offset /= 4;
			if (ip6_offset < 4)
				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
			else if (ip6_offset < 8)
				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
			if (offset == offsetof(struct tcphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct tcphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
			if (offset == offsetof(struct udphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct udphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

static int
mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
				 struct net_device *ndev)
{
	struct mlx5e_priv *other_priv = netdev_priv(ndev);
	struct mlx5_core_dev *mdev = ct_priv->dev;
	bool vf_rep, uplink_rep;

	vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
	uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);

	if (vf_rep)
		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
	if (uplink_rep)
		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
	if (is_vlan_dev(ndev))
		return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
	if (netif_is_macvlan(ndev))
		return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
	if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;

	return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
}
static int
mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
			   struct mlx5_flow_spec *spec,
			   struct flow_rule *rule)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);

		mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 match.mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 match.key->ip_proto);

		ip_proto = match.key->ip_proto;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match;

		flow_rule_match_control(rule, &match);
		addr_type = match.key->addr_type;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.key->src, sizeof(match.key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.key->src, sizeof(match.key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			break;
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
		struct flow_match_meta match;

		flow_rule_match_meta(rule, &match);

		if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
			struct net_device *dev;

			dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
			if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
				spec->flow_context.flow_source =
					mlx5_tc_ct_get_flow_source_match(ct_priv, dev);

			dev_put(dev);
		}
	}

	return 0;
}
static void
mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
{
	if (entry->counter->is_shared &&
	    !refcount_dec_and_test(&entry->counter->refcount))
		return;

	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
	kfree(entry->counter);
}

static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5_ct_entry *entry,
			  bool nat)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5_flow_attr *attr = zone_rule->attr;

	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

	ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
	kfree(attr);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct mlx5_ct_entry *entry)
{
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);

	atomic_dec(&ct_priv->debugfs.stats.offloaded);
}

static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct flow_action_entry *act;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id == FLOW_ACTION_CT_METADATA)
			return act;
	}

	return NULL;
}
static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
			       u8 ct_state,
			       u32 mark,
			       u32 labels_id,
			       u8 zone_restore_id)
{
	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
	struct mlx5_core_dev *dev = ct_priv->dev;
	int err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					CTSTATE_TO_REG, ct_state);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					MARK_TO_REG, mark);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					LABELS_TO_REG, labels_id);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					ZONE_RESTORE_TO_REG, zone_restore_id);
	if (err)
		return err;

	/* Make another copy of zone id in reg_b for
	 * NIC rx flows since we don't copy reg_c1 to
	 * reg_b upon miss.
	 */
	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
		if (err)
			return err;
	}
	return 0;
}

int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
				 struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
}

static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
				   char *modact)
{
	u32 offset = act->mangle.offset, field;

	switch (act->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct iphdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
		else if (offset == offsetof(struct iphdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct tcphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
		else if (offset == offsetof(struct tcphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct udphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
		else if (offset == offsetof(struct udphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	default:
		return -EOPNOTSUPP;
	}

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, offset, 0);
	MLX5_SET(set_action_in, modact, field, field);
	MLX5_SET(set_action_in, modact, data, act->mangle.val);

	return 0;
}
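/* Each FLOW_ACTION_MANGLE entry maps to exactly one device set-action.
 * Illustrative example: a TCP source port rewrite becomes a set_action_in of
 * field MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT, length 16 and the mangled value
 * as data, as built by mlx5_tc_ct_parse_mangle_to_mod_act() above.
 */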
static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
			    struct flow_rule *flow_rule,
			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct mlx5_core_dev *mdev = ct_priv->dev;
	struct flow_action_entry *act;
	char *modact;
	int err, i;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_MANGLE: {
			modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
			if (IS_ERR(modact))
				return PTR_ERR(modact);

			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
			if (err)
				return err;

			mod_acts->num_actions++;
		}
		break;

		case FLOW_ACTION_CT_METADATA:
			/* Handled earlier */
			continue;
		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				struct mlx5_flow_attr *attr,
				struct flow_rule *flow_rule,
				struct mlx5e_mod_hdr_handle **mh,
				u8 zone_restore_id, bool nat_table, bool has_nat)
{
	DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
	DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
	struct flow_action_entry *meta;
	u16 ct_state = 0;
	int err;

	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta)
		return -EOPNOTSUPP;

	err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
				     &attr->ct_attr.ct_labels_id);
	if (err)
		return -EOPNOTSUPP;
	if (nat_table) {
		if (has_nat) {
			err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
			if (err)
				goto err_mapping;
		}

		ct_state |= MLX5_CT_STATE_NAT_BIT;
	}

	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
	ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
					     ct_state,
					     meta->ct_metadata.mark,
					     attr->ct_attr.ct_labels_id,
					     zone_restore_id);
	if (err)
		goto err_mapping;

	if (nat_table && has_nat) {
		attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
							    mod_acts.num_actions,
							    mod_acts.actions);
		if (IS_ERR(attr->modify_hdr)) {
			err = PTR_ERR(attr->modify_hdr);
			goto err_mapping;
		}

		*mh = NULL;
	} else {
		*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
					   ct_priv->mod_hdr_tbl,
					   ct_priv->ns_type,
					   &mod_acts);
		if (IS_ERR(*mh)) {
			err = PTR_ERR(*mh);
			goto err_mapping;
		}
		attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
	}

	mlx5e_mod_hdr_dealloc(&mod_acts);
	return 0;

err_mapping:
	mlx5e_mod_hdr_dealloc(&mod_acts);
	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
	return err;
}

static void
mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				 struct mlx5_flow_attr *attr,
				 struct mlx5e_mod_hdr_handle *mh)
{
	if (mh)
		mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
	else
		mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
}
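/* Each offloaded conntrack entry programs two rules, indexed by the nat flag:
 * zone_rules[0] in the plain CT table and zone_rules[1] in the CT NAT table.
 * Both match the tuple plus zone and forward to the post_act table.
 */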
static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct flow_rule *flow_rule,
			  struct mlx5_ct_entry *entry,
			  bool nat, u8 zone_restore_id)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	struct mlx5_flow_spec *spec = NULL;
	struct mlx5_flow_attr *attr;
	int err;

	zone_rule->nat = nat;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!attr) {
		err = -ENOMEM;
		goto err_attr;
	}

	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
					      &zone_rule->mh,
					      zone_restore_id,
					      nat,
					      mlx5_tc_ct_entry_has_nat(entry));
	if (err) {
		ct_dbg("Failed to create ct entry mod hdr");
		goto err_mod_hdr;
	}

	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
	attr->dest_chain = 0;
	attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	if (entry->tuple.ip_proto == IPPROTO_TCP ||
	    entry->tuple.ip_proto == IPPROTO_UDP)
		attr->outer_match_level = MLX5_MATCH_L4;
	else
		attr->outer_match_level = MLX5_MATCH_L3;
	attr->counter = entry->counter->counter;
	attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
		attr->esw_attr->in_mdev = priv->mdev;

	mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);

	zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
	if (IS_ERR(zone_rule->rule)) {
		err = PTR_ERR(zone_rule->rule);
		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
		goto err_rule;
	}

	zone_rule->attr = attr;

	kvfree(spec);
	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

	return 0;

err_rule:
	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
err_mod_hdr:
	kfree(attr);
err_attr:
	kvfree(spec);
	return err;
}

static bool
mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
{
	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
}

static struct mlx5_ct_entry *
mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
{
	struct mlx5_ct_entry *entry;

	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
				       tuples_ht_params);
	if (entry && mlx5_tc_ct_entry_valid(entry) &&
	    refcount_inc_not_zero(&entry->refcnt)) {
		return entry;
	} else if (!entry) {
		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
					       tuple, tuples_nat_ht_params);
		if (entry && mlx5_tc_ct_entry_valid(entry) &&
		    refcount_inc_not_zero(&entry->refcnt))
			return entry;
	}

	return entry ? ERR_PTR(-EINVAL) : NULL;
}
static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
{
	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
			       &entry->tuple_nat_node,
			       tuples_nat_ht_params);
	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
			       tuples_ht_params);
}

static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
{
	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

	mlx5_tc_ct_entry_del_rules(ct_priv, entry);

	spin_lock_bh(&ct_priv->ht_lock);
	mlx5_tc_ct_entry_remove_from_tuples(entry);
	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_tc_ct_counter_put(ct_priv, entry);
	kfree(entry);
}

static void
mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
	if (!refcount_dec_and_test(&entry->refcnt))
		return;

	mlx5_tc_ct_entry_del(entry);
}

static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
{
	struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);

	mlx5_tc_ct_entry_del(entry);
}

static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
	struct mlx5e_priv *priv;

	if (!refcount_dec_and_test(&entry->refcnt))
		return;

	priv = netdev_priv(entry->ct_priv->netdev);
	INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
	queue_work(priv->wq, &entry->work);
}
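/* __mlx5_tc_ct_entry_put() defers the final teardown to the driver workqueue:
 * deleting an entry removes hardware rules and takes blocking locks, so this
 * variant lets callers that cannot sleep drop their reference safely.
 */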
static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_ct_counter *counter;
	int ret;

	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
	if (!counter)
		return ERR_PTR(-ENOMEM);

	counter->is_shared = false;
	counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
	if (IS_ERR(counter->counter)) {
		ct_dbg("Failed to create counter for ct entry");
		ret = PTR_ERR(counter->counter);
		kfree(counter);
		return ERR_PTR(ret);
	}

	return counter;
}

static struct mlx5_ct_counter *
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
			      struct mlx5_ct_entry *entry)
{
	struct mlx5_ct_tuple rev_tuple = entry->tuple;
	struct mlx5_ct_counter *shared_counter;
	struct mlx5_ct_entry *rev_entry;

	/* get the reversed tuple */
	swap(rev_tuple.port.src, rev_tuple.port.dst);

	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		__be32 tmp_addr = rev_tuple.ip.src_v4;

		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
		rev_tuple.ip.dst_v4 = tmp_addr;
	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;

		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
		rev_tuple.ip.dst_v6 = tmp_addr;
	} else {
		return ERR_PTR(-EOPNOTSUPP);
	}

	/* Use the same counter as the reverse direction */
	spin_lock_bh(&ct_priv->ht_lock);
	rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);

	if (IS_ERR(rev_entry)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		goto create_counter;
	}

	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
		ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
		shared_counter = rev_entry->counter;
		spin_unlock_bh(&ct_priv->ht_lock);

		mlx5_tc_ct_entry_put(rev_entry);
		return shared_counter;
	}

	spin_unlock_bh(&ct_priv->ht_lock);

create_counter:

	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
	if (IS_ERR(shared_counter))
		return shared_counter;

	shared_counter->is_shared = true;
	refcount_set(&shared_counter->refcount, 1);
	return shared_counter;
}

static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct flow_rule *flow_rule,
			   struct mlx5_ct_entry *entry,
			   u8 zone_restore_id)
{
	int err;

	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
	else
		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);

	if (IS_ERR(entry->counter)) {
		err = PTR_ERR(entry->counter);
		return err;
	}

	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
					zone_restore_id);
	if (err)
		goto err_orig;

	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
					zone_restore_id);
	if (err)
		goto err_nat;

	atomic_inc(&ct_priv->debugfs.stats.offloaded);
	return 0;

err_nat:
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
	mlx5_tc_ct_counter_put(ct_priv, entry);
	return err;
}

static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	struct flow_action_entry *meta_action;
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;
	int err;

	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta_action)
		return -EOPNOTSUPP;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		mlx5_tc_ct_entry_put(entry);
		return -EEXIST;
	}
	spin_unlock_bh(&ct_priv->ht_lock);

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->tuple.zone = ft->zone;
	entry->cookie = flow->cookie;
	entry->restore_cookie = meta_action->ct_metadata.cookie;
	refcount_set(&entry->refcnt, 2);
	entry->ct_priv = ct_priv;

	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
	if (err)
		goto err_set;

	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
	if (err)
		goto err_set;

	spin_lock_bh(&ct_priv->ht_lock);

	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
					    cts_ht_params);
	if (err)
		goto err_entries;

	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
					    &entry->tuple_node,
					    tuples_ht_params);
	if (err)
		goto err_tuple;

	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
						    &entry->tuple_nat_node,
						    tuples_nat_ht_params);
		if (err)
			goto err_tuple_nat;
	}
	spin_unlock_bh(&ct_priv->ht_lock);

	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
					 ft->zone_restore_id);
	if (err)
		goto err_rules;

	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
	mlx5_tc_ct_entry_put(entry); /* this function reference */

	return 0;

err_rules:
	spin_lock_bh(&ct_priv->ht_lock);
	if (mlx5_tc_ct_entry_has_nat(entry))
		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
				       &entry->tuple_nat_node, tuples_nat_ht_params);
err_tuple_nat:
	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
			       &entry->tuple_node,
			       tuples_ht_params);
err_tuple:
	rhashtable_remove_fast(&ft->ct_entries_ht,
			       &entry->node,
			       cts_ht_params);
err_entries:
	spin_unlock_bh(&ct_priv->ht_lock);
err_set:
	kfree(entry);
	if (err != -EEXIST)
		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
	return err;
}
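/* mlx5_tc_ct_block_flow_offload_add() publishes each entry with refcnt 2: one
 * reference is owned by ct_entries_ht and dropped on FLOW_CLS_DESTROY, the
 * other covers rule installation and is dropped at the end of the function.
 */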
static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (!entry) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -ENOENT;
	}

	if (!mlx5_tc_ct_entry_valid(entry)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -EINVAL;
	}

	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_tc_ct_entry_put(entry);

	return 0;
}

static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
				    struct flow_cls_offload *f)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	unsigned long cookie = f->cookie;
	struct mlx5_ct_entry *entry;
	u64 lastuse, packets, bytes;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (!entry) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -ENOENT;
	}

	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -EINVAL;
	}

	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
			  FLOW_ACTION_HW_STATS_DELAYED);

	mlx5_tc_ct_entry_put(entry);
	return 0;
}

static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
			      void *cb_priv)
{
	struct flow_cls_offload *f = type_data;
	struct mlx5_ct_ft *ft = cb_priv;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (f->command) {
	case FLOW_CLS_REPLACE:
		return mlx5_tc_ct_block_flow_offload_add(ft, f);
	case FLOW_CLS_DESTROY:
		return mlx5_tc_ct_block_flow_offload_del(ft, f);
	case FLOW_CLS_STATS:
		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
	default:
		break;
	}

	return -EOPNOTSUPP;
}

static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
			u16 zone)
{
	struct flow_keys flow_keys;

	skb_reset_network_header(skb);
	skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);

	tuple->zone = zone;

	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
	    flow_keys.basic.ip_proto != IPPROTO_UDP &&
	    flow_keys.basic.ip_proto != IPPROTO_GRE)
		return false;

	if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
	    flow_keys.basic.ip_proto == IPPROTO_UDP) {
		tuple->port.src = flow_keys.ports.src;
		tuple->port.dst = flow_keys.ports.dst;
	}
	tuple->n_proto = flow_keys.basic.n_proto;
	tuple->ip_proto = flow_keys.basic.ip_proto;

	switch (flow_keys.basic.n_proto) {
	case htons(ETH_P_IP):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
		break;

	case htons(ETH_P_IPV6):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
		break;
	default:
		goto out;
	}

	return true;

out:
	return false;
}
int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
	u32 ctstate = 0, ctstate_mask = 0;

	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
					&ctstate, &ctstate_mask);

	if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
		return -EOPNOTSUPP;

	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
				    ctstate, ctstate_mask);

	return 0;
}

void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
{
	if (!priv || !ct_attr->ct_labels_id)
		return;

	mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
}
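/* Translate a tc flower ct_state match into a CTSTATE register match.
 * Illustrative example: ct_state +trk+est sets MLX5_CT_STATE_TRK_BIT and
 * MLX5_CT_STATE_ESTABLISHED_BIT in both the key and the mask, while -trk sets
 * the TRK bit in the mask only, so just untracked packets match.
 */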
int
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
		     struct mlx5_flow_spec *spec,
		     struct flow_cls_offload *f,
		     struct mlx5_ct_attr *ct_attr,
		     struct netlink_ext_ack *extack)
{
	bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_dissector_key_ct *mask, *key;
	u32 ctstate = 0, ctstate_mask = 0;
	u16 ct_state_on, ct_state_off;
	u16 ct_state, ct_state_mask;
	struct flow_match_ct match;
	u32 ct_labels[4];

	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
		return 0;

	if (!priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct matching isn't available");
		return -EOPNOTSUPP;
	}

	flow_rule_match_ct(rule, &match);

	key = match.key;
	mask = match.mask;

	ct_state = key->ct_state;
	ct_state_mask = mask->ct_state;

	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only ct_state trk, est, new and rpl are supported for offload");
		return -EOPNOTSUPP;
	}

	ct_state_on = ct_state & ct_state_mask;
	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;

	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;

	if (rel) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +rel isn't supported");
		return -EOPNOTSUPP;
	}

	if (inv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +inv isn't supported");
		return -EOPNOTSUPP;
	}

	if (new) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +new isn't supported");
		return -EOPNOTSUPP;
	}

	if (mask->ct_zone)
		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
					    key->ct_zone, MLX5_CT_ZONE_MASK);
	if (ctstate_mask)
		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
					    ctstate, ctstate_mask);
	if (mask->ct_mark)
		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
					    key->ct_mark, mask->ct_mark);
	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
	    mask->ct_labels[3]) {
		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
		if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
			return -EOPNOTSUPP;
		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
					    MLX5_CT_LABELS_MASK);
	}

	return 0;
}

int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
			struct mlx5_flow_attr *attr,
			struct mlx5e_tc_mod_hdr_acts *mod_acts,
			const struct flow_action_entry *act,
			struct netlink_ext_ack *extack)
{
	if (!priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct action isn't available");
		return -EOPNOTSUPP;
	}

	attr->ct_attr.zone = act->ct.zone;
	attr->ct_attr.ct_action = act->ct.action;
	attr->ct_attr.nf_ft = act->ct.flow_table;

	return 0;
}
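/* Each zone gets a two-entry pre_ct table (see mlx5_tc_ct_alloc_pre_ct()
 * below): FTE 0 matches zone + tracked (+nat) in the CTSTATE/zone register
 * and jumps straight to post_act, while FTE 1 is the catch-all miss that
 * sends the packet to the CT (or CT NAT) table for tuple matching.
 */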
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
				  struct mlx5_tc_ct_pre *pre_ct,
				  bool nat)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	struct mlx5_core_dev *dev = ct_priv->dev;
	struct mlx5_flow_table *ft = pre_ct->ft;
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	u32 ctstate;
	u16 zone;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
					ZONE_TO_REG, zone);
	if (err) {
		ct_dbg("Failed to set zone register mapping");
		goto err_mapping;
	}

	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);

	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct->modify_hdr = mod_hdr;

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
	flow_act.modify_hdr = mod_hdr;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

	/* add flow rule */
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
				    zone, MLX5_CT_ZONE_MASK);
	ctstate = MLX5_CT_STATE_TRK_BIT;
	if (nat)
		ctstate |= MLX5_CT_STATE_NAT_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);

	dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
		goto err_flow_rule;
	}
	pre_ct->flow_rule = rule;

	/* add miss rule */
	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
		goto err_miss_rule;
	}
	pre_ct->miss_rule = rule;

	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
	kvfree(spec);
	return 0;

err_miss_rule:
	mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
	kvfree(spec);
	return err;
}

static void
tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
		       struct mlx5_tc_ct_pre *pre_ct)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->dev;

	mlx5_del_flow_rules(pre_ct->flow_rule);
	mlx5_del_flow_rules(pre_ct->miss_rule);
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
}
static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
			struct mlx5_tc_ct_pre *pre_ct,
			bool nat)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->dev;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *g;
	u32 metadata_reg_c_2_mask;
	u32 *flow_group_in;
	void *misc;
	int err;

	ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
	if (!ns) {
		err = -EOPNOTSUPP;
		ct_dbg("Failed to get flow namespace");
		return err;
	}

	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
	if (!flow_group_in)
		return -ENOMEM;

	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
		       FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
	ft_attr.max_fte = 2;
	ft_attr.level = 1;
	ft = mlx5_create_flow_table(ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to create pre ct table");
		goto out_free;
	}
	pre_ct->ft = ft;

	/* create flow group */
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
		 MLX5_MATCH_MISC_PARAMETERS_2);

	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
			    match_criteria.misc_parameters_2);

	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
	if (nat)
		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);

	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
		 metadata_reg_c_2_mask);

	g = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		ct_dbg("Failed to create pre ct group");
		goto err_flow_grp;
	}
	pre_ct->flow_grp = g;

	/* create miss group */
	memset(flow_group_in, 0, inlen);
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
	g = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		ct_dbg("Failed to create pre ct miss group");
		goto err_miss_grp;
	}
	pre_ct->miss_grp = g;

	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
	if (err)
		goto err_add_rules;

	kvfree(flow_group_in);
	return 0;

err_add_rules:
	mlx5_destroy_flow_group(pre_ct->miss_grp);
err_miss_grp:
	mlx5_destroy_flow_group(pre_ct->flow_grp);
err_flow_grp:
	mlx5_destroy_flow_table(ft);
out_free:
	kvfree(flow_group_in);
	return err;
}

static void
mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
		       struct mlx5_tc_ct_pre *pre_ct)
{
	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
	mlx5_destroy_flow_group(pre_ct->miss_grp);
	mlx5_destroy_flow_group(pre_ct->flow_grp);
	mlx5_destroy_flow_table(pre_ct->ft);
}

static int
mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
{
	int err;

	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
	if (err)
		return err;

	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
	if (err)
		goto err_pre_ct_nat;

	return 0;

err_pre_ct_nat:
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
	return err;
}

static void
mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
{
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
}
/* To avoid a false lock dependency warning, set the ct_entries_ht lock class
 * different than the lock class of the ht being used when deleting the last
 * flow from a group and then deleting the group: we get into
 * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash and
 * takes that ht->mutex, which is different from the ht->mutex here.
 */
static struct lock_class_key ct_entries_ht_lock_key;

static struct mlx5_ct_ft *
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
		     struct nf_flowtable *nf_ft)
{
	struct mlx5_ct_ft *ft;
	int err;

	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
	if (ft) {
		refcount_inc(&ft->refcount);
		return ft;
	}

	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
	if (err)
		goto err_mapping;

	ft->zone = zone;
	ft->nf_ft = nf_ft;
	ft->ct_priv = ct_priv;
	refcount_set(&ft->refcount, 1);

	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
	if (err)
		goto err_alloc_pre_ct;

	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
	if (err)
		goto err_init;

	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);

	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
				     zone_params);
	if (err)
		goto err_insert;

	err = nf_flow_table_offload_add_cb(ft->nf_ft,
					   mlx5_tc_ct_block_flow_offload, ft);
	if (err)
		goto err_add_cb;

	return ft;

err_add_cb:
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
err_insert:
	rhashtable_destroy(&ft->ct_entries_ht);
err_init:
	mlx5_tc_ct_free_pre_ct_tables(ft);
err_alloc_pre_ct:
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
err_mapping:
	kfree(ft);
	return ERR_PTR(err);
}

static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
	struct mlx5_ct_entry *entry = ptr;

	mlx5_tc_ct_entry_put(entry);
}

static void
mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
{
	struct mlx5e_priv *priv;

	if (!refcount_dec_and_test(&ft->refcount))
		return;

	nf_flow_table_offload_del_cb(ft->nf_ft,
				     mlx5_tc_ct_block_flow_offload, ft);
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
	rhashtable_free_and_destroy(&ft->ct_entries_ht,
				    mlx5_tc_ct_flush_ft_entry,
				    ct_priv);
	priv = netdev_priv(ct_priv->netdev);
	flush_workqueue(priv->wq);
	mlx5_tc_ct_free_pre_ct_tables(ft);
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
	kfree(ft);
}

/* We translate the tc filter with CT action to the following HW model:
 *
 * +---------------------+
 * + ft prio (tc chain)  +
 * + original match      +
 * +---------------------+
 *      | set chain miss mapping
 *      | set fte_id
 *      | set tunnel_id
 *      | do decap
 *      v
 * +---------------------+
 * + pre_ct/pre_ct_nat   +  if matches     +-------------------------+
 * + zone+nat match      +---------------->+ post_act (see below)    +
 * +---------------------+  set zone       +-------------------------+
 *      | set zone
 *      v
 * +--------------------+
 * + CT (nat or no nat) +
 * + tuple + zone match +
 * +--------------------+
 *      | set mark
 *      | set labels_id
 *      | set established
 *      | set zone_restore
 *      | do nat (if needed)
 *      v
 * +--------------+
 * + post_act     + original filter actions
 * + fte_id match +------------------------>
 * +--------------+
 */
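/* An illustrative tc ruleset that exercises this model (example only, not
 * taken from the original source; device names and chain ids are arbitrary):
 *
 *   tc filter add dev eth0 ingress chain 0 proto ip flower \
 *       ct_state -trk action ct zone 1 pipe action goto chain 1
 *   tc filter add dev eth0 ingress chain 1 proto ip flower \
 *       ct_state +trk+est action mirred egress redirect dev eth1
 *
 * The chain 0 rule's ct action is offloaded through pre_ct -> CT -> post_act,
 * where the trailing goto executes; the chain 1 rule then matches in hardware
 * on the CTSTATE/zone registers restored by the CT rules.
 */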
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5_flow_spec *orig_spec,
			  struct mlx5_flow_attr *attr)
{
	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
	struct mlx5_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_ct_flow *ct_flow;
	int chain_mapping = 0, err;
	struct mlx5_ct_ft *ft;

	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!ct_flow)
		return ERR_PTR(-ENOMEM);

	/* Register for CT established events */
	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
				  attr->ct_attr.nf_ft);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to register to ft callback");
		goto err_ft;
	}
	ct_flow->ft = ft;

	/* Base flow attributes of both rules on original rule attribute */
	ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!ct_flow->pre_ct_attr) {
		err = -ENOMEM;
		goto err_alloc_pre;
	}

	pre_ct_attr = ct_flow->pre_ct_attr;
	memcpy(pre_ct_attr, attr, attr_sz);
	pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;

	/* Modify the original rule's action to fwd and modify, leave decap */
	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	/* Write chain miss tag for miss in ct table as we
	 * don't go through all prios of this chain as normal tc rules
	 * miss.
	 */
	err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
					    &chain_mapping);
	if (err) {
		ct_dbg("Failed to get chain register mapping for chain");
		goto err_get_chain;
	}
	ct_flow->chain_mapping = chain_mapping;

	err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
					CHAIN_TO_REG, chain_mapping);
	if (err) {
		ct_dbg("Failed to set chain register mapping");
		goto err_mapping;
	}

	/* If original flow is decap, we do it before going into ct table
	 * so add a rewrite for the tunnel match_id.
	 */
	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
	    attr->chain == 0) {
		err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
						ct_priv->ns_type,
						TUNNEL_TO_REG,
						attr->tunnel_id);
		if (err) {
			ct_dbg("Failed to set tunnel register mapping");
			goto err_mapping;
		}
	}

	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
					   pre_mod_acts->num_actions,
					   pre_mod_acts->actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct_attr->modify_hdr = mod_hdr;

	/* Change the original rule to point to the ct table */
	pre_ct_attr->dest_chain = 0;
	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
	ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
						   pre_ct_attr);
	if (IS_ERR(ct_flow->pre_ct_rule)) {
		err = PTR_ERR(ct_flow->pre_ct_rule);
		ct_dbg("Failed to add pre ct rule");
		goto err_insert_orig;
	}

	attr->ct_attr.ct_flow = ct_flow;
	mlx5e_mod_hdr_dealloc(pre_mod_acts);

	return ct_flow->pre_ct_rule;

err_insert_orig:
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
	mlx5e_mod_hdr_dealloc(pre_mod_acts);
	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
	kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
	kfree(ct_flow);
	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
	return ERR_PTR(err);
}
struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
			struct mlx5_flow_spec *spec,
			struct mlx5_flow_attr *attr,
			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
	struct mlx5_flow_handle *rule;

	if (!priv)
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&priv->control_lock);
	rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
	mutex_unlock(&priv->control_lock);

	return rule;
}

static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct mlx5_ct_flow *ct_flow,
			 struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);

	mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);

	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
	mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);

	kfree(ct_flow->pre_ct_attr);
	kfree(ct_flow);
}

void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
		       struct mlx5_flow_attr *attr)
{
	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;

	/* We are called on error to clean up stuff from parsing
	 * but we don't have anything for now
	 */
	if (!ct_flow)
		return;

	mutex_lock(&priv->control_lock);
	__mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
	mutex_unlock(&priv->control_lock);
}

static int
mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
	struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
	int err;

	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
	    ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
		ct_dbg("Using SMFS ct flow steering provider");
		fs_ops = mlx5_ct_fs_smfs_ops_get();
	}

	ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
	if (!ct_priv->fs)
		return -ENOMEM;

	ct_priv->fs->netdev = ct_priv->netdev;
	ct_priv->fs->dev = ct_priv->dev;
	ct_priv->fs_ops = fs_ops;

	err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
	if (err)
		goto err_init;

	return 0;

err_init:
	kfree(ct_priv->fs);
	return err;
}
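/* mlx5_tc_ct_fs_init() selects the flow-steering provider for CT rules: the
 * dmfs (device/firmware managed) ops are the default, and the smfs (software
 * managed) ops are used for FDB offload when the steering mode is
 * MLX5_FLOW_STEERING_MODE_SMFS.
 */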
static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
				  const char **err_msg)
{
	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* The vlan workaround should be avoided for multi-chain rules.
		 * This is just a sanity check, as the pop vlan action should
		 * be supported by any FW that supports ignore_flow_level.
		 */
		*err_msg = "firmware vlan actions support is missing";
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers, which are mod header actions,
		 * so both mod header and goto support are required.
		 */
		*err_msg = "firmware fwd and modify support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		*err_msg = "register loopback isn't supported";
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
			      enum mlx5_flow_namespace_type ns_type,
			      struct mlx5e_post_act *post_act)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	const char *err_msg = NULL;
	int err = 0;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	/* cannot restore chain ID on HW miss */

	err_msg = "tc skb extension missing";
	err = -EOPNOTSUPP;
	goto out_err;
#endif
	if (IS_ERR_OR_NULL(post_act)) {
		/* ignore_flow_level is not supported by default for VFs, so
		 * post_act won't be available either. Skip the error message
		 * in that case.
		 */
		if (priv->mdev->coredev_type != MLX5_COREDEV_VF)
			err_msg = "post action is missing";
		err = -EOPNOTSUPP;
		goto out_err;
	}

	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
		err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);

out_err:
	if (err && err_msg)
		netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
	return err;
}
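/* Expose offload statistics under a ct_fdb (or ct_nic) directory in the
 * device's debugfs root: "offloaded" counts CT entries currently offloaded
 * to hardware, and "rx_dropped" counts packets whose CT state could not be
 * restored after a hardware miss.
 */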
"fdb" : "nic") < 0) 2093 return; 2094 2095 ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev)); 2096 debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root, 2097 &ct_dbgfs->stats.offloaded); 2098 debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root, 2099 &ct_dbgfs->stats.rx_dropped); 2100 } 2101 2102 static void 2103 mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv) 2104 { 2105 debugfs_remove_recursive(ct_priv->debugfs.root); 2106 } 2107 2108 #define INIT_ERR_PREFIX "tc ct offload init failed" 2109 2110 struct mlx5_tc_ct_priv * 2111 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, 2112 struct mod_hdr_tbl *mod_hdr, 2113 enum mlx5_flow_namespace_type ns_type, 2114 struct mlx5e_post_act *post_act) 2115 { 2116 struct mlx5_tc_ct_priv *ct_priv; 2117 struct mlx5_core_dev *dev; 2118 u64 mapping_id; 2119 int err; 2120 2121 dev = priv->mdev; 2122 err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act); 2123 if (err) 2124 goto err_support; 2125 2126 ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); 2127 if (!ct_priv) 2128 goto err_alloc; 2129 2130 mapping_id = mlx5_query_nic_system_image_guid(dev); 2131 2132 ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE, 2133 sizeof(u16), 0, true); 2134 if (IS_ERR(ct_priv->zone_mapping)) { 2135 err = PTR_ERR(ct_priv->zone_mapping); 2136 goto err_mapping_zone; 2137 } 2138 2139 ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS, 2140 sizeof(u32) * 4, 0, true); 2141 if (IS_ERR(ct_priv->labels_mapping)) { 2142 err = PTR_ERR(ct_priv->labels_mapping); 2143 goto err_mapping_labels; 2144 } 2145 2146 spin_lock_init(&ct_priv->ht_lock); 2147 ct_priv->ns_type = ns_type; 2148 ct_priv->chains = chains; 2149 ct_priv->netdev = priv->netdev; 2150 ct_priv->dev = priv->mdev; 2151 ct_priv->mod_hdr_tbl = mod_hdr; 2152 ct_priv->ct = mlx5_chains_create_global_table(chains); 2153 if (IS_ERR(ct_priv->ct)) { 2154 err = PTR_ERR(ct_priv->ct); 2155 mlx5_core_warn(dev, 2156 "%s, failed to create ct table err: %d\n", 2157 INIT_ERR_PREFIX, err); 2158 goto err_ct_tbl; 2159 } 2160 2161 ct_priv->ct_nat = mlx5_chains_create_global_table(chains); 2162 if (IS_ERR(ct_priv->ct_nat)) { 2163 err = PTR_ERR(ct_priv->ct_nat); 2164 mlx5_core_warn(dev, 2165 "%s, failed to create ct nat table err: %d\n", 2166 INIT_ERR_PREFIX, err); 2167 goto err_ct_nat_tbl; 2168 } 2169 2170 ct_priv->post_act = post_act; 2171 mutex_init(&ct_priv->control_lock); 2172 if (rhashtable_init(&ct_priv->zone_ht, &zone_params)) 2173 goto err_ct_zone_ht; 2174 if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params)) 2175 goto err_ct_tuples_ht; 2176 if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) 2177 goto err_ct_tuples_nat_ht; 2178 2179 err = mlx5_tc_ct_fs_init(ct_priv); 2180 if (err) 2181 goto err_init_fs; 2182 2183 mlx5_ct_tc_create_dbgfs(ct_priv); 2184 return ct_priv; 2185 2186 err_init_fs: 2187 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 2188 err_ct_tuples_nat_ht: 2189 rhashtable_destroy(&ct_priv->ct_tuples_ht); 2190 err_ct_tuples_ht: 2191 rhashtable_destroy(&ct_priv->zone_ht); 2192 err_ct_zone_ht: 2193 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); 2194 err_ct_nat_tbl: 2195 mlx5_chains_destroy_global_table(chains, ct_priv->ct); 2196 err_ct_tbl: 2197 mapping_destroy(ct_priv->labels_mapping); 2198 err_mapping_labels: 2199 mapping_destroy(ct_priv->zone_mapping); 2200 err_mapping_zone: 2201 kfree(ct_priv); 2202 err_alloc: 2203 err_support: 2204 2205 return NULL; 
void
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_fs_chains *chains;

	if (!ct_priv)
		return;

	mlx5_ct_tc_remove_dbgfs(ct_priv);
	chains = ct_priv->chains;

	ct_priv->fs_ops->destroy(ct_priv->fs);
	kfree(ct_priv->fs);

	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
	mapping_destroy(ct_priv->zone_mapping);
	mapping_destroy(ct_priv->labels_mapping);

	rhashtable_destroy(&ct_priv->ct_tuples_ht);
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
	rhashtable_destroy(&ct_priv->zone_ht);
	mutex_destroy(&ct_priv->control_lock);
	kfree(ct_priv);
}

bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct sk_buff *skb, u8 zone_restore_id)
{
	struct mlx5_ct_tuple tuple = {};
	struct mlx5_ct_entry *entry;
	u16 zone;

	if (!ct_priv || !zone_restore_id)
		return true;

	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
		goto out_inc_drop;

	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
		goto out_inc_drop;

	spin_lock(&ct_priv->ht_lock);

	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
	if (IS_ERR_OR_NULL(entry)) {
		spin_unlock(&ct_priv->ht_lock);
		goto out_inc_drop;
	}
	spin_unlock(&ct_priv->ht_lock);

	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
	__mlx5_tc_ct_entry_put(entry);

	return true;

out_inc_drop:
	atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
	return false;
}