1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2019 Mellanox Technologies. */ 3 4 #include <net/netfilter/nf_conntrack.h> 5 #include <net/netfilter/nf_conntrack_core.h> 6 #include <net/netfilter/nf_conntrack_zones.h> 7 #include <net/netfilter/nf_conntrack_labels.h> 8 #include <net/netfilter/nf_conntrack_helper.h> 9 #include <net/netfilter/nf_conntrack_acct.h> 10 #include <uapi/linux/tc_act/tc_pedit.h> 11 #include <net/tc_act/tc_ct.h> 12 #include <net/flow_offload.h> 13 #include <net/netfilter/nf_flow_table.h> 14 #include <linux/workqueue.h> 15 #include <linux/refcount.h> 16 #include <linux/xarray.h> 17 #include <linux/if_macvlan.h> 18 #include <linux/debugfs.h> 19 20 #include "lib/fs_chains.h" 21 #include "en/tc_ct.h" 22 #include "en/tc/ct_fs.h" 23 #include "en/tc_priv.h" 24 #include "en/mod_hdr.h" 25 #include "en/mapping.h" 26 #include "en/tc/post_act.h" 27 #include "en.h" 28 #include "en_tc.h" 29 #include "en_rep.h" 30 #include "fs_core.h" 31 32 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) 33 #define MLX5_CT_STATE_TRK_BIT BIT(2) 34 #define MLX5_CT_STATE_NAT_BIT BIT(3) 35 #define MLX5_CT_STATE_REPLY_BIT BIT(4) 36 #define MLX5_CT_STATE_RELATED_BIT BIT(5) 37 #define MLX5_CT_STATE_INVALID_BIT BIT(6) 38 39 #define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG) 40 #define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG) 41 42 /* Statically allocate modify actions for 43 * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10. 44 * This will be increased dynamically if needed (for the ipv6 snat + dnat). 45 */ 46 #define MLX5_CT_MIN_MOD_ACTS 10 47 48 #define ct_dbg(fmt, args...)\ 49 netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) 50 51 struct mlx5_tc_ct_debugfs { 52 struct { 53 atomic_t offloaded; 54 atomic_t rx_dropped; 55 } stats; 56 57 struct dentry *root; 58 }; 59 60 struct mlx5_tc_ct_priv { 61 struct mlx5_core_dev *dev; 62 const struct net_device *netdev; 63 struct mod_hdr_tbl *mod_hdr_tbl; 64 struct xarray tuple_ids; 65 struct rhashtable zone_ht; 66 struct rhashtable ct_tuples_ht; 67 struct rhashtable ct_tuples_nat_ht; 68 struct mlx5_flow_table *ct; 69 struct mlx5_flow_table *ct_nat; 70 struct mlx5e_post_act *post_act; 71 struct mutex control_lock; /* guards parallel adds/dels */ 72 struct mapping_ctx *zone_mapping; 73 struct mapping_ctx *labels_mapping; 74 enum mlx5_flow_namespace_type ns_type; 75 struct mlx5_fs_chains *chains; 76 struct mlx5_ct_fs *fs; 77 struct mlx5_ct_fs_ops *fs_ops; 78 spinlock_t ht_lock; /* protects ft entries */ 79 struct workqueue_struct *wq; 80 81 struct mlx5_tc_ct_debugfs debugfs; 82 }; 83 84 struct mlx5_ct_flow { 85 struct mlx5_flow_attr *pre_ct_attr; 86 struct mlx5_flow_handle *pre_ct_rule; 87 struct mlx5_ct_ft *ft; 88 u32 chain_mapping; 89 }; 90 91 struct mlx5_ct_zone_rule { 92 struct mlx5_ct_fs_rule *rule; 93 struct mlx5e_mod_hdr_handle *mh; 94 struct mlx5_flow_attr *attr; 95 bool nat; 96 }; 97 98 struct mlx5_tc_ct_pre { 99 struct mlx5_flow_table *ft; 100 struct mlx5_flow_group *flow_grp; 101 struct mlx5_flow_group *miss_grp; 102 struct mlx5_flow_handle *flow_rule; 103 struct mlx5_flow_handle *miss_rule; 104 struct mlx5_modify_hdr *modify_hdr; 105 }; 106 107 struct mlx5_ct_ft { 108 struct rhash_head node; 109 u16 zone; 110 u32 zone_restore_id; 111 refcount_t refcount; 112 struct nf_flowtable *nf_ft; 113 struct mlx5_tc_ct_priv *ct_priv; 114 struct rhashtable ct_entries_ht; 115 struct mlx5_tc_ct_pre pre_ct; 116 struct mlx5_tc_ct_pre pre_ct_nat; 117 }; 118 119 struct mlx5_ct_tuple { 120 u16 addr_type; 121 __be16 n_proto; 122 u8 ip_proto; 123 struct { 124 union { 125 __be32 src_v4; 126 struct in6_addr src_v6; 127 }; 128 union { 129 __be32 dst_v4; 130 struct in6_addr dst_v6; 131 }; 132 } ip; 133 struct { 134 __be16 src; 135 __be16 dst; 136 } port; 137 138 u16 zone; 139 }; 140 141 struct mlx5_ct_counter { 142 struct mlx5_fc *counter; 143 refcount_t refcount; 144 bool is_shared; 145 }; 146 147 enum { 148 MLX5_CT_ENTRY_FLAG_VALID, 149 }; 150 151 struct mlx5_ct_entry { 152 struct rhash_head node; 153 struct rhash_head tuple_node; 154 struct rhash_head tuple_nat_node; 155 struct mlx5_ct_counter *counter; 156 unsigned long cookie; 157 unsigned long restore_cookie; 158 struct mlx5_ct_tuple tuple; 159 struct mlx5_ct_tuple tuple_nat; 160 struct mlx5_ct_zone_rule zone_rules[2]; 161 162 struct mlx5_tc_ct_priv *ct_priv; 163 struct work_struct work; 164 165 refcount_t refcnt; 166 unsigned long flags; 167 }; 168 169 static void 170 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 171 struct mlx5_flow_attr *attr, 172 struct mlx5e_mod_hdr_handle *mh); 173 174 static const struct rhashtable_params cts_ht_params = { 175 .head_offset = offsetof(struct mlx5_ct_entry, node), 176 .key_offset = offsetof(struct mlx5_ct_entry, cookie), 177 .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie), 178 .automatic_shrinking = true, 179 .min_size = 16 * 1024, 180 }; 181 182 static const struct rhashtable_params zone_params = { 183 .head_offset = offsetof(struct mlx5_ct_ft, node), 184 .key_offset = offsetof(struct mlx5_ct_ft, zone), 185 .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone), 186 .automatic_shrinking = true, 187 }; 188 189 static const struct rhashtable_params tuples_ht_params = { 190 .head_offset = offsetof(struct mlx5_ct_entry, tuple_node), 191 .key_offset = offsetof(struct mlx5_ct_entry, tuple), 192 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple), 193 .automatic_shrinking = true, 194 .min_size = 16 * 1024, 195 }; 196 197 static const struct rhashtable_params tuples_nat_ht_params = { 198 .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node), 199 .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat), 200 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat), 201 .automatic_shrinking = true, 202 .min_size = 16 * 1024, 203 }; 204 205 static bool 206 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry) 207 { 208 return !!(entry->tuple_nat_node.next); 209 } 210 211 static int 212 mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv, 213 u32 *labels, u32 *id) 214 { 215 if (!memchr_inv(labels, 0, sizeof(u32) * 4)) { 216 *id = 0; 217 return 0; 218 } 219 220 if (mapping_add(ct_priv->labels_mapping, labels, id)) 221 return -EOPNOTSUPP; 222 223 return 0; 224 } 225 226 static void 227 mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id) 228 { 229 if (id) 230 mapping_remove(ct_priv->labels_mapping, id); 231 } 232 233 static int 234 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) 235 { 236 struct flow_match_control control; 237 struct flow_match_basic basic; 238 239 flow_rule_match_basic(rule, &basic); 240 flow_rule_match_control(rule, &control); 241 242 tuple->n_proto = basic.key->n_proto; 243 tuple->ip_proto = basic.key->ip_proto; 244 tuple->addr_type = control.key->addr_type; 245 246 if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 247 struct flow_match_ipv4_addrs match; 248 249 flow_rule_match_ipv4_addrs(rule, &match); 250 tuple->ip.src_v4 = match.key->src; 251 tuple->ip.dst_v4 = match.key->dst; 252 } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 253 struct flow_match_ipv6_addrs match; 254 255 flow_rule_match_ipv6_addrs(rule, &match); 256 tuple->ip.src_v6 = match.key->src; 257 tuple->ip.dst_v6 = match.key->dst; 258 } else { 259 return -EOPNOTSUPP; 260 } 261 262 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 263 struct flow_match_ports match; 264 265 flow_rule_match_ports(rule, &match); 266 switch (tuple->ip_proto) { 267 case IPPROTO_TCP: 268 case IPPROTO_UDP: 269 tuple->port.src = match.key->src; 270 tuple->port.dst = match.key->dst; 271 break; 272 default: 273 return -EOPNOTSUPP; 274 } 275 } else { 276 if (tuple->ip_proto != IPPROTO_GRE) 277 return -EOPNOTSUPP; 278 } 279 280 return 0; 281 } 282 283 static int 284 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, 285 struct flow_rule *rule) 286 { 287 struct flow_action *flow_action = &rule->action; 288 struct flow_action_entry *act; 289 u32 offset, val, ip6_offset; 290 int i; 291 292 flow_action_for_each(i, act, flow_action) { 293 if (act->id != FLOW_ACTION_MANGLE) 294 continue; 295 296 offset = act->mangle.offset; 297 val = act->mangle.val; 298 switch (act->mangle.htype) { 299 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 300 if (offset == offsetof(struct iphdr, saddr)) 301 tuple->ip.src_v4 = cpu_to_be32(val); 302 else if (offset == offsetof(struct iphdr, daddr)) 303 tuple->ip.dst_v4 = cpu_to_be32(val); 304 else 305 return -EOPNOTSUPP; 306 break; 307 308 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 309 ip6_offset = (offset - offsetof(struct ipv6hdr, saddr)); 310 ip6_offset /= 4; 311 if (ip6_offset < 4) 312 tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val); 313 else if (ip6_offset < 8) 314 tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val); 315 else 316 return -EOPNOTSUPP; 317 break; 318 319 case FLOW_ACT_MANGLE_HDR_TYPE_TCP: 320 if (offset == offsetof(struct tcphdr, source)) 321 tuple->port.src = cpu_to_be16(val); 322 else if (offset == offsetof(struct tcphdr, dest)) 323 tuple->port.dst = cpu_to_be16(val); 324 else 325 return -EOPNOTSUPP; 326 break; 327 328 case FLOW_ACT_MANGLE_HDR_TYPE_UDP: 329 if (offset == offsetof(struct udphdr, source)) 330 tuple->port.src = cpu_to_be16(val); 331 else if (offset == offsetof(struct udphdr, dest)) 332 tuple->port.dst = cpu_to_be16(val); 333 else 334 return -EOPNOTSUPP; 335 break; 336 337 default: 338 return -EOPNOTSUPP; 339 } 340 } 341 342 return 0; 343 } 344 345 static int 346 mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv, 347 struct net_device *ndev) 348 { 349 struct mlx5e_priv *other_priv = netdev_priv(ndev); 350 struct mlx5_core_dev *mdev = ct_priv->dev; 351 bool vf_rep, uplink_rep; 352 353 vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); 354 uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); 355 356 if (vf_rep) 357 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; 358 if (uplink_rep) 359 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; 360 if (is_vlan_dev(ndev)) 361 return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev)); 362 if (netif_is_macvlan(ndev)) 363 return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev)); 364 if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev)) 365 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; 366 367 return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT; 368 } 369 370 static int 371 mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv, 372 struct mlx5_flow_spec *spec, 373 struct flow_rule *rule) 374 { 375 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 376 outer_headers); 377 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 378 outer_headers); 379 u16 addr_type = 0; 380 u8 ip_proto = 0; 381 382 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 383 struct flow_match_basic match; 384 385 flow_rule_match_basic(rule, &match); 386 387 mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v); 388 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 389 match.mask->ip_proto); 390 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 391 match.key->ip_proto); 392 393 ip_proto = match.key->ip_proto; 394 } 395 396 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 397 struct flow_match_control match; 398 399 flow_rule_match_control(rule, &match); 400 addr_type = match.key->addr_type; 401 } 402 403 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 404 struct flow_match_ipv4_addrs match; 405 406 flow_rule_match_ipv4_addrs(rule, &match); 407 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 408 src_ipv4_src_ipv6.ipv4_layout.ipv4), 409 &match.mask->src, sizeof(match.mask->src)); 410 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 411 src_ipv4_src_ipv6.ipv4_layout.ipv4), 412 &match.key->src, sizeof(match.key->src)); 413 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 414 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 415 &match.mask->dst, sizeof(match.mask->dst)); 416 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 417 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 418 &match.key->dst, sizeof(match.key->dst)); 419 } 420 421 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 422 struct flow_match_ipv6_addrs match; 423 424 flow_rule_match_ipv6_addrs(rule, &match); 425 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 426 src_ipv4_src_ipv6.ipv6_layout.ipv6), 427 &match.mask->src, sizeof(match.mask->src)); 428 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 429 src_ipv4_src_ipv6.ipv6_layout.ipv6), 430 &match.key->src, sizeof(match.key->src)); 431 432 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 433 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 434 &match.mask->dst, sizeof(match.mask->dst)); 435 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 436 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 437 &match.key->dst, sizeof(match.key->dst)); 438 } 439 440 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 441 struct flow_match_ports match; 442 443 flow_rule_match_ports(rule, &match); 444 switch (ip_proto) { 445 case IPPROTO_TCP: 446 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 447 tcp_sport, ntohs(match.mask->src)); 448 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 449 tcp_sport, ntohs(match.key->src)); 450 451 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 452 tcp_dport, ntohs(match.mask->dst)); 453 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 454 tcp_dport, ntohs(match.key->dst)); 455 break; 456 457 case IPPROTO_UDP: 458 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 459 udp_sport, ntohs(match.mask->src)); 460 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 461 udp_sport, ntohs(match.key->src)); 462 463 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 464 udp_dport, ntohs(match.mask->dst)); 465 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 466 udp_dport, ntohs(match.key->dst)); 467 break; 468 default: 469 break; 470 } 471 } 472 473 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { 474 struct flow_match_tcp match; 475 476 flow_rule_match_tcp(rule, &match); 477 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, 478 ntohs(match.mask->flags)); 479 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, 480 ntohs(match.key->flags)); 481 } 482 483 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { 484 struct flow_match_meta match; 485 486 flow_rule_match_meta(rule, &match); 487 488 if (match.key->ingress_ifindex & match.mask->ingress_ifindex) { 489 struct net_device *dev; 490 491 dev = dev_get_by_index(&init_net, match.key->ingress_ifindex); 492 if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source)) 493 spec->flow_context.flow_source = 494 mlx5_tc_ct_get_flow_source_match(ct_priv, dev); 495 496 dev_put(dev); 497 } 498 } 499 500 return 0; 501 } 502 503 static void 504 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) 505 { 506 if (entry->counter->is_shared && 507 !refcount_dec_and_test(&entry->counter->refcount)) 508 return; 509 510 mlx5_fc_destroy(ct_priv->dev, entry->counter->counter); 511 kfree(entry->counter); 512 } 513 514 static void 515 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, 516 struct mlx5_ct_entry *entry, 517 bool nat) 518 { 519 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 520 struct mlx5_flow_attr *attr = zone_rule->attr; 521 522 ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); 523 524 ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); 525 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); 526 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); 527 kfree(attr); 528 } 529 530 static void 531 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, 532 struct mlx5_ct_entry *entry) 533 { 534 mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); 535 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 536 537 atomic_dec(&ct_priv->debugfs.stats.offloaded); 538 } 539 540 static struct flow_action_entry * 541 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) 542 { 543 struct flow_action *flow_action = &flow_rule->action; 544 struct flow_action_entry *act; 545 int i; 546 547 flow_action_for_each(i, act, flow_action) { 548 if (act->id == FLOW_ACTION_CT_METADATA) 549 return act; 550 } 551 552 return NULL; 553 } 554 555 static int 556 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, 557 struct mlx5e_tc_mod_hdr_acts *mod_acts, 558 u8 ct_state, 559 u32 mark, 560 u32 labels_id, 561 u8 zone_restore_id) 562 { 563 enum mlx5_flow_namespace_type ns = ct_priv->ns_type; 564 struct mlx5_core_dev *dev = ct_priv->dev; 565 int err; 566 567 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 568 CTSTATE_TO_REG, ct_state); 569 if (err) 570 return err; 571 572 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 573 MARK_TO_REG, mark); 574 if (err) 575 return err; 576 577 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 578 LABELS_TO_REG, labels_id); 579 if (err) 580 return err; 581 582 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 583 ZONE_RESTORE_TO_REG, zone_restore_id); 584 if (err) 585 return err; 586 587 /* Make another copy of zone id in reg_b for 588 * NIC rx flows since we don't copy reg_c1 to 589 * reg_b upon miss. 590 */ 591 if (ns != MLX5_FLOW_NAMESPACE_FDB) { 592 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 593 NIC_ZONE_RESTORE_TO_REG, zone_restore_id); 594 if (err) 595 return err; 596 } 597 return 0; 598 } 599 600 int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, 601 struct mlx5e_tc_mod_hdr_acts *mod_acts) 602 { 603 return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); 604 } 605 606 static int 607 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, 608 char *modact) 609 { 610 u32 offset = act->mangle.offset, field; 611 612 switch (act->mangle.htype) { 613 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 614 MLX5_SET(set_action_in, modact, length, 0); 615 if (offset == offsetof(struct iphdr, saddr)) 616 field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; 617 else if (offset == offsetof(struct iphdr, daddr)) 618 field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; 619 else 620 return -EOPNOTSUPP; 621 break; 622 623 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 624 MLX5_SET(set_action_in, modact, length, 0); 625 if (offset == offsetof(struct ipv6hdr, saddr) + 12) 626 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; 627 else if (offset == offsetof(struct ipv6hdr, saddr) + 8) 628 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; 629 else if (offset == offsetof(struct ipv6hdr, saddr) + 4) 630 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; 631 else if (offset == offsetof(struct ipv6hdr, saddr)) 632 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; 633 else if (offset == offsetof(struct ipv6hdr, daddr) + 12) 634 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; 635 else if (offset == offsetof(struct ipv6hdr, daddr) + 8) 636 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; 637 else if (offset == offsetof(struct ipv6hdr, daddr) + 4) 638 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; 639 else if (offset == offsetof(struct ipv6hdr, daddr)) 640 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; 641 else 642 return -EOPNOTSUPP; 643 break; 644 645 case FLOW_ACT_MANGLE_HDR_TYPE_TCP: 646 MLX5_SET(set_action_in, modact, length, 16); 647 if (offset == offsetof(struct tcphdr, source)) 648 field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; 649 else if (offset == offsetof(struct tcphdr, dest)) 650 field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; 651 else 652 return -EOPNOTSUPP; 653 break; 654 655 case FLOW_ACT_MANGLE_HDR_TYPE_UDP: 656 MLX5_SET(set_action_in, modact, length, 16); 657 if (offset == offsetof(struct udphdr, source)) 658 field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; 659 else if (offset == offsetof(struct udphdr, dest)) 660 field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; 661 else 662 return -EOPNOTSUPP; 663 break; 664 665 default: 666 return -EOPNOTSUPP; 667 } 668 669 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); 670 MLX5_SET(set_action_in, modact, offset, 0); 671 MLX5_SET(set_action_in, modact, field, field); 672 MLX5_SET(set_action_in, modact, data, act->mangle.val); 673 674 return 0; 675 } 676 677 static int 678 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, 679 struct flow_rule *flow_rule, 680 struct mlx5e_tc_mod_hdr_acts *mod_acts) 681 { 682 struct flow_action *flow_action = &flow_rule->action; 683 struct mlx5_core_dev *mdev = ct_priv->dev; 684 struct flow_action_entry *act; 685 char *modact; 686 int err, i; 687 688 flow_action_for_each(i, act, flow_action) { 689 switch (act->id) { 690 case FLOW_ACTION_MANGLE: { 691 modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts); 692 if (IS_ERR(modact)) 693 return PTR_ERR(modact); 694 695 err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); 696 if (err) 697 return err; 698 699 mod_acts->num_actions++; 700 } 701 break; 702 703 case FLOW_ACTION_CT_METADATA: 704 /* Handled earlier */ 705 continue; 706 default: 707 return -EOPNOTSUPP; 708 } 709 } 710 711 return 0; 712 } 713 714 static int 715 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 716 struct mlx5_flow_attr *attr, 717 struct flow_rule *flow_rule, 718 struct mlx5e_mod_hdr_handle **mh, 719 u8 zone_restore_id, bool nat_table, bool has_nat) 720 { 721 DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS); 722 DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr); 723 struct flow_action_entry *meta; 724 u16 ct_state = 0; 725 int err; 726 727 meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 728 if (!meta) 729 return -EOPNOTSUPP; 730 731 err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, 732 &attr->ct_attr.ct_labels_id); 733 if (err) 734 return -EOPNOTSUPP; 735 if (nat_table) { 736 if (has_nat) { 737 err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts); 738 if (err) 739 goto err_mapping; 740 } 741 742 ct_state |= MLX5_CT_STATE_NAT_BIT; 743 } 744 745 ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; 746 ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT; 747 err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, 748 ct_state, 749 meta->ct_metadata.mark, 750 attr->ct_attr.ct_labels_id, 751 zone_restore_id); 752 if (err) 753 goto err_mapping; 754 755 if (nat_table && has_nat) { 756 attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type, 757 mod_acts.num_actions, 758 mod_acts.actions); 759 if (IS_ERR(attr->modify_hdr)) { 760 err = PTR_ERR(attr->modify_hdr); 761 goto err_mapping; 762 } 763 764 *mh = NULL; 765 } else { 766 *mh = mlx5e_mod_hdr_attach(ct_priv->dev, 767 ct_priv->mod_hdr_tbl, 768 ct_priv->ns_type, 769 &mod_acts); 770 if (IS_ERR(*mh)) { 771 err = PTR_ERR(*mh); 772 goto err_mapping; 773 } 774 attr->modify_hdr = mlx5e_mod_hdr_get(*mh); 775 } 776 777 mlx5e_mod_hdr_dealloc(&mod_acts); 778 return 0; 779 780 err_mapping: 781 mlx5e_mod_hdr_dealloc(&mod_acts); 782 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); 783 return err; 784 } 785 786 static void 787 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 788 struct mlx5_flow_attr *attr, 789 struct mlx5e_mod_hdr_handle *mh) 790 { 791 if (mh) 792 mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh); 793 else 794 mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr); 795 } 796 797 static int 798 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, 799 struct flow_rule *flow_rule, 800 struct mlx5_ct_entry *entry, 801 bool nat, u8 zone_restore_id) 802 { 803 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 804 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 805 struct mlx5_flow_spec *spec = NULL; 806 struct mlx5_flow_attr *attr; 807 int err; 808 809 zone_rule->nat = nat; 810 811 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 812 if (!spec) 813 return -ENOMEM; 814 815 attr = mlx5_alloc_flow_attr(ct_priv->ns_type); 816 if (!attr) { 817 err = -ENOMEM; 818 goto err_attr; 819 } 820 821 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, 822 &zone_rule->mh, 823 zone_restore_id, 824 nat, 825 mlx5_tc_ct_entry_has_nat(entry)); 826 if (err) { 827 ct_dbg("Failed to create ct entry mod hdr"); 828 goto err_mod_hdr; 829 } 830 831 attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | 832 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 833 MLX5_FLOW_CONTEXT_ACTION_COUNT; 834 attr->dest_chain = 0; 835 attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); 836 attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; 837 if (entry->tuple.ip_proto == IPPROTO_TCP || 838 entry->tuple.ip_proto == IPPROTO_UDP) 839 attr->outer_match_level = MLX5_MATCH_L4; 840 else 841 attr->outer_match_level = MLX5_MATCH_L3; 842 attr->counter = entry->counter->counter; 843 attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; 844 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) 845 attr->esw_attr->in_mdev = priv->mdev; 846 847 mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); 848 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); 849 850 zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); 851 if (IS_ERR(zone_rule->rule)) { 852 err = PTR_ERR(zone_rule->rule); 853 ct_dbg("Failed to add ct entry rule, nat: %d", nat); 854 goto err_rule; 855 } 856 857 zone_rule->attr = attr; 858 859 kvfree(spec); 860 ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); 861 862 return 0; 863 864 err_rule: 865 mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); 866 mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); 867 err_mod_hdr: 868 kfree(attr); 869 err_attr: 870 kvfree(spec); 871 return err; 872 } 873 874 static bool 875 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) 876 { 877 return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); 878 } 879 880 static struct mlx5_ct_entry * 881 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple) 882 { 883 struct mlx5_ct_entry *entry; 884 885 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple, 886 tuples_ht_params); 887 if (entry && mlx5_tc_ct_entry_valid(entry) && 888 refcount_inc_not_zero(&entry->refcnt)) { 889 return entry; 890 } else if (!entry) { 891 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, 892 tuple, tuples_nat_ht_params); 893 if (entry && mlx5_tc_ct_entry_valid(entry) && 894 refcount_inc_not_zero(&entry->refcnt)) 895 return entry; 896 } 897 898 return entry ? ERR_PTR(-EINVAL) : NULL; 899 } 900 901 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry) 902 { 903 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; 904 905 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 906 &entry->tuple_nat_node, 907 tuples_nat_ht_params); 908 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, 909 tuples_ht_params); 910 } 911 912 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry) 913 { 914 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; 915 916 mlx5_tc_ct_entry_del_rules(ct_priv, entry); 917 918 spin_lock_bh(&ct_priv->ht_lock); 919 mlx5_tc_ct_entry_remove_from_tuples(entry); 920 spin_unlock_bh(&ct_priv->ht_lock); 921 922 mlx5_tc_ct_counter_put(ct_priv, entry); 923 kfree(entry); 924 } 925 926 static void 927 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) 928 { 929 if (!refcount_dec_and_test(&entry->refcnt)) 930 return; 931 932 mlx5_tc_ct_entry_del(entry); 933 } 934 935 static void mlx5_tc_ct_entry_del_work(struct work_struct *work) 936 { 937 struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work); 938 939 mlx5_tc_ct_entry_del(entry); 940 } 941 942 static void 943 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) 944 { 945 if (!refcount_dec_and_test(&entry->refcnt)) 946 return; 947 948 INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); 949 queue_work(entry->ct_priv->wq, &entry->work); 950 } 951 952 static struct mlx5_ct_counter * 953 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) 954 { 955 struct mlx5_ct_counter *counter; 956 int ret; 957 958 counter = kzalloc(sizeof(*counter), GFP_KERNEL); 959 if (!counter) 960 return ERR_PTR(-ENOMEM); 961 962 counter->is_shared = false; 963 counter->counter = mlx5_fc_create_ex(ct_priv->dev, true); 964 if (IS_ERR(counter->counter)) { 965 ct_dbg("Failed to create counter for ct entry"); 966 ret = PTR_ERR(counter->counter); 967 kfree(counter); 968 return ERR_PTR(ret); 969 } 970 971 return counter; 972 } 973 974 static struct mlx5_ct_counter * 975 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, 976 struct mlx5_ct_entry *entry) 977 { 978 struct mlx5_ct_tuple rev_tuple = entry->tuple; 979 struct mlx5_ct_counter *shared_counter; 980 struct mlx5_ct_entry *rev_entry; 981 982 /* get the reversed tuple */ 983 swap(rev_tuple.port.src, rev_tuple.port.dst); 984 985 if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 986 __be32 tmp_addr = rev_tuple.ip.src_v4; 987 988 rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4; 989 rev_tuple.ip.dst_v4 = tmp_addr; 990 } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 991 struct in6_addr tmp_addr = rev_tuple.ip.src_v6; 992 993 rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6; 994 rev_tuple.ip.dst_v6 = tmp_addr; 995 } else { 996 return ERR_PTR(-EOPNOTSUPP); 997 } 998 999 /* Use the same counter as the reverse direction */ 1000 spin_lock_bh(&ct_priv->ht_lock); 1001 rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple); 1002 1003 if (IS_ERR(rev_entry)) { 1004 spin_unlock_bh(&ct_priv->ht_lock); 1005 goto create_counter; 1006 } 1007 1008 if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) { 1009 ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry); 1010 shared_counter = rev_entry->counter; 1011 spin_unlock_bh(&ct_priv->ht_lock); 1012 1013 mlx5_tc_ct_entry_put(rev_entry); 1014 return shared_counter; 1015 } 1016 1017 spin_unlock_bh(&ct_priv->ht_lock); 1018 1019 create_counter: 1020 1021 shared_counter = mlx5_tc_ct_counter_create(ct_priv); 1022 if (IS_ERR(shared_counter)) 1023 return shared_counter; 1024 1025 shared_counter->is_shared = true; 1026 refcount_set(&shared_counter->refcount, 1); 1027 return shared_counter; 1028 } 1029 1030 static int 1031 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, 1032 struct flow_rule *flow_rule, 1033 struct mlx5_ct_entry *entry, 1034 u8 zone_restore_id) 1035 { 1036 int err; 1037 1038 if (nf_ct_acct_enabled(dev_net(ct_priv->netdev))) 1039 entry->counter = mlx5_tc_ct_counter_create(ct_priv); 1040 else 1041 entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); 1042 1043 if (IS_ERR(entry->counter)) { 1044 err = PTR_ERR(entry->counter); 1045 return err; 1046 } 1047 1048 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false, 1049 zone_restore_id); 1050 if (err) 1051 goto err_orig; 1052 1053 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true, 1054 zone_restore_id); 1055 if (err) 1056 goto err_nat; 1057 1058 atomic_inc(&ct_priv->debugfs.stats.offloaded); 1059 return 0; 1060 1061 err_nat: 1062 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 1063 err_orig: 1064 mlx5_tc_ct_counter_put(ct_priv, entry); 1065 return err; 1066 } 1067 1068 static int 1069 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, 1070 struct flow_cls_offload *flow) 1071 { 1072 struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); 1073 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1074 struct flow_action_entry *meta_action; 1075 unsigned long cookie = flow->cookie; 1076 struct mlx5_ct_entry *entry; 1077 int err; 1078 1079 meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 1080 if (!meta_action) 1081 return -EOPNOTSUPP; 1082 1083 spin_lock_bh(&ct_priv->ht_lock); 1084 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 1085 if (entry && refcount_inc_not_zero(&entry->refcnt)) { 1086 spin_unlock_bh(&ct_priv->ht_lock); 1087 mlx5_tc_ct_entry_put(entry); 1088 return -EEXIST; 1089 } 1090 spin_unlock_bh(&ct_priv->ht_lock); 1091 1092 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1093 if (!entry) 1094 return -ENOMEM; 1095 1096 entry->tuple.zone = ft->zone; 1097 entry->cookie = flow->cookie; 1098 entry->restore_cookie = meta_action->ct_metadata.cookie; 1099 refcount_set(&entry->refcnt, 2); 1100 entry->ct_priv = ct_priv; 1101 1102 err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); 1103 if (err) 1104 goto err_set; 1105 1106 memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple)); 1107 err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule); 1108 if (err) 1109 goto err_set; 1110 1111 spin_lock_bh(&ct_priv->ht_lock); 1112 1113 err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node, 1114 cts_ht_params); 1115 if (err) 1116 goto err_entries; 1117 1118 err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht, 1119 &entry->tuple_node, 1120 tuples_ht_params); 1121 if (err) 1122 goto err_tuple; 1123 1124 if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { 1125 err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, 1126 &entry->tuple_nat_node, 1127 tuples_nat_ht_params); 1128 if (err) 1129 goto err_tuple_nat; 1130 } 1131 spin_unlock_bh(&ct_priv->ht_lock); 1132 1133 err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, 1134 ft->zone_restore_id); 1135 if (err) 1136 goto err_rules; 1137 1138 set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); 1139 mlx5_tc_ct_entry_put(entry); /* this function reference */ 1140 1141 return 0; 1142 1143 err_rules: 1144 spin_lock_bh(&ct_priv->ht_lock); 1145 if (mlx5_tc_ct_entry_has_nat(entry)) 1146 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 1147 &entry->tuple_nat_node, tuples_nat_ht_params); 1148 err_tuple_nat: 1149 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, 1150 &entry->tuple_node, 1151 tuples_ht_params); 1152 err_tuple: 1153 rhashtable_remove_fast(&ft->ct_entries_ht, 1154 &entry->node, 1155 cts_ht_params); 1156 err_entries: 1157 spin_unlock_bh(&ct_priv->ht_lock); 1158 err_set: 1159 kfree(entry); 1160 if (err != -EEXIST) 1161 netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err); 1162 return err; 1163 } 1164 1165 static int 1166 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, 1167 struct flow_cls_offload *flow) 1168 { 1169 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1170 unsigned long cookie = flow->cookie; 1171 struct mlx5_ct_entry *entry; 1172 1173 spin_lock_bh(&ct_priv->ht_lock); 1174 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 1175 if (!entry) { 1176 spin_unlock_bh(&ct_priv->ht_lock); 1177 return -ENOENT; 1178 } 1179 1180 if (!mlx5_tc_ct_entry_valid(entry)) { 1181 spin_unlock_bh(&ct_priv->ht_lock); 1182 return -EINVAL; 1183 } 1184 1185 rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); 1186 spin_unlock_bh(&ct_priv->ht_lock); 1187 1188 mlx5_tc_ct_entry_put(entry); 1189 1190 return 0; 1191 } 1192 1193 static int 1194 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, 1195 struct flow_cls_offload *f) 1196 { 1197 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1198 unsigned long cookie = f->cookie; 1199 struct mlx5_ct_entry *entry; 1200 u64 lastuse, packets, bytes; 1201 1202 spin_lock_bh(&ct_priv->ht_lock); 1203 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 1204 if (!entry) { 1205 spin_unlock_bh(&ct_priv->ht_lock); 1206 return -ENOENT; 1207 } 1208 1209 if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) { 1210 spin_unlock_bh(&ct_priv->ht_lock); 1211 return -EINVAL; 1212 } 1213 1214 spin_unlock_bh(&ct_priv->ht_lock); 1215 1216 mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); 1217 flow_stats_update(&f->stats, bytes, packets, 0, lastuse, 1218 FLOW_ACTION_HW_STATS_DELAYED); 1219 1220 mlx5_tc_ct_entry_put(entry); 1221 return 0; 1222 } 1223 1224 static int 1225 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, 1226 void *cb_priv) 1227 { 1228 struct flow_cls_offload *f = type_data; 1229 struct mlx5_ct_ft *ft = cb_priv; 1230 1231 if (type != TC_SETUP_CLSFLOWER) 1232 return -EOPNOTSUPP; 1233 1234 switch (f->command) { 1235 case FLOW_CLS_REPLACE: 1236 return mlx5_tc_ct_block_flow_offload_add(ft, f); 1237 case FLOW_CLS_DESTROY: 1238 return mlx5_tc_ct_block_flow_offload_del(ft, f); 1239 case FLOW_CLS_STATS: 1240 return mlx5_tc_ct_block_flow_offload_stats(ft, f); 1241 default: 1242 break; 1243 } 1244 1245 return -EOPNOTSUPP; 1246 } 1247 1248 static bool 1249 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, 1250 u16 zone) 1251 { 1252 struct flow_keys flow_keys; 1253 1254 skb_reset_network_header(skb); 1255 skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); 1256 1257 tuple->zone = zone; 1258 1259 if (flow_keys.basic.ip_proto != IPPROTO_TCP && 1260 flow_keys.basic.ip_proto != IPPROTO_UDP && 1261 flow_keys.basic.ip_proto != IPPROTO_GRE) 1262 return false; 1263 1264 if (flow_keys.basic.ip_proto == IPPROTO_TCP || 1265 flow_keys.basic.ip_proto == IPPROTO_UDP) { 1266 tuple->port.src = flow_keys.ports.src; 1267 tuple->port.dst = flow_keys.ports.dst; 1268 } 1269 tuple->n_proto = flow_keys.basic.n_proto; 1270 tuple->ip_proto = flow_keys.basic.ip_proto; 1271 1272 switch (flow_keys.basic.n_proto) { 1273 case htons(ETH_P_IP): 1274 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1275 tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src; 1276 tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst; 1277 break; 1278 1279 case htons(ETH_P_IPV6): 1280 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 1281 tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src; 1282 tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst; 1283 break; 1284 default: 1285 goto out; 1286 } 1287 1288 return true; 1289 1290 out: 1291 return false; 1292 } 1293 1294 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) 1295 { 1296 u32 ctstate = 0, ctstate_mask = 0; 1297 1298 mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, 1299 &ctstate, &ctstate_mask); 1300 1301 if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT) 1302 return -EOPNOTSUPP; 1303 1304 ctstate_mask |= MLX5_CT_STATE_TRK_BIT; 1305 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 1306 ctstate, ctstate_mask); 1307 1308 return 0; 1309 } 1310 1311 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) 1312 { 1313 if (!priv || !ct_attr->ct_labels_id) 1314 return; 1315 1316 mlx5_put_label_mapping(priv, ct_attr->ct_labels_id); 1317 } 1318 1319 int 1320 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, 1321 struct mlx5_flow_spec *spec, 1322 struct flow_cls_offload *f, 1323 struct mlx5_ct_attr *ct_attr, 1324 struct netlink_ext_ack *extack) 1325 { 1326 bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv; 1327 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 1328 struct flow_dissector_key_ct *mask, *key; 1329 u32 ctstate = 0, ctstate_mask = 0; 1330 u16 ct_state_on, ct_state_off; 1331 u16 ct_state, ct_state_mask; 1332 struct flow_match_ct match; 1333 u32 ct_labels[4]; 1334 1335 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) 1336 return 0; 1337 1338 if (!priv) { 1339 NL_SET_ERR_MSG_MOD(extack, 1340 "offload of ct matching isn't available"); 1341 return -EOPNOTSUPP; 1342 } 1343 1344 flow_rule_match_ct(rule, &match); 1345 1346 key = match.key; 1347 mask = match.mask; 1348 1349 ct_state = key->ct_state; 1350 ct_state_mask = mask->ct_state; 1351 1352 if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | 1353 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | 1354 TCA_FLOWER_KEY_CT_FLAGS_NEW | 1355 TCA_FLOWER_KEY_CT_FLAGS_REPLY | 1356 TCA_FLOWER_KEY_CT_FLAGS_RELATED | 1357 TCA_FLOWER_KEY_CT_FLAGS_INVALID)) { 1358 NL_SET_ERR_MSG_MOD(extack, 1359 "only ct_state trk, est, new and rpl are supported for offload"); 1360 return -EOPNOTSUPP; 1361 } 1362 1363 ct_state_on = ct_state & ct_state_mask; 1364 ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; 1365 trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1366 new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; 1367 est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1368 rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY; 1369 rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED; 1370 inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID; 1371 untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1372 unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1373 unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; 1374 unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED; 1375 uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID; 1376 1377 ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; 1378 ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1379 ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0; 1380 ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; 1381 ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1382 ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; 1383 ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0; 1384 ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0; 1385 1386 if (rel) { 1387 NL_SET_ERR_MSG_MOD(extack, 1388 "matching on ct_state +rel isn't supported"); 1389 return -EOPNOTSUPP; 1390 } 1391 1392 if (inv) { 1393 NL_SET_ERR_MSG_MOD(extack, 1394 "matching on ct_state +inv isn't supported"); 1395 return -EOPNOTSUPP; 1396 } 1397 1398 if (new) { 1399 NL_SET_ERR_MSG_MOD(extack, 1400 "matching on ct_state +new isn't supported"); 1401 return -EOPNOTSUPP; 1402 } 1403 1404 if (mask->ct_zone) 1405 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1406 key->ct_zone, MLX5_CT_ZONE_MASK); 1407 if (ctstate_mask) 1408 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 1409 ctstate, ctstate_mask); 1410 if (mask->ct_mark) 1411 mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, 1412 key->ct_mark, mask->ct_mark); 1413 if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] || 1414 mask->ct_labels[3]) { 1415 ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0]; 1416 ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; 1417 ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; 1418 ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; 1419 if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id)) 1420 return -EOPNOTSUPP; 1421 mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, 1422 MLX5_CT_LABELS_MASK); 1423 } 1424 1425 return 0; 1426 } 1427 1428 int 1429 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, 1430 struct mlx5_flow_attr *attr, 1431 struct mlx5e_tc_mod_hdr_acts *mod_acts, 1432 const struct flow_action_entry *act, 1433 struct netlink_ext_ack *extack) 1434 { 1435 if (!priv) { 1436 NL_SET_ERR_MSG_MOD(extack, 1437 "offload of ct action isn't available"); 1438 return -EOPNOTSUPP; 1439 } 1440 1441 attr->ct_attr.zone = act->ct.zone; 1442 attr->ct_attr.ct_action = act->ct.action; 1443 attr->ct_attr.nf_ft = act->ct.flow_table; 1444 1445 return 0; 1446 } 1447 1448 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, 1449 struct mlx5_tc_ct_pre *pre_ct, 1450 bool nat) 1451 { 1452 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1453 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; 1454 struct mlx5_core_dev *dev = ct_priv->dev; 1455 struct mlx5_flow_table *ft = pre_ct->ft; 1456 struct mlx5_flow_destination dest = {}; 1457 struct mlx5_flow_act flow_act = {}; 1458 struct mlx5_modify_hdr *mod_hdr; 1459 struct mlx5_flow_handle *rule; 1460 struct mlx5_flow_spec *spec; 1461 u32 ctstate; 1462 u16 zone; 1463 int err; 1464 1465 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1466 if (!spec) 1467 return -ENOMEM; 1468 1469 zone = ct_ft->zone & MLX5_CT_ZONE_MASK; 1470 err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type, 1471 ZONE_TO_REG, zone); 1472 if (err) { 1473 ct_dbg("Failed to set zone register mapping"); 1474 goto err_mapping; 1475 } 1476 1477 mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type, 1478 pre_mod_acts.num_actions, 1479 pre_mod_acts.actions); 1480 1481 if (IS_ERR(mod_hdr)) { 1482 err = PTR_ERR(mod_hdr); 1483 ct_dbg("Failed to create pre ct mod hdr"); 1484 goto err_mapping; 1485 } 1486 pre_ct->modify_hdr = mod_hdr; 1487 1488 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1489 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1490 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; 1491 flow_act.modify_hdr = mod_hdr; 1492 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 1493 1494 /* add flow rule */ 1495 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1496 zone, MLX5_CT_ZONE_MASK); 1497 ctstate = MLX5_CT_STATE_TRK_BIT; 1498 if (nat) 1499 ctstate |= MLX5_CT_STATE_NAT_BIT; 1500 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); 1501 1502 dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); 1503 rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); 1504 if (IS_ERR(rule)) { 1505 err = PTR_ERR(rule); 1506 ct_dbg("Failed to add pre ct flow rule zone %d", zone); 1507 goto err_flow_rule; 1508 } 1509 pre_ct->flow_rule = rule; 1510 1511 /* add miss rule */ 1512 dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct; 1513 rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1); 1514 if (IS_ERR(rule)) { 1515 err = PTR_ERR(rule); 1516 ct_dbg("Failed to add pre ct miss rule zone %d", zone); 1517 goto err_miss_rule; 1518 } 1519 pre_ct->miss_rule = rule; 1520 1521 mlx5e_mod_hdr_dealloc(&pre_mod_acts); 1522 kvfree(spec); 1523 return 0; 1524 1525 err_miss_rule: 1526 mlx5_del_flow_rules(pre_ct->flow_rule); 1527 err_flow_rule: 1528 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1529 err_mapping: 1530 mlx5e_mod_hdr_dealloc(&pre_mod_acts); 1531 kvfree(spec); 1532 return err; 1533 } 1534 1535 static void 1536 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, 1537 struct mlx5_tc_ct_pre *pre_ct) 1538 { 1539 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1540 struct mlx5_core_dev *dev = ct_priv->dev; 1541 1542 mlx5_del_flow_rules(pre_ct->flow_rule); 1543 mlx5_del_flow_rules(pre_ct->miss_rule); 1544 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1545 } 1546 1547 static int 1548 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, 1549 struct mlx5_tc_ct_pre *pre_ct, 1550 bool nat) 1551 { 1552 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); 1553 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1554 struct mlx5_core_dev *dev = ct_priv->dev; 1555 struct mlx5_flow_table_attr ft_attr = {}; 1556 struct mlx5_flow_namespace *ns; 1557 struct mlx5_flow_table *ft; 1558 struct mlx5_flow_group *g; 1559 u32 metadata_reg_c_2_mask; 1560 u32 *flow_group_in; 1561 void *misc; 1562 int err; 1563 1564 ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type); 1565 if (!ns) { 1566 err = -EOPNOTSUPP; 1567 ct_dbg("Failed to get flow namespace"); 1568 return err; 1569 } 1570 1571 flow_group_in = kvzalloc(inlen, GFP_KERNEL); 1572 if (!flow_group_in) 1573 return -ENOMEM; 1574 1575 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; 1576 ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ? 1577 FDB_TC_OFFLOAD : MLX5E_TC_PRIO; 1578 ft_attr.max_fte = 2; 1579 ft_attr.level = 1; 1580 ft = mlx5_create_flow_table(ns, &ft_attr); 1581 if (IS_ERR(ft)) { 1582 err = PTR_ERR(ft); 1583 ct_dbg("Failed to create pre ct table"); 1584 goto out_free; 1585 } 1586 pre_ct->ft = ft; 1587 1588 /* create flow group */ 1589 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); 1590 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); 1591 MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 1592 MLX5_MATCH_MISC_PARAMETERS_2); 1593 1594 misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, 1595 match_criteria.misc_parameters_2); 1596 1597 metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; 1598 metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); 1599 if (nat) 1600 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); 1601 1602 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, 1603 metadata_reg_c_2_mask); 1604 1605 g = mlx5_create_flow_group(ft, flow_group_in); 1606 if (IS_ERR(g)) { 1607 err = PTR_ERR(g); 1608 ct_dbg("Failed to create pre ct group"); 1609 goto err_flow_grp; 1610 } 1611 pre_ct->flow_grp = g; 1612 1613 /* create miss group */ 1614 memset(flow_group_in, 0, inlen); 1615 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); 1616 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); 1617 g = mlx5_create_flow_group(ft, flow_group_in); 1618 if (IS_ERR(g)) { 1619 err = PTR_ERR(g); 1620 ct_dbg("Failed to create pre ct miss group"); 1621 goto err_miss_grp; 1622 } 1623 pre_ct->miss_grp = g; 1624 1625 err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); 1626 if (err) 1627 goto err_add_rules; 1628 1629 kvfree(flow_group_in); 1630 return 0; 1631 1632 err_add_rules: 1633 mlx5_destroy_flow_group(pre_ct->miss_grp); 1634 err_miss_grp: 1635 mlx5_destroy_flow_group(pre_ct->flow_grp); 1636 err_flow_grp: 1637 mlx5_destroy_flow_table(ft); 1638 out_free: 1639 kvfree(flow_group_in); 1640 return err; 1641 } 1642 1643 static void 1644 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, 1645 struct mlx5_tc_ct_pre *pre_ct) 1646 { 1647 tc_ct_pre_ct_del_rules(ct_ft, pre_ct); 1648 mlx5_destroy_flow_group(pre_ct->miss_grp); 1649 mlx5_destroy_flow_group(pre_ct->flow_grp); 1650 mlx5_destroy_flow_table(pre_ct->ft); 1651 } 1652 1653 static int 1654 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) 1655 { 1656 int err; 1657 1658 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); 1659 if (err) 1660 return err; 1661 1662 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); 1663 if (err) 1664 goto err_pre_ct_nat; 1665 1666 return 0; 1667 1668 err_pre_ct_nat: 1669 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1670 return err; 1671 } 1672 1673 static void 1674 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) 1675 { 1676 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); 1677 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1678 } 1679 1680 /* To avoid false lock dependency warning set the ct_entries_ht lock 1681 * class different than the lock class of the ht being used when deleting 1682 * last flow from a group and then deleting a group, we get into del_sw_flow_group() 1683 * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but 1684 * it's different than the ht->mutex here. 1685 */ 1686 static struct lock_class_key ct_entries_ht_lock_key; 1687 1688 static struct mlx5_ct_ft * 1689 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, 1690 struct nf_flowtable *nf_ft) 1691 { 1692 struct mlx5_ct_ft *ft; 1693 int err; 1694 1695 ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); 1696 if (ft) { 1697 refcount_inc(&ft->refcount); 1698 return ft; 1699 } 1700 1701 ft = kzalloc(sizeof(*ft), GFP_KERNEL); 1702 if (!ft) 1703 return ERR_PTR(-ENOMEM); 1704 1705 err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id); 1706 if (err) 1707 goto err_mapping; 1708 1709 ft->zone = zone; 1710 ft->nf_ft = nf_ft; 1711 ft->ct_priv = ct_priv; 1712 refcount_set(&ft->refcount, 1); 1713 1714 err = mlx5_tc_ct_alloc_pre_ct_tables(ft); 1715 if (err) 1716 goto err_alloc_pre_ct; 1717 1718 err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); 1719 if (err) 1720 goto err_init; 1721 1722 lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key); 1723 1724 err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, 1725 zone_params); 1726 if (err) 1727 goto err_insert; 1728 1729 err = nf_flow_table_offload_add_cb(ft->nf_ft, 1730 mlx5_tc_ct_block_flow_offload, ft); 1731 if (err) 1732 goto err_add_cb; 1733 1734 return ft; 1735 1736 err_add_cb: 1737 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1738 err_insert: 1739 rhashtable_destroy(&ft->ct_entries_ht); 1740 err_init: 1741 mlx5_tc_ct_free_pre_ct_tables(ft); 1742 err_alloc_pre_ct: 1743 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1744 err_mapping: 1745 kfree(ft); 1746 return ERR_PTR(err); 1747 } 1748 1749 static void 1750 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) 1751 { 1752 struct mlx5_ct_entry *entry = ptr; 1753 1754 mlx5_tc_ct_entry_put(entry); 1755 } 1756 1757 static void 1758 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) 1759 { 1760 if (!refcount_dec_and_test(&ft->refcount)) 1761 return; 1762 1763 flush_workqueue(ct_priv->wq); 1764 nf_flow_table_offload_del_cb(ft->nf_ft, 1765 mlx5_tc_ct_block_flow_offload, ft); 1766 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1767 rhashtable_free_and_destroy(&ft->ct_entries_ht, 1768 mlx5_tc_ct_flush_ft_entry, 1769 ct_priv); 1770 mlx5_tc_ct_free_pre_ct_tables(ft); 1771 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1772 kfree(ft); 1773 } 1774 1775 /* We translate the tc filter with CT action to the following HW model: 1776 * 1777 * +---------------------+ 1778 * + ft prio (tc chain) + 1779 * + original match + 1780 * +---------------------+ 1781 * | set chain miss mapping 1782 * | set fte_id 1783 * | set tunnel_id 1784 * | do decap 1785 * v 1786 * +---------------------+ 1787 * + pre_ct/pre_ct_nat + if matches +-------------------------+ 1788 * + zone+nat match +---------------->+ post_act (see below) + 1789 * +---------------------+ set zone +-------------------------+ 1790 * | set zone 1791 * v 1792 * +--------------------+ 1793 * + CT (nat or no nat) + 1794 * + tuple + zone match + 1795 * +--------------------+ 1796 * | set mark 1797 * | set labels_id 1798 * | set established 1799 * | set zone_restore 1800 * | do nat (if needed) 1801 * v 1802 * +--------------+ 1803 * + post_act + original filter actions 1804 * + fte_id match +------------------------> 1805 * +--------------+ 1806 */ 1807 static struct mlx5_flow_handle * 1808 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, 1809 struct mlx5_flow_spec *orig_spec, 1810 struct mlx5_flow_attr *attr) 1811 { 1812 bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; 1813 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 1814 struct mlx5e_tc_mod_hdr_acts *pre_mod_acts; 1815 u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); 1816 struct mlx5_flow_attr *pre_ct_attr; 1817 struct mlx5_modify_hdr *mod_hdr; 1818 struct mlx5_ct_flow *ct_flow; 1819 int chain_mapping = 0, err; 1820 struct mlx5_ct_ft *ft; 1821 1822 ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); 1823 if (!ct_flow) { 1824 return ERR_PTR(-ENOMEM); 1825 } 1826 1827 /* Register for CT established events */ 1828 ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, 1829 attr->ct_attr.nf_ft); 1830 if (IS_ERR(ft)) { 1831 err = PTR_ERR(ft); 1832 ct_dbg("Failed to register to ft callback"); 1833 goto err_ft; 1834 } 1835 ct_flow->ft = ft; 1836 1837 /* Base flow attributes of both rules on original rule attribute */ 1838 ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); 1839 if (!ct_flow->pre_ct_attr) { 1840 err = -ENOMEM; 1841 goto err_alloc_pre; 1842 } 1843 1844 pre_ct_attr = ct_flow->pre_ct_attr; 1845 memcpy(pre_ct_attr, attr, attr_sz); 1846 pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts; 1847 1848 /* Modify the original rule's action to fwd and modify, leave decap */ 1849 pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; 1850 pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1851 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1852 1853 /* Write chain miss tag for miss in ct table as we 1854 * don't go though all prios of this chain as normal tc rules 1855 * miss. 1856 */ 1857 err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain, 1858 &chain_mapping); 1859 if (err) { 1860 ct_dbg("Failed to get chain register mapping for chain"); 1861 goto err_get_chain; 1862 } 1863 ct_flow->chain_mapping = chain_mapping; 1864 1865 err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, 1866 CHAIN_TO_REG, chain_mapping); 1867 if (err) { 1868 ct_dbg("Failed to set chain register mapping"); 1869 goto err_mapping; 1870 } 1871 1872 /* If original flow is decap, we do it before going into ct table 1873 * so add a rewrite for the tunnel match_id. 1874 */ 1875 if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && 1876 attr->chain == 0) { 1877 err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, 1878 ct_priv->ns_type, 1879 TUNNEL_TO_REG, 1880 attr->tunnel_id); 1881 if (err) { 1882 ct_dbg("Failed to set tunnel register mapping"); 1883 goto err_mapping; 1884 } 1885 } 1886 1887 mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, 1888 pre_mod_acts->num_actions, 1889 pre_mod_acts->actions); 1890 if (IS_ERR(mod_hdr)) { 1891 err = PTR_ERR(mod_hdr); 1892 ct_dbg("Failed to create pre ct mod hdr"); 1893 goto err_mapping; 1894 } 1895 pre_ct_attr->modify_hdr = mod_hdr; 1896 1897 /* Change original rule point to ct table */ 1898 pre_ct_attr->dest_chain = 0; 1899 pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft; 1900 ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec, 1901 pre_ct_attr); 1902 if (IS_ERR(ct_flow->pre_ct_rule)) { 1903 err = PTR_ERR(ct_flow->pre_ct_rule); 1904 ct_dbg("Failed to add pre ct rule"); 1905 goto err_insert_orig; 1906 } 1907 1908 attr->ct_attr.ct_flow = ct_flow; 1909 mlx5e_mod_hdr_dealloc(pre_mod_acts); 1910 1911 return ct_flow->pre_ct_rule; 1912 1913 err_insert_orig: 1914 mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); 1915 err_mapping: 1916 mlx5e_mod_hdr_dealloc(pre_mod_acts); 1917 mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); 1918 err_get_chain: 1919 kfree(ct_flow->pre_ct_attr); 1920 err_alloc_pre: 1921 mlx5_tc_ct_del_ft_cb(ct_priv, ft); 1922 err_ft: 1923 kfree(ct_flow); 1924 netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); 1925 return ERR_PTR(err); 1926 } 1927 1928 struct mlx5_flow_handle * 1929 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, 1930 struct mlx5_flow_spec *spec, 1931 struct mlx5_flow_attr *attr, 1932 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) 1933 { 1934 struct mlx5_flow_handle *rule; 1935 1936 if (!priv) 1937 return ERR_PTR(-EOPNOTSUPP); 1938 1939 mutex_lock(&priv->control_lock); 1940 rule = __mlx5_tc_ct_flow_offload(priv, spec, attr); 1941 mutex_unlock(&priv->control_lock); 1942 1943 return rule; 1944 } 1945 1946 static void 1947 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, 1948 struct mlx5_ct_flow *ct_flow, 1949 struct mlx5_flow_attr *attr) 1950 { 1951 struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; 1952 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 1953 1954 mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr); 1955 mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); 1956 1957 mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); 1958 mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); 1959 1960 kfree(ct_flow->pre_ct_attr); 1961 kfree(ct_flow); 1962 } 1963 1964 void 1965 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, 1966 struct mlx5_flow_attr *attr) 1967 { 1968 struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; 1969 1970 /* We are called on error to clean up stuff from parsing 1971 * but we don't have anything for now 1972 */ 1973 if (!ct_flow) 1974 return; 1975 1976 mutex_lock(&priv->control_lock); 1977 __mlx5_tc_ct_delete_flow(priv, ct_flow, attr); 1978 mutex_unlock(&priv->control_lock); 1979 } 1980 1981 static int 1982 mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv) 1983 { 1984 struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act); 1985 struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get(); 1986 int err; 1987 1988 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB && 1989 ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { 1990 ct_dbg("Using SMFS ct flow steering provider"); 1991 fs_ops = mlx5_ct_fs_smfs_ops_get(); 1992 } 1993 1994 ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL); 1995 if (!ct_priv->fs) 1996 return -ENOMEM; 1997 1998 ct_priv->fs->netdev = ct_priv->netdev; 1999 ct_priv->fs->dev = ct_priv->dev; 2000 ct_priv->fs_ops = fs_ops; 2001 2002 err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct); 2003 if (err) 2004 goto err_init; 2005 2006 return 0; 2007 2008 err_init: 2009 kfree(ct_priv->fs); 2010 return err; 2011 } 2012 2013 static int 2014 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, 2015 const char **err_msg) 2016 { 2017 if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { 2018 /* vlan workaround should be avoided for multi chain rules. 2019 * This is just a sanity check as pop vlan action should 2020 * be supported by any FW that supports ignore_flow_level 2021 */ 2022 2023 *err_msg = "firmware vlan actions support is missing"; 2024 return -EOPNOTSUPP; 2025 } 2026 2027 if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, 2028 fdb_modify_header_fwd_to_table)) { 2029 /* CT always writes to registers which are mod header actions. 2030 * Therefore, mod header and goto is required 2031 */ 2032 2033 *err_msg = "firmware fwd and modify support is missing"; 2034 return -EOPNOTSUPP; 2035 } 2036 2037 if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { 2038 *err_msg = "register loopback isn't supported"; 2039 return -EOPNOTSUPP; 2040 } 2041 2042 return 0; 2043 } 2044 2045 static int 2046 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, 2047 enum mlx5_flow_namespace_type ns_type, 2048 struct mlx5e_post_act *post_act) 2049 { 2050 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 2051 const char *err_msg = NULL; 2052 int err = 0; 2053 2054 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) 2055 /* cannot restore chain ID on HW miss */ 2056 2057 err_msg = "tc skb extension missing"; 2058 err = -EOPNOTSUPP; 2059 goto out_err; 2060 #endif 2061 if (IS_ERR_OR_NULL(post_act)) { 2062 /* Ignore_flow_level support isn't supported by default for VFs and so post_act 2063 * won't be supported. Skip showing error msg. 2064 */ 2065 if (priv->mdev->coredev_type == MLX5_COREDEV_PF) 2066 err_msg = "post action is missing"; 2067 err = -EOPNOTSUPP; 2068 goto out_err; 2069 } 2070 2071 if (ns_type == MLX5_FLOW_NAMESPACE_FDB) 2072 err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg); 2073 2074 out_err: 2075 if (err && err_msg) 2076 netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg); 2077 return err; 2078 } 2079 2080 static void 2081 mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv) 2082 { 2083 bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB; 2084 struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs; 2085 char dirname[16] = {}; 2086 2087 if (sscanf(dirname, "ct_%s", is_fdb ? "fdb" : "nic") < 0) 2088 return; 2089 2090 ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev)); 2091 debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root, 2092 &ct_dbgfs->stats.offloaded); 2093 debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root, 2094 &ct_dbgfs->stats.rx_dropped); 2095 } 2096 2097 static void 2098 mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv) 2099 { 2100 debugfs_remove_recursive(ct_priv->debugfs.root); 2101 } 2102 2103 #define INIT_ERR_PREFIX "tc ct offload init failed" 2104 2105 struct mlx5_tc_ct_priv * 2106 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, 2107 struct mod_hdr_tbl *mod_hdr, 2108 enum mlx5_flow_namespace_type ns_type, 2109 struct mlx5e_post_act *post_act) 2110 { 2111 struct mlx5_tc_ct_priv *ct_priv; 2112 struct mlx5_core_dev *dev; 2113 u64 mapping_id; 2114 int err; 2115 2116 dev = priv->mdev; 2117 err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act); 2118 if (err) 2119 goto err_support; 2120 2121 ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); 2122 if (!ct_priv) 2123 goto err_alloc; 2124 2125 mapping_id = mlx5_query_nic_system_image_guid(dev); 2126 2127 ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE, 2128 sizeof(u16), 0, true); 2129 if (IS_ERR(ct_priv->zone_mapping)) { 2130 err = PTR_ERR(ct_priv->zone_mapping); 2131 goto err_mapping_zone; 2132 } 2133 2134 ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS, 2135 sizeof(u32) * 4, 0, true); 2136 if (IS_ERR(ct_priv->labels_mapping)) { 2137 err = PTR_ERR(ct_priv->labels_mapping); 2138 goto err_mapping_labels; 2139 } 2140 2141 spin_lock_init(&ct_priv->ht_lock); 2142 ct_priv->ns_type = ns_type; 2143 ct_priv->chains = chains; 2144 ct_priv->netdev = priv->netdev; 2145 ct_priv->dev = priv->mdev; 2146 ct_priv->mod_hdr_tbl = mod_hdr; 2147 ct_priv->ct = mlx5_chains_create_global_table(chains); 2148 if (IS_ERR(ct_priv->ct)) { 2149 err = PTR_ERR(ct_priv->ct); 2150 mlx5_core_warn(dev, 2151 "%s, failed to create ct table err: %d\n", 2152 INIT_ERR_PREFIX, err); 2153 goto err_ct_tbl; 2154 } 2155 2156 ct_priv->ct_nat = mlx5_chains_create_global_table(chains); 2157 if (IS_ERR(ct_priv->ct_nat)) { 2158 err = PTR_ERR(ct_priv->ct_nat); 2159 mlx5_core_warn(dev, 2160 "%s, failed to create ct nat table err: %d\n", 2161 INIT_ERR_PREFIX, err); 2162 goto err_ct_nat_tbl; 2163 } 2164 2165 ct_priv->post_act = post_act; 2166 mutex_init(&ct_priv->control_lock); 2167 if (rhashtable_init(&ct_priv->zone_ht, &zone_params)) 2168 goto err_ct_zone_ht; 2169 if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params)) 2170 goto err_ct_tuples_ht; 2171 if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) 2172 goto err_ct_tuples_nat_ht; 2173 2174 ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0); 2175 if (!ct_priv->wq) { 2176 err = -ENOMEM; 2177 goto err_wq; 2178 } 2179 2180 err = mlx5_tc_ct_fs_init(ct_priv); 2181 if (err) 2182 goto err_init_fs; 2183 2184 mlx5_ct_tc_create_dbgfs(ct_priv); 2185 return ct_priv; 2186 2187 err_init_fs: 2188 destroy_workqueue(ct_priv->wq); 2189 err_wq: 2190 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 2191 err_ct_tuples_nat_ht: 2192 rhashtable_destroy(&ct_priv->ct_tuples_ht); 2193 err_ct_tuples_ht: 2194 rhashtable_destroy(&ct_priv->zone_ht); 2195 err_ct_zone_ht: 2196 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); 2197 err_ct_nat_tbl: 2198 mlx5_chains_destroy_global_table(chains, ct_priv->ct); 2199 err_ct_tbl: 2200 mapping_destroy(ct_priv->labels_mapping); 2201 err_mapping_labels: 2202 mapping_destroy(ct_priv->zone_mapping); 2203 err_mapping_zone: 2204 kfree(ct_priv); 2205 err_alloc: 2206 err_support: 2207 2208 return NULL; 2209 } 2210 2211 void 2212 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) 2213 { 2214 struct mlx5_fs_chains *chains; 2215 2216 if (!ct_priv) 2217 return; 2218 2219 destroy_workqueue(ct_priv->wq); 2220 mlx5_ct_tc_remove_dbgfs(ct_priv); 2221 chains = ct_priv->chains; 2222 2223 ct_priv->fs_ops->destroy(ct_priv->fs); 2224 kfree(ct_priv->fs); 2225 2226 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); 2227 mlx5_chains_destroy_global_table(chains, ct_priv->ct); 2228 mapping_destroy(ct_priv->zone_mapping); 2229 mapping_destroy(ct_priv->labels_mapping); 2230 2231 rhashtable_destroy(&ct_priv->ct_tuples_ht); 2232 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 2233 rhashtable_destroy(&ct_priv->zone_ht); 2234 mutex_destroy(&ct_priv->control_lock); 2235 kfree(ct_priv); 2236 } 2237 2238 bool 2239 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, 2240 struct sk_buff *skb, u8 zone_restore_id) 2241 { 2242 struct mlx5_ct_tuple tuple = {}; 2243 struct mlx5_ct_entry *entry; 2244 u16 zone; 2245 2246 if (!ct_priv || !zone_restore_id) 2247 return true; 2248 2249 if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone)) 2250 goto out_inc_drop; 2251 2252 if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) 2253 goto out_inc_drop; 2254 2255 spin_lock(&ct_priv->ht_lock); 2256 2257 entry = mlx5_tc_ct_entry_get(ct_priv, &tuple); 2258 if (!entry) { 2259 spin_unlock(&ct_priv->ht_lock); 2260 goto out_inc_drop; 2261 } 2262 2263 if (IS_ERR(entry)) { 2264 spin_unlock(&ct_priv->ht_lock); 2265 goto out_inc_drop; 2266 } 2267 spin_unlock(&ct_priv->ht_lock); 2268 2269 tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); 2270 __mlx5_tc_ct_entry_put(entry); 2271 2272 return true; 2273 2274 out_inc_drop: 2275 atomic_inc(&ct_priv->debugfs.stats.rx_dropped); 2276 return false; 2277 } 2278