1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2019 Mellanox Technologies. */ 3 4 #include <net/netfilter/nf_conntrack.h> 5 #include <net/netfilter/nf_conntrack_core.h> 6 #include <net/netfilter/nf_conntrack_zones.h> 7 #include <net/netfilter/nf_conntrack_labels.h> 8 #include <net/netfilter/nf_conntrack_helper.h> 9 #include <net/netfilter/nf_conntrack_acct.h> 10 #include <uapi/linux/tc_act/tc_pedit.h> 11 #include <net/tc_act/tc_ct.h> 12 #include <net/flow_offload.h> 13 #include <net/netfilter/nf_flow_table.h> 14 #include <linux/workqueue.h> 15 #include <linux/refcount.h> 16 #include <linux/xarray.h> 17 18 #include "lib/fs_chains.h" 19 #include "en/tc_ct.h" 20 #include "en/mod_hdr.h" 21 #include "en/mapping.h" 22 #include "en.h" 23 #include "en_tc.h" 24 #include "en_rep.h" 25 26 #define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8) 27 #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) 28 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) 29 #define MLX5_CT_STATE_TRK_BIT BIT(2) 30 #define MLX5_CT_STATE_NAT_BIT BIT(3) 31 #define MLX5_CT_STATE_REPLY_BIT BIT(4) 32 33 #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) 34 #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) 35 #define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX 36 37 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8) 38 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0) 39 40 #define ct_dbg(fmt, args...)\ 41 netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) 42 43 struct mlx5_tc_ct_priv { 44 struct mlx5_core_dev *dev; 45 const struct net_device *netdev; 46 struct mod_hdr_tbl *mod_hdr_tbl; 47 struct idr fte_ids; 48 struct xarray tuple_ids; 49 struct rhashtable zone_ht; 50 struct rhashtable ct_tuples_ht; 51 struct rhashtable ct_tuples_nat_ht; 52 struct mlx5_flow_table *ct; 53 struct mlx5_flow_table *ct_nat; 54 struct mlx5_flow_table *post_ct; 55 struct mutex control_lock; /* guards parallel adds/dels */ 56 struct mapping_ctx *zone_mapping; 57 struct mapping_ctx *labels_mapping; 58 enum mlx5_flow_namespace_type ns_type; 59 struct mlx5_fs_chains *chains; 60 spinlock_t ht_lock; /* protects ft entries */ 61 }; 62 63 struct mlx5_ct_flow { 64 struct mlx5_flow_attr *pre_ct_attr; 65 struct mlx5_flow_attr *post_ct_attr; 66 struct mlx5_flow_handle *pre_ct_rule; 67 struct mlx5_flow_handle *post_ct_rule; 68 struct mlx5_ct_ft *ft; 69 u32 fte_id; 70 u32 chain_mapping; 71 }; 72 73 struct mlx5_ct_zone_rule { 74 struct mlx5_flow_handle *rule; 75 struct mlx5e_mod_hdr_handle *mh; 76 struct mlx5_flow_attr *attr; 77 bool nat; 78 }; 79 80 struct mlx5_tc_ct_pre { 81 struct mlx5_flow_table *ft; 82 struct mlx5_flow_group *flow_grp; 83 struct mlx5_flow_group *miss_grp; 84 struct mlx5_flow_handle *flow_rule; 85 struct mlx5_flow_handle *miss_rule; 86 struct mlx5_modify_hdr *modify_hdr; 87 }; 88 89 struct mlx5_ct_ft { 90 struct rhash_head node; 91 u16 zone; 92 u32 zone_restore_id; 93 refcount_t refcount; 94 struct nf_flowtable *nf_ft; 95 struct mlx5_tc_ct_priv *ct_priv; 96 struct rhashtable ct_entries_ht; 97 struct mlx5_tc_ct_pre pre_ct; 98 struct mlx5_tc_ct_pre pre_ct_nat; 99 }; 100 101 struct mlx5_ct_tuple { 102 u16 addr_type; 103 __be16 n_proto; 104 u8 ip_proto; 105 struct { 106 union { 107 __be32 src_v4; 108 struct in6_addr src_v6; 109 }; 110 union { 111 __be32 dst_v4; 112 struct in6_addr dst_v6; 113 }; 114 } ip; 115 struct { 116 __be16 src; 117 __be16 dst; 118 } port; 119 120 u16 zone; 121 }; 122 123 struct mlx5_ct_counter { 124 struct mlx5_fc *counter; 125 refcount_t refcount; 126 bool is_shared; 127 }; 128 129 enum { 130 MLX5_CT_ENTRY_FLAG_VALID, 131 }; 132 133 struct mlx5_ct_entry { 134 struct rhash_head node; 135 struct rhash_head tuple_node; 136 struct rhash_head tuple_nat_node; 137 struct mlx5_ct_counter *counter; 138 unsigned long cookie; 139 unsigned long restore_cookie; 140 struct mlx5_ct_tuple tuple; 141 struct mlx5_ct_tuple tuple_nat; 142 struct mlx5_ct_zone_rule zone_rules[2]; 143 144 struct mlx5_tc_ct_priv *ct_priv; 145 struct work_struct work; 146 147 refcount_t refcnt; 148 unsigned long flags; 149 }; 150 151 static const struct rhashtable_params cts_ht_params = { 152 .head_offset = offsetof(struct mlx5_ct_entry, node), 153 .key_offset = offsetof(struct mlx5_ct_entry, cookie), 154 .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie), 155 .automatic_shrinking = true, 156 .min_size = 16 * 1024, 157 }; 158 159 static const struct rhashtable_params zone_params = { 160 .head_offset = offsetof(struct mlx5_ct_ft, node), 161 .key_offset = offsetof(struct mlx5_ct_ft, zone), 162 .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone), 163 .automatic_shrinking = true, 164 }; 165 166 static const struct rhashtable_params tuples_ht_params = { 167 .head_offset = offsetof(struct mlx5_ct_entry, tuple_node), 168 .key_offset = offsetof(struct mlx5_ct_entry, tuple), 169 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple), 170 .automatic_shrinking = true, 171 .min_size = 16 * 1024, 172 }; 173 174 static const struct rhashtable_params tuples_nat_ht_params = { 175 .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node), 176 .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat), 177 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat), 178 .automatic_shrinking = true, 179 .min_size = 16 * 1024, 180 }; 181 182 static bool 183 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry) 184 { 185 return !!(entry->tuple_nat_node.next); 186 } 187 188 static int 189 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) 190 { 191 struct flow_match_control control; 192 struct flow_match_basic basic; 193 194 flow_rule_match_basic(rule, &basic); 195 flow_rule_match_control(rule, &control); 196 197 tuple->n_proto = basic.key->n_proto; 198 tuple->ip_proto = basic.key->ip_proto; 199 tuple->addr_type = control.key->addr_type; 200 201 if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 202 struct flow_match_ipv4_addrs match; 203 204 flow_rule_match_ipv4_addrs(rule, &match); 205 tuple->ip.src_v4 = match.key->src; 206 tuple->ip.dst_v4 = match.key->dst; 207 } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 208 struct flow_match_ipv6_addrs match; 209 210 flow_rule_match_ipv6_addrs(rule, &match); 211 tuple->ip.src_v6 = match.key->src; 212 tuple->ip.dst_v6 = match.key->dst; 213 } else { 214 return -EOPNOTSUPP; 215 } 216 217 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 218 struct flow_match_ports match; 219 220 flow_rule_match_ports(rule, &match); 221 switch (tuple->ip_proto) { 222 case IPPROTO_TCP: 223 case IPPROTO_UDP: 224 tuple->port.src = match.key->src; 225 tuple->port.dst = match.key->dst; 226 break; 227 default: 228 return -EOPNOTSUPP; 229 } 230 } else { 231 return -EOPNOTSUPP; 232 } 233 234 return 0; 235 } 236 237 static int 238 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, 239 struct flow_rule *rule) 240 { 241 struct flow_action *flow_action = &rule->action; 242 struct flow_action_entry *act; 243 u32 offset, val, ip6_offset; 244 int i; 245 246 flow_action_for_each(i, act, flow_action) { 247 if (act->id != FLOW_ACTION_MANGLE) 248 continue; 249 250 offset = act->mangle.offset; 251 val = act->mangle.val; 252 switch (act->mangle.htype) { 253 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 254 if (offset == offsetof(struct iphdr, saddr)) 255 tuple->ip.src_v4 = cpu_to_be32(val); 256 else if (offset == offsetof(struct iphdr, daddr)) 257 tuple->ip.dst_v4 = cpu_to_be32(val); 258 else 259 return -EOPNOTSUPP; 260 break; 261 262 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 263 ip6_offset = (offset - offsetof(struct ipv6hdr, saddr)); 264 ip6_offset /= 4; 265 if (ip6_offset < 4) 266 tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val); 267 else if (ip6_offset < 8) 268 tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val); 269 else 270 return -EOPNOTSUPP; 271 break; 272 273 case FLOW_ACT_MANGLE_HDR_TYPE_TCP: 274 if (offset == offsetof(struct tcphdr, source)) 275 tuple->port.src = cpu_to_be16(val); 276 else if (offset == offsetof(struct tcphdr, dest)) 277 tuple->port.dst = cpu_to_be16(val); 278 else 279 return -EOPNOTSUPP; 280 break; 281 282 case FLOW_ACT_MANGLE_HDR_TYPE_UDP: 283 if (offset == offsetof(struct udphdr, source)) 284 tuple->port.src = cpu_to_be16(val); 285 else if (offset == offsetof(struct udphdr, dest)) 286 tuple->port.dst = cpu_to_be16(val); 287 else 288 return -EOPNOTSUPP; 289 break; 290 291 default: 292 return -EOPNOTSUPP; 293 } 294 } 295 296 return 0; 297 } 298 299 static int 300 mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, 301 struct flow_rule *rule) 302 { 303 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 304 outer_headers); 305 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, 306 outer_headers); 307 u16 addr_type = 0; 308 u8 ip_proto = 0; 309 310 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { 311 struct flow_match_basic match; 312 313 flow_rule_match_basic(rule, &match); 314 315 mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c, 316 headers_v); 317 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 318 match.mask->ip_proto); 319 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 320 match.key->ip_proto); 321 322 ip_proto = match.key->ip_proto; 323 } 324 325 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 326 struct flow_match_control match; 327 328 flow_rule_match_control(rule, &match); 329 addr_type = match.key->addr_type; 330 } 331 332 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 333 struct flow_match_ipv4_addrs match; 334 335 flow_rule_match_ipv4_addrs(rule, &match); 336 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 337 src_ipv4_src_ipv6.ipv4_layout.ipv4), 338 &match.mask->src, sizeof(match.mask->src)); 339 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 340 src_ipv4_src_ipv6.ipv4_layout.ipv4), 341 &match.key->src, sizeof(match.key->src)); 342 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 343 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 344 &match.mask->dst, sizeof(match.mask->dst)); 345 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 346 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 347 &match.key->dst, sizeof(match.key->dst)); 348 } 349 350 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 351 struct flow_match_ipv6_addrs match; 352 353 flow_rule_match_ipv6_addrs(rule, &match); 354 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 355 src_ipv4_src_ipv6.ipv6_layout.ipv6), 356 &match.mask->src, sizeof(match.mask->src)); 357 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 358 src_ipv4_src_ipv6.ipv6_layout.ipv6), 359 &match.key->src, sizeof(match.key->src)); 360 361 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 362 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 363 &match.mask->dst, sizeof(match.mask->dst)); 364 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 365 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 366 &match.key->dst, sizeof(match.key->dst)); 367 } 368 369 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { 370 struct flow_match_ports match; 371 372 flow_rule_match_ports(rule, &match); 373 switch (ip_proto) { 374 case IPPROTO_TCP: 375 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 376 tcp_sport, ntohs(match.mask->src)); 377 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 378 tcp_sport, ntohs(match.key->src)); 379 380 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 381 tcp_dport, ntohs(match.mask->dst)); 382 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 383 tcp_dport, ntohs(match.key->dst)); 384 break; 385 386 case IPPROTO_UDP: 387 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 388 udp_sport, ntohs(match.mask->src)); 389 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 390 udp_sport, ntohs(match.key->src)); 391 392 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 393 udp_dport, ntohs(match.mask->dst)); 394 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 395 udp_dport, ntohs(match.key->dst)); 396 break; 397 default: 398 break; 399 } 400 } 401 402 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { 403 struct flow_match_tcp match; 404 405 flow_rule_match_tcp(rule, &match); 406 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, 407 ntohs(match.mask->flags)); 408 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, 409 ntohs(match.key->flags)); 410 } 411 412 return 0; 413 } 414 415 static void 416 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) 417 { 418 if (entry->counter->is_shared && 419 !refcount_dec_and_test(&entry->counter->refcount)) 420 return; 421 422 mlx5_fc_destroy(ct_priv->dev, entry->counter->counter); 423 kfree(entry->counter); 424 } 425 426 static void 427 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, 428 struct mlx5_ct_entry *entry, 429 bool nat) 430 { 431 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 432 struct mlx5_flow_attr *attr = zone_rule->attr; 433 434 ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); 435 436 mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr); 437 mlx5e_mod_hdr_detach(ct_priv->dev, 438 ct_priv->mod_hdr_tbl, zone_rule->mh); 439 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); 440 kfree(attr); 441 } 442 443 static void 444 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, 445 struct mlx5_ct_entry *entry) 446 { 447 mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); 448 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 449 } 450 451 static struct flow_action_entry * 452 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) 453 { 454 struct flow_action *flow_action = &flow_rule->action; 455 struct flow_action_entry *act; 456 int i; 457 458 flow_action_for_each(i, act, flow_action) { 459 if (act->id == FLOW_ACTION_CT_METADATA) 460 return act; 461 } 462 463 return NULL; 464 } 465 466 static int 467 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, 468 struct mlx5e_tc_mod_hdr_acts *mod_acts, 469 u8 ct_state, 470 u32 mark, 471 u32 labels_id, 472 u8 zone_restore_id) 473 { 474 enum mlx5_flow_namespace_type ns = ct_priv->ns_type; 475 struct mlx5_core_dev *dev = ct_priv->dev; 476 int err; 477 478 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 479 CTSTATE_TO_REG, ct_state); 480 if (err) 481 return err; 482 483 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 484 MARK_TO_REG, mark); 485 if (err) 486 return err; 487 488 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 489 LABELS_TO_REG, labels_id); 490 if (err) 491 return err; 492 493 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 494 ZONE_RESTORE_TO_REG, zone_restore_id); 495 if (err) 496 return err; 497 498 /* Make another copy of zone id in reg_b for 499 * NIC rx flows since we don't copy reg_c1 to 500 * reg_b upon miss. 501 */ 502 if (ns != MLX5_FLOW_NAMESPACE_FDB) { 503 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, 504 NIC_ZONE_RESTORE_TO_REG, zone_restore_id); 505 if (err) 506 return err; 507 } 508 return 0; 509 } 510 511 static int 512 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, 513 char *modact) 514 { 515 u32 offset = act->mangle.offset, field; 516 517 switch (act->mangle.htype) { 518 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 519 MLX5_SET(set_action_in, modact, length, 0); 520 if (offset == offsetof(struct iphdr, saddr)) 521 field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; 522 else if (offset == offsetof(struct iphdr, daddr)) 523 field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; 524 else 525 return -EOPNOTSUPP; 526 break; 527 528 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 529 MLX5_SET(set_action_in, modact, length, 0); 530 if (offset == offsetof(struct ipv6hdr, saddr) + 12) 531 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; 532 else if (offset == offsetof(struct ipv6hdr, saddr) + 8) 533 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; 534 else if (offset == offsetof(struct ipv6hdr, saddr) + 4) 535 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; 536 else if (offset == offsetof(struct ipv6hdr, saddr)) 537 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; 538 else if (offset == offsetof(struct ipv6hdr, daddr) + 12) 539 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; 540 else if (offset == offsetof(struct ipv6hdr, daddr) + 8) 541 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; 542 else if (offset == offsetof(struct ipv6hdr, daddr) + 4) 543 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; 544 else if (offset == offsetof(struct ipv6hdr, daddr)) 545 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; 546 else 547 return -EOPNOTSUPP; 548 break; 549 550 case FLOW_ACT_MANGLE_HDR_TYPE_TCP: 551 MLX5_SET(set_action_in, modact, length, 16); 552 if (offset == offsetof(struct tcphdr, source)) 553 field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; 554 else if (offset == offsetof(struct tcphdr, dest)) 555 field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; 556 else 557 return -EOPNOTSUPP; 558 break; 559 560 case FLOW_ACT_MANGLE_HDR_TYPE_UDP: 561 MLX5_SET(set_action_in, modact, length, 16); 562 if (offset == offsetof(struct udphdr, source)) 563 field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; 564 else if (offset == offsetof(struct udphdr, dest)) 565 field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; 566 else 567 return -EOPNOTSUPP; 568 break; 569 570 default: 571 return -EOPNOTSUPP; 572 } 573 574 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); 575 MLX5_SET(set_action_in, modact, offset, 0); 576 MLX5_SET(set_action_in, modact, field, field); 577 MLX5_SET(set_action_in, modact, data, act->mangle.val); 578 579 return 0; 580 } 581 582 static int 583 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, 584 struct flow_rule *flow_rule, 585 struct mlx5e_tc_mod_hdr_acts *mod_acts) 586 { 587 struct flow_action *flow_action = &flow_rule->action; 588 struct mlx5_core_dev *mdev = ct_priv->dev; 589 struct flow_action_entry *act; 590 size_t action_size; 591 char *modact; 592 int err, i; 593 594 action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); 595 596 flow_action_for_each(i, act, flow_action) { 597 switch (act->id) { 598 case FLOW_ACTION_MANGLE: { 599 err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type, 600 mod_acts); 601 if (err) 602 return err; 603 604 modact = mod_acts->actions + 605 mod_acts->num_actions * action_size; 606 607 err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); 608 if (err) 609 return err; 610 611 mod_acts->num_actions++; 612 } 613 break; 614 615 case FLOW_ACTION_CT_METADATA: 616 /* Handled earlier */ 617 continue; 618 default: 619 return -EOPNOTSUPP; 620 } 621 } 622 623 return 0; 624 } 625 626 static int 627 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 628 struct mlx5_flow_attr *attr, 629 struct flow_rule *flow_rule, 630 struct mlx5e_mod_hdr_handle **mh, 631 u8 zone_restore_id, bool nat) 632 { 633 struct mlx5e_tc_mod_hdr_acts mod_acts = {}; 634 struct flow_action_entry *meta; 635 u16 ct_state = 0; 636 int err; 637 638 meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 639 if (!meta) 640 return -EOPNOTSUPP; 641 642 err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels, 643 &attr->ct_attr.ct_labels_id); 644 if (err) 645 return -EOPNOTSUPP; 646 if (nat) { 647 err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, 648 &mod_acts); 649 if (err) 650 goto err_mapping; 651 652 ct_state |= MLX5_CT_STATE_NAT_BIT; 653 } 654 655 ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; 656 ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT; 657 err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, 658 ct_state, 659 meta->ct_metadata.mark, 660 attr->ct_attr.ct_labels_id, 661 zone_restore_id); 662 if (err) 663 goto err_mapping; 664 665 *mh = mlx5e_mod_hdr_attach(ct_priv->dev, 666 ct_priv->mod_hdr_tbl, 667 ct_priv->ns_type, 668 &mod_acts); 669 if (IS_ERR(*mh)) { 670 err = PTR_ERR(*mh); 671 goto err_mapping; 672 } 673 attr->modify_hdr = mlx5e_mod_hdr_get(*mh); 674 675 dealloc_mod_hdr_actions(&mod_acts); 676 return 0; 677 678 err_mapping: 679 dealloc_mod_hdr_actions(&mod_acts); 680 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); 681 return err; 682 } 683 684 static int 685 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, 686 struct flow_rule *flow_rule, 687 struct mlx5_ct_entry *entry, 688 bool nat, u8 zone_restore_id) 689 { 690 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; 691 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 692 struct mlx5_flow_spec *spec = NULL; 693 struct mlx5_flow_attr *attr; 694 int err; 695 696 zone_rule->nat = nat; 697 698 spec = kzalloc(sizeof(*spec), GFP_KERNEL); 699 if (!spec) 700 return -ENOMEM; 701 702 attr = mlx5_alloc_flow_attr(ct_priv->ns_type); 703 if (!attr) { 704 err = -ENOMEM; 705 goto err_attr; 706 } 707 708 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, 709 &zone_rule->mh, 710 zone_restore_id, nat); 711 if (err) { 712 ct_dbg("Failed to create ct entry mod hdr"); 713 goto err_mod_hdr; 714 } 715 716 attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | 717 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 718 MLX5_FLOW_CONTEXT_ACTION_COUNT; 719 attr->dest_chain = 0; 720 attr->dest_ft = ct_priv->post_ct; 721 attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; 722 attr->outer_match_level = MLX5_MATCH_L4; 723 attr->counter = entry->counter->counter; 724 attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; 725 if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) 726 attr->esw_attr->in_mdev = priv->mdev; 727 728 mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); 729 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); 730 731 zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); 732 if (IS_ERR(zone_rule->rule)) { 733 err = PTR_ERR(zone_rule->rule); 734 ct_dbg("Failed to add ct entry rule, nat: %d", nat); 735 goto err_rule; 736 } 737 738 zone_rule->attr = attr; 739 740 kfree(spec); 741 ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); 742 743 return 0; 744 745 err_rule: 746 mlx5e_mod_hdr_detach(ct_priv->dev, 747 ct_priv->mod_hdr_tbl, zone_rule->mh); 748 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); 749 err_mod_hdr: 750 kfree(attr); 751 err_attr: 752 kfree(spec); 753 return err; 754 } 755 756 static bool 757 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) 758 { 759 return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); 760 } 761 762 static struct mlx5_ct_entry * 763 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple) 764 { 765 struct mlx5_ct_entry *entry; 766 767 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple, 768 tuples_ht_params); 769 if (entry && mlx5_tc_ct_entry_valid(entry) && 770 refcount_inc_not_zero(&entry->refcnt)) { 771 return entry; 772 } else if (!entry) { 773 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, 774 tuple, tuples_nat_ht_params); 775 if (entry && mlx5_tc_ct_entry_valid(entry) && 776 refcount_inc_not_zero(&entry->refcnt)) 777 return entry; 778 } 779 780 return entry ? ERR_PTR(-EINVAL) : NULL; 781 } 782 783 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry) 784 { 785 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; 786 787 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 788 &entry->tuple_nat_node, 789 tuples_nat_ht_params); 790 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, 791 tuples_ht_params); 792 } 793 794 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry) 795 { 796 struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; 797 798 mlx5_tc_ct_entry_del_rules(ct_priv, entry); 799 800 spin_lock_bh(&ct_priv->ht_lock); 801 mlx5_tc_ct_entry_remove_from_tuples(entry); 802 spin_unlock_bh(&ct_priv->ht_lock); 803 804 mlx5_tc_ct_counter_put(ct_priv, entry); 805 kfree(entry); 806 } 807 808 static void 809 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) 810 { 811 if (!refcount_dec_and_test(&entry->refcnt)) 812 return; 813 814 mlx5_tc_ct_entry_del(entry); 815 } 816 817 static void mlx5_tc_ct_entry_del_work(struct work_struct *work) 818 { 819 struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work); 820 821 mlx5_tc_ct_entry_del(entry); 822 } 823 824 static void 825 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) 826 { 827 struct mlx5e_priv *priv; 828 829 if (!refcount_dec_and_test(&entry->refcnt)) 830 return; 831 832 priv = netdev_priv(entry->ct_priv->netdev); 833 INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); 834 queue_work(priv->wq, &entry->work); 835 } 836 837 static struct mlx5_ct_counter * 838 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) 839 { 840 struct mlx5_ct_counter *counter; 841 int ret; 842 843 counter = kzalloc(sizeof(*counter), GFP_KERNEL); 844 if (!counter) 845 return ERR_PTR(-ENOMEM); 846 847 counter->is_shared = false; 848 counter->counter = mlx5_fc_create(ct_priv->dev, true); 849 if (IS_ERR(counter->counter)) { 850 ct_dbg("Failed to create counter for ct entry"); 851 ret = PTR_ERR(counter->counter); 852 kfree(counter); 853 return ERR_PTR(ret); 854 } 855 856 return counter; 857 } 858 859 static struct mlx5_ct_counter * 860 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, 861 struct mlx5_ct_entry *entry) 862 { 863 struct mlx5_ct_tuple rev_tuple = entry->tuple; 864 struct mlx5_ct_counter *shared_counter; 865 struct mlx5_ct_entry *rev_entry; 866 __be16 tmp_port; 867 868 /* get the reversed tuple */ 869 tmp_port = rev_tuple.port.src; 870 rev_tuple.port.src = rev_tuple.port.dst; 871 rev_tuple.port.dst = tmp_port; 872 873 if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { 874 __be32 tmp_addr = rev_tuple.ip.src_v4; 875 876 rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4; 877 rev_tuple.ip.dst_v4 = tmp_addr; 878 } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { 879 struct in6_addr tmp_addr = rev_tuple.ip.src_v6; 880 881 rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6; 882 rev_tuple.ip.dst_v6 = tmp_addr; 883 } else { 884 return ERR_PTR(-EOPNOTSUPP); 885 } 886 887 /* Use the same counter as the reverse direction */ 888 spin_lock_bh(&ct_priv->ht_lock); 889 rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple); 890 891 if (IS_ERR(rev_entry)) { 892 spin_unlock_bh(&ct_priv->ht_lock); 893 goto create_counter; 894 } 895 896 if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) { 897 ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry); 898 shared_counter = rev_entry->counter; 899 spin_unlock_bh(&ct_priv->ht_lock); 900 901 mlx5_tc_ct_entry_put(rev_entry); 902 return shared_counter; 903 } 904 905 spin_unlock_bh(&ct_priv->ht_lock); 906 907 create_counter: 908 909 shared_counter = mlx5_tc_ct_counter_create(ct_priv); 910 if (IS_ERR(shared_counter)) 911 return shared_counter; 912 913 shared_counter->is_shared = true; 914 refcount_set(&shared_counter->refcount, 1); 915 return shared_counter; 916 } 917 918 static int 919 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, 920 struct flow_rule *flow_rule, 921 struct mlx5_ct_entry *entry, 922 u8 zone_restore_id) 923 { 924 int err; 925 926 if (nf_ct_acct_enabled(dev_net(ct_priv->netdev))) 927 entry->counter = mlx5_tc_ct_counter_create(ct_priv); 928 else 929 entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); 930 931 if (IS_ERR(entry->counter)) { 932 err = PTR_ERR(entry->counter); 933 return err; 934 } 935 936 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false, 937 zone_restore_id); 938 if (err) 939 goto err_orig; 940 941 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true, 942 zone_restore_id); 943 if (err) 944 goto err_nat; 945 946 return 0; 947 948 err_nat: 949 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); 950 err_orig: 951 mlx5_tc_ct_counter_put(ct_priv, entry); 952 return err; 953 } 954 955 static int 956 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, 957 struct flow_cls_offload *flow) 958 { 959 struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); 960 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 961 struct flow_action_entry *meta_action; 962 unsigned long cookie = flow->cookie; 963 struct mlx5_ct_entry *entry; 964 int err; 965 966 meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); 967 if (!meta_action) 968 return -EOPNOTSUPP; 969 970 spin_lock_bh(&ct_priv->ht_lock); 971 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 972 if (entry && refcount_inc_not_zero(&entry->refcnt)) { 973 spin_unlock_bh(&ct_priv->ht_lock); 974 mlx5_tc_ct_entry_put(entry); 975 return -EEXIST; 976 } 977 spin_unlock_bh(&ct_priv->ht_lock); 978 979 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 980 if (!entry) 981 return -ENOMEM; 982 983 entry->tuple.zone = ft->zone; 984 entry->cookie = flow->cookie; 985 entry->restore_cookie = meta_action->ct_metadata.cookie; 986 refcount_set(&entry->refcnt, 2); 987 entry->ct_priv = ct_priv; 988 989 err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); 990 if (err) 991 goto err_set; 992 993 memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple)); 994 err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule); 995 if (err) 996 goto err_set; 997 998 spin_lock_bh(&ct_priv->ht_lock); 999 1000 err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node, 1001 cts_ht_params); 1002 if (err) 1003 goto err_entries; 1004 1005 err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht, 1006 &entry->tuple_node, 1007 tuples_ht_params); 1008 if (err) 1009 goto err_tuple; 1010 1011 if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { 1012 err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, 1013 &entry->tuple_nat_node, 1014 tuples_nat_ht_params); 1015 if (err) 1016 goto err_tuple_nat; 1017 } 1018 spin_unlock_bh(&ct_priv->ht_lock); 1019 1020 err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, 1021 ft->zone_restore_id); 1022 if (err) 1023 goto err_rules; 1024 1025 set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); 1026 mlx5_tc_ct_entry_put(entry); /* this function reference */ 1027 1028 return 0; 1029 1030 err_rules: 1031 spin_lock_bh(&ct_priv->ht_lock); 1032 if (mlx5_tc_ct_entry_has_nat(entry)) 1033 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, 1034 &entry->tuple_nat_node, tuples_nat_ht_params); 1035 err_tuple_nat: 1036 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, 1037 &entry->tuple_node, 1038 tuples_ht_params); 1039 err_tuple: 1040 rhashtable_remove_fast(&ft->ct_entries_ht, 1041 &entry->node, 1042 cts_ht_params); 1043 err_entries: 1044 spin_unlock_bh(&ct_priv->ht_lock); 1045 err_set: 1046 kfree(entry); 1047 if (err != -EEXIST) 1048 netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err); 1049 return err; 1050 } 1051 1052 static int 1053 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, 1054 struct flow_cls_offload *flow) 1055 { 1056 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1057 unsigned long cookie = flow->cookie; 1058 struct mlx5_ct_entry *entry; 1059 1060 spin_lock_bh(&ct_priv->ht_lock); 1061 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 1062 if (!entry) { 1063 spin_unlock_bh(&ct_priv->ht_lock); 1064 return -ENOENT; 1065 } 1066 1067 if (!mlx5_tc_ct_entry_valid(entry)) { 1068 spin_unlock_bh(&ct_priv->ht_lock); 1069 return -EINVAL; 1070 } 1071 1072 rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); 1073 mlx5_tc_ct_entry_remove_from_tuples(entry); 1074 spin_unlock_bh(&ct_priv->ht_lock); 1075 1076 mlx5_tc_ct_entry_put(entry); 1077 1078 return 0; 1079 } 1080 1081 static int 1082 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, 1083 struct flow_cls_offload *f) 1084 { 1085 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; 1086 unsigned long cookie = f->cookie; 1087 struct mlx5_ct_entry *entry; 1088 u64 lastuse, packets, bytes; 1089 1090 spin_lock_bh(&ct_priv->ht_lock); 1091 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); 1092 if (!entry) { 1093 spin_unlock_bh(&ct_priv->ht_lock); 1094 return -ENOENT; 1095 } 1096 1097 if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) { 1098 spin_unlock_bh(&ct_priv->ht_lock); 1099 return -EINVAL; 1100 } 1101 1102 spin_unlock_bh(&ct_priv->ht_lock); 1103 1104 mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); 1105 flow_stats_update(&f->stats, bytes, packets, 0, lastuse, 1106 FLOW_ACTION_HW_STATS_DELAYED); 1107 1108 mlx5_tc_ct_entry_put(entry); 1109 return 0; 1110 } 1111 1112 static int 1113 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, 1114 void *cb_priv) 1115 { 1116 struct flow_cls_offload *f = type_data; 1117 struct mlx5_ct_ft *ft = cb_priv; 1118 1119 if (type != TC_SETUP_CLSFLOWER) 1120 return -EOPNOTSUPP; 1121 1122 switch (f->command) { 1123 case FLOW_CLS_REPLACE: 1124 return mlx5_tc_ct_block_flow_offload_add(ft, f); 1125 case FLOW_CLS_DESTROY: 1126 return mlx5_tc_ct_block_flow_offload_del(ft, f); 1127 case FLOW_CLS_STATS: 1128 return mlx5_tc_ct_block_flow_offload_stats(ft, f); 1129 default: 1130 break; 1131 } 1132 1133 return -EOPNOTSUPP; 1134 } 1135 1136 static bool 1137 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, 1138 u16 zone) 1139 { 1140 struct flow_keys flow_keys; 1141 1142 skb_reset_network_header(skb); 1143 skb_flow_dissect_flow_keys(skb, &flow_keys, 0); 1144 1145 tuple->zone = zone; 1146 1147 if (flow_keys.basic.ip_proto != IPPROTO_TCP && 1148 flow_keys.basic.ip_proto != IPPROTO_UDP) 1149 return false; 1150 1151 tuple->port.src = flow_keys.ports.src; 1152 tuple->port.dst = flow_keys.ports.dst; 1153 tuple->n_proto = flow_keys.basic.n_proto; 1154 tuple->ip_proto = flow_keys.basic.ip_proto; 1155 1156 switch (flow_keys.basic.n_proto) { 1157 case htons(ETH_P_IP): 1158 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1159 tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src; 1160 tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst; 1161 break; 1162 1163 case htons(ETH_P_IPV6): 1164 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 1165 tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src; 1166 tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst; 1167 break; 1168 default: 1169 goto out; 1170 } 1171 1172 return true; 1173 1174 out: 1175 return false; 1176 } 1177 1178 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) 1179 { 1180 u32 ctstate = 0, ctstate_mask = 0; 1181 1182 mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, 1183 &ctstate, &ctstate_mask); 1184 if (ctstate_mask) 1185 return -EOPNOTSUPP; 1186 1187 ctstate_mask |= MLX5_CT_STATE_TRK_BIT; 1188 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 1189 ctstate, ctstate_mask); 1190 1191 return 0; 1192 } 1193 1194 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) 1195 { 1196 if (!priv || !ct_attr->ct_labels_id) 1197 return; 1198 1199 mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id); 1200 } 1201 1202 int 1203 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, 1204 struct mlx5_flow_spec *spec, 1205 struct flow_cls_offload *f, 1206 struct mlx5_ct_attr *ct_attr, 1207 struct netlink_ext_ack *extack) 1208 { 1209 struct flow_rule *rule = flow_cls_offload_flow_rule(f); 1210 bool trk, est, untrk, unest, new, rpl, unrpl; 1211 struct flow_dissector_key_ct *mask, *key; 1212 u32 ctstate = 0, ctstate_mask = 0; 1213 u16 ct_state_on, ct_state_off; 1214 u16 ct_state, ct_state_mask; 1215 struct flow_match_ct match; 1216 u32 ct_labels[4]; 1217 1218 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) 1219 return 0; 1220 1221 if (!priv) { 1222 NL_SET_ERR_MSG_MOD(extack, 1223 "offload of ct matching isn't available"); 1224 return -EOPNOTSUPP; 1225 } 1226 1227 flow_rule_match_ct(rule, &match); 1228 1229 key = match.key; 1230 mask = match.mask; 1231 1232 ct_state = key->ct_state; 1233 ct_state_mask = mask->ct_state; 1234 1235 if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | 1236 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | 1237 TCA_FLOWER_KEY_CT_FLAGS_NEW | 1238 TCA_FLOWER_KEY_CT_FLAGS_REPLY)) { 1239 NL_SET_ERR_MSG_MOD(extack, 1240 "only ct_state trk, est, new and rpl are supported for offload"); 1241 return -EOPNOTSUPP; 1242 } 1243 1244 ct_state_on = ct_state & ct_state_mask; 1245 ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; 1246 trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1247 new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; 1248 est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1249 rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY; 1250 untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; 1251 unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; 1252 unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; 1253 1254 ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; 1255 ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1256 ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0; 1257 ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; 1258 ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; 1259 ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; 1260 1261 if (new) { 1262 NL_SET_ERR_MSG_MOD(extack, 1263 "matching on ct_state +new isn't supported"); 1264 return -EOPNOTSUPP; 1265 } 1266 1267 if (mask->ct_zone) 1268 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1269 key->ct_zone, MLX5_CT_ZONE_MASK); 1270 if (ctstate_mask) 1271 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, 1272 ctstate, ctstate_mask); 1273 if (mask->ct_mark) 1274 mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, 1275 key->ct_mark, mask->ct_mark); 1276 if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] || 1277 mask->ct_labels[3]) { 1278 ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0]; 1279 ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; 1280 ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; 1281 ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; 1282 if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) 1283 return -EOPNOTSUPP; 1284 mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, 1285 MLX5_CT_LABELS_MASK); 1286 } 1287 1288 return 0; 1289 } 1290 1291 int 1292 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, 1293 struct mlx5_flow_attr *attr, 1294 const struct flow_action_entry *act, 1295 struct netlink_ext_ack *extack) 1296 { 1297 if (!priv) { 1298 NL_SET_ERR_MSG_MOD(extack, 1299 "offload of ct action isn't available"); 1300 return -EOPNOTSUPP; 1301 } 1302 1303 attr->ct_attr.zone = act->ct.zone; 1304 attr->ct_attr.ct_action = act->ct.action; 1305 attr->ct_attr.nf_ft = act->ct.flow_table; 1306 1307 return 0; 1308 } 1309 1310 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, 1311 struct mlx5_tc_ct_pre *pre_ct, 1312 bool nat) 1313 { 1314 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1315 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; 1316 struct mlx5_core_dev *dev = ct_priv->dev; 1317 struct mlx5_flow_table *ft = pre_ct->ft; 1318 struct mlx5_flow_destination dest = {}; 1319 struct mlx5_flow_act flow_act = {}; 1320 struct mlx5_modify_hdr *mod_hdr; 1321 struct mlx5_flow_handle *rule; 1322 struct mlx5_flow_spec *spec; 1323 u32 ctstate; 1324 u16 zone; 1325 int err; 1326 1327 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1328 if (!spec) 1329 return -ENOMEM; 1330 1331 zone = ct_ft->zone & MLX5_CT_ZONE_MASK; 1332 err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type, 1333 ZONE_TO_REG, zone); 1334 if (err) { 1335 ct_dbg("Failed to set zone register mapping"); 1336 goto err_mapping; 1337 } 1338 1339 mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type, 1340 pre_mod_acts.num_actions, 1341 pre_mod_acts.actions); 1342 1343 if (IS_ERR(mod_hdr)) { 1344 err = PTR_ERR(mod_hdr); 1345 ct_dbg("Failed to create pre ct mod hdr"); 1346 goto err_mapping; 1347 } 1348 pre_ct->modify_hdr = mod_hdr; 1349 1350 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1351 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1352 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; 1353 flow_act.modify_hdr = mod_hdr; 1354 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 1355 1356 /* add flow rule */ 1357 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 1358 zone, MLX5_CT_ZONE_MASK); 1359 ctstate = MLX5_CT_STATE_TRK_BIT; 1360 if (nat) 1361 ctstate |= MLX5_CT_STATE_NAT_BIT; 1362 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); 1363 1364 dest.ft = ct_priv->post_ct; 1365 rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); 1366 if (IS_ERR(rule)) { 1367 err = PTR_ERR(rule); 1368 ct_dbg("Failed to add pre ct flow rule zone %d", zone); 1369 goto err_flow_rule; 1370 } 1371 pre_ct->flow_rule = rule; 1372 1373 /* add miss rule */ 1374 dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct; 1375 rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1); 1376 if (IS_ERR(rule)) { 1377 err = PTR_ERR(rule); 1378 ct_dbg("Failed to add pre ct miss rule zone %d", zone); 1379 goto err_miss_rule; 1380 } 1381 pre_ct->miss_rule = rule; 1382 1383 dealloc_mod_hdr_actions(&pre_mod_acts); 1384 kvfree(spec); 1385 return 0; 1386 1387 err_miss_rule: 1388 mlx5_del_flow_rules(pre_ct->flow_rule); 1389 err_flow_rule: 1390 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1391 err_mapping: 1392 dealloc_mod_hdr_actions(&pre_mod_acts); 1393 kvfree(spec); 1394 return err; 1395 } 1396 1397 static void 1398 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, 1399 struct mlx5_tc_ct_pre *pre_ct) 1400 { 1401 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1402 struct mlx5_core_dev *dev = ct_priv->dev; 1403 1404 mlx5_del_flow_rules(pre_ct->flow_rule); 1405 mlx5_del_flow_rules(pre_ct->miss_rule); 1406 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); 1407 } 1408 1409 static int 1410 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, 1411 struct mlx5_tc_ct_pre *pre_ct, 1412 bool nat) 1413 { 1414 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); 1415 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; 1416 struct mlx5_core_dev *dev = ct_priv->dev; 1417 struct mlx5_flow_table_attr ft_attr = {}; 1418 struct mlx5_flow_namespace *ns; 1419 struct mlx5_flow_table *ft; 1420 struct mlx5_flow_group *g; 1421 u32 metadata_reg_c_2_mask; 1422 u32 *flow_group_in; 1423 void *misc; 1424 int err; 1425 1426 ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type); 1427 if (!ns) { 1428 err = -EOPNOTSUPP; 1429 ct_dbg("Failed to get flow namespace"); 1430 return err; 1431 } 1432 1433 flow_group_in = kvzalloc(inlen, GFP_KERNEL); 1434 if (!flow_group_in) 1435 return -ENOMEM; 1436 1437 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; 1438 ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ? 1439 FDB_TC_OFFLOAD : MLX5E_TC_PRIO; 1440 ft_attr.max_fte = 2; 1441 ft_attr.level = 1; 1442 ft = mlx5_create_flow_table(ns, &ft_attr); 1443 if (IS_ERR(ft)) { 1444 err = PTR_ERR(ft); 1445 ct_dbg("Failed to create pre ct table"); 1446 goto out_free; 1447 } 1448 pre_ct->ft = ft; 1449 1450 /* create flow group */ 1451 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); 1452 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); 1453 MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 1454 MLX5_MATCH_MISC_PARAMETERS_2); 1455 1456 misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, 1457 match_criteria.misc_parameters_2); 1458 1459 metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; 1460 metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); 1461 if (nat) 1462 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); 1463 1464 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, 1465 metadata_reg_c_2_mask); 1466 1467 g = mlx5_create_flow_group(ft, flow_group_in); 1468 if (IS_ERR(g)) { 1469 err = PTR_ERR(g); 1470 ct_dbg("Failed to create pre ct group"); 1471 goto err_flow_grp; 1472 } 1473 pre_ct->flow_grp = g; 1474 1475 /* create miss group */ 1476 memset(flow_group_in, 0, inlen); 1477 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); 1478 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); 1479 g = mlx5_create_flow_group(ft, flow_group_in); 1480 if (IS_ERR(g)) { 1481 err = PTR_ERR(g); 1482 ct_dbg("Failed to create pre ct miss group"); 1483 goto err_miss_grp; 1484 } 1485 pre_ct->miss_grp = g; 1486 1487 err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); 1488 if (err) 1489 goto err_add_rules; 1490 1491 kvfree(flow_group_in); 1492 return 0; 1493 1494 err_add_rules: 1495 mlx5_destroy_flow_group(pre_ct->miss_grp); 1496 err_miss_grp: 1497 mlx5_destroy_flow_group(pre_ct->flow_grp); 1498 err_flow_grp: 1499 mlx5_destroy_flow_table(ft); 1500 out_free: 1501 kvfree(flow_group_in); 1502 return err; 1503 } 1504 1505 static void 1506 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, 1507 struct mlx5_tc_ct_pre *pre_ct) 1508 { 1509 tc_ct_pre_ct_del_rules(ct_ft, pre_ct); 1510 mlx5_destroy_flow_group(pre_ct->miss_grp); 1511 mlx5_destroy_flow_group(pre_ct->flow_grp); 1512 mlx5_destroy_flow_table(pre_ct->ft); 1513 } 1514 1515 static int 1516 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) 1517 { 1518 int err; 1519 1520 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); 1521 if (err) 1522 return err; 1523 1524 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); 1525 if (err) 1526 goto err_pre_ct_nat; 1527 1528 return 0; 1529 1530 err_pre_ct_nat: 1531 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1532 return err; 1533 } 1534 1535 static void 1536 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) 1537 { 1538 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); 1539 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); 1540 } 1541 1542 static struct mlx5_ct_ft * 1543 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, 1544 struct nf_flowtable *nf_ft) 1545 { 1546 struct mlx5_ct_ft *ft; 1547 int err; 1548 1549 ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); 1550 if (ft) { 1551 refcount_inc(&ft->refcount); 1552 return ft; 1553 } 1554 1555 ft = kzalloc(sizeof(*ft), GFP_KERNEL); 1556 if (!ft) 1557 return ERR_PTR(-ENOMEM); 1558 1559 err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id); 1560 if (err) 1561 goto err_mapping; 1562 1563 ft->zone = zone; 1564 ft->nf_ft = nf_ft; 1565 ft->ct_priv = ct_priv; 1566 refcount_set(&ft->refcount, 1); 1567 1568 err = mlx5_tc_ct_alloc_pre_ct_tables(ft); 1569 if (err) 1570 goto err_alloc_pre_ct; 1571 1572 err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); 1573 if (err) 1574 goto err_init; 1575 1576 err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, 1577 zone_params); 1578 if (err) 1579 goto err_insert; 1580 1581 err = nf_flow_table_offload_add_cb(ft->nf_ft, 1582 mlx5_tc_ct_block_flow_offload, ft); 1583 if (err) 1584 goto err_add_cb; 1585 1586 return ft; 1587 1588 err_add_cb: 1589 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1590 err_insert: 1591 rhashtable_destroy(&ft->ct_entries_ht); 1592 err_init: 1593 mlx5_tc_ct_free_pre_ct_tables(ft); 1594 err_alloc_pre_ct: 1595 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1596 err_mapping: 1597 kfree(ft); 1598 return ERR_PTR(err); 1599 } 1600 1601 static void 1602 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) 1603 { 1604 struct mlx5_ct_entry *entry = ptr; 1605 1606 mlx5_tc_ct_entry_put(entry); 1607 } 1608 1609 static void 1610 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) 1611 { 1612 if (!refcount_dec_and_test(&ft->refcount)) 1613 return; 1614 1615 nf_flow_table_offload_del_cb(ft->nf_ft, 1616 mlx5_tc_ct_block_flow_offload, ft); 1617 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); 1618 rhashtable_free_and_destroy(&ft->ct_entries_ht, 1619 mlx5_tc_ct_flush_ft_entry, 1620 ct_priv); 1621 mlx5_tc_ct_free_pre_ct_tables(ft); 1622 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); 1623 kfree(ft); 1624 } 1625 1626 /* We translate the tc filter with CT action to the following HW model: 1627 * 1628 * +---------------------+ 1629 * + ft prio (tc chain) + 1630 * + original match + 1631 * +---------------------+ 1632 * | set chain miss mapping 1633 * | set fte_id 1634 * | set tunnel_id 1635 * | do decap 1636 * v 1637 * +---------------------+ 1638 * + pre_ct/pre_ct_nat + if matches +---------------------+ 1639 * + zone+nat match +---------------->+ post_ct (see below) + 1640 * +---------------------+ set zone +---------------------+ 1641 * | set zone 1642 * v 1643 * +--------------------+ 1644 * + CT (nat or no nat) + 1645 * + tuple + zone match + 1646 * +--------------------+ 1647 * | set mark 1648 * | set labels_id 1649 * | set established 1650 * | set zone_restore 1651 * | do nat (if needed) 1652 * v 1653 * +--------------+ 1654 * + post_ct + original filter actions 1655 * + fte_id match +------------------------> 1656 * +--------------+ 1657 */ 1658 static struct mlx5_flow_handle * 1659 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, 1660 struct mlx5e_tc_flow *flow, 1661 struct mlx5_flow_spec *orig_spec, 1662 struct mlx5_flow_attr *attr) 1663 { 1664 bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; 1665 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 1666 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; 1667 u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); 1668 struct mlx5_flow_spec *post_ct_spec = NULL; 1669 struct mlx5_flow_attr *pre_ct_attr; 1670 struct mlx5_modify_hdr *mod_hdr; 1671 struct mlx5_flow_handle *rule; 1672 struct mlx5_ct_flow *ct_flow; 1673 int chain_mapping = 0, err; 1674 struct mlx5_ct_ft *ft; 1675 u32 fte_id = 1; 1676 1677 post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); 1678 ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); 1679 if (!post_ct_spec || !ct_flow) { 1680 kfree(post_ct_spec); 1681 kfree(ct_flow); 1682 return ERR_PTR(-ENOMEM); 1683 } 1684 1685 /* Register for CT established events */ 1686 ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, 1687 attr->ct_attr.nf_ft); 1688 if (IS_ERR(ft)) { 1689 err = PTR_ERR(ft); 1690 ct_dbg("Failed to register to ft callback"); 1691 goto err_ft; 1692 } 1693 ct_flow->ft = ft; 1694 1695 err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id, 1696 MLX5_FTE_ID_MAX, GFP_KERNEL); 1697 if (err) { 1698 netdev_warn(priv->netdev, 1699 "Failed to allocate fte id, err: %d\n", err); 1700 goto err_idr; 1701 } 1702 ct_flow->fte_id = fte_id; 1703 1704 /* Base flow attributes of both rules on original rule attribute */ 1705 ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); 1706 if (!ct_flow->pre_ct_attr) { 1707 err = -ENOMEM; 1708 goto err_alloc_pre; 1709 } 1710 1711 ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); 1712 if (!ct_flow->post_ct_attr) { 1713 err = -ENOMEM; 1714 goto err_alloc_post; 1715 } 1716 1717 pre_ct_attr = ct_flow->pre_ct_attr; 1718 memcpy(pre_ct_attr, attr, attr_sz); 1719 memcpy(ct_flow->post_ct_attr, attr, attr_sz); 1720 1721 /* Modify the original rule's action to fwd and modify, leave decap */ 1722 pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP; 1723 pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | 1724 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1725 1726 /* Write chain miss tag for miss in ct table as we 1727 * don't go though all prios of this chain as normal tc rules 1728 * miss. 1729 */ 1730 err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain, 1731 &chain_mapping); 1732 if (err) { 1733 ct_dbg("Failed to get chain register mapping for chain"); 1734 goto err_get_chain; 1735 } 1736 ct_flow->chain_mapping = chain_mapping; 1737 1738 err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, 1739 CHAIN_TO_REG, chain_mapping); 1740 if (err) { 1741 ct_dbg("Failed to set chain register mapping"); 1742 goto err_mapping; 1743 } 1744 1745 err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type, 1746 FTEID_TO_REG, fte_id); 1747 if (err) { 1748 ct_dbg("Failed to set fte_id register mapping"); 1749 goto err_mapping; 1750 } 1751 1752 /* If original flow is decap, we do it before going into ct table 1753 * so add a rewrite for the tunnel match_id. 1754 */ 1755 if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) && 1756 attr->chain == 0) { 1757 u32 tun_id = mlx5e_tc_get_flow_tun_id(flow); 1758 1759 err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, 1760 ct_priv->ns_type, 1761 TUNNEL_TO_REG, 1762 tun_id); 1763 if (err) { 1764 ct_dbg("Failed to set tunnel register mapping"); 1765 goto err_mapping; 1766 } 1767 } 1768 1769 mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, 1770 pre_mod_acts.num_actions, 1771 pre_mod_acts.actions); 1772 if (IS_ERR(mod_hdr)) { 1773 err = PTR_ERR(mod_hdr); 1774 ct_dbg("Failed to create pre ct mod hdr"); 1775 goto err_mapping; 1776 } 1777 pre_ct_attr->modify_hdr = mod_hdr; 1778 1779 /* Post ct rule matches on fte_id and executes original rule's 1780 * tc rule action 1781 */ 1782 mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG, 1783 fte_id, MLX5_FTE_ID_MASK); 1784 1785 /* Put post_ct rule on post_ct flow table */ 1786 ct_flow->post_ct_attr->chain = 0; 1787 ct_flow->post_ct_attr->prio = 0; 1788 ct_flow->post_ct_attr->ft = ct_priv->post_ct; 1789 1790 ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE; 1791 ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE; 1792 ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); 1793 rule = mlx5_tc_rule_insert(priv, post_ct_spec, 1794 ct_flow->post_ct_attr); 1795 ct_flow->post_ct_rule = rule; 1796 if (IS_ERR(ct_flow->post_ct_rule)) { 1797 err = PTR_ERR(ct_flow->post_ct_rule); 1798 ct_dbg("Failed to add post ct rule"); 1799 goto err_insert_post_ct; 1800 } 1801 1802 /* Change original rule point to ct table */ 1803 pre_ct_attr->dest_chain = 0; 1804 pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft; 1805 ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec, 1806 pre_ct_attr); 1807 if (IS_ERR(ct_flow->pre_ct_rule)) { 1808 err = PTR_ERR(ct_flow->pre_ct_rule); 1809 ct_dbg("Failed to add pre ct rule"); 1810 goto err_insert_orig; 1811 } 1812 1813 attr->ct_attr.ct_flow = ct_flow; 1814 dealloc_mod_hdr_actions(&pre_mod_acts); 1815 kfree(post_ct_spec); 1816 1817 return rule; 1818 1819 err_insert_orig: 1820 mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule, 1821 ct_flow->post_ct_attr); 1822 err_insert_post_ct: 1823 mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); 1824 err_mapping: 1825 dealloc_mod_hdr_actions(&pre_mod_acts); 1826 mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); 1827 err_get_chain: 1828 kfree(ct_flow->post_ct_attr); 1829 err_alloc_post: 1830 kfree(ct_flow->pre_ct_attr); 1831 err_alloc_pre: 1832 idr_remove(&ct_priv->fte_ids, fte_id); 1833 err_idr: 1834 mlx5_tc_ct_del_ft_cb(ct_priv, ft); 1835 err_ft: 1836 kfree(post_ct_spec); 1837 kfree(ct_flow); 1838 netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); 1839 return ERR_PTR(err); 1840 } 1841 1842 static struct mlx5_flow_handle * 1843 __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, 1844 struct mlx5_flow_spec *orig_spec, 1845 struct mlx5_flow_attr *attr, 1846 struct mlx5e_tc_mod_hdr_acts *mod_acts) 1847 { 1848 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 1849 u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type); 1850 struct mlx5_flow_attr *pre_ct_attr; 1851 struct mlx5_modify_hdr *mod_hdr; 1852 struct mlx5_flow_handle *rule; 1853 struct mlx5_ct_flow *ct_flow; 1854 int err; 1855 1856 ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); 1857 if (!ct_flow) 1858 return ERR_PTR(-ENOMEM); 1859 1860 /* Base esw attributes on original rule attribute */ 1861 pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); 1862 if (!pre_ct_attr) { 1863 err = -ENOMEM; 1864 goto err_attr; 1865 } 1866 1867 memcpy(pre_ct_attr, attr, attr_sz); 1868 1869 err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0); 1870 if (err) { 1871 ct_dbg("Failed to set register for ct clear"); 1872 goto err_set_registers; 1873 } 1874 1875 mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, 1876 mod_acts->num_actions, 1877 mod_acts->actions); 1878 if (IS_ERR(mod_hdr)) { 1879 err = PTR_ERR(mod_hdr); 1880 ct_dbg("Failed to add create ct clear mod hdr"); 1881 goto err_set_registers; 1882 } 1883 1884 pre_ct_attr->modify_hdr = mod_hdr; 1885 pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; 1886 1887 rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr); 1888 if (IS_ERR(rule)) { 1889 err = PTR_ERR(rule); 1890 ct_dbg("Failed to add ct clear rule"); 1891 goto err_insert; 1892 } 1893 1894 attr->ct_attr.ct_flow = ct_flow; 1895 ct_flow->pre_ct_attr = pre_ct_attr; 1896 ct_flow->pre_ct_rule = rule; 1897 return rule; 1898 1899 err_insert: 1900 mlx5_modify_header_dealloc(priv->mdev, mod_hdr); 1901 err_set_registers: 1902 netdev_warn(priv->netdev, 1903 "Failed to offload ct clear flow, err %d\n", err); 1904 kfree(pre_ct_attr); 1905 err_attr: 1906 kfree(ct_flow); 1907 1908 return ERR_PTR(err); 1909 } 1910 1911 struct mlx5_flow_handle * 1912 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, 1913 struct mlx5e_tc_flow *flow, 1914 struct mlx5_flow_spec *spec, 1915 struct mlx5_flow_attr *attr, 1916 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) 1917 { 1918 bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; 1919 struct mlx5_flow_handle *rule; 1920 1921 if (!priv) 1922 return ERR_PTR(-EOPNOTSUPP); 1923 1924 mutex_lock(&priv->control_lock); 1925 1926 if (clear_action) 1927 rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts); 1928 else 1929 rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr); 1930 mutex_unlock(&priv->control_lock); 1931 1932 return rule; 1933 } 1934 1935 static void 1936 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, 1937 struct mlx5e_tc_flow *flow, 1938 struct mlx5_ct_flow *ct_flow) 1939 { 1940 struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr; 1941 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); 1942 1943 mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, 1944 pre_ct_attr); 1945 mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); 1946 1947 if (ct_flow->post_ct_rule) { 1948 mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule, 1949 ct_flow->post_ct_attr); 1950 mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); 1951 idr_remove(&ct_priv->fte_ids, ct_flow->fte_id); 1952 mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); 1953 } 1954 1955 kfree(ct_flow->pre_ct_attr); 1956 kfree(ct_flow->post_ct_attr); 1957 kfree(ct_flow); 1958 } 1959 1960 void 1961 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, 1962 struct mlx5e_tc_flow *flow, 1963 struct mlx5_flow_attr *attr) 1964 { 1965 struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow; 1966 1967 /* We are called on error to clean up stuff from parsing 1968 * but we don't have anything for now 1969 */ 1970 if (!ct_flow) 1971 return; 1972 1973 mutex_lock(&priv->control_lock); 1974 __mlx5_tc_ct_delete_flow(priv, flow, ct_flow); 1975 mutex_unlock(&priv->control_lock); 1976 } 1977 1978 static int 1979 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, 1980 const char **err_msg) 1981 { 1982 if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) { 1983 *err_msg = "firmware level support is missing"; 1984 return -EOPNOTSUPP; 1985 } 1986 1987 if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { 1988 /* vlan workaround should be avoided for multi chain rules. 1989 * This is just a sanity check as pop vlan action should 1990 * be supported by any FW that supports ignore_flow_level 1991 */ 1992 1993 *err_msg = "firmware vlan actions support is missing"; 1994 return -EOPNOTSUPP; 1995 } 1996 1997 if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, 1998 fdb_modify_header_fwd_to_table)) { 1999 /* CT always writes to registers which are mod header actions. 2000 * Therefore, mod header and goto is required 2001 */ 2002 2003 *err_msg = "firmware fwd and modify support is missing"; 2004 return -EOPNOTSUPP; 2005 } 2006 2007 if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { 2008 *err_msg = "register loopback isn't supported"; 2009 return -EOPNOTSUPP; 2010 } 2011 2012 return 0; 2013 } 2014 2015 static int 2016 mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv, 2017 const char **err_msg) 2018 { 2019 if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { 2020 *err_msg = "firmware level support is missing"; 2021 return -EOPNOTSUPP; 2022 } 2023 2024 return 0; 2025 } 2026 2027 static int 2028 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, 2029 enum mlx5_flow_namespace_type ns_type, 2030 const char **err_msg) 2031 { 2032 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 2033 2034 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) 2035 /* cannot restore chain ID on HW miss */ 2036 2037 *err_msg = "tc skb extension missing"; 2038 return -EOPNOTSUPP; 2039 #endif 2040 if (ns_type == MLX5_FLOW_NAMESPACE_FDB) 2041 return mlx5_tc_ct_init_check_esw_support(esw, err_msg); 2042 else 2043 return mlx5_tc_ct_init_check_nic_support(priv, err_msg); 2044 } 2045 2046 #define INIT_ERR_PREFIX "tc ct offload init failed" 2047 2048 struct mlx5_tc_ct_priv * 2049 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, 2050 struct mod_hdr_tbl *mod_hdr, 2051 enum mlx5_flow_namespace_type ns_type) 2052 { 2053 struct mlx5_tc_ct_priv *ct_priv; 2054 struct mlx5_core_dev *dev; 2055 const char *msg; 2056 int err; 2057 2058 dev = priv->mdev; 2059 err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg); 2060 if (err) { 2061 mlx5_core_warn(dev, 2062 "tc ct offload not supported, %s\n", 2063 msg); 2064 goto err_support; 2065 } 2066 2067 ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); 2068 if (!ct_priv) 2069 goto err_alloc; 2070 2071 ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true); 2072 if (IS_ERR(ct_priv->zone_mapping)) { 2073 err = PTR_ERR(ct_priv->zone_mapping); 2074 goto err_mapping_zone; 2075 } 2076 2077 ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true); 2078 if (IS_ERR(ct_priv->labels_mapping)) { 2079 err = PTR_ERR(ct_priv->labels_mapping); 2080 goto err_mapping_labels; 2081 } 2082 2083 spin_lock_init(&ct_priv->ht_lock); 2084 ct_priv->ns_type = ns_type; 2085 ct_priv->chains = chains; 2086 ct_priv->netdev = priv->netdev; 2087 ct_priv->dev = priv->mdev; 2088 ct_priv->mod_hdr_tbl = mod_hdr; 2089 ct_priv->ct = mlx5_chains_create_global_table(chains); 2090 if (IS_ERR(ct_priv->ct)) { 2091 err = PTR_ERR(ct_priv->ct); 2092 mlx5_core_warn(dev, 2093 "%s, failed to create ct table err: %d\n", 2094 INIT_ERR_PREFIX, err); 2095 goto err_ct_tbl; 2096 } 2097 2098 ct_priv->ct_nat = mlx5_chains_create_global_table(chains); 2099 if (IS_ERR(ct_priv->ct_nat)) { 2100 err = PTR_ERR(ct_priv->ct_nat); 2101 mlx5_core_warn(dev, 2102 "%s, failed to create ct nat table err: %d\n", 2103 INIT_ERR_PREFIX, err); 2104 goto err_ct_nat_tbl; 2105 } 2106 2107 ct_priv->post_ct = mlx5_chains_create_global_table(chains); 2108 if (IS_ERR(ct_priv->post_ct)) { 2109 err = PTR_ERR(ct_priv->post_ct); 2110 mlx5_core_warn(dev, 2111 "%s, failed to create post ct table err: %d\n", 2112 INIT_ERR_PREFIX, err); 2113 goto err_post_ct_tbl; 2114 } 2115 2116 idr_init(&ct_priv->fte_ids); 2117 mutex_init(&ct_priv->control_lock); 2118 rhashtable_init(&ct_priv->zone_ht, &zone_params); 2119 rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); 2120 rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params); 2121 2122 return ct_priv; 2123 2124 err_post_ct_tbl: 2125 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); 2126 err_ct_nat_tbl: 2127 mlx5_chains_destroy_global_table(chains, ct_priv->ct); 2128 err_ct_tbl: 2129 mapping_destroy(ct_priv->labels_mapping); 2130 err_mapping_labels: 2131 mapping_destroy(ct_priv->zone_mapping); 2132 err_mapping_zone: 2133 kfree(ct_priv); 2134 err_alloc: 2135 err_support: 2136 2137 return NULL; 2138 } 2139 2140 void 2141 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) 2142 { 2143 struct mlx5_fs_chains *chains; 2144 2145 if (!ct_priv) 2146 return; 2147 2148 chains = ct_priv->chains; 2149 2150 mlx5_chains_destroy_global_table(chains, ct_priv->post_ct); 2151 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); 2152 mlx5_chains_destroy_global_table(chains, ct_priv->ct); 2153 mapping_destroy(ct_priv->zone_mapping); 2154 mapping_destroy(ct_priv->labels_mapping); 2155 2156 rhashtable_destroy(&ct_priv->ct_tuples_ht); 2157 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); 2158 rhashtable_destroy(&ct_priv->zone_ht); 2159 mutex_destroy(&ct_priv->control_lock); 2160 idr_destroy(&ct_priv->fte_ids); 2161 kfree(ct_priv); 2162 } 2163 2164 bool 2165 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, 2166 struct sk_buff *skb, u8 zone_restore_id) 2167 { 2168 struct mlx5_ct_tuple tuple = {}; 2169 struct mlx5_ct_entry *entry; 2170 u16 zone; 2171 2172 if (!ct_priv || !zone_restore_id) 2173 return true; 2174 2175 if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone)) 2176 return false; 2177 2178 if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) 2179 return false; 2180 2181 spin_lock(&ct_priv->ht_lock); 2182 2183 entry = mlx5_tc_ct_entry_get(ct_priv, &tuple); 2184 if (!entry) { 2185 spin_unlock(&ct_priv->ht_lock); 2186 return false; 2187 } 2188 2189 if (IS_ERR(entry)) { 2190 spin_unlock(&ct_priv->ht_lock); 2191 return false; 2192 } 2193 spin_unlock(&ct_priv->ht_lock); 2194 2195 tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); 2196 __mlx5_tc_ct_entry_put(entry); 2197 2198 return true; 2199 } 2200