// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2023, Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "tc_conntrack.h"
#include "tc.h"
#include "mae.h"

static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
			     void *cb_priv);

static const struct rhashtable_params efx_tc_ct_zone_ht_params = {
	.key_len	= offsetof(struct efx_tc_ct_zone, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_ct_zone, linkage),
};

static const struct rhashtable_params efx_tc_ct_ht_params = {
	.key_len	= offsetof(struct efx_tc_ct_entry, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_ct_entry, linkage),
};

static void efx_tc_ct_zone_free(void *ptr, void *arg)
{
	struct efx_tc_ct_zone *zone = ptr;
	struct efx_nic *efx = zone->efx;

	netif_err(efx, drv, efx->net_dev,
		  "tc ct_zone %u still present at teardown, removing\n",
		  zone->zone);

	nf_flow_table_offload_del_cb(zone->nf_ft, efx_tc_flow_block, zone);
	kfree(zone);
}

static void efx_tc_ct_free(void *ptr, void *arg)
{
	struct efx_tc_ct_entry *conn = ptr;
	struct efx_nic *efx = arg;

	netif_err(efx, drv, efx->net_dev,
		  "tc ct_entry %lx still present at teardown\n",
		  conn->cookie);

	/* We can release the counter, but we can't remove the CT itself
	 * from hardware because the table meta is already gone.
	 */
	efx_tc_flower_release_counter(efx, conn->cnt);
	kfree(conn);
}

int efx_tc_init_conntrack(struct efx_nic *efx)
{
	int rc;

	rc = rhashtable_init(&efx->tc->ct_zone_ht, &efx_tc_ct_zone_ht_params);
	if (rc < 0)
		goto fail_ct_zone_ht;
	rc = rhashtable_init(&efx->tc->ct_ht, &efx_tc_ct_ht_params);
	if (rc < 0)
		goto fail_ct_ht;
	return 0;
fail_ct_ht:
	rhashtable_destroy(&efx->tc->ct_zone_ht);
fail_ct_zone_ht:
	return rc;
}

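/* A note on the two hashtables initialised above: with .key_offset = 0 and
 * .key_len = offsetof(..., linkage), the hash key is simply everything the
 * structure lays out before its linkage member: in practice the TC cookie
 * for ct_ht (the lookups below pass &tc->cookie directly) and, presumably,
 * the zone ID for ct_zone_ht.
 */
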
/* Only call this in init failure teardown.
 * Normal exit should fini instead as there may be entries in the table.
 */
void efx_tc_destroy_conntrack(struct efx_nic *efx)
{
	rhashtable_destroy(&efx->tc->ct_ht);
	rhashtable_destroy(&efx->tc->ct_zone_ht);
}

void efx_tc_fini_conntrack(struct efx_nic *efx)
{
	rhashtable_free_and_destroy(&efx->tc->ct_zone_ht, efx_tc_ct_zone_free, NULL);
	rhashtable_free_and_destroy(&efx->tc->ct_ht, efx_tc_ct_free, efx);
}

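/* Helper for the TCP flags match below: the TCP_FLAG_* constants from
 * <net/tcp.h> are 32-bit big-endian masks covering the TCP header word that
 * holds data-offset and flags, whereas the flow dissector exposes a 16-bit
 * big-endian flags field.  Shifting down by 16 and re-encoding as __be16
 * lines the two representations up.
 */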
#define EFX_NF_TCP_FLAG(flg)	cpu_to_be16(be32_to_cpu(TCP_FLAG_##flg) >> 16)

static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr,
				 struct efx_tc_ct_entry *conn)
{
	struct flow_dissector *dissector = fr->match.dissector;
	unsigned char ipv = 0;
	bool tcp = false;

	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_control(fr, &fm);
		if (IS_ALL_ONES(fm.mask->addr_type))
			switch (fm.key->addr_type) {
			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
				ipv = 4;
				break;
			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
				ipv = 6;
				break;
			default:
				break;
			}
	}

	if (!ipv) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Conntrack missing ipv specification\n");
		return -EOPNOTSUPP;
	}

	if (dissector->used_keys &
	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_META))) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Unsupported conntrack keys %#llx\n",
			  dissector->used_keys);
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic fm;

		flow_rule_match_basic(fr, &fm);
		if (!IS_ALL_ONES(fm.mask->n_proto)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack eth_proto is not exact-match; mask %04x\n",
				  ntohs(fm.mask->n_proto));
			return -EOPNOTSUPP;
		}
		conn->eth_proto = fm.key->n_proto;
		if (conn->eth_proto != (ipv == 4 ? htons(ETH_P_IP)
						 : htons(ETH_P_IPV6))) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack eth_proto is not IPv%u, is %04x\n",
				  ipv, ntohs(conn->eth_proto));
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(fm.mask->ip_proto)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ip_proto is not exact-match; mask %02x\n",
				  fm.mask->ip_proto);
			return -EOPNOTSUPP;
		}
		conn->ip_proto = fm.key->ip_proto;
		switch (conn->ip_proto) {
		case IPPROTO_TCP:
			tcp = true;
			break;
		case IPPROTO_UDP:
			break;
		default:
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ip_proto not TCP or UDP, is %02x\n",
				  conn->ip_proto);
			return -EOPNOTSUPP;
		}
	} else {
		netif_dbg(efx, drv, efx->net_dev,
			  "Conntrack missing eth_proto, ip_proto\n");
		return -EOPNOTSUPP;
	}

	if (ipv == 4 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
		struct flow_match_ipv4_addrs fm;

		flow_rule_match_ipv4_addrs(fr, &fm);
		if (!IS_ALL_ONES(fm.mask->src)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ipv4.src is not exact-match; mask %08x\n",
				  ntohl(fm.mask->src));
			return -EOPNOTSUPP;
		}
		conn->src_ip = fm.key->src;
		if (!IS_ALL_ONES(fm.mask->dst)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ipv4.dst is not exact-match; mask %08x\n",
				  ntohl(fm.mask->dst));
			return -EOPNOTSUPP;
		}
		conn->dst_ip = fm.key->dst;
	} else if (ipv == 6 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
		struct flow_match_ipv6_addrs fm;

		flow_rule_match_ipv6_addrs(fr, &fm);
		if (!efx_ipv6_addr_all_ones(&fm.mask->src)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ipv6.src is not exact-match; mask %pI6\n",
				  &fm.mask->src);
			return -EOPNOTSUPP;
		}
		conn->src_ip6 = fm.key->src;
		if (!efx_ipv6_addr_all_ones(&fm.mask->dst)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ipv6.dst is not exact-match; mask %pI6\n",
				  &fm.mask->dst);
			return -EOPNOTSUPP;
		}
		conn->dst_ip6 = fm.key->dst;
	} else {
		netif_dbg(efx, drv, efx->net_dev,
			  "Conntrack missing IPv%u addrs\n", ipv);
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports fm;

		flow_rule_match_ports(fr, &fm);
		if (!IS_ALL_ONES(fm.mask->src)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ports.src is not exact-match; mask %04x\n",
				  ntohs(fm.mask->src));
			return -EOPNOTSUPP;
		}
		conn->l4_sport = fm.key->src;
		if (!IS_ALL_ONES(fm.mask->dst)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ports.dst is not exact-match; mask %04x\n",
				  ntohs(fm.mask->dst));
			return -EOPNOTSUPP;
		}
		conn->l4_dport = fm.key->dst;
	} else {
		netif_dbg(efx, drv, efx->net_dev, "Conntrack missing L4 ports\n");
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_TCP)) {
		__be16 tcp_interesting_flags;
		struct flow_match_tcp fm;

		if (!tcp) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack matching on TCP keys but ipproto is not tcp\n");
			return -EOPNOTSUPP;
		}
		flow_rule_match_tcp(fr, &fm);
		tcp_interesting_flags = EFX_NF_TCP_FLAG(SYN) |
					EFX_NF_TCP_FLAG(RST) |
					EFX_NF_TCP_FLAG(FIN);
		/* If any of the tcp_interesting_flags is set, we always
		 * inhibit CT lookup in LHS (so SW can update CT table).
		 */
		if (fm.key->flags & tcp_interesting_flags) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Unsupported conntrack tcp.flags %04x/%04x\n",
				  ntohs(fm.key->flags), ntohs(fm.mask->flags));
			return -EOPNOTSUPP;
		}
		/* Other TCP flags cannot be filtered at CT */
		if (fm.mask->flags & ~tcp_interesting_flags) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Unsupported conntrack tcp.flags %04x/%04x\n",
				  ntohs(fm.key->flags), ntohs(fm.mask->flags));
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
			     struct flow_cls_offload *tc)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct efx_tc_ct_entry *conn, *old;
	struct efx_nic *efx = ct_zone->efx;
	const struct flow_action_entry *fa;
	struct efx_tc_counter *cnt;
	int rc, i;

	if (WARN_ON(!efx->tc))
		return -ENETDOWN;
	if (WARN_ON(!efx->tc->up))
		return -ENETDOWN;

	conn = kzalloc(sizeof(*conn), GFP_USER);
	if (!conn)
		return -ENOMEM;
	conn->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht,
						&conn->linkage,
						efx_tc_ct_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded conntrack (cookie %lx)\n", tc->cookie);
		rc = -EEXIST;
		goto release;
	}

	/* Parse match */
	conn->zone = ct_zone;
	rc = efx_tc_ct_parse_match(efx, fr, conn);
	if (rc)
		goto release;

	/* Parse actions */
	flow_action_for_each(i, fa, &fr->action) {
		switch (fa->id) {
		case FLOW_ACTION_CT_METADATA:
			conn->mark = fa->ct_metadata.mark;
			if (memchr_inv(fa->ct_metadata.labels, 0, sizeof(fa->ct_metadata.labels))) {
				netif_dbg(efx, drv, efx->net_dev,
					  "Setting CT label not supported\n");
				rc = -EOPNOTSUPP;
				goto release;
			}
			break;
		default:
			netif_dbg(efx, drv, efx->net_dev,
				  "Unhandled action %u for conntrack\n", fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}

	/* fill in defaults for unmangled values */
	conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip;
	conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport;

	cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT);
	if (IS_ERR(cnt)) {
		rc = PTR_ERR(cnt);
		goto release;
	}
	conn->cnt = cnt;

	rc = efx_mae_insert_ct(efx, conn);
	if (rc) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Failed to insert conntrack, %d\n", rc);
		goto release;
	}
	mutex_lock(&ct_zone->mutex);
	list_add_tail(&conn->list, &ct_zone->cts);
	mutex_unlock(&ct_zone->mutex);
	return 0;
release:
	if (conn->cnt)
		efx_tc_flower_release_counter(efx, conn->cnt);
	if (!old)
		rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
				       efx_tc_ct_ht_params);
	kfree(conn);
	return rc;
}

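/* Removal is split into two phases: efx_tc_ct_remove() pulls the entry out
 * of hardware and out of ct_ht, then, once an RCU grace period has elapsed,
 * efx_tc_ct_remove_finish() releases the counter and frees the entry.  The
 * grace period is what keeps efx_tc_ct_stats(), which dereferences
 * conn->cnt under rcu_read_lock(), from touching a freed counter.
 */
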
/* Caller must follow with efx_tc_ct_remove_finish() after RCU grace period!
 */
static void efx_tc_ct_remove(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
{
	int rc;

	/* Remove it from HW */
	rc = efx_mae_remove_ct(efx, conn);
	/* Delete it from SW */
	rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
			       efx_tc_ct_ht_params);
	if (rc) {
		netif_err(efx, drv, efx->net_dev,
			  "Failed to remove conntrack %lx from hw, rc %d\n",
			  conn->cookie, rc);
	} else {
		netif_dbg(efx, drv, efx->net_dev, "Removed conntrack %lx\n",
			  conn->cookie);
	}
}

static void efx_tc_ct_remove_finish(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
{
	/* Remove the related CT counter.  This is deferred until after the
	 * conn object we are working with has been successfully removed,
	 * which protects the counter from being used-after-free inside
	 * efx_tc_ct_stats().
	 */
	efx_tc_flower_release_counter(efx, conn->cnt);
	kfree(conn);
}

static int efx_tc_ct_destroy(struct efx_tc_ct_zone *ct_zone,
			     struct flow_cls_offload *tc)
{
	struct efx_nic *efx = ct_zone->efx;
	struct efx_tc_ct_entry *conn;

	conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
				      efx_tc_ct_ht_params);
	if (!conn) {
		netif_warn(efx, drv, efx->net_dev,
			   "Conntrack %lx not found to remove\n", tc->cookie);
		return -ENOENT;
	}

	mutex_lock(&ct_zone->mutex);
	list_del(&conn->list);
	efx_tc_ct_remove(efx, conn);
	mutex_unlock(&ct_zone->mutex);
	synchronize_rcu();
	efx_tc_ct_remove_finish(efx, conn);
	return 0;
}

static int efx_tc_ct_stats(struct efx_tc_ct_zone *ct_zone,
			   struct flow_cls_offload *tc)
{
	struct efx_nic *efx = ct_zone->efx;
	struct efx_tc_ct_entry *conn;
	struct efx_tc_counter *cnt;

	rcu_read_lock();
	conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
				      efx_tc_ct_ht_params);
	if (!conn) {
		netif_warn(efx, drv, efx->net_dev,
			   "Conntrack %lx not found for stats\n", tc->cookie);
		rcu_read_unlock();
		return -ENOENT;
	}

	cnt = conn->cnt;
	spin_lock_bh(&cnt->lock);
	/* Report only last use */
	flow_stats_update(&tc->stats, 0, 0, 0, cnt->touched,
			  FLOW_ACTION_HW_STATS_DELAYED);
	spin_unlock_bh(&cnt->lock);
	rcu_read_unlock();

	return 0;
}

static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
			     void *cb_priv)
{
	struct flow_cls_offload *tcb = type_data;
	struct efx_tc_ct_zone *ct_zone = cb_priv;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (tcb->command) {
	case FLOW_CLS_REPLACE:
		return efx_tc_ct_replace(ct_zone, tcb);
	case FLOW_CLS_DESTROY:
		return efx_tc_ct_destroy(ct_zone, tcb);
	case FLOW_CLS_STATS:
		return efx_tc_ct_stats(ct_zone, tcb);
	default:
		break;
	}

	return -EOPNOTSUPP;
}

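/* Zone registration: callers (the TC action-parsing code, presumably) take a
 * reference on a zone object per ct action that uses it.  The first user
 * installs efx_tc_flow_block() as an offload callback on the zone's
 * nf_flowtable, so conntrack entries in that zone get offered to this driver
 * via FLOW_CLS_REPLACE/DESTROY/STATS above; later users just bump the
 * refcount and share the existing entry.
 */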
struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone,
					       struct nf_flowtable *ct_ft)
{
	struct efx_tc_ct_zone *ct_zone, *old;
	int rc;

	ct_zone = kzalloc(sizeof(*ct_zone), GFP_USER);
	if (!ct_zone)
		return ERR_PTR(-ENOMEM);
	ct_zone->zone = zone;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_zone_ht,
						&ct_zone->linkage,
						efx_tc_ct_zone_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(ct_zone);
		if (!refcount_inc_not_zero(&old->ref))
			return ERR_PTR(-EAGAIN);
		/* existing entry found */
		WARN_ON_ONCE(old->nf_ft != ct_ft);
		netif_dbg(efx, drv, efx->net_dev,
			  "Found existing ct_zone for %u\n", zone);
		return old;
	}
	ct_zone->nf_ft = ct_ft;
	ct_zone->efx = efx;
	INIT_LIST_HEAD(&ct_zone->cts);
	mutex_init(&ct_zone->mutex);
	rc = nf_flow_table_offload_add_cb(ct_ft, efx_tc_flow_block, ct_zone);
	netif_dbg(efx, drv, efx->net_dev, "Adding new ct_zone for %u, rc %d\n",
		  zone, rc);
	if (rc < 0)
		goto fail;
	refcount_set(&ct_zone->ref, 1);
	return ct_zone;
fail:
	rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
			       efx_tc_ct_zone_ht_params);
	kfree(ct_zone);
	return ERR_PTR(rc);
}

void efx_tc_ct_unregister_zone(struct efx_nic *efx,
			       struct efx_tc_ct_zone *ct_zone)
{
	struct efx_tc_ct_entry *conn, *next;

	if (!refcount_dec_and_test(&ct_zone->ref))
		return; /* still in use */
	nf_flow_table_offload_del_cb(ct_zone->nf_ft, efx_tc_flow_block, ct_zone);
	rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
			       efx_tc_ct_zone_ht_params);
	mutex_lock(&ct_zone->mutex);
	list_for_each_entry(conn, &ct_zone->cts, list)
		efx_tc_ct_remove(efx, conn);
	synchronize_rcu();
	/* need to use _safe because efx_tc_ct_remove_finish() frees conn */
	list_for_each_entry_safe(conn, next, &ct_zone->cts, list)
		efx_tc_ct_remove_finish(efx, conn);
	mutex_unlock(&ct_zone->mutex);
	mutex_destroy(&ct_zone->mutex);
	netif_dbg(efx, drv, efx->net_dev, "Removed ct_zone for %u\n",
		  ct_zone->zone);
	kfree(ct_zone);
}