1 // SPDX-License-Identifier: GPL-2.0-only 2 /**************************************************************************** 3 * Driver for Solarflare network controllers and boards 4 * Copyright 2023, Advanced Micro Devices, Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published 8 * by the Free Software Foundation, incorporated herein by reference. 9 */ 10 11 #include "tc_conntrack.h" 12 #include "tc.h" 13 #include "mae.h" 14 15 static int efx_tc_flow_block(enum tc_setup_type type, void *type_data, 16 void *cb_priv); 17 18 static const struct rhashtable_params efx_tc_ct_zone_ht_params = { 19 .key_len = offsetof(struct efx_tc_ct_zone, linkage), 20 .key_offset = 0, 21 .head_offset = offsetof(struct efx_tc_ct_zone, linkage), 22 }; 23 24 static const struct rhashtable_params efx_tc_ct_ht_params = { 25 .key_len = offsetof(struct efx_tc_ct_entry, linkage), 26 .key_offset = 0, 27 .head_offset = offsetof(struct efx_tc_ct_entry, linkage), 28 }; 29 30 static void efx_tc_ct_zone_free(void *ptr, void *arg) 31 { 32 struct efx_tc_ct_zone *zone = ptr; 33 struct efx_nic *efx = zone->efx; 34 35 netif_err(efx, drv, efx->net_dev, 36 "tc ct_zone %u still present at teardown, removing\n", 37 zone->zone); 38 39 nf_flow_table_offload_del_cb(zone->nf_ft, efx_tc_flow_block, zone); 40 kfree(zone); 41 } 42 43 static void efx_tc_ct_free(void *ptr, void *arg) 44 { 45 struct efx_tc_ct_entry *conn = ptr; 46 struct efx_nic *efx = arg; 47 48 netif_err(efx, drv, efx->net_dev, 49 "tc ct_entry %lx still present at teardown\n", 50 conn->cookie); 51 52 /* We can release the counter, but we can't remove the CT itself 53 * from hardware because the table meta is already gone. 54 */ 55 efx_tc_flower_release_counter(efx, conn->cnt); 56 kfree(conn); 57 } 58 59 int efx_tc_init_conntrack(struct efx_nic *efx) 60 { 61 int rc; 62 63 rc = rhashtable_init(&efx->tc->ct_zone_ht, &efx_tc_ct_zone_ht_params); 64 if (rc < 0) 65 goto fail_ct_zone_ht; 66 rc = rhashtable_init(&efx->tc->ct_ht, &efx_tc_ct_ht_params); 67 if (rc < 0) 68 goto fail_ct_ht; 69 return 0; 70 fail_ct_ht: 71 rhashtable_destroy(&efx->tc->ct_zone_ht); 72 fail_ct_zone_ht: 73 return rc; 74 } 75 76 /* Only call this in init failure teardown. 77 * Normal exit should fini instead as there may be entries in the table. 78 */ 79 void efx_tc_destroy_conntrack(struct efx_nic *efx) 80 { 81 rhashtable_destroy(&efx->tc->ct_ht); 82 rhashtable_destroy(&efx->tc->ct_zone_ht); 83 } 84 85 void efx_tc_fini_conntrack(struct efx_nic *efx) 86 { 87 rhashtable_free_and_destroy(&efx->tc->ct_zone_ht, efx_tc_ct_zone_free, NULL); 88 rhashtable_free_and_destroy(&efx->tc->ct_ht, efx_tc_ct_free, efx); 89 } 90 91 #define EFX_NF_TCP_FLAG(flg) cpu_to_be16(be32_to_cpu(TCP_FLAG_##flg) >> 16) 92 93 static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr, 94 struct efx_tc_ct_entry *conn) 95 { 96 struct flow_dissector *dissector = fr->match.dissector; 97 unsigned char ipv = 0; 98 bool tcp = false; 99 100 if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_CONTROL)) { 101 struct flow_match_control fm; 102 103 flow_rule_match_control(fr, &fm); 104 if (IS_ALL_ONES(fm.mask->addr_type)) 105 switch (fm.key->addr_type) { 106 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 107 ipv = 4; 108 break; 109 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 110 ipv = 6; 111 break; 112 default: 113 break; 114 } 115 } 116 117 if (!ipv) { 118 netif_dbg(efx, drv, efx->net_dev, 119 "Conntrack missing ipv specification\n"); 120 return -EOPNOTSUPP; 121 } 122 123 if (dissector->used_keys & 124 ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | 125 BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | 126 BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | 127 BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | 128 BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) | 129 BIT_ULL(FLOW_DISSECTOR_KEY_TCP) | 130 BIT_ULL(FLOW_DISSECTOR_KEY_META))) { 131 netif_dbg(efx, drv, efx->net_dev, 132 "Unsupported conntrack keys %#llx\n", 133 dissector->used_keys); 134 return -EOPNOTSUPP; 135 } 136 137 if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_BASIC)) { 138 struct flow_match_basic fm; 139 140 flow_rule_match_basic(fr, &fm); 141 if (!IS_ALL_ONES(fm.mask->n_proto)) { 142 netif_dbg(efx, drv, efx->net_dev, 143 "Conntrack eth_proto is not exact-match; mask %04x\n", 144 ntohs(fm.mask->n_proto)); 145 return -EOPNOTSUPP; 146 } 147 conn->eth_proto = fm.key->n_proto; 148 if (conn->eth_proto != (ipv == 4 ? htons(ETH_P_IP) 149 : htons(ETH_P_IPV6))) { 150 netif_dbg(efx, drv, efx->net_dev, 151 "Conntrack eth_proto is not IPv%u, is %04x\n", 152 ipv, ntohs(conn->eth_proto)); 153 return -EOPNOTSUPP; 154 } 155 if (!IS_ALL_ONES(fm.mask->ip_proto)) { 156 netif_dbg(efx, drv, efx->net_dev, 157 "Conntrack ip_proto is not exact-match; mask %02x\n", 158 fm.mask->ip_proto); 159 return -EOPNOTSUPP; 160 } 161 conn->ip_proto = fm.key->ip_proto; 162 switch (conn->ip_proto) { 163 case IPPROTO_TCP: 164 tcp = true; 165 break; 166 case IPPROTO_UDP: 167 break; 168 default: 169 netif_dbg(efx, drv, efx->net_dev, 170 "Conntrack ip_proto not TCP or UDP, is %02x\n", 171 conn->ip_proto); 172 return -EOPNOTSUPP; 173 } 174 } else { 175 netif_dbg(efx, drv, efx->net_dev, 176 "Conntrack missing eth_proto, ip_proto\n"); 177 return -EOPNOTSUPP; 178 } 179 180 if (ipv == 4 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { 181 struct flow_match_ipv4_addrs fm; 182 183 flow_rule_match_ipv4_addrs(fr, &fm); 184 if (!IS_ALL_ONES(fm.mask->src)) { 185 netif_dbg(efx, drv, efx->net_dev, 186 "Conntrack ipv4.src is not exact-match; mask %08x\n", 187 ntohl(fm.mask->src)); 188 return -EOPNOTSUPP; 189 } 190 conn->src_ip = fm.key->src; 191 if (!IS_ALL_ONES(fm.mask->dst)) { 192 netif_dbg(efx, drv, efx->net_dev, 193 "Conntrack ipv4.dst is not exact-match; mask %08x\n", 194 ntohl(fm.mask->dst)); 195 return -EOPNOTSUPP; 196 } 197 conn->dst_ip = fm.key->dst; 198 } else if (ipv == 6 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { 199 struct flow_match_ipv6_addrs fm; 200 201 flow_rule_match_ipv6_addrs(fr, &fm); 202 if (!efx_ipv6_addr_all_ones(&fm.mask->src)) { 203 netif_dbg(efx, drv, efx->net_dev, 204 "Conntrack ipv6.src is not exact-match; mask %pI6\n", 205 &fm.mask->src); 206 return -EOPNOTSUPP; 207 } 208 conn->src_ip6 = fm.key->src; 209 if (!efx_ipv6_addr_all_ones(&fm.mask->dst)) { 210 netif_dbg(efx, drv, efx->net_dev, 211 "Conntrack ipv6.dst is not exact-match; mask %pI6\n", 212 &fm.mask->dst); 213 return -EOPNOTSUPP; 214 } 215 conn->dst_ip6 = fm.key->dst; 216 } else { 217 netif_dbg(efx, drv, efx->net_dev, 218 "Conntrack missing IPv%u addrs\n", ipv); 219 return -EOPNOTSUPP; 220 } 221 222 if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_PORTS)) { 223 struct flow_match_ports fm; 224 225 flow_rule_match_ports(fr, &fm); 226 if (!IS_ALL_ONES(fm.mask->src)) { 227 netif_dbg(efx, drv, efx->net_dev, 228 "Conntrack ports.src is not exact-match; mask %04x\n", 229 ntohs(fm.mask->src)); 230 return -EOPNOTSUPP; 231 } 232 conn->l4_sport = fm.key->src; 233 if (!IS_ALL_ONES(fm.mask->dst)) { 234 netif_dbg(efx, drv, efx->net_dev, 235 "Conntrack ports.dst is not exact-match; mask %04x\n", 236 ntohs(fm.mask->dst)); 237 return -EOPNOTSUPP; 238 } 239 conn->l4_dport = fm.key->dst; 240 } else { 241 netif_dbg(efx, drv, efx->net_dev, "Conntrack missing L4 ports\n"); 242 return -EOPNOTSUPP; 243 } 244 245 if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_TCP)) { 246 __be16 tcp_interesting_flags; 247 struct flow_match_tcp fm; 248 249 if (!tcp) { 250 netif_dbg(efx, drv, efx->net_dev, 251 "Conntrack matching on TCP keys but ipproto is not tcp\n"); 252 return -EOPNOTSUPP; 253 } 254 flow_rule_match_tcp(fr, &fm); 255 tcp_interesting_flags = EFX_NF_TCP_FLAG(SYN) | 256 EFX_NF_TCP_FLAG(RST) | 257 EFX_NF_TCP_FLAG(FIN); 258 /* If any of the tcp_interesting_flags is set, we always 259 * inhibit CT lookup in LHS (so SW can update CT table). 260 */ 261 if (fm.key->flags & tcp_interesting_flags) { 262 netif_dbg(efx, drv, efx->net_dev, 263 "Unsupported conntrack tcp.flags %04x/%04x\n", 264 ntohs(fm.key->flags), ntohs(fm.mask->flags)); 265 return -EOPNOTSUPP; 266 } 267 /* Other TCP flags cannot be filtered at CT */ 268 if (fm.mask->flags & ~tcp_interesting_flags) { 269 netif_dbg(efx, drv, efx->net_dev, 270 "Unsupported conntrack tcp.flags %04x/%04x\n", 271 ntohs(fm.key->flags), ntohs(fm.mask->flags)); 272 return -EOPNOTSUPP; 273 } 274 } 275 276 return 0; 277 } 278 279 static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone, 280 struct flow_cls_offload *tc) 281 { 282 struct flow_rule *fr = flow_cls_offload_flow_rule(tc); 283 struct efx_tc_ct_entry *conn, *old; 284 struct efx_nic *efx = ct_zone->efx; 285 const struct flow_action_entry *fa; 286 struct efx_tc_counter *cnt; 287 int rc, i; 288 289 if (WARN_ON(!efx->tc)) 290 return -ENETDOWN; 291 if (WARN_ON(!efx->tc->up)) 292 return -ENETDOWN; 293 294 conn = kzalloc(sizeof(*conn), GFP_USER); 295 if (!conn) 296 return -ENOMEM; 297 conn->cookie = tc->cookie; 298 old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht, 299 &conn->linkage, 300 efx_tc_ct_ht_params); 301 if (IS_ERR(old)) { 302 rc = PTR_ERR(old); 303 goto release; 304 } else if (old) { 305 netif_dbg(efx, drv, efx->net_dev, 306 "Already offloaded conntrack (cookie %lx)\n", tc->cookie); 307 rc = -EEXIST; 308 goto release; 309 } 310 311 /* Parse match */ 312 conn->zone = ct_zone; 313 rc = efx_tc_ct_parse_match(efx, fr, conn); 314 if (rc) 315 goto release; 316 317 /* Parse actions */ 318 flow_action_for_each(i, fa, &fr->action) { 319 switch (fa->id) { 320 case FLOW_ACTION_CT_METADATA: 321 conn->mark = fa->ct_metadata.mark; 322 if (memchr_inv(fa->ct_metadata.labels, 0, sizeof(fa->ct_metadata.labels))) { 323 netif_dbg(efx, drv, efx->net_dev, 324 "Setting CT label not supported\n"); 325 rc = -EOPNOTSUPP; 326 goto release; 327 } 328 break; 329 default: 330 netif_dbg(efx, drv, efx->net_dev, 331 "Unhandled action %u for conntrack\n", fa->id); 332 rc = -EOPNOTSUPP; 333 goto release; 334 } 335 } 336 337 /* fill in defaults for unmangled values */ 338 conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip; 339 conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport; 340 341 cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT); 342 if (IS_ERR(cnt)) { 343 rc = PTR_ERR(cnt); 344 goto release; 345 } 346 conn->cnt = cnt; 347 348 rc = efx_mae_insert_ct(efx, conn); 349 if (rc) { 350 netif_dbg(efx, drv, efx->net_dev, 351 "Failed to insert conntrack, %d\n", rc); 352 goto release; 353 } 354 mutex_lock(&ct_zone->mutex); 355 list_add_tail(&conn->list, &ct_zone->cts); 356 mutex_unlock(&ct_zone->mutex); 357 return 0; 358 release: 359 if (conn->cnt) 360 efx_tc_flower_release_counter(efx, conn->cnt); 361 if (!old) 362 rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage, 363 efx_tc_ct_ht_params); 364 kfree(conn); 365 return rc; 366 } 367 368 /* Caller must follow with efx_tc_ct_remove_finish() after RCU grace period! */ 369 static void efx_tc_ct_remove(struct efx_nic *efx, struct efx_tc_ct_entry *conn) 370 { 371 int rc; 372 373 /* Remove it from HW */ 374 rc = efx_mae_remove_ct(efx, conn); 375 /* Delete it from SW */ 376 rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage, 377 efx_tc_ct_ht_params); 378 if (rc) { 379 netif_err(efx, drv, efx->net_dev, 380 "Failed to remove conntrack %lx from hw, rc %d\n", 381 conn->cookie, rc); 382 } else { 383 netif_dbg(efx, drv, efx->net_dev, "Removed conntrack %lx\n", 384 conn->cookie); 385 } 386 } 387 388 static void efx_tc_ct_remove_finish(struct efx_nic *efx, struct efx_tc_ct_entry *conn) 389 { 390 /* Remove related CT counter. This is delayed after the conn object we 391 * are working with has been successfully removed. This protects the 392 * counter from being used-after-free inside efx_tc_ct_stats. 393 */ 394 efx_tc_flower_release_counter(efx, conn->cnt); 395 kfree(conn); 396 } 397 398 static int efx_tc_ct_destroy(struct efx_tc_ct_zone *ct_zone, 399 struct flow_cls_offload *tc) 400 { 401 struct efx_nic *efx = ct_zone->efx; 402 struct efx_tc_ct_entry *conn; 403 404 conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie, 405 efx_tc_ct_ht_params); 406 if (!conn) { 407 netif_warn(efx, drv, efx->net_dev, 408 "Conntrack %lx not found to remove\n", tc->cookie); 409 return -ENOENT; 410 } 411 412 mutex_lock(&ct_zone->mutex); 413 list_del(&conn->list); 414 efx_tc_ct_remove(efx, conn); 415 mutex_unlock(&ct_zone->mutex); 416 synchronize_rcu(); 417 efx_tc_ct_remove_finish(efx, conn); 418 return 0; 419 } 420 421 static int efx_tc_ct_stats(struct efx_tc_ct_zone *ct_zone, 422 struct flow_cls_offload *tc) 423 { 424 struct efx_nic *efx = ct_zone->efx; 425 struct efx_tc_ct_entry *conn; 426 struct efx_tc_counter *cnt; 427 428 rcu_read_lock(); 429 conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie, 430 efx_tc_ct_ht_params); 431 if (!conn) { 432 netif_warn(efx, drv, efx->net_dev, 433 "Conntrack %lx not found for stats\n", tc->cookie); 434 rcu_read_unlock(); 435 return -ENOENT; 436 } 437 438 cnt = conn->cnt; 439 spin_lock_bh(&cnt->lock); 440 /* Report only last use */ 441 flow_stats_update(&tc->stats, 0, 0, 0, cnt->touched, 442 FLOW_ACTION_HW_STATS_DELAYED); 443 spin_unlock_bh(&cnt->lock); 444 rcu_read_unlock(); 445 446 return 0; 447 } 448 449 static int efx_tc_flow_block(enum tc_setup_type type, void *type_data, 450 void *cb_priv) 451 { 452 struct flow_cls_offload *tcb = type_data; 453 struct efx_tc_ct_zone *ct_zone = cb_priv; 454 455 if (type != TC_SETUP_CLSFLOWER) 456 return -EOPNOTSUPP; 457 458 switch (tcb->command) { 459 case FLOW_CLS_REPLACE: 460 return efx_tc_ct_replace(ct_zone, tcb); 461 case FLOW_CLS_DESTROY: 462 return efx_tc_ct_destroy(ct_zone, tcb); 463 case FLOW_CLS_STATS: 464 return efx_tc_ct_stats(ct_zone, tcb); 465 default: 466 break; 467 } 468 469 return -EOPNOTSUPP; 470 } 471 472 struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone, 473 struct nf_flowtable *ct_ft) 474 { 475 struct efx_tc_ct_zone *ct_zone, *old; 476 int rc; 477 478 ct_zone = kzalloc(sizeof(*ct_zone), GFP_USER); 479 if (!ct_zone) 480 return ERR_PTR(-ENOMEM); 481 ct_zone->zone = zone; 482 old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_zone_ht, 483 &ct_zone->linkage, 484 efx_tc_ct_zone_ht_params); 485 if (old) { 486 /* don't need our new entry */ 487 kfree(ct_zone); 488 if (IS_ERR(old)) /* oh dear, it's actually an error */ 489 return ERR_CAST(old); 490 if (!refcount_inc_not_zero(&old->ref)) 491 return ERR_PTR(-EAGAIN); 492 /* existing entry found */ 493 WARN_ON_ONCE(old->nf_ft != ct_ft); 494 netif_dbg(efx, drv, efx->net_dev, 495 "Found existing ct_zone for %u\n", zone); 496 return old; 497 } 498 ct_zone->nf_ft = ct_ft; 499 ct_zone->efx = efx; 500 INIT_LIST_HEAD(&ct_zone->cts); 501 mutex_init(&ct_zone->mutex); 502 rc = nf_flow_table_offload_add_cb(ct_ft, efx_tc_flow_block, ct_zone); 503 netif_dbg(efx, drv, efx->net_dev, "Adding new ct_zone for %u, rc %d\n", 504 zone, rc); 505 if (rc < 0) 506 goto fail; 507 refcount_set(&ct_zone->ref, 1); 508 return ct_zone; 509 fail: 510 rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage, 511 efx_tc_ct_zone_ht_params); 512 kfree(ct_zone); 513 return ERR_PTR(rc); 514 } 515 516 void efx_tc_ct_unregister_zone(struct efx_nic *efx, 517 struct efx_tc_ct_zone *ct_zone) 518 { 519 struct efx_tc_ct_entry *conn, *next; 520 521 if (!refcount_dec_and_test(&ct_zone->ref)) 522 return; /* still in use */ 523 nf_flow_table_offload_del_cb(ct_zone->nf_ft, efx_tc_flow_block, ct_zone); 524 rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage, 525 efx_tc_ct_zone_ht_params); 526 mutex_lock(&ct_zone->mutex); 527 list_for_each_entry(conn, &ct_zone->cts, list) 528 efx_tc_ct_remove(efx, conn); 529 synchronize_rcu(); 530 /* need to use _safe because efx_tc_ct_remove_finish() frees conn */ 531 list_for_each_entry_safe(conn, next, &ct_zone->cts, list) 532 efx_tc_ct_remove_finish(efx, conn); 533 mutex_unlock(&ct_zone->mutex); 534 mutex_destroy(&ct_zone->mutex); 535 netif_dbg(efx, drv, efx->net_dev, "Removed ct_zone for %u\n", 536 ct_zone->zone); 537 kfree(ct_zone); 538 } 539