// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2019 Solarflare Communications Inc.
 * Copyright 2020-2022 Xilinx Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <net/pkt_cls.h>
#include <net/vxlan.h>
#include <net/geneve.h>
#include "tc.h"
#include "tc_bindings.h"
#include "mae.h"
#include "ef100_rep.h"
#include "efx.h"

static enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
{
	if (netif_is_vxlan(net_dev))
		return EFX_ENCAP_TYPE_VXLAN;
	if (netif_is_geneve(net_dev))
		return EFX_ENCAP_TYPE_GENEVE;

	return EFX_ENCAP_TYPE_NONE;
}

#define EFX_EFV_PF	NULL
/* Look up the representor information (efv) for a device.
 * May return NULL for the PF (us), or an error pointer for a device that
 * isn't supported as a TC offload endpoint
 */
static struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
						struct net_device *dev)
{
	struct efx_rep *efv;

	if (!dev)
		return ERR_PTR(-EOPNOTSUPP);
	/* Is it us (the PF)? */
	if (dev == efx->net_dev)
		return EFX_EFV_PF;
	/* Is it an efx vfrep at all? */
	if (dev->netdev_ops != &efx_ef100_rep_netdev_ops)
		return ERR_PTR(-EOPNOTSUPP);
	/* Is it ours? We don't support TC rules that include another
	 * EF100's netdevices (not even on another port of the same NIC).
	 */
	efv = netdev_priv(dev);
	if (efv->parent != efx)
		return ERR_PTR(-EOPNOTSUPP);
	return efv;
}

/* Convert a driver-internal vport ID into an internal device (PF or VF) */
static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
{
	u32 mport;

	if (IS_ERR(efv))
		return PTR_ERR(efv);
	if (!efv) /* device is PF (us) */
		efx_mae_mport_uplink(efx, &mport);
	else /* device is repr */
		efx_mae_mport_mport(efx, efv->mport, &mport);
	return mport;
}

/* Convert a driver-internal vport ID into an external device (wire or VF) */
static s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
{
	u32 mport;

	if (IS_ERR(efv))
		return PTR_ERR(efv);
	if (!efv) /* device is PF (us) */
		efx_mae_mport_wire(efx, &mport);
	else /* device is repr */
		efx_mae_mport_mport(efx, efv->mport, &mport);
	return mport;
}

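/* Illustrative note (not part of the original source): the two helpers
 * above differ only in how they treat the PF. The "internal" flavour
 * resolves the PF to its own uplink m-port (the PF netdevice itself),
 * whereas the "external" flavour resolves it to the wire (the physical
 * port); a representor resolves to its representee's m-port either way.
 */
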
static const struct rhashtable_params efx_tc_encap_match_ht_params = {
	.key_len	= offsetof(struct efx_tc_encap_match, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_encap_match, linkage),
};

static const struct rhashtable_params efx_tc_match_action_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_flow_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_flow_rule, linkage),
};

static void efx_tc_free_action_set(struct efx_nic *efx,
				   struct efx_tc_action_set *act, bool in_hw)
{
	/* Failure paths calling this on the 'cursor' action set in_hw=false,
	 * because if the alloc had succeeded we'd've put it in acts.list and
	 * not still have it in act.
	 */
	if (in_hw) {
		efx_mae_free_action_set(efx, act->fw_id);
		/* in_hw is true iff we are on an acts.list; make sure to
		 * remove ourselves from that list before we are freed.
		 */
		list_del(&act->list);
	}
	if (act->count)
		efx_tc_flower_put_counter_index(efx, act->count);
	kfree(act);
}

static void efx_tc_free_action_set_list(struct efx_nic *efx,
					struct efx_tc_action_set_list *acts,
					bool in_hw)
{
	struct efx_tc_action_set *act, *next;

	/* Failure paths set in_hw=false, because usually the acts didn't get
	 * to efx_mae_alloc_action_set_list(); if they did, the failure tree
	 * has a separate efx_mae_free_action_set_list() before calling us.
	 */
	if (in_hw)
		efx_mae_free_action_set_list(efx, acts);
	/* Any act that's on the list will be in_hw even if the list isn't */
	list_for_each_entry_safe(act, next, &acts->list, list)
		efx_tc_free_action_set(efx, act, true);
	/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
}

static void efx_tc_flow_free(void *ptr, void *arg)
{
	struct efx_tc_flow_rule *rule = ptr;
	struct efx_nic *efx = arg;

	netif_err(efx, drv, efx->net_dev,
		  "tc rule %lx still present at teardown, removing\n",
		  rule->cookie);

	efx_mae_delete_rule(efx, rule->fw_id);

	/* Release entries in subsidiary tables */
	efx_tc_free_action_set_list(efx, &rule->acts, true);

	kfree(rule);
}

/* Boilerplate for the simple 'copy a field' cases */
#define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) {		\
	struct flow_match_##_type fm;					\
									\
	flow_rule_match_##_tcget(rule, &fm);				\
	match->value._field = fm.key->_tcfield;				\
	match->mask._field = fm.mask->_tcfield;				\
}
#define MAP_KEY_AND_MASK(_name, _type, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(_name, _type, _type, _tcfield, _field)
#define MAP_ENC_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(ENC_##_name, _type, _tcget, _tcfield, _field)

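/* Illustrative note: for instance, MAP_KEY_AND_MASK(BASIC, basic, n_proto,
 * eth_proto), as used below, expands to roughly
 *
 *	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
 *		struct flow_match_basic fm;
 *
 *		flow_rule_match_basic(rule, &fm);
 *		match->value.eth_proto = fm.key->n_proto;
 *		match->mask.eth_proto = fm.mask->n_proto;
 *	}
 */
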
static int efx_tc_flower_parse_match(struct efx_nic *efx,
				     struct flow_rule *rule,
				     struct efx_tc_match *match,
				     struct netlink_ext_ack *extack)
{
	struct flow_dissector *dissector = rule->match.dissector;
	unsigned char ipv = 0;

	/* Owing to internal TC infelicities, the IPV6_ADDRS key might be set
	 * even on IPv4 filters; so rather than relying on dissector->used_keys
	 * we check the addr_type in the CONTROL key. If we don't find it (or
	 * it's masked, which should never happen), we treat both IPV4_ADDRS
	 * and IPV6_ADDRS as absent.
	 */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_control(rule, &fm);
		if (IS_ALL_ONES(fm.mask->addr_type))
			switch (fm.key->addr_type) {
			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
				ipv = 4;
				break;
			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
				ipv = 6;
				break;
			default:
				break;
			}

		if (fm.mask->flags & FLOW_DIS_IS_FRAGMENT) {
			match->value.ip_frag = fm.key->flags & FLOW_DIS_IS_FRAGMENT;
			match->mask.ip_frag = true;
		}
		if (fm.mask->flags & FLOW_DIS_FIRST_FRAG) {
			match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG;
			match->mask.ip_firstfrag = true;
		}
		if (fm.mask->flags & ~(FLOW_DIS_IS_FRAGMENT | FLOW_DIS_FIRST_FRAG)) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on control.flags %#x",
					       fm.mask->flags);
			return -EOPNOTSUPP;
		}
	}
	if (dissector->used_keys &
	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT(FLOW_DISSECTOR_KEY_CVLAN) |
	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_TCP) |
	      BIT(FLOW_DISSECTOR_KEY_IP))) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#x",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}

	MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto);
	/* Make sure we're IP if any L3/L4 keys used. */
	if (!IS_ALL_ONES(match->mask.eth_proto) ||
	    !(match->value.eth_proto == htons(ETH_P_IP) ||
	      match->value.eth_proto == htons(ETH_P_IPV6)))
		if (dissector->used_keys &
		    (BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
		     BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
		     BIT(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT(FLOW_DISSECTOR_KEY_IP) |
		     BIT(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "L3/L4 flower keys %#x require protocol ipv[46]",
					       dissector->used_keys);
			return -EINVAL;
		}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_vlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[0] = fm.key->vlan_tpid;
			match->mask.vlan_proto[0] = fm.mask->vlan_tpid;
			match->value.vlan_tci[0] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[0] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_cvlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[1] = fm.key->vlan_tpid;
			match->mask.vlan_proto[1] = fm.mask->vlan_tpid;
			match->value.vlan_tci[1] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[1] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs fm;

		flow_rule_match_eth_addrs(rule, &fm);
		ether_addr_copy(match->value.eth_saddr, fm.key->src);
		ether_addr_copy(match->value.eth_daddr, fm.key->dst);
		ether_addr_copy(match->mask.eth_saddr, fm.mask->src);
		ether_addr_copy(match->mask.eth_daddr, fm.mask->dst);
	}

	MAP_KEY_AND_MASK(BASIC, basic, ip_proto, ip_proto);
	/* Make sure we're TCP/UDP if any L4 keys used. */
	if ((match->value.ip_proto != IPPROTO_UDP &&
	     match->value.ip_proto != IPPROTO_TCP) || !IS_ALL_ONES(match->mask.ip_proto))
		if (dissector->used_keys &
		    (BIT(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "L4 flower keys %#x require ipproto udp or tcp",
					       dissector->used_keys);
			return -EINVAL;
		}
	MAP_KEY_AND_MASK(IP, ip, tos, ip_tos);
	MAP_KEY_AND_MASK(IP, ip, ttl, ip_ttl);
	if (ipv == 4) {
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, src, src_ip);
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, dst, dst_ip);
	}
#ifdef CONFIG_IPV6
	else if (ipv == 6) {
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, src, src_ip6);
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, dst, dst_ip6);
	}
#endif
	MAP_KEY_AND_MASK(PORTS, ports, src, l4_sport);
	MAP_KEY_AND_MASK(PORTS, ports, dst, l4_dport);
	MAP_KEY_AND_MASK(TCP, tcp, flags, tcp_flags);
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_enc_control(rule, &fm);
		if (fm.mask->flags) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on enc_control.flags %#x",
					       fm.mask->flags);
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(fm.mask->addr_type)) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported enc addr_type mask %u (key %u)",
					       fm.mask->addr_type,
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		switch (fm.key->addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     src, enc_src_ip);
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     dst, enc_dst_ip);
			break;
#ifdef CONFIG_IPV6
		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     src, enc_src_ip6);
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     dst, enc_dst_ip6);
			break;
#endif
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported enc addr_type %u (supported are IPv4, IPv6)",
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, tos, enc_ip_tos);
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, ttl, enc_ip_ttl);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, src, enc_sport);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, dst, enc_dport);
		MAP_ENC_KEY_AND_MASK(KEYID, enc_keyid, enc_keyid, keyid, enc_keyid);
	} else if (dissector->used_keys &
		   (BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		    BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
		    BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
		    BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
		    BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "Flower enc keys require enc_control (keys: %#x)",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}

	return 0;
}

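/* Illustrative example (names are placeholders, not from the original
 * source): a filter such as
 *	tc filter add dev $PF ingress protocol ip flower \
 *		ip_proto tcp dst_port 80 action drop
 * reaches this parser with (at least) the CONTROL, BASIC and PORTS
 * dissector keys in use, and populates match->value/mask .eth_proto,
 * .ip_proto and .l4_dport accordingly.
 */
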
static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
					    struct efx_tc_match *match,
					    enum efx_encap_type type,
					    struct netlink_ext_ack *extack)
{
	struct efx_tc_encap_match *encap, *old;
	bool ipv6 = false;
	int rc;

	/* We require that the socket-defining fields (IP addrs and UDP dest
	 * port) are present and exact-match. Other fields are currently not
	 * allowed. This meets what OVS will ask for, and means that we don't
	 * need to handle difficult checks for overlapping matches as could
	 * come up if we allowed masks or varying sets of match fields.
	 */
	if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
		if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on dst IP address");
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(match->mask.enc_src_ip)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on src IP address");
			return -EOPNOTSUPP;
		}
#ifdef CONFIG_IPV6
		if (!ipv6_addr_any(&match->mask.enc_dst_ip6) ||
		    !ipv6_addr_any(&match->mask.enc_src_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match on both IPv4 and IPv6, don't understand");
			return -EOPNOTSUPP;
		}
	} else {
		ipv6 = true;
		if (!efx_ipv6_addr_all_ones(&match->mask.enc_dst_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on dst IP address");
			return -EOPNOTSUPP;
		}
		if (!efx_ipv6_addr_all_ones(&match->mask.enc_src_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on src IP address");
			return -EOPNOTSUPP;
		}
#endif
	}
	if (!IS_ALL_ONES(match->mask.enc_dport)) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
		return -EOPNOTSUPP;
	}
	if (match->mask.enc_sport) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on src UDP port not supported");
		return -EOPNOTSUPP;
	}
	if (match->mask.enc_ip_tos) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP ToS not supported");
		return -EOPNOTSUPP;
	}
	if (match->mask.enc_ip_ttl) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
		return -EOPNOTSUPP;
	}

	rc = efx_mae_check_encap_match_caps(efx, ipv6, extack);
	if (rc) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "MAE hw reports no support for IPv%d encap matches",
				       ipv6 ? 6 : 4);
		return -EOPNOTSUPP;
	}

	encap = kzalloc(sizeof(*encap), GFP_USER);
	if (!encap)
		return -ENOMEM;
	encap->src_ip = match->value.enc_src_ip;
	encap->dst_ip = match->value.enc_dst_ip;
#ifdef CONFIG_IPV6
	encap->src_ip6 = match->value.enc_src_ip6;
	encap->dst_ip6 = match->value.enc_dst_ip6;
#endif
	encap->udp_dport = match->value.enc_dport;
	encap->tun_type = type;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
						&encap->linkage,
						efx_tc_encap_match_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(encap);
		if (old->tun_type != type) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Egress encap match with conflicting tun_type %u != %u",
					       old->tun_type, type);
			return -EEXIST;
		}
		if (!refcount_inc_not_zero(&old->ref))
			return -EAGAIN;
		/* existing entry found */
		encap = old;
	} else {
		rc = efx_mae_register_encap_match(efx, encap);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
			goto fail;
		}
		refcount_set(&encap->ref, 1);
	}
	match->encap = encap;
	return 0;
fail:
	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);
	kfree(encap);
	return rc;
}

static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
					      struct efx_tc_encap_match *encap)
{
	int rc;

	if (!refcount_dec_and_test(&encap->ref))
		return; /* still in use */

	rc = efx_mae_unregister_encap_match(efx, encap);
	if (rc)
		/* Display message but carry on and remove entry from our
		 * SW tables, because there's not much we can do about it.
		 */
		netif_err(efx, drv, efx->net_dev,
			  "Failed to release encap match %#x, rc %d\n",
			  encap->fw_id, rc);
	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);
	kfree(encap);
}

static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
{
	efx_mae_delete_rule(efx, rule->fw_id);

	/* Release entries in subsidiary tables */
	efx_tc_free_action_set_list(efx, &rule->acts, true);
	if (rule->match.encap)
		efx_tc_flower_release_encap_match(efx, rule->match.encap);
	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
}

static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
{
	switch (typ) {
	case EFX_ENCAP_TYPE_NONE:
		return "none";
	case EFX_ENCAP_TYPE_VXLAN:
		return "vxlan";
	case EFX_ENCAP_TYPE_GENEVE:
		return "geneve";
	default:
		pr_warn_once("Unknown efx_encap_type %d encountered\n", typ);
		return "unknown";
	}
}

/* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
enum efx_tc_action_order {
	EFX_TC_AO_DECAP,
	EFX_TC_AO_VLAN_POP,
	EFX_TC_AO_VLAN_PUSH,
	EFX_TC_AO_COUNT,
	EFX_TC_AO_DELIVER
};
/* Determine whether we can add @new action without violating order */
static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
					  enum efx_tc_action_order new)
{
	switch (new) {
	case EFX_TC_AO_DECAP:
		if (act->decap)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_POP:
		if (act->vlan_pop >= 2)
			return false;
		/* If we've already pushed a VLAN, we can't then pop it;
		 * the hardware would instead try to pop an existing VLAN
		 * before pushing the new one.
		 */
		if (act->vlan_push)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_PUSH:
		if (act->vlan_push >= 2)
			return false;
		fallthrough;
	case EFX_TC_AO_COUNT:
		if (act->count)
			return false;
		fallthrough;
	case EFX_TC_AO_DELIVER:
		return !act->deliver;
	default:
		/* Bad caller. Whatever they wanted to do, say they can't. */
		WARN_ON_ONCE(1);
		return false;
	}
}

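/* Illustrative note: the fallthroughs above mean each case also runs the
 * checks for every later-ordered action. For example, a VLAN push is
 * refused once act->count or act->deliver is already set: the new action
 * would have to execute before ones already recorded, which the
 * hardware's fixed action order cannot express.
 */
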
static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
					 struct net_device *net_dev,
					 struct flow_cls_offload *tc)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
	struct efx_tc_action_set *act = NULL;
	bool found = false, uplinked = false;
	const struct flow_action_entry *fa;
	struct efx_tc_match match;
	struct efx_rep *to_efv;
	s64 rc;
	int i;

	/* Parse match */
	memset(&match, 0, sizeof(match));
	rc = efx_tc_flower_parse_match(efx, fr, &match, NULL);
	if (rc)
		return rc;
	/* The rule as given to us doesn't specify a source netdevice.
	 * But, determining whether packets from a VF should match it is
	 * complicated, so leave those to the software slowpath: qualify
	 * the filter with source m-port == wire.
	 */
	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
		return rc;
	}
	match.value.ingress_port = rc;
	match.mask.ingress_port = ~0;

	if (tc->common.chain_index) {
		NL_SET_ERR_MSG_MOD(extack, "No support for nonzero chain_index");
		return -EOPNOTSUPP;
	}
	match.mask.recirc_id = 0xff;

	flow_action_for_each(i, fa, &fr->action) {
		switch (fa->id) {
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED: /* mirred means mirror here */
			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			if (IS_ERR(to_efv))
				continue;
			found = true;
			break;
		default:
			break;
		}
	}
	if (!found) { /* We don't care. */
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring foreign filter that doesn't egdev us\n");
		rc = -EOPNOTSUPP;
		goto release;
	}

	rc = efx_mae_match_check_caps(efx, &match.mask, NULL);
	if (rc)
		goto release;

	if (efx_tc_match_is_encap(&match.mask)) {
		enum efx_encap_type type;

		type = efx_tc_indr_netdev_type(net_dev);
		if (type == EFX_ENCAP_TYPE_NONE) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match on unsupported tunnel device");
			rc = -EOPNOTSUPP;
			goto release;
		}

		rc = efx_mae_check_encap_type_supported(efx, type);
		if (rc) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Firmware reports no support for %s encap match",
					       efx_tc_encap_type_name(type));
			goto release;
		}

		rc = efx_tc_flower_record_encap_match(efx, &match, type,
						      extack);
		if (rc)
			goto release;
	} else {
		/* This is not a tunnel decap rule, ignore it */
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring foreign filter without encap match\n");
		rc = -EOPNOTSUPP;
		goto release;
	}

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release;
	}
	INIT_LIST_HEAD(&rule->acts.list);
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
						&rule->linkage,
						efx_tc_match_action_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring already-offloaded rule (cookie %lx)\n",
			  tc->cookie);
		rc = -EEXIST;
		goto release;
	}

	act = kzalloc(sizeof(*act), GFP_USER);
	if (!act) {
		rc = -ENOMEM;
		goto release;
	}

	/* Parse actions. For foreign rules we only support decap & redirect.
	 * See corresponding code in efx_tc_flower_replace() for theory of
	 * operation & how 'act' cursor is used.
	 */
	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_action_set save;

		switch (fa->id) {
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
			/* See corresponding code in efx_tc_flower_replace() for
			 * long explanations of what's going on here.
			 */
			save = *act;
			if (fa->hw_stats) {
				struct efx_tc_counter_index *ctr;

				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
					NL_SET_ERR_MSG_FMT_MOD(extack,
							       "hw_stats_type %u not supported (only 'delayed')",
							       fa->hw_stats);
					rc = -EOPNOTSUPP;
					goto release;
				}
				if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
					rc = -EOPNOTSUPP;
					goto release;
				}

				ctr = efx_tc_flower_get_counter_index(efx,
								      tc->cookie,
								      EFX_TC_COUNTER_TYPE_AR);
				if (IS_ERR(ctr)) {
					rc = PTR_ERR(ctr);
					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
					goto release;
				}
				act->count = ctr;
			}

			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
				/* can't happen */
				rc = -EOPNOTSUPP;
				NL_SET_ERR_MSG_MOD(extack,
						   "Deliver action violates action order (can't happen)");
				goto release;
			}
			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			/* PF implies egdev is us, in which case we really
			 * want to deliver to the uplink (because this is an
			 * ingress filter). If we don't recognise the egdev
			 * at all, then we'd better trap so SW can handle it.
			 */
			if (IS_ERR(to_efv))
				to_efv = EFX_EFV_PF;
			if (to_efv == EFX_EFV_PF) {
				if (uplinked)
					break;
				uplinked = true;
			}
			rc = efx_tc_flower_internal_mport(efx, to_efv);
			if (rc < 0) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
				goto release;
			}
			act->dest_mport = rc;
			act->deliver = 1;
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Failed to write action set to hw (mirred)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL;
			if (fa->id == FLOW_ACTION_REDIRECT)
				break; /* end of the line */
			/* Mirror, so continue on with saved act */
			act = kzalloc(sizeof(*act), GFP_USER);
			if (!act) {
				rc = -ENOMEM;
				goto release;
			}
			*act = save;
			break;
		case FLOW_ACTION_TUNNEL_DECAP:
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
				rc = -EINVAL;
				NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
				goto release;
			}
			act->decap = 1;
			/* If we previously delivered/trapped to uplink, now
			 * that we've decapped we'll want another copy if we
			 * try to deliver/trap to uplink again.
			 */
			uplinked = false;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
					       fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}

	if (act) {
		if (!uplinked) {
			/* Not shot/redirected, so deliver to default dest (which is
			 * the uplink, as this is an ingress filter)
			 */
			efx_mae_mport_uplink(efx, &act->dest_mport);
			act->deliver = 1;
		}
		rc = efx_mae_alloc_action_set(efx, act);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
			goto release;
		}
		list_add_tail(&act->list, &rule->acts.list);
		act = NULL; /* Prevent double-free in error path */
	}

	rule->match = match;

	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed foreign filter (cookie %lx)\n",
		  tc->cookie);

	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
		goto release;
	}
	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
				 rule->acts.fw_id, &rule->fw_id);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release_acts;
	}
	return 0;

release_acts:
	efx_mae_free_action_set_list(efx, &rule->acts);
release:
	/* We failed to insert the rule, so free up any entries we created in
	 * subsidiary tables.
	 */
	if (act)
		efx_tc_free_action_set(efx, act, false);
	if (rule) {
		rhashtable_remove_fast(&efx->tc->match_action_ht,
				       &rule->linkage,
				       efx_tc_match_action_ht_params);
		efx_tc_free_action_set_list(efx, &rule->acts, false);
	}
	kfree(rule);
	if (match.encap)
		efx_tc_flower_release_encap_match(efx, match.encap);
	return rc;
}

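/* Illustrative example of a "foreign" filter this function accepts (the
 * addresses, VNI and $REP are placeholders): a decap rule such as OVS
 * might install on a vxlan netdevice,
 *	tc filter add dev vxlan0 ingress flower \
 *		enc_src_ip 10.1.0.2 enc_dst_ip 10.1.0.1 \
 *		enc_dst_port 4789 enc_key_id 100 \
 *		action tunnel_key unset action mirred egress redirect dev $REP
 * The exact-match encap fields become an egress encap match, and the
 * actions become a decap plus delivery to the m-port of the device that
 * $REP represents.
 */
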
static int efx_tc_flower_replace(struct efx_nic *efx,
				 struct net_device *net_dev,
				 struct flow_cls_offload *tc,
				 struct efx_rep *efv)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_flow_rule *rule = NULL, *old;
	struct efx_tc_action_set *act = NULL;
	const struct flow_action_entry *fa;
	struct efx_rep *from_efv, *to_efv;
	struct efx_tc_match match;
	s64 rc;
	int i;

	if (!tc_can_offload_extack(efx->net_dev, extack))
		return -EOPNOTSUPP;
	if (WARN_ON(!efx->tc))
		return -ENETDOWN;
	if (WARN_ON(!efx->tc->up))
		return -ENETDOWN;

	from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
	if (IS_ERR(from_efv)) {
		/* Not from our PF or representors, so probably a tunnel dev */
		return efx_tc_flower_replace_foreign(efx, net_dev, tc);
	}

	if (efv != from_efv) {
		/* can't happen */
		NL_SET_ERR_MSG_FMT_MOD(extack, "for %s efv is %snull but from_efv is %snull (can't happen)",
				       netdev_name(net_dev), efv ? "non-" : "",
				       from_efv ? "non-" : "");
		return -EINVAL;
	}

	/* Parse match */
	memset(&match, 0, sizeof(match));
	rc = efx_tc_flower_external_mport(efx, from_efv);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port");
		return rc;
	}
	match.value.ingress_port = rc;
	match.mask.ingress_port = ~0;
	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
	if (rc)
		return rc;
	if (efx_tc_match_is_encap(&match.mask)) {
		NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported");
		rc = -EOPNOTSUPP;
		goto release;
	}

	if (tc->common.chain_index) {
		NL_SET_ERR_MSG_MOD(extack, "No support for nonzero chain_index");
		return -EOPNOTSUPP;
	}
	match.mask.recirc_id = 0xff;

	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
	if (rc)
		return rc;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule)
		return -ENOMEM;
	INIT_LIST_HEAD(&rule->acts.list);
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
						&rule->linkage,
						efx_tc_match_action_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		rc = -EEXIST;
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		goto release;
	}

	/* Parse actions */
	act = kzalloc(sizeof(*act), GFP_USER);
	if (!act) {
		rc = -ENOMEM;
		goto release;
	}

	/**
	 * DOC: TC action translation
	 *
	 * Actions in TC are sequential and cumulative, with delivery actions
	 * potentially anywhere in the order. The EF100 MAE, however, takes
	 * an 'action set list' consisting of 'action sets', each of which is
	 * applied to the _original_ packet, and consists of a set of optional
	 * actions in a fixed order with delivery at the end.
	 * To translate between these two models, we maintain a 'cursor', @act,
	 * which describes the cumulative effect of all the packet-mutating
	 * actions encountered so far; on handling a delivery (mirred or drop)
	 * action, once the action-set has been inserted into hardware, we
	 * append @act to the action-set list (@rule->acts); if this is a pipe
	 * action (mirred mirror) we then allocate a new @act with a copy of
	 * the cursor state _before_ the delivery action, otherwise we set @act
	 * to %NULL.
	 * This ensures that every allocated action-set is either attached to
	 * @rule->acts or pointed to by @act (and never both), and that only
	 * those action-sets in @rule->acts exist in hardware. Consequently,
	 * in the failure path, @act only needs to be freed in memory, whereas
	 * for @rule->acts we remove each action-set from hardware before
	 * freeing it (efx_tc_free_action_set_list()), even if the action-set
	 * list itself is not in hardware.
	 */
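	/* Illustrative example of the translation: the TC action list
	 *	vlan pop, mirred mirror dev1, vlan push id 5, mirred redirect dev2
	 * becomes two MAE action sets, each applied to the original packet:
	 *	{vlan_pop, deliver dev1} and {vlan_pop, vlan_push 5, deliver dev2}
	 * (dev1/dev2 being placeholder netdevices on this switch).
	 */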
	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_action_set save;
		u16 tci;

		if (!act) {
			/* more actions after a non-pipe action */
			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
			rc = -EINVAL;
			goto release;
		}

		if ((fa->id == FLOW_ACTION_REDIRECT ||
		     fa->id == FLOW_ACTION_MIRRED ||
		     fa->id == FLOW_ACTION_DROP) && fa->hw_stats) {
			struct efx_tc_counter_index *ctr;

			/* Currently the only actions that want stats are
			 * mirred and gact (ok, shot, trap, goto-chain), which
			 * means we want stats just before delivery. Also,
			 * note that tunnel_key set shouldn't change the length
			 * — it's only the subsequent mirred that does that,
			 * and the stats are taken _before_ the mirred action
			 * happens.
			 */
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
				/* All supported actions that count either steal
				 * (gact shot, mirred redirect) or clone act
				 * (mirred mirror), so we should never get two
				 * count actions on one action_set.
				 */
				NL_SET_ERR_MSG_MOD(extack, "Count-action conflict (can't happen)");
				rc = -EOPNOTSUPP;
				goto release;
			}

			if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
				NL_SET_ERR_MSG_FMT_MOD(extack, "hw_stats_type %u not supported (only 'delayed')",
						       fa->hw_stats);
				rc = -EOPNOTSUPP;
				goto release;
			}

			ctr = efx_tc_flower_get_counter_index(efx, tc->cookie,
							      EFX_TC_COUNTER_TYPE_AR);
			if (IS_ERR(ctr)) {
				rc = PTR_ERR(ctr);
				NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
				goto release;
			}
			act->count = ctr;
		}

		switch (fa->id) {
		case FLOW_ACTION_DROP:
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (drop)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL; /* end of the line */
			break;
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
			save = *act;

			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
				/* can't happen */
				rc = -EOPNOTSUPP;
				NL_SET_ERR_MSG_MOD(extack, "Deliver action violates action order (can't happen)");
				goto release;
			}

			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			if (IS_ERR(to_efv)) {
				NL_SET_ERR_MSG_MOD(extack, "Mirred egress device not on switch");
				rc = PTR_ERR(to_efv);
				goto release;
			}
			rc = efx_tc_flower_external_mport(efx, to_efv);
			if (rc < 0) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
				goto release;
			}
			act->dest_mport = rc;
			act->deliver = 1;
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (mirred)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL;
			if (fa->id == FLOW_ACTION_REDIRECT)
				break; /* end of the line */
			/* Mirror, so continue on with saved act */
			save.count = NULL;
			act = kzalloc(sizeof(*act), GFP_USER);
			if (!act) {
				rc = -ENOMEM;
				goto release;
			}
			*act = save;
			break;
		case FLOW_ACTION_VLAN_POP:
			if (act->vlan_push) {
				act->vlan_push--;
			} else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) {
				act->vlan_pop++;
			} else {
				NL_SET_ERR_MSG_MOD(extack,
						   "More than two VLAN pops, or action order violated");
				rc = -EINVAL;
				goto release;
			}
			break;
		case FLOW_ACTION_VLAN_PUSH:
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
				rc = -EINVAL;
				NL_SET_ERR_MSG_MOD(extack,
						   "More than two VLAN pushes, or action order violated");
				goto release;
			}
			tci = fa->vlan.vid & VLAN_VID_MASK;
			tci |= fa->vlan.prio << VLAN_PRIO_SHIFT;
			act->vlan_tci[act->vlan_push] = cpu_to_be16(tci);
			act->vlan_proto[act->vlan_push] = fa->vlan.proto;
			act->vlan_push++;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
					       fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}

	if (act) {
		/* Not shot/redirected, so deliver to default dest */
		if (from_efv == EFX_EFV_PF)
			/* Rule applies to traffic from the wire,
			 * and default dest is thus the PF
			 */
			efx_mae_mport_uplink(efx, &act->dest_mport);
		else
			/* Representor, so rule applies to traffic from
			 * representee, and default dest is thus the rep.
			 * All reps use the same mport for delivery
			 */
			efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
					    &act->dest_mport);
		act->deliver = 1;
		rc = efx_mae_alloc_action_set(efx, act);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
			goto release;
		}
		list_add_tail(&act->list, &rule->acts.list);
		act = NULL; /* Prevent double-free in error path */
	}

	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed filter (cookie %lx)\n",
		  tc->cookie);

	rule->match = match;

	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
		goto release;
	}
	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
				 rule->acts.fw_id, &rule->fw_id);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release_acts;
	}
	return 0;

release_acts:
	efx_mae_free_action_set_list(efx, &rule->acts);
release:
	/* We failed to insert the rule, so free up any entries we created in
	 * subsidiary tables.
	 */
	if (act)
		efx_tc_free_action_set(efx, act, false);
	if (rule) {
		rhashtable_remove_fast(&efx->tc->match_action_ht,
				       &rule->linkage,
				       efx_tc_match_action_ht_params);
		efx_tc_free_action_set_list(efx, &rule->acts, false);
	}
	kfree(rule);
	return rc;
}

static int efx_tc_flower_destroy(struct efx_nic *efx,
				 struct net_device *net_dev,
				 struct flow_cls_offload *tc)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_flow_rule *rule;

	rule = rhashtable_lookup_fast(&efx->tc->match_action_ht, &tc->cookie,
				      efx_tc_match_action_ht_params);
	if (!rule) {
		/* Only log a message if we're the ingress device. Otherwise
		 * it's a foreign filter and we might just not have been
		 * interested (e.g. we might not have been the egress device
		 * either).
		 */
		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
			netif_warn(efx, drv, efx->net_dev,
				   "Filter %lx not found to remove\n", tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
		return -ENOENT;
	}

	/* Remove it from HW */
	efx_tc_delete_rule(efx, rule);
	/* Delete it from SW */
	rhashtable_remove_fast(&efx->tc->match_action_ht, &rule->linkage,
			       efx_tc_match_action_ht_params);
	netif_dbg(efx, drv, efx->net_dev, "Removed filter %lx\n", rule->cookie);
	kfree(rule);
	return 0;
}

static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev,
			       struct flow_cls_offload *tc)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_counter_index *ctr;
	struct efx_tc_counter *cnt;
	u64 packets, bytes;

	ctr = efx_tc_flower_find_counter_index(efx, tc->cookie);
	if (!ctr) {
		/* See comment in efx_tc_flower_destroy() */
		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
			if (net_ratelimit())
				netif_warn(efx, drv, efx->net_dev,
					   "Filter %lx not found for stats\n",
					   tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
		return -ENOENT;
	}
	if (WARN_ON(!ctr->cnt)) /* can't happen */
		return -EIO;
	cnt = ctr->cnt;

	spin_lock_bh(&cnt->lock);
	/* Report only new pkts/bytes since last time TC asked */
	packets = cnt->packets;
	bytes = cnt->bytes;
	flow_stats_update(&tc->stats, bytes - cnt->old_bytes,
			  packets - cnt->old_packets, 0, cnt->touched,
			  FLOW_ACTION_HW_STATS_DELAYED);
	cnt->old_packets = packets;
	cnt->old_bytes = bytes;
	spin_unlock_bh(&cnt->lock);
	return 0;
}

int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev,
		  struct flow_cls_offload *tc, struct efx_rep *efv)
{
	int rc;

	if (!efx->tc)
		return -EOPNOTSUPP;

	mutex_lock(&efx->tc->mutex);
	switch (tc->command) {
	case FLOW_CLS_REPLACE:
		rc = efx_tc_flower_replace(efx, net_dev, tc, efv);
		break;
	case FLOW_CLS_DESTROY:
		rc = efx_tc_flower_destroy(efx, net_dev, tc);
		break;
	case FLOW_CLS_STATS:
		rc = efx_tc_flower_stats(efx, net_dev, tc);
		break;
	default:
		rc = -EOPNOTSUPP;
		break;
	}
	mutex_unlock(&efx->tc->mutex);
	return rc;
}

static int efx_tc_configure_default_rule(struct efx_nic *efx, u32 ing_port,
					 u32 eg_port, struct efx_tc_flow_rule *rule)
{
	struct efx_tc_action_set_list *acts = &rule->acts;
	struct efx_tc_match *match = &rule->match;
	struct efx_tc_action_set *act;
	int rc;

	match->value.ingress_port = ing_port;
	match->mask.ingress_port = ~0;
	act = kzalloc(sizeof(*act), GFP_KERNEL);
	if (!act)
		return -ENOMEM;
	act->deliver = 1;
	act->dest_mport = eg_port;
	rc = efx_mae_alloc_action_set(efx, act);
	if (rc)
		goto fail1;
	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
	list_add_tail(&act->list, &acts->list);
	rc = efx_mae_alloc_action_set_list(efx, acts);
	if (rc)
		goto fail2;
	rc = efx_mae_insert_rule(efx, match, EFX_TC_PRIO_DFLT,
				 acts->fw_id, &rule->fw_id);
	if (rc)
		goto fail3;
	return 0;
fail3:
	efx_mae_free_action_set_list(efx, acts);
fail2:
	list_del(&act->list);
	efx_mae_free_action_set(efx, act->fw_id);
fail1:
	kfree(act);
	return rc;
}

static int efx_tc_configure_default_rule_pf(struct efx_nic *efx)
{
	struct efx_tc_flow_rule *rule = &efx->tc->dflt.pf;
	u32 ing_port, eg_port;

	efx_mae_mport_uplink(efx, &ing_port);
	efx_mae_mport_wire(efx, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

static int efx_tc_configure_default_rule_wire(struct efx_nic *efx)
{
	struct efx_tc_flow_rule *rule = &efx->tc->dflt.wire;
	u32 ing_port, eg_port;

	efx_mae_mport_wire(efx, &ing_port);
	efx_mae_mport_uplink(efx, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

int efx_tc_configure_default_rule_rep(struct efx_rep *efv)
{
	struct efx_tc_flow_rule *rule = &efv->dflt;
	struct efx_nic *efx = efv->parent;
	u32 ing_port, eg_port;

	efx_mae_mport_mport(efx, efv->mport, &ing_port);
	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
				     struct efx_tc_flow_rule *rule)
{
	if (rule->fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL)
		efx_tc_delete_rule(efx, rule);
	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
}

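/* Illustrative summary of the default rules above: unoffloaded traffic
 * always has somewhere to go. The PF netdev (uplink m-port) delivers to
 * the wire, the wire delivers to the PF netdev, and each representee
 * delivers to the shared representor m-port, from which the filters
 * installed by efx_tc_insert_rep_filters() below pick it up. TC rules are
 * inserted at EFX_TC_PRIO_TC, intended to take precedence over these
 * EFX_TC_PRIO_DFLT rules.
 */
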
static int efx_tc_configure_rep_mport(struct efx_nic *efx)
{
	u32 rep_mport_label;
	int rc;

	rc = efx_mae_allocate_mport(efx, &efx->tc->reps_mport_id, &rep_mport_label);
	if (rc)
		return rc;
	pci_dbg(efx->pci_dev, "created rep mport 0x%08x (0x%04x)\n",
		efx->tc->reps_mport_id, rep_mport_label);
	/* Use mport *selector* as vport ID */
	efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
			    &efx->tc->reps_mport_vport_id);
	return 0;
}

static void efx_tc_deconfigure_rep_mport(struct efx_nic *efx)
{
	efx_mae_free_mport(efx, efx->tc->reps_mport_id);
	efx->tc->reps_mport_id = MAE_MPORT_SELECTOR_NULL;
}

int efx_tc_insert_rep_filters(struct efx_nic *efx)
{
	struct efx_filter_spec promisc, allmulti;
	int rc;

	if (efx->type->is_vf)
		return 0;
	if (!efx->tc)
		return 0;
	efx_filter_init_rx(&promisc, EFX_FILTER_PRI_REQUIRED, 0, 0);
	efx_filter_set_uc_def(&promisc);
	efx_filter_set_vport_id(&promisc, efx->tc->reps_mport_vport_id);
	rc = efx_filter_insert_filter(efx, &promisc, false);
	if (rc < 0)
		return rc;
	efx->tc->reps_filter_uc = rc;
	efx_filter_init_rx(&allmulti, EFX_FILTER_PRI_REQUIRED, 0, 0);
	efx_filter_set_mc_def(&allmulti);
	efx_filter_set_vport_id(&allmulti, efx->tc->reps_mport_vport_id);
	rc = efx_filter_insert_filter(efx, &allmulti, false);
	if (rc < 0)
		return rc;
	efx->tc->reps_filter_mc = rc;
	return 0;
}

void efx_tc_remove_rep_filters(struct efx_nic *efx)
{
	if (efx->type->is_vf)
		return;
	if (!efx->tc)
		return;
	if (efx->tc->reps_filter_mc >= 0)
		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_mc);
	efx->tc->reps_filter_mc = -1;
	if (efx->tc->reps_filter_uc >= 0)
		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_uc);
	efx->tc->reps_filter_uc = -1;
}

int efx_init_tc(struct efx_nic *efx)
{
	int rc;

	rc = efx_mae_get_caps(efx, efx->tc->caps);
	if (rc)
		return rc;
	if (efx->tc->caps->match_field_count > MAE_NUM_FIELDS)
		/* Firmware supports some match fields the driver doesn't know
		 * about. Not fatal, unless any of those fields are required
		 * (MAE_FIELD_SUPPORTED_MATCH_ALWAYS) but if so we don't know.
		 */
		netif_warn(efx, probe, efx->net_dev,
			   "FW reports additional match fields %u\n",
			   efx->tc->caps->match_field_count);
	if (efx->tc->caps->action_prios < EFX_TC_PRIO__NUM) {
		netif_err(efx, probe, efx->net_dev,
			  "Too few action prios supported (have %u, need %u)\n",
			  efx->tc->caps->action_prios, EFX_TC_PRIO__NUM);
		return -EIO;
	}
	rc = efx_tc_configure_default_rule_pf(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_default_rule_wire(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_rep_mport(efx);
	if (rc)
		return rc;
	efx->tc->up = true;
	rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
	if (rc)
		return rc;
	return 0;
}

void efx_fini_tc(struct efx_nic *efx)
{
	/* We can get called even if efx_init_struct_tc() failed */
	if (!efx->tc)
		return;
	if (efx->tc->up)
		flow_indr_dev_unregister(efx_tc_indr_setup_cb, efx, efx_tc_block_unbind);
	efx_tc_deconfigure_rep_mport(efx);
	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf);
	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire);
	efx->tc->up = false;
}

/* At teardown time, all TC filter rules (and thus all resources they created)
 * should already have been removed. If we find any in our hashtables, make a
 * cursory attempt to clean up the software side.
 */
static void efx_tc_encap_match_free(void *ptr, void *__unused)
{
	struct efx_tc_encap_match *encap = ptr;

	WARN_ON(refcount_read(&encap->ref));
	kfree(encap);
}

int efx_init_struct_tc(struct efx_nic *efx)
{
	int rc;

	if (efx->type->is_vf)
		return 0;

	efx->tc = kzalloc(sizeof(*efx->tc), GFP_KERNEL);
	if (!efx->tc)
		return -ENOMEM;
	efx->tc->caps = kzalloc(sizeof(struct mae_caps), GFP_KERNEL);
	if (!efx->tc->caps) {
		rc = -ENOMEM;
		goto fail_alloc_caps;
	}
	INIT_LIST_HEAD(&efx->tc->block_list);

	mutex_init(&efx->tc->mutex);
	init_waitqueue_head(&efx->tc->flush_wq);
	rc = efx_tc_init_counters(efx);
	if (rc < 0)
		goto fail_counters;
	rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
	if (rc < 0)
		goto fail_encap_match_ht;
	rc = rhashtable_init(&efx->tc->match_action_ht, &efx_tc_match_action_ht_params);
	if (rc < 0)
		goto fail_match_action_ht;
	efx->tc->reps_filter_uc = -1;
	efx->tc->reps_filter_mc = -1;
	INIT_LIST_HEAD(&efx->tc->dflt.pf.acts.list);
	efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
	INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list);
	efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
	efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type;
	return 0;
fail_match_action_ht:
	rhashtable_destroy(&efx->tc->encap_match_ht);
fail_encap_match_ht:
	efx_tc_destroy_counters(efx);
fail_counters:
	mutex_destroy(&efx->tc->mutex);
	kfree(efx->tc->caps);
fail_alloc_caps:
	kfree(efx->tc);
	efx->tc = NULL;
	return rc;
}

void efx_fini_struct_tc(struct efx_nic *efx)
{
	if (!efx->tc)
		return;

	mutex_lock(&efx->tc->mutex);
	EFX_WARN_ON_PARANOID(efx->tc->dflt.pf.fw_id !=
			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
	EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id !=
			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
	rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free,
				    efx);
	rhashtable_free_and_destroy(&efx->tc->encap_match_ht,
				    efx_tc_encap_match_free, NULL);
	efx_tc_fini_counters(efx);
	mutex_unlock(&efx->tc->mutex);
	mutex_destroy(&efx->tc->mutex);
	kfree(efx->tc->caps);
	kfree(efx->tc);
	efx->tc = NULL;
}
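
/* Usage sketch (illustrative; $REP and $PF are placeholder netdevice
 * names): once efx_init_tc() has run, a rule such as
 *	tc filter add dev $REP ingress protocol ip flower ip_proto udp \
 *		action mirred egress redirect dev $PF
 * reaches efx_tc_flower() as FLOW_CLS_REPLACE, is parsed and written to
 * the MAE by efx_tc_flower_replace(), and is thereafter matched and
 * forwarded entirely in hardware; periodic FLOW_CLS_STATS callbacks read
 * back the MAE counters via efx_tc_flower_stats().
 */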