1 // SPDX-License-Identifier: GPL-2.0-only 2 /**************************************************************************** 3 * Driver for Solarflare network controllers and boards 4 * Copyright 2019 Solarflare Communications Inc. 5 * Copyright 2020-2022 Xilinx Inc. 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 as published 9 * by the Free Software Foundation, incorporated herein by reference. 10 */ 11 12 #include <net/pkt_cls.h> 13 #include <net/vxlan.h> 14 #include <net/geneve.h> 15 #include <net/tc_act/tc_ct.h> 16 #include "tc.h" 17 #include "tc_bindings.h" 18 #include "tc_encap_actions.h" 19 #include "tc_conntrack.h" 20 #include "mae.h" 21 #include "ef100_rep.h" 22 #include "efx.h" 23 24 enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) 25 { 26 if (netif_is_vxlan(net_dev)) 27 return EFX_ENCAP_TYPE_VXLAN; 28 if (netif_is_geneve(net_dev)) 29 return EFX_ENCAP_TYPE_GENEVE; 30 31 return EFX_ENCAP_TYPE_NONE; 32 } 33 34 #define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff) 35 /* Hoplimit is stored in the most significant byte in the pedit ipv6 header action */ 36 #define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000) 37 #define EFX_EFV_PF NULL 38 /* Look up the representor information (efv) for a device. 39 * May return NULL for the PF (us), or an error pointer for a device that 40 * isn't supported as a TC offload endpoint 41 */ 42 struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, 43 struct net_device *dev) 44 { 45 struct efx_rep *efv; 46 47 if (!dev) 48 return ERR_PTR(-EOPNOTSUPP); 49 /* Is it us (the PF)? */ 50 if (dev == efx->net_dev) 51 return EFX_EFV_PF; 52 /* Is it an efx vfrep at all? */ 53 if (dev->netdev_ops != &efx_ef100_rep_netdev_ops) 54 return ERR_PTR(-EOPNOTSUPP); 55 /* Is it ours? We don't support TC rules that include another 56 * EF100's netdevices (not even on another port of the same NIC). 57 */ 58 efv = netdev_priv(dev); 59 if (efv->parent != efx) 60 return ERR_PTR(-EOPNOTSUPP); 61 return efv; 62 } 63 64 /* Convert a driver-internal vport ID into an internal device (PF or VF) */ 65 static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv) 66 { 67 u32 mport; 68 69 if (IS_ERR(efv)) 70 return PTR_ERR(efv); 71 if (!efv) /* device is PF (us) */ 72 efx_mae_mport_uplink(efx, &mport); 73 else /* device is repr */ 74 efx_mae_mport_mport(efx, efv->mport, &mport); 75 return mport; 76 } 77 78 /* Convert a driver-internal vport ID into an external device (wire or VF) */ 79 s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv) 80 { 81 u32 mport; 82 83 if (IS_ERR(efv)) 84 return PTR_ERR(efv); 85 if (!efv) /* device is PF (us) */ 86 efx_mae_mport_wire(efx, &mport); 87 else /* device is repr */ 88 efx_mae_mport_mport(efx, efv->mport, &mport); 89 return mport; 90 } 91 92 static const struct rhashtable_params efx_tc_mac_ht_params = { 93 .key_len = offsetofend(struct efx_tc_mac_pedit_action, h_addr), 94 .key_offset = 0, 95 .head_offset = offsetof(struct efx_tc_mac_pedit_action, linkage), 96 }; 97 98 static const struct rhashtable_params efx_tc_encap_match_ht_params = { 99 .key_len = offsetof(struct efx_tc_encap_match, linkage), 100 .key_offset = 0, 101 .head_offset = offsetof(struct efx_tc_encap_match, linkage), 102 }; 103 104 static const struct rhashtable_params efx_tc_match_action_ht_params = { 105 .key_len = sizeof(unsigned long), 106 .key_offset = offsetof(struct efx_tc_flow_rule, cookie), 107 .head_offset = offsetof(struct efx_tc_flow_rule, linkage), 108 }; 109 110 static const struct rhashtable_params efx_tc_lhs_rule_ht_params = { 111 .key_len = sizeof(unsigned long), 112 .key_offset = offsetof(struct efx_tc_lhs_rule, cookie), 113 .head_offset = offsetof(struct efx_tc_lhs_rule, linkage), 114 }; 115 116 static const struct rhashtable_params efx_tc_recirc_ht_params = { 117 .key_len = offsetof(struct efx_tc_recirc_id, linkage), 118 .key_offset = 0, 119 .head_offset = offsetof(struct efx_tc_recirc_id, linkage), 120 }; 121 122 static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx, 123 unsigned char h_addr[ETH_ALEN], 124 struct netlink_ext_ack *extack) 125 { 126 struct efx_tc_mac_pedit_action *ped, *old; 127 int rc; 128 129 ped = kzalloc(sizeof(*ped), GFP_USER); 130 if (!ped) 131 return ERR_PTR(-ENOMEM); 132 memcpy(ped->h_addr, h_addr, ETH_ALEN); 133 old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht, 134 &ped->linkage, 135 efx_tc_mac_ht_params); 136 if (old) { 137 /* don't need our new entry */ 138 kfree(ped); 139 if (!refcount_inc_not_zero(&old->ref)) 140 return ERR_PTR(-EAGAIN); 141 /* existing entry found, ref taken */ 142 return old; 143 } 144 145 rc = efx_mae_allocate_pedit_mac(efx, ped); 146 if (rc < 0) { 147 NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw"); 148 goto out_remove; 149 } 150 151 /* ref and return */ 152 refcount_set(&ped->ref, 1); 153 return ped; 154 out_remove: 155 rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage, 156 efx_tc_mac_ht_params); 157 kfree(ped); 158 return ERR_PTR(rc); 159 } 160 161 static void efx_tc_flower_put_mac(struct efx_nic *efx, 162 struct efx_tc_mac_pedit_action *ped) 163 { 164 if (!refcount_dec_and_test(&ped->ref)) 165 return; /* still in use */ 166 rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage, 167 efx_tc_mac_ht_params); 168 efx_mae_free_pedit_mac(efx, ped); 169 kfree(ped); 170 } 171 172 static void efx_tc_free_action_set(struct efx_nic *efx, 173 struct efx_tc_action_set *act, bool in_hw) 174 { 175 /* Failure paths calling this on the 'cursor' action set in_hw=false, 176 * because if the alloc had succeeded we'd've put it in acts.list and 177 * not still have it in act. 178 */ 179 if (in_hw) { 180 efx_mae_free_action_set(efx, act->fw_id); 181 /* in_hw is true iff we are on an acts.list; make sure to 182 * remove ourselves from that list before we are freed. 183 */ 184 list_del(&act->list); 185 } 186 if (act->count) { 187 spin_lock_bh(&act->count->cnt->lock); 188 if (!list_empty(&act->count_user)) 189 list_del(&act->count_user); 190 spin_unlock_bh(&act->count->cnt->lock); 191 efx_tc_flower_put_counter_index(efx, act->count); 192 } 193 if (act->encap_md) { 194 list_del(&act->encap_user); 195 efx_tc_flower_release_encap_md(efx, act->encap_md); 196 } 197 if (act->src_mac) 198 efx_tc_flower_put_mac(efx, act->src_mac); 199 if (act->dst_mac) 200 efx_tc_flower_put_mac(efx, act->dst_mac); 201 kfree(act); 202 } 203 204 static void efx_tc_free_action_set_list(struct efx_nic *efx, 205 struct efx_tc_action_set_list *acts, 206 bool in_hw) 207 { 208 struct efx_tc_action_set *act, *next; 209 210 /* Failure paths set in_hw=false, because usually the acts didn't get 211 * to efx_mae_alloc_action_set_list(); if they did, the failure tree 212 * has a separate efx_mae_free_action_set_list() before calling us. 213 */ 214 if (in_hw) 215 efx_mae_free_action_set_list(efx, acts); 216 /* Any act that's on the list will be in_hw even if the list isn't */ 217 list_for_each_entry_safe(act, next, &acts->list, list) 218 efx_tc_free_action_set(efx, act, true); 219 /* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */ 220 } 221 222 /* Boilerplate for the simple 'copy a field' cases */ 223 #define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field) \ 224 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) { \ 225 struct flow_match_##_type fm; \ 226 \ 227 flow_rule_match_##_tcget(rule, &fm); \ 228 match->value._field = fm.key->_tcfield; \ 229 match->mask._field = fm.mask->_tcfield; \ 230 } 231 #define MAP_KEY_AND_MASK(_name, _type, _tcfield, _field) \ 232 _MAP_KEY_AND_MASK(_name, _type, _type, _tcfield, _field) 233 #define MAP_ENC_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field) \ 234 _MAP_KEY_AND_MASK(ENC_##_name, _type, _tcget, _tcfield, _field) 235 236 static int efx_tc_flower_parse_match(struct efx_nic *efx, 237 struct flow_rule *rule, 238 struct efx_tc_match *match, 239 struct netlink_ext_ack *extack) 240 { 241 struct flow_dissector *dissector = rule->match.dissector; 242 unsigned char ipv = 0; 243 244 /* Owing to internal TC infelicities, the IPV6_ADDRS key might be set 245 * even on IPv4 filters; so rather than relying on dissector->used_keys 246 * we check the addr_type in the CONTROL key. If we don't find it (or 247 * it's masked, which should never happen), we treat both IPV4_ADDRS 248 * and IPV6_ADDRS as absent. 249 */ 250 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { 251 struct flow_match_control fm; 252 253 flow_rule_match_control(rule, &fm); 254 if (IS_ALL_ONES(fm.mask->addr_type)) 255 switch (fm.key->addr_type) { 256 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 257 ipv = 4; 258 break; 259 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 260 ipv = 6; 261 break; 262 default: 263 break; 264 } 265 266 if (fm.mask->flags & FLOW_DIS_IS_FRAGMENT) { 267 match->value.ip_frag = fm.key->flags & FLOW_DIS_IS_FRAGMENT; 268 match->mask.ip_frag = true; 269 } 270 if (fm.mask->flags & FLOW_DIS_FIRST_FRAG) { 271 match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG; 272 match->mask.ip_firstfrag = true; 273 } 274 if (fm.mask->flags & ~(FLOW_DIS_IS_FRAGMENT | FLOW_DIS_FIRST_FRAG)) { 275 NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on control.flags %#x", 276 fm.mask->flags); 277 return -EOPNOTSUPP; 278 } 279 } 280 if (dissector->used_keys & 281 ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | 282 BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | 283 BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) | 284 BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) | 285 BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) | 286 BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | 287 BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | 288 BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) | 289 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) | 290 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | 291 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | 292 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) | 293 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) | 294 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) | 295 BIT_ULL(FLOW_DISSECTOR_KEY_CT) | 296 BIT_ULL(FLOW_DISSECTOR_KEY_TCP) | 297 BIT_ULL(FLOW_DISSECTOR_KEY_IP))) { 298 NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx", 299 dissector->used_keys); 300 return -EOPNOTSUPP; 301 } 302 303 MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto); 304 /* Make sure we're IP if any L3/L4 keys used. */ 305 if (!IS_ALL_ONES(match->mask.eth_proto) || 306 !(match->value.eth_proto == htons(ETH_P_IP) || 307 match->value.eth_proto == htons(ETH_P_IPV6))) 308 if (dissector->used_keys & 309 (BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | 310 BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | 311 BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) | 312 BIT_ULL(FLOW_DISSECTOR_KEY_IP) | 313 BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) { 314 NL_SET_ERR_MSG_FMT_MOD(extack, 315 "L3/L4 flower keys %#llx require protocol ipv[46]", 316 dissector->used_keys); 317 return -EINVAL; 318 } 319 320 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { 321 struct flow_match_vlan fm; 322 323 flow_rule_match_vlan(rule, &fm); 324 if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) { 325 match->value.vlan_proto[0] = fm.key->vlan_tpid; 326 match->mask.vlan_proto[0] = fm.mask->vlan_tpid; 327 match->value.vlan_tci[0] = cpu_to_be16(fm.key->vlan_priority << 13 | 328 fm.key->vlan_id); 329 match->mask.vlan_tci[0] = cpu_to_be16(fm.mask->vlan_priority << 13 | 330 fm.mask->vlan_id); 331 } 332 } 333 334 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { 335 struct flow_match_vlan fm; 336 337 flow_rule_match_cvlan(rule, &fm); 338 if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) { 339 match->value.vlan_proto[1] = fm.key->vlan_tpid; 340 match->mask.vlan_proto[1] = fm.mask->vlan_tpid; 341 match->value.vlan_tci[1] = cpu_to_be16(fm.key->vlan_priority << 13 | 342 fm.key->vlan_id); 343 match->mask.vlan_tci[1] = cpu_to_be16(fm.mask->vlan_priority << 13 | 344 fm.mask->vlan_id); 345 } 346 } 347 348 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 349 struct flow_match_eth_addrs fm; 350 351 flow_rule_match_eth_addrs(rule, &fm); 352 ether_addr_copy(match->value.eth_saddr, fm.key->src); 353 ether_addr_copy(match->value.eth_daddr, fm.key->dst); 354 ether_addr_copy(match->mask.eth_saddr, fm.mask->src); 355 ether_addr_copy(match->mask.eth_daddr, fm.mask->dst); 356 } 357 358 MAP_KEY_AND_MASK(BASIC, basic, ip_proto, ip_proto); 359 /* Make sure we're TCP/UDP if any L4 keys used. */ 360 if ((match->value.ip_proto != IPPROTO_UDP && 361 match->value.ip_proto != IPPROTO_TCP) || !IS_ALL_ONES(match->mask.ip_proto)) 362 if (dissector->used_keys & 363 (BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) | 364 BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) { 365 NL_SET_ERR_MSG_FMT_MOD(extack, 366 "L4 flower keys %#llx require ipproto udp or tcp", 367 dissector->used_keys); 368 return -EINVAL; 369 } 370 MAP_KEY_AND_MASK(IP, ip, tos, ip_tos); 371 MAP_KEY_AND_MASK(IP, ip, ttl, ip_ttl); 372 if (ipv == 4) { 373 MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, src, src_ip); 374 MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, dst, dst_ip); 375 } 376 #ifdef CONFIG_IPV6 377 else if (ipv == 6) { 378 MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, src, src_ip6); 379 MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, dst, dst_ip6); 380 } 381 #endif 382 MAP_KEY_AND_MASK(PORTS, ports, src, l4_sport); 383 MAP_KEY_AND_MASK(PORTS, ports, dst, l4_dport); 384 MAP_KEY_AND_MASK(TCP, tcp, flags, tcp_flags); 385 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { 386 struct flow_match_control fm; 387 388 flow_rule_match_enc_control(rule, &fm); 389 if (fm.mask->flags) { 390 NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on enc_control.flags %#x", 391 fm.mask->flags); 392 return -EOPNOTSUPP; 393 } 394 if (!IS_ALL_ONES(fm.mask->addr_type)) { 395 NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported enc addr_type mask %u (key %u)", 396 fm.mask->addr_type, 397 fm.key->addr_type); 398 return -EOPNOTSUPP; 399 } 400 switch (fm.key->addr_type) { 401 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 402 MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs, 403 src, enc_src_ip); 404 MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs, 405 dst, enc_dst_ip); 406 break; 407 #ifdef CONFIG_IPV6 408 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 409 MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs, 410 src, enc_src_ip6); 411 MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs, 412 dst, enc_dst_ip6); 413 break; 414 #endif 415 default: 416 NL_SET_ERR_MSG_FMT_MOD(extack, 417 "Unsupported enc addr_type %u (supported are IPv4, IPv6)", 418 fm.key->addr_type); 419 return -EOPNOTSUPP; 420 } 421 MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, tos, enc_ip_tos); 422 MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, ttl, enc_ip_ttl); 423 MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, src, enc_sport); 424 MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, dst, enc_dport); 425 MAP_ENC_KEY_AND_MASK(KEYID, enc_keyid, enc_keyid, keyid, enc_keyid); 426 } else if (dissector->used_keys & 427 (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) | 428 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | 429 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | 430 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) | 431 BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) { 432 NL_SET_ERR_MSG_FMT_MOD(extack, 433 "Flower enc keys require enc_control (keys: %#llx)", 434 dissector->used_keys); 435 return -EOPNOTSUPP; 436 } 437 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) { 438 struct flow_match_ct fm; 439 440 flow_rule_match_ct(rule, &fm); 441 match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED); 442 match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED); 443 match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED); 444 match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED); 445 if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | 446 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) { 447 NL_SET_ERR_MSG_FMT_MOD(extack, 448 "Unsupported ct_state match %#x", 449 fm.mask->ct_state); 450 return -EOPNOTSUPP; 451 } 452 match->value.ct_mark = fm.key->ct_mark; 453 match->mask.ct_mark = fm.mask->ct_mark; 454 match->value.ct_zone = fm.key->ct_zone; 455 match->mask.ct_zone = fm.mask->ct_zone; 456 457 if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) { 458 NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported"); 459 return -EOPNOTSUPP; 460 } 461 } 462 463 return 0; 464 } 465 466 static void efx_tc_flower_release_encap_match(struct efx_nic *efx, 467 struct efx_tc_encap_match *encap) 468 { 469 int rc; 470 471 if (!refcount_dec_and_test(&encap->ref)) 472 return; /* still in use */ 473 474 if (encap->type == EFX_TC_EM_DIRECT) { 475 rc = efx_mae_unregister_encap_match(efx, encap); 476 if (rc) 477 /* Display message but carry on and remove entry from our 478 * SW tables, because there's not much we can do about it. 479 */ 480 netif_err(efx, drv, efx->net_dev, 481 "Failed to release encap match %#x, rc %d\n", 482 encap->fw_id, rc); 483 } 484 rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage, 485 efx_tc_encap_match_ht_params); 486 if (encap->pseudo) 487 efx_tc_flower_release_encap_match(efx, encap->pseudo); 488 kfree(encap); 489 } 490 491 static int efx_tc_flower_record_encap_match(struct efx_nic *efx, 492 struct efx_tc_match *match, 493 enum efx_encap_type type, 494 enum efx_tc_em_pseudo_type em_type, 495 u8 child_ip_tos_mask, 496 __be16 child_udp_sport_mask, 497 struct netlink_ext_ack *extack) 498 { 499 struct efx_tc_encap_match *encap, *old, *pseudo = NULL; 500 bool ipv6 = false; 501 int rc; 502 503 /* We require that the socket-defining fields (IP addrs and UDP dest 504 * port) are present and exact-match. Other fields may only be used 505 * if the field-set (and any masks) are the same for all encap 506 * matches on the same <sip,dip,dport> tuple; this is enforced by 507 * pseudo encap matches. 508 */ 509 if (match->mask.enc_dst_ip | match->mask.enc_src_ip) { 510 if (!IS_ALL_ONES(match->mask.enc_dst_ip)) { 511 NL_SET_ERR_MSG_MOD(extack, 512 "Egress encap match is not exact on dst IP address"); 513 return -EOPNOTSUPP; 514 } 515 if (!IS_ALL_ONES(match->mask.enc_src_ip)) { 516 NL_SET_ERR_MSG_MOD(extack, 517 "Egress encap match is not exact on src IP address"); 518 return -EOPNOTSUPP; 519 } 520 #ifdef CONFIG_IPV6 521 if (!ipv6_addr_any(&match->mask.enc_dst_ip6) || 522 !ipv6_addr_any(&match->mask.enc_src_ip6)) { 523 NL_SET_ERR_MSG_MOD(extack, 524 "Egress encap match on both IPv4 and IPv6, don't understand"); 525 return -EOPNOTSUPP; 526 } 527 } else { 528 ipv6 = true; 529 if (!efx_ipv6_addr_all_ones(&match->mask.enc_dst_ip6)) { 530 NL_SET_ERR_MSG_MOD(extack, 531 "Egress encap match is not exact on dst IP address"); 532 return -EOPNOTSUPP; 533 } 534 if (!efx_ipv6_addr_all_ones(&match->mask.enc_src_ip6)) { 535 NL_SET_ERR_MSG_MOD(extack, 536 "Egress encap match is not exact on src IP address"); 537 return -EOPNOTSUPP; 538 } 539 #endif 540 } 541 if (!IS_ALL_ONES(match->mask.enc_dport)) { 542 NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port"); 543 return -EOPNOTSUPP; 544 } 545 if (match->mask.enc_sport || match->mask.enc_ip_tos) { 546 struct efx_tc_match pmatch = *match; 547 548 if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */ 549 NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler"); 550 return -EOPNOTSUPP; 551 } 552 pmatch.value.enc_ip_tos = 0; 553 pmatch.mask.enc_ip_tos = 0; 554 pmatch.value.enc_sport = 0; 555 pmatch.mask.enc_sport = 0; 556 rc = efx_tc_flower_record_encap_match(efx, &pmatch, type, 557 EFX_TC_EM_PSEUDO_MASK, 558 match->mask.enc_ip_tos, 559 match->mask.enc_sport, 560 extack); 561 if (rc) 562 return rc; 563 pseudo = pmatch.encap; 564 } 565 if (match->mask.enc_ip_ttl) { 566 NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported"); 567 rc = -EOPNOTSUPP; 568 goto fail_pseudo; 569 } 570 571 rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos, 572 match->mask.enc_sport, extack); 573 if (rc) 574 goto fail_pseudo; 575 576 encap = kzalloc(sizeof(*encap), GFP_USER); 577 if (!encap) { 578 rc = -ENOMEM; 579 goto fail_pseudo; 580 } 581 encap->src_ip = match->value.enc_src_ip; 582 encap->dst_ip = match->value.enc_dst_ip; 583 #ifdef CONFIG_IPV6 584 encap->src_ip6 = match->value.enc_src_ip6; 585 encap->dst_ip6 = match->value.enc_dst_ip6; 586 #endif 587 encap->udp_dport = match->value.enc_dport; 588 encap->tun_type = type; 589 encap->ip_tos = match->value.enc_ip_tos; 590 encap->ip_tos_mask = match->mask.enc_ip_tos; 591 encap->child_ip_tos_mask = child_ip_tos_mask; 592 encap->udp_sport = match->value.enc_sport; 593 encap->udp_sport_mask = match->mask.enc_sport; 594 encap->child_udp_sport_mask = child_udp_sport_mask; 595 encap->type = em_type; 596 encap->pseudo = pseudo; 597 old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht, 598 &encap->linkage, 599 efx_tc_encap_match_ht_params); 600 if (old) { 601 /* don't need our new entry */ 602 kfree(encap); 603 if (pseudo) /* don't need our new pseudo either */ 604 efx_tc_flower_release_encap_match(efx, pseudo); 605 /* check old and new em_types are compatible */ 606 switch (old->type) { 607 case EFX_TC_EM_DIRECT: 608 /* old EM is in hardware, so mustn't overlap with a 609 * pseudo, but may be shared with another direct EM 610 */ 611 if (em_type == EFX_TC_EM_DIRECT) 612 break; 613 NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry"); 614 return -EEXIST; 615 case EFX_TC_EM_PSEUDO_MASK: 616 /* old EM is protecting a ToS- or src port-qualified 617 * filter, so may only be shared with another pseudo 618 * for the same ToS and src port masks. 619 */ 620 if (em_type != EFX_TC_EM_PSEUDO_MASK) { 621 NL_SET_ERR_MSG_FMT_MOD(extack, 622 "%s encap match conflicts with existing pseudo(MASK) entry", 623 em_type ? "Pseudo" : "Direct"); 624 return -EEXIST; 625 } 626 if (child_ip_tos_mask != old->child_ip_tos_mask) { 627 NL_SET_ERR_MSG_FMT_MOD(extack, 628 "Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x", 629 child_ip_tos_mask, 630 old->child_ip_tos_mask); 631 return -EEXIST; 632 } 633 if (child_udp_sport_mask != old->child_udp_sport_mask) { 634 NL_SET_ERR_MSG_FMT_MOD(extack, 635 "Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x", 636 child_udp_sport_mask, 637 old->child_udp_sport_mask); 638 return -EEXIST; 639 } 640 break; 641 default: /* Unrecognised pseudo-type. Just say no */ 642 NL_SET_ERR_MSG_FMT_MOD(extack, 643 "%s encap match conflicts with existing pseudo(%d) entry", 644 em_type ? "Pseudo" : "Direct", 645 old->type); 646 return -EEXIST; 647 } 648 /* check old and new tun_types are compatible */ 649 if (old->tun_type != type) { 650 NL_SET_ERR_MSG_FMT_MOD(extack, 651 "Egress encap match with conflicting tun_type %u != %u", 652 old->tun_type, type); 653 return -EEXIST; 654 } 655 if (!refcount_inc_not_zero(&old->ref)) 656 return -EAGAIN; 657 /* existing entry found */ 658 encap = old; 659 } else { 660 if (em_type == EFX_TC_EM_DIRECT) { 661 rc = efx_mae_register_encap_match(efx, encap); 662 if (rc) { 663 NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW"); 664 goto fail; 665 } 666 } 667 refcount_set(&encap->ref, 1); 668 } 669 match->encap = encap; 670 return 0; 671 fail: 672 rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage, 673 efx_tc_encap_match_ht_params); 674 kfree(encap); 675 fail_pseudo: 676 if (pseudo) 677 efx_tc_flower_release_encap_match(efx, pseudo); 678 return rc; 679 } 680 681 static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx, 682 u32 chain_index, 683 struct net_device *net_dev) 684 { 685 struct efx_tc_recirc_id *rid, *old; 686 int rc; 687 688 rid = kzalloc(sizeof(*rid), GFP_USER); 689 if (!rid) 690 return ERR_PTR(-ENOMEM); 691 rid->chain_index = chain_index; 692 /* We don't take a reference here, because it's implied - if there's 693 * a rule on the net_dev that's been offloaded to us, then the net_dev 694 * can't go away until the rule has been deoffloaded. 695 */ 696 rid->net_dev = net_dev; 697 old = rhashtable_lookup_get_insert_fast(&efx->tc->recirc_ht, 698 &rid->linkage, 699 efx_tc_recirc_ht_params); 700 if (old) { 701 /* don't need our new entry */ 702 kfree(rid); 703 if (!refcount_inc_not_zero(&old->ref)) 704 return ERR_PTR(-EAGAIN); 705 /* existing entry found */ 706 rid = old; 707 } else { 708 rc = ida_alloc_range(&efx->tc->recirc_ida, 1, U8_MAX, GFP_USER); 709 if (rc < 0) { 710 rhashtable_remove_fast(&efx->tc->recirc_ht, 711 &rid->linkage, 712 efx_tc_recirc_ht_params); 713 kfree(rid); 714 return ERR_PTR(rc); 715 } 716 rid->fw_id = rc; 717 refcount_set(&rid->ref, 1); 718 } 719 return rid; 720 } 721 722 static void efx_tc_put_recirc_id(struct efx_nic *efx, struct efx_tc_recirc_id *rid) 723 { 724 if (!refcount_dec_and_test(&rid->ref)) 725 return; /* still in use */ 726 rhashtable_remove_fast(&efx->tc->recirc_ht, &rid->linkage, 727 efx_tc_recirc_ht_params); 728 ida_free(&efx->tc->recirc_ida, rid->fw_id); 729 kfree(rid); 730 } 731 732 static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule) 733 { 734 efx_mae_delete_rule(efx, rule->fw_id); 735 736 /* Release entries in subsidiary tables */ 737 efx_tc_free_action_set_list(efx, &rule->acts, true); 738 if (rule->match.rid) 739 efx_tc_put_recirc_id(efx, rule->match.rid); 740 if (rule->match.encap) 741 efx_tc_flower_release_encap_match(efx, rule->match.encap); 742 rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; 743 } 744 745 static const char *efx_tc_encap_type_name(enum efx_encap_type typ) 746 { 747 switch (typ) { 748 case EFX_ENCAP_TYPE_NONE: 749 return "none"; 750 case EFX_ENCAP_TYPE_VXLAN: 751 return "vxlan"; 752 case EFX_ENCAP_TYPE_GENEVE: 753 return "geneve"; 754 default: 755 pr_warn_once("Unknown efx_encap_type %d encountered\n", typ); 756 return "unknown"; 757 } 758 } 759 760 /* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */ 761 enum efx_tc_action_order { 762 EFX_TC_AO_DECAP, 763 EFX_TC_AO_DEC_TTL, 764 EFX_TC_AO_PEDIT_MAC_ADDRS, 765 EFX_TC_AO_VLAN_POP, 766 EFX_TC_AO_VLAN_PUSH, 767 EFX_TC_AO_COUNT, 768 EFX_TC_AO_ENCAP, 769 EFX_TC_AO_DELIVER 770 }; 771 /* Determine whether we can add @new action without violating order */ 772 static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act, 773 enum efx_tc_action_order new) 774 { 775 switch (new) { 776 case EFX_TC_AO_DECAP: 777 if (act->decap) 778 return false; 779 /* PEDIT_MAC_ADDRS must not happen before DECAP, though it 780 * can wait until much later 781 */ 782 if (act->dst_mac || act->src_mac) 783 return false; 784 785 /* Decrementing ttl must not happen before DECAP */ 786 if (act->do_ttl_dec) 787 return false; 788 fallthrough; 789 case EFX_TC_AO_VLAN_POP: 790 if (act->vlan_pop >= 2) 791 return false; 792 /* If we've already pushed a VLAN, we can't then pop it; 793 * the hardware would instead try to pop an existing VLAN 794 * before pushing the new one. 795 */ 796 if (act->vlan_push) 797 return false; 798 fallthrough; 799 case EFX_TC_AO_VLAN_PUSH: 800 if (act->vlan_push >= 2) 801 return false; 802 fallthrough; 803 case EFX_TC_AO_COUNT: 804 if (act->count) 805 return false; 806 fallthrough; 807 case EFX_TC_AO_PEDIT_MAC_ADDRS: 808 case EFX_TC_AO_ENCAP: 809 if (act->encap_md) 810 return false; 811 fallthrough; 812 case EFX_TC_AO_DELIVER: 813 return !act->deliver; 814 case EFX_TC_AO_DEC_TTL: 815 if (act->encap_md) 816 return false; 817 return !act->do_ttl_dec; 818 default: 819 /* Bad caller. Whatever they wanted to do, say they can't. */ 820 WARN_ON_ONCE(1); 821 return false; 822 } 823 } 824 825 /** 826 * DOC: TC conntrack sequences 827 * 828 * The MAE hardware can handle at most two rounds of action rule matching, 829 * consequently we support conntrack through the notion of a "left-hand side 830 * rule". This is a rule which typically contains only the actions "ct" and 831 * "goto chain N", and corresponds to one or more "right-hand side rules" in 832 * chain N, which typically match on +trk+est, and may perform ct(nat) actions. 833 * RHS rules go in the Action Rule table as normal but with a nonzero recirc_id 834 * (the hardware equivalent of chain_index), while LHS rules may go in either 835 * the Action Rule or the Outer Rule table, the latter being preferred for 836 * performance reasons, and set both DO_CT and a recirc_id in their response. 837 * 838 * Besides the RHS rules, there are often also similar rules matching on 839 * +trk+new which perform the ct(commit) action. These are not offloaded. 840 */ 841 842 static bool efx_tc_rule_is_lhs_rule(struct flow_rule *fr, 843 struct efx_tc_match *match) 844 { 845 const struct flow_action_entry *fa; 846 int i; 847 848 flow_action_for_each(i, fa, &fr->action) { 849 switch (fa->id) { 850 case FLOW_ACTION_GOTO: 851 return true; 852 case FLOW_ACTION_CT: 853 /* If rule is -trk, or doesn't mention trk at all, then 854 * a CT action implies a conntrack lookup (hence it's an 855 * LHS rule). If rule is +trk, then a CT action could 856 * just be ct(nat) or even ct(commit) (though the latter 857 * can't be offloaded). 858 */ 859 if (!match->mask.ct_state_trk || !match->value.ct_state_trk) 860 return true; 861 break; 862 default: 863 break; 864 } 865 } 866 return false; 867 } 868 869 static int efx_tc_flower_handle_lhs_actions(struct efx_nic *efx, 870 struct flow_cls_offload *tc, 871 struct flow_rule *fr, 872 struct net_device *net_dev, 873 struct efx_tc_lhs_rule *rule) 874 875 { 876 struct netlink_ext_ack *extack = tc->common.extack; 877 struct efx_tc_lhs_action *act = &rule->lhs_act; 878 const struct flow_action_entry *fa; 879 bool pipe = true; 880 int i; 881 882 flow_action_for_each(i, fa, &fr->action) { 883 struct efx_tc_ct_zone *ct_zone; 884 struct efx_tc_recirc_id *rid; 885 886 if (!pipe) { 887 /* more actions after a non-pipe action */ 888 NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action"); 889 return -EINVAL; 890 } 891 switch (fa->id) { 892 case FLOW_ACTION_GOTO: 893 if (!fa->chain_index) { 894 NL_SET_ERR_MSG_MOD(extack, "Can't goto chain 0, no looping in hw"); 895 return -EOPNOTSUPP; 896 } 897 rid = efx_tc_get_recirc_id(efx, fa->chain_index, 898 net_dev); 899 if (IS_ERR(rid)) { 900 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate a hardware recirculation ID for this chain_index"); 901 return PTR_ERR(rid); 902 } 903 act->rid = rid; 904 if (fa->hw_stats) { 905 struct efx_tc_counter_index *cnt; 906 907 if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) { 908 NL_SET_ERR_MSG_FMT_MOD(extack, 909 "hw_stats_type %u not supported (only 'delayed')", 910 fa->hw_stats); 911 return -EOPNOTSUPP; 912 } 913 cnt = efx_tc_flower_get_counter_index(efx, tc->cookie, 914 EFX_TC_COUNTER_TYPE_OR); 915 if (IS_ERR(cnt)) { 916 NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter"); 917 return PTR_ERR(cnt); 918 } 919 WARN_ON(act->count); /* can't happen */ 920 act->count = cnt; 921 } 922 pipe = false; 923 break; 924 case FLOW_ACTION_CT: 925 if (act->zone) { 926 NL_SET_ERR_MSG_MOD(extack, "Can't offload multiple ct actions"); 927 return -EOPNOTSUPP; 928 } 929 if (fa->ct.action & (TCA_CT_ACT_COMMIT | 930 TCA_CT_ACT_FORCE)) { 931 NL_SET_ERR_MSG_MOD(extack, "Can't offload ct commit/force"); 932 return -EOPNOTSUPP; 933 } 934 if (fa->ct.action & TCA_CT_ACT_CLEAR) { 935 NL_SET_ERR_MSG_MOD(extack, "Can't clear ct in LHS rule"); 936 return -EOPNOTSUPP; 937 } 938 if (fa->ct.action & (TCA_CT_ACT_NAT | 939 TCA_CT_ACT_NAT_SRC | 940 TCA_CT_ACT_NAT_DST)) { 941 NL_SET_ERR_MSG_MOD(extack, "Can't perform NAT in LHS rule - packet isn't conntracked yet"); 942 return -EOPNOTSUPP; 943 } 944 if (fa->ct.action) { 945 NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled ct.action %u for LHS rule\n", 946 fa->ct.action); 947 return -EOPNOTSUPP; 948 } 949 ct_zone = efx_tc_ct_register_zone(efx, fa->ct.zone, 950 fa->ct.flow_table); 951 if (IS_ERR(ct_zone)) { 952 NL_SET_ERR_MSG_MOD(extack, "Failed to register for CT updates"); 953 return PTR_ERR(ct_zone); 954 } 955 act->zone = ct_zone; 956 break; 957 default: 958 NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u for LHS rule\n", 959 fa->id); 960 return -EOPNOTSUPP; 961 } 962 } 963 964 if (pipe) { 965 NL_SET_ERR_MSG_MOD(extack, "Missing goto chain in LHS rule"); 966 return -EOPNOTSUPP; 967 } 968 return 0; 969 } 970 971 static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx, 972 struct efx_tc_lhs_action *act) 973 { 974 if (act->rid) 975 efx_tc_put_recirc_id(efx, act->rid); 976 if (act->zone) 977 efx_tc_ct_unregister_zone(efx, act->zone); 978 if (act->count) 979 efx_tc_flower_put_counter_index(efx, act->count); 980 } 981 982 /** 983 * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields 984 * 985 * @dst_mac_32: dst_mac[0:3] has been populated 986 * @dst_mac_16: dst_mac[4:5] has been populated 987 * @src_mac_16: src_mac[0:1] has been populated 988 * @src_mac_32: src_mac[2:5] has been populated 989 * @dst_mac: h_dest field of ethhdr 990 * @src_mac: h_source field of ethhdr 991 * 992 * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not 993 * necessarily equate to whole fields of the packet header, this 994 * structure is used to hold the cumulative effect of the partial 995 * field pedits that have been processed so far. 996 */ 997 struct efx_tc_mangler_state { 998 u8 dst_mac_32:1; /* eth->h_dest[0:3] */ 999 u8 dst_mac_16:1; /* eth->h_dest[4:5] */ 1000 u8 src_mac_16:1; /* eth->h_source[0:1] */ 1001 u8 src_mac_32:1; /* eth->h_source[2:5] */ 1002 unsigned char dst_mac[ETH_ALEN]; 1003 unsigned char src_mac[ETH_ALEN]; 1004 }; 1005 1006 /** efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung 1007 * @efx: NIC we're installing a flow rule on 1008 * @act: action set (cursor) to update 1009 * @mung: accumulated partial mangles 1010 * @extack: netlink extended ack for reporting errors 1011 * 1012 * Check @mung to find any combinations of partial mangles that can be 1013 * combined into a complete packet field edit, add that edit to @act, 1014 * and consume the partial mangles from @mung. 1015 */ 1016 1017 static int efx_tc_complete_mac_mangle(struct efx_nic *efx, 1018 struct efx_tc_action_set *act, 1019 struct efx_tc_mangler_state *mung, 1020 struct netlink_ext_ack *extack) 1021 { 1022 struct efx_tc_mac_pedit_action *ped; 1023 1024 if (mung->dst_mac_32 && mung->dst_mac_16) { 1025 ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack); 1026 if (IS_ERR(ped)) 1027 return PTR_ERR(ped); 1028 1029 /* Check that we have not already populated dst_mac */ 1030 if (act->dst_mac) 1031 efx_tc_flower_put_mac(efx, act->dst_mac); 1032 1033 act->dst_mac = ped; 1034 1035 /* consume the incomplete state */ 1036 mung->dst_mac_32 = 0; 1037 mung->dst_mac_16 = 0; 1038 } 1039 if (mung->src_mac_16 && mung->src_mac_32) { 1040 ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack); 1041 if (IS_ERR(ped)) 1042 return PTR_ERR(ped); 1043 1044 /* Check that we have not already populated src_mac */ 1045 if (act->src_mac) 1046 efx_tc_flower_put_mac(efx, act->src_mac); 1047 1048 act->src_mac = ped; 1049 1050 /* consume the incomplete state */ 1051 mung->src_mac_32 = 0; 1052 mung->src_mac_16 = 0; 1053 } 1054 return 0; 1055 } 1056 1057 static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act, 1058 const struct flow_action_entry *fa, 1059 struct netlink_ext_ack *extack) 1060 { 1061 switch (fa->mangle.htype) { 1062 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 1063 switch (fa->mangle.offset) { 1064 case offsetof(struct iphdr, ttl): 1065 /* check that pedit applies to ttl only */ 1066 if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) 1067 break; 1068 1069 /* Adding 0xff is equivalent to decrementing the ttl. 1070 * Other added values are not supported. 1071 */ 1072 if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX) 1073 break; 1074 1075 /* check that we do not decrement ttl twice */ 1076 if (!efx_tc_flower_action_order_ok(act, 1077 EFX_TC_AO_DEC_TTL)) { 1078 NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl"); 1079 return -EOPNOTSUPP; 1080 } 1081 act->do_ttl_dec = 1; 1082 return 0; 1083 default: 1084 break; 1085 } 1086 break; 1087 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 1088 switch (fa->mangle.offset) { 1089 case round_down(offsetof(struct ipv6hdr, hop_limit), 4): 1090 /* check that pedit applies to hoplimit only */ 1091 if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) 1092 break; 1093 1094 /* Adding 0xff is equivalent to decrementing the hoplimit. 1095 * Other added values are not supported. 1096 */ 1097 if ((fa->mangle.val >> 24) != U8_MAX) 1098 break; 1099 1100 /* check that we do not decrement hoplimit twice */ 1101 if (!efx_tc_flower_action_order_ok(act, 1102 EFX_TC_AO_DEC_TTL)) { 1103 NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl"); 1104 return -EOPNOTSUPP; 1105 } 1106 act->do_ttl_dec = 1; 1107 return 0; 1108 default: 1109 break; 1110 } 1111 break; 1112 default: 1113 break; 1114 } 1115 1116 NL_SET_ERR_MSG_FMT_MOD(extack, 1117 "Unsupported: ttl add action type %x %x %x/%x", 1118 fa->mangle.htype, fa->mangle.offset, 1119 fa->mangle.val, fa->mangle.mask); 1120 return -EOPNOTSUPP; 1121 } 1122 1123 /** 1124 * efx_tc_mangle() - handle a single 32-bit (or less) pedit 1125 * @efx: NIC we're installing a flow rule on 1126 * @act: action set (cursor) to update 1127 * @fa: FLOW_ACTION_MANGLE action metadata 1128 * @mung: accumulator for partial mangles 1129 * @extack: netlink extended ack for reporting errors 1130 * @match: original match used along with the mangle action 1131 * 1132 * Identify the fields written by a FLOW_ACTION_MANGLE, and record 1133 * the partial mangle state in @mung. If this mangle completes an 1134 * earlier partial mangle, consume and apply to @act by calling 1135 * efx_tc_complete_mac_mangle(). 1136 */ 1137 1138 static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, 1139 const struct flow_action_entry *fa, 1140 struct efx_tc_mangler_state *mung, 1141 struct netlink_ext_ack *extack, 1142 struct efx_tc_match *match) 1143 { 1144 __le32 mac32; 1145 __le16 mac16; 1146 u8 tr_ttl; 1147 1148 switch (fa->mangle.htype) { 1149 case FLOW_ACT_MANGLE_HDR_TYPE_ETH: 1150 BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0); 1151 BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6); 1152 if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) { 1153 NL_SET_ERR_MSG_MOD(extack, 1154 "Pedit mangle mac action violates action order"); 1155 return -EOPNOTSUPP; 1156 } 1157 switch (fa->mangle.offset) { 1158 case 0: 1159 if (fa->mangle.mask) { 1160 NL_SET_ERR_MSG_FMT_MOD(extack, 1161 "Unsupported: mask (%#x) of eth.dst32 mangle", 1162 fa->mangle.mask); 1163 return -EOPNOTSUPP; 1164 } 1165 /* Ethernet address is little-endian */ 1166 mac32 = cpu_to_le32(fa->mangle.val); 1167 memcpy(mung->dst_mac, &mac32, sizeof(mac32)); 1168 mung->dst_mac_32 = 1; 1169 return efx_tc_complete_mac_mangle(efx, act, mung, extack); 1170 case 4: 1171 if (fa->mangle.mask == 0xffff) { 1172 mac16 = cpu_to_le16(fa->mangle.val >> 16); 1173 memcpy(mung->src_mac, &mac16, sizeof(mac16)); 1174 mung->src_mac_16 = 1; 1175 } else if (fa->mangle.mask == 0xffff0000) { 1176 mac16 = cpu_to_le16((u16)fa->mangle.val); 1177 memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16)); 1178 mung->dst_mac_16 = 1; 1179 } else { 1180 NL_SET_ERR_MSG_FMT_MOD(extack, 1181 "Unsupported: mask (%#x) of eth+4 mangle is not high or low 16b", 1182 fa->mangle.mask); 1183 return -EOPNOTSUPP; 1184 } 1185 return efx_tc_complete_mac_mangle(efx, act, mung, extack); 1186 case 8: 1187 if (fa->mangle.mask) { 1188 NL_SET_ERR_MSG_FMT_MOD(extack, 1189 "Unsupported: mask (%#x) of eth.src32 mangle", 1190 fa->mangle.mask); 1191 return -EOPNOTSUPP; 1192 } 1193 mac32 = cpu_to_le32(fa->mangle.val); 1194 memcpy(mung->src_mac + 2, &mac32, sizeof(mac32)); 1195 mung->src_mac_32 = 1; 1196 return efx_tc_complete_mac_mangle(efx, act, mung, extack); 1197 default: 1198 NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported: mangle eth+%u %x/%x", 1199 fa->mangle.offset, fa->mangle.val, fa->mangle.mask); 1200 return -EOPNOTSUPP; 1201 } 1202 break; 1203 case FLOW_ACT_MANGLE_HDR_TYPE_IP4: 1204 switch (fa->mangle.offset) { 1205 case offsetof(struct iphdr, ttl): 1206 /* we currently only support pedit IP4 when it applies 1207 * to TTL and then only when it can be achieved with a 1208 * decrement ttl action 1209 */ 1210 1211 /* check that pedit applies to ttl only */ 1212 if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) { 1213 NL_SET_ERR_MSG_FMT_MOD(extack, 1214 "Unsupported: mask (%#x) out of range, only support mangle action on ipv4.ttl", 1215 fa->mangle.mask); 1216 return -EOPNOTSUPP; 1217 } 1218 1219 /* we can only convert to a dec ttl when we have an 1220 * exact match on the ttl field 1221 */ 1222 if (match->mask.ip_ttl != U8_MAX) { 1223 NL_SET_ERR_MSG_FMT_MOD(extack, 1224 "Unsupported: only support mangle ipv4.ttl when we have an exact match on ttl, mask used for match (%#x)", 1225 match->mask.ip_ttl); 1226 return -EOPNOTSUPP; 1227 } 1228 1229 /* check that we don't try to decrement 0, which equates 1230 * to setting the ttl to 0xff 1231 */ 1232 if (match->value.ip_ttl == 0) { 1233 NL_SET_ERR_MSG_MOD(extack, 1234 "Unsupported: we cannot decrement ttl past 0"); 1235 return -EOPNOTSUPP; 1236 } 1237 1238 /* check that we do not decrement ttl twice */ 1239 if (!efx_tc_flower_action_order_ok(act, 1240 EFX_TC_AO_DEC_TTL)) { 1241 NL_SET_ERR_MSG_MOD(extack, 1242 "Unsupported: multiple dec ttl"); 1243 return -EOPNOTSUPP; 1244 } 1245 1246 /* check pedit can be achieved with decrement action */ 1247 tr_ttl = match->value.ip_ttl - 1; 1248 if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) { 1249 act->do_ttl_dec = 1; 1250 return 0; 1251 } 1252 1253 fallthrough; 1254 default: 1255 NL_SET_ERR_MSG_FMT_MOD(extack, 1256 "Unsupported: only support mangle on the ttl field (offset is %u)", 1257 fa->mangle.offset); 1258 return -EOPNOTSUPP; 1259 } 1260 break; 1261 case FLOW_ACT_MANGLE_HDR_TYPE_IP6: 1262 switch (fa->mangle.offset) { 1263 case round_down(offsetof(struct ipv6hdr, hop_limit), 4): 1264 /* we currently only support pedit IP6 when it applies 1265 * to the hoplimit and then only when it can be achieved 1266 * with a decrement hoplimit action 1267 */ 1268 1269 /* check that pedit applies to ttl only */ 1270 if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) { 1271 NL_SET_ERR_MSG_FMT_MOD(extack, 1272 "Unsupported: mask (%#x) out of range, only support mangle action on ipv6.hop_limit", 1273 fa->mangle.mask); 1274 1275 return -EOPNOTSUPP; 1276 } 1277 1278 /* we can only convert to a dec ttl when we have an 1279 * exact match on the ttl field 1280 */ 1281 if (match->mask.ip_ttl != U8_MAX) { 1282 NL_SET_ERR_MSG_FMT_MOD(extack, 1283 "Unsupported: only support mangle ipv6.hop_limit when we have an exact match on ttl, mask used for match (%#x)", 1284 match->mask.ip_ttl); 1285 return -EOPNOTSUPP; 1286 } 1287 1288 /* check that we don't try to decrement 0, which equates 1289 * to setting the ttl to 0xff 1290 */ 1291 if (match->value.ip_ttl == 0) { 1292 NL_SET_ERR_MSG_MOD(extack, 1293 "Unsupported: we cannot decrement hop_limit past 0"); 1294 return -EOPNOTSUPP; 1295 } 1296 1297 /* check that we do not decrement hoplimit twice */ 1298 if (!efx_tc_flower_action_order_ok(act, 1299 EFX_TC_AO_DEC_TTL)) { 1300 NL_SET_ERR_MSG_MOD(extack, 1301 "Unsupported: multiple dec ttl"); 1302 return -EOPNOTSUPP; 1303 } 1304 1305 /* check pedit can be achieved with decrement action */ 1306 tr_ttl = match->value.ip_ttl - 1; 1307 if ((fa->mangle.val >> 24) == tr_ttl) { 1308 act->do_ttl_dec = 1; 1309 return 0; 1310 } 1311 1312 fallthrough; 1313 default: 1314 NL_SET_ERR_MSG_FMT_MOD(extack, 1315 "Unsupported: only support mangle on the hop_limit field"); 1316 return -EOPNOTSUPP; 1317 } 1318 default: 1319 NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule", 1320 fa->mangle.htype); 1321 return -EOPNOTSUPP; 1322 } 1323 return 0; 1324 } 1325 1326 /** 1327 * efx_tc_incomplete_mangle() - check for leftover partial pedits 1328 * @mung: accumulator for partial mangles 1329 * @extack: netlink extended ack for reporting errors 1330 * 1331 * Since the MAE can only overwrite whole fields, any partial 1332 * field mangle left over on reaching packet delivery (mirred or 1333 * end of TC actions) cannot be offloaded. Check for any such 1334 * and reject them with -%EOPNOTSUPP. 1335 */ 1336 1337 static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung, 1338 struct netlink_ext_ack *extack) 1339 { 1340 if (mung->dst_mac_32 || mung->dst_mac_16) { 1341 NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address"); 1342 return -EOPNOTSUPP; 1343 } 1344 if (mung->src_mac_16 || mung->src_mac_32) { 1345 NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address"); 1346 return -EOPNOTSUPP; 1347 } 1348 return 0; 1349 } 1350 1351 static int efx_tc_flower_replace_foreign(struct efx_nic *efx, 1352 struct net_device *net_dev, 1353 struct flow_cls_offload *tc) 1354 { 1355 struct flow_rule *fr = flow_cls_offload_flow_rule(tc); 1356 struct netlink_ext_ack *extack = tc->common.extack; 1357 struct efx_tc_flow_rule *rule = NULL, *old = NULL; 1358 struct efx_tc_action_set *act = NULL; 1359 bool found = false, uplinked = false; 1360 const struct flow_action_entry *fa; 1361 struct efx_tc_match match; 1362 struct efx_rep *to_efv; 1363 s64 rc; 1364 int i; 1365 1366 /* Parse match */ 1367 memset(&match, 0, sizeof(match)); 1368 rc = efx_tc_flower_parse_match(efx, fr, &match, NULL); 1369 if (rc) 1370 return rc; 1371 /* The rule as given to us doesn't specify a source netdevice. 1372 * But, determining whether packets from a VF should match it is 1373 * complicated, so leave those to the software slowpath: qualify 1374 * the filter with source m-port == wire. 1375 */ 1376 rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF); 1377 if (rc < 0) { 1378 NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter"); 1379 return rc; 1380 } 1381 match.value.ingress_port = rc; 1382 match.mask.ingress_port = ~0; 1383 1384 if (tc->common.chain_index) { 1385 struct efx_tc_recirc_id *rid; 1386 1387 rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, net_dev); 1388 if (IS_ERR(rid)) { 1389 NL_SET_ERR_MSG_FMT_MOD(extack, 1390 "Failed to allocate a hardware recirculation ID for chain_index %u", 1391 tc->common.chain_index); 1392 return PTR_ERR(rid); 1393 } 1394 match.rid = rid; 1395 match.value.recirc_id = rid->fw_id; 1396 } 1397 match.mask.recirc_id = 0xff; 1398 1399 /* AR table can't match on DO_CT (+trk). But a commonly used pattern is 1400 * +trk+est, which is strictly implied by +est, so rewrite it to that. 1401 */ 1402 if (match.mask.ct_state_trk && match.value.ct_state_trk && 1403 match.mask.ct_state_est && match.value.ct_state_est) 1404 match.mask.ct_state_trk = 0; 1405 /* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could 1406 * match +trk-est (CT_HIT=0) despite being on an established connection. 1407 * So make -est imply -tcp_syn_fin_rst match to ensure these packets 1408 * still hit the software path. 1409 */ 1410 if (match.mask.ct_state_est && !match.value.ct_state_est) { 1411 if (match.value.tcp_syn_fin_rst) { 1412 /* Can't offload this combination */ 1413 rc = -EOPNOTSUPP; 1414 goto release; 1415 } 1416 match.mask.tcp_syn_fin_rst = true; 1417 } 1418 1419 flow_action_for_each(i, fa, &fr->action) { 1420 switch (fa->id) { 1421 case FLOW_ACTION_REDIRECT: 1422 case FLOW_ACTION_MIRRED: /* mirred means mirror here */ 1423 to_efv = efx_tc_flower_lookup_efv(efx, fa->dev); 1424 if (IS_ERR(to_efv)) 1425 continue; 1426 found = true; 1427 break; 1428 default: 1429 break; 1430 } 1431 } 1432 if (!found) { /* We don't care. */ 1433 netif_dbg(efx, drv, efx->net_dev, 1434 "Ignoring foreign filter that doesn't egdev us\n"); 1435 rc = -EOPNOTSUPP; 1436 goto release; 1437 } 1438 1439 rc = efx_mae_match_check_caps(efx, &match.mask, NULL); 1440 if (rc) 1441 goto release; 1442 1443 if (efx_tc_match_is_encap(&match.mask)) { 1444 enum efx_encap_type type; 1445 1446 type = efx_tc_indr_netdev_type(net_dev); 1447 if (type == EFX_ENCAP_TYPE_NONE) { 1448 NL_SET_ERR_MSG_MOD(extack, 1449 "Egress encap match on unsupported tunnel device"); 1450 rc = -EOPNOTSUPP; 1451 goto release; 1452 } 1453 1454 rc = efx_mae_check_encap_type_supported(efx, type); 1455 if (rc) { 1456 NL_SET_ERR_MSG_FMT_MOD(extack, 1457 "Firmware reports no support for %s encap match", 1458 efx_tc_encap_type_name(type)); 1459 goto release; 1460 } 1461 1462 rc = efx_tc_flower_record_encap_match(efx, &match, type, 1463 EFX_TC_EM_DIRECT, 0, 0, 1464 extack); 1465 if (rc) 1466 goto release; 1467 } else { 1468 /* This is not a tunnel decap rule, ignore it */ 1469 netif_dbg(efx, drv, efx->net_dev, 1470 "Ignoring foreign filter without encap match\n"); 1471 rc = -EOPNOTSUPP; 1472 goto release; 1473 } 1474 1475 rule = kzalloc(sizeof(*rule), GFP_USER); 1476 if (!rule) { 1477 rc = -ENOMEM; 1478 goto release; 1479 } 1480 INIT_LIST_HEAD(&rule->acts.list); 1481 rule->cookie = tc->cookie; 1482 old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht, 1483 &rule->linkage, 1484 efx_tc_match_action_ht_params); 1485 if (old) { 1486 netif_dbg(efx, drv, efx->net_dev, 1487 "Ignoring already-offloaded rule (cookie %lx)\n", 1488 tc->cookie); 1489 rc = -EEXIST; 1490 goto release; 1491 } 1492 1493 act = kzalloc(sizeof(*act), GFP_USER); 1494 if (!act) { 1495 rc = -ENOMEM; 1496 goto release; 1497 } 1498 1499 /* Parse actions. For foreign rules we only support decap & redirect. 1500 * See corresponding code in efx_tc_flower_replace() for theory of 1501 * operation & how 'act' cursor is used. 1502 */ 1503 flow_action_for_each(i, fa, &fr->action) { 1504 struct efx_tc_action_set save; 1505 1506 switch (fa->id) { 1507 case FLOW_ACTION_REDIRECT: 1508 case FLOW_ACTION_MIRRED: 1509 /* See corresponding code in efx_tc_flower_replace() for 1510 * long explanations of what's going on here. 1511 */ 1512 save = *act; 1513 if (fa->hw_stats) { 1514 struct efx_tc_counter_index *ctr; 1515 1516 if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) { 1517 NL_SET_ERR_MSG_FMT_MOD(extack, 1518 "hw_stats_type %u not supported (only 'delayed')", 1519 fa->hw_stats); 1520 rc = -EOPNOTSUPP; 1521 goto release; 1522 } 1523 if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) { 1524 rc = -EOPNOTSUPP; 1525 goto release; 1526 } 1527 1528 ctr = efx_tc_flower_get_counter_index(efx, 1529 tc->cookie, 1530 EFX_TC_COUNTER_TYPE_AR); 1531 if (IS_ERR(ctr)) { 1532 rc = PTR_ERR(ctr); 1533 NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter"); 1534 goto release; 1535 } 1536 act->count = ctr; 1537 INIT_LIST_HEAD(&act->count_user); 1538 } 1539 1540 if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) { 1541 /* can't happen */ 1542 rc = -EOPNOTSUPP; 1543 NL_SET_ERR_MSG_MOD(extack, 1544 "Deliver action violates action order (can't happen)"); 1545 goto release; 1546 } 1547 to_efv = efx_tc_flower_lookup_efv(efx, fa->dev); 1548 /* PF implies egdev is us, in which case we really 1549 * want to deliver to the uplink (because this is an 1550 * ingress filter). If we don't recognise the egdev 1551 * at all, then we'd better trap so SW can handle it. 1552 */ 1553 if (IS_ERR(to_efv)) 1554 to_efv = EFX_EFV_PF; 1555 if (to_efv == EFX_EFV_PF) { 1556 if (uplinked) 1557 break; 1558 uplinked = true; 1559 } 1560 rc = efx_tc_flower_internal_mport(efx, to_efv); 1561 if (rc < 0) { 1562 NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port"); 1563 goto release; 1564 } 1565 act->dest_mport = rc; 1566 act->deliver = 1; 1567 rc = efx_mae_alloc_action_set(efx, act); 1568 if (rc) { 1569 NL_SET_ERR_MSG_MOD(extack, 1570 "Failed to write action set to hw (mirred)"); 1571 goto release; 1572 } 1573 list_add_tail(&act->list, &rule->acts.list); 1574 act = NULL; 1575 if (fa->id == FLOW_ACTION_REDIRECT) 1576 break; /* end of the line */ 1577 /* Mirror, so continue on with saved act */ 1578 act = kzalloc(sizeof(*act), GFP_USER); 1579 if (!act) { 1580 rc = -ENOMEM; 1581 goto release; 1582 } 1583 *act = save; 1584 break; 1585 case FLOW_ACTION_TUNNEL_DECAP: 1586 if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) { 1587 rc = -EINVAL; 1588 NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order"); 1589 goto release; 1590 } 1591 act->decap = 1; 1592 /* If we previously delivered/trapped to uplink, now 1593 * that we've decapped we'll want another copy if we 1594 * try to deliver/trap to uplink again. 1595 */ 1596 uplinked = false; 1597 break; 1598 default: 1599 NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u", 1600 fa->id); 1601 rc = -EOPNOTSUPP; 1602 goto release; 1603 } 1604 } 1605 1606 if (act) { 1607 if (!uplinked) { 1608 /* Not shot/redirected, so deliver to default dest (which is 1609 * the uplink, as this is an ingress filter) 1610 */ 1611 efx_mae_mport_uplink(efx, &act->dest_mport); 1612 act->deliver = 1; 1613 } 1614 rc = efx_mae_alloc_action_set(efx, act); 1615 if (rc) { 1616 NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)"); 1617 goto release; 1618 } 1619 list_add_tail(&act->list, &rule->acts.list); 1620 act = NULL; /* Prevent double-free in error path */ 1621 } 1622 1623 rule->match = match; 1624 1625 netif_dbg(efx, drv, efx->net_dev, 1626 "Successfully parsed foreign filter (cookie %lx)\n", 1627 tc->cookie); 1628 1629 rc = efx_mae_alloc_action_set_list(efx, &rule->acts); 1630 if (rc) { 1631 NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw"); 1632 goto release; 1633 } 1634 rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC, 1635 rule->acts.fw_id, &rule->fw_id); 1636 if (rc) { 1637 NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw"); 1638 goto release_acts; 1639 } 1640 return 0; 1641 1642 release_acts: 1643 efx_mae_free_action_set_list(efx, &rule->acts); 1644 release: 1645 /* We failed to insert the rule, so free up any entries we created in 1646 * subsidiary tables. 1647 */ 1648 if (match.rid) 1649 efx_tc_put_recirc_id(efx, match.rid); 1650 if (act) 1651 efx_tc_free_action_set(efx, act, false); 1652 if (rule) { 1653 if (!old) 1654 rhashtable_remove_fast(&efx->tc->match_action_ht, 1655 &rule->linkage, 1656 efx_tc_match_action_ht_params); 1657 efx_tc_free_action_set_list(efx, &rule->acts, false); 1658 } 1659 kfree(rule); 1660 if (match.encap) 1661 efx_tc_flower_release_encap_match(efx, match.encap); 1662 return rc; 1663 } 1664 1665 static int efx_tc_flower_replace_lhs(struct efx_nic *efx, 1666 struct flow_cls_offload *tc, 1667 struct flow_rule *fr, 1668 struct efx_tc_match *match, 1669 struct efx_rep *efv, 1670 struct net_device *net_dev) 1671 { 1672 struct netlink_ext_ack *extack = tc->common.extack; 1673 struct efx_tc_lhs_rule *rule, *old; 1674 int rc; 1675 1676 if (tc->common.chain_index) { 1677 NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0"); 1678 return -EOPNOTSUPP; 1679 } 1680 1681 if (match->mask.ct_state_trk && match->value.ct_state_trk) { 1682 NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk"); 1683 return -EOPNOTSUPP; 1684 } 1685 /* LHS rules are always -trk, so we don't need to match on that */ 1686 match->mask.ct_state_trk = 0; 1687 match->value.ct_state_trk = 0; 1688 1689 rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack); 1690 if (rc) 1691 return rc; 1692 1693 rule = kzalloc(sizeof(*rule), GFP_USER); 1694 if (!rule) 1695 return -ENOMEM; 1696 rule->cookie = tc->cookie; 1697 old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht, 1698 &rule->linkage, 1699 efx_tc_lhs_rule_ht_params); 1700 if (old) { 1701 netif_dbg(efx, drv, efx->net_dev, 1702 "Already offloaded rule (cookie %lx)\n", tc->cookie); 1703 rc = -EEXIST; 1704 NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded"); 1705 goto release; 1706 } 1707 1708 /* Parse actions */ 1709 /* See note in efx_tc_flower_replace() regarding passed net_dev 1710 * (used for efx_tc_get_recirc_id()). 1711 */ 1712 rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, efx->net_dev, rule); 1713 if (rc) 1714 goto release; 1715 1716 rule->match = *match; 1717 1718 rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC); 1719 if (rc) { 1720 NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw"); 1721 goto release; 1722 } 1723 netif_dbg(efx, drv, efx->net_dev, 1724 "Successfully parsed lhs rule (cookie %lx)\n", 1725 tc->cookie); 1726 return 0; 1727 1728 release: 1729 efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act); 1730 if (!old) 1731 rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage, 1732 efx_tc_lhs_rule_ht_params); 1733 kfree(rule); 1734 return rc; 1735 } 1736 1737 static int efx_tc_flower_replace(struct efx_nic *efx, 1738 struct net_device *net_dev, 1739 struct flow_cls_offload *tc, 1740 struct efx_rep *efv) 1741 { 1742 struct flow_rule *fr = flow_cls_offload_flow_rule(tc); 1743 struct netlink_ext_ack *extack = tc->common.extack; 1744 const struct ip_tunnel_info *encap_info = NULL; 1745 struct efx_tc_flow_rule *rule = NULL, *old; 1746 struct efx_tc_mangler_state mung = {}; 1747 struct efx_tc_action_set *act = NULL; 1748 const struct flow_action_entry *fa; 1749 struct efx_rep *from_efv, *to_efv; 1750 struct efx_tc_match match; 1751 u32 acts_id; 1752 s64 rc; 1753 int i; 1754 1755 if (!tc_can_offload_extack(efx->net_dev, extack)) 1756 return -EOPNOTSUPP; 1757 if (WARN_ON(!efx->tc)) 1758 return -ENETDOWN; 1759 if (WARN_ON(!efx->tc->up)) 1760 return -ENETDOWN; 1761 1762 from_efv = efx_tc_flower_lookup_efv(efx, net_dev); 1763 if (IS_ERR(from_efv)) { 1764 /* Not from our PF or representors, so probably a tunnel dev */ 1765 return efx_tc_flower_replace_foreign(efx, net_dev, tc); 1766 } 1767 1768 if (efv != from_efv) { 1769 /* can't happen */ 1770 NL_SET_ERR_MSG_FMT_MOD(extack, "for %s efv is %snull but from_efv is %snull (can't happen)", 1771 netdev_name(net_dev), efv ? "non-" : "", 1772 from_efv ? "non-" : ""); 1773 return -EINVAL; 1774 } 1775 1776 /* Parse match */ 1777 memset(&match, 0, sizeof(match)); 1778 rc = efx_tc_flower_external_mport(efx, from_efv); 1779 if (rc < 0) { 1780 NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port"); 1781 return rc; 1782 } 1783 match.value.ingress_port = rc; 1784 match.mask.ingress_port = ~0; 1785 rc = efx_tc_flower_parse_match(efx, fr, &match, extack); 1786 if (rc) 1787 return rc; 1788 if (efx_tc_match_is_encap(&match.mask)) { 1789 NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported"); 1790 return -EOPNOTSUPP; 1791 } 1792 1793 if (efx_tc_rule_is_lhs_rule(fr, &match)) 1794 return efx_tc_flower_replace_lhs(efx, tc, fr, &match, efv, 1795 net_dev); 1796 1797 /* chain_index 0 is always recirc_id 0 (and does not appear in recirc_ht). 1798 * Conveniently, match.rid == NULL and match.value.recirc_id == 0 owing 1799 * to the initial memset(), so we don't need to do anything in that case. 1800 */ 1801 if (tc->common.chain_index) { 1802 struct efx_tc_recirc_id *rid; 1803 1804 /* Note regarding passed net_dev: 1805 * VFreps and PF can share chain namespace, as they have 1806 * distinct ingress_mports. So we don't need to burn an 1807 * extra recirc_id if both use the same chain_index. 1808 * (Strictly speaking, we could give each VFrep its own 1809 * recirc_id namespace that doesn't take IDs away from the 1810 * PF, but that would require a bunch of additional IDAs - 1811 * one for each representor - and that's not likely to be 1812 * the main cause of recirc_id exhaustion anyway.) 1813 */ 1814 rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, 1815 efx->net_dev); 1816 if (IS_ERR(rid)) { 1817 NL_SET_ERR_MSG_FMT_MOD(extack, 1818 "Failed to allocate a hardware recirculation ID for chain_index %u", 1819 tc->common.chain_index); 1820 return PTR_ERR(rid); 1821 } 1822 match.rid = rid; 1823 match.value.recirc_id = rid->fw_id; 1824 } 1825 match.mask.recirc_id = 0xff; 1826 1827 /* AR table can't match on DO_CT (+trk). But a commonly used pattern is 1828 * +trk+est, which is strictly implied by +est, so rewrite it to that. 1829 */ 1830 if (match.mask.ct_state_trk && match.value.ct_state_trk && 1831 match.mask.ct_state_est && match.value.ct_state_est) 1832 match.mask.ct_state_trk = 0; 1833 /* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could 1834 * match +trk-est (CT_HIT=0) despite being on an established connection. 1835 * So make -est imply -tcp_syn_fin_rst match to ensure these packets 1836 * still hit the software path. 1837 */ 1838 if (match.mask.ct_state_est && !match.value.ct_state_est) { 1839 if (match.value.tcp_syn_fin_rst) { 1840 /* Can't offload this combination */ 1841 rc = -EOPNOTSUPP; 1842 goto release; 1843 } 1844 match.mask.tcp_syn_fin_rst = true; 1845 } 1846 1847 rc = efx_mae_match_check_caps(efx, &match.mask, extack); 1848 if (rc) 1849 goto release; 1850 1851 rule = kzalloc(sizeof(*rule), GFP_USER); 1852 if (!rule) { 1853 rc = -ENOMEM; 1854 goto release; 1855 } 1856 INIT_LIST_HEAD(&rule->acts.list); 1857 rule->cookie = tc->cookie; 1858 old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht, 1859 &rule->linkage, 1860 efx_tc_match_action_ht_params); 1861 if (old) { 1862 netif_dbg(efx, drv, efx->net_dev, 1863 "Already offloaded rule (cookie %lx)\n", tc->cookie); 1864 NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded"); 1865 rc = -EEXIST; 1866 goto release; 1867 } 1868 1869 /* Parse actions */ 1870 act = kzalloc(sizeof(*act), GFP_USER); 1871 if (!act) { 1872 rc = -ENOMEM; 1873 goto release; 1874 } 1875 1876 /** 1877 * DOC: TC action translation 1878 * 1879 * Actions in TC are sequential and cumulative, with delivery actions 1880 * potentially anywhere in the order. The EF100 MAE, however, takes 1881 * an 'action set list' consisting of 'action sets', each of which is 1882 * applied to the _original_ packet, and consists of a set of optional 1883 * actions in a fixed order with delivery at the end. 1884 * To translate between these two models, we maintain a 'cursor', @act, 1885 * which describes the cumulative effect of all the packet-mutating 1886 * actions encountered so far; on handling a delivery (mirred or drop) 1887 * action, once the action-set has been inserted into hardware, we 1888 * append @act to the action-set list (@rule->acts); if this is a pipe 1889 * action (mirred mirror) we then allocate a new @act with a copy of 1890 * the cursor state _before_ the delivery action, otherwise we set @act 1891 * to %NULL. 1892 * This ensures that every allocated action-set is either attached to 1893 * @rule->acts or pointed to by @act (and never both), and that only 1894 * those action-sets in @rule->acts exist in hardware. Consequently, 1895 * in the failure path, @act only needs to be freed in memory, whereas 1896 * for @rule->acts we remove each action-set from hardware before 1897 * freeing it (efx_tc_free_action_set_list()), even if the action-set 1898 * list itself is not in hardware. 1899 */ 1900 flow_action_for_each(i, fa, &fr->action) { 1901 struct efx_tc_action_set save; 1902 u16 tci; 1903 1904 if (!act) { 1905 /* more actions after a non-pipe action */ 1906 NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action"); 1907 rc = -EINVAL; 1908 goto release; 1909 } 1910 1911 if ((fa->id == FLOW_ACTION_REDIRECT || 1912 fa->id == FLOW_ACTION_MIRRED || 1913 fa->id == FLOW_ACTION_DROP) && fa->hw_stats) { 1914 struct efx_tc_counter_index *ctr; 1915 1916 /* Currently the only actions that want stats are 1917 * mirred and gact (ok, shot, trap, goto-chain), which 1918 * means we want stats just before delivery. Also, 1919 * note that tunnel_key set shouldn't change the length 1920 * — it's only the subsequent mirred that does that, 1921 * and the stats are taken _before_ the mirred action 1922 * happens. 1923 */ 1924 if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) { 1925 /* All supported actions that count either steal 1926 * (gact shot, mirred redirect) or clone act 1927 * (mirred mirror), so we should never get two 1928 * count actions on one action_set. 1929 */ 1930 NL_SET_ERR_MSG_MOD(extack, "Count-action conflict (can't happen)"); 1931 rc = -EOPNOTSUPP; 1932 goto release; 1933 } 1934 1935 if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) { 1936 NL_SET_ERR_MSG_FMT_MOD(extack, "hw_stats_type %u not supported (only 'delayed')", 1937 fa->hw_stats); 1938 rc = -EOPNOTSUPP; 1939 goto release; 1940 } 1941 1942 ctr = efx_tc_flower_get_counter_index(efx, tc->cookie, 1943 EFX_TC_COUNTER_TYPE_AR); 1944 if (IS_ERR(ctr)) { 1945 rc = PTR_ERR(ctr); 1946 NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter"); 1947 goto release; 1948 } 1949 act->count = ctr; 1950 INIT_LIST_HEAD(&act->count_user); 1951 } 1952 1953 switch (fa->id) { 1954 case FLOW_ACTION_DROP: 1955 rc = efx_mae_alloc_action_set(efx, act); 1956 if (rc) { 1957 NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (drop)"); 1958 goto release; 1959 } 1960 list_add_tail(&act->list, &rule->acts.list); 1961 act = NULL; /* end of the line */ 1962 break; 1963 case FLOW_ACTION_REDIRECT: 1964 case FLOW_ACTION_MIRRED: 1965 save = *act; 1966 1967 if (encap_info) { 1968 struct efx_tc_encap_action *encap; 1969 1970 if (!efx_tc_flower_action_order_ok(act, 1971 EFX_TC_AO_ENCAP)) { 1972 rc = -EOPNOTSUPP; 1973 NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order"); 1974 goto release; 1975 } 1976 encap = efx_tc_flower_create_encap_md( 1977 efx, encap_info, fa->dev, extack); 1978 if (IS_ERR_OR_NULL(encap)) { 1979 rc = PTR_ERR(encap); 1980 if (!rc) 1981 rc = -EIO; /* arbitrary */ 1982 goto release; 1983 } 1984 act->encap_md = encap; 1985 list_add_tail(&act->encap_user, &encap->users); 1986 act->dest_mport = encap->dest_mport; 1987 act->deliver = 1; 1988 if (act->count && !WARN_ON(!act->count->cnt)) { 1989 /* This counter is used by an encap 1990 * action, which needs a reference back 1991 * so it can prod neighbouring whenever 1992 * traffic is seen. 1993 */ 1994 spin_lock_bh(&act->count->cnt->lock); 1995 list_add_tail(&act->count_user, 1996 &act->count->cnt->users); 1997 spin_unlock_bh(&act->count->cnt->lock); 1998 } 1999 rc = efx_mae_alloc_action_set(efx, act); 2000 if (rc) { 2001 NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)"); 2002 goto release; 2003 } 2004 list_add_tail(&act->list, &rule->acts.list); 2005 act->user = &rule->acts; 2006 act = NULL; 2007 if (fa->id == FLOW_ACTION_REDIRECT) 2008 break; /* end of the line */ 2009 /* Mirror, so continue on with saved act */ 2010 save.count = NULL; 2011 act = kzalloc(sizeof(*act), GFP_USER); 2012 if (!act) { 2013 rc = -ENOMEM; 2014 goto release; 2015 } 2016 *act = save; 2017 break; 2018 } 2019 2020 if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) { 2021 /* can't happen */ 2022 rc = -EOPNOTSUPP; 2023 NL_SET_ERR_MSG_MOD(extack, "Deliver action violates action order (can't happen)"); 2024 goto release; 2025 } 2026 2027 to_efv = efx_tc_flower_lookup_efv(efx, fa->dev); 2028 if (IS_ERR(to_efv)) { 2029 NL_SET_ERR_MSG_MOD(extack, "Mirred egress device not on switch"); 2030 rc = PTR_ERR(to_efv); 2031 goto release; 2032 } 2033 rc = efx_tc_flower_external_mport(efx, to_efv); 2034 if (rc < 0) { 2035 NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port"); 2036 goto release; 2037 } 2038 act->dest_mport = rc; 2039 act->deliver = 1; 2040 rc = efx_mae_alloc_action_set(efx, act); 2041 if (rc) { 2042 NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (mirred)"); 2043 goto release; 2044 } 2045 list_add_tail(&act->list, &rule->acts.list); 2046 act = NULL; 2047 if (fa->id == FLOW_ACTION_REDIRECT) 2048 break; /* end of the line */ 2049 /* Mirror, so continue on with saved act */ 2050 save.count = NULL; 2051 act = kzalloc(sizeof(*act), GFP_USER); 2052 if (!act) { 2053 rc = -ENOMEM; 2054 goto release; 2055 } 2056 *act = save; 2057 break; 2058 case FLOW_ACTION_VLAN_POP: 2059 if (act->vlan_push) { 2060 act->vlan_push--; 2061 } else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) { 2062 act->vlan_pop++; 2063 } else { 2064 NL_SET_ERR_MSG_MOD(extack, 2065 "More than two VLAN pops, or action order violated"); 2066 rc = -EINVAL; 2067 goto release; 2068 } 2069 break; 2070 case FLOW_ACTION_VLAN_PUSH: 2071 if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) { 2072 rc = -EINVAL; 2073 NL_SET_ERR_MSG_MOD(extack, 2074 "More than two VLAN pushes, or action order violated"); 2075 goto release; 2076 } 2077 tci = fa->vlan.vid & VLAN_VID_MASK; 2078 tci |= fa->vlan.prio << VLAN_PRIO_SHIFT; 2079 act->vlan_tci[act->vlan_push] = cpu_to_be16(tci); 2080 act->vlan_proto[act->vlan_push] = fa->vlan.proto; 2081 act->vlan_push++; 2082 break; 2083 case FLOW_ACTION_ADD: 2084 rc = efx_tc_pedit_add(efx, act, fa, extack); 2085 if (rc < 0) 2086 goto release; 2087 break; 2088 case FLOW_ACTION_MANGLE: 2089 rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match); 2090 if (rc < 0) 2091 goto release; 2092 break; 2093 case FLOW_ACTION_TUNNEL_ENCAP: 2094 if (encap_info) { 2095 /* Can't specify encap multiple times. 2096 * If you want to overwrite an existing 2097 * encap_info, use an intervening 2098 * FLOW_ACTION_TUNNEL_DECAP to clear it. 2099 */ 2100 NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set"); 2101 rc = -EINVAL; 2102 goto release; 2103 } 2104 if (!fa->tunnel) { 2105 NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key"); 2106 rc = -EOPNOTSUPP; 2107 goto release; 2108 } 2109 encap_info = fa->tunnel; 2110 break; 2111 case FLOW_ACTION_TUNNEL_DECAP: 2112 if (encap_info) { 2113 encap_info = NULL; 2114 break; 2115 } 2116 /* Since we don't support enc_key matches on ingress 2117 * (and if we did there'd be no tunnel-device to give 2118 * us a type), we can't offload a decap that's not 2119 * just undoing a previous encap action. 2120 */ 2121 NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device"); 2122 rc = -EOPNOTSUPP; 2123 goto release; 2124 default: 2125 NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u", 2126 fa->id); 2127 rc = -EOPNOTSUPP; 2128 goto release; 2129 } 2130 } 2131 2132 rc = efx_tc_incomplete_mangle(&mung, extack); 2133 if (rc < 0) 2134 goto release; 2135 if (act) { 2136 /* Not shot/redirected, so deliver to default dest */ 2137 if (from_efv == EFX_EFV_PF) 2138 /* Rule applies to traffic from the wire, 2139 * and default dest is thus the PF 2140 */ 2141 efx_mae_mport_uplink(efx, &act->dest_mport); 2142 else 2143 /* Representor, so rule applies to traffic from 2144 * representee, and default dest is thus the rep. 2145 * All reps use the same mport for delivery 2146 */ 2147 efx_mae_mport_mport(efx, efx->tc->reps_mport_id, 2148 &act->dest_mport); 2149 act->deliver = 1; 2150 rc = efx_mae_alloc_action_set(efx, act); 2151 if (rc) { 2152 NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)"); 2153 goto release; 2154 } 2155 list_add_tail(&act->list, &rule->acts.list); 2156 act = NULL; /* Prevent double-free in error path */ 2157 } 2158 2159 netif_dbg(efx, drv, efx->net_dev, 2160 "Successfully parsed filter (cookie %lx)\n", 2161 tc->cookie); 2162 2163 rule->match = match; 2164 2165 rc = efx_mae_alloc_action_set_list(efx, &rule->acts); 2166 if (rc) { 2167 NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw"); 2168 goto release; 2169 } 2170 if (from_efv == EFX_EFV_PF) 2171 /* PF netdev, so rule applies to traffic from wire */ 2172 rule->fallback = &efx->tc->facts.pf; 2173 else 2174 /* repdev, so rule applies to traffic from representee */ 2175 rule->fallback = &efx->tc->facts.reps; 2176 if (!efx_tc_check_ready(efx, rule)) { 2177 netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n"); 2178 acts_id = rule->fallback->fw_id; 2179 } else { 2180 netif_dbg(efx, drv, efx->net_dev, "ready for hw\n"); 2181 acts_id = rule->acts.fw_id; 2182 } 2183 rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC, 2184 acts_id, &rule->fw_id); 2185 if (rc) { 2186 NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw"); 2187 goto release_acts; 2188 } 2189 return 0; 2190 2191 release_acts: 2192 efx_mae_free_action_set_list(efx, &rule->acts); 2193 release: 2194 /* We failed to insert the rule, so free up any entries we created in 2195 * subsidiary tables. 2196 */ 2197 if (match.rid) 2198 efx_tc_put_recirc_id(efx, match.rid); 2199 if (act) 2200 efx_tc_free_action_set(efx, act, false); 2201 if (rule) { 2202 if (!old) 2203 rhashtable_remove_fast(&efx->tc->match_action_ht, 2204 &rule->linkage, 2205 efx_tc_match_action_ht_params); 2206 efx_tc_free_action_set_list(efx, &rule->acts, false); 2207 } 2208 kfree(rule); 2209 return rc; 2210 } 2211 2212 static int efx_tc_flower_destroy(struct efx_nic *efx, 2213 struct net_device *net_dev, 2214 struct flow_cls_offload *tc) 2215 { 2216 struct netlink_ext_ack *extack = tc->common.extack; 2217 struct efx_tc_lhs_rule *lhs_rule; 2218 struct efx_tc_flow_rule *rule; 2219 2220 lhs_rule = rhashtable_lookup_fast(&efx->tc->lhs_rule_ht, &tc->cookie, 2221 efx_tc_lhs_rule_ht_params); 2222 if (lhs_rule) { 2223 /* Remove it from HW */ 2224 efx_mae_remove_lhs_rule(efx, lhs_rule); 2225 /* Delete it from SW */ 2226 efx_tc_flower_release_lhs_actions(efx, &lhs_rule->lhs_act); 2227 rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &lhs_rule->linkage, 2228 efx_tc_lhs_rule_ht_params); 2229 if (lhs_rule->match.encap) 2230 efx_tc_flower_release_encap_match(efx, lhs_rule->match.encap); 2231 netif_dbg(efx, drv, efx->net_dev, "Removed (lhs) filter %lx\n", 2232 lhs_rule->cookie); 2233 kfree(lhs_rule); 2234 return 0; 2235 } 2236 2237 rule = rhashtable_lookup_fast(&efx->tc->match_action_ht, &tc->cookie, 2238 efx_tc_match_action_ht_params); 2239 if (!rule) { 2240 /* Only log a message if we're the ingress device. Otherwise 2241 * it's a foreign filter and we might just not have been 2242 * interested (e.g. we might not have been the egress device 2243 * either). 2244 */ 2245 if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev))) 2246 netif_warn(efx, drv, efx->net_dev, 2247 "Filter %lx not found to remove\n", tc->cookie); 2248 NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules"); 2249 return -ENOENT; 2250 } 2251 2252 /* Remove it from HW */ 2253 efx_tc_delete_rule(efx, rule); 2254 /* Delete it from SW */ 2255 rhashtable_remove_fast(&efx->tc->match_action_ht, &rule->linkage, 2256 efx_tc_match_action_ht_params); 2257 netif_dbg(efx, drv, efx->net_dev, "Removed filter %lx\n", rule->cookie); 2258 kfree(rule); 2259 return 0; 2260 } 2261 2262 static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev, 2263 struct flow_cls_offload *tc) 2264 { 2265 struct netlink_ext_ack *extack = tc->common.extack; 2266 struct efx_tc_counter_index *ctr; 2267 struct efx_tc_counter *cnt; 2268 u64 packets, bytes; 2269 2270 ctr = efx_tc_flower_find_counter_index(efx, tc->cookie); 2271 if (!ctr) { 2272 /* See comment in efx_tc_flower_destroy() */ 2273 if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev))) 2274 if (net_ratelimit()) 2275 netif_warn(efx, drv, efx->net_dev, 2276 "Filter %lx not found for stats\n", 2277 tc->cookie); 2278 NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules"); 2279 return -ENOENT; 2280 } 2281 if (WARN_ON(!ctr->cnt)) /* can't happen */ 2282 return -EIO; 2283 cnt = ctr->cnt; 2284 2285 spin_lock_bh(&cnt->lock); 2286 /* Report only new pkts/bytes since last time TC asked */ 2287 packets = cnt->packets; 2288 bytes = cnt->bytes; 2289 flow_stats_update(&tc->stats, bytes - cnt->old_bytes, 2290 packets - cnt->old_packets, 0, cnt->touched, 2291 FLOW_ACTION_HW_STATS_DELAYED); 2292 cnt->old_packets = packets; 2293 cnt->old_bytes = bytes; 2294 spin_unlock_bh(&cnt->lock); 2295 return 0; 2296 } 2297 2298 int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev, 2299 struct flow_cls_offload *tc, struct efx_rep *efv) 2300 { 2301 int rc; 2302 2303 if (!efx->tc) 2304 return -EOPNOTSUPP; 2305 2306 mutex_lock(&efx->tc->mutex); 2307 switch (tc->command) { 2308 case FLOW_CLS_REPLACE: 2309 rc = efx_tc_flower_replace(efx, net_dev, tc, efv); 2310 break; 2311 case FLOW_CLS_DESTROY: 2312 rc = efx_tc_flower_destroy(efx, net_dev, tc); 2313 break; 2314 case FLOW_CLS_STATS: 2315 rc = efx_tc_flower_stats(efx, net_dev, tc); 2316 break; 2317 default: 2318 rc = -EOPNOTSUPP; 2319 break; 2320 } 2321 mutex_unlock(&efx->tc->mutex); 2322 return rc; 2323 } 2324 2325 static int efx_tc_configure_default_rule(struct efx_nic *efx, u32 ing_port, 2326 u32 eg_port, struct efx_tc_flow_rule *rule) 2327 { 2328 struct efx_tc_action_set_list *acts = &rule->acts; 2329 struct efx_tc_match *match = &rule->match; 2330 struct efx_tc_action_set *act; 2331 int rc; 2332 2333 match->value.ingress_port = ing_port; 2334 match->mask.ingress_port = ~0; 2335 act = kzalloc(sizeof(*act), GFP_KERNEL); 2336 if (!act) 2337 return -ENOMEM; 2338 act->deliver = 1; 2339 act->dest_mport = eg_port; 2340 rc = efx_mae_alloc_action_set(efx, act); 2341 if (rc) 2342 goto fail1; 2343 EFX_WARN_ON_PARANOID(!list_empty(&acts->list)); 2344 list_add_tail(&act->list, &acts->list); 2345 rc = efx_mae_alloc_action_set_list(efx, acts); 2346 if (rc) 2347 goto fail2; 2348 rc = efx_mae_insert_rule(efx, match, EFX_TC_PRIO_DFLT, 2349 acts->fw_id, &rule->fw_id); 2350 if (rc) 2351 goto fail3; 2352 return 0; 2353 fail3: 2354 efx_mae_free_action_set_list(efx, acts); 2355 fail2: 2356 list_del(&act->list); 2357 efx_mae_free_action_set(efx, act->fw_id); 2358 fail1: 2359 kfree(act); 2360 return rc; 2361 } 2362 2363 static int efx_tc_configure_default_rule_pf(struct efx_nic *efx) 2364 { 2365 struct efx_tc_flow_rule *rule = &efx->tc->dflt.pf; 2366 u32 ing_port, eg_port; 2367 2368 efx_mae_mport_uplink(efx, &ing_port); 2369 efx_mae_mport_wire(efx, &eg_port); 2370 return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule); 2371 } 2372 2373 static int efx_tc_configure_default_rule_wire(struct efx_nic *efx) 2374 { 2375 struct efx_tc_flow_rule *rule = &efx->tc->dflt.wire; 2376 u32 ing_port, eg_port; 2377 2378 efx_mae_mport_wire(efx, &ing_port); 2379 efx_mae_mport_uplink(efx, &eg_port); 2380 return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule); 2381 } 2382 2383 int efx_tc_configure_default_rule_rep(struct efx_rep *efv) 2384 { 2385 struct efx_tc_flow_rule *rule = &efv->dflt; 2386 struct efx_nic *efx = efv->parent; 2387 u32 ing_port, eg_port; 2388 2389 efx_mae_mport_mport(efx, efv->mport, &ing_port); 2390 efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port); 2391 return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule); 2392 } 2393 2394 void efx_tc_deconfigure_default_rule(struct efx_nic *efx, 2395 struct efx_tc_flow_rule *rule) 2396 { 2397 if (rule->fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL) 2398 efx_tc_delete_rule(efx, rule); 2399 rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; 2400 } 2401 2402 static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port, 2403 struct efx_tc_action_set_list *acts) 2404 { 2405 struct efx_tc_action_set *act; 2406 int rc; 2407 2408 act = kzalloc(sizeof(*act), GFP_KERNEL); 2409 if (!act) 2410 return -ENOMEM; 2411 act->deliver = 1; 2412 act->dest_mport = eg_port; 2413 rc = efx_mae_alloc_action_set(efx, act); 2414 if (rc) 2415 goto fail1; 2416 EFX_WARN_ON_PARANOID(!list_empty(&acts->list)); 2417 list_add_tail(&act->list, &acts->list); 2418 rc = efx_mae_alloc_action_set_list(efx, acts); 2419 if (rc) 2420 goto fail2; 2421 return 0; 2422 fail2: 2423 list_del(&act->list); 2424 efx_mae_free_action_set(efx, act->fw_id); 2425 fail1: 2426 kfree(act); 2427 return rc; 2428 } 2429 2430 static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx) 2431 { 2432 struct efx_tc_action_set_list *acts = &efx->tc->facts.pf; 2433 u32 eg_port; 2434 2435 efx_mae_mport_uplink(efx, &eg_port); 2436 return efx_tc_configure_fallback_acts(efx, eg_port, acts); 2437 } 2438 2439 static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx) 2440 { 2441 struct efx_tc_action_set_list *acts = &efx->tc->facts.reps; 2442 u32 eg_port; 2443 2444 efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port); 2445 return efx_tc_configure_fallback_acts(efx, eg_port, acts); 2446 } 2447 2448 static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx, 2449 struct efx_tc_action_set_list *acts) 2450 { 2451 efx_tc_free_action_set_list(efx, acts, true); 2452 } 2453 2454 static int efx_tc_configure_rep_mport(struct efx_nic *efx) 2455 { 2456 u32 rep_mport_label; 2457 int rc; 2458 2459 rc = efx_mae_allocate_mport(efx, &efx->tc->reps_mport_id, &rep_mport_label); 2460 if (rc) 2461 return rc; 2462 pci_dbg(efx->pci_dev, "created rep mport 0x%08x (0x%04x)\n", 2463 efx->tc->reps_mport_id, rep_mport_label); 2464 /* Use mport *selector* as vport ID */ 2465 efx_mae_mport_mport(efx, efx->tc->reps_mport_id, 2466 &efx->tc->reps_mport_vport_id); 2467 return 0; 2468 } 2469 2470 static void efx_tc_deconfigure_rep_mport(struct efx_nic *efx) 2471 { 2472 efx_mae_free_mport(efx, efx->tc->reps_mport_id); 2473 efx->tc->reps_mport_id = MAE_MPORT_SELECTOR_NULL; 2474 } 2475 2476 int efx_tc_insert_rep_filters(struct efx_nic *efx) 2477 { 2478 struct efx_filter_spec promisc, allmulti; 2479 int rc; 2480 2481 if (efx->type->is_vf) 2482 return 0; 2483 if (!efx->tc) 2484 return 0; 2485 efx_filter_init_rx(&promisc, EFX_FILTER_PRI_REQUIRED, 0, 0); 2486 efx_filter_set_uc_def(&promisc); 2487 efx_filter_set_vport_id(&promisc, efx->tc->reps_mport_vport_id); 2488 rc = efx_filter_insert_filter(efx, &promisc, false); 2489 if (rc < 0) 2490 return rc; 2491 efx->tc->reps_filter_uc = rc; 2492 efx_filter_init_rx(&allmulti, EFX_FILTER_PRI_REQUIRED, 0, 0); 2493 efx_filter_set_mc_def(&allmulti); 2494 efx_filter_set_vport_id(&allmulti, efx->tc->reps_mport_vport_id); 2495 rc = efx_filter_insert_filter(efx, &allmulti, false); 2496 if (rc < 0) 2497 return rc; 2498 efx->tc->reps_filter_mc = rc; 2499 return 0; 2500 } 2501 2502 void efx_tc_remove_rep_filters(struct efx_nic *efx) 2503 { 2504 if (efx->type->is_vf) 2505 return; 2506 if (!efx->tc) 2507 return; 2508 if (efx->tc->reps_filter_mc >= 0) 2509 efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_mc); 2510 efx->tc->reps_filter_mc = -1; 2511 if (efx->tc->reps_filter_uc >= 0) 2512 efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_uc); 2513 efx->tc->reps_filter_uc = -1; 2514 } 2515 2516 int efx_init_tc(struct efx_nic *efx) 2517 { 2518 int rc; 2519 2520 rc = efx_mae_get_caps(efx, efx->tc->caps); 2521 if (rc) 2522 return rc; 2523 if (efx->tc->caps->match_field_count > MAE_NUM_FIELDS) 2524 /* Firmware supports some match fields the driver doesn't know 2525 * about. Not fatal, unless any of those fields are required 2526 * (MAE_FIELD_SUPPORTED_MATCH_ALWAYS) but if so we don't know. 2527 */ 2528 netif_warn(efx, probe, efx->net_dev, 2529 "FW reports additional match fields %u\n", 2530 efx->tc->caps->match_field_count); 2531 if (efx->tc->caps->action_prios < EFX_TC_PRIO__NUM) { 2532 netif_err(efx, probe, efx->net_dev, 2533 "Too few action prios supported (have %u, need %u)\n", 2534 efx->tc->caps->action_prios, EFX_TC_PRIO__NUM); 2535 return -EIO; 2536 } 2537 rc = efx_tc_configure_default_rule_pf(efx); 2538 if (rc) 2539 return rc; 2540 rc = efx_tc_configure_default_rule_wire(efx); 2541 if (rc) 2542 return rc; 2543 rc = efx_tc_configure_rep_mport(efx); 2544 if (rc) 2545 return rc; 2546 rc = efx_tc_configure_fallback_acts_pf(efx); 2547 if (rc) 2548 return rc; 2549 rc = efx_tc_configure_fallback_acts_reps(efx); 2550 if (rc) 2551 return rc; 2552 rc = efx_mae_get_tables(efx); 2553 if (rc) 2554 return rc; 2555 rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx); 2556 if (rc) 2557 goto out_free; 2558 efx->tc->up = true; 2559 return 0; 2560 out_free: 2561 efx_mae_free_tables(efx); 2562 return rc; 2563 } 2564 2565 void efx_fini_tc(struct efx_nic *efx) 2566 { 2567 /* We can get called even if efx_init_struct_tc() failed */ 2568 if (!efx->tc) 2569 return; 2570 if (efx->tc->up) 2571 flow_indr_dev_unregister(efx_tc_indr_setup_cb, efx, efx_tc_block_unbind); 2572 efx_tc_deconfigure_rep_mport(efx); 2573 efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf); 2574 efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire); 2575 efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf); 2576 efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps); 2577 efx->tc->up = false; 2578 efx_mae_free_tables(efx); 2579 } 2580 2581 /* At teardown time, all TC filter rules (and thus all resources they created) 2582 * should already have been removed. If we find any in our hashtables, make a 2583 * cursory attempt to clean up the software side. 2584 */ 2585 static void efx_tc_encap_match_free(void *ptr, void *__unused) 2586 { 2587 struct efx_tc_encap_match *encap = ptr; 2588 2589 WARN_ON(refcount_read(&encap->ref)); 2590 kfree(encap); 2591 } 2592 2593 static void efx_tc_recirc_free(void *ptr, void *arg) 2594 { 2595 struct efx_tc_recirc_id *rid = ptr; 2596 struct efx_nic *efx = arg; 2597 2598 WARN_ON(refcount_read(&rid->ref)); 2599 ida_free(&efx->tc->recirc_ida, rid->fw_id); 2600 kfree(rid); 2601 } 2602 2603 static void efx_tc_lhs_free(void *ptr, void *arg) 2604 { 2605 struct efx_tc_lhs_rule *rule = ptr; 2606 struct efx_nic *efx = arg; 2607 2608 netif_err(efx, drv, efx->net_dev, 2609 "tc lhs_rule %lx still present at teardown, removing\n", 2610 rule->cookie); 2611 2612 if (rule->lhs_act.zone) 2613 efx_tc_ct_unregister_zone(efx, rule->lhs_act.zone); 2614 if (rule->lhs_act.count) 2615 efx_tc_flower_put_counter_index(efx, rule->lhs_act.count); 2616 efx_mae_remove_lhs_rule(efx, rule); 2617 2618 kfree(rule); 2619 } 2620 2621 static void efx_tc_mac_free(void *ptr, void *__unused) 2622 { 2623 struct efx_tc_mac_pedit_action *ped = ptr; 2624 2625 WARN_ON(refcount_read(&ped->ref)); 2626 kfree(ped); 2627 } 2628 2629 static void efx_tc_flow_free(void *ptr, void *arg) 2630 { 2631 struct efx_tc_flow_rule *rule = ptr; 2632 struct efx_nic *efx = arg; 2633 2634 netif_err(efx, drv, efx->net_dev, 2635 "tc rule %lx still present at teardown, removing\n", 2636 rule->cookie); 2637 2638 /* Also releases entries in subsidiary tables */ 2639 efx_tc_delete_rule(efx, rule); 2640 2641 kfree(rule); 2642 } 2643 2644 int efx_init_struct_tc(struct efx_nic *efx) 2645 { 2646 int rc; 2647 2648 if (efx->type->is_vf) 2649 return 0; 2650 2651 efx->tc = kzalloc(sizeof(*efx->tc), GFP_KERNEL); 2652 if (!efx->tc) 2653 return -ENOMEM; 2654 efx->tc->caps = kzalloc(sizeof(struct mae_caps), GFP_KERNEL); 2655 if (!efx->tc->caps) { 2656 rc = -ENOMEM; 2657 goto fail_alloc_caps; 2658 } 2659 INIT_LIST_HEAD(&efx->tc->block_list); 2660 2661 mutex_init(&efx->tc->mutex); 2662 init_waitqueue_head(&efx->tc->flush_wq); 2663 rc = efx_tc_init_encap_actions(efx); 2664 if (rc < 0) 2665 goto fail_encap_actions; 2666 rc = efx_tc_init_counters(efx); 2667 if (rc < 0) 2668 goto fail_counters; 2669 rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params); 2670 if (rc < 0) 2671 goto fail_mac_ht; 2672 rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params); 2673 if (rc < 0) 2674 goto fail_encap_match_ht; 2675 rc = rhashtable_init(&efx->tc->match_action_ht, &efx_tc_match_action_ht_params); 2676 if (rc < 0) 2677 goto fail_match_action_ht; 2678 rc = rhashtable_init(&efx->tc->lhs_rule_ht, &efx_tc_lhs_rule_ht_params); 2679 if (rc < 0) 2680 goto fail_lhs_rule_ht; 2681 rc = efx_tc_init_conntrack(efx); 2682 if (rc < 0) 2683 goto fail_conntrack; 2684 rc = rhashtable_init(&efx->tc->recirc_ht, &efx_tc_recirc_ht_params); 2685 if (rc < 0) 2686 goto fail_recirc_ht; 2687 ida_init(&efx->tc->recirc_ida); 2688 efx->tc->reps_filter_uc = -1; 2689 efx->tc->reps_filter_mc = -1; 2690 INIT_LIST_HEAD(&efx->tc->dflt.pf.acts.list); 2691 efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; 2692 INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list); 2693 efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; 2694 INIT_LIST_HEAD(&efx->tc->facts.pf.list); 2695 efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL; 2696 INIT_LIST_HEAD(&efx->tc->facts.reps.list); 2697 efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL; 2698 efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type; 2699 return 0; 2700 fail_recirc_ht: 2701 efx_tc_destroy_conntrack(efx); 2702 fail_conntrack: 2703 rhashtable_destroy(&efx->tc->lhs_rule_ht); 2704 fail_lhs_rule_ht: 2705 rhashtable_destroy(&efx->tc->match_action_ht); 2706 fail_match_action_ht: 2707 rhashtable_destroy(&efx->tc->encap_match_ht); 2708 fail_encap_match_ht: 2709 rhashtable_destroy(&efx->tc->mac_ht); 2710 fail_mac_ht: 2711 efx_tc_destroy_counters(efx); 2712 fail_counters: 2713 efx_tc_destroy_encap_actions(efx); 2714 fail_encap_actions: 2715 mutex_destroy(&efx->tc->mutex); 2716 kfree(efx->tc->caps); 2717 fail_alloc_caps: 2718 kfree(efx->tc); 2719 efx->tc = NULL; 2720 return rc; 2721 } 2722 2723 void efx_fini_struct_tc(struct efx_nic *efx) 2724 { 2725 if (!efx->tc) 2726 return; 2727 2728 mutex_lock(&efx->tc->mutex); 2729 EFX_WARN_ON_PARANOID(efx->tc->dflt.pf.fw_id != 2730 MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL); 2731 EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id != 2732 MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL); 2733 EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id != 2734 MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); 2735 EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id != 2736 MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); 2737 rhashtable_free_and_destroy(&efx->tc->lhs_rule_ht, efx_tc_lhs_free, efx); 2738 rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free, 2739 efx); 2740 rhashtable_free_and_destroy(&efx->tc->encap_match_ht, 2741 efx_tc_encap_match_free, NULL); 2742 efx_tc_fini_conntrack(efx); 2743 rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx); 2744 WARN_ON(!ida_is_empty(&efx->tc->recirc_ida)); 2745 ida_destroy(&efx->tc->recirc_ida); 2746 rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL); 2747 efx_tc_fini_counters(efx); 2748 efx_tc_fini_encap_actions(efx); 2749 mutex_unlock(&efx->tc->mutex); 2750 mutex_destroy(&efx->tc->mutex); 2751 kfree(efx->tc->caps); 2752 kfree(efx->tc); 2753 efx->tc = NULL; 2754 } 2755