xref: /openbmc/linux/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c (revision f8523d0e83613ab8d082cd504dc53a09fbba4889)
1 /* Broadcom NetXtreme-C/E network driver.
2  *
3  * Copyright (c) 2017 Broadcom Limited
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/netdevice.h>
11 #include <linux/inetdevice.h>
12 #include <linux/if_vlan.h>
13 #include <net/flow_dissector.h>
14 #include <net/pkt_cls.h>
15 #include <net/tc_act/tc_gact.h>
16 #include <net/tc_act/tc_skbedit.h>
17 #include <net/tc_act/tc_mirred.h>
18 #include <net/tc_act/tc_vlan.h>
19 #include <net/tc_act/tc_pedit.h>
20 #include <net/tc_act/tc_tunnel_key.h>
21 #include <net/vxlan.h>
22 
23 #include "bnxt_hsi.h"
24 #include "bnxt.h"
25 #include "bnxt_sriov.h"
26 #include "bnxt_tc.h"
27 #include "bnxt_vfr.h"
28 
29 #define BNXT_FID_INVALID			0xffff
30 #define VLAN_TCI(vid, prio)	((vid) | ((prio) << VLAN_PRIO_SHIFT))
31 
32 #define is_vlan_pcp_wildcarded(vlan_tci_mask)	\
33 	((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == 0x0000)
34 #define is_vlan_pcp_exactmatch(vlan_tci_mask)	\
35 	((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == VLAN_PRIO_MASK)
36 #define is_vlan_pcp_zero(vlan_tci)	\
37 	((ntohs(vlan_tci) & VLAN_PRIO_MASK) == 0x0000)
38 #define is_vid_exactmatch(vlan_tci_mask)	\
39 	((ntohs(vlan_tci_mask) & VLAN_VID_MASK) == VLAN_VID_MASK)
40 
41 static bool is_wildcard(void *mask, int len);
42 static bool is_exactmatch(void *mask, int len);
43 /* Return the dst fid of the func for flow forwarding
44  * For PFs: src_fid is the fid of the PF
45  * For VF-reps: src_fid the fid of the VF
46  */
47 static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
48 {
49 	struct bnxt *bp;
50 
51 	/* check if dev belongs to the same switch */
52 	if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
53 		netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch\n",
54 			    dev->ifindex);
55 		return BNXT_FID_INVALID;
56 	}
57 
58 	/* Is dev a VF-rep? */
59 	if (bnxt_dev_is_vf_rep(dev))
60 		return bnxt_vf_rep_get_fid(dev);
61 
62 	bp = netdev_priv(dev);
63 	return bp->pf.fw_fid;
64 }
65 
66 static int bnxt_tc_parse_redir(struct bnxt *bp,
67 			       struct bnxt_tc_actions *actions,
68 			       const struct flow_action_entry *act)
69 {
70 	struct net_device *dev = act->dev;
71 
72 	if (!dev) {
73 		netdev_info(bp->dev, "no dev in mirred action\n");
74 		return -EINVAL;
75 	}
76 
77 	actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
78 	actions->dst_dev = dev;
79 	return 0;
80 }
81 
82 static int bnxt_tc_parse_vlan(struct bnxt *bp,
83 			      struct bnxt_tc_actions *actions,
84 			      const struct flow_action_entry *act)
85 {
86 	switch (act->id) {
87 	case FLOW_ACTION_VLAN_POP:
88 		actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
89 		break;
90 	case FLOW_ACTION_VLAN_PUSH:
91 		actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
92 		actions->push_vlan_tci = htons(act->vlan.vid);
93 		actions->push_vlan_tpid = act->vlan.proto;
94 		break;
95 	default:
96 		return -EOPNOTSUPP;
97 	}
98 	return 0;
99 }
100 
101 static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
102 				    struct bnxt_tc_actions *actions,
103 				    const struct flow_action_entry *act)
104 {
105 	const struct ip_tunnel_info *tun_info = act->tunnel;
106 	const struct ip_tunnel_key *tun_key = &tun_info->key;
107 
108 	if (ip_tunnel_info_af(tun_info) != AF_INET) {
109 		netdev_info(bp->dev, "only IPv4 tunnel-encap is supported\n");
110 		return -EOPNOTSUPP;
111 	}
112 
113 	actions->tun_encap_key = *tun_key;
114 	actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP;
115 	return 0;
116 }
117 
118 /* Key & Mask from the stack comes unaligned in multiple iterations of 4 bytes
119  * each(u32).
120  * This routine consolidates such multiple unaligned values into one
121  * field each for Key & Mask (for src and dst macs separately)
122  * For example,
123  *			Mask/Key	Offset	Iteration
124  *			==========	======	=========
125  *	dst mac		0xffffffff	0	1
126  *	dst mac		0x0000ffff	4	2
127  *
128  *	src mac		0xffff0000	4	1
129  *	src mac		0xffffffff	8	2
130  *
131  * The above combination coming from the stack will be consolidated as
132  *			Mask/Key
133  *			==============
134  *	src mac:	0xffffffffffff
135  *	dst mac:	0xffffffffffff
136  */
137 static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask,
138 				 u8 *actual_key, u8 *actual_mask)
139 {
140 	u32 key = get_unaligned((u32 *)actual_key);
141 	u32 mask = get_unaligned((u32 *)actual_mask);
142 
143 	part_key &= part_mask;
144 	part_key |= key & ~part_mask;
145 
146 	put_unaligned(mask | part_mask, (u32 *)actual_mask);
147 	put_unaligned(part_key, (u32 *)actual_key);
148 }
149 
150 static int
151 bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions,
152 			    u16 *eth_addr, u16 *eth_addr_mask)
153 {
154 	u16 *p;
155 	int j;
156 
157 	if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)))
158 		return -EINVAL;
159 
160 	if (!is_wildcard(&eth_addr_mask[0], ETH_ALEN)) {
161 		if (!is_exactmatch(&eth_addr_mask[0], ETH_ALEN))
162 			return -EINVAL;
163 		/* FW expects dmac to be in u16 array format */
164 		p = eth_addr;
165 		for (j = 0; j < 3; j++)
166 			actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j));
167 	}
168 
169 	if (!is_wildcard(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN)) {
170 		if (!is_exactmatch(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN))
171 			return -EINVAL;
172 		/* FW expects smac to be in u16 array format */
173 		p = &eth_addr[ETH_ALEN / 2];
174 		for (j = 0; j < 3; j++)
175 			actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j));
176 	}
177 
178 	return 0;
179 }
180 
181 static int
182 bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions,
183 		    struct flow_action_entry *act, int act_idx, u8 *eth_addr,
184 		    u8 *eth_addr_mask)
185 {
186 	size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr);
187 	size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr);
188 	u32 mask, val, offset, idx;
189 	u8 htype;
190 
191 	offset = act->mangle.offset;
192 	htype = act->mangle.htype;
193 	mask = ~act->mangle.mask;
194 	val = act->mangle.val;
195 
196 	switch (htype) {
197 	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
198 		if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) {
199 			netdev_err(bp->dev,
200 				   "%s: eth_hdr: Invalid pedit field\n",
201 				   __func__);
202 			return -EINVAL;
203 		}
204 		actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE;
205 
206 		bnxt_set_l2_key_mask(val, mask, &eth_addr[offset],
207 				     &eth_addr_mask[offset]);
208 		break;
209 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
210 		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
211 		actions->nat.l3_is_ipv4 = true;
212 		if (offset ==  offsetof(struct iphdr, saddr)) {
213 			actions->nat.src_xlate = true;
214 			actions->nat.l3.ipv4.saddr.s_addr = htonl(val);
215 		} else if (offset ==  offsetof(struct iphdr, daddr)) {
216 			actions->nat.src_xlate = false;
217 			actions->nat.l3.ipv4.daddr.s_addr = htonl(val);
218 		} else {
219 			netdev_err(bp->dev,
220 				   "%s: IPv4_hdr: Invalid pedit field\n",
221 				   __func__);
222 			return -EINVAL;
223 		}
224 
225 		netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n",
226 			   actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr,
227 			   &actions->nat.l3.ipv4.daddr);
228 		break;
229 
230 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
231 		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
232 		actions->nat.l3_is_ipv4 = false;
233 		if (offset >= offsetof(struct ipv6hdr, saddr) &&
234 		    offset < offset_of_ip6_daddr) {
235 			/* 16 byte IPv6 address comes in 4 iterations of
236 			 * 4byte chunks each
237 			 */
238 			actions->nat.src_xlate = true;
239 			idx = (offset - offset_of_ip6_saddr) / 4;
240 			/* First 4bytes will be copied to idx 0 and so on */
241 			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
242 		} else if (offset >= offset_of_ip6_daddr &&
243 			   offset < offset_of_ip6_daddr + 16) {
244 			actions->nat.src_xlate = false;
245 			idx = (offset - offset_of_ip6_daddr) / 4;
246 			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
247 		} else {
248 			netdev_err(bp->dev,
249 				   "%s: IPv6_hdr: Invalid pedit field\n",
250 				   __func__);
251 			return -EINVAL;
252 		}
253 		break;
254 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
255 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
256 		/* HW does not support L4 rewrite alone without L3
257 		 * rewrite
258 		 */
259 		if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) {
260 			netdev_err(bp->dev,
261 				   "Need to specify L3 rewrite as well\n");
262 			return -EINVAL;
263 		}
264 		if (actions->nat.src_xlate)
265 			actions->nat.l4.ports.sport = htons(val);
266 		else
267 			actions->nat.l4.ports.dport = htons(val);
268 		netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n",
269 			   actions->nat.l4.ports.sport,
270 			   actions->nat.l4.ports.dport);
271 		break;
272 	default:
273 		netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n",
274 			   __func__);
275 		return -EINVAL;
276 	}
277 	return 0;
278 }
279 
280 static int bnxt_tc_parse_actions(struct bnxt *bp,
281 				 struct bnxt_tc_actions *actions,
282 				 struct flow_action *flow_action,
283 				 struct netlink_ext_ack *extack)
284 {
285 	/* Used to store the L2 rewrite mask for dmac (6 bytes) followed by
286 	 * smac (6 bytes) if rewrite of both is specified, otherwise either
287 	 * dmac or smac
288 	 */
289 	u16 eth_addr_mask[ETH_ALEN] = { 0 };
290 	/* Used to store the L2 rewrite key for dmac (6 bytes) followed by
291 	 * smac (6 bytes) if rewrite of both is specified, otherwise either
292 	 * dmac or smac
293 	 */
294 	u16 eth_addr[ETH_ALEN] = { 0 };
295 	struct flow_action_entry *act;
296 	int i, rc;
297 
298 	if (!flow_action_has_entries(flow_action)) {
299 		netdev_info(bp->dev, "no actions\n");
300 		return -EINVAL;
301 	}
302 
303 	if (!flow_action_basic_hw_stats_check(flow_action, extack))
304 		return -EOPNOTSUPP;
305 
306 	flow_action_for_each(i, act, flow_action) {
307 		switch (act->id) {
308 		case FLOW_ACTION_DROP:
309 			actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
310 			return 0; /* don't bother with other actions */
311 		case FLOW_ACTION_REDIRECT:
312 			rc = bnxt_tc_parse_redir(bp, actions, act);
313 			if (rc)
314 				return rc;
315 			break;
316 		case FLOW_ACTION_VLAN_POP:
317 		case FLOW_ACTION_VLAN_PUSH:
318 		case FLOW_ACTION_VLAN_MANGLE:
319 			rc = bnxt_tc_parse_vlan(bp, actions, act);
320 			if (rc)
321 				return rc;
322 			break;
323 		case FLOW_ACTION_TUNNEL_ENCAP:
324 			rc = bnxt_tc_parse_tunnel_set(bp, actions, act);
325 			if (rc)
326 				return rc;
327 			break;
328 		case FLOW_ACTION_TUNNEL_DECAP:
329 			actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
330 			break;
331 		/* Packet edit: L2 rewrite, NAT, NAPT */
332 		case FLOW_ACTION_MANGLE:
333 			rc = bnxt_tc_parse_pedit(bp, actions, act, i,
334 						 (u8 *)eth_addr,
335 						 (u8 *)eth_addr_mask);
336 			if (rc)
337 				return rc;
338 			break;
339 		default:
340 			break;
341 		}
342 	}
343 
344 	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
345 		rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr,
346 						 eth_addr_mask);
347 		if (rc)
348 			return rc;
349 	}
350 
351 	if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
352 		if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
353 			/* dst_fid is PF's fid */
354 			actions->dst_fid = bp->pf.fw_fid;
355 		} else {
356 			/* find the FID from dst_dev */
357 			actions->dst_fid =
358 				bnxt_flow_get_dst_fid(bp, actions->dst_dev);
359 			if (actions->dst_fid == BNXT_FID_INVALID)
360 				return -EINVAL;
361 		}
362 	}
363 
364 	return 0;
365 }
366 
367 static int bnxt_tc_parse_flow(struct bnxt *bp,
368 			      struct flow_cls_offload *tc_flow_cmd,
369 			      struct bnxt_tc_flow *flow)
370 {
371 	struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd);
372 	struct flow_dissector *dissector = rule->match.dissector;
373 
374 	/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
375 	if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
376 	    (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
377 		netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x\n",
378 			    dissector->used_keys);
379 		return -EOPNOTSUPP;
380 	}
381 
382 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
383 		struct flow_match_basic match;
384 
385 		flow_rule_match_basic(rule, &match);
386 		flow->l2_key.ether_type = match.key->n_proto;
387 		flow->l2_mask.ether_type = match.mask->n_proto;
388 
389 		if (match.key->n_proto == htons(ETH_P_IP) ||
390 		    match.key->n_proto == htons(ETH_P_IPV6)) {
391 			flow->l4_key.ip_proto = match.key->ip_proto;
392 			flow->l4_mask.ip_proto = match.mask->ip_proto;
393 		}
394 	}
395 
396 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
397 		struct flow_match_eth_addrs match;
398 
399 		flow_rule_match_eth_addrs(rule, &match);
400 		flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
401 		ether_addr_copy(flow->l2_key.dmac, match.key->dst);
402 		ether_addr_copy(flow->l2_mask.dmac, match.mask->dst);
403 		ether_addr_copy(flow->l2_key.smac, match.key->src);
404 		ether_addr_copy(flow->l2_mask.smac, match.mask->src);
405 	}
406 
407 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
408 		struct flow_match_vlan match;
409 
410 		flow_rule_match_vlan(rule, &match);
411 		flow->l2_key.inner_vlan_tci =
412 			cpu_to_be16(VLAN_TCI(match.key->vlan_id,
413 					     match.key->vlan_priority));
414 		flow->l2_mask.inner_vlan_tci =
415 			cpu_to_be16((VLAN_TCI(match.mask->vlan_id,
416 					      match.mask->vlan_priority)));
417 		flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
418 		flow->l2_mask.inner_vlan_tpid = htons(0xffff);
419 		flow->l2_key.num_vlans = 1;
420 	}
421 
422 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
423 		struct flow_match_ipv4_addrs match;
424 
425 		flow_rule_match_ipv4_addrs(rule, &match);
426 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
427 		flow->l3_key.ipv4.daddr.s_addr = match.key->dst;
428 		flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst;
429 		flow->l3_key.ipv4.saddr.s_addr = match.key->src;
430 		flow->l3_mask.ipv4.saddr.s_addr = match.mask->src;
431 	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
432 		struct flow_match_ipv6_addrs match;
433 
434 		flow_rule_match_ipv6_addrs(rule, &match);
435 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
436 		flow->l3_key.ipv6.daddr = match.key->dst;
437 		flow->l3_mask.ipv6.daddr = match.mask->dst;
438 		flow->l3_key.ipv6.saddr = match.key->src;
439 		flow->l3_mask.ipv6.saddr = match.mask->src;
440 	}
441 
442 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
443 		struct flow_match_ports match;
444 
445 		flow_rule_match_ports(rule, &match);
446 		flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
447 		flow->l4_key.ports.dport = match.key->dst;
448 		flow->l4_mask.ports.dport = match.mask->dst;
449 		flow->l4_key.ports.sport = match.key->src;
450 		flow->l4_mask.ports.sport = match.mask->src;
451 	}
452 
453 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
454 		struct flow_match_icmp match;
455 
456 		flow_rule_match_icmp(rule, &match);
457 		flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
458 		flow->l4_key.icmp.type = match.key->type;
459 		flow->l4_key.icmp.code = match.key->code;
460 		flow->l4_mask.icmp.type = match.mask->type;
461 		flow->l4_mask.icmp.code = match.mask->code;
462 	}
463 
464 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
465 		struct flow_match_ipv4_addrs match;
466 
467 		flow_rule_match_enc_ipv4_addrs(rule, &match);
468 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
469 		flow->tun_key.u.ipv4.dst = match.key->dst;
470 		flow->tun_mask.u.ipv4.dst = match.mask->dst;
471 		flow->tun_key.u.ipv4.src = match.key->src;
472 		flow->tun_mask.u.ipv4.src = match.mask->src;
473 	} else if (flow_rule_match_key(rule,
474 				      FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
475 		return -EOPNOTSUPP;
476 	}
477 
478 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
479 		struct flow_match_enc_keyid match;
480 
481 		flow_rule_match_enc_keyid(rule, &match);
482 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
483 		flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid);
484 		flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid);
485 	}
486 
487 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
488 		struct flow_match_ports match;
489 
490 		flow_rule_match_enc_ports(rule, &match);
491 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
492 		flow->tun_key.tp_dst = match.key->dst;
493 		flow->tun_mask.tp_dst = match.mask->dst;
494 		flow->tun_key.tp_src = match.key->src;
495 		flow->tun_mask.tp_src = match.mask->src;
496 	}
497 
498 	return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action,
499 				     tc_flow_cmd->common.extack);
500 }
501 
502 static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
503 				   struct bnxt_tc_flow_node *flow_node)
504 {
505 	struct hwrm_cfa_flow_free_input req = { 0 };
506 	int rc;
507 
508 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
509 	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
510 		req.ext_flow_handle = flow_node->ext_flow_handle;
511 	else
512 		req.flow_handle = flow_node->flow_handle;
513 
514 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
515 	if (rc)
516 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
517 
518 	return rc;
519 }
520 
521 static int ipv6_mask_len(struct in6_addr *mask)
522 {
523 	int mask_len = 0, i;
524 
525 	for (i = 0; i < 4; i++)
526 		mask_len += inet_mask_len(mask->s6_addr32[i]);
527 
528 	return mask_len;
529 }
530 
/* Return true when every one of the @len bytes at @mask is zero
 * (also true when @len is not positive).
 */
static bool is_wildcard(void *mask, int len)
{
	const u8 *byte = mask;

	while (len-- > 0) {
		if (*byte++)
			return false;
	}

	return true;
}
542 
/* Return true when every one of the @len bytes at @mask is 0xff
 * (also true when @len is not positive).
 */
static bool is_exactmatch(void *mask, int len)
{
	const u8 *byte = mask;

	while (len-- > 0) {
		if (*byte++ != 0xff)
			return false;
	}

	return true;
}
554 
555 static bool is_vlan_tci_allowed(__be16  vlan_tci_mask,
556 				__be16  vlan_tci)
557 {
558 	/* VLAN priority must be either exactly zero or fully wildcarded and
559 	 * VLAN id must be exact match.
560 	 */
561 	if (is_vid_exactmatch(vlan_tci_mask) &&
562 	    ((is_vlan_pcp_exactmatch(vlan_tci_mask) &&
563 	      is_vlan_pcp_zero(vlan_tci)) ||
564 	     is_vlan_pcp_wildcarded(vlan_tci_mask)))
565 		return true;
566 
567 	return false;
568 }
569 
/* Return true when at least one of the @len bytes at @key is non-zero */
static bool bits_set(void *key, int len)
{
	const u8 *byte = key;

	while (len-- > 0) {
		if (*byte++)
			return true;
	}

	return false;
}
581 
582 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
583 				    __le16 ref_flow_handle,
584 				    __le32 tunnel_handle,
585 				    struct bnxt_tc_flow_node *flow_node)
586 {
587 	struct bnxt_tc_actions *actions = &flow->actions;
588 	struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
589 	struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
590 	struct hwrm_cfa_flow_alloc_input req = { 0 };
591 	struct hwrm_cfa_flow_alloc_output *resp;
592 	u16 flow_flags = 0, action_flags = 0;
593 	int rc;
594 
595 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_ALLOC, -1, -1);
596 
597 	req.src_fid = cpu_to_le16(flow->src_fid);
598 	req.ref_flow_handle = ref_flow_handle;
599 
600 	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
601 		memcpy(req.l2_rewrite_dmac, actions->l2_rewrite_dmac,
602 		       ETH_ALEN);
603 		memcpy(req.l2_rewrite_smac, actions->l2_rewrite_smac,
604 		       ETH_ALEN);
605 		action_flags |=
606 			CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
607 	}
608 
609 	if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) {
610 		if (actions->nat.l3_is_ipv4) {
611 			action_flags |=
612 				CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS;
613 
614 			if (actions->nat.src_xlate) {
615 				action_flags |=
616 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
617 				/* L3 source rewrite */
618 				req.nat_ip_address[0] =
619 					actions->nat.l3.ipv4.saddr.s_addr;
620 				/* L4 source port */
621 				if (actions->nat.l4.ports.sport)
622 					req.nat_port =
623 						actions->nat.l4.ports.sport;
624 			} else {
625 				action_flags |=
626 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
627 				/* L3 destination rewrite */
628 				req.nat_ip_address[0] =
629 					actions->nat.l3.ipv4.daddr.s_addr;
630 				/* L4 destination port */
631 				if (actions->nat.l4.ports.dport)
632 					req.nat_port =
633 						actions->nat.l4.ports.dport;
634 			}
635 			netdev_dbg(bp->dev,
636 				   "req.nat_ip_address: %pI4 src_xlate: %d req.nat_port: %x\n",
637 				   req.nat_ip_address, actions->nat.src_xlate,
638 				   req.nat_port);
639 		} else {
640 			if (actions->nat.src_xlate) {
641 				action_flags |=
642 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
643 				/* L3 source rewrite */
644 				memcpy(req.nat_ip_address,
645 				       actions->nat.l3.ipv6.saddr.s6_addr32,
646 				       sizeof(req.nat_ip_address));
647 				/* L4 source port */
648 				if (actions->nat.l4.ports.sport)
649 					req.nat_port =
650 						actions->nat.l4.ports.sport;
651 			} else {
652 				action_flags |=
653 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
654 				/* L3 destination rewrite */
655 				memcpy(req.nat_ip_address,
656 				       actions->nat.l3.ipv6.daddr.s6_addr32,
657 				       sizeof(req.nat_ip_address));
658 				/* L4 destination port */
659 				if (actions->nat.l4.ports.dport)
660 					req.nat_port =
661 						actions->nat.l4.ports.dport;
662 			}
663 			netdev_dbg(bp->dev,
664 				   "req.nat_ip_address: %pI6 src_xlate: %d req.nat_port: %x\n",
665 				   req.nat_ip_address, actions->nat.src_xlate,
666 				   req.nat_port);
667 		}
668 	}
669 
670 	if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
671 	    actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
672 		req.tunnel_handle = tunnel_handle;
673 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
674 		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
675 	}
676 
677 	req.ethertype = flow->l2_key.ether_type;
678 	req.ip_proto = flow->l4_key.ip_proto;
679 
680 	if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
681 		memcpy(req.dmac, flow->l2_key.dmac, ETH_ALEN);
682 		memcpy(req.smac, flow->l2_key.smac, ETH_ALEN);
683 	}
684 
685 	if (flow->l2_key.num_vlans > 0) {
686 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE;
687 		/* FW expects the inner_vlan_tci value to be set
688 		 * in outer_vlan_tci when num_vlans is 1 (which is
689 		 * always the case in TC.)
690 		 */
691 		req.outer_vlan_tci = flow->l2_key.inner_vlan_tci;
692 	}
693 
694 	/* If all IP and L4 fields are wildcarded then this is an L2 flow */
695 	if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
696 	    is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
697 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
698 	} else {
699 		flow_flags |= flow->l2_key.ether_type == htons(ETH_P_IP) ?
700 				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 :
701 				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
702 
703 		if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
704 			req.ip_dst[0] = l3_key->ipv4.daddr.s_addr;
705 			req.ip_dst_mask_len =
706 				inet_mask_len(l3_mask->ipv4.daddr.s_addr);
707 			req.ip_src[0] = l3_key->ipv4.saddr.s_addr;
708 			req.ip_src_mask_len =
709 				inet_mask_len(l3_mask->ipv4.saddr.s_addr);
710 		} else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
711 			memcpy(req.ip_dst, l3_key->ipv6.daddr.s6_addr32,
712 			       sizeof(req.ip_dst));
713 			req.ip_dst_mask_len =
714 					ipv6_mask_len(&l3_mask->ipv6.daddr);
715 			memcpy(req.ip_src, l3_key->ipv6.saddr.s6_addr32,
716 			       sizeof(req.ip_src));
717 			req.ip_src_mask_len =
718 					ipv6_mask_len(&l3_mask->ipv6.saddr);
719 		}
720 	}
721 
722 	if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
723 		req.l4_src_port = flow->l4_key.ports.sport;
724 		req.l4_src_port_mask = flow->l4_mask.ports.sport;
725 		req.l4_dst_port = flow->l4_key.ports.dport;
726 		req.l4_dst_port_mask = flow->l4_mask.ports.dport;
727 	} else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
728 		/* l4 ports serve as type/code when ip_proto is ICMP */
729 		req.l4_src_port = htons(flow->l4_key.icmp.type);
730 		req.l4_src_port_mask = htons(flow->l4_mask.icmp.type);
731 		req.l4_dst_port = htons(flow->l4_key.icmp.code);
732 		req.l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
733 	}
734 	req.flags = cpu_to_le16(flow_flags);
735 
736 	if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
737 		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
738 	} else {
739 		if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
740 			action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
741 			req.dst_fid = cpu_to_le16(actions->dst_fid);
742 		}
743 		if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
744 			action_flags |=
745 			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
746 			req.l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
747 			req.l2_rewrite_vlan_tci = actions->push_vlan_tci;
748 			memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
749 			memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
750 		}
751 		if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
752 			action_flags |=
753 			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
754 			/* Rewrite config with tpid = 0 implies vlan pop */
755 			req.l2_rewrite_vlan_tpid = 0;
756 			memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
757 			memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
758 		}
759 	}
760 	req.action_flags = cpu_to_le16(action_flags);
761 
762 	mutex_lock(&bp->hwrm_cmd_lock);
763 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
764 	if (!rc) {
765 		resp = bnxt_get_hwrm_resp_addr(bp, &req);
766 		/* CFA_FLOW_ALLOC response interpretation:
767 		 *		    fw with	     fw with
768 		 *		    16-bit	     64-bit
769 		 *		    flow handle      flow handle
770 		 *		    ===========	     ===========
771 		 * flow_handle      flow handle      flow context id
772 		 * ext_flow_handle  INVALID	     flow handle
773 		 * flow_id	    INVALID	     flow counter id
774 		 */
775 		flow_node->flow_handle = resp->flow_handle;
776 		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
777 			flow_node->ext_flow_handle = resp->ext_flow_handle;
778 			flow_node->flow_id = resp->flow_id;
779 		}
780 	}
781 	mutex_unlock(&bp->hwrm_cmd_lock);
782 	return rc;
783 }
784 
785 static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
786 				       struct bnxt_tc_flow *flow,
787 				       struct bnxt_tc_l2_key *l2_info,
788 				       __le32 ref_decap_handle,
789 				       __le32 *decap_filter_handle)
790 {
791 	struct hwrm_cfa_decap_filter_alloc_input req = { 0 };
792 	struct hwrm_cfa_decap_filter_alloc_output *resp;
793 	struct ip_tunnel_key *tun_key = &flow->tun_key;
794 	u32 enables = 0;
795 	int rc;
796 
797 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_ALLOC, -1, -1);
798 
799 	req.flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
800 	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
801 		   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
802 	req.tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
803 	req.ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
804 
805 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
806 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
807 		/* tunnel_id is wrongly defined in hsi defn. as __le32 */
808 		req.tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
809 	}
810 
811 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
812 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
813 		ether_addr_copy(req.dst_macaddr, l2_info->dmac);
814 	}
815 	if (l2_info->num_vlans) {
816 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
817 		req.t_ivlan_vid = l2_info->inner_vlan_tci;
818 	}
819 
820 	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
821 	req.ethertype = htons(ETH_P_IP);
822 
823 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
824 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
825 			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
826 			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
827 		req.ip_addr_type = CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
828 		req.dst_ipaddr[0] = tun_key->u.ipv4.dst;
829 		req.src_ipaddr[0] = tun_key->u.ipv4.src;
830 	}
831 
832 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
833 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
834 		req.dst_port = tun_key->tp_dst;
835 	}
836 
837 	/* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc
838 	 * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
839 	 */
840 	req.l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
841 	req.enables = cpu_to_le32(enables);
842 
843 	mutex_lock(&bp->hwrm_cmd_lock);
844 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
845 	if (!rc) {
846 		resp = bnxt_get_hwrm_resp_addr(bp, &req);
847 		*decap_filter_handle = resp->decap_filter_id;
848 	} else {
849 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
850 	}
851 	mutex_unlock(&bp->hwrm_cmd_lock);
852 
853 	return rc;
854 }
855 
856 static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
857 				      __le32 decap_filter_handle)
858 {
859 	struct hwrm_cfa_decap_filter_free_input req = { 0 };
860 	int rc;
861 
862 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_FREE, -1, -1);
863 	req.decap_filter_id = decap_filter_handle;
864 
865 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
866 	if (rc)
867 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
868 
869 	return rc;
870 }
871 
872 static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
873 				       struct ip_tunnel_key *encap_key,
874 				       struct bnxt_tc_l2_key *l2_info,
875 				       __le32 *encap_record_handle)
876 {
877 	struct hwrm_cfa_encap_record_alloc_input req = { 0 };
878 	struct hwrm_cfa_encap_record_alloc_output *resp;
879 	struct hwrm_cfa_encap_data_vxlan *encap =
880 			(struct hwrm_cfa_encap_data_vxlan *)&req.encap_data;
881 	struct hwrm_vxlan_ipv4_hdr *encap_ipv4 =
882 				(struct hwrm_vxlan_ipv4_hdr *)encap->l3;
883 	int rc;
884 
885 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_ALLOC, -1, -1);
886 
887 	req.encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
888 
889 	ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
890 	ether_addr_copy(encap->src_mac_addr, l2_info->smac);
891 	if (l2_info->num_vlans) {
892 		encap->num_vlan_tags = l2_info->num_vlans;
893 		encap->ovlan_tci = l2_info->inner_vlan_tci;
894 		encap->ovlan_tpid = l2_info->inner_vlan_tpid;
895 	}
896 
897 	encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
898 	encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
899 	encap_ipv4->ttl = encap_key->ttl;
900 
901 	encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst;
902 	encap_ipv4->src_ip_addr = encap_key->u.ipv4.src;
903 	encap_ipv4->protocol = IPPROTO_UDP;
904 
905 	encap->dst_port = encap_key->tp_dst;
906 	encap->vni = tunnel_id_to_key32(encap_key->tun_id);
907 
908 	mutex_lock(&bp->hwrm_cmd_lock);
909 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
910 	if (!rc) {
911 		resp = bnxt_get_hwrm_resp_addr(bp, &req);
912 		*encap_record_handle = resp->encap_record_id;
913 	} else {
914 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
915 	}
916 	mutex_unlock(&bp->hwrm_cmd_lock);
917 
918 	return rc;
919 }
920 
921 static int hwrm_cfa_encap_record_free(struct bnxt *bp,
922 				      __le32 encap_record_handle)
923 {
924 	struct hwrm_cfa_encap_record_free_input req = { 0 };
925 	int rc;
926 
927 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_FREE, -1, -1);
928 	req.encap_record_id = encap_record_handle;
929 
930 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
931 	if (rc)
932 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
933 
934 	return rc;
935 }
936 
937 static int bnxt_tc_put_l2_node(struct bnxt *bp,
938 			       struct bnxt_tc_flow_node *flow_node)
939 {
940 	struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
941 	struct bnxt_tc_info *tc_info = bp->tc_info;
942 	int rc;
943 
944 	/* remove flow_node from the L2 shared flow list */
945 	list_del(&flow_node->l2_list_node);
946 	if (--l2_node->refcount == 0) {
947 		rc =  rhashtable_remove_fast(&tc_info->l2_table, &l2_node->node,
948 					     tc_info->l2_ht_params);
949 		if (rc)
950 			netdev_err(bp->dev,
951 				   "Error: %s: rhashtable_remove_fast: %d\n",
952 				   __func__, rc);
953 		kfree_rcu(l2_node, rcu);
954 	}
955 	return 0;
956 }
957 
958 static struct bnxt_tc_l2_node *
959 bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
960 		    struct rhashtable_params ht_params,
961 		    struct bnxt_tc_l2_key *l2_key)
962 {
963 	struct bnxt_tc_l2_node *l2_node;
964 	int rc;
965 
966 	l2_node = rhashtable_lookup_fast(l2_table, l2_key, ht_params);
967 	if (!l2_node) {
968 		l2_node = kzalloc(sizeof(*l2_node), GFP_KERNEL);
969 		if (!l2_node) {
970 			rc = -ENOMEM;
971 			return NULL;
972 		}
973 
974 		l2_node->key = *l2_key;
975 		rc = rhashtable_insert_fast(l2_table, &l2_node->node,
976 					    ht_params);
977 		if (rc) {
978 			kfree_rcu(l2_node, rcu);
979 			netdev_err(bp->dev,
980 				   "Error: %s: rhashtable_insert_fast: %d\n",
981 				   __func__, rc);
982 			return NULL;
983 		}
984 		INIT_LIST_HEAD(&l2_node->common_l2_flows);
985 	}
986 	return l2_node;
987 }
988 
989 /* Get the ref_flow_handle for a flow by checking if there are any other
990  * flows that share the same L2 key as this flow.
991  */
992 static int
993 bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
994 			    struct bnxt_tc_flow_node *flow_node,
995 			    __le16 *ref_flow_handle)
996 {
997 	struct bnxt_tc_info *tc_info = bp->tc_info;
998 	struct bnxt_tc_flow_node *ref_flow_node;
999 	struct bnxt_tc_l2_node *l2_node;
1000 
1001 	l2_node = bnxt_tc_get_l2_node(bp, &tc_info->l2_table,
1002 				      tc_info->l2_ht_params,
1003 				      &flow->l2_key);
1004 	if (!l2_node)
1005 		return -1;
1006 
1007 	/* If any other flow is using this l2_node, use it's flow_handle
1008 	 * as the ref_flow_handle
1009 	 */
1010 	if (l2_node->refcount > 0) {
1011 		ref_flow_node = list_first_entry(&l2_node->common_l2_flows,
1012 						 struct bnxt_tc_flow_node,
1013 						 l2_list_node);
1014 		*ref_flow_handle = ref_flow_node->flow_handle;
1015 	} else {
1016 		*ref_flow_handle = cpu_to_le16(0xffff);
1017 	}
1018 
1019 	/* Insert the l2_node into the flow_node so that subsequent flows
1020 	 * with a matching l2 key can use the flow_handle of this flow
1021 	 * as their ref_flow_handle
1022 	 */
1023 	flow_node->l2_node = l2_node;
1024 	list_add(&flow_node->l2_list_node, &l2_node->common_l2_flows);
1025 	l2_node->refcount++;
1026 	return 0;
1027 }
1028 
1029 /* After the flow parsing is done, this routine is used for checking
1030  * if there are any aspects of the flow that prevent it from being
1031  * offloaded.
1032  */
1033 static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
1034 {
1035 	/* If L4 ports are specified then ip_proto must be TCP or UDP */
1036 	if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
1037 	    (flow->l4_key.ip_proto != IPPROTO_TCP &&
1038 	     flow->l4_key.ip_proto != IPPROTO_UDP)) {
1039 		netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports\n",
1040 			    flow->l4_key.ip_proto);
1041 		return false;
1042 	}
1043 
1044 	/* Currently source/dest MAC cannot be partial wildcard  */
1045 	if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) &&
1046 	    !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) {
1047 		netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n");
1048 		return false;
1049 	}
1050 	if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) &&
1051 	    !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) {
1052 		netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n");
1053 		return false;
1054 	}
1055 
1056 	/* Currently VLAN fields cannot be partial wildcard */
1057 	if (bits_set(&flow->l2_key.inner_vlan_tci,
1058 		     sizeof(flow->l2_key.inner_vlan_tci)) &&
1059 	    !is_vlan_tci_allowed(flow->l2_mask.inner_vlan_tci,
1060 				 flow->l2_key.inner_vlan_tci)) {
1061 		netdev_info(bp->dev, "Unsupported VLAN TCI\n");
1062 		return false;
1063 	}
1064 	if (bits_set(&flow->l2_key.inner_vlan_tpid,
1065 		     sizeof(flow->l2_key.inner_vlan_tpid)) &&
1066 	    !is_exactmatch(&flow->l2_mask.inner_vlan_tpid,
1067 			   sizeof(flow->l2_mask.inner_vlan_tpid))) {
1068 		netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n");
1069 		return false;
1070 	}
1071 
1072 	/* Currently Ethertype must be set */
1073 	if (!is_exactmatch(&flow->l2_mask.ether_type,
1074 			   sizeof(flow->l2_mask.ether_type))) {
1075 		netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n");
1076 		return false;
1077 	}
1078 
1079 	return true;
1080 }
1081 
1082 /* Returns the final refcount of the node on success
1083  * or a -ve error code on failure
1084  */
1085 static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
1086 				   struct rhashtable *tunnel_table,
1087 				   struct rhashtable_params *ht_params,
1088 				   struct bnxt_tc_tunnel_node *tunnel_node)
1089 {
1090 	int rc;
1091 
1092 	if (--tunnel_node->refcount == 0) {
1093 		rc =  rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
1094 					     *ht_params);
1095 		if (rc) {
1096 			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
1097 			rc = -1;
1098 		}
1099 		kfree_rcu(tunnel_node, rcu);
1100 		return rc;
1101 	} else {
1102 		return tunnel_node->refcount;
1103 	}
1104 }
1105 
1106 /* Get (or add) either encap or decap tunnel node from/to the supplied
1107  * hash table.
1108  */
1109 static struct bnxt_tc_tunnel_node *
1110 bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
1111 			struct rhashtable_params *ht_params,
1112 			struct ip_tunnel_key *tun_key)
1113 {
1114 	struct bnxt_tc_tunnel_node *tunnel_node;
1115 	int rc;
1116 
1117 	tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params);
1118 	if (!tunnel_node) {
1119 		tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL);
1120 		if (!tunnel_node) {
1121 			rc = -ENOMEM;
1122 			goto err;
1123 		}
1124 
1125 		tunnel_node->key = *tun_key;
1126 		tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE;
1127 		rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node,
1128 					    *ht_params);
1129 		if (rc) {
1130 			kfree_rcu(tunnel_node, rcu);
1131 			goto err;
1132 		}
1133 	}
1134 	tunnel_node->refcount++;
1135 	return tunnel_node;
1136 err:
1137 	netdev_info(bp->dev, "error rc=%d\n", rc);
1138 	return NULL;
1139 }
1140 
1141 static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp,
1142 					struct bnxt_tc_flow *flow,
1143 					struct bnxt_tc_l2_key *l2_key,
1144 					struct bnxt_tc_flow_node *flow_node,
1145 					__le32 *ref_decap_handle)
1146 {
1147 	struct bnxt_tc_info *tc_info = bp->tc_info;
1148 	struct bnxt_tc_flow_node *ref_flow_node;
1149 	struct bnxt_tc_l2_node *decap_l2_node;
1150 
1151 	decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table,
1152 					    tc_info->decap_l2_ht_params,
1153 					    l2_key);
1154 	if (!decap_l2_node)
1155 		return -1;
1156 
1157 	/* If any other flow is using this decap_l2_node, use it's decap_handle
1158 	 * as the ref_decap_handle
1159 	 */
1160 	if (decap_l2_node->refcount > 0) {
1161 		ref_flow_node =
1162 			list_first_entry(&decap_l2_node->common_l2_flows,
1163 					 struct bnxt_tc_flow_node,
1164 					 decap_l2_list_node);
1165 		*ref_decap_handle = ref_flow_node->decap_node->tunnel_handle;
1166 	} else {
1167 		*ref_decap_handle = INVALID_TUNNEL_HANDLE;
1168 	}
1169 
1170 	/* Insert the l2_node into the flow_node so that subsequent flows
1171 	 * with a matching decap l2 key can use the decap_filter_handle of
1172 	 * this flow as their ref_decap_handle
1173 	 */
1174 	flow_node->decap_l2_node = decap_l2_node;
1175 	list_add(&flow_node->decap_l2_list_node,
1176 		 &decap_l2_node->common_l2_flows);
1177 	decap_l2_node->refcount++;
1178 	return 0;
1179 }
1180 
1181 static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
1182 				      struct bnxt_tc_flow_node *flow_node)
1183 {
1184 	struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node;
1185 	struct bnxt_tc_info *tc_info = bp->tc_info;
1186 	int rc;
1187 
1188 	/* remove flow_node from the decap L2 sharing flow list */
1189 	list_del(&flow_node->decap_l2_list_node);
1190 	if (--decap_l2_node->refcount == 0) {
1191 		rc =  rhashtable_remove_fast(&tc_info->decap_l2_table,
1192 					     &decap_l2_node->node,
1193 					     tc_info->decap_l2_ht_params);
1194 		if (rc)
1195 			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
1196 		kfree_rcu(decap_l2_node, rcu);
1197 	}
1198 }
1199 
1200 static void bnxt_tc_put_decap_handle(struct bnxt *bp,
1201 				     struct bnxt_tc_flow_node *flow_node)
1202 {
1203 	__le32 decap_handle = flow_node->decap_node->tunnel_handle;
1204 	struct bnxt_tc_info *tc_info = bp->tc_info;
1205 	int rc;
1206 
1207 	if (flow_node->decap_l2_node)
1208 		bnxt_tc_put_decap_l2_node(bp, flow_node);
1209 
1210 	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1211 				     &tc_info->decap_ht_params,
1212 				     flow_node->decap_node);
1213 	if (!rc && decap_handle != INVALID_TUNNEL_HANDLE)
1214 		hwrm_cfa_decap_filter_free(bp, decap_handle);
1215 }
1216 
1217 static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
1218 				       struct ip_tunnel_key *tun_key,
1219 				       struct bnxt_tc_l2_key *l2_info)
1220 {
1221 #ifdef CONFIG_INET
1222 	struct net_device *real_dst_dev = bp->dev;
1223 	struct flowi4 flow = { {0} };
1224 	struct net_device *dst_dev;
1225 	struct neighbour *nbr;
1226 	struct rtable *rt;
1227 	int rc;
1228 
1229 	flow.flowi4_proto = IPPROTO_UDP;
1230 	flow.fl4_dport = tun_key->tp_dst;
1231 	flow.daddr = tun_key->u.ipv4.dst;
1232 
1233 	rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
1234 	if (IS_ERR(rt)) {
1235 		netdev_info(bp->dev, "no route to %pI4b\n", &flow.daddr);
1236 		return -EOPNOTSUPP;
1237 	}
1238 
1239 	/* The route must either point to the real_dst_dev or a dst_dev that
1240 	 * uses the real_dst_dev.
1241 	 */
1242 	dst_dev = rt->dst.dev;
1243 	if (is_vlan_dev(dst_dev)) {
1244 #if IS_ENABLED(CONFIG_VLAN_8021Q)
1245 		struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev);
1246 
1247 		if (vlan->real_dev != real_dst_dev) {
1248 			netdev_info(bp->dev,
1249 				    "dst_dev(%s) doesn't use PF-if(%s)\n",
1250 				    netdev_name(dst_dev),
1251 				    netdev_name(real_dst_dev));
1252 			rc = -EOPNOTSUPP;
1253 			goto put_rt;
1254 		}
1255 		l2_info->inner_vlan_tci = htons(vlan->vlan_id);
1256 		l2_info->inner_vlan_tpid = vlan->vlan_proto;
1257 		l2_info->num_vlans = 1;
1258 #endif
1259 	} else if (dst_dev != real_dst_dev) {
1260 		netdev_info(bp->dev,
1261 			    "dst_dev(%s) for %pI4b is not PF-if(%s)\n",
1262 			    netdev_name(dst_dev), &flow.daddr,
1263 			    netdev_name(real_dst_dev));
1264 		rc = -EOPNOTSUPP;
1265 		goto put_rt;
1266 	}
1267 
1268 	nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
1269 	if (!nbr) {
1270 		netdev_info(bp->dev, "can't lookup neighbor for %pI4b\n",
1271 			    &flow.daddr);
1272 		rc = -EOPNOTSUPP;
1273 		goto put_rt;
1274 	}
1275 
1276 	tun_key->u.ipv4.src = flow.saddr;
1277 	tun_key->ttl = ip4_dst_hoplimit(&rt->dst);
1278 	neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev);
1279 	ether_addr_copy(l2_info->smac, dst_dev->dev_addr);
1280 	neigh_release(nbr);
1281 	ip_rt_put(rt);
1282 
1283 	return 0;
1284 put_rt:
1285 	ip_rt_put(rt);
1286 	return rc;
1287 #else
1288 	return -EOPNOTSUPP;
1289 #endif
1290 }
1291 
1292 static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1293 				    struct bnxt_tc_flow_node *flow_node,
1294 				    __le32 *decap_filter_handle)
1295 {
1296 	struct ip_tunnel_key *decap_key = &flow->tun_key;
1297 	struct bnxt_tc_info *tc_info = bp->tc_info;
1298 	struct bnxt_tc_l2_key l2_info = { {0} };
1299 	struct bnxt_tc_tunnel_node *decap_node;
1300 	struct ip_tunnel_key tun_key = { 0 };
1301 	struct bnxt_tc_l2_key *decap_l2_info;
1302 	__le32 ref_decap_handle;
1303 	int rc;
1304 
1305 	/* Check if there's another flow using the same tunnel decap.
1306 	 * If not, add this tunnel to the table and resolve the other
1307 	 * tunnel header fileds. Ignore src_port in the tunnel_key,
1308 	 * since it is not required for decap filters.
1309 	 */
1310 	decap_key->tp_src = 0;
1311 	decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
1312 					     &tc_info->decap_ht_params,
1313 					     decap_key);
1314 	if (!decap_node)
1315 		return -ENOMEM;
1316 
1317 	flow_node->decap_node = decap_node;
1318 
1319 	if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1320 		goto done;
1321 
1322 	/* Resolve the L2 fields for tunnel decap
1323 	 * Resolve the route for remote vtep (saddr) of the decap key
1324 	 * Find it's next-hop mac addrs
1325 	 */
1326 	tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
1327 	tun_key.tp_dst = flow->tun_key.tp_dst;
1328 	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info);
1329 	if (rc)
1330 		goto put_decap;
1331 
1332 	decap_l2_info = &decap_node->l2_info;
1333 	/* decap smac is wildcarded */
1334 	ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
1335 	if (l2_info.num_vlans) {
1336 		decap_l2_info->num_vlans = l2_info.num_vlans;
1337 		decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
1338 		decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci;
1339 	}
1340 	flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS;
1341 
1342 	/* For getting a decap_filter_handle we first need to check if
1343 	 * there are any other decap flows that share the same tunnel L2
1344 	 * key and if so, pass that flow's decap_filter_handle as the
1345 	 * ref_decap_handle for this flow.
1346 	 */
1347 	rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node,
1348 					  &ref_decap_handle);
1349 	if (rc)
1350 		goto put_decap;
1351 
1352 	/* Issue the hwrm cmd to allocate a decap filter handle */
1353 	rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info,
1354 					 ref_decap_handle,
1355 					 &decap_node->tunnel_handle);
1356 	if (rc)
1357 		goto put_decap_l2;
1358 
1359 done:
1360 	*decap_filter_handle = decap_node->tunnel_handle;
1361 	return 0;
1362 
1363 put_decap_l2:
1364 	bnxt_tc_put_decap_l2_node(bp, flow_node);
1365 put_decap:
1366 	bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1367 				&tc_info->decap_ht_params,
1368 				flow_node->decap_node);
1369 	return rc;
1370 }
1371 
1372 static void bnxt_tc_put_encap_handle(struct bnxt *bp,
1373 				     struct bnxt_tc_tunnel_node *encap_node)
1374 {
1375 	__le32 encap_handle = encap_node->tunnel_handle;
1376 	struct bnxt_tc_info *tc_info = bp->tc_info;
1377 	int rc;
1378 
1379 	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1380 				     &tc_info->encap_ht_params, encap_node);
1381 	if (!rc && encap_handle != INVALID_TUNNEL_HANDLE)
1382 		hwrm_cfa_encap_record_free(bp, encap_handle);
1383 }
1384 
1385 /* Lookup the tunnel encap table and check if there's an encap_handle
1386  * alloc'd already.
1387  * If not, query L2 info via a route lookup and issue an encap_record_alloc
1388  * cmd to FW.
1389  */
1390 static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1391 				    struct bnxt_tc_flow_node *flow_node,
1392 				    __le32 *encap_handle)
1393 {
1394 	struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key;
1395 	struct bnxt_tc_info *tc_info = bp->tc_info;
1396 	struct bnxt_tc_tunnel_node *encap_node;
1397 	int rc;
1398 
1399 	/* Check if there's another flow using the same tunnel encap.
1400 	 * If not, add this tunnel to the table and resolve the other
1401 	 * tunnel header fileds
1402 	 */
1403 	encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table,
1404 					     &tc_info->encap_ht_params,
1405 					     encap_key);
1406 	if (!encap_node)
1407 		return -ENOMEM;
1408 
1409 	flow_node->encap_node = encap_node;
1410 
1411 	if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1412 		goto done;
1413 
1414 	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info);
1415 	if (rc)
1416 		goto put_encap;
1417 
1418 	/* Allocate a new tunnel encap record */
1419 	rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info,
1420 					 &encap_node->tunnel_handle);
1421 	if (rc)
1422 		goto put_encap;
1423 
1424 done:
1425 	*encap_handle = encap_node->tunnel_handle;
1426 	return 0;
1427 
1428 put_encap:
1429 	bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1430 				&tc_info->encap_ht_params, encap_node);
1431 	return rc;
1432 }
1433 
1434 static void bnxt_tc_put_tunnel_handle(struct bnxt *bp,
1435 				      struct bnxt_tc_flow *flow,
1436 				      struct bnxt_tc_flow_node *flow_node)
1437 {
1438 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1439 		bnxt_tc_put_decap_handle(bp, flow_node);
1440 	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1441 		bnxt_tc_put_encap_handle(bp, flow_node->encap_node);
1442 }
1443 
1444 static int bnxt_tc_get_tunnel_handle(struct bnxt *bp,
1445 				     struct bnxt_tc_flow *flow,
1446 				     struct bnxt_tc_flow_node *flow_node,
1447 				     __le32 *tunnel_handle)
1448 {
1449 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1450 		return bnxt_tc_get_decap_handle(bp, flow, flow_node,
1451 						tunnel_handle);
1452 	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1453 		return bnxt_tc_get_encap_handle(bp, flow, flow_node,
1454 						tunnel_handle);
1455 	else
1456 		return 0;
1457 }
1458 static int __bnxt_tc_del_flow(struct bnxt *bp,
1459 			      struct bnxt_tc_flow_node *flow_node)
1460 {
1461 	struct bnxt_tc_info *tc_info = bp->tc_info;
1462 	int rc;
1463 
1464 	/* send HWRM cmd to free the flow-id */
1465 	bnxt_hwrm_cfa_flow_free(bp, flow_node);
1466 
1467 	mutex_lock(&tc_info->lock);
1468 
1469 	/* release references to any tunnel encap/decap nodes */
1470 	bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node);
1471 
1472 	/* release reference to l2 node */
1473 	bnxt_tc_put_l2_node(bp, flow_node);
1474 
1475 	mutex_unlock(&tc_info->lock);
1476 
1477 	rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
1478 				    tc_info->flow_ht_params);
1479 	if (rc)
1480 		netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d\n",
1481 			   __func__, rc);
1482 
1483 	kfree_rcu(flow_node, rcu);
1484 	return 0;
1485 }
1486 
1487 static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow,
1488 				 u16 src_fid)
1489 {
1490 	flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX;
1491 }
1492 
1493 static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
1494 				u16 src_fid)
1495 {
1496 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1497 		flow->src_fid = bp->pf.fw_fid;
1498 	else
1499 		flow->src_fid = src_fid;
1500 }
1501 
1502 /* Add a new flow or replace an existing flow.
1503  * Notes on locking:
1504  * There are essentially two critical sections here.
1505  * 1. while adding a new flow
1506  *    a) lookup l2-key
1507  *    b) issue HWRM cmd and get flow_handle
1508  *    c) link l2-key with flow
1509  * 2. while deleting a flow
1510  *    a) unlinking l2-key from flow
1511  * A lock is needed to protect these two critical sections.
1512  *
1513  * The hash-tables are already protected by the rhashtable API.
1514  */
1515 static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
1516 			    struct flow_cls_offload *tc_flow_cmd)
1517 {
1518 	struct bnxt_tc_flow_node *new_node, *old_node;
1519 	struct bnxt_tc_info *tc_info = bp->tc_info;
1520 	struct bnxt_tc_flow *flow;
1521 	__le32 tunnel_handle = 0;
1522 	__le16 ref_flow_handle;
1523 	int rc;
1524 
1525 	/* allocate memory for the new flow and it's node */
1526 	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
1527 	if (!new_node) {
1528 		rc = -ENOMEM;
1529 		goto done;
1530 	}
1531 	new_node->cookie = tc_flow_cmd->cookie;
1532 	flow = &new_node->flow;
1533 
1534 	rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
1535 	if (rc)
1536 		goto free_node;
1537 
1538 	bnxt_tc_set_src_fid(bp, flow, src_fid);
1539 	bnxt_tc_set_flow_dir(bp, flow, flow->src_fid);
1540 
1541 	if (!bnxt_tc_can_offload(bp, flow)) {
1542 		rc = -EOPNOTSUPP;
1543 		kfree_rcu(new_node, rcu);
1544 		return rc;
1545 	}
1546 
1547 	/* If a flow exists with the same cookie, delete it */
1548 	old_node = rhashtable_lookup_fast(&tc_info->flow_table,
1549 					  &tc_flow_cmd->cookie,
1550 					  tc_info->flow_ht_params);
1551 	if (old_node)
1552 		__bnxt_tc_del_flow(bp, old_node);
1553 
1554 	/* Check if the L2 part of the flow has been offloaded already.
1555 	 * If so, bump up it's refcnt and get it's reference handle.
1556 	 */
1557 	mutex_lock(&tc_info->lock);
1558 	rc = bnxt_tc_get_ref_flow_handle(bp, flow, new_node, &ref_flow_handle);
1559 	if (rc)
1560 		goto unlock;
1561 
1562 	/* If the flow involves tunnel encap/decap, get tunnel_handle */
1563 	rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle);
1564 	if (rc)
1565 		goto put_l2;
1566 
1567 	/* send HWRM cmd to alloc the flow */
1568 	rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
1569 				      tunnel_handle, new_node);
1570 	if (rc)
1571 		goto put_tunnel;
1572 
1573 	flow->lastused = jiffies;
1574 	spin_lock_init(&flow->stats_lock);
1575 	/* add new flow to flow-table */
1576 	rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
1577 				    tc_info->flow_ht_params);
1578 	if (rc)
1579 		goto hwrm_flow_free;
1580 
1581 	mutex_unlock(&tc_info->lock);
1582 	return 0;
1583 
1584 hwrm_flow_free:
1585 	bnxt_hwrm_cfa_flow_free(bp, new_node);
1586 put_tunnel:
1587 	bnxt_tc_put_tunnel_handle(bp, flow, new_node);
1588 put_l2:
1589 	bnxt_tc_put_l2_node(bp, new_node);
1590 unlock:
1591 	mutex_unlock(&tc_info->lock);
1592 free_node:
1593 	kfree_rcu(new_node, rcu);
1594 done:
1595 	netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d\n",
1596 		   __func__, tc_flow_cmd->cookie, rc);
1597 	return rc;
1598 }
1599 
1600 static int bnxt_tc_del_flow(struct bnxt *bp,
1601 			    struct flow_cls_offload *tc_flow_cmd)
1602 {
1603 	struct bnxt_tc_info *tc_info = bp->tc_info;
1604 	struct bnxt_tc_flow_node *flow_node;
1605 
1606 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1607 					   &tc_flow_cmd->cookie,
1608 					   tc_info->flow_ht_params);
1609 	if (!flow_node)
1610 		return -EINVAL;
1611 
1612 	return __bnxt_tc_del_flow(bp, flow_node);
1613 }
1614 
1615 static int bnxt_tc_get_flow_stats(struct bnxt *bp,
1616 				  struct flow_cls_offload *tc_flow_cmd)
1617 {
1618 	struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats;
1619 	struct bnxt_tc_info *tc_info = bp->tc_info;
1620 	struct bnxt_tc_flow_node *flow_node;
1621 	struct bnxt_tc_flow *flow;
1622 	unsigned long lastused;
1623 
1624 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1625 					   &tc_flow_cmd->cookie,
1626 					   tc_info->flow_ht_params);
1627 	if (!flow_node)
1628 		return -1;
1629 
1630 	flow = &flow_node->flow;
1631 	curr_stats = &flow->stats;
1632 	prev_stats = &flow->prev_stats;
1633 
1634 	spin_lock(&flow->stats_lock);
1635 	stats.packets = curr_stats->packets - prev_stats->packets;
1636 	stats.bytes = curr_stats->bytes - prev_stats->bytes;
1637 	*prev_stats = *curr_stats;
1638 	lastused = flow->lastused;
1639 	spin_unlock(&flow->stats_lock);
1640 
1641 	flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets,
1642 			  lastused, FLOW_ACTION_HW_STATS_DELAYED);
1643 	return 0;
1644 }
1645 
1646 static void bnxt_fill_cfa_stats_req(struct bnxt *bp,
1647 				    struct bnxt_tc_flow_node *flow_node,
1648 				    __le16 *flow_handle, __le32 *flow_id)
1649 {
1650 	u16 handle;
1651 
1652 	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
1653 		*flow_id = flow_node->flow_id;
1654 
1655 		/* If flow_id is used to fetch flow stats then:
1656 		 * 1. lower 12 bits of flow_handle must be set to all 1s.
1657 		 * 2. 15th bit of flow_handle must specify the flow
1658 		 *    direction (TX/RX).
1659 		 */
1660 		if (flow_node->flow.l2_key.dir == BNXT_DIR_RX)
1661 			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX |
1662 				 CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1663 		else
1664 			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1665 
1666 		*flow_handle = cpu_to_le16(handle);
1667 	} else {
1668 		*flow_handle = flow_node->flow_handle;
1669 	}
1670 }
1671 
1672 static int
1673 bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
1674 			     struct bnxt_tc_stats_batch stats_batch[])
1675 {
1676 	struct hwrm_cfa_flow_stats_input req = { 0 };
1677 	struct hwrm_cfa_flow_stats_output *resp;
1678 	__le16 *req_flow_handles = &req.flow_handle_0;
1679 	__le32 *req_flow_ids = &req.flow_id_0;
1680 	int rc, i;
1681 
1682 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
1683 	req.num_flows = cpu_to_le16(num_flows);
1684 	for (i = 0; i < num_flows; i++) {
1685 		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1686 
1687 		bnxt_fill_cfa_stats_req(bp, flow_node,
1688 					&req_flow_handles[i], &req_flow_ids[i]);
1689 	}
1690 
1691 	mutex_lock(&bp->hwrm_cmd_lock);
1692 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
1693 	if (!rc) {
1694 		__le64 *resp_packets;
1695 		__le64 *resp_bytes;
1696 
1697 		resp = bnxt_get_hwrm_resp_addr(bp, &req);
1698 		resp_packets = &resp->packet_0;
1699 		resp_bytes = &resp->byte_0;
1700 
1701 		for (i = 0; i < num_flows; i++) {
1702 			stats_batch[i].hw_stats.packets =
1703 						le64_to_cpu(resp_packets[i]);
1704 			stats_batch[i].hw_stats.bytes =
1705 						le64_to_cpu(resp_bytes[i]);
1706 		}
1707 	} else {
1708 		netdev_info(bp->dev, "error rc=%d\n", rc);
1709 	}
1710 	mutex_unlock(&bp->hwrm_cmd_lock);
1711 
1712 	return rc;
1713 }
1714 
/* Fold a new counter reading @val into the running total @accum.
 * Even though val is a u64, only the bits covered by @mask are
 * significant and the reading wraps around beyond that width. The bits of
 * *accum above the mask are kept, the bits under the mask are replaced by
 * val, and (mask + 1) is added when val is smaller than the previous low
 * bits, i.e. when a wrap-around is detected.
 */
static void accumulate_val(u64 *accum, u64 val, u64 mask)
{
#define low_bits(x, mask)		((x) & (mask))
#define high_bits(x, mask)		((x) & ~(mask))
	u64 prev = *accum;
	u64 total = high_bits(prev, mask) + val;

	/* the new reading went backwards: the counter wrapped */
	if (val < low_bits(prev, mask))
		total += (mask + 1);

	*accum = total;
}
1729 
1730 /* The HW counters' width is much less than 64bits.
1731  * Handle possible wrap-around while updating the stat counters
1732  */
1733 static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info,
1734 				  struct bnxt_tc_flow_stats *acc_stats,
1735 				  struct bnxt_tc_flow_stats *hw_stats)
1736 {
1737 	accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
1738 	accumulate_val(&acc_stats->packets, hw_stats->packets,
1739 		       tc_info->packets_mask);
1740 }
1741 
1742 static int
1743 bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows,
1744 				struct bnxt_tc_stats_batch stats_batch[])
1745 {
1746 	struct bnxt_tc_info *tc_info = bp->tc_info;
1747 	int rc, i;
1748 
1749 	rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch);
1750 	if (rc)
1751 		return rc;
1752 
1753 	for (i = 0; i < num_flows; i++) {
1754 		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1755 		struct bnxt_tc_flow *flow = &flow_node->flow;
1756 
1757 		spin_lock(&flow->stats_lock);
1758 		bnxt_flow_stats_accum(tc_info, &flow->stats,
1759 				      &stats_batch[i].hw_stats);
1760 		if (flow->stats.packets != flow->prev_stats.packets)
1761 			flow->lastused = jiffies;
1762 		spin_unlock(&flow->stats_lock);
1763 	}
1764 
1765 	return 0;
1766 }
1767 
1768 static int
1769 bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
1770 			      struct bnxt_tc_stats_batch stats_batch[],
1771 			      int *num_flows)
1772 {
1773 	struct bnxt_tc_info *tc_info = bp->tc_info;
1774 	struct rhashtable_iter *iter = &tc_info->iter;
1775 	void *flow_node;
1776 	int rc, i;
1777 
1778 	rhashtable_walk_start(iter);
1779 
1780 	rc = 0;
1781 	for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
1782 		flow_node = rhashtable_walk_next(iter);
1783 		if (IS_ERR(flow_node)) {
1784 			i = 0;
1785 			if (PTR_ERR(flow_node) == -EAGAIN) {
1786 				continue;
1787 			} else {
1788 				rc = PTR_ERR(flow_node);
1789 				goto done;
1790 			}
1791 		}
1792 
1793 		/* No more flows */
1794 		if (!flow_node)
1795 			goto done;
1796 
1797 		stats_batch[i].flow_node = flow_node;
1798 	}
1799 done:
1800 	rhashtable_walk_stop(iter);
1801 	*num_flows = i;
1802 	return rc;
1803 }
1804 
1805 void bnxt_tc_flow_stats_work(struct bnxt *bp)
1806 {
1807 	struct bnxt_tc_info *tc_info = bp->tc_info;
1808 	int num_flows, rc;
1809 
1810 	num_flows = atomic_read(&tc_info->flow_table.nelems);
1811 	if (!num_flows)
1812 		return;
1813 
1814 	rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter);
1815 
1816 	for (;;) {
1817 		rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch,
1818 						   &num_flows);
1819 		if (rc) {
1820 			if (rc == -EAGAIN)
1821 				continue;
1822 			break;
1823 		}
1824 
1825 		if (!num_flows)
1826 			break;
1827 
1828 		bnxt_tc_flow_stats_batch_update(bp, num_flows,
1829 						tc_info->stats_batch);
1830 	}
1831 
1832 	rhashtable_walk_exit(&tc_info->iter);
1833 }
1834 
1835 int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
1836 			 struct flow_cls_offload *cls_flower)
1837 {
1838 	switch (cls_flower->command) {
1839 	case FLOW_CLS_REPLACE:
1840 		return bnxt_tc_add_flow(bp, src_fid, cls_flower);
1841 	case FLOW_CLS_DESTROY:
1842 		return bnxt_tc_del_flow(bp, cls_flower);
1843 	case FLOW_CLS_STATS:
1844 		return bnxt_tc_get_flow_stats(bp, cls_flower);
1845 	default:
1846 		return -EOPNOTSUPP;
1847 	}
1848 }
1849 
1850 static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
1851 				       void *type_data, void *cb_priv)
1852 {
1853 	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1854 	struct flow_cls_offload *flower = type_data;
1855 	struct bnxt *bp = priv->bp;
1856 
1857 	if (flower->common.chain_index)
1858 		return -EOPNOTSUPP;
1859 
1860 	switch (type) {
1861 	case TC_SETUP_CLSFLOWER:
1862 		return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower);
1863 	default:
1864 		return -EOPNOTSUPP;
1865 	}
1866 }
1867 
1868 static struct bnxt_flower_indr_block_cb_priv *
1869 bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev)
1870 {
1871 	struct bnxt_flower_indr_block_cb_priv *cb_priv;
1872 
1873 	/* All callback list access should be protected by RTNL. */
1874 	ASSERT_RTNL();
1875 
1876 	list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list)
1877 		if (cb_priv->tunnel_netdev == netdev)
1878 			return cb_priv;
1879 
1880 	return NULL;
1881 }
1882 
1883 static void bnxt_tc_setup_indr_rel(void *cb_priv)
1884 {
1885 	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1886 
1887 	list_del(&priv->list);
1888 	kfree(priv);
1889 }
1890 
1891 static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp,
1892 				    struct flow_block_offload *f)
1893 {
1894 	struct bnxt_flower_indr_block_cb_priv *cb_priv;
1895 	struct flow_block_cb *block_cb;
1896 
1897 	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1898 		return -EOPNOTSUPP;
1899 
1900 	switch (f->command) {
1901 	case FLOW_BLOCK_BIND:
1902 		cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
1903 		if (!cb_priv)
1904 			return -ENOMEM;
1905 
1906 		cb_priv->tunnel_netdev = netdev;
1907 		cb_priv->bp = bp;
1908 		list_add(&cb_priv->list, &bp->tc_indr_block_list);
1909 
1910 		block_cb = flow_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
1911 					       cb_priv, cb_priv,
1912 					       bnxt_tc_setup_indr_rel);
1913 		if (IS_ERR(block_cb)) {
1914 			list_del(&cb_priv->list);
1915 			kfree(cb_priv);
1916 			return PTR_ERR(block_cb);
1917 		}
1918 
1919 		flow_block_cb_add(block_cb, f);
1920 		list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list);
1921 		break;
1922 	case FLOW_BLOCK_UNBIND:
1923 		cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev);
1924 		if (!cb_priv)
1925 			return -ENOENT;
1926 
1927 		block_cb = flow_block_cb_lookup(f->block,
1928 						bnxt_tc_setup_indr_block_cb,
1929 						cb_priv);
1930 		if (!block_cb)
1931 			return -ENOENT;
1932 
1933 		flow_block_cb_remove(block_cb, f);
1934 		list_del(&block_cb->driver_list);
1935 		break;
1936 	default:
1937 		return -EOPNOTSUPP;
1938 	}
1939 	return 0;
1940 }
1941 
1942 static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
1943 {
1944 	return netif_is_vxlan(netdev);
1945 }
1946 
1947 static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv,
1948 				 enum tc_setup_type type, void *type_data)
1949 {
1950 	if (!bnxt_is_netdev_indr_offload(netdev))
1951 		return -EOPNOTSUPP;
1952 
1953 	switch (type) {
1954 	case TC_SETUP_BLOCK:
1955 		return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data);
1956 	default:
1957 		break;
1958 	}
1959 
1960 	return -EOPNOTSUPP;
1961 }
1962 
1963 static const struct rhashtable_params bnxt_tc_flow_ht_params = {
1964 	.head_offset = offsetof(struct bnxt_tc_flow_node, node),
1965 	.key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
1966 	.key_len = sizeof(((struct bnxt_tc_flow_node *)0)->cookie),
1967 	.automatic_shrinking = true
1968 };
1969 
1970 static const struct rhashtable_params bnxt_tc_l2_ht_params = {
1971 	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
1972 	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
1973 	.key_len = BNXT_TC_L2_KEY_LEN,
1974 	.automatic_shrinking = true
1975 };
1976 
1977 static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = {
1978 	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
1979 	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
1980 	.key_len = BNXT_TC_L2_KEY_LEN,
1981 	.automatic_shrinking = true
1982 };
1983 
1984 static const struct rhashtable_params bnxt_tc_tunnel_ht_params = {
1985 	.head_offset = offsetof(struct bnxt_tc_tunnel_node, node),
1986 	.key_offset = offsetof(struct bnxt_tc_tunnel_node, key),
1987 	.key_len = sizeof(struct ip_tunnel_key),
1988 	.automatic_shrinking = true
1989 };
1990 
/* Convert a counter width in bits to a mask with that many low bits set.
 * @width must be in the range 1..64; 0 would shift by the full type width.
 */
#define mask(width)		(~(u64)0 >> (64 - (width)))
1993 
1994 int bnxt_init_tc(struct bnxt *bp)
1995 {
1996 	struct bnxt_tc_info *tc_info;
1997 	int rc;
1998 
1999 	if (bp->hwrm_spec_code < 0x10803) {
2000 		netdev_warn(bp->dev,
2001 			    "Firmware does not support TC flower offload.\n");
2002 		return -ENOTSUPP;
2003 	}
2004 
2005 	tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL);
2006 	if (!tc_info)
2007 		return -ENOMEM;
2008 	mutex_init(&tc_info->lock);
2009 
2010 	/* Counter widths are programmed by FW */
2011 	tc_info->bytes_mask = mask(36);
2012 	tc_info->packets_mask = mask(28);
2013 
2014 	tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
2015 	rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
2016 	if (rc)
2017 		goto free_tc_info;
2018 
2019 	tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
2020 	rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
2021 	if (rc)
2022 		goto destroy_flow_table;
2023 
2024 	tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params;
2025 	rc = rhashtable_init(&tc_info->decap_l2_table,
2026 			     &tc_info->decap_l2_ht_params);
2027 	if (rc)
2028 		goto destroy_l2_table;
2029 
2030 	tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params;
2031 	rc = rhashtable_init(&tc_info->decap_table,
2032 			     &tc_info->decap_ht_params);
2033 	if (rc)
2034 		goto destroy_decap_l2_table;
2035 
2036 	tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params;
2037 	rc = rhashtable_init(&tc_info->encap_table,
2038 			     &tc_info->encap_ht_params);
2039 	if (rc)
2040 		goto destroy_decap_table;
2041 
2042 	tc_info->enabled = true;
2043 	bp->dev->hw_features |= NETIF_F_HW_TC;
2044 	bp->dev->features |= NETIF_F_HW_TC;
2045 	bp->tc_info = tc_info;
2046 
2047 	/* init indirect block notifications */
2048 	INIT_LIST_HEAD(&bp->tc_indr_block_list);
2049 
2050 	rc = flow_indr_dev_register(bnxt_tc_setup_indr_cb, bp);
2051 	if (!rc)
2052 		return 0;
2053 
2054 	rhashtable_destroy(&tc_info->encap_table);
2055 
2056 destroy_decap_table:
2057 	rhashtable_destroy(&tc_info->decap_table);
2058 destroy_decap_l2_table:
2059 	rhashtable_destroy(&tc_info->decap_l2_table);
2060 destroy_l2_table:
2061 	rhashtable_destroy(&tc_info->l2_table);
2062 destroy_flow_table:
2063 	rhashtable_destroy(&tc_info->flow_table);
2064 free_tc_info:
2065 	kfree(tc_info);
2066 	return rc;
2067 }
2068 
2069 void bnxt_shutdown_tc(struct bnxt *bp)
2070 {
2071 	struct bnxt_tc_info *tc_info = bp->tc_info;
2072 
2073 	if (!bnxt_tc_flower_enabled(bp))
2074 		return;
2075 
2076 	flow_indr_dev_unregister(bnxt_tc_setup_indr_cb, bp,
2077 				 bnxt_tc_setup_indr_block_cb);
2078 	rhashtable_destroy(&tc_info->flow_table);
2079 	rhashtable_destroy(&tc_info->l2_table);
2080 	rhashtable_destroy(&tc_info->decap_l2_table);
2081 	rhashtable_destroy(&tc_info->decap_table);
2082 	rhashtable_destroy(&tc_info->encap_table);
2083 	kfree(tc_info);
2084 	bp->tc_info = NULL;
2085 }
2086