1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3 
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include <net/ip_tunnels.h>
7 #include "tc_tun_encap.h"
8 #include "en_tc.h"
9 #include "tc_tun.h"
10 #include "rep/tc.h"
11 #include "diag/en_tc_tracepoint.h"
12 
13 enum {
14 	MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
15 };
16 
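/* When the route device for the encap is an OVS internal port (i.e. an OVS
 * bridge master), set up forward-to-internal-port actions for this encap
 * destination. Otherwise the attribute is left untouched and 0 is returned.
 */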
17 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
18 				     struct mlx5_flow_attr *attr,
19 				     struct mlx5e_encap_entry *e,
20 				     int out_index)
21 {
22 	struct net_device *route_dev;
23 	int err = 0;
24 
25 	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
26 
27 	if (!route_dev || !netif_is_ovs_master(route_dev))
28 		goto out;
29 
30 	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
31 						MLX5E_TC_INT_PORT_EGRESS,
32 						&attr->action, out_index);
33 
34 out:
35 	if (route_dev)
36 		dev_put(route_dev);
37 
38 	return err;
39 }
40 
41 struct mlx5e_route_key {
42 	int ip_version;
43 	union {
44 		__be32 v4;
45 		struct in6_addr v6;
46 	} endpoint_ip;
47 };
48 
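/* A route entry tracks a single tunnel endpoint address and links together
 * the encap entries and decap flows that depend on it, so they can be updated
 * when a FIB event reports a route change for that endpoint.
 */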
49 struct mlx5e_route_entry {
50 	struct mlx5e_route_key key;
51 	struct list_head encap_entries;
52 	struct list_head decap_flows;
53 	u32 flags;
54 	struct hlist_node hlist;
55 	refcount_t refcnt;
56 	int tunnel_dev_index;
57 	struct rcu_head rcu;
58 };
59 
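/* Per-uplink tunnel encap context: owns the route table and the FIB notifier
 * that keeps it in sync with the kernel routing tables.
 */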
60 struct mlx5e_tc_tun_encap {
61 	struct mlx5e_priv *priv;
62 	struct notifier_block fib_nb;
63 	spinlock_t route_lock; /* protects route_tbl */
64 	unsigned long route_tbl_last_update;
65 	DECLARE_HASHTABLE(route_tbl, 8);
66 };
67 
68 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
69 {
70 	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
71 }
72 
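/* Copy the outer source/destination IP addresses from the match spec into the
 * flow's rx_tun_attr. TUN_RX is only set when both addresses are fully
 * specified, since both are needed for the decap route lookup.
 */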
73 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
74 			     struct mlx5_flow_spec *spec)
75 {
76 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
77 	struct mlx5_rx_tun_attr *tun_attr;
78 	void *daddr, *saddr;
79 	u8 ip_version;
80 
81 	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
82 	if (!tun_attr)
83 		return -ENOMEM;
84 
85 	esw_attr->rx_tun_attr = tun_attr;
86 	ip_version = mlx5e_tc_get_ip_version(spec, true);
87 
88 	if (ip_version == 4) {
89 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
90 				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
91 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
92 				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
93 		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
94 		tun_attr->src_ip.v4 = *(__be32 *)saddr;
95 		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
96 			return 0;
97 	}
98 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
99 	else if (ip_version == 6) {
100 		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
101 
102 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
103 				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
104 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
105 				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
106 		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
107 		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
108 		if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
109 		    ipv6_addr_any(&tun_attr->src_ip.v6))
110 			return 0;
111 	}
112 #endif
113 	/* Only set the flag if both src and dst ip addresses exist. They are
114 	 * required to establish routing.
115 	 */
116 	flow_flag_set(flow, TUN_RX);
117 	flow->attr->tun_ip_version = ip_version;
118 	return 0;
119 }
120 
121 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
122 {
123 	bool all_flow_encaps_valid = true;
124 	int i;
125 
	/* A flow can be associated with multiple encap entries.
	 * Before offloading the flow, verify that all of them have
	 * a valid neighbour.
	 */
130 	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
131 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
132 			continue;
133 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
134 			all_flow_encaps_valid = false;
135 			break;
136 		}
137 	}
138 
139 	return all_flow_encaps_valid;
140 }
141 
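/* Called when the neighbour of an encap entry becomes valid: allocate the
 * packet reformat for the cached encap header and move the flows on
 * @flow_list from the slow path back to offloaded encap rules.
 */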
142 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
143 			      struct mlx5e_encap_entry *e,
144 			      struct list_head *flow_list)
145 {
146 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
147 	struct mlx5_pkt_reformat_params reformat_params;
148 	struct mlx5_esw_flow_attr *esw_attr;
149 	struct mlx5_flow_handle *rule;
150 	struct mlx5_flow_attr *attr;
151 	struct mlx5_flow_spec *spec;
152 	struct mlx5e_tc_flow *flow;
153 	int err;
154 
155 	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
156 		return;
157 
158 	memset(&reformat_params, 0, sizeof(reformat_params));
159 	reformat_params.type = e->reformat_type;
160 	reformat_params.size = e->encap_size;
161 	reformat_params.data = e->encap_header;
162 	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
163 						     &reformat_params,
164 						     MLX5_FLOW_NAMESPACE_FDB);
165 	if (IS_ERR(e->pkt_reformat)) {
166 		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
167 			       PTR_ERR(e->pkt_reformat));
168 		return;
169 	}
170 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
171 	mlx5e_rep_queue_neigh_stats_work(priv);
172 
173 	list_for_each_entry(flow, flow_list, tmp_list) {
174 		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
175 			continue;
176 
177 		spec = &flow->attr->parse_attr->spec;
178 
179 		attr = mlx5e_tc_get_encap_attr(flow);
180 		esw_attr = attr->esw_attr;
181 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
182 		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
183 
184 		/* Do not offload flows with unresolved neighbors */
185 		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
186 			continue;
187 
188 		err = mlx5e_tc_offload_flow_post_acts(flow);
189 		if (err) {
190 			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
191 				       err);
192 			continue;
193 		}
194 
195 		/* update from slow path rule to encap rule */
196 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
197 		if (IS_ERR(rule)) {
198 			mlx5e_tc_unoffload_flow_post_acts(flow);
199 			err = PTR_ERR(rule);
200 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
201 				       err);
202 			continue;
203 		}
204 
205 		mlx5e_tc_unoffload_from_slow_path(esw, flow);
206 		flow->rule[0] = rule;
207 		/* was unset when slow path rule removed */
208 		flow_flag_set(flow, OFFLOADED);
209 	}
210 }
211 
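/* Called when the neighbour of an encap entry becomes invalid: move the
 * offloaded flows on @flow_list back to the slow path and release the shared
 * packet reformat of the encap entry.
 */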
212 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
213 			      struct mlx5e_encap_entry *e,
214 			      struct list_head *flow_list)
215 {
216 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
217 	struct mlx5_esw_flow_attr *esw_attr;
218 	struct mlx5_flow_handle *rule;
219 	struct mlx5_flow_attr *attr;
220 	struct mlx5_flow_spec *spec;
221 	struct mlx5e_tc_flow *flow;
222 	int err;
223 
224 	list_for_each_entry(flow, flow_list, tmp_list) {
225 		if (!mlx5e_is_offloaded_flow(flow))
226 			continue;
227 
228 		attr = mlx5e_tc_get_encap_attr(flow);
229 		esw_attr = attr->esw_attr;
230 		/* mark the flow's encap dest as non-valid */
231 		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
232 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
233 
		/* Clear pkt_reformat before checking the slow path flag: on a
		 * later invocation the same flow may already have the slow
		 * path flag set, but its pkt_reformat still needs to be
		 * cleared.
		 */
238 		if (flow_flag_test(flow, SLOW))
239 			continue;
240 
241 		/* update from encap rule to slow path rule */
242 		spec = &flow->attr->parse_attr->spec;
243 		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
244 
245 		if (IS_ERR(rule)) {
246 			err = PTR_ERR(rule);
247 			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
248 				       err);
249 			continue;
250 		}
251 
252 		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
253 		mlx5e_tc_unoffload_flow_post_acts(flow);
254 		flow->rule[0] = rule;
255 		/* was unset when fast path rule removed */
256 		flow_flag_set(flow, OFFLOADED);
257 	}
258 
259 	/* we know that the encap is valid */
260 	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
261 	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
262 	e->pkt_reformat = NULL;
263 }
264 
265 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
266 				struct list_head *flow_list,
267 				int index)
268 {
269 	if (IS_ERR(mlx5e_flow_get(flow))) {
270 		/* Flow is being deleted concurrently. Wait for it to be
271 		 * unoffloaded from hardware, otherwise deleting encap will
272 		 * fail.
273 		 */
274 		wait_for_completion(&flow->del_hw_done);
275 		return;
276 	}
277 	wait_for_completion(&flow->init_done);
278 
279 	flow->tmp_entry_index = index;
280 	list_add(&flow->tmp_list, flow_list);
281 }
282 
283 /* Takes reference to all flows attached to encap and adds the flows to
284  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
285  */
286 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
287 {
288 	struct encap_flow_item *efi;
289 	struct mlx5e_tc_flow *flow;
290 
291 	list_for_each_entry(efi, &e->flows, list) {
292 		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
293 		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
294 	}
295 }
296 
297 /* Takes reference to all flows attached to route and adds the flows to
298  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
299  */
300 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
301 					     struct list_head *flow_list)
302 {
303 	struct mlx5e_tc_flow *flow;
304 
305 	list_for_each_entry(flow, &r->decap_flows, decap_routes)
306 		mlx5e_take_tmp_flow(flow, flow_list, 0);
307 }
308 
309 typedef bool (match_cb)(struct mlx5e_encap_entry *);
310 
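/* Iterate the neigh hash entry's encap list: return the entry following @e
 * (or the first entry when @e is NULL) that can be reference-taken and
 * satisfies @match, releasing the previously returned entry. Waits for the
 * candidate to finish initialization before testing it.
 */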
311 static struct mlx5e_encap_entry *
312 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
313 			      struct mlx5e_encap_entry *e,
314 			      match_cb match)
315 {
316 	struct mlx5e_encap_entry *next = NULL;
317 
318 retry:
319 	rcu_read_lock();
320 
321 	/* find encap with non-zero reference counter value */
322 	for (next = e ?
323 		     list_next_or_null_rcu(&nhe->encap_list,
324 					   &e->encap_list,
325 					   struct mlx5e_encap_entry,
326 					   encap_list) :
327 		     list_first_or_null_rcu(&nhe->encap_list,
328 					    struct mlx5e_encap_entry,
329 					    encap_list);
330 	     next;
331 	     next = list_next_or_null_rcu(&nhe->encap_list,
332 					  &next->encap_list,
333 					  struct mlx5e_encap_entry,
334 					  encap_list))
335 		if (mlx5e_encap_take(next))
336 			break;
337 
338 	rcu_read_unlock();
339 
340 	/* release starting encap */
341 	if (e)
342 		mlx5e_encap_put(netdev_priv(e->out_dev), e);
343 	if (!next)
344 		return next;
345 
346 	/* wait for encap to be fully initialized */
347 	wait_for_completion(&next->res_ready);
348 	/* continue searching if encap entry is not in valid state after completion */
349 	if (!match(next)) {
350 		e = next;
351 		goto retry;
352 	}
353 
354 	return next;
355 }
356 
357 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
358 {
359 	return e->flags & MLX5_ENCAP_ENTRY_VALID;
360 }
361 
362 static struct mlx5e_encap_entry *
363 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
364 			   struct mlx5e_encap_entry *e)
365 {
366 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
367 }
368 
369 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
370 {
371 	return e->compl_result >= 0;
372 }
373 
374 struct mlx5e_encap_entry *
375 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
376 			  struct mlx5e_encap_entry *e)
377 {
378 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
379 }
380 
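/* Check whether any offloaded flow encapsulating towards this neighbour has
 * passed traffic since the last report; if so, poke the kernel neighbour
 * entry (neigh_event_send) so it is kept alive while hardware still uses it.
 */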
381 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
382 {
383 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
384 	struct mlx5e_encap_entry *e = NULL;
385 	struct mlx5e_tc_flow *flow;
386 	struct mlx5_fc *counter;
387 	struct neigh_table *tbl;
388 	bool neigh_used = false;
389 	struct neighbour *n;
390 	u64 lastuse;
391 
392 	if (m_neigh->family == AF_INET)
393 		tbl = &arp_tbl;
394 #if IS_ENABLED(CONFIG_IPV6)
395 	else if (m_neigh->family == AF_INET6)
396 		tbl = ipv6_stub->nd_tbl;
397 #endif
398 	else
399 		return;
400 
401 	/* mlx5e_get_next_valid_encap() releases previous encap before returning
402 	 * next one.
403 	 */
404 	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
405 		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
406 		struct encap_flow_item *efi, *tmp;
407 		struct mlx5_eswitch *esw;
408 		LIST_HEAD(flow_list);
409 
410 		esw = priv->mdev->priv.eswitch;
411 		mutex_lock(&esw->offloads.encap_tbl_lock);
412 		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
413 			flow = container_of(efi, struct mlx5e_tc_flow,
414 					    encaps[efi->index]);
415 			if (IS_ERR(mlx5e_flow_get(flow)))
416 				continue;
417 			list_add(&flow->tmp_list, &flow_list);
418 
419 			if (mlx5e_is_offloaded_flow(flow)) {
420 				counter = mlx5e_tc_get_counter(flow);
421 				lastuse = mlx5_fc_query_lastuse(counter);
422 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
423 					neigh_used = true;
424 					break;
425 				}
426 			}
427 		}
428 		mutex_unlock(&esw->offloads.encap_tbl_lock);
429 
430 		mlx5e_put_flow_list(priv, &flow_list);
431 		if (neigh_used) {
432 			/* release current encap before breaking the loop */
433 			mlx5e_encap_put(priv, e);
434 			break;
435 		}
436 	}
437 
438 	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
439 
440 	if (neigh_used) {
441 		nhe->reported_lastuse = jiffies;
442 
443 		/* find the relevant neigh according to the cached device and
444 		 * dst ip pair
445 		 */
446 		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
447 		if (!n)
448 			return;
449 
450 		neigh_event_send(n, NULL);
451 		neigh_release(n);
452 	}
453 }
454 
455 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
456 {
457 	WARN_ON(!list_empty(&e->flows));
458 
459 	if (e->compl_result > 0) {
460 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
461 
462 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
463 			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
464 	}
465 
466 	kfree(e->tun_info);
467 	kfree(e->encap_header);
468 	kfree_rcu(e, rcu);
469 }
470 
471 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
472 				struct mlx5e_decap_entry *d)
473 {
474 	WARN_ON(!list_empty(&d->flows));
475 
476 	if (!d->compl_result)
477 		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
478 
479 	kfree_rcu(d, rcu);
480 }
481 
482 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
483 {
484 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
485 
486 	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
487 		return;
488 	list_del(&e->route_list);
489 	hash_del_rcu(&e->encap_hlist);
490 	mutex_unlock(&esw->offloads.encap_tbl_lock);
491 
492 	mlx5e_encap_dealloc(priv, e);
493 }
494 
495 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
496 {
497 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
498 
499 	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
500 		return;
501 	hash_del_rcu(&d->hlist);
502 	mutex_unlock(&esw->offloads.decap_tbl_lock);
503 
504 	mlx5e_decap_dealloc(priv, d);
505 }
506 
507 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
508 				     struct mlx5e_tc_flow *flow,
509 				     int out_index);
510 
511 void mlx5e_detach_encap(struct mlx5e_priv *priv,
512 			struct mlx5e_tc_flow *flow,
513 			struct mlx5_flow_attr *attr,
514 			int out_index)
515 {
516 	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
517 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
518 
519 	if (!mlx5e_is_eswitch_flow(flow))
520 		return;
521 
522 	if (attr->esw_attr->dests[out_index].flags &
523 	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
524 		mlx5e_detach_encap_route(priv, flow, out_index);
525 
526 	/* flow wasn't fully initialized */
527 	if (!e)
528 		return;
529 
530 	mutex_lock(&esw->offloads.encap_tbl_lock);
531 	list_del(&flow->encaps[out_index].list);
532 	flow->encaps[out_index].e = NULL;
533 	if (!refcount_dec_and_test(&e->refcnt)) {
534 		mutex_unlock(&esw->offloads.encap_tbl_lock);
535 		return;
536 	}
537 	list_del(&e->route_list);
538 	hash_del_rcu(&e->encap_hlist);
539 	mutex_unlock(&esw->offloads.encap_tbl_lock);
540 
541 	mlx5e_encap_dealloc(priv, e);
542 }
543 
544 void mlx5e_detach_decap(struct mlx5e_priv *priv,
545 			struct mlx5e_tc_flow *flow)
546 {
547 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
548 	struct mlx5e_decap_entry *d = flow->decap_reformat;
549 
550 	if (!d)
551 		return;
552 
553 	mutex_lock(&esw->offloads.decap_tbl_lock);
554 	list_del(&flow->l3_to_l2_reformat);
555 	flow->decap_reformat = NULL;
556 
557 	if (!refcount_dec_and_test(&d->refcnt)) {
558 		mutex_unlock(&esw->offloads.decap_tbl_lock);
559 		return;
560 	}
561 	hash_del_rcu(&d->hlist);
562 	mutex_unlock(&esw->offloads.decap_tbl_lock);
563 
564 	mlx5e_decap_dealloc(priv, d);
565 }
566 
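/* Generic encap key comparison: tunnel type plus the full ip_tunnel_key.
 * Tunnel implementations that carry options (e.g. Geneve) use the _options
 * variant below, which additionally compares the option data stored right
 * after the ip_tunnel_info struct.
 */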
567 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
568 					   struct mlx5e_encap_key *b)
569 {
570 	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
571 		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
572 }
573 
574 bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
575 					   struct mlx5e_encap_key *b,
576 					   __be16 tun_flags)
577 {
578 	struct ip_tunnel_info *a_info;
579 	struct ip_tunnel_info *b_info;
580 	bool a_has_opts, b_has_opts;
581 
582 	if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
583 		return false;
584 
585 	a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
586 	b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);
587 
588 	/* keys are equal when both don't have any options attached */
589 	if (!a_has_opts && !b_has_opts)
590 		return true;
591 
592 	if (a_has_opts != b_has_opts)
593 		return false;
594 
595 	/* options stored in memory next to ip_tunnel_info struct */
596 	a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
597 	b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
598 
599 	return a_info->options_len == b_info->options_len &&
600 	       !memcmp(ip_tunnel_info_opts(a_info),
601 		       ip_tunnel_info_opts(b_info),
602 		       a_info->options_len);
603 }
604 
605 static int cmp_decap_info(struct mlx5e_decap_key *a,
606 			  struct mlx5e_decap_key *b)
607 {
608 	return memcmp(&a->key, &b->key, sizeof(b->key));
609 }
610 
611 static int hash_encap_info(struct mlx5e_encap_key *key)
612 {
613 	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
614 		     key->tc_tunnel->tunnel_type);
615 }
616 
617 static int hash_decap_info(struct mlx5e_decap_key *key)
618 {
619 	return jhash(&key->key, sizeof(key->key), 0);
620 }
621 
622 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
623 {
624 	return refcount_inc_not_zero(&e->refcnt);
625 }
626 
627 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
628 {
629 	return refcount_inc_not_zero(&e->refcnt);
630 }
631 
632 static struct mlx5e_encap_entry *
633 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
634 		uintptr_t hash_key)
635 {
636 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
637 	struct mlx5e_encap_key e_key;
638 	struct mlx5e_encap_entry *e;
639 
640 	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
641 				   encap_hlist, hash_key) {
642 		e_key.ip_tun_key = &e->tun_info->key;
643 		e_key.tc_tunnel = e->tunnel;
644 		if (e->tunnel->encap_info_equal(&e_key, key) &&
645 		    mlx5e_encap_take(e))
646 			return e;
647 	}
648 
649 	return NULL;
650 }
651 
652 static struct mlx5e_decap_entry *
653 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
654 		uintptr_t hash_key)
655 {
656 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
657 	struct mlx5e_decap_key r_key;
658 	struct mlx5e_decap_entry *e;
659 
660 	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
661 				   hlist, hash_key) {
662 		r_key = e->key;
663 		if (!cmp_decap_info(&r_key, key) &&
664 		    mlx5e_decap_take(e))
665 			return e;
666 	}
667 	return NULL;
668 }
669 
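/* ip_tunnel_info keeps its options directly after the struct in memory, so
 * duplicate the struct together with the trailing options in one allocation.
 */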
670 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
671 {
672 	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
673 
674 	return kmemdup(tun_info, tun_size, GFP_KERNEL);
675 }
676 
677 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
678 				      struct mlx5e_tc_flow *flow,
679 				      int out_index,
680 				      struct mlx5e_encap_entry *e,
681 				      struct netlink_ext_ack *extack)
682 {
683 	int i;
684 
685 	for (i = 0; i < out_index; i++) {
686 		if (flow->encaps[i].e != e)
687 			continue;
688 		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
689 		netdev_err(priv->netdev, "can't duplicate encap action\n");
690 		return true;
691 	}
692 
693 	return false;
694 }
695 
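/* Handle tunnel endpoints configured on another mlx5 netdev ("VF tunnel"):
 * look up the vport of the route device and add a header rewrite that sets
 * the source-port metadata register to it, marking the destination with
 * MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE.
 */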
696 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
697 			       struct mlx5_flow_attr *attr,
698 			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
699 			       struct net_device *out_dev,
700 			       int route_dev_ifindex,
701 			       int out_index)
702 {
703 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
704 	struct net_device *route_dev;
705 	u16 vport_num;
706 	int err = 0;
707 	u32 data;
708 
709 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
710 
711 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
712 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
713 		goto out;
714 
715 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
716 	if (err)
717 		goto out;
718 
719 	attr->dest_chain = 0;
720 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
721 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
722 	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
723 						       vport_num);
724 	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
725 						   MLX5_FLOW_NAMESPACE_FDB,
726 						   VPORT_TO_REG, data);
727 	if (err >= 0) {
728 		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
729 		err = 0;
730 	}
731 
732 out:
733 	if (route_dev)
734 		dev_put(route_dev);
735 	return err;
736 }
737 
738 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
739 				  struct mlx5_esw_flow_attr *attr,
740 				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
741 				  struct net_device *out_dev,
742 				  int route_dev_ifindex,
743 				  int out_index)
744 {
745 	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
746 	struct net_device *route_dev;
747 	u16 vport_num;
748 	int err = 0;
749 	u32 data;
750 
751 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
752 
753 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
754 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
755 		err = -ENODEV;
756 		goto out;
757 	}
758 
759 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
760 	if (err)
761 		goto out;
762 
763 	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
764 						       vport_num);
765 	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
766 
767 out:
768 	if (route_dev)
769 		dev_put(route_dev);
770 	return err;
771 }
772 
773 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
774 {
775 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
776 	struct mlx5_rep_uplink_priv *uplink_priv;
777 	struct mlx5e_rep_priv *uplink_rpriv;
778 	struct mlx5e_tc_tun_encap *encap;
779 	unsigned int ret;
780 
781 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
782 	uplink_priv = &uplink_rpriv->uplink_priv;
783 	encap = uplink_priv->encap;
784 
785 	spin_lock_bh(&encap->route_lock);
786 	ret = encap->route_tbl_last_update;
787 	spin_unlock_bh(&encap->route_lock);
788 	return ret;
789 }
790 
791 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
792 				    struct mlx5e_tc_flow *flow,
793 				    struct mlx5_flow_attr *attr,
794 				    struct mlx5e_encap_entry *e,
795 				    bool new_encap_entry,
796 				    unsigned long tbl_time_before,
797 				    int out_index);
798 
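/* Attach a flow to a (possibly shared) encap entry:
 * - look up an existing entry by tunnel key hash, or allocate a new one and
 *   resolve its encap header through a route/neighbour lookup;
 * - for VF tunnel routes, attach a route entry so FIB changes can be tracked;
 * - if the encap is valid, use its packet reformat, otherwise keep the flow
 *   on the slow path until the neighbour is resolved.
 */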
799 int mlx5e_attach_encap(struct mlx5e_priv *priv,
800 		       struct mlx5e_tc_flow *flow,
801 		       struct mlx5_flow_attr *attr,
802 		       struct net_device *mirred_dev,
803 		       int out_index,
804 		       struct netlink_ext_ack *extack,
805 		       struct net_device **encap_dev)
806 {
807 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
808 	struct mlx5e_tc_flow_parse_attr *parse_attr;
809 	const struct ip_tunnel_info *tun_info;
810 	const struct mlx5e_mpls_info *mpls_info;
811 	unsigned long tbl_time_before = 0;
812 	struct mlx5e_encap_entry *e;
813 	struct mlx5e_encap_key key;
814 	bool entry_created = false;
815 	unsigned short family;
816 	uintptr_t hash_key;
817 	int err = 0;
818 
819 	parse_attr = attr->parse_attr;
820 	tun_info = parse_attr->tun_info[out_index];
821 	mpls_info = &parse_attr->mpls_info[out_index];
822 	family = ip_tunnel_info_af(tun_info);
823 	key.ip_tun_key = &tun_info->key;
824 	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
825 	if (!key.tc_tunnel) {
826 		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
827 		return -EOPNOTSUPP;
828 	}
829 
830 	hash_key = hash_encap_info(&key);
831 
832 	mutex_lock(&esw->offloads.encap_tbl_lock);
833 	e = mlx5e_encap_get(priv, &key, hash_key);
834 
	/* an entry already exists; verify it is valid before reusing it */
836 	if (e) {
837 		/* Check that entry was not already attached to this flow */
838 		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
839 			err = -EOPNOTSUPP;
840 			goto out_err;
841 		}
842 
843 		mutex_unlock(&esw->offloads.encap_tbl_lock);
844 		wait_for_completion(&e->res_ready);
845 
846 		/* Protect against concurrent neigh update. */
847 		mutex_lock(&esw->offloads.encap_tbl_lock);
848 		if (e->compl_result < 0) {
849 			err = -EREMOTEIO;
850 			goto out_err;
851 		}
852 		goto attach_flow;
853 	}
854 
855 	e = kzalloc(sizeof(*e), GFP_KERNEL);
856 	if (!e) {
857 		err = -ENOMEM;
858 		goto out_err;
859 	}
860 
861 	refcount_set(&e->refcnt, 1);
862 	init_completion(&e->res_ready);
863 	entry_created = true;
864 	INIT_LIST_HEAD(&e->route_list);
865 
866 	tun_info = mlx5e_dup_tun_info(tun_info);
867 	if (!tun_info) {
868 		err = -ENOMEM;
869 		goto out_err_init;
870 	}
871 	e->tun_info = tun_info;
872 	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
873 	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
874 	if (err)
875 		goto out_err_init;
876 
877 	INIT_LIST_HEAD(&e->flows);
878 	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
879 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
880 	mutex_unlock(&esw->offloads.encap_tbl_lock);
881 
882 	if (family == AF_INET)
883 		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
884 	else if (family == AF_INET6)
885 		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
886 
887 	/* Protect against concurrent neigh update. */
888 	mutex_lock(&esw->offloads.encap_tbl_lock);
889 	complete_all(&e->res_ready);
890 	if (err) {
891 		e->compl_result = err;
892 		goto out_err;
893 	}
894 	e->compl_result = 1;
895 
896 attach_flow:
897 	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
898 				       tbl_time_before, out_index);
899 	if (err)
900 		goto out_err;
901 
902 	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
903 	if (err == -EOPNOTSUPP) {
904 		/* If device doesn't support int port offload,
905 		 * redirect to uplink vport.
906 		 */
907 		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
908 		err = 0;
909 	} else if (err) {
910 		goto out_err;
911 	}
912 
913 	flow->encaps[out_index].e = e;
914 	list_add(&flow->encaps[out_index].list, &e->flows);
915 	flow->encaps[out_index].index = out_index;
916 	*encap_dev = e->out_dev;
917 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
918 		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
919 		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
920 	} else {
921 		flow_flag_set(flow, SLOW);
922 	}
923 	mutex_unlock(&esw->offloads.encap_tbl_lock);
924 
925 	return err;
926 
927 out_err:
928 	mutex_unlock(&esw->offloads.encap_tbl_lock);
929 	if (e)
930 		mlx5e_encap_put(priv, e);
931 	return err;
932 
933 out_err_init:
934 	mutex_unlock(&esw->offloads.encap_tbl_lock);
935 	kfree(tun_info);
936 	kfree(e);
937 	return err;
938 }
939 
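/* Attach a flow to a (possibly shared) L3-to-L2 packet reformat used to
 * decapsulate tunnelled packets. Entries are keyed by the inner Ethernet
 * header that must be restored after decap.
 */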
940 int mlx5e_attach_decap(struct mlx5e_priv *priv,
941 		       struct mlx5e_tc_flow *flow,
942 		       struct netlink_ext_ack *extack)
943 {
944 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
945 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
946 	struct mlx5_pkt_reformat_params reformat_params;
947 	struct mlx5e_decap_entry *d;
948 	struct mlx5e_decap_key key;
949 	uintptr_t hash_key;
950 	int err = 0;
951 
952 	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
953 		NL_SET_ERR_MSG_MOD(extack,
954 				   "encap header larger than max supported");
955 		return -EOPNOTSUPP;
956 	}
957 
958 	key.key = attr->eth;
959 	hash_key = hash_decap_info(&key);
960 	mutex_lock(&esw->offloads.decap_tbl_lock);
961 	d = mlx5e_decap_get(priv, &key, hash_key);
962 	if (d) {
963 		mutex_unlock(&esw->offloads.decap_tbl_lock);
964 		wait_for_completion(&d->res_ready);
965 		mutex_lock(&esw->offloads.decap_tbl_lock);
966 		if (d->compl_result) {
967 			err = -EREMOTEIO;
968 			goto out_free;
969 		}
970 		goto found;
971 	}
972 
973 	d = kzalloc(sizeof(*d), GFP_KERNEL);
974 	if (!d) {
975 		err = -ENOMEM;
976 		goto out_err;
977 	}
978 
979 	d->key = key;
980 	refcount_set(&d->refcnt, 1);
981 	init_completion(&d->res_ready);
982 	INIT_LIST_HEAD(&d->flows);
983 	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
984 	mutex_unlock(&esw->offloads.decap_tbl_lock);
985 
986 	memset(&reformat_params, 0, sizeof(reformat_params));
987 	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
988 	reformat_params.size = sizeof(attr->eth);
989 	reformat_params.data = &attr->eth;
990 	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
991 						     &reformat_params,
992 						     MLX5_FLOW_NAMESPACE_FDB);
993 	if (IS_ERR(d->pkt_reformat)) {
994 		err = PTR_ERR(d->pkt_reformat);
995 		d->compl_result = err;
996 	}
997 	mutex_lock(&esw->offloads.decap_tbl_lock);
998 	complete_all(&d->res_ready);
999 	if (err)
1000 		goto out_free;
1001 
1002 found:
1003 	flow->decap_reformat = d;
1004 	attr->decap_pkt_reformat = d->pkt_reformat;
1005 	list_add(&flow->l3_to_l2_reformat, &d->flows);
1006 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1007 	return 0;
1008 
1009 out_free:
1010 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1011 	mlx5e_decap_put(priv, d);
1012 	return err;
1013 
1014 out_err:
1015 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1016 	return err;
1017 }
1018 
1019 static int cmp_route_info(struct mlx5e_route_key *a,
1020 			  struct mlx5e_route_key *b)
1021 {
1022 	if (a->ip_version == 4 && b->ip_version == 4)
1023 		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1024 			      sizeof(a->endpoint_ip.v4));
1025 	else if (a->ip_version == 6 && b->ip_version == 6)
1026 		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1027 			      sizeof(a->endpoint_ip.v6));
1028 	return 1;
1029 }
1030 
1031 static u32 hash_route_info(struct mlx5e_route_key *key)
1032 {
1033 	if (key->ip_version == 4)
1034 		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1035 	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1036 }
1037 
1038 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1039 				struct mlx5e_route_entry *r)
1040 {
1041 	WARN_ON(!list_empty(&r->decap_flows));
1042 	WARN_ON(!list_empty(&r->encap_entries));
1043 
1044 	kfree_rcu(r, rcu);
1045 }
1046 
1047 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1048 {
1049 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1050 
1051 	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1052 		return;
1053 
1054 	hash_del_rcu(&r->hlist);
1055 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1056 
1057 	mlx5e_route_dealloc(priv, r);
1058 }
1059 
1060 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1061 {
1062 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1063 
1064 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1065 
1066 	if (!refcount_dec_and_test(&r->refcnt))
1067 		return;
1068 	hash_del_rcu(&r->hlist);
1069 	mlx5e_route_dealloc(priv, r);
1070 }
1071 
1072 static struct mlx5e_route_entry *
1073 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1074 		u32 hash_key)
1075 {
1076 	struct mlx5e_route_key r_key;
1077 	struct mlx5e_route_entry *r;
1078 
1079 	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1080 		r_key = r->key;
1081 		if (!cmp_route_info(&r_key, key) &&
1082 		    refcount_inc_not_zero(&r->refcnt))
1083 			return r;
1084 	}
1085 	return NULL;
1086 }
1087 
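/* Look up a route entry for the given endpoint, taking a reference, or create
 * a new one. When a new entry is created, the current route table generation
 * is reported through @route_tbl_change_time so callers can detect FIB events
 * that raced with entry creation.
 */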
1088 static struct mlx5e_route_entry *
1089 mlx5e_route_get_create(struct mlx5e_priv *priv,
1090 		       struct mlx5e_route_key *key,
1091 		       int tunnel_dev_index,
1092 		       unsigned long *route_tbl_change_time)
1093 {
1094 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1095 	struct mlx5_rep_uplink_priv *uplink_priv;
1096 	struct mlx5e_rep_priv *uplink_rpriv;
1097 	struct mlx5e_tc_tun_encap *encap;
1098 	struct mlx5e_route_entry *r;
1099 	u32 hash_key;
1100 
1101 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1102 	uplink_priv = &uplink_rpriv->uplink_priv;
1103 	encap = uplink_priv->encap;
1104 
1105 	hash_key = hash_route_info(key);
1106 	spin_lock_bh(&encap->route_lock);
1107 	r = mlx5e_route_get(encap, key, hash_key);
1108 	spin_unlock_bh(&encap->route_lock);
1109 	if (r) {
1110 		if (!mlx5e_route_entry_valid(r)) {
1111 			mlx5e_route_put_locked(priv, r);
1112 			return ERR_PTR(-EINVAL);
1113 		}
1114 		return r;
1115 	}
1116 
1117 	r = kzalloc(sizeof(*r), GFP_KERNEL);
1118 	if (!r)
1119 		return ERR_PTR(-ENOMEM);
1120 
1121 	r->key = *key;
1122 	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1123 	r->tunnel_dev_index = tunnel_dev_index;
1124 	refcount_set(&r->refcnt, 1);
1125 	INIT_LIST_HEAD(&r->decap_flows);
1126 	INIT_LIST_HEAD(&r->encap_entries);
1127 
1128 	spin_lock_bh(&encap->route_lock);
1129 	*route_tbl_change_time = encap->route_tbl_last_update;
1130 	hash_add(encap->route_tbl, &r->hlist, hash_key);
1131 	spin_unlock_bh(&encap->route_lock);
1132 
1133 	return r;
1134 }
1135 
1136 static struct mlx5e_route_entry *
1137 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1138 {
1139 	u32 hash_key = hash_route_info(key);
1140 	struct mlx5e_route_entry *r;
1141 
1142 	spin_lock_bh(&encap->route_lock);
1143 	encap->route_tbl_last_update = jiffies;
1144 	r = mlx5e_route_get(encap, key, hash_key);
1145 	spin_unlock_bh(&encap->route_lock);
1146 
1147 	return r;
1148 }
1149 
1150 struct mlx5e_tc_fib_event_data {
1151 	struct work_struct work;
1152 	unsigned long event;
1153 	struct mlx5e_route_entry *r;
1154 	struct net_device *ul_dev;
1155 };
1156 
1157 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1158 static struct mlx5e_tc_fib_event_data *
1159 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1160 {
1161 	struct mlx5e_tc_fib_event_data *fib_work;
1162 
1163 	fib_work = kzalloc(sizeof(*fib_work), flags);
1164 	if (WARN_ON(!fib_work))
1165 		return NULL;
1166 
1167 	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1168 	fib_work->event = event;
1169 	fib_work->ul_dev = ul_dev;
1170 
1171 	return fib_work;
1172 }
1173 
1174 static int
1175 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1176 			   struct mlx5e_route_entry *r,
1177 			   unsigned long event)
1178 {
1179 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1180 	struct mlx5e_tc_fib_event_data *fib_work;
1181 	struct mlx5e_rep_priv *uplink_rpriv;
1182 	struct net_device *ul_dev;
1183 
1184 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1185 	ul_dev = uplink_rpriv->netdev;
1186 
1187 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1188 	if (!fib_work)
1189 		return -ENOMEM;
1190 
1191 	dev_hold(ul_dev);
1192 	refcount_inc(&r->refcnt);
1193 	fib_work->r = r;
1194 	queue_work(priv->wq, &fib_work->work);
1195 
1196 	return 0;
1197 }
1198 
1199 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1200 			     struct mlx5e_tc_flow *flow)
1201 {
1202 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1203 	unsigned long tbl_time_before, tbl_time_after;
1204 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1205 	struct mlx5_flow_attr *attr = flow->attr;
1206 	struct mlx5_esw_flow_attr *esw_attr;
1207 	struct mlx5e_route_entry *r;
1208 	struct mlx5e_route_key key;
1209 	int err = 0;
1210 
1211 	esw_attr = attr->esw_attr;
1212 	parse_attr = attr->parse_attr;
1213 	mutex_lock(&esw->offloads.encap_tbl_lock);
1214 	if (!esw_attr->rx_tun_attr)
1215 		goto out;
1216 
1217 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1218 	tbl_time_after = tbl_time_before;
1219 	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1220 	if (err || !esw_attr->rx_tun_attr->decap_vport)
1221 		goto out;
1222 
1223 	key.ip_version = attr->tun_ip_version;
1224 	if (key.ip_version == 4)
1225 		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1226 	else
1227 		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1228 
1229 	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1230 				   &tbl_time_after);
1231 	if (IS_ERR(r)) {
1232 		err = PTR_ERR(r);
1233 		goto out;
1234 	}
	/* Routing changed concurrently. The FIB event handler might have missed
	 * the new entry, so schedule an update.
	 */
1238 	if (tbl_time_before != tbl_time_after) {
1239 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1240 		if (err) {
1241 			mlx5e_route_put_locked(priv, r);
1242 			goto out;
1243 		}
1244 	}
1245 
1246 	flow->decap_route = r;
1247 	list_add(&flow->decap_routes, &r->decap_flows);
1248 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1249 	return 0;
1250 
1251 out:
1252 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1253 	return err;
1254 }
1255 
1256 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1257 				    struct mlx5e_tc_flow *flow,
1258 				    struct mlx5_flow_attr *attr,
1259 				    struct mlx5e_encap_entry *e,
1260 				    bool new_encap_entry,
1261 				    unsigned long tbl_time_before,
1262 				    int out_index)
1263 {
1264 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1265 	unsigned long tbl_time_after = tbl_time_before;
1266 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1267 	const struct ip_tunnel_info *tun_info;
1268 	struct mlx5_esw_flow_attr *esw_attr;
1269 	struct mlx5e_route_entry *r;
1270 	struct mlx5e_route_key key;
1271 	unsigned short family;
1272 	int err = 0;
1273 
1274 	esw_attr = attr->esw_attr;
1275 	parse_attr = attr->parse_attr;
1276 	tun_info = parse_attr->tun_info[out_index];
1277 	family = ip_tunnel_info_af(tun_info);
1278 
1279 	if (family == AF_INET) {
1280 		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1281 		key.ip_version = 4;
1282 	} else if (family == AF_INET6) {
1283 		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1284 		key.ip_version = 6;
1285 	}
1286 
1287 	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1288 				  e->route_dev_ifindex, out_index);
1289 	if (err || !(esw_attr->dests[out_index].flags &
1290 		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1291 		return err;
1292 
1293 	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1294 				   &tbl_time_after);
1295 	if (IS_ERR(r))
1296 		return PTR_ERR(r);
	/* Routing changed concurrently. The FIB event handler might have missed
	 * the new entry, so schedule an update.
	 */
1300 	if (tbl_time_before != tbl_time_after) {
1301 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1302 		if (err) {
1303 			mlx5e_route_put_locked(priv, r);
1304 			return err;
1305 		}
1306 	}
1307 
1308 	flow->encap_routes[out_index].r = r;
1309 	if (new_encap_entry)
1310 		list_add(&e->route_list, &r->encap_entries);
1311 	flow->encap_routes[out_index].index = out_index;
1312 	return 0;
1313 }
1314 
1315 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1316 			      struct mlx5e_tc_flow *flow)
1317 {
1318 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1319 	struct mlx5e_route_entry *r = flow->decap_route;
1320 
1321 	if (!r)
1322 		return;
1323 
1324 	mutex_lock(&esw->offloads.encap_tbl_lock);
1325 	list_del(&flow->decap_routes);
1326 	flow->decap_route = NULL;
1327 
1328 	if (!refcount_dec_and_test(&r->refcnt)) {
1329 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1330 		return;
1331 	}
1332 	hash_del_rcu(&r->hlist);
1333 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1334 
1335 	mlx5e_route_dealloc(priv, r);
1336 }
1337 
1338 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1339 				     struct mlx5e_tc_flow *flow,
1340 				     int out_index)
1341 {
1342 	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1343 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1344 	struct mlx5e_encap_entry *e, *tmp;
1345 
1346 	if (!r)
1347 		return;
1348 
1349 	mutex_lock(&esw->offloads.encap_tbl_lock);
1350 	flow->encap_routes[out_index].r = NULL;
1351 
1352 	if (!refcount_dec_and_test(&r->refcnt)) {
1353 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1354 		return;
1355 	}
1356 	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1357 		list_del_init(&e->route_list);
1358 	hash_del_rcu(&r->hlist);
1359 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1360 
1361 	mlx5e_route_dealloc(priv, r);
1362 }
1363 
1364 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1365 				   struct mlx5e_encap_entry *e,
1366 				   struct list_head *encap_flows)
1367 {
1368 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1369 	struct mlx5e_tc_flow *flow;
1370 
1371 	list_for_each_entry(flow, encap_flows, tmp_list) {
1372 		struct mlx5_flow_attr *attr = flow->attr;
1373 		struct mlx5_esw_flow_attr *esw_attr;
1374 
1375 		if (!mlx5e_is_offloaded_flow(flow))
1376 			continue;
1377 		esw_attr = attr->esw_attr;
1378 
1379 		if (flow_flag_test(flow, SLOW))
1380 			mlx5e_tc_unoffload_from_slow_path(esw, flow);
1381 		else
1382 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1383 
1384 		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1385 		attr->modify_hdr = NULL;
1386 
1387 		esw_attr->dests[flow->tmp_entry_index].flags &=
1388 			~MLX5_ESW_DEST_ENCAP_VALID;
1389 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1390 	}
1391 
1392 	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1393 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1394 		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1395 		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1396 		e->pkt_reformat = NULL;
1397 	}
1398 }
1399 
1400 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1401 				  struct net_device *tunnel_dev,
1402 				  struct mlx5e_encap_entry *e,
1403 				  struct list_head *encap_flows)
1404 {
1405 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1406 	struct mlx5e_tc_flow *flow;
1407 	int err;
1408 
1409 	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1410 		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1411 		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1412 	if (err)
1413 		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1414 	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1415 
1416 	list_for_each_entry(flow, encap_flows, tmp_list) {
1417 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1418 		struct mlx5_esw_flow_attr *esw_attr;
1419 		struct mlx5_flow_handle *rule;
1420 		struct mlx5_flow_attr *attr;
1421 		struct mlx5_flow_spec *spec;
1422 
1423 		if (flow_flag_test(flow, FAILED))
1424 			continue;
1425 
1426 		spec = &flow->attr->parse_attr->spec;
1427 
1428 		attr = mlx5e_tc_get_encap_attr(flow);
1429 		esw_attr = attr->esw_attr;
1430 		parse_attr = attr->parse_attr;
1431 
1432 		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1433 					     e->out_dev, e->route_dev_ifindex,
1434 					     flow->tmp_entry_index);
1435 		if (err) {
1436 			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1437 			continue;
1438 		}
1439 
1440 		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1441 		if (err) {
1442 			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1443 				       err);
1444 			continue;
1445 		}
1446 
1447 		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1448 			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1449 			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1450 			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1451 				goto offload_to_slow_path;
1452 
1453 			err = mlx5e_tc_offload_flow_post_acts(flow);
1454 			if (err) {
1455 				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1456 					       err);
1457 				goto offload_to_slow_path;
1458 			}
1459 
1460 			/* update from slow path rule to encap rule */
1461 			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1462 			if (IS_ERR(rule)) {
1463 				mlx5e_tc_unoffload_flow_post_acts(flow);
1464 				err = PTR_ERR(rule);
1465 				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1466 					       err);
1467 			} else {
1468 				flow->rule[0] = rule;
1469 			}
1470 		} else {
1471 offload_to_slow_path:
1472 			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1473 			/* mark the flow's encap dest as non-valid */
1474 			esw_attr->dests[flow->tmp_entry_index].flags &=
1475 				~MLX5_ESW_DEST_ENCAP_VALID;
1476 
1477 			if (IS_ERR(rule)) {
1478 				err = PTR_ERR(rule);
1479 				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1480 					       err);
1481 			} else {
1482 				flow->rule[0] = rule;
1483 			}
1484 		}
1485 		flow_flag_set(flow, OFFLOADED);
1486 	}
1487 }
1488 
1489 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1490 				     struct mlx5e_route_entry *r,
1491 				     struct list_head *flow_list,
1492 				     bool replace)
1493 {
1494 	struct net_device *tunnel_dev;
1495 	struct mlx5e_encap_entry *e;
1496 
1497 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1498 	if (!tunnel_dev)
1499 		return -ENODEV;
1500 
1501 	list_for_each_entry(e, &r->encap_entries, route_list) {
1502 		LIST_HEAD(encap_flows);
1503 
1504 		mlx5e_take_all_encap_flows(e, &encap_flows);
1505 		if (list_empty(&encap_flows))
1506 			continue;
1507 
1508 		if (mlx5e_route_entry_valid(r))
1509 			mlx5e_invalidate_encap(priv, e, &encap_flows);
1510 
1511 		if (!replace) {
1512 			list_splice(&encap_flows, flow_list);
1513 			continue;
1514 		}
1515 
1516 		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1517 		list_splice(&encap_flows, flow_list);
1518 	}
1519 
1520 	return 0;
1521 }
1522 
1523 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1524 				      struct list_head *flow_list)
1525 {
1526 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1527 	struct mlx5e_tc_flow *flow;
1528 
1529 	list_for_each_entry(flow, flow_list, tmp_list)
1530 		if (mlx5e_is_offloaded_flow(flow))
1531 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1532 }
1533 
1534 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1535 				  struct list_head *decap_flows)
1536 {
1537 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1538 	struct mlx5e_tc_flow *flow;
1539 
1540 	list_for_each_entry(flow, decap_flows, tmp_list) {
1541 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1542 		struct mlx5_flow_attr *attr = flow->attr;
1543 		struct mlx5_flow_handle *rule;
1544 		struct mlx5_flow_spec *spec;
1545 		int err;
1546 
1547 		if (flow_flag_test(flow, FAILED))
1548 			continue;
1549 
1550 		parse_attr = attr->parse_attr;
1551 		spec = &parse_attr->spec;
1552 		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1553 		if (err) {
1554 			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1555 				       err);
1556 			continue;
1557 		}
1558 
1559 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1560 		if (IS_ERR(rule)) {
1561 			err = PTR_ERR(rule);
1562 			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1563 				       err);
1564 		} else {
1565 			flow->rule[0] = rule;
1566 			flow_flag_set(flow, OFFLOADED);
1567 		}
1568 	}
1569 }
1570 
1571 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1572 					  struct mlx5e_route_entry *r,
1573 					  struct list_head *flow_list,
1574 					  bool replace)
1575 {
1576 	struct net_device *tunnel_dev;
1577 	LIST_HEAD(decap_flows);
1578 
1579 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1580 	if (!tunnel_dev)
1581 		return -ENODEV;
1582 
1583 	mlx5e_take_all_route_decap_flows(r, &decap_flows);
1584 	if (mlx5e_route_entry_valid(r))
1585 		mlx5e_unoffload_flow_list(priv, &decap_flows);
1586 	if (replace)
1587 		mlx5e_reoffload_decap(priv, &decap_flows);
1588 
1589 	list_splice(&decap_flows, flow_list);
1590 
1591 	return 0;
1592 }
1593 
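/* Deferred FIB event handler. Under RTNL and encap_tbl_lock, take all encap
 * and decap flows that depend on the route, unoffload them, and for route
 * replace events re-resolve the encap headers and offload them again.
 */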
1594 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1595 {
1596 	struct mlx5e_tc_fib_event_data *event_data =
1597 		container_of(work, struct mlx5e_tc_fib_event_data, work);
1598 	struct net_device *ul_dev = event_data->ul_dev;
1599 	struct mlx5e_priv *priv = netdev_priv(ul_dev);
1600 	struct mlx5e_route_entry *r = event_data->r;
1601 	struct mlx5_eswitch *esw;
1602 	LIST_HEAD(flow_list);
1603 	bool replace;
1604 	int err;
1605 
1606 	/* sync with concurrent neigh updates */
1607 	rtnl_lock();
1608 	esw = priv->mdev->priv.eswitch;
1609 	mutex_lock(&esw->offloads.encap_tbl_lock);
1610 	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1611 
1612 	if (!mlx5e_route_entry_valid(r) && !replace)
1613 		goto out;
1614 
1615 	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1616 	if (err)
1617 		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1618 			       err);
1619 
1620 	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1621 	if (err)
1622 		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1623 			       err);
1624 
1625 	if (replace)
1626 		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1627 out:
1628 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1629 	rtnl_unlock();
1630 
1631 	mlx5e_put_flow_list(priv, &flow_list);
1632 	mlx5e_route_put(priv, event_data->r);
1633 	dev_put(event_data->ul_dev);
1634 	kfree(event_data);
1635 }
1636 
1637 static struct mlx5e_tc_fib_event_data *
1638 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1639 			 struct net_device *ul_dev,
1640 			 struct mlx5e_tc_tun_encap *encap,
1641 			 unsigned long event,
1642 			 struct fib_notifier_info *info)
1643 {
1644 	struct fib_entry_notifier_info *fen_info;
1645 	struct mlx5e_tc_fib_event_data *fib_work;
1646 	struct mlx5e_route_entry *r;
1647 	struct mlx5e_route_key key;
1648 	struct net_device *fib_dev;
1649 
1650 	fen_info = container_of(info, struct fib_entry_notifier_info, info);
1651 	if (fen_info->fi->nh)
1652 		return NULL;
1653 	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1654 	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1655 	    fen_info->dst_len != 32)
1656 		return NULL;
1657 
1658 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1659 	if (!fib_work)
1660 		return ERR_PTR(-ENOMEM);
1661 
1662 	key.endpoint_ip.v4 = htonl(fen_info->dst);
1663 	key.ip_version = 4;
1664 
	/* Can't fail after this point because releasing the reference to r
	 * requires taking a sleeping mutex, which we can't do in atomic
	 * context.
	 */
1669 	r = mlx5e_route_lookup_for_update(encap, &key);
1670 	if (!r)
1671 		goto out;
1672 	fib_work->r = r;
1673 	dev_hold(ul_dev);
1674 
1675 	return fib_work;
1676 
1677 out:
1678 	kfree(fib_work);
1679 	return NULL;
1680 }
1681 
1682 static struct mlx5e_tc_fib_event_data *
1683 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1684 			 struct net_device *ul_dev,
1685 			 struct mlx5e_tc_tun_encap *encap,
1686 			 unsigned long event,
1687 			 struct fib_notifier_info *info)
1688 {
1689 	struct fib6_entry_notifier_info *fen_info;
1690 	struct mlx5e_tc_fib_event_data *fib_work;
1691 	struct mlx5e_route_entry *r;
1692 	struct mlx5e_route_key key;
1693 	struct net_device *fib_dev;
1694 
1695 	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1696 	fib_dev = fib6_info_nh_dev(fen_info->rt);
1697 	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1698 	    fen_info->rt->fib6_dst.plen != 128)
1699 		return NULL;
1700 
1701 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1702 	if (!fib_work)
1703 		return ERR_PTR(-ENOMEM);
1704 
1705 	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1706 	       sizeof(fen_info->rt->fib6_dst.addr));
1707 	key.ip_version = 6;
1708 
	/* Can't fail after this point because releasing the reference to r
	 * requires taking a sleeping mutex, which we can't do in atomic
	 * context.
	 */
1713 	r = mlx5e_route_lookup_for_update(encap, &key);
1714 	if (!r)
1715 		goto out;
1716 	fib_work->r = r;
1717 	dev_hold(ul_dev);
1718 
1719 	return fib_work;
1720 
1721 out:
1722 	kfree(fib_work);
1723 	return NULL;
1724 }
1725 
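/* FIB notifier callback, running in atomic context. Only host routes (/32
 * for IPv4, /128 for IPv6) whose nexthop device is an mlx5e netdev are of
 * interest; matching events are handed off to a work item since updating
 * offloaded flows requires sleeping locks.
 */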
1726 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1727 {
1728 	struct mlx5e_tc_fib_event_data *fib_work;
1729 	struct fib_notifier_info *info = ptr;
1730 	struct mlx5e_tc_tun_encap *encap;
1731 	struct net_device *ul_dev;
1732 	struct mlx5e_priv *priv;
1733 
1734 	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1735 	priv = encap->priv;
1736 	ul_dev = priv->netdev;
1737 	priv = netdev_priv(ul_dev);
1738 
1739 	switch (event) {
1740 	case FIB_EVENT_ENTRY_REPLACE:
1741 	case FIB_EVENT_ENTRY_DEL:
1742 		if (info->family == AF_INET)
1743 			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1744 		else if (info->family == AF_INET6)
1745 			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1746 		else
1747 			return NOTIFY_DONE;
1748 
1749 		if (!IS_ERR_OR_NULL(fib_work)) {
1750 			queue_work(priv->wq, &fib_work->work);
1751 		} else if (IS_ERR(fib_work)) {
1752 			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1753 			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1754 				       PTR_ERR(fib_work));
1755 		}
1756 
1757 		break;
1758 	default:
1759 		return NOTIFY_DONE;
1760 	}
1761 
1762 	return NOTIFY_DONE;
1763 }
1764 
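/* Allocate the per-uplink encap context and register the FIB notifier that
 * feeds mlx5e_tc_tun_fib_event().
 */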
1765 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1766 {
1767 	struct mlx5e_tc_tun_encap *encap;
1768 	int err;
1769 
1770 	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1771 	if (!encap)
1772 		return ERR_PTR(-ENOMEM);
1773 
1774 	encap->priv = priv;
1775 	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1776 	spin_lock_init(&encap->route_lock);
1777 	hash_init(encap->route_tbl);
1778 	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1779 				    NULL, NULL);
1780 	if (err) {
1781 		kvfree(encap);
1782 		return ERR_PTR(err);
1783 	}
1784 
1785 	return encap;
1786 }
1787 
1788 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1789 {
1790 	if (!encap)
1791 		return;
1792 
1793 	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1794 	flush_workqueue(encap->priv->wq); /* flush fib event works */
1795 	kvfree(encap);
1796 }
1797