1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3 
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include <net/ip_tunnels.h>
7 #include "tc_tun_encap.h"
8 #include "en_tc.h"
9 #include "tc_tun.h"
10 #include "rep/tc.h"
11 #include "diag/en_tc_tracepoint.h"
12 
13 enum {
14 	MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
15 };
16 
17 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
18 				     struct mlx5_flow_attr *attr,
19 				     struct mlx5e_encap_entry *e,
20 				     int out_index)
21 {
22 	struct net_device *route_dev;
23 	int err = 0;
24 
25 	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
26 
27 	if (!route_dev || !netif_is_ovs_master(route_dev))
28 		goto out;
29 
30 	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
31 						MLX5E_TC_INT_PORT_EGRESS,
32 						&attr->action, out_index);
33 
34 out:
35 	if (route_dev)
36 		dev_put(route_dev);
37 
38 	return err;
39 }
40 
41 struct mlx5e_route_key {
42 	int ip_version;
43 	union {
44 		__be32 v4;
45 		struct in6_addr v6;
46 	} endpoint_ip;
47 };
48 
49 struct mlx5e_route_entry {
50 	struct mlx5e_route_key key;
51 	struct list_head encap_entries;
52 	struct list_head decap_flows;
53 	u32 flags;
54 	struct hlist_node hlist;
55 	refcount_t refcnt;
56 	int tunnel_dev_index;
57 	struct rcu_head rcu;
58 };
59 
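/* Per-uplink tunnel offload context. route_tbl maps tunnel endpoint IP
 * addresses to mlx5e_route_entry objects and is protected by route_lock,
 * a spinlock, so it can be accessed from the FIB notifier's atomic
 * context. The route entries themselves, and the encap/decap flow lists
 * hanging off them, are modified under esw->offloads.encap_tbl_lock.
 */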
60 struct mlx5e_tc_tun_encap {
61 	struct mlx5e_priv *priv;
62 	struct notifier_block fib_nb;
63 	spinlock_t route_lock; /* protects route_tbl */
64 	unsigned long route_tbl_last_update;
65 	DECLARE_HASHTABLE(route_tbl, 8);
66 };
67 
68 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
69 {
70 	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
71 }
72 
73 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
74 			     struct mlx5_flow_spec *spec)
75 {
76 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
77 	struct mlx5_rx_tun_attr *tun_attr;
78 	void *daddr, *saddr;
79 	u8 ip_version;
80 
81 	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
82 	if (!tun_attr)
83 		return -ENOMEM;
84 
85 	esw_attr->rx_tun_attr = tun_attr;
86 	ip_version = mlx5e_tc_get_ip_version(spec, true);
87 
88 	if (ip_version == 4) {
89 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
90 				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
91 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
92 				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
93 		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
94 		tun_attr->src_ip.v4 = *(__be32 *)saddr;
95 		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
96 			return 0;
97 	}
98 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
99 	else if (ip_version == 6) {
100 		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
101 
102 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
103 				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
104 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
105 				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
106 		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
107 		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
108 		if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
109 		    ipv6_addr_any(&tun_attr->src_ip.v6))
110 			return 0;
111 	}
112 #endif
113 	/* Only set the flag if both src and dst ip addresses exist. They are
114 	 * required to establish routing.
115 	 */
116 	flow_flag_set(flow, TUN_RX);
117 	flow->attr->tun_ip_version = ip_version;
118 	return 0;
119 }
120 
121 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
122 {
123 	bool all_flow_encaps_valid = true;
124 	int i;
125 
126 	/* Flow can be associated with multiple encap entries.
127 	 * Before offloading the flow verify that all of them have
128 	 * a valid neighbour.
129 	 */
130 	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
131 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
132 			continue;
133 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
134 			all_flow_encaps_valid = false;
135 			break;
136 		}
137 	}
138 
139 	return all_flow_encaps_valid;
140 }
141 
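/* Called when the neighbour used by an encap entry becomes valid (the
 * caller serializes against other encap/neigh updates, e.g. via
 * esw->offloads.encap_tbl_lock): allocate a packet reformat object for
 * the cached encap header and move the flows on flow_list from their
 * slow path rules to offloaded encap rules.
 */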
142 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
143 			      struct mlx5e_encap_entry *e,
144 			      struct list_head *flow_list)
145 {
146 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
147 	struct mlx5_pkt_reformat_params reformat_params;
148 	struct mlx5_esw_flow_attr *esw_attr;
149 	struct mlx5_flow_handle *rule;
150 	struct mlx5_flow_attr *attr;
151 	struct mlx5_flow_spec *spec;
152 	struct mlx5e_tc_flow *flow;
153 	int err;
154 
155 	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
156 		return;
157 
158 	memset(&reformat_params, 0, sizeof(reformat_params));
159 	reformat_params.type = e->reformat_type;
160 	reformat_params.size = e->encap_size;
161 	reformat_params.data = e->encap_header;
162 	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
163 						     &reformat_params,
164 						     MLX5_FLOW_NAMESPACE_FDB);
165 	if (IS_ERR(e->pkt_reformat)) {
166 		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %ld\n",
167 			       PTR_ERR(e->pkt_reformat));
168 		return;
169 	}
170 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
171 	mlx5e_rep_queue_neigh_stats_work(priv);
172 
173 	list_for_each_entry(flow, flow_list, tmp_list) {
174 		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
175 			continue;
176 
177 		spec = &flow->attr->parse_attr->spec;
178 
179 		attr = mlx5e_tc_get_encap_attr(flow);
180 		esw_attr = attr->esw_attr;
181 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
182 		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
183 
184 		/* Do not offload flows with unresolved neighbors */
185 		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
186 			continue;
187 
188 		err = mlx5e_tc_offload_flow_post_acts(flow);
189 		if (err) {
190 			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
191 				       err);
192 			continue;
193 		}
194 
195 		/* update from slow path rule to encap rule */
196 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
197 		if (IS_ERR(rule)) {
198 			mlx5e_tc_unoffload_flow_post_acts(flow);
199 			err = PTR_ERR(rule);
200 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
201 				       err);
202 			continue;
203 		}
204 
205 		mlx5e_tc_unoffload_from_slow_path(esw, flow);
206 		flow->rule[0] = rule;
207 		/* was unset when slow path rule removed */
208 		flow_flag_set(flow, OFFLOADED);
209 	}
210 }
211 
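/* Counterpart of mlx5e_tc_encap_flows_add(): the neighbour is no longer
 * valid, so move the offloaded flows on flow_list back to slow path
 * rules and release the encap entry's packet reformat object.
 */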
212 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
213 			      struct mlx5e_encap_entry *e,
214 			      struct list_head *flow_list)
215 {
216 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
217 	struct mlx5_esw_flow_attr *esw_attr;
218 	struct mlx5_flow_handle *rule;
219 	struct mlx5_flow_attr *attr;
220 	struct mlx5_flow_spec *spec;
221 	struct mlx5e_tc_flow *flow;
222 	int err;
223 
224 	list_for_each_entry(flow, flow_list, tmp_list) {
225 		if (!mlx5e_is_offloaded_flow(flow))
226 			continue;
227 
228 		attr = mlx5e_tc_get_encap_attr(flow);
229 		esw_attr = attr->esw_attr;
230 		/* mark the flow's encap dest as non-valid */
231 		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
232 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
233 
234 		/* Clear pkt_reformat before checking the slow path flag: a flow
235 		 * may appear more than once in flow_list, and a later iteration
236 		 * still needs its pkt_reformat cleared even though SLOW is set.
237 		 */
238 		if (flow_flag_test(flow, SLOW))
239 			continue;
240 
241 		/* update from encap rule to slow path rule */
242 		spec = &flow->attr->parse_attr->spec;
243 		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
244 
245 		if (IS_ERR(rule)) {
246 			err = PTR_ERR(rule);
247 			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
248 				       err);
249 			continue;
250 		}
251 
252 		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
253 		mlx5e_tc_unoffload_flow_post_acts(flow);
254 		flow->rule[0] = rule;
255 		/* was unset when fast path rule removed */
256 		flow_flag_set(flow, OFFLOADED);
257 	}
258 
259 	/* the encap was valid, so release its packet reformat object */
260 	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
261 	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
262 	e->pkt_reformat = NULL;
263 }
264 
265 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
266 				struct list_head *flow_list,
267 				int index)
268 {
269 	if (IS_ERR(mlx5e_flow_get(flow))) {
270 		/* Flow is being deleted concurrently. Wait for it to be
271 		 * unoffloaded from hardware, otherwise deleting encap will
272 		 * fail.
273 		 */
274 		wait_for_completion(&flow->del_hw_done);
275 		return;
276 	}
277 	wait_for_completion(&flow->init_done);
278 
279 	flow->tmp_entry_index = index;
280 	list_add(&flow->tmp_list, flow_list);
281 }
282 
283 /* Takes reference to all flows attached to encap and adds the flows to
284  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
285  */
286 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
287 {
288 	struct encap_flow_item *efi;
289 	struct mlx5e_tc_flow *flow;
290 
291 	list_for_each_entry(efi, &e->flows, list) {
292 		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
293 		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
294 	}
295 }
296 
297 /* Takes reference to all flows attached to route and adds the flows to
298  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
299  */
300 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
301 					     struct list_head *flow_list)
302 {
303 	struct mlx5e_tc_flow *flow;
304 
305 	list_for_each_entry(flow, &r->decap_flows, decap_routes)
306 		mlx5e_take_tmp_flow(flow, flow_list, 0);
307 }
308 
309 typedef bool (match_cb)(struct mlx5e_encap_entry *);
310 
311 static struct mlx5e_encap_entry *
312 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
313 			      struct mlx5e_encap_entry *e,
314 			      match_cb match)
315 {
316 	struct mlx5e_encap_entry *next = NULL;
317 
318 retry:
319 	rcu_read_lock();
320 
321 	/* find encap with non-zero reference counter value */
322 	for (next = e ?
323 		     list_next_or_null_rcu(&nhe->encap_list,
324 					   &e->encap_list,
325 					   struct mlx5e_encap_entry,
326 					   encap_list) :
327 		     list_first_or_null_rcu(&nhe->encap_list,
328 					    struct mlx5e_encap_entry,
329 					    encap_list);
330 	     next;
331 	     next = list_next_or_null_rcu(&nhe->encap_list,
332 					  &next->encap_list,
333 					  struct mlx5e_encap_entry,
334 					  encap_list))
335 		if (mlx5e_encap_take(next))
336 			break;
337 
338 	rcu_read_unlock();
339 
340 	/* release starting encap */
341 	if (e)
342 		mlx5e_encap_put(netdev_priv(e->out_dev), e);
343 	if (!next)
344 		return next;
345 
346 	/* wait for encap to be fully initialized */
347 	wait_for_completion(&next->res_ready);
348 	/* continue searching if encap entry is not in valid state after completion */
349 	if (!match(next)) {
350 		e = next;
351 		goto retry;
352 	}
353 
354 	return next;
355 }
356 
357 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
358 {
359 	return e->flags & MLX5_ENCAP_ENTRY_VALID;
360 }
361 
362 static struct mlx5e_encap_entry *
363 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
364 			   struct mlx5e_encap_entry *e)
365 {
366 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
367 }
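/* Illustrative use of the encap iterators (a sketch only, mirroring
 * mlx5e_tc_update_neigh_used_value() below):
 *
 *	struct mlx5e_encap_entry *e = NULL;
 *
 *	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
 *		... use referenced, fully initialized entry e ...
 *		if (need_to_stop) {
 *			mlx5e_encap_put(netdev_priv(e->out_dev), e);
 *			break;
 *		}
 *	}
 *
 * The iterator releases the previous entry before returning the next
 * one, so only an early break requires an explicit put.
 */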
368 
369 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
370 {
371 	return e->compl_result >= 0;
372 }
373 
374 struct mlx5e_encap_entry *
375 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
376 			  struct mlx5e_encap_entry *e)
377 {
378 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
379 }
380 
381 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
382 {
383 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
384 	struct mlx5e_encap_entry *e = NULL;
385 	struct mlx5e_tc_flow *flow;
386 	struct mlx5_fc *counter;
387 	struct neigh_table *tbl;
388 	bool neigh_used = false;
389 	struct neighbour *n;
390 	u64 lastuse;
391 
392 	if (m_neigh->family == AF_INET)
393 		tbl = &arp_tbl;
394 #if IS_ENABLED(CONFIG_IPV6)
395 	else if (m_neigh->family == AF_INET6)
396 		tbl = ipv6_stub->nd_tbl;
397 #endif
398 	else
399 		return;
400 
401 	/* mlx5e_get_next_valid_encap() releases previous encap before returning
402 	 * next one.
403 	 */
404 	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
405 		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
406 		struct encap_flow_item *efi, *tmp;
407 		struct mlx5_eswitch *esw;
408 		LIST_HEAD(flow_list);
409 
410 		esw = priv->mdev->priv.eswitch;
411 		mutex_lock(&esw->offloads.encap_tbl_lock);
412 		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
413 			flow = container_of(efi, struct mlx5e_tc_flow,
414 					    encaps[efi->index]);
415 			if (IS_ERR(mlx5e_flow_get(flow)))
416 				continue;
417 			list_add(&flow->tmp_list, &flow_list);
418 
419 			if (mlx5e_is_offloaded_flow(flow)) {
420 				counter = mlx5e_tc_get_counter(flow);
421 				lastuse = mlx5_fc_query_lastuse(counter);
422 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
423 					neigh_used = true;
424 					break;
425 				}
426 			}
427 		}
428 		mutex_unlock(&esw->offloads.encap_tbl_lock);
429 
430 		mlx5e_put_flow_list(priv, &flow_list);
431 		if (neigh_used) {
432 			/* release current encap before breaking the loop */
433 			mlx5e_encap_put(priv, e);
434 			break;
435 		}
436 	}
437 
438 	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
439 
440 	if (neigh_used) {
441 		nhe->reported_lastuse = jiffies;
442 
443 		/* find the relevant neigh according to the cached device and
444 		 * dst ip pair
445 		 */
446 		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
447 		if (!n)
448 			return;
449 
450 		neigh_event_send(n, NULL);
451 		neigh_release(n);
452 	}
453 }
454 
455 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
456 {
457 	WARN_ON(!list_empty(&e->flows));
458 
459 	if (e->compl_result > 0) {
460 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
461 
462 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
463 			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
464 	}
465 
466 	kfree(e->tun_info);
467 	kfree(e->encap_header);
468 	kfree_rcu(e, rcu);
469 }
470 
471 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
472 				struct mlx5e_decap_entry *d)
473 {
474 	WARN_ON(!list_empty(&d->flows));
475 
476 	if (!d->compl_result)
477 		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
478 
479 	kfree_rcu(d, rcu);
480 }
481 
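/* Release a reference to an encap entry. Must be called without
 * esw->offloads.encap_tbl_lock held: the final put takes that lock to
 * unlink the entry before freeing it.
 */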
482 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
483 {
484 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
485 
486 	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
487 		return;
488 	list_del(&e->route_list);
489 	hash_del_rcu(&e->encap_hlist);
490 	mutex_unlock(&esw->offloads.encap_tbl_lock);
491 
492 	mlx5e_encap_dealloc(priv, e);
493 }
494 
495 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
496 {
497 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
498 
499 	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
500 		return;
501 	hash_del_rcu(&d->hlist);
502 	mutex_unlock(&esw->offloads.decap_tbl_lock);
503 
504 	mlx5e_decap_dealloc(priv, d);
505 }
506 
507 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
508 				     struct mlx5e_tc_flow *flow,
509 				     int out_index);
510 
511 void mlx5e_detach_encap(struct mlx5e_priv *priv,
512 			struct mlx5e_tc_flow *flow,
513 			struct mlx5_flow_attr *attr,
514 			int out_index)
515 {
516 	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
517 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
518 
519 	if (!mlx5e_is_eswitch_flow(flow))
520 		return;
521 
522 	if (attr->esw_attr->dests[out_index].flags &
523 	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
524 		mlx5e_detach_encap_route(priv, flow, out_index);
525 
526 	/* flow wasn't fully initialized */
527 	if (!e)
528 		return;
529 
530 	mutex_lock(&esw->offloads.encap_tbl_lock);
531 	list_del(&flow->encaps[out_index].list);
532 	flow->encaps[out_index].e = NULL;
533 	if (!refcount_dec_and_test(&e->refcnt)) {
534 		mutex_unlock(&esw->offloads.encap_tbl_lock);
535 		return;
536 	}
537 	list_del(&e->route_list);
538 	hash_del_rcu(&e->encap_hlist);
539 	mutex_unlock(&esw->offloads.encap_tbl_lock);
540 
541 	mlx5e_encap_dealloc(priv, e);
542 }
543 
544 void mlx5e_detach_decap(struct mlx5e_priv *priv,
545 			struct mlx5e_tc_flow *flow)
546 {
547 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
548 	struct mlx5e_decap_entry *d = flow->decap_reformat;
549 
550 	if (!d)
551 		return;
552 
553 	mutex_lock(&esw->offloads.decap_tbl_lock);
554 	list_del(&flow->l3_to_l2_reformat);
555 	flow->decap_reformat = NULL;
556 
557 	if (!refcount_dec_and_test(&d->refcnt)) {
558 		mutex_unlock(&esw->offloads.decap_tbl_lock);
559 		return;
560 	}
561 	hash_del_rcu(&d->hlist);
562 	mutex_unlock(&esw->offloads.decap_tbl_lock);
563 
564 	mlx5e_decap_dealloc(priv, d);
565 }
566 
567 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
568 					   struct mlx5e_encap_key *b)
569 {
570 	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
571 		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
572 }
573 
574 bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
575 					   struct mlx5e_encap_key *b,
576 					   __be16 tun_flags)
577 {
578 	struct ip_tunnel_info *a_info;
579 	struct ip_tunnel_info *b_info;
580 	bool a_has_opts, b_has_opts;
581 
582 	if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
583 		return false;
584 
585 	a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
586 	b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);
587 
588 	/* keys are equal when neither has any options attached */
589 	if (!a_has_opts && !b_has_opts)
590 		return true;
591 
592 	if (a_has_opts != b_has_opts)
593 		return false;
594 
595 	/* options are stored in memory right after the ip_tunnel_info struct */
596 	a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
597 	b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
598 
599 	return a_info->options_len == b_info->options_len &&
600 	       !memcmp(ip_tunnel_info_opts(a_info),
601 		       ip_tunnel_info_opts(b_info),
602 		       a_info->options_len);
603 }
604 
605 static int cmp_decap_info(struct mlx5e_decap_key *a,
606 			  struct mlx5e_decap_key *b)
607 {
608 	return memcmp(&a->key, &b->key, sizeof(b->key));
609 }
610 
611 static int hash_encap_info(struct mlx5e_encap_key *key)
612 {
613 	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
614 		     key->tc_tunnel->tunnel_type);
615 }
616 
617 static int hash_decap_info(struct mlx5e_decap_key *key)
618 {
619 	return jhash(&key->key, sizeof(key->key), 0);
620 }
621 
622 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
623 {
624 	return refcount_inc_not_zero(&e->refcnt);
625 }
626 
627 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
628 {
629 	return refcount_inc_not_zero(&e->refcnt);
630 }
631 
632 static struct mlx5e_encap_entry *
633 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
634 		uintptr_t hash_key)
635 {
636 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
637 	struct mlx5e_encap_key e_key;
638 	struct mlx5e_encap_entry *e;
639 
640 	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
641 				   encap_hlist, hash_key) {
642 		e_key.ip_tun_key = &e->tun_info->key;
643 		e_key.tc_tunnel = e->tunnel;
644 		if (e->tunnel->encap_info_equal(&e_key, key) &&
645 		    mlx5e_encap_take(e))
646 			return e;
647 	}
648 
649 	return NULL;
650 }
651 
652 static struct mlx5e_decap_entry *
653 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
654 		uintptr_t hash_key)
655 {
656 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
657 	struct mlx5e_decap_key r_key;
658 	struct mlx5e_decap_entry *e;
659 
660 	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
661 				   hlist, hash_key) {
662 		r_key = e->key;
663 		if (!cmp_decap_info(&r_key, key) &&
664 		    mlx5e_decap_take(e))
665 			return e;
666 	}
667 	return NULL;
668 }
669 
670 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
671 {
672 	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
673 
674 	return kmemdup(tun_info, tun_size, GFP_KERNEL);
675 }
676 
677 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
678 				      struct mlx5e_tc_flow *flow,
679 				      int out_index,
680 				      struct mlx5e_encap_entry *e,
681 				      struct netlink_ext_ack *extack)
682 {
683 	int i;
684 
685 	for (i = 0; i < out_index; i++) {
686 		if (flow->encaps[i].e != e)
687 			continue;
688 		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
689 		netdev_err(priv->netdev, "can't duplicate encap action\n");
690 		return true;
691 	}
692 
693 	return false;
694 }
695 
696 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
697 			       struct mlx5_flow_attr *attr,
698 			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
699 			       struct net_device *out_dev,
700 			       int route_dev_ifindex,
701 			       int out_index)
702 {
703 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
704 	struct net_device *route_dev;
705 	u16 vport_num;
706 	int err = 0;
707 	u32 data;
708 
709 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
710 
711 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
712 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
713 		goto out;
714 
715 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
716 	if (err)
717 		goto out;
718 
719 	attr->dest_chain = 0;
720 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
721 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
722 	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
723 						       vport_num);
724 	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
725 						   MLX5_FLOW_NAMESPACE_FDB,
726 						   VPORT_TO_REG, data);
727 	if (err >= 0) {
728 		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
729 		err = 0;
730 	}
731 
732 out:
733 	if (route_dev)
734 		dev_put(route_dev);
735 	return err;
736 }
737 
738 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
739 				  struct mlx5_esw_flow_attr *attr,
740 				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
741 				  struct net_device *out_dev,
742 				  int route_dev_ifindex,
743 				  int out_index)
744 {
745 	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
746 	struct net_device *route_dev;
747 	u16 vport_num;
748 	int err = 0;
749 	u32 data;
750 
751 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
752 
753 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
754 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
755 		err = -ENODEV;
756 		goto out;
757 	}
758 
759 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
760 	if (err)
761 		goto out;
762 
763 	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
764 						       vport_num);
765 	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
766 
767 out:
768 	if (route_dev)
769 		dev_put(route_dev);
770 	return err;
771 }
772 
773 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
774 {
775 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
776 	struct mlx5_rep_uplink_priv *uplink_priv;
777 	struct mlx5e_rep_priv *uplink_rpriv;
778 	struct mlx5e_tc_tun_encap *encap;
779 	unsigned int ret;
780 
781 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
782 	uplink_priv = &uplink_rpriv->uplink_priv;
783 	encap = uplink_priv->encap;
784 
785 	spin_lock_bh(&encap->route_lock);
786 	ret = encap->route_tbl_last_update;
787 	spin_unlock_bh(&encap->route_lock);
788 	return ret;
789 }
790 
791 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
792 				    struct mlx5e_tc_flow *flow,
793 				    struct mlx5_flow_attr *attr,
794 				    struct mlx5e_encap_entry *e,
795 				    bool new_encap_entry,
796 				    unsigned long tbl_time_before,
797 				    int out_index);
798 
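/* Attach a flow to the encap entry for destination out_index: look up an
 * existing entry by tunnel key, or create one and resolve its neighbour,
 * then either reuse the entry's packet reformat object (entry valid) or
 * mark the flow for the slow path until the neighbour resolves. Takes and
 * releases esw->offloads.encap_tbl_lock internally, so the caller must
 * not hold it.
 */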
799 int mlx5e_attach_encap(struct mlx5e_priv *priv,
800 		       struct mlx5e_tc_flow *flow,
801 		       struct mlx5_flow_attr *attr,
802 		       struct net_device *mirred_dev,
803 		       int out_index,
804 		       struct netlink_ext_ack *extack,
805 		       struct net_device **encap_dev)
806 {
807 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
808 	struct mlx5e_tc_flow_parse_attr *parse_attr;
809 	const struct ip_tunnel_info *tun_info;
810 	const struct mlx5e_mpls_info *mpls_info;
811 	unsigned long tbl_time_before = 0;
812 	struct mlx5e_encap_entry *e;
813 	struct mlx5e_encap_key key;
814 	bool entry_created = false;
815 	unsigned short family;
816 	uintptr_t hash_key;
817 	int err = 0;
818 
819 	parse_attr = attr->parse_attr;
820 	tun_info = parse_attr->tun_info[out_index];
821 	mpls_info = &parse_attr->mpls_info[out_index];
822 	family = ip_tunnel_info_af(tun_info);
823 	key.ip_tun_key = &tun_info->key;
824 	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
825 	if (!key.tc_tunnel) {
826 		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
827 		return -EOPNOTSUPP;
828 	}
829 
830 	hash_key = hash_encap_info(&key);
831 
832 	mutex_lock(&esw->offloads.encap_tbl_lock);
833 	e = mlx5e_encap_get(priv, &key, hash_key);
834 
835 	/* must verify whether the encap entry is valid before reusing it */
836 	if (e) {
837 		/* Check that entry was not already attached to this flow */
838 		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
839 			err = -EOPNOTSUPP;
840 			goto out_err;
841 		}
842 
843 		mutex_unlock(&esw->offloads.encap_tbl_lock);
844 		wait_for_completion(&e->res_ready);
845 
846 		/* Protect against concurrent neigh update. */
847 		mutex_lock(&esw->offloads.encap_tbl_lock);
848 		if (e->compl_result < 0) {
849 			err = -EREMOTEIO;
850 			goto out_err;
851 		}
852 		goto attach_flow;
853 	}
854 
855 	e = kzalloc(sizeof(*e), GFP_KERNEL);
856 	if (!e) {
857 		err = -ENOMEM;
858 		goto out_err;
859 	}
860 
861 	refcount_set(&e->refcnt, 1);
862 	init_completion(&e->res_ready);
863 	entry_created = true;
864 	INIT_LIST_HEAD(&e->route_list);
865 
866 	tun_info = mlx5e_dup_tun_info(tun_info);
867 	if (!tun_info) {
868 		err = -ENOMEM;
869 		goto out_err_init;
870 	}
871 	e->tun_info = tun_info;
872 	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
873 	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
874 	if (err)
875 		goto out_err_init;
876 
877 	INIT_LIST_HEAD(&e->flows);
878 	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
879 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
880 	mutex_unlock(&esw->offloads.encap_tbl_lock);
881 
882 	if (family == AF_INET)
883 		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
884 	else if (family == AF_INET6)
885 		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
886 
887 	/* Protect against concurrent neigh update. */
888 	mutex_lock(&esw->offloads.encap_tbl_lock);
889 	complete_all(&e->res_ready);
890 	if (err) {
891 		e->compl_result = err;
892 		goto out_err;
893 	}
894 	e->compl_result = 1;
895 
896 attach_flow:
897 	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
898 				       tbl_time_before, out_index);
899 	if (err)
900 		goto out_err;
901 
902 	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
903 	if (err == -EOPNOTSUPP) {
904 		/* If device doesn't support int port offload,
905 		 * redirect to uplink vport.
906 		 */
907 		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
908 		err = 0;
909 	} else if (err) {
910 		goto out_err;
911 	}
912 
913 	flow->encaps[out_index].e = e;
914 	list_add(&flow->encaps[out_index].list, &e->flows);
915 	flow->encaps[out_index].index = out_index;
916 	*encap_dev = e->out_dev;
917 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
918 		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
919 		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
920 	} else {
921 		flow_flag_set(flow, SLOW);
922 	}
923 	mutex_unlock(&esw->offloads.encap_tbl_lock);
924 
925 	return err;
926 
927 out_err:
928 	mutex_unlock(&esw->offloads.encap_tbl_lock);
929 	if (e)
930 		mlx5e_encap_put(priv, e);
931 	return err;
932 
933 out_err_init:
934 	mutex_unlock(&esw->offloads.encap_tbl_lock);
935 	kfree(tun_info);
936 	kfree(e);
937 	return err;
938 }
939 
940 int mlx5e_attach_decap(struct mlx5e_priv *priv,
941 		       struct mlx5e_tc_flow *flow,
942 		       struct netlink_ext_ack *extack)
943 {
944 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
945 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
946 	struct mlx5_pkt_reformat_params reformat_params;
947 	struct mlx5e_decap_entry *d;
948 	struct mlx5e_decap_key key;
949 	uintptr_t hash_key;
950 	int err = 0;
951 
952 	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
953 		NL_SET_ERR_MSG_MOD(extack,
954 				   "encap header larger than max supported");
955 		return -EOPNOTSUPP;
956 	}
957 
958 	key.key = attr->eth;
959 	hash_key = hash_decap_info(&key);
960 	mutex_lock(&esw->offloads.decap_tbl_lock);
961 	d = mlx5e_decap_get(priv, &key, hash_key);
962 	if (d) {
963 		mutex_unlock(&esw->offloads.decap_tbl_lock);
964 		wait_for_completion(&d->res_ready);
965 		mutex_lock(&esw->offloads.decap_tbl_lock);
966 		if (d->compl_result) {
967 			err = -EREMOTEIO;
968 			goto out_free;
969 		}
970 		goto found;
971 	}
972 
973 	d = kzalloc(sizeof(*d), GFP_KERNEL);
974 	if (!d) {
975 		err = -ENOMEM;
976 		goto out_err;
977 	}
978 
979 	d->key = key;
980 	refcount_set(&d->refcnt, 1);
981 	init_completion(&d->res_ready);
982 	INIT_LIST_HEAD(&d->flows);
983 	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
984 	mutex_unlock(&esw->offloads.decap_tbl_lock);
985 
986 	memset(&reformat_params, 0, sizeof(reformat_params));
987 	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
988 	reformat_params.size = sizeof(attr->eth);
989 	reformat_params.data = &attr->eth;
990 	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
991 						     &reformat_params,
992 						     MLX5_FLOW_NAMESPACE_FDB);
993 	if (IS_ERR(d->pkt_reformat)) {
994 		err = PTR_ERR(d->pkt_reformat);
995 		d->compl_result = err;
996 	}
997 	mutex_lock(&esw->offloads.decap_tbl_lock);
998 	complete_all(&d->res_ready);
999 	if (err)
1000 		goto out_free;
1001 
1002 found:
1003 	flow->decap_reformat = d;
1004 	attr->decap_pkt_reformat = d->pkt_reformat;
1005 	list_add(&flow->l3_to_l2_reformat, &d->flows);
1006 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1007 	return 0;
1008 
1009 out_free:
1010 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1011 	mlx5e_decap_put(priv, d);
1012 	return err;
1013 
1014 out_err:
1015 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1016 	return err;
1017 }
1018 
1019 int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
1020 				 struct mlx5e_tc_flow *flow,
1021 				 struct mlx5_flow_attr *attr,
1022 				 struct netlink_ext_ack *extack,
1023 				 bool *vf_tun)
1024 {
1025 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1026 	struct mlx5_esw_flow_attr *esw_attr;
1027 	struct net_device *encap_dev = NULL;
1028 	struct mlx5e_rep_priv *rpriv;
1029 	struct mlx5e_priv *out_priv;
1030 	int out_index;
1031 	int err = 0;
1032 
1033 	if (!mlx5e_is_eswitch_flow(flow))
1034 		return 0;
1035 
1036 	parse_attr = attr->parse_attr;
1037 	esw_attr = attr->esw_attr;
1038 	*vf_tun = false;
1039 
1040 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1041 		struct net_device *out_dev;
1042 		int mirred_ifindex;
1043 
1044 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1045 			continue;
1046 
1047 		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1048 		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1049 		if (!out_dev) {
1050 			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1051 			err = -ENODEV;
1052 			goto out;
1053 		}
1054 		err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1055 					 extack, &encap_dev);
1056 		dev_put(out_dev);
1057 		if (err)
1058 			goto out;
1059 
1060 		if (esw_attr->dests[out_index].flags &
1061 		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1062 		    !esw_attr->dest_int_port)
1063 			*vf_tun = true;
1064 
1065 		out_priv = netdev_priv(encap_dev);
1066 		rpriv = out_priv->ppriv;
1067 		esw_attr->dests[out_index].rep = rpriv->rep;
1068 		esw_attr->dests[out_index].mdev = out_priv->mdev;
1069 	}
1070 
1071 	if (*vf_tun && esw_attr->out_count > 1) {
1072 		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1073 		err = -EOPNOTSUPP;
1074 		goto out;
1075 	}
1076 
1077 out:
1078 	return err;
1079 }
1080 
1081 void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
1082 				    struct mlx5e_tc_flow *flow,
1083 				    struct mlx5_flow_attr *attr)
1084 {
1085 	struct mlx5_esw_flow_attr *esw_attr;
1086 	int out_index;
1087 
1088 	if (!mlx5e_is_eswitch_flow(flow))
1089 		return;
1090 
1091 	esw_attr = attr->esw_attr;
1092 
1093 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1094 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1095 			continue;
1096 
1097 		mlx5e_detach_encap(flow->priv, flow, attr, out_index);
1098 		kfree(attr->parse_attr->tun_info[out_index]);
1099 	}
1100 }
1101 
1102 static int cmp_route_info(struct mlx5e_route_key *a,
1103 			  struct mlx5e_route_key *b)
1104 {
1105 	if (a->ip_version == 4 && b->ip_version == 4)
1106 		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1107 			      sizeof(a->endpoint_ip.v4));
1108 	else if (a->ip_version == 6 && b->ip_version == 6)
1109 		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1110 			      sizeof(a->endpoint_ip.v6));
1111 	return 1;
1112 }
1113 
1114 static u32 hash_route_info(struct mlx5e_route_key *key)
1115 {
1116 	if (key->ip_version == 4)
1117 		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1118 	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1119 }
1120 
1121 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1122 				struct mlx5e_route_entry *r)
1123 {
1124 	WARN_ON(!list_empty(&r->decap_flows));
1125 	WARN_ON(!list_empty(&r->encap_entries));
1126 
1127 	kfree_rcu(r, rcu);
1128 }
1129 
1130 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1131 {
1132 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1133 
1134 	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1135 		return;
1136 
1137 	hash_del_rcu(&r->hlist);
1138 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1139 
1140 	mlx5e_route_dealloc(priv, r);
1141 }
1142 
1143 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1144 {
1145 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1146 
1147 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1148 
1149 	if (!refcount_dec_and_test(&r->refcnt))
1150 		return;
1151 	hash_del_rcu(&r->hlist);
1152 	mlx5e_route_dealloc(priv, r);
1153 }
1154 
1155 static struct mlx5e_route_entry *
1156 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1157 		u32 hash_key)
1158 {
1159 	struct mlx5e_route_key r_key;
1160 	struct mlx5e_route_entry *r;
1161 
1162 	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1163 		r_key = r->key;
1164 		if (!cmp_route_info(&r_key, key) &&
1165 		    refcount_inc_not_zero(&r->refcnt))
1166 			return r;
1167 	}
1168 	return NULL;
1169 }
1170 
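/* Keeping route entries in sync with the kernel FIB relies on
 * route_tbl_last_update: the FIB notifier bumps it (under route_lock)
 * before looking up an entry, and the attach paths sample it before
 * inserting a new entry and compare again afterwards. If the value
 * changed, a FIB event may have raced with the insertion and missed the
 * new entry, so an explicit FIB_EVENT_ENTRY_REPLACE update is queued.
 */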
1171 static struct mlx5e_route_entry *
1172 mlx5e_route_get_create(struct mlx5e_priv *priv,
1173 		       struct mlx5e_route_key *key,
1174 		       int tunnel_dev_index,
1175 		       unsigned long *route_tbl_change_time)
1176 {
1177 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1178 	struct mlx5_rep_uplink_priv *uplink_priv;
1179 	struct mlx5e_rep_priv *uplink_rpriv;
1180 	struct mlx5e_tc_tun_encap *encap;
1181 	struct mlx5e_route_entry *r;
1182 	u32 hash_key;
1183 
1184 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1185 	uplink_priv = &uplink_rpriv->uplink_priv;
1186 	encap = uplink_priv->encap;
1187 
1188 	hash_key = hash_route_info(key);
1189 	spin_lock_bh(&encap->route_lock);
1190 	r = mlx5e_route_get(encap, key, hash_key);
1191 	spin_unlock_bh(&encap->route_lock);
1192 	if (r) {
1193 		if (!mlx5e_route_entry_valid(r)) {
1194 			mlx5e_route_put_locked(priv, r);
1195 			return ERR_PTR(-EINVAL);
1196 		}
1197 		return r;
1198 	}
1199 
1200 	r = kzalloc(sizeof(*r), GFP_KERNEL);
1201 	if (!r)
1202 		return ERR_PTR(-ENOMEM);
1203 
1204 	r->key = *key;
1205 	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1206 	r->tunnel_dev_index = tunnel_dev_index;
1207 	refcount_set(&r->refcnt, 1);
1208 	INIT_LIST_HEAD(&r->decap_flows);
1209 	INIT_LIST_HEAD(&r->encap_entries);
1210 
1211 	spin_lock_bh(&encap->route_lock);
1212 	*route_tbl_change_time = encap->route_tbl_last_update;
1213 	hash_add(encap->route_tbl, &r->hlist, hash_key);
1214 	spin_unlock_bh(&encap->route_lock);
1215 
1216 	return r;
1217 }
1218 
1219 static struct mlx5e_route_entry *
1220 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1221 {
1222 	u32 hash_key = hash_route_info(key);
1223 	struct mlx5e_route_entry *r;
1224 
1225 	spin_lock_bh(&encap->route_lock);
1226 	encap->route_tbl_last_update = jiffies;
1227 	r = mlx5e_route_get(encap, key, hash_key);
1228 	spin_unlock_bh(&encap->route_lock);
1229 
1230 	return r;
1231 }
1232 
1233 struct mlx5e_tc_fib_event_data {
1234 	struct work_struct work;
1235 	unsigned long event;
1236 	struct mlx5e_route_entry *r;
1237 	struct net_device *ul_dev;
1238 };
1239 
1240 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1241 static struct mlx5e_tc_fib_event_data *
1242 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1243 {
1244 	struct mlx5e_tc_fib_event_data *fib_work;
1245 
1246 	fib_work = kzalloc(sizeof(*fib_work), flags);
1247 	if (WARN_ON(!fib_work))
1248 		return NULL;
1249 
1250 	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1251 	fib_work->event = event;
1252 	fib_work->ul_dev = ul_dev;
1253 
1254 	return fib_work;
1255 }
1256 
1257 static int
1258 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1259 			   struct mlx5e_route_entry *r,
1260 			   unsigned long event)
1261 {
1262 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1263 	struct mlx5e_tc_fib_event_data *fib_work;
1264 	struct mlx5e_rep_priv *uplink_rpriv;
1265 	struct net_device *ul_dev;
1266 
1267 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1268 	ul_dev = uplink_rpriv->netdev;
1269 
1270 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1271 	if (!fib_work)
1272 		return -ENOMEM;
1273 
1274 	dev_hold(ul_dev);
1275 	refcount_inc(&r->refcnt);
1276 	fib_work->r = r;
1277 	queue_work(priv->wq, &fib_work->work);
1278 
1279 	return 0;
1280 }
1281 
1282 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1283 			     struct mlx5e_tc_flow *flow)
1284 {
1285 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1286 	unsigned long tbl_time_before, tbl_time_after;
1287 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1288 	struct mlx5_flow_attr *attr = flow->attr;
1289 	struct mlx5_esw_flow_attr *esw_attr;
1290 	struct mlx5e_route_entry *r;
1291 	struct mlx5e_route_key key;
1292 	int err = 0;
1293 
1294 	esw_attr = attr->esw_attr;
1295 	parse_attr = attr->parse_attr;
1296 	mutex_lock(&esw->offloads.encap_tbl_lock);
1297 	if (!esw_attr->rx_tun_attr)
1298 		goto out;
1299 
1300 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1301 	tbl_time_after = tbl_time_before;
1302 	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1303 	if (err || !esw_attr->rx_tun_attr->decap_vport)
1304 		goto out;
1305 
1306 	key.ip_version = attr->tun_ip_version;
1307 	if (key.ip_version == 4)
1308 		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1309 	else
1310 		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1311 
1312 	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1313 				   &tbl_time_after);
1314 	if (IS_ERR(r)) {
1315 		err = PTR_ERR(r);
1316 		goto out;
1317 	}
1318 	/* Routing changed concurrently. The FIB event handler might have
1319 	 * missed the new entry, so schedule an update.
1320 	 */
1321 	if (tbl_time_before != tbl_time_after) {
1322 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1323 		if (err) {
1324 			mlx5e_route_put_locked(priv, r);
1325 			goto out;
1326 		}
1327 	}
1328 
1329 	flow->decap_route = r;
1330 	list_add(&flow->decap_routes, &r->decap_flows);
1331 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1332 	return 0;
1333 
1334 out:
1335 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1336 	return err;
1337 }
1338 
1339 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1340 				    struct mlx5e_tc_flow *flow,
1341 				    struct mlx5_flow_attr *attr,
1342 				    struct mlx5e_encap_entry *e,
1343 				    bool new_encap_entry,
1344 				    unsigned long tbl_time_before,
1345 				    int out_index)
1346 {
1347 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1348 	unsigned long tbl_time_after = tbl_time_before;
1349 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1350 	const struct ip_tunnel_info *tun_info;
1351 	struct mlx5_esw_flow_attr *esw_attr;
1352 	struct mlx5e_route_entry *r;
1353 	struct mlx5e_route_key key;
1354 	unsigned short family;
1355 	int err = 0;
1356 
1357 	esw_attr = attr->esw_attr;
1358 	parse_attr = attr->parse_attr;
1359 	tun_info = parse_attr->tun_info[out_index];
1360 	family = ip_tunnel_info_af(tun_info);
1361 
1362 	if (family == AF_INET) {
1363 		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1364 		key.ip_version = 4;
1365 	} else if (family == AF_INET6) {
1366 		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1367 		key.ip_version = 6;
1368 	}
1369 
1370 	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1371 				  e->route_dev_ifindex, out_index);
1372 	if (err || !(esw_attr->dests[out_index].flags &
1373 		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1374 		return err;
1375 
1376 	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1377 				   &tbl_time_after);
1378 	if (IS_ERR(r))
1379 		return PTR_ERR(r);
1380 	/* Routing changed concurrently. The FIB event handler might have
1381 	 * missed the new entry, so schedule an update.
1382 	 */
1383 	if (tbl_time_before != tbl_time_after) {
1384 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1385 		if (err) {
1386 			mlx5e_route_put_locked(priv, r);
1387 			return err;
1388 		}
1389 	}
1390 
1391 	flow->encap_routes[out_index].r = r;
1392 	if (new_encap_entry)
1393 		list_add(&e->route_list, &r->encap_entries);
1394 	flow->encap_routes[out_index].index = out_index;
1395 	return 0;
1396 }
1397 
1398 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1399 			      struct mlx5e_tc_flow *flow)
1400 {
1401 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1402 	struct mlx5e_route_entry *r = flow->decap_route;
1403 
1404 	if (!r)
1405 		return;
1406 
1407 	mutex_lock(&esw->offloads.encap_tbl_lock);
1408 	list_del(&flow->decap_routes);
1409 	flow->decap_route = NULL;
1410 
1411 	if (!refcount_dec_and_test(&r->refcnt)) {
1412 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1413 		return;
1414 	}
1415 	hash_del_rcu(&r->hlist);
1416 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1417 
1418 	mlx5e_route_dealloc(priv, r);
1419 }
1420 
1421 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1422 				     struct mlx5e_tc_flow *flow,
1423 				     int out_index)
1424 {
1425 	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1426 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1427 	struct mlx5e_encap_entry *e, *tmp;
1428 
1429 	if (!r)
1430 		return;
1431 
1432 	mutex_lock(&esw->offloads.encap_tbl_lock);
1433 	flow->encap_routes[out_index].r = NULL;
1434 
1435 	if (!refcount_dec_and_test(&r->refcnt)) {
1436 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1437 		return;
1438 	}
1439 	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1440 		list_del_init(&e->route_list);
1441 	hash_del_rcu(&r->hlist);
1442 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1443 
1444 	mlx5e_route_dealloc(priv, r);
1445 }
1446 
1447 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1448 				   struct mlx5e_encap_entry *e,
1449 				   struct list_head *encap_flows)
1450 {
1451 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1452 	struct mlx5e_tc_flow *flow;
1453 
1454 	list_for_each_entry(flow, encap_flows, tmp_list) {
1455 		struct mlx5_esw_flow_attr *esw_attr;
1456 		struct mlx5_flow_attr *attr;
1457 
1458 		if (!mlx5e_is_offloaded_flow(flow))
1459 			continue;
1460 
1461 		attr = mlx5e_tc_get_encap_attr(flow);
1462 		esw_attr = attr->esw_attr;
1463 
1464 		if (flow_flag_test(flow, SLOW))
1465 			mlx5e_tc_unoffload_from_slow_path(esw, flow);
1466 		else
1467 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1468 
1469 		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1470 		attr->modify_hdr = NULL;
1471 
1472 		esw_attr->dests[flow->tmp_entry_index].flags &=
1473 			~MLX5_ESW_DEST_ENCAP_VALID;
1474 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1475 	}
1476 
1477 	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1478 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1479 		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1480 		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1481 		e->pkt_reformat = NULL;
1482 	}
1483 }
1484 
1485 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1486 				  struct net_device *tunnel_dev,
1487 				  struct mlx5e_encap_entry *e,
1488 				  struct list_head *encap_flows)
1489 {
1490 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1491 	struct mlx5e_tc_flow *flow;
1492 	int err;
1493 
1494 	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1495 		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1496 		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1497 	if (err)
1498 		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1499 	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1500 
1501 	list_for_each_entry(flow, encap_flows, tmp_list) {
1502 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1503 		struct mlx5_esw_flow_attr *esw_attr;
1504 		struct mlx5_flow_handle *rule;
1505 		struct mlx5_flow_attr *attr;
1506 		struct mlx5_flow_spec *spec;
1507 
1508 		if (flow_flag_test(flow, FAILED))
1509 			continue;
1510 
1511 		spec = &flow->attr->parse_attr->spec;
1512 
1513 		attr = mlx5e_tc_get_encap_attr(flow);
1514 		esw_attr = attr->esw_attr;
1515 		parse_attr = attr->parse_attr;
1516 
1517 		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1518 					     e->out_dev, e->route_dev_ifindex,
1519 					     flow->tmp_entry_index);
1520 		if (err) {
1521 			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1522 			continue;
1523 		}
1524 
1525 		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1526 		if (err) {
1527 			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1528 				       err);
1529 			continue;
1530 		}
1531 
1532 		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1533 			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1534 			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1535 			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1536 				goto offload_to_slow_path;
1537 
1538 			err = mlx5e_tc_offload_flow_post_acts(flow);
1539 			if (err) {
1540 				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1541 					       err);
1542 				goto offload_to_slow_path;
1543 			}
1544 
1545 			/* update from slow path rule to encap rule */
1546 			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1547 			if (IS_ERR(rule)) {
1548 				mlx5e_tc_unoffload_flow_post_acts(flow);
1549 				err = PTR_ERR(rule);
1550 				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1551 					       err);
1552 			} else {
1553 				flow->rule[0] = rule;
1554 			}
1555 		} else {
1556 offload_to_slow_path:
1557 			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1558 			/* mark the flow's encap dest as non-valid */
1559 			esw_attr->dests[flow->tmp_entry_index].flags &=
1560 				~MLX5_ESW_DEST_ENCAP_VALID;
1561 
1562 			if (IS_ERR(rule)) {
1563 				err = PTR_ERR(rule);
1564 				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1565 					       err);
1566 			} else {
1567 				flow->rule[0] = rule;
1568 			}
1569 		}
1570 		flow_flag_set(flow, OFFLOADED);
1571 	}
1572 }
1573 
1574 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1575 				     struct mlx5e_route_entry *r,
1576 				     struct list_head *flow_list,
1577 				     bool replace)
1578 {
1579 	struct net_device *tunnel_dev;
1580 	struct mlx5e_encap_entry *e;
1581 
1582 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1583 	if (!tunnel_dev)
1584 		return -ENODEV;
1585 
1586 	list_for_each_entry(e, &r->encap_entries, route_list) {
1587 		LIST_HEAD(encap_flows);
1588 
1589 		mlx5e_take_all_encap_flows(e, &encap_flows);
1590 		if (list_empty(&encap_flows))
1591 			continue;
1592 
1593 		if (mlx5e_route_entry_valid(r))
1594 			mlx5e_invalidate_encap(priv, e, &encap_flows);
1595 
1596 		if (!replace) {
1597 			list_splice(&encap_flows, flow_list);
1598 			continue;
1599 		}
1600 
1601 		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1602 		list_splice(&encap_flows, flow_list);
1603 	}
1604 
1605 	return 0;
1606 }
1607 
1608 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1609 				      struct list_head *flow_list)
1610 {
1611 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1612 	struct mlx5e_tc_flow *flow;
1613 
1614 	list_for_each_entry(flow, flow_list, tmp_list)
1615 		if (mlx5e_is_offloaded_flow(flow))
1616 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1617 }
1618 
1619 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1620 				  struct list_head *decap_flows)
1621 {
1622 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1623 	struct mlx5e_tc_flow *flow;
1624 
1625 	list_for_each_entry(flow, decap_flows, tmp_list) {
1626 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1627 		struct mlx5_flow_attr *attr = flow->attr;
1628 		struct mlx5_flow_handle *rule;
1629 		struct mlx5_flow_spec *spec;
1630 		int err;
1631 
1632 		if (flow_flag_test(flow, FAILED))
1633 			continue;
1634 
1635 		parse_attr = attr->parse_attr;
1636 		spec = &parse_attr->spec;
1637 		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1638 		if (err) {
1639 			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1640 				       err);
1641 			continue;
1642 		}
1643 
1644 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1645 		if (IS_ERR(rule)) {
1646 			err = PTR_ERR(rule);
1647 			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1648 				       err);
1649 		} else {
1650 			flow->rule[0] = rule;
1651 			flow_flag_set(flow, OFFLOADED);
1652 		}
1653 	}
1654 }
1655 
1656 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1657 					  struct mlx5e_route_entry *r,
1658 					  struct list_head *flow_list,
1659 					  bool replace)
1660 {
1661 	struct net_device *tunnel_dev;
1662 	LIST_HEAD(decap_flows);
1663 
1664 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1665 	if (!tunnel_dev)
1666 		return -ENODEV;
1667 
1668 	mlx5e_take_all_route_decap_flows(r, &decap_flows);
1669 	if (mlx5e_route_entry_valid(r))
1670 		mlx5e_unoffload_flow_list(priv, &decap_flows);
1671 	if (replace)
1672 		mlx5e_reoffload_decap(priv, &decap_flows);
1673 
1674 	list_splice(&decap_flows, flow_list);
1675 
1676 	return 0;
1677 }
1678 
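/* Deferred handler for FIB events against a tracked route entry. Runs in
 * process context from the uplink workqueue so it can take the RTNL lock
 * and encap_tbl_lock: encap entries and decap flows attached to the route
 * are removed from the fast path and, on FIB_EVENT_ENTRY_REPLACE,
 * re-resolved and re-offloaded against the updated route.
 */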
1679 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1680 {
1681 	struct mlx5e_tc_fib_event_data *event_data =
1682 		container_of(work, struct mlx5e_tc_fib_event_data, work);
1683 	struct net_device *ul_dev = event_data->ul_dev;
1684 	struct mlx5e_priv *priv = netdev_priv(ul_dev);
1685 	struct mlx5e_route_entry *r = event_data->r;
1686 	struct mlx5_eswitch *esw;
1687 	LIST_HEAD(flow_list);
1688 	bool replace;
1689 	int err;
1690 
1691 	/* sync with concurrent neigh updates */
1692 	rtnl_lock();
1693 	esw = priv->mdev->priv.eswitch;
1694 	mutex_lock(&esw->offloads.encap_tbl_lock);
1695 	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1696 
1697 	if (!mlx5e_route_entry_valid(r) && !replace)
1698 		goto out;
1699 
1700 	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1701 	if (err)
1702 		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1703 			       err);
1704 
1705 	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1706 	if (err)
1707 		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1708 			       err);
1709 
1710 	if (replace)
1711 		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1712 out:
1713 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1714 	rtnl_unlock();
1715 
1716 	mlx5e_put_flow_list(priv, &flow_list);
1717 	mlx5e_route_put(priv, event_data->r);
1718 	dev_put(event_data->ul_dev);
1719 	kfree(event_data);
1720 }
1721 
1722 static struct mlx5e_tc_fib_event_data *
1723 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1724 			 struct net_device *ul_dev,
1725 			 struct mlx5e_tc_tun_encap *encap,
1726 			 unsigned long event,
1727 			 struct fib_notifier_info *info)
1728 {
1729 	struct fib_entry_notifier_info *fen_info;
1730 	struct mlx5e_tc_fib_event_data *fib_work;
1731 	struct mlx5e_route_entry *r;
1732 	struct mlx5e_route_key key;
1733 	struct net_device *fib_dev;
1734 
1735 	fen_info = container_of(info, struct fib_entry_notifier_info, info);
1736 	if (fen_info->fi->nh)
1737 		return NULL;
1738 	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1739 	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1740 	    fen_info->dst_len != 32)
1741 		return NULL;
1742 
1743 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1744 	if (!fib_work)
1745 		return ERR_PTR(-ENOMEM);
1746 
1747 	key.endpoint_ip.v4 = htonl(fen_info->dst);
1748 	key.ip_version = 4;
1749 
1750 	/* Can't fail after this point: releasing the reference to r
1751 	 * requires taking a mutex that may sleep, which is not allowed
1752 	 * in atomic context.
1753 	 */
1754 	r = mlx5e_route_lookup_for_update(encap, &key);
1755 	if (!r)
1756 		goto out;
1757 	fib_work->r = r;
1758 	dev_hold(ul_dev);
1759 
1760 	return fib_work;
1761 
1762 out:
1763 	kfree(fib_work);
1764 	return NULL;
1765 }
1766 
1767 static struct mlx5e_tc_fib_event_data *
1768 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1769 			 struct net_device *ul_dev,
1770 			 struct mlx5e_tc_tun_encap *encap,
1771 			 unsigned long event,
1772 			 struct fib_notifier_info *info)
1773 {
1774 	struct fib6_entry_notifier_info *fen_info;
1775 	struct mlx5e_tc_fib_event_data *fib_work;
1776 	struct mlx5e_route_entry *r;
1777 	struct mlx5e_route_key key;
1778 	struct net_device *fib_dev;
1779 
1780 	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1781 	fib_dev = fib6_info_nh_dev(fen_info->rt);
1782 	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1783 	    fen_info->rt->fib6_dst.plen != 128)
1784 		return NULL;
1785 
1786 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1787 	if (!fib_work)
1788 		return ERR_PTR(-ENOMEM);
1789 
1790 	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1791 	       sizeof(fen_info->rt->fib6_dst.addr));
1792 	key.ip_version = 6;
1793 
1794 	/* Can't fail after this point: releasing the reference to r
1795 	 * requires taking a mutex that may sleep, which is not allowed
1796 	 * in atomic context.
1797 	 */
1798 	r = mlx5e_route_lookup_for_update(encap, &key);
1799 	if (!r)
1800 		goto out;
1801 	fib_work->r = r;
1802 	dev_hold(ul_dev);
1803 
1804 	return fib_work;
1805 
1806 out:
1807 	kfree(fib_work);
1808 	return NULL;
1809 }
1810 
1811 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1812 {
1813 	struct mlx5e_tc_fib_event_data *fib_work;
1814 	struct fib_notifier_info *info = ptr;
1815 	struct mlx5e_tc_tun_encap *encap;
1816 	struct net_device *ul_dev;
1817 	struct mlx5e_priv *priv;
1818 
1819 	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1820 	priv = encap->priv;
1821 	ul_dev = priv->netdev;
1822 	priv = netdev_priv(ul_dev);
1823 
1824 	switch (event) {
1825 	case FIB_EVENT_ENTRY_REPLACE:
1826 	case FIB_EVENT_ENTRY_DEL:
1827 		if (info->family == AF_INET)
1828 			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1829 		else if (info->family == AF_INET6)
1830 			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1831 		else
1832 			return NOTIFY_DONE;
1833 
1834 		if (!IS_ERR_OR_NULL(fib_work)) {
1835 			queue_work(priv->wq, &fib_work->work);
1836 		} else if (IS_ERR(fib_work)) {
1837 			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1838 			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1839 				       PTR_ERR(fib_work));
1840 		}
1841 
1842 		break;
1843 	default:
1844 		return NOTIFY_DONE;
1845 	}
1846 
1847 	return NOTIFY_DONE;
1848 }
1849 
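/* Illustrative init/cleanup pairing for the per-uplink encap context (a
 * sketch only; the actual caller lives in the uplink representor init
 * path):
 *
 *	uplink_priv->encap = mlx5e_tc_tun_init(priv);
 *	if (IS_ERR(uplink_priv->encap))
 *		return PTR_ERR(uplink_priv->encap);
 *	...
 *	mlx5e_tc_tun_cleanup(uplink_priv->encap);
 *
 * mlx5e_tc_tun_init() registers the FIB notifier, after which FIB events
 * may queue work on priv->wq; mlx5e_tc_tun_cleanup() unregisters the
 * notifier and flushes any pending event work.
 */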
1850 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1851 {
1852 	struct mlx5e_tc_tun_encap *encap;
1853 	int err;
1854 
1855 	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1856 	if (!encap)
1857 		return ERR_PTR(-ENOMEM);
1858 
1859 	encap->priv = priv;
1860 	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1861 	spin_lock_init(&encap->route_lock);
1862 	hash_init(encap->route_tbl);
1863 	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1864 				    NULL, NULL);
1865 	if (err) {
1866 		kvfree(encap);
1867 		return ERR_PTR(err);
1868 	}
1869 
1870 	return encap;
1871 }
1872 
1873 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1874 {
1875 	if (!encap)
1876 		return;
1877 
1878 	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1879 	flush_workqueue(encap->priv->wq); /* flush pending fib event work */
1880 	kvfree(encap);
1881 }
1882