// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <net/dst_metadata.h>
#include <linux/netdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include "tc.h"
#include "neigh.h"
#include "en_rep.h"
#include "eswitch.h"
#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mapping.h"
#include "en/tc_tun.h"
#include "lib/port_tun.h"

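/* Per-device state for an indirectly offloaded (tunnel or VLAN upper)
 * netdev, linked into the uplink's tc_indr_block_priv_list.
 */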
struct mlx5e_rep_indr_block_priv {
	struct net_device *netdev;
	struct mlx5e_rep_priv *rpriv;

	struct list_head list;
};

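/* Attach an encap entry to the neigh update machinery: take a tunnel
 * entropy reference for the reformat type and link the entry onto the
 * (possibly newly created) neigh hash entry under encap_lock.
 */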
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct mlx5e_neigh *m_neigh,
				 struct net_device *neigh_dev)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
	struct mlx5e_neigh_hash_entry *nhe;
	int err;

	err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
	if (err)
		return err;

	mutex_lock(&rpriv->neigh_update.encap_lock);
	nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
	if (!nhe) {
		err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
		if (err) {
			mutex_unlock(&rpriv->neigh_update.encap_lock);
			mlx5_tun_entropy_refcount_dec(tun_entropy,
						      e->reformat_type);
			return err;
		}
	}

	e->nhe = nhe;
	spin_lock(&nhe->encap_list_lock);
	list_add_rcu(&e->encap_list, &nhe->encap_list);
	spin_unlock(&nhe->encap_list_lock);

	mutex_unlock(&rpriv->neigh_update.encap_lock);

	return 0;
}

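/* Reverse of mlx5e_rep_encap_entry_attach(): unlink the encap entry from
 * its neigh hash entry and drop the nhe and tunnel entropy references.
 * No-op if the entry was never attached.
 */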
void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
				  struct mlx5e_encap_entry *e)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;

	if (!e->nhe)
		return;

	spin_lock(&e->nhe->encap_list_lock);
	list_del_rcu(&e->encap_list);
	spin_unlock(&e->nhe->encap_list_lock);

	mlx5e_rep_neigh_entry_release(e->nhe);
	e->nhe = NULL;
	mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
}

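/* Called under RTNL on neighbour updates to keep offloaded encap flows in
 * sync: remove offloads when the neighbour is lost or its MAC changed, and
 * re-offload with the updated destination/source MAC once it is connected.
 */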
void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
			    struct mlx5e_encap_entry *e,
			    bool neigh_connected,
			    unsigned char ha[ETH_ALEN])
{
	struct ethhdr *eth = (struct ethhdr *)e->encap_header;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	bool encap_connected;
	LIST_HEAD(flow_list);

	ASSERT_RTNL();

	/* wait for encap to be fully initialized */
	wait_for_completion(&e->res_ready);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
	if (e->compl_result < 0 || (encap_connected == neigh_connected &&
				    ether_addr_equal(e->h_dest, ha)))
		goto unlock;

	mlx5e_take_all_encap_flows(e, &flow_list);

	if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
	    (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
		mlx5e_tc_encap_flows_del(priv, e, &flow_list);

	if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
		struct net_device *route_dev;

		ether_addr_copy(e->h_dest, ha);
		ether_addr_copy(eth->h_dest, ha);
		/* Update the encap source mac as well, in case the flows
		 * were deleted because the encap source mac changed.
		 */
		route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex);
		if (route_dev)
			ether_addr_copy(eth->h_source, route_dev->dev_addr);

		mlx5e_tc_encap_flows_add(priv, e, &flow_list);
	}
unlock:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	mlx5e_put_flow_list(priv, &flow_list);
}

static int
mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
			      struct flow_cls_offload *cls_flower, int flags)
{
	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
					      flags);
	case FLOW_CLS_DESTROY:
		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
					   flags);
	case FLOW_CLS_STATS:
		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
					  flags);
	default:
		return -EOPNOTSUPP;
	}
}

static
int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
				    struct tc_cls_matchall_offload *ma)
{
	switch (ma->command) {
	case TC_CLSMATCHALL_REPLACE:
		return mlx5e_tc_configure_matchall(priv, ma);
	case TC_CLSMATCHALL_DESTROY:
		return mlx5e_tc_delete_matchall(priv, ma);
	case TC_CLSMATCHALL_STATS:
		mlx5e_tc_stats_matchall(priv, ma);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}

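/* Block callback for rules installed directly on the representor
 * (ingress, eswitch offload).
 */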
static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
	struct mlx5e_priv *priv = cb_priv;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
	case TC_SETUP_CLSMATCHALL:
		return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}

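/* Block callback for TC_SETUP_FT rules: flower rules coming from the flow
 * table offload path are mapped onto the reserved ft chain so they can
 * reuse the regular tc offload code (see the normalization below).
 */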
static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	struct flow_cls_offload tmp, *f = type_data;
	struct mlx5e_priv *priv = cb_priv;
	struct mlx5_eswitch *esw;
	unsigned long flags;
	int err;

	flags = MLX5_TC_FLAG(INGRESS) |
		MLX5_TC_FLAG(ESW_OFFLOAD) |
		MLX5_TC_FLAG(FT_OFFLOAD);
	esw = priv->mdev->priv.eswitch;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		memcpy(&tmp, f, sizeof(*f));

		if (!mlx5_chains_prios_supported(esw_chains(esw)))
			return -EOPNOTSUPP;

		/* Re-use the tc offload path by moving the ft flow to the
		 * reserved ft chain.
		 *
		 * FT offload can use prio range [0, INT_MAX], so we normalize
		 * it to range [1, mlx5_chains_get_prio_range(esw_chains(esw))],
		 * as with tc, where prio 0 isn't supported.
		 *
		 * We only support chain 0 of FT offload.
		 */
		if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)))
			return -EOPNOTSUPP;
		if (tmp.common.chain_index != 0)
			return -EOPNOTSUPP;

		tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
		tmp.common.prio++;
		err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
		memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
		return err;
	default:
		return -EOPNOTSUPP;
	}
}

static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
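/* ndo_setup_tc entry point for representor netdevs: registers the tc and
 * ft block callbacks through the simple (unlocked) flow block helpers.
 */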
int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
		       void *type_data)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct flow_block_offload *f = type_data;

	f->unlocked_driver_cb = true;

	switch (type) {
	case TC_SETUP_BLOCK:
		return flow_block_cb_setup_simple(type_data,
						  &mlx5e_rep_block_tc_cb_list,
						  mlx5e_rep_setup_tc_cb,
						  priv, priv, true);
	case TC_SETUP_FT:
		return flow_block_cb_setup_simple(type_data,
						  &mlx5e_rep_block_ft_cb_list,
						  mlx5e_rep_setup_ft_cb,
						  priv, priv, true);
	default:
		return -EOPNOTSUPP;
	}
}

int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	int err;

	mutex_init(&uplink_priv->unready_flows_lock);
	INIT_LIST_HEAD(&uplink_priv->unready_flows);

	/* init shared tc flow table */
	err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
	return err;
}

void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
{
	/* delete shared tc flow table */
	mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
	mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
}

void mlx5e_rep_tc_enable(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
		  mlx5e_tc_reoffload_flows_work);
}

void mlx5e_rep_tc_disable(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
}

int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);

	return NOTIFY_OK;
}

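/* Look up the indirect block state previously bound for @netdev; the list
 * is expected to be accessed under RTNL (see the ASSERT_RTNL() below).
 */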
static struct mlx5e_rep_indr_block_priv *
mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
				 struct net_device *netdev)
{
	struct mlx5e_rep_indr_block_priv *cb_priv;

	/* All callback list access should be protected by RTNL. */
	ASSERT_RTNL();

	list_for_each_entry(cb_priv,
			    &rpriv->uplink_priv.tc_indr_block_priv_list,
			    list)
		if (cb_priv->netdev == netdev)
			return cb_priv;

	return NULL;
}

static int
mlx5e_rep_indr_offload(struct net_device *netdev,
		       struct flow_cls_offload *flower,
		       struct mlx5e_rep_indr_block_priv *indr_priv,
		       unsigned long flags)
{
	struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
	int err = 0;

	switch (flower->command) {
	case FLOW_CLS_REPLACE:
		err = mlx5e_configure_flower(netdev, priv, flower, flags);
		break;
	case FLOW_CLS_DESTROY:
		err = mlx5e_delete_flower(netdev, priv, flower, flags);
		break;
	case FLOW_CLS_STATS:
		err = mlx5e_stats_flower(netdev, priv, flower, flags);
		break;
	default:
		err = -EOPNOTSUPP;
	}

	return err;
}

static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
				      void *type_data, void *indr_priv)
{
	unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
	struct mlx5e_rep_indr_block_priv *priv = indr_priv;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
					      flags);
	default:
		return -EOPNOTSUPP;
	}
}

static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
				      void *type_data, void *indr_priv)
{
	struct mlx5e_rep_indr_block_priv *priv = indr_priv;
	struct flow_cls_offload *f = type_data;
	struct flow_cls_offload tmp;
	struct mlx5e_priv *mpriv;
	struct mlx5_eswitch *esw;
	unsigned long flags;
	int err;

	mpriv = netdev_priv(priv->rpriv->netdev);
	esw = mpriv->mdev->priv.eswitch;

	flags = MLX5_TC_FLAG(EGRESS) |
		MLX5_TC_FLAG(ESW_OFFLOAD) |
		MLX5_TC_FLAG(FT_OFFLOAD);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		memcpy(&tmp, f, sizeof(*f));

		/* Re-use the tc offload path by moving the ft flow to the
		 * reserved ft chain.
		 *
		 * FT offload can use prio range [0, INT_MAX], so we normalize
		 * it to range [1, mlx5_chains_get_prio_range(esw_chains(esw))],
		 * as with tc, where prio 0 isn't supported.
		 *
		 * We only support chain 0 of FT offload.
		 */
		if (!mlx5_chains_prios_supported(esw_chains(esw)) ||
		    tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) ||
		    tmp.common.chain_index)
			return -EOPNOTSUPP;

		tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
		tmp.common.prio++;
		err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
		memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
		return err;
	default:
		return -EOPNOTSUPP;
	}
}

static void mlx5e_rep_indr_block_unbind(void *cb_priv)
{
	struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;

	list_del(&indr_priv->list);
	kfree(indr_priv);
}

static LIST_HEAD(mlx5e_block_cb_list);

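/* Bind or unbind an indirect flow block for a tunnel device or a VLAN
 * device stacked on this uplink representor. Only clsact ingress blocks
 * are accepted; everything else is rejected with -EOPNOTSUPP.
 */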
static int
mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
			   struct mlx5e_rep_priv *rpriv,
			   struct flow_block_offload *f,
			   flow_setup_cb_t *setup_cb,
			   void *data,
			   void (*cleanup)(struct flow_block_cb *block_cb))
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
	struct mlx5e_rep_indr_block_priv *indr_priv;
	struct flow_block_cb *block_cb;

	if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
	    !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
		return -EOPNOTSUPP;

	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
		return -EOPNOTSUPP;

	f->unlocked_driver_cb = true;
	f->driver_block_list = &mlx5e_block_cb_list;

	switch (f->command) {
	case FLOW_BLOCK_BIND:
		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
		if (indr_priv)
			return -EEXIST;

		indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
		if (!indr_priv)
			return -ENOMEM;

		indr_priv->netdev = netdev;
		indr_priv->rpriv = rpriv;
		list_add(&indr_priv->list,
			 &rpriv->uplink_priv.tc_indr_block_priv_list);

		block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv,
						    mlx5e_rep_indr_block_unbind,
						    f, netdev, sch, data, rpriv,
						    cleanup);
		if (IS_ERR(block_cb)) {
			list_del(&indr_priv->list);
			kfree(indr_priv);
			return PTR_ERR(block_cb);
		}
		flow_block_cb_add(block_cb, f);
		list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);

		return 0;
	case FLOW_BLOCK_UNBIND:
		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
		if (!indr_priv)
			return -ENOENT;

		block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
		if (!block_cb)
			return -ENOENT;

		flow_indr_block_cb_remove(block_cb, f);
		list_del(&block_cb->driver_list);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
	return 0;
}

static
int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
			    enum tc_setup_type type, void *type_data,
			    void *data,
			    void (*cleanup)(struct flow_block_cb *block_cb))
{
	switch (type) {
	case TC_SETUP_BLOCK:
		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
						  mlx5e_rep_indr_setup_tc_cb,
						  data, cleanup);
	case TC_SETUP_FT:
		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
						  mlx5e_rep_indr_setup_ft_cb,
						  data, cleanup);
	default:
		return -EOPNOTSUPP;
	}
}

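/* Register with the flow_indr infrastructure so tc/ft blocks installed on
 * tunnel and VLAN devices can be offloaded through this uplink representor.
 */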
int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;

	/* init indirect block notifications */
	INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);

	return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv);
}

void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
{
	flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv,
				 mlx5e_rep_indr_block_unbind);
}

#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
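/* Rebuild tunnel metadata for a packet handed to software after hardware
 * decap: look up the tunnel match key (and encap options) by the mapping
 * ids packed into @tunnel_id, attach a metadata dst to the skb and hand it
 * to the tunnel device the rule was installed on.
 */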
static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
				 struct mlx5e_tc_update_priv *tc_priv,
				 u32 tunnel_id)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct tunnel_match_enc_opts enc_opts = {};
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct metadata_dst *tun_dst;
	struct tunnel_match_key key;
	u32 tun_id, enc_opts_id;
	struct net_device *dev;
	int err;

	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
	tun_id = tunnel_id >> ENC_OPTS_BITS;

	if (!tun_id)
		return true;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
	if (err) {
		WARN_ON_ONCE(true);
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel for tun_id: %d, err: %d\n",
			   tun_id, err);
		return false;
	}

	if (enc_opts_id) {
		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
				   enc_opts_id, &enc_opts);
		if (err) {
			netdev_dbg(priv->netdev,
				   "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
				   enc_opts_id, err);
			return false;
		}
	}

	if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
					   key.enc_ip.tos, key.enc_ip.ttl,
					   key.enc_tp.dst, TUNNEL_KEY,
					   key32_to_tunnel_id(key.enc_key_id.keyid),
					   enc_opts.key.len);
	} else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
					     key.enc_ip.tos, key.enc_ip.ttl,
					     key.enc_tp.dst, 0, TUNNEL_KEY,
					     key32_to_tunnel_id(key.enc_key_id.keyid),
					     enc_opts.key.len);
	} else {
		netdev_dbg(priv->netdev,
			   "Couldn't restore tunnel, unsupported addr_type: %d\n",
			   key.enc_control.addr_type);
		return false;
	}

	if (!tun_dst) {
		netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
		return false;
	}

	tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;

	if (enc_opts.key.len)
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					enc_opts.key.data,
					enc_opts.key.len,
					enc_opts.key.dst_opt_type);

	skb_dst_set(skb, (struct dst_entry *)tun_dst);
	dev = dev_get_by_index(&init_net, key.filter_ifindex);
	if (!dev) {
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel device with ifindex: %d\n",
			   key.filter_ifindex);
		return false;
	}

	/* Set tun_dev so we do dev_put() after datapath */
	tc_priv->tun_dev = dev;

	skb->dev = dev;

	return true;
}
#endif /* CONFIG_NET_TC_SKB_EXT */

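/* Restore tc chain and tunnel state for packets only partially processed
 * in hardware: reg_c0 carries the chain mapping and reg_c1 the conntrack
 * zone and tunnel mapping ids. Returns false if restoration failed.
 */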
bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
			     struct sk_buff *skb,
			     struct mlx5e_tc_update_priv *tc_priv)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	u32 chain = 0, reg_c0, reg_c1, tunnel_id, zone_restore_id;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct tc_skb_ext *tc_skb_ext;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;
	int err;

	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
	if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
		reg_c0 = 0;
	reg_c1 = be32_to_cpu(cqe->ft_metadata);

	if (!reg_c0)
		return true;

	/* If reg_c0 is not equal to the default flow tag then skb->mark
	 * is not supported and must be reset back to 0.
	 */
	skb->mark = 0;

	priv = netdev_priv(skb->dev);
	esw = priv->mdev->priv.eswitch;

	err = mlx5_get_chain_for_tag(esw_chains(esw), reg_c0, &chain);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find chain for chain tag: %d, err: %d\n",
			   reg_c0, err);
		return false;
	}

	if (chain) {
		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
		if (!tc_skb_ext) {
			WARN_ON(1);
			return false;
		}

		tc_skb_ext->chain = chain;

		zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;

		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;
		if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
					      zone_restore_id))
			return false;
	}

	tunnel_id = reg_c1 >> ESW_TUN_OFFSET;
	return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
#endif /* CONFIG_NET_TC_SKB_EXT */

	return true;
}

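/* Release the tunnel device reference taken in mlx5e_restore_tunnel() once
 * the datapath is done with the skb.
 */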
void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
{
	if (tc_priv->tun_dev)
		dev_put(tc_priv->tun_dev);
}