xref: /openbmc/linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c (revision 04295878beac396dae47ba93141cae0d9386e7ef)
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
3 
4 #include <net/ip_tunnels.h>
5 #include <net/ip6_tunnel.h>
6 #include <net/inet_ecn.h>
7 
8 #include "spectrum_ipip.h"
9 #include "reg.h"
10 
11 struct ip_tunnel_parm
12 mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
13 {
14 	struct ip_tunnel *tun = netdev_priv(ol_dev);
15 
16 	return tun->parms;
17 }
18 
19 struct __ip6_tnl_parm
20 mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
21 {
22 	struct ip6_tnl *tun = netdev_priv(ol_dev);
23 
24 	return tun->parms;
25 }
26 
27 static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
28 {
29 	return !!(parms.i_flags & TUNNEL_KEY);
30 }
31 
32 static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms)
33 {
34 	return !!(parms.o_flags & TUNNEL_KEY);
35 }
36 
37 static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms)
38 {
39 	return mlxsw_sp_ipip_parms4_has_ikey(parms) ?
40 		be32_to_cpu(parms.i_key) : 0;
41 }
42 
43 static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms)
44 {
45 	return mlxsw_sp_ipip_parms4_has_okey(parms) ?
46 		be32_to_cpu(parms.o_key) : 0;
47 }
48 
49 static union mlxsw_sp_l3addr
50 mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
51 {
52 	return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
53 }
54 
55 static union mlxsw_sp_l3addr
56 mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
57 {
58 	return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
59 }
60 
61 static union mlxsw_sp_l3addr
62 mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
63 {
64 	return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
65 }
66 
67 static union mlxsw_sp_l3addr
68 mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
69 {
70 	return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
71 }
72 
73 union mlxsw_sp_l3addr
74 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
75 			   const struct net_device *ol_dev)
76 {
77 	struct ip_tunnel_parm parms4;
78 	struct __ip6_tnl_parm parms6;
79 
80 	switch (proto) {
81 	case MLXSW_SP_L3_PROTO_IPV4:
82 		parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
83 		return mlxsw_sp_ipip_parms4_saddr(parms4);
84 	case MLXSW_SP_L3_PROTO_IPV6:
85 		parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
86 		return mlxsw_sp_ipip_parms6_saddr(parms6);
87 	}
88 
89 	WARN_ON(1);
90 	return (union mlxsw_sp_l3addr) {0};
91 }
92 
93 static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
94 {
95 
96 	struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
97 
98 	return mlxsw_sp_ipip_parms4_daddr(parms4).addr4;
99 }
100 
101 static union mlxsw_sp_l3addr
102 mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
103 			   const struct net_device *ol_dev)
104 {
105 	struct ip_tunnel_parm parms4;
106 	struct __ip6_tnl_parm parms6;
107 
108 	switch (proto) {
109 	case MLXSW_SP_L3_PROTO_IPV4:
110 		parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
111 		return mlxsw_sp_ipip_parms4_daddr(parms4);
112 	case MLXSW_SP_L3_PROTO_IPV6:
113 		parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
114 		return mlxsw_sp_ipip_parms6_daddr(parms6);
115 	}
116 
117 	WARN_ON(1);
118 	return (union mlxsw_sp_l3addr) {0};
119 }
120 
121 bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
122 {
123 	union mlxsw_sp_l3addr naddr = {0};
124 
125 	return !memcmp(&addr, &naddr, sizeof(naddr));
126 }
127 
128 static int
129 mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
130 				  struct mlxsw_sp_ipip_entry *ipip_entry)
131 {
132 	u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
133 	__be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
134 	char ratr_pl[MLXSW_REG_RATR_LEN];
135 
136 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
137 			    true, MLXSW_REG_RATR_TYPE_IPIP,
138 			    adj_index, rif_index);
139 	mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
140 
141 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
142 }
143 
144 static int
145 mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
146 				     u32 tunnel_index,
147 				     struct mlxsw_sp_ipip_entry *ipip_entry)
148 {
149 	u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
150 	u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb);
151 	char rtdp_pl[MLXSW_REG_RTDP_LEN];
152 	struct ip_tunnel_parm parms;
153 	unsigned int type_check;
154 	bool has_ikey;
155 	u32 daddr4;
156 	u32 ikey;
157 
158 	parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
159 	has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms);
160 	ikey = mlxsw_sp_ipip_parms4_ikey(parms);
161 
162 	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
163 	mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_id);
164 
165 	type_check = has_ikey ?
166 		MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY :
167 		MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE;
168 
169 	/* Linux demuxes tunnels based on packet SIP (which must match tunnel
170 	 * remote IP). Thus configure decap so that it filters out packets that
171 	 * are not IPv4 or have the wrong SIP. IPIP_DECAP_ERROR trap is
172 	 * generated for packets that fail this criterion. Linux then handles
173 	 * such packets in slow path and generates ICMP destination unreachable.
174 	 */
175 	daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev));
176 	mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index,
177 				  MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
178 				  type_check, has_ikey, daddr4, ikey);
179 
180 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
181 }
182 
183 static int
184 mlxsw_sp_ipip_fib_entry_op_gre4_do(struct mlxsw_sp *mlxsw_sp,
185 				   const struct mlxsw_sp_router_ll_ops *ll_ops,
186 				   struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
187 				   u32 dip, u8 prefix_len, u16 ul_vr_id,
188 				   enum mlxsw_sp_fib_entry_op op,
189 				   u32 tunnel_index,
190 				   struct mlxsw_sp_fib_entry_priv *priv)
191 {
192 	ll_ops->fib_entry_pack(op_ctx, MLXSW_SP_L3_PROTO_IPV4, op, ul_vr_id,
193 			       prefix_len, (unsigned char *) &dip, priv);
194 	ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx, tunnel_index);
195 	return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops);
196 }
197 
198 static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp,
199 					   const struct mlxsw_sp_router_ll_ops *ll_ops,
200 					   struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
201 					   struct mlxsw_sp_ipip_entry *ipip_entry,
202 					   enum mlxsw_sp_fib_entry_op op, u32 tunnel_index,
203 					   struct mlxsw_sp_fib_entry_priv *priv)
204 {
205 	u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb);
206 	__be32 dip;
207 	int err;
208 
209 	err = mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(mlxsw_sp, tunnel_index,
210 						   ipip_entry);
211 	if (err)
212 		return err;
213 
214 	dip = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
215 					 ipip_entry->ol_dev).addr4;
216 	return mlxsw_sp_ipip_fib_entry_op_gre4_do(mlxsw_sp, ll_ops, op_ctx, be32_to_cpu(dip),
217 						  32, ul_vr_id, op, tunnel_index, priv);
218 }
219 
220 static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
221 					  const struct net_device *ol_dev)
222 {
223 	union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
224 	union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
225 
226 	/* Tunnels with unset local or remote address are valid in Linux and
227 	 * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
228 	 * (NBMA) tunnels. In principle these can be offloaded, but the driver
229 	 * currently doesn't support this. So punt.
230 	 */
231 	return !mlxsw_sp_l3addr_is_zero(saddr) &&
232 	       !mlxsw_sp_l3addr_is_zero(daddr);
233 }
234 
235 static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
236 					   const struct net_device *ol_dev)
237 {
238 	struct ip_tunnel *tunnel = netdev_priv(ol_dev);
239 	__be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */
240 	bool inherit_ttl = tunnel->parms.iph.ttl == 0;
241 	bool inherit_tos = tunnel->parms.iph.tos & 0x1;
242 
243 	return (tunnel->parms.i_flags & ~okflags) == 0 &&
244 	       (tunnel->parms.o_flags & ~okflags) == 0 &&
245 	       inherit_ttl && inherit_tos &&
246 	       mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev);
247 }
248 
249 static struct mlxsw_sp_rif_ipip_lb_config
250 mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
251 				      const struct net_device *ol_dev)
252 {
253 	struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev);
254 	enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
255 
256 	lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ?
257 		MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
258 		MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
259 	return (struct mlxsw_sp_rif_ipip_lb_config){
260 		.lb_ipipt = lb_ipipt,
261 		.okey = mlxsw_sp_ipip_parms4_okey(parms),
262 		.ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
263 		.saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
264 						    ol_dev),
265 	};
266 }
267 
268 static int
269 mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
270 				    struct mlxsw_sp_ipip_entry *ipip_entry,
271 				    struct netlink_ext_ack *extack)
272 {
273 	union mlxsw_sp_l3addr old_saddr, new_saddr;
274 	union mlxsw_sp_l3addr old_daddr, new_daddr;
275 	struct ip_tunnel_parm new_parms;
276 	bool update_tunnel = false;
277 	bool update_decap = false;
278 	bool update_nhs = false;
279 	int err = 0;
280 
281 	new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
282 
283 	new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms);
284 	old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4);
285 	new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms);
286 	old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4);
287 
288 	if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
289 		u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
290 
291 		/* Since the local address has changed, if there is another
292 		 * tunnel with a matching saddr, both need to be demoted.
293 		 */
294 		if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp,
295 							 MLXSW_SP_L3_PROTO_IPV4,
296 							 new_saddr, ul_tb_id,
297 							 ipip_entry)) {
298 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
299 			return 0;
300 		}
301 
302 		update_tunnel = true;
303 	} else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) !=
304 		    mlxsw_sp_ipip_parms4_okey(new_parms)) ||
305 		   ipip_entry->parms4.link != new_parms.link) {
306 		update_tunnel = true;
307 	} else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
308 		update_nhs = true;
309 	} else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) !=
310 		   mlxsw_sp_ipip_parms4_ikey(new_parms)) {
311 		update_decap = true;
312 	}
313 
314 	if (update_tunnel)
315 		err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
316 							  true, true, true,
317 							  extack);
318 	else if (update_nhs)
319 		err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
320 							  false, false, true,
321 							  extack);
322 	else if (update_decap)
323 		err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
324 							  false, false, false,
325 							  extack);
326 
327 	ipip_entry->parms4 = new_parms;
328 	return err;
329 }
330 
331 static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
332 	.dev_type = ARPHRD_IPGRE,
333 	.ul_proto = MLXSW_SP_L3_PROTO_IPV4,
334 	.nexthop_update = mlxsw_sp_ipip_nexthop_update_gre4,
335 	.fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4,
336 	.can_offload = mlxsw_sp_ipip_can_offload_gre4,
337 	.ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4,
338 	.ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre4,
339 };
340 
341 const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = {
342 	[MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops,
343 };
344 
345 static int mlxsw_sp_ipip_ecn_encap_init_one(struct mlxsw_sp *mlxsw_sp,
346 					    u8 inner_ecn, u8 outer_ecn)
347 {
348 	char tieem_pl[MLXSW_REG_TIEEM_LEN];
349 
350 	mlxsw_reg_tieem_pack(tieem_pl, inner_ecn, outer_ecn);
351 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tieem), tieem_pl);
352 }
353 
354 int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp)
355 {
356 	int i;
357 
358 	/* Iterate over inner ECN values */
359 	for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) {
360 		u8 outer_ecn = INET_ECN_encapsulate(0, i);
361 		int err;
362 
363 		err = mlxsw_sp_ipip_ecn_encap_init_one(mlxsw_sp, i, outer_ecn);
364 		if (err)
365 			return err;
366 	}
367 
368 	return 0;
369 }
370 
371 static int mlxsw_sp_ipip_ecn_decap_init_one(struct mlxsw_sp *mlxsw_sp,
372 					    u8 inner_ecn, u8 outer_ecn)
373 {
374 	char tidem_pl[MLXSW_REG_TIDEM_LEN];
375 	bool trap_en, set_ce = false;
376 	u8 new_inner_ecn;
377 
378 	trap_en = __INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce);
379 	new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn;
380 
381 	mlxsw_reg_tidem_pack(tidem_pl, outer_ecn, inner_ecn, new_inner_ecn,
382 			     trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0);
383 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tidem), tidem_pl);
384 }
385 
386 int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp)
387 {
388 	int i, j, err;
389 
390 	/* Iterate over inner ECN values */
391 	for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) {
392 		/* Iterate over outer ECN values */
393 		for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) {
394 			err = mlxsw_sp_ipip_ecn_decap_init_one(mlxsw_sp, i, j);
395 			if (err)
396 				return err;
397 		}
398 	}
399 
400 	return 0;
401 }
402