xref: /openbmc/linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c (revision e33bbe69149b802c0c77bfb822685772f85388ca)
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
3  * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Neither the names of the copyright holders nor the names of its
15  *    contributors may be used to endorse or promote products derived from
16  *    this software without specific prior written permission.
17  *
18  * Alternatively, this software may be distributed under the terms of the
19  * GNU General Public License ("GPL") version 2 as published by the Free
20  * Software Foundation.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include <net/ip_tunnels.h>
36 #include <net/ip6_tunnel.h>
37 
38 #include "spectrum_ipip.h"
39 
40 struct ip_tunnel_parm
41 mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
42 {
43 	struct ip_tunnel *tun = netdev_priv(ol_dev);
44 
45 	return tun->parms;
46 }
47 
48 struct __ip6_tnl_parm
49 mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
50 {
51 	struct ip6_tnl *tun = netdev_priv(ol_dev);
52 
53 	return tun->parms;
54 }
55 
56 static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
57 {
58 	return !!(parms.i_flags & TUNNEL_KEY);
59 }
60 
61 static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms)
62 {
63 	return !!(parms.o_flags & TUNNEL_KEY);
64 }
65 
66 static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms)
67 {
68 	return mlxsw_sp_ipip_parms4_has_ikey(parms) ?
69 		be32_to_cpu(parms.i_key) : 0;
70 }
71 
72 static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms)
73 {
74 	return mlxsw_sp_ipip_parms4_has_okey(parms) ?
75 		be32_to_cpu(parms.o_key) : 0;
76 }
77 
78 static union mlxsw_sp_l3addr
79 mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
80 {
81 	return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
82 }
83 
84 static union mlxsw_sp_l3addr
85 mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
86 {
87 	return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
88 }
89 
90 static union mlxsw_sp_l3addr
91 mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
92 {
93 	return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
94 }
95 
96 static union mlxsw_sp_l3addr
97 mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
98 {
99 	return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
100 }
101 
102 union mlxsw_sp_l3addr
103 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
104 			   const struct net_device *ol_dev)
105 {
106 	struct ip_tunnel_parm parms4;
107 	struct __ip6_tnl_parm parms6;
108 
109 	switch (proto) {
110 	case MLXSW_SP_L3_PROTO_IPV4:
111 		parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
112 		return mlxsw_sp_ipip_parms4_saddr(parms4);
113 	case MLXSW_SP_L3_PROTO_IPV6:
114 		parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
115 		return mlxsw_sp_ipip_parms6_saddr(parms6);
116 	}
117 
118 	WARN_ON(1);
119 	return (union mlxsw_sp_l3addr) {0};
120 }
121 
122 static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
123 {
124 
125 	struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
126 
127 	return mlxsw_sp_ipip_parms4_daddr(parms4).addr4;
128 }
129 
130 static union mlxsw_sp_l3addr
131 mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
132 			   const struct net_device *ol_dev)
133 {
134 	struct ip_tunnel_parm parms4;
135 	struct __ip6_tnl_parm parms6;
136 
137 	switch (proto) {
138 	case MLXSW_SP_L3_PROTO_IPV4:
139 		parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
140 		return mlxsw_sp_ipip_parms4_daddr(parms4);
141 	case MLXSW_SP_L3_PROTO_IPV6:
142 		parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
143 		return mlxsw_sp_ipip_parms6_daddr(parms6);
144 	}
145 
146 	WARN_ON(1);
147 	return (union mlxsw_sp_l3addr) {0};
148 }
149 
150 bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
151 {
152 	union mlxsw_sp_l3addr naddr = {0};
153 
154 	return !memcmp(&addr, &naddr, sizeof(naddr));
155 }
156 
157 static int
158 mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
159 				  struct mlxsw_sp_ipip_entry *ipip_entry)
160 {
161 	u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
162 	__be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
163 	char ratr_pl[MLXSW_REG_RATR_LEN];
164 
165 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
166 			    true, MLXSW_REG_RATR_TYPE_IPIP,
167 			    adj_index, rif_index);
168 	mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
169 
170 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
171 }
172 
173 static int
174 mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
175 				     u32 tunnel_index,
176 				     struct mlxsw_sp_ipip_entry *ipip_entry)
177 {
178 	u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
179 	char rtdp_pl[MLXSW_REG_RTDP_LEN];
180 	struct ip_tunnel_parm parms;
181 	unsigned int type_check;
182 	bool has_ikey;
183 	u32 daddr4;
184 	u32 ikey;
185 
186 	parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
187 	has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms);
188 	ikey = mlxsw_sp_ipip_parms4_ikey(parms);
189 
190 	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
191 
192 	type_check = has_ikey ?
193 		MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY :
194 		MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE;
195 
196 	/* Linux demuxes tunnels based on packet SIP (which must match tunnel
197 	 * remote IP). Thus configure decap so that it filters out packets that
198 	 * are not IPv4 or have the wrong SIP. IPIP_DECAP_ERROR trap is
199 	 * generated for packets that fail this criterion. Linux then handles
200 	 * such packets in slow path and generates ICMP destination unreachable.
201 	 */
202 	daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev));
203 	mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index,
204 				  MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
205 				  type_check, has_ikey, daddr4, ikey);
206 
207 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
208 }
209 
210 static int
211 mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp,
212 				      u32 dip, u8 prefix_len, u16 ul_vr_id,
213 				      enum mlxsw_reg_ralue_op op,
214 				      u32 tunnel_index)
215 {
216 	char ralue_pl[MLXSW_REG_RALUE_LEN];
217 
218 	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op,
219 			      ul_vr_id, prefix_len, dip);
220 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index);
221 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
222 }
223 
224 static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp,
225 					struct mlxsw_sp_ipip_entry *ipip_entry,
226 					enum mlxsw_reg_ralue_op op,
227 					u32 tunnel_index)
228 {
229 	u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb);
230 	__be32 dip;
231 	int err;
232 
233 	err = mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(mlxsw_sp, tunnel_index,
234 						   ipip_entry);
235 	if (err)
236 		return err;
237 
238 	dip = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
239 					 ipip_entry->ol_dev).addr4;
240 	return mlxsw_sp_ipip_fib_entry_op_gre4_ralue(mlxsw_sp, be32_to_cpu(dip),
241 						     32, ul_vr_id, op,
242 						     tunnel_index);
243 }
244 
245 static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
246 					  const struct net_device *ol_dev)
247 {
248 	union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
249 	union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
250 
251 	/* Tunnels with unset local or remote address are valid in Linux and
252 	 * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
253 	 * (NBMA) tunnels. In principle these can be offloaded, but the driver
254 	 * currently doesn't support this. So punt.
255 	 */
256 	return !mlxsw_sp_l3addr_is_zero(saddr) &&
257 	       !mlxsw_sp_l3addr_is_zero(daddr);
258 }
259 
260 static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
261 					   const struct net_device *ol_dev,
262 					   enum mlxsw_sp_l3proto ol_proto)
263 {
264 	struct ip_tunnel *tunnel = netdev_priv(ol_dev);
265 	__be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */
266 	bool inherit_ttl = tunnel->parms.iph.ttl == 0;
267 	bool inherit_tos = tunnel->parms.iph.tos & 0x1;
268 
269 	return (tunnel->parms.i_flags & ~okflags) == 0 &&
270 	       (tunnel->parms.o_flags & ~okflags) == 0 &&
271 	       inherit_ttl && inherit_tos &&
272 	       mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev);
273 }
274 
275 static struct mlxsw_sp_rif_ipip_lb_config
276 mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
277 				      const struct net_device *ol_dev)
278 {
279 	struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev);
280 	enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
281 
282 	lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ?
283 		MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
284 		MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
285 	return (struct mlxsw_sp_rif_ipip_lb_config){
286 		.lb_ipipt = lb_ipipt,
287 		.okey = mlxsw_sp_ipip_parms4_okey(parms),
288 		.ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
289 		.saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
290 						    ol_dev),
291 	};
292 }
293 
294 static int
295 mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
296 				    struct mlxsw_sp_ipip_entry *ipip_entry,
297 				    struct netlink_ext_ack *extack)
298 {
299 	union mlxsw_sp_l3addr old_saddr, new_saddr;
300 	union mlxsw_sp_l3addr old_daddr, new_daddr;
301 	struct ip_tunnel_parm new_parms;
302 	bool update_tunnel = false;
303 	bool update_decap = false;
304 	bool update_nhs = false;
305 	int err = 0;
306 
307 	new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
308 
309 	new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms);
310 	old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4);
311 	new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms);
312 	old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4);
313 
314 	if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
315 		u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
316 
317 		/* Since the local address has changed, if there is another
318 		 * tunnel with a matching saddr, both need to be demoted.
319 		 */
320 		if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp,
321 							 MLXSW_SP_L3_PROTO_IPV4,
322 							 new_saddr, ul_tb_id,
323 							 ipip_entry)) {
324 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
325 			return 0;
326 		}
327 
328 		update_tunnel = true;
329 	} else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) !=
330 		    mlxsw_sp_ipip_parms4_okey(new_parms)) ||
331 		   ipip_entry->parms4.link != new_parms.link) {
332 		update_tunnel = true;
333 	} else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
334 		update_nhs = true;
335 	} else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) !=
336 		   mlxsw_sp_ipip_parms4_ikey(new_parms)) {
337 		update_decap = true;
338 	}
339 
340 	if (update_tunnel)
341 		err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
342 							  true, true, true,
343 							  extack);
344 	else if (update_nhs)
345 		err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
346 							  false, false, true,
347 							  extack);
348 	else if (update_decap)
349 		err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
350 							  false, false, false,
351 							  extack);
352 
353 	ipip_entry->parms4 = new_parms;
354 	return err;
355 }
356 
357 static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
358 	.dev_type = ARPHRD_IPGRE,
359 	.ul_proto = MLXSW_SP_L3_PROTO_IPV4,
360 	.nexthop_update = mlxsw_sp_ipip_nexthop_update_gre4,
361 	.fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4,
362 	.can_offload = mlxsw_sp_ipip_can_offload_gre4,
363 	.ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4,
364 	.ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre4,
365 };
366 
367 const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = {
368 	[MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops,
369 };
370