// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * lwtunnel	Infrastructure for lightweight tunnels like MPLS
 *
 * Authors:	Roopa Prabhu, <roopa@cumulusnetworks.com>
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/lwtunnel.h>
#include <linux/in.h>
#include <linux/init.h>
#include <linux/err.h>

#include <net/lwtunnel.h>
#include <net/rtnetlink.h>
#include <net/ip6_fib.h>
#include <net/rtnh.h>

#include "dev.h"

DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);

#ifdef CONFIG_MODULES

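/* Map an encap type to the name used in the "rtnl-lwt-<name>" module
 * alias so the handler can be autoloaded; types that cannot (or need
 * not) be autoloaded return NULL.
 */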
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
{
	/* Only lwt encaps implemented without using an interface for
	 * the encap need to return a string here.
	 */
	switch (encap_type) {
	case LWTUNNEL_ENCAP_MPLS:
		return "MPLS";
	case LWTUNNEL_ENCAP_ILA:
		return "ILA";
	case LWTUNNEL_ENCAP_SEG6:
		return "SEG6";
	case LWTUNNEL_ENCAP_BPF:
		return "BPF";
	case LWTUNNEL_ENCAP_SEG6_LOCAL:
		return "SEG6LOCAL";
	case LWTUNNEL_ENCAP_RPL:
		return "RPL";
	case LWTUNNEL_ENCAP_IOAM6:
		return "IOAM6";
	case LWTUNNEL_ENCAP_XFRM:
		/* module autoload not supported for encap type */
		return NULL;
	case LWTUNNEL_ENCAP_IP6:
	case LWTUNNEL_ENCAP_IP:
	case LWTUNNEL_ENCAP_NONE:
	case __LWTUNNEL_ENCAP_MAX:
		/* should not have got here */
		WARN_ON(1);
		break;
	}
	return NULL;
}

#endif /* CONFIG_MODULES */

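/**
 * lwtunnel_state_alloc - allocate a zeroed lwtunnel_state
 * @encap_len: bytes of trailing space reserved for encap-specific data
 *
 * Uses GFP_ATOMIC, so it is safe in contexts that cannot sleep;
 * returns NULL on allocation failure.
 */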
struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
{
	struct lwtunnel_state *lws;

	lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);

	return lws;
}
EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);

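/* Registry of encap handlers, indexed by LWTUNNEL_ENCAP_* type.  Writers
 * publish/retract entries with cmpxchg() below; readers look them up
 * under rcu_read_lock() with rcu_dereference().
 */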
static const struct lwtunnel_encap_ops __rcu *
		lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;

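/**
 * lwtunnel_encap_add_ops - register encap ops for an encap type
 * @ops: handler operations provided by the encap implementation
 * @num: LWTUNNEL_ENCAP_* type the ops handle
 *
 * Returns 0 on success, -ERANGE for an out-of-range type, or -1 if the
 * slot is already claimed.  An encap module would typically register
 * itself from its init function, roughly like this (illustrative
 * sketch only; the "foo" names are hypothetical):
 *
 *	static const struct lwtunnel_encap_ops foo_encap_ops = {
 *		.build_state	= foo_build_state,
 *		.output		= foo_output,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	err = lwtunnel_encap_add_ops(&foo_encap_ops, LWTUNNEL_ENCAP_FOO);
 */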
int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
			   unsigned int num)
{
	if (num > LWTUNNEL_ENCAP_MAX)
		return -ERANGE;

	return !cmpxchg((const struct lwtunnel_encap_ops **)
			&lwtun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);

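/**
 * lwtunnel_encap_del_ops - unregister encap ops for an encap type
 * @ops: operations previously registered with lwtunnel_encap_add_ops()
 * @encap_type: LWTUNNEL_ENCAP_* type to clear
 *
 * The slot is cleared only if it still points at @ops, and
 * synchronize_net() is called so in-flight RCU readers finish before
 * the caller tears the ops down.  Returns 0 on success, -ERANGE for an
 * invalid type, or -1 if @ops was not the registered handler.
 */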
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
			   unsigned int encap_type)
{
	int ret;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX)
		return -ERANGE;

	ret = (cmpxchg((const struct lwtunnel_encap_ops **)
		       &lwtun_encaps[encap_type],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);

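/**
 * lwtunnel_build_state - build encap state from an RTA_ENCAP attribute
 * @net: network namespace
 * @encap_type: LWTUNNEL_ENCAP_* type requested by userspace
 * @encap: netlink attribute with the encap-specific configuration
 * @family: address family of the route being configured
 * @cfg: family-specific route configuration passed through to the handler
 * @lws: on success, receives the newly built lwtunnel_state
 * @extack: extended ack used to report validation errors
 *
 * A reference on the handler module is taken here and is dropped again
 * on failure (or later by lwtstate_free()).
 */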
int lwtunnel_build_state(struct net *net, u16 encap_type,
			 struct nlattr *encap, unsigned int family,
			 const void *cfg, struct lwtunnel_state **lws,
			 struct netlink_ext_ack *extack)
{
	const struct lwtunnel_encap_ops *ops;
	bool found = false;
	int ret = -EINVAL;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "Unknown LWT encapsulation type");
		return ret;
	}

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[encap_type]);
	if (likely(ops && ops->build_state && try_module_get(ops->owner)))
		found = true;
	rcu_read_unlock();

	if (found) {
		ret = ops->build_state(net, encap, family, cfg, lws, extack);
		if (ret)
			module_put(ops->owner);
	} else {
		/* don't rely on -EOPNOTSUPP to detect match as build_state
		 * handlers could return it
		 */
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "LWT encapsulation type not supported");
	}

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_build_state);

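/**
 * lwtunnel_valid_encap_type - check that an encap type has a handler
 * @encap_type: LWTUNNEL_ENCAP_* type from RTA_ENCAP_TYPE
 * @extack: extended ack used to report errors
 *
 * If no handler is registered and the kernel supports modules, the RTNL
 * lock is temporarily released to request_module() the matching
 * "rtnl-lwt-<type>" alias before re-checking.  Returns 0 if a handler
 * is available, -EINVAL for an unknown type, or -EOPNOTSUPP otherwise.
 */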
int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
		return ret;
	}

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[encap_type]);
	rcu_read_unlock();
#ifdef CONFIG_MODULES
	if (!ops) {
		const char *encap_type_str = lwtunnel_encap_str(encap_type);

		if (encap_type_str) {
			__rtnl_unlock();
			request_module("rtnl-lwt-%s", encap_type_str);
			rtnl_lock();

			rcu_read_lock();
			ops = rcu_dereference(lwtun_encaps[encap_type]);
			rcu_read_unlock();
		}
	}
#endif
	ret = ops ? 0 : -EOPNOTSUPP;
	if (ret < 0)
		NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);

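/**
 * lwtunnel_valid_encap_type_attr - validate encap types in a nexthop list
 * @attr: attribute holding a list of struct rtnexthop entries
 *        (e.g. RTA_MULTIPATH)
 * @remaining: length of the attribute payload in bytes
 * @extack: extended ack used to report errors
 *
 * Walks every nexthop and, where an RTA_ENCAP_TYPE is present, validates
 * it with lwtunnel_valid_encap_type().  Returns 0 on success, -EINVAL
 * for a malformed attribute, or -EOPNOTSUPP for an unsupported type.
 */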
int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
				   struct netlink_ext_ack *extack)
{
	struct rtnexthop *rtnh = (struct rtnexthop *)attr;
	struct nlattr *nla_entype;
	struct nlattr *attrs;
	u16 encap_type;
	int attrlen;

	while (rtnh_ok(rtnh, remaining)) {
		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			attrs = rtnh_attrs(rtnh);
			nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);

			if (nla_entype) {
				if (nla_len(nla_entype) < sizeof(u16)) {
					NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE");
					return -EINVAL;
				}
				encap_type = nla_get_u16(nla_entype);

				if (lwtunnel_valid_encap_type(encap_type,
							      extack) != 0)
					return -EOPNOTSUPP;
			}
		}
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);

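/**
 * lwtstate_free - release an lwtunnel_state and its module reference
 * @lws: state to free
 *
 * If the handler provides destroy_state(), it is called and the state is
 * freed via kfree_rcu(); otherwise the state is freed immediately.  The
 * module reference taken in lwtunnel_build_state() is dropped last.
 */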
void lwtstate_free(struct lwtunnel_state *lws)
{
	const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];

	if (ops->destroy_state) {
		ops->destroy_state(lws);
		kfree_rcu(lws, rcu);
	} else {
		kfree(lws);
	}
	module_put(ops->owner);
}
EXPORT_SYMBOL_GPL(lwtstate_free);

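/**
 * lwtunnel_fill_encap - dump encap state into a netlink message
 * @skb: message being constructed
 * @lwtstate: state to dump, may be NULL
 * @encap_attr: attribute type for the nested encap data (e.g. RTA_ENCAP)
 * @encap_type_attr: attribute type for the type value (e.g. RTA_ENCAP_TYPE)
 *
 * Returns 0 when there is nothing to dump or the handler has no
 * fill_encap(), -EMSGSIZE or another negative error otherwise.
 */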
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
			int encap_attr, int encap_type_attr)
{
	const struct lwtunnel_encap_ops *ops;
	struct nlattr *nest;
	int ret;

	if (!lwtstate)
		return 0;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	nest = nla_nest_start_noflag(skb, encap_attr);
	if (!nest)
		return -EMSGSIZE;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->fill_encap))
		ret = ops->fill_encap(skb, lwtstate);
	rcu_read_unlock();

	if (ret)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	ret = nla_put_u16(skb, encap_type_attr, lwtstate->type);
	if (ret)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);

	return (ret == -EOPNOTSUPP ? 0 : ret);
}
EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);

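/**
 * lwtunnel_get_encap_size - netlink size needed to dump an encap state
 * @lwtstate: state to size up, may be NULL
 *
 * Returns nla_total_size() of the payload reported by the handler's
 * get_encap_size(), or 0 when there is nothing to dump.
 */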
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = 0;

	if (!lwtstate)
		return 0;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->get_encap_size))
		ret = nla_total_size(ops->get_encap_size(lwtstate));
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);

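/**
 * lwtunnel_cmp_encap - compare two encap states
 * @a: first state, may be NULL
 * @b: second state, may be NULL
 *
 * Returns 0 when the states are equivalent (both NULL, or the same type
 * with a matching per-type cmp_encap()), non-zero otherwise.
 */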
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = 0;

	if (!a && !b)
		return 0;

	if (!a || !b)
		return 1;

	if (a->type != b->type)
		return 1;

	if (a->type == LWTUNNEL_ENCAP_NONE ||
	    a->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[a->type]);
	if (likely(ops && ops->cmp_encap))
		ret = ops->cmp_encap(a, b);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);

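/**
 * lwtunnel_output - output-path hook for dst entries with lwtunnel state
 * @net: network namespace
 * @sk: originating socket, may be NULL
 * @skb: packet to transmit
 *
 * Guards against datapath recursion and hands the packet to the encap
 * type's output() handler; the packet is dropped if no handler exists.
 */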
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}
	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->output)) {
		dev_xmit_recursion_inc();
		ret = ops->output(net, sk, skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_output);

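/**
 * lwtunnel_xmit - transmit-time hook for lwtunnel encap
 * @skb: packet to transmit
 *
 * Like lwtunnel_output() but calls the encap type's xmit() handler.
 * Packets with no dst, no handler, or that hit the recursion limit are
 * dropped.
 */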
int lwtunnel_xmit(struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}

	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->xmit)) {
		dev_xmit_recursion_inc();
		ret = ops->xmit(skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_xmit);

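/**
 * lwtunnel_input - receive-path hook for lwtunnel encap
 * @skb: packet being received
 *
 * Hands the packet to the encap type's input() handler, dropping it when
 * the handler is missing, the dst is absent, or the datapath recursion
 * limit has been reached.
 */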
int lwtunnel_input(struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}
	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->input)) {
		dev_xmit_recursion_inc();
		ret = ops->input(skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_input);
469