// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * lwtunnel	Infrastructure for lightweight tunnels like MPLS
 *
 * Authors:	Roopa Prabhu, <roopa@cumulusnetworks.com>
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/lwtunnel.h>
#include <linux/in.h>
#include <linux/init.h>
#include <linux/err.h>

#include <net/lwtunnel.h>
#include <net/rtnetlink.h>
#include <net/ip6_fib.h>
#include <net/rtnh.h>

#include "dev.h"

DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);

#ifdef CONFIG_MODULES

static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
{
	/* Only lwt encaps implemented without using an interface for
	 * the encap need to return a string here.
	 */
	switch (encap_type) {
	case LWTUNNEL_ENCAP_MPLS:
		return "MPLS";
	case LWTUNNEL_ENCAP_ILA:
		return "ILA";
	case LWTUNNEL_ENCAP_SEG6:
		return "SEG6";
	case LWTUNNEL_ENCAP_BPF:
		return "BPF";
	case LWTUNNEL_ENCAP_SEG6_LOCAL:
		return "SEG6LOCAL";
	case LWTUNNEL_ENCAP_RPL:
		return "RPL";
	case LWTUNNEL_ENCAP_IOAM6:
		return "IOAM6";
	case LWTUNNEL_ENCAP_XFRM:
		/* module autoload not supported for encap type */
		return NULL;
	case LWTUNNEL_ENCAP_IP6:
	case LWTUNNEL_ENCAP_IP:
	case LWTUNNEL_ENCAP_NONE:
	case __LWTUNNEL_ENCAP_MAX:
		/* should not have got here */
		WARN_ON(1);
		break;
	}
	return NULL;
}

#endif /* CONFIG_MODULES */

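/* Allocate a zeroed struct lwtunnel_state with @encap_len extra bytes of
 * encap-specific data appended after it.  GFP_ATOMIC keeps the helper
 * usable from atomic context.
 */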
struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
{
	struct lwtunnel_state *lws;

	lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);

	return lws;
}
EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);

static const struct lwtunnel_encap_ops __rcu *
		lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;

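/* Register @ops for one LWTUNNEL_ENCAP_* type by publishing it in the
 * lwtun_encaps table.  The slot must still be empty: the cmpxchg() makes
 * a second registration for the same type fail with -1.
 *
 * Rough, illustrative caller (the "foo" names are hypothetical, not from
 * this file; encap_type stands for one of the LWTUNNEL_ENCAP_* values):
 *
 *	static const struct lwtunnel_encap_ops foo_encap_ops = {
 *		.build_state	= foo_build_state,
 *		.output		= foo_output,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	err = lwtunnel_encap_add_ops(&foo_encap_ops, encap_type);
 */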
int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
			   unsigned int num)
{
	if (num > LWTUNNEL_ENCAP_MAX)
		return -ERANGE;

	return !cmpxchg((const struct lwtunnel_encap_ops **)
			&lwtun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);

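/* Remove the registration for @encap_type, but only if the slot still
 * points at @ops.  synchronize_net() then waits out concurrent RCU
 * readers before the caller may free or unload the ops.
 */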
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
			   unsigned int encap_type)
{
	int ret;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX)
		return -ERANGE;

	ret = (cmpxchg((const struct lwtunnel_encap_ops **)
		       &lwtun_encaps[encap_type],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);

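/* Parse an RTA_ENCAP netlink attribute into a new lwtunnel_state by
 * calling the registered ->build_state() for @encap_type.  A reference
 * on the owning module is taken up front and dropped again on failure,
 * so a successful return leaves the module pinned until the state is
 * released via lwtstate_free().
 */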
int lwtunnel_build_state(struct net *net, u16 encap_type,
			 struct nlattr *encap, unsigned int family,
			 const void *cfg, struct lwtunnel_state **lws,
			 struct netlink_ext_ack *extack)
{
	const struct lwtunnel_encap_ops *ops;
	bool found = false;
	int ret = -EINVAL;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "Unknown LWT encapsulation type");
		return ret;
	}

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[encap_type]);
	if (likely(ops && ops->build_state && try_module_get(ops->owner)))
		found = true;
	rcu_read_unlock();

	if (found) {
		ret = ops->build_state(net, encap, family, cfg, lws, extack);
		if (ret)
			module_put(ops->owner);
	} else {
		/* don't rely on -EOPNOTSUPP to detect match as build_state
		 * handlers could return it
		 */
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "LWT encapsulation type not supported");
	}

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_build_state);

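/* Check that @encap_type is valid and has ops registered.  When nothing
 * is registered and the type has a module alias string (see
 * lwtunnel_encap_str()), the RTNL lock is dropped temporarily to
 * request_module("rtnl-lwt-<NAME>") before looking again.
 */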
int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
		return ret;
	}

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[encap_type]);
	rcu_read_unlock();
#ifdef CONFIG_MODULES
	if (!ops) {
		const char *encap_type_str = lwtunnel_encap_str(encap_type);

		if (encap_type_str) {
			__rtnl_unlock();
			request_module("rtnl-lwt-%s", encap_type_str);
			rtnl_lock();

			rcu_read_lock();
			ops = rcu_dereference(lwtun_encaps[encap_type]);
			rcu_read_unlock();
		}
	}
#endif
	ret = ops ? 0 : -EOPNOTSUPP;
	if (ret < 0)
		NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);

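/* Walk a block of struct rtnexthop entries (e.g. the payload of an
 * RTA_MULTIPATH attribute) and run lwtunnel_valid_encap_type() on every
 * RTA_ENCAP_TYPE found among the per-nexthop attributes.
 */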
int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
				   struct netlink_ext_ack *extack)
{
	struct rtnexthop *rtnh = (struct rtnexthop *)attr;
	struct nlattr *nla_entype;
	struct nlattr *attrs;
	u16 encap_type;
	int attrlen;

	while (rtnh_ok(rtnh, remaining)) {
		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			attrs = rtnh_attrs(rtnh);
			nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);

			if (nla_entype) {
				if (nla_len(nla_entype) < sizeof(u16)) {
					NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE");
					return -EINVAL;
				}
				encap_type = nla_get_u16(nla_entype);

				if (lwtunnel_valid_encap_type(encap_type,
							      extack) != 0)
					return -EOPNOTSUPP;
			}
		}
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);

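/* Release a lwtunnel_state.  Encaps providing ->destroy_state() get to
 * drop their private resources first and the state is then freed after
 * an RCU grace period; otherwise it is freed immediately.  Finally the
 * module reference taken in lwtunnel_build_state() is dropped.
 */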
void lwtstate_free(struct lwtunnel_state *lws)
{
	const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];

	if (ops->destroy_state) {
		ops->destroy_state(lws);
		kfree_rcu(lws, rcu);
	} else {
		kfree(lws);
	}
	module_put(ops->owner);
}
EXPORT_SYMBOL_GPL(lwtstate_free);

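/* Dump @lwtstate into a netlink message: the encap payload is emitted by
 * ->fill_encap() inside a nested @encap_attr, followed by a u16
 * @encap_type_attr.  -EOPNOTSUPP from the ops is treated as "nothing to
 * dump": the nest is cancelled and 0 is returned.
 */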
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
			int encap_attr, int encap_type_attr)
{
	const struct lwtunnel_encap_ops *ops;
	struct nlattr *nest;
	int ret;

	if (!lwtstate)
		return 0;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	nest = nla_nest_start_noflag(skb, encap_attr);
	if (!nest)
		return -EMSGSIZE;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->fill_encap))
		ret = ops->fill_encap(skb, lwtstate);
	rcu_read_unlock();

	if (ret)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	ret = nla_put_u16(skb, encap_type_attr, lwtstate->type);
	if (ret)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);

	return (ret == -EOPNOTSUPP ? 0 : ret);
}
EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);

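/* Return the nla_total_size() of the encap payload this state would
 * dump, as reported by ->get_encap_size().  States without matching ops
 * or without that hook count as zero.
 */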
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = 0;

	if (!lwtstate)
		return 0;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->get_encap_size))
		ret = nla_total_size(ops->get_encap_size(lwtstate));
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);

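/* Compare two lwtunnel states: 0 when both are NULL or their encap data
 * matches according to ->cmp_encap(), non-zero when only one is set or
 * the types differ.  Callers (e.g. the FIB code) use this to decide
 * whether two routes carry the same encap.
 */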
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = 0;

	if (!a && !b)
		return 0;

	if (!a || !b)
		return 1;

	if (a->type != b->type)
		return 1;

	if (a->type == LWTUNNEL_ENCAP_NONE ||
	    a->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[a->type]);
	if (likely(ops && ops->cmp_encap))
		ret = ops->cmp_encap(a, b);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);

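/* Output-path hook (typically installed as dst->output): hand the skb to
 * the encap's ->output() under RCU.  A per-CPU recursion counter protects
 * against encaps that loop packets back into themselves; on recursion,
 * missing ops or a missing dst the skb is dropped.
 */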
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}
	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->output)) {
		dev_xmit_recursion_inc();
		ret = ops->output(net, sk, skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_output);

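/* Transmit-path counterpart of lwtunnel_output(), dispatching to the
 * encap's ->xmit() hook.  An encap may consume the skb or, presumably,
 * return LWTUNNEL_XMIT_CONTINUE so the caller finishes normal
 * transmission.
 */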
int lwtunnel_xmit(struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}

	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->xmit)) {
		dev_xmit_recursion_inc();
		ret = ops->xmit(skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_xmit);

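/* Receive-side hook (typically reached via dst_input()): same recursion
 * guard and drop behaviour as the output hooks, but dispatching to the
 * encap's ->input().
 */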
int lwtunnel_input(struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}
	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->input)) {
		dev_xmit_recursion_inc();
		ret = ops->input(skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_input);