// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * lwtunnel	Infrastructure for light weight tunnels like mpls
 *
 * Authors:	Roopa Prabhu, <roopa@cumulusnetworks.com>
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/lwtunnel.h>
#include <linux/in.h>
#include <linux/init.h>
#include <linux/err.h>

#include <net/lwtunnel.h>
#include <net/rtnetlink.h>
#include <net/ip6_fib.h>
#include <net/rtnh.h>

#include "dev.h"

/* Static branch gating netfilter hook processing for lwtunnel traffic.
 * Defined and exported here; presumably toggled via sysctl elsewhere —
 * the toggle is not visible in this file.
 */
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);

#ifdef CONFIG_MODULES

/* Map an encap type to the suffix used for module autoloading
 * ("rtnl-lwt-<suffix>", see lwtunnel_valid_encap_type()).
 *
 * Returns NULL for types that never autoload: XFRM explicitly opts out,
 * and the IP/IP6/NONE cases are handled before this is reached (the
 * WARN_ON() documents that reaching them here is a caller bug).
 */
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
{
	/* Only lwt encaps implemented without using an interface for
	 * the encap need to return a string here.
	 */
	switch (encap_type) {
	case LWTUNNEL_ENCAP_MPLS:
		return "MPLS";
	case LWTUNNEL_ENCAP_ILA:
		return "ILA";
	case LWTUNNEL_ENCAP_SEG6:
		return "SEG6";
	case LWTUNNEL_ENCAP_BPF:
		return "BPF";
	case LWTUNNEL_ENCAP_SEG6_LOCAL:
		return "SEG6LOCAL";
	case LWTUNNEL_ENCAP_RPL:
		return "RPL";
	case LWTUNNEL_ENCAP_IOAM6:
		return "IOAM6";
	case LWTUNNEL_ENCAP_XFRM:
		/* module autoload not supported for encap type */
		return NULL;
	case LWTUNNEL_ENCAP_IP6:
	case LWTUNNEL_ENCAP_IP:
	case LWTUNNEL_ENCAP_NONE:
	case __LWTUNNEL_ENCAP_MAX:
		/* should not have got here */
		WARN_ON(1);
		break;
	}
	return NULL;
}

#endif /* CONFIG_MODULES */

/* Allocate a zeroed lwtunnel_state with @encap_len bytes of trailing
 * encap-private data.  GFP_ATOMIC: callers may hold spinlocks or run
 * in softirq context.  Returns NULL on allocation failure (callers
 * must check).  Freed via lwtstate_free().
 */
struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
{
	struct lwtunnel_state *lws;

	lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);

	return lws;
}
EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);

/* Registry of encap implementations, indexed by LWTUNNEL_ENCAP_* type.
 * Entries are published/retired with cmpxchg() and read under RCU.
 */
static const struct lwtunnel_encap_ops __rcu *
		lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;

/* Register @ops for encap type @num.  The cmpxchg() installs the entry
 * only if the slot is currently empty, so a concurrent double-register
 * loses cleanly.  Returns 0 on success, -ERANGE for a bad type, -1 if
 * the slot was already taken.
 */
int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
			   unsigned int num)
{
	if (num > LWTUNNEL_ENCAP_MAX)
		return -ERANGE;

	return !cmpxchg((const struct lwtunnel_encap_ops **)
			&lwtun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);

/* Unregister @ops from slot @encap_type.  Only succeeds if the slot
 * still holds @ops (guards against removing someone else's entry).
 * synchronize_net() waits out RCU readers that may still be using the
 * ops before the caller (typically module exit) tears them down.
 * Returns 0 on success, -ERANGE / -1 on mismatch.
 */
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
			   unsigned int encap_type)
{
	int ret;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX)
		return -ERANGE;

	ret = (cmpxchg((const struct lwtunnel_encap_ops **)
		       &lwtun_encaps[encap_type],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);

/* Build an lwtunnel state from the RTA_ENCAP netlink attribute @encap.
 *
 * A module reference is taken (try_module_get()) inside the RCU read
 * section so the ops cannot disappear between lookup and use; it is
 * dropped again if build_state() fails, and otherwise held until
 * lwtstate_free() releases it.
 *
 * Returns 0 and sets *lws on success; -EINVAL for an unknown type,
 * -EOPNOTSUPP when no handler is registered, or the build_state()
 * error.  extack carries a human-readable reason on failure.
 */
int lwtunnel_build_state(struct net *net, u16 encap_type,
			 struct nlattr *encap, unsigned int family,
			 const void *cfg, struct lwtunnel_state **lws,
			 struct netlink_ext_ack *extack)
{
	const struct lwtunnel_encap_ops *ops;
	bool found = false;
	int ret = -EINVAL;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "Unknown LWT encapsulation type");
		return ret;
	}

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[encap_type]);
	if (likely(ops && ops->build_state && try_module_get(ops->owner)))
		found = true;
	rcu_read_unlock();

	if (found) {
		ret = ops->build_state(net, encap, family, cfg, lws, extack);
		if (ret)
			module_put(ops->owner);
	} else {
		/* don't rely on -EOPNOTSUPP to detect match as build_state
		 * handlers could return it
		 */
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "LWT encapsulation type not supported");
	}

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_build_state);

/* Check that @encap_type names a usable encapsulation, autoloading the
 * implementing module if necessary.
 *
 * The __rtnl_unlock()/rtnl_lock() pair implies callers hold RTNL: it
 * is dropped around request_module() — presumably because module init
 * paths themselves take RTNL to register — then reacquired before the
 * second registry lookup.  NOTE(review): caller state protected only
 * by RTNL may change across that window; callers must tolerate this.
 *
 * Returns 0 if supported, -EINVAL for an unknown type, -EOPNOTSUPP if
 * no handler could be found or loaded.
 */
int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
		return ret;
	}

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[encap_type]);
	rcu_read_unlock();
#ifdef CONFIG_MODULES
	if (!ops) {
		const char *encap_type_str = lwtunnel_encap_str(encap_type);

		if (encap_type_str) {
			__rtnl_unlock();
			request_module("rtnl-lwt-%s", encap_type_str);
			rtnl_lock();

			rcu_read_lock();
			ops = rcu_dereference(lwtun_encaps[encap_type]);
			rcu_read_unlock();
		}
	}
#endif
	ret = ops ? 0 : -EOPNOTSUPP;
	if (ret < 0)
		NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);

/* Validate every RTA_ENCAP_TYPE attribute found in a list of
 * rtnexthop entries (@attr/@remaining, as used in multipath routes).
 *
 * Returns 0 if all present encap types are valid, -EINVAL for a
 * malformed attribute.  Note any lwtunnel_valid_encap_type() failure
 * is reported as -EOPNOTSUPP regardless of the underlying error.
 */
int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
				   struct netlink_ext_ack *extack)
{
	struct rtnexthop *rtnh = (struct rtnexthop *)attr;
	struct nlattr *nla_entype;
	struct nlattr *attrs;
	u16 encap_type;
	int attrlen;

	while (rtnh_ok(rtnh, remaining)) {
		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			attrs = rtnh_attrs(rtnh);
			nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);

			if (nla_entype) {
				/* reject truncated attributes before
				 * nla_get_u16() reads two bytes
				 */
				if (nla_len(nla_entype) < sizeof(u16)) {
					NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE");
					return -EINVAL;
				}
				encap_type = nla_get_u16(nla_entype);

				if (lwtunnel_valid_encap_type(encap_type,
							      extack) != 0)
					return -EOPNOTSUPP;
			}
		}
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);

/* Release an lwtunnel state allocated by lwtunnel_state_alloc().
 *
 * The registry is read without rcu_dereference() here: the module
 * reference taken in lwtunnel_build_state() keeps the ops registered
 * (modules unregister before unload), so the slot cannot be NULL.
 * States with a destroy_state() hook are freed via kfree_rcu() so
 * concurrent RCU readers of the state finish first; others are freed
 * immediately.  The module reference is dropped last.
 */
void lwtstate_free(struct lwtunnel_state *lws)
{
	const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];

	if (ops->destroy_state) {
		ops->destroy_state(lws);
		kfree_rcu(lws, rcu);
	} else {
		kfree(lws);
	}
	module_put(ops->owner);
}
EXPORT_SYMBOL_GPL(lwtstate_free);

/* Dump @lwtstate into @skb as a nested @encap_attr attribute plus a
 * u16 @encap_type_attr.  A NULL or typeless state dumps nothing and
 * returns 0.  -EOPNOTSUPP (no fill_encap handler) is also mapped to 0
 * so such states are silently skipped; any other error (notably
 * -EMSGSIZE) cancels the nest and is propagated.
 */
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
			int encap_attr, int encap_type_attr)
{
	const struct lwtunnel_encap_ops *ops;
	struct nlattr *nest;
	int ret;

	if (!lwtstate)
		return 0;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	nest = nla_nest_start_noflag(skb, encap_attr);
	if (!nest)
		return -EMSGSIZE;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->fill_encap))
		ret = ops->fill_encap(skb, lwtstate);
	rcu_read_unlock();

	if (ret)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	ret = nla_put_u16(skb, encap_type_attr, lwtstate->type);
	if (ret)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);

	return (ret == -EOPNOTSUPP ? 0 : ret);
}
EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);

/* Return the netlink payload size needed by lwtunnel_fill_encap() for
 * @lwtstate's nested encap attribute, or 0 when there is nothing to
 * dump (NULL state, typeless state, or no get_encap_size handler).
 */
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = 0;

	if (!lwtstate)
		return 0;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->get_encap_size))
		ret = nla_total_size(ops->get_encap_size(lwtstate));
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);

/* Compare two lwtunnel states.  Returns 0 when equal (both NULL, or
 * same type and the type's cmp_encap() — if any — reports equal) and
 * 1 when different; i.e. memcmp-like, not a total order.
 */
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = 0;

	if (!a && !b)
		return 0;

	if (!a || !b)
		return 1;

	if (a->type != b->type)
		return 1;

	if (a->type == LWTUNNEL_ENCAP_NONE ||
	    a->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[a->type]);
	if (likely(ops && ops->cmp_encap))
		ret = ops->cmp_encap(a, b);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);

/* dst_output() hook: hand @skb to the encap's output handler.
 *
 * The dev_xmit_recursion() guard caps nested lwt invocations (an
 * encap whose output path re-enters lwtunnel) and drops the packet
 * with -ENETDOWN when the limit is hit.  -EOPNOTSUPP from a missing
 * handler also drops.  A typeless state returns 0 without consuming
 * the skb — NOTE(review): callers appear to own the skb in that case.
 */
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}
	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->output)) {
		/* bump the recursion counter across the handler so
		 * re-entry is detected by the guard above
		 */
		dev_xmit_recursion_inc();
		ret = ops->output(net, sk, skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_output);

/* Transmit hook: same structure as lwtunnel_output() but dispatches
 * to ops->xmit() (no net/sk context).  Recursion guard, drop
 * semantics and the typeless-state early return match lwtunnel_output().
 */
int lwtunnel_xmit(struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}

	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->xmit)) {
		dev_xmit_recursion_inc();
		ret = ops->xmit(skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_xmit);

/* dst_input() hook: dispatch @skb to the encap's input handler.
 * Mirrors lwtunnel_output(): recursion-limited, drops the skb on
 * -ENETDOWN / missing dst / unsupported handler.
 */
int lwtunnel_input(struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}
	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->input)) {
		dev_xmit_recursion_inc();
		ret = ops->input(skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_input);