1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * lwtunnel Infrastructure for light weight tunnels like mpls 4 * 5 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> 6 */ 7 8 #include <linux/capability.h> 9 #include <linux/module.h> 10 #include <linux/types.h> 11 #include <linux/kernel.h> 12 #include <linux/slab.h> 13 #include <linux/uaccess.h> 14 #include <linux/skbuff.h> 15 #include <linux/netdevice.h> 16 #include <linux/lwtunnel.h> 17 #include <linux/in.h> 18 #include <linux/init.h> 19 #include <linux/err.h> 20 21 #include <net/lwtunnel.h> 22 #include <net/rtnetlink.h> 23 #include <net/ip6_fib.h> 24 #include <net/rtnh.h> 25 26 DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); 27 EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); 28 29 #ifdef CONFIG_MODULES 30 31 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) 32 { 33 /* Only lwt encaps implemented without using an interface for 34 * the encap need to return a string here. 35 */ 36 switch (encap_type) { 37 case LWTUNNEL_ENCAP_MPLS: 38 return "MPLS"; 39 case LWTUNNEL_ENCAP_ILA: 40 return "ILA"; 41 case LWTUNNEL_ENCAP_SEG6: 42 return "SEG6"; 43 case LWTUNNEL_ENCAP_BPF: 44 return "BPF"; 45 case LWTUNNEL_ENCAP_SEG6_LOCAL: 46 return "SEG6LOCAL"; 47 case LWTUNNEL_ENCAP_RPL: 48 return "RPL"; 49 case LWTUNNEL_ENCAP_IOAM6: 50 return "IOAM6"; 51 case LWTUNNEL_ENCAP_IP6: 52 case LWTUNNEL_ENCAP_IP: 53 case LWTUNNEL_ENCAP_NONE: 54 case __LWTUNNEL_ENCAP_MAX: 55 /* should not have got here */ 56 WARN_ON(1); 57 break; 58 } 59 return NULL; 60 } 61 62 #endif /* CONFIG_MODULES */ 63 64 struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 65 { 66 struct lwtunnel_state *lws; 67 68 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); 69 70 return lws; 71 } 72 EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); 73 74 static const struct lwtunnel_encap_ops __rcu * 75 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; 76 77 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, 78 unsigned int num) 79 { 80 if (num > LWTUNNEL_ENCAP_MAX) 81 return -ERANGE; 82 83 return !cmpxchg((const struct lwtunnel_encap_ops **) 84 &lwtun_encaps[num], 85 NULL, ops) ? 0 : -1; 86 } 87 EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); 88 89 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, 90 unsigned int encap_type) 91 { 92 int ret; 93 94 if (encap_type == LWTUNNEL_ENCAP_NONE || 95 encap_type > LWTUNNEL_ENCAP_MAX) 96 return -ERANGE; 97 98 ret = (cmpxchg((const struct lwtunnel_encap_ops **) 99 &lwtun_encaps[encap_type], 100 ops, NULL) == ops) ? 0 : -1; 101 102 synchronize_net(); 103 104 return ret; 105 } 106 EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); 107 108 int lwtunnel_build_state(struct net *net, u16 encap_type, 109 struct nlattr *encap, unsigned int family, 110 const void *cfg, struct lwtunnel_state **lws, 111 struct netlink_ext_ack *extack) 112 { 113 const struct lwtunnel_encap_ops *ops; 114 bool found = false; 115 int ret = -EINVAL; 116 117 if (encap_type == LWTUNNEL_ENCAP_NONE || 118 encap_type > LWTUNNEL_ENCAP_MAX) { 119 NL_SET_ERR_MSG_ATTR(extack, encap, 120 "Unknown LWT encapsulation type"); 121 return ret; 122 } 123 124 ret = -EOPNOTSUPP; 125 rcu_read_lock(); 126 ops = rcu_dereference(lwtun_encaps[encap_type]); 127 if (likely(ops && ops->build_state && try_module_get(ops->owner))) 128 found = true; 129 rcu_read_unlock(); 130 131 if (found) { 132 ret = ops->build_state(net, encap, family, cfg, lws, extack); 133 if (ret) 134 module_put(ops->owner); 135 } else { 136 /* don't rely on -EOPNOTSUPP to detect match as build_state 137 * handlers could return it 138 */ 139 NL_SET_ERR_MSG_ATTR(extack, encap, 140 "LWT encapsulation type not supported"); 141 } 142 143 return ret; 144 } 145 EXPORT_SYMBOL_GPL(lwtunnel_build_state); 146 147 int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) 148 { 149 const struct lwtunnel_encap_ops *ops; 150 int ret = -EINVAL; 151 152 if (encap_type == LWTUNNEL_ENCAP_NONE || 153 encap_type > LWTUNNEL_ENCAP_MAX) { 154 NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); 155 return ret; 156 } 157 158 rcu_read_lock(); 159 ops = rcu_dereference(lwtun_encaps[encap_type]); 160 rcu_read_unlock(); 161 #ifdef CONFIG_MODULES 162 if (!ops) { 163 const char *encap_type_str = lwtunnel_encap_str(encap_type); 164 165 if (encap_type_str) { 166 __rtnl_unlock(); 167 request_module("rtnl-lwt-%s", encap_type_str); 168 rtnl_lock(); 169 170 rcu_read_lock(); 171 ops = rcu_dereference(lwtun_encaps[encap_type]); 172 rcu_read_unlock(); 173 } 174 } 175 #endif 176 ret = ops ? 0 : -EOPNOTSUPP; 177 if (ret < 0) 178 NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); 179 180 return ret; 181 } 182 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); 183 184 int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, 185 struct netlink_ext_ack *extack) 186 { 187 struct rtnexthop *rtnh = (struct rtnexthop *)attr; 188 struct nlattr *nla_entype; 189 struct nlattr *attrs; 190 u16 encap_type; 191 int attrlen; 192 193 while (rtnh_ok(rtnh, remaining)) { 194 attrlen = rtnh_attrlen(rtnh); 195 if (attrlen > 0) { 196 attrs = rtnh_attrs(rtnh); 197 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 198 199 if (nla_entype) { 200 encap_type = nla_get_u16(nla_entype); 201 202 if (lwtunnel_valid_encap_type(encap_type, 203 extack) != 0) 204 return -EOPNOTSUPP; 205 } 206 } 207 rtnh = rtnh_next(rtnh, &remaining); 208 } 209 210 return 0; 211 } 212 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); 213 214 void lwtstate_free(struct lwtunnel_state *lws) 215 { 216 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; 217 218 if (ops->destroy_state) { 219 ops->destroy_state(lws); 220 kfree_rcu(lws, rcu); 221 } else { 222 kfree(lws); 223 } 224 module_put(ops->owner); 225 } 226 EXPORT_SYMBOL_GPL(lwtstate_free); 227 228 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, 229 int encap_attr, int encap_type_attr) 230 { 231 const struct lwtunnel_encap_ops *ops; 232 struct nlattr *nest; 233 int ret; 234 235 if (!lwtstate) 236 return 0; 237 238 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 239 lwtstate->type > LWTUNNEL_ENCAP_MAX) 240 return 0; 241 242 nest = nla_nest_start_noflag(skb, encap_attr); 243 if (!nest) 244 return -EMSGSIZE; 245 246 ret = -EOPNOTSUPP; 247 rcu_read_lock(); 248 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 249 if (likely(ops && ops->fill_encap)) 250 ret = ops->fill_encap(skb, lwtstate); 251 rcu_read_unlock(); 252 253 if (ret) 254 goto nla_put_failure; 255 nla_nest_end(skb, nest); 256 ret = nla_put_u16(skb, encap_type_attr, lwtstate->type); 257 if (ret) 258 goto nla_put_failure; 259 260 return 0; 261 262 nla_put_failure: 263 nla_nest_cancel(skb, nest); 264 265 return (ret == -EOPNOTSUPP ? 0 : ret); 266 } 267 EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); 268 269 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) 270 { 271 const struct lwtunnel_encap_ops *ops; 272 int ret = 0; 273 274 if (!lwtstate) 275 return 0; 276 277 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 278 lwtstate->type > LWTUNNEL_ENCAP_MAX) 279 return 0; 280 281 rcu_read_lock(); 282 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 283 if (likely(ops && ops->get_encap_size)) 284 ret = nla_total_size(ops->get_encap_size(lwtstate)); 285 rcu_read_unlock(); 286 287 return ret; 288 } 289 EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); 290 291 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) 292 { 293 const struct lwtunnel_encap_ops *ops; 294 int ret = 0; 295 296 if (!a && !b) 297 return 0; 298 299 if (!a || !b) 300 return 1; 301 302 if (a->type != b->type) 303 return 1; 304 305 if (a->type == LWTUNNEL_ENCAP_NONE || 306 a->type > LWTUNNEL_ENCAP_MAX) 307 return 0; 308 309 rcu_read_lock(); 310 ops = rcu_dereference(lwtun_encaps[a->type]); 311 if (likely(ops && ops->cmp_encap)) 312 ret = ops->cmp_encap(a, b); 313 rcu_read_unlock(); 314 315 return ret; 316 } 317 EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); 318 319 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) 320 { 321 struct dst_entry *dst = skb_dst(skb); 322 const struct lwtunnel_encap_ops *ops; 323 struct lwtunnel_state *lwtstate; 324 int ret = -EINVAL; 325 326 if (!dst) 327 goto drop; 328 lwtstate = dst->lwtstate; 329 330 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 331 lwtstate->type > LWTUNNEL_ENCAP_MAX) 332 return 0; 333 334 ret = -EOPNOTSUPP; 335 rcu_read_lock(); 336 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 337 if (likely(ops && ops->output)) 338 ret = ops->output(net, sk, skb); 339 rcu_read_unlock(); 340 341 if (ret == -EOPNOTSUPP) 342 goto drop; 343 344 return ret; 345 346 drop: 347 kfree_skb(skb); 348 349 return ret; 350 } 351 EXPORT_SYMBOL_GPL(lwtunnel_output); 352 353 int lwtunnel_xmit(struct sk_buff *skb) 354 { 355 struct dst_entry *dst = skb_dst(skb); 356 const struct lwtunnel_encap_ops *ops; 357 struct lwtunnel_state *lwtstate; 358 int ret = -EINVAL; 359 360 if (!dst) 361 goto drop; 362 363 lwtstate = dst->lwtstate; 364 365 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 366 lwtstate->type > LWTUNNEL_ENCAP_MAX) 367 return 0; 368 369 ret = -EOPNOTSUPP; 370 rcu_read_lock(); 371 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 372 if (likely(ops && ops->xmit)) 373 ret = ops->xmit(skb); 374 rcu_read_unlock(); 375 376 if (ret == -EOPNOTSUPP) 377 goto drop; 378 379 return ret; 380 381 drop: 382 kfree_skb(skb); 383 384 return ret; 385 } 386 EXPORT_SYMBOL_GPL(lwtunnel_xmit); 387 388 int lwtunnel_input(struct sk_buff *skb) 389 { 390 struct dst_entry *dst = skb_dst(skb); 391 const struct lwtunnel_encap_ops *ops; 392 struct lwtunnel_state *lwtstate; 393 int ret = -EINVAL; 394 395 if (!dst) 396 goto drop; 397 lwtstate = dst->lwtstate; 398 399 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 400 lwtstate->type > LWTUNNEL_ENCAP_MAX) 401 return 0; 402 403 ret = -EOPNOTSUPP; 404 rcu_read_lock(); 405 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 406 if (likely(ops && ops->input)) 407 ret = ops->input(skb); 408 rcu_read_unlock(); 409 410 if (ret == -EOPNOTSUPP) 411 goto drop; 412 413 return ret; 414 415 drop: 416 kfree_skb(skb); 417 418 return ret; 419 } 420 EXPORT_SYMBOL_GPL(lwtunnel_input); 421