1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * lwtunnel Infrastructure for light weight tunnels like mpls 4 * 5 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> 6 */ 7 8 #include <linux/capability.h> 9 #include <linux/module.h> 10 #include <linux/types.h> 11 #include <linux/kernel.h> 12 #include <linux/slab.h> 13 #include <linux/uaccess.h> 14 #include <linux/skbuff.h> 15 #include <linux/netdevice.h> 16 #include <linux/lwtunnel.h> 17 #include <linux/in.h> 18 #include <linux/init.h> 19 #include <linux/err.h> 20 21 #include <net/lwtunnel.h> 22 #include <net/rtnetlink.h> 23 #include <net/ip6_fib.h> 24 #include <net/rtnh.h> 25 26 #ifdef CONFIG_MODULES 27 28 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) 29 { 30 /* Only lwt encaps implemented without using an interface for 31 * the encap need to return a string here. 32 */ 33 switch (encap_type) { 34 case LWTUNNEL_ENCAP_MPLS: 35 return "MPLS"; 36 case LWTUNNEL_ENCAP_ILA: 37 return "ILA"; 38 case LWTUNNEL_ENCAP_SEG6: 39 return "SEG6"; 40 case LWTUNNEL_ENCAP_BPF: 41 return "BPF"; 42 case LWTUNNEL_ENCAP_SEG6_LOCAL: 43 return "SEG6LOCAL"; 44 case LWTUNNEL_ENCAP_RPL: 45 return "RPL"; 46 case LWTUNNEL_ENCAP_IOAM6: 47 return "IOAM6"; 48 case LWTUNNEL_ENCAP_IP6: 49 case LWTUNNEL_ENCAP_IP: 50 case LWTUNNEL_ENCAP_NONE: 51 case __LWTUNNEL_ENCAP_MAX: 52 /* should not have got here */ 53 WARN_ON(1); 54 break; 55 } 56 return NULL; 57 } 58 59 #endif /* CONFIG_MODULES */ 60 61 struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 62 { 63 struct lwtunnel_state *lws; 64 65 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); 66 67 return lws; 68 } 69 EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); 70 71 static const struct lwtunnel_encap_ops __rcu * 72 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; 73 74 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, 75 unsigned int num) 76 { 77 if (num > LWTUNNEL_ENCAP_MAX) 78 return -ERANGE; 79 80 return !cmpxchg((const struct lwtunnel_encap_ops **) 81 &lwtun_encaps[num], 82 NULL, ops) ? 0 : -1; 83 } 84 EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); 85 86 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, 87 unsigned int encap_type) 88 { 89 int ret; 90 91 if (encap_type == LWTUNNEL_ENCAP_NONE || 92 encap_type > LWTUNNEL_ENCAP_MAX) 93 return -ERANGE; 94 95 ret = (cmpxchg((const struct lwtunnel_encap_ops **) 96 &lwtun_encaps[encap_type], 97 ops, NULL) == ops) ? 0 : -1; 98 99 synchronize_net(); 100 101 return ret; 102 } 103 EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); 104 105 int lwtunnel_build_state(struct net *net, u16 encap_type, 106 struct nlattr *encap, unsigned int family, 107 const void *cfg, struct lwtunnel_state **lws, 108 struct netlink_ext_ack *extack) 109 { 110 const struct lwtunnel_encap_ops *ops; 111 bool found = false; 112 int ret = -EINVAL; 113 114 if (encap_type == LWTUNNEL_ENCAP_NONE || 115 encap_type > LWTUNNEL_ENCAP_MAX) { 116 NL_SET_ERR_MSG_ATTR(extack, encap, 117 "Unknown LWT encapsulation type"); 118 return ret; 119 } 120 121 ret = -EOPNOTSUPP; 122 rcu_read_lock(); 123 ops = rcu_dereference(lwtun_encaps[encap_type]); 124 if (likely(ops && ops->build_state && try_module_get(ops->owner))) 125 found = true; 126 rcu_read_unlock(); 127 128 if (found) { 129 ret = ops->build_state(net, encap, family, cfg, lws, extack); 130 if (ret) 131 module_put(ops->owner); 132 } else { 133 /* don't rely on -EOPNOTSUPP to detect match as build_state 134 * handlers could return it 135 */ 136 NL_SET_ERR_MSG_ATTR(extack, encap, 137 "LWT encapsulation type not supported"); 138 } 139 140 return ret; 141 } 142 EXPORT_SYMBOL_GPL(lwtunnel_build_state); 143 144 int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) 145 { 146 const struct lwtunnel_encap_ops *ops; 147 int ret = -EINVAL; 148 149 if (encap_type == LWTUNNEL_ENCAP_NONE || 150 encap_type > LWTUNNEL_ENCAP_MAX) { 151 NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); 152 return ret; 153 } 154 155 rcu_read_lock(); 156 ops = rcu_dereference(lwtun_encaps[encap_type]); 157 rcu_read_unlock(); 158 #ifdef CONFIG_MODULES 159 if (!ops) { 160 const char *encap_type_str = lwtunnel_encap_str(encap_type); 161 162 if (encap_type_str) { 163 __rtnl_unlock(); 164 request_module("rtnl-lwt-%s", encap_type_str); 165 rtnl_lock(); 166 167 rcu_read_lock(); 168 ops = rcu_dereference(lwtun_encaps[encap_type]); 169 rcu_read_unlock(); 170 } 171 } 172 #endif 173 ret = ops ? 0 : -EOPNOTSUPP; 174 if (ret < 0) 175 NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); 176 177 return ret; 178 } 179 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); 180 181 int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, 182 struct netlink_ext_ack *extack) 183 { 184 struct rtnexthop *rtnh = (struct rtnexthop *)attr; 185 struct nlattr *nla_entype; 186 struct nlattr *attrs; 187 u16 encap_type; 188 int attrlen; 189 190 while (rtnh_ok(rtnh, remaining)) { 191 attrlen = rtnh_attrlen(rtnh); 192 if (attrlen > 0) { 193 attrs = rtnh_attrs(rtnh); 194 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 195 196 if (nla_entype) { 197 encap_type = nla_get_u16(nla_entype); 198 199 if (lwtunnel_valid_encap_type(encap_type, 200 extack) != 0) 201 return -EOPNOTSUPP; 202 } 203 } 204 rtnh = rtnh_next(rtnh, &remaining); 205 } 206 207 return 0; 208 } 209 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); 210 211 void lwtstate_free(struct lwtunnel_state *lws) 212 { 213 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; 214 215 if (ops->destroy_state) { 216 ops->destroy_state(lws); 217 kfree_rcu(lws, rcu); 218 } else { 219 kfree(lws); 220 } 221 module_put(ops->owner); 222 } 223 EXPORT_SYMBOL_GPL(lwtstate_free); 224 225 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, 226 int encap_attr, int encap_type_attr) 227 { 228 const struct lwtunnel_encap_ops *ops; 229 struct nlattr *nest; 230 int ret; 231 232 if (!lwtstate) 233 return 0; 234 235 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 236 lwtstate->type > LWTUNNEL_ENCAP_MAX) 237 return 0; 238 239 nest = nla_nest_start_noflag(skb, encap_attr); 240 if (!nest) 241 return -EMSGSIZE; 242 243 ret = -EOPNOTSUPP; 244 rcu_read_lock(); 245 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 246 if (likely(ops && ops->fill_encap)) 247 ret = ops->fill_encap(skb, lwtstate); 248 rcu_read_unlock(); 249 250 if (ret) 251 goto nla_put_failure; 252 nla_nest_end(skb, nest); 253 ret = nla_put_u16(skb, encap_type_attr, lwtstate->type); 254 if (ret) 255 goto nla_put_failure; 256 257 return 0; 258 259 nla_put_failure: 260 nla_nest_cancel(skb, nest); 261 262 return (ret == -EOPNOTSUPP ? 0 : ret); 263 } 264 EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); 265 266 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) 267 { 268 const struct lwtunnel_encap_ops *ops; 269 int ret = 0; 270 271 if (!lwtstate) 272 return 0; 273 274 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 275 lwtstate->type > LWTUNNEL_ENCAP_MAX) 276 return 0; 277 278 rcu_read_lock(); 279 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 280 if (likely(ops && ops->get_encap_size)) 281 ret = nla_total_size(ops->get_encap_size(lwtstate)); 282 rcu_read_unlock(); 283 284 return ret; 285 } 286 EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); 287 288 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) 289 { 290 const struct lwtunnel_encap_ops *ops; 291 int ret = 0; 292 293 if (!a && !b) 294 return 0; 295 296 if (!a || !b) 297 return 1; 298 299 if (a->type != b->type) 300 return 1; 301 302 if (a->type == LWTUNNEL_ENCAP_NONE || 303 a->type > LWTUNNEL_ENCAP_MAX) 304 return 0; 305 306 rcu_read_lock(); 307 ops = rcu_dereference(lwtun_encaps[a->type]); 308 if (likely(ops && ops->cmp_encap)) 309 ret = ops->cmp_encap(a, b); 310 rcu_read_unlock(); 311 312 return ret; 313 } 314 EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); 315 316 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) 317 { 318 struct dst_entry *dst = skb_dst(skb); 319 const struct lwtunnel_encap_ops *ops; 320 struct lwtunnel_state *lwtstate; 321 int ret = -EINVAL; 322 323 if (!dst) 324 goto drop; 325 lwtstate = dst->lwtstate; 326 327 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 328 lwtstate->type > LWTUNNEL_ENCAP_MAX) 329 return 0; 330 331 ret = -EOPNOTSUPP; 332 rcu_read_lock(); 333 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 334 if (likely(ops && ops->output)) 335 ret = ops->output(net, sk, skb); 336 rcu_read_unlock(); 337 338 if (ret == -EOPNOTSUPP) 339 goto drop; 340 341 return ret; 342 343 drop: 344 kfree_skb(skb); 345 346 return ret; 347 } 348 EXPORT_SYMBOL_GPL(lwtunnel_output); 349 350 int lwtunnel_xmit(struct sk_buff *skb) 351 { 352 struct dst_entry *dst = skb_dst(skb); 353 const struct lwtunnel_encap_ops *ops; 354 struct lwtunnel_state *lwtstate; 355 int ret = -EINVAL; 356 357 if (!dst) 358 goto drop; 359 360 lwtstate = dst->lwtstate; 361 362 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 363 lwtstate->type > LWTUNNEL_ENCAP_MAX) 364 return 0; 365 366 ret = -EOPNOTSUPP; 367 rcu_read_lock(); 368 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 369 if (likely(ops && ops->xmit)) 370 ret = ops->xmit(skb); 371 rcu_read_unlock(); 372 373 if (ret == -EOPNOTSUPP) 374 goto drop; 375 376 return ret; 377 378 drop: 379 kfree_skb(skb); 380 381 return ret; 382 } 383 EXPORT_SYMBOL_GPL(lwtunnel_xmit); 384 385 int lwtunnel_input(struct sk_buff *skb) 386 { 387 struct dst_entry *dst = skb_dst(skb); 388 const struct lwtunnel_encap_ops *ops; 389 struct lwtunnel_state *lwtstate; 390 int ret = -EINVAL; 391 392 if (!dst) 393 goto drop; 394 lwtstate = dst->lwtstate; 395 396 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 397 lwtstate->type > LWTUNNEL_ENCAP_MAX) 398 return 0; 399 400 ret = -EOPNOTSUPP; 401 rcu_read_lock(); 402 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 403 if (likely(ops && ops->input)) 404 ret = ops->input(skb); 405 rcu_read_unlock(); 406 407 if (ret == -EOPNOTSUPP) 408 goto drop; 409 410 return ret; 411 412 drop: 413 kfree_skb(skb); 414 415 return ret; 416 } 417 EXPORT_SYMBOL_GPL(lwtunnel_input); 418