1 /* 2 * lwtunnel Infrastructure for light weight tunnels like mpls 3 * 4 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 */ 12 13 #include <linux/capability.h> 14 #include <linux/module.h> 15 #include <linux/types.h> 16 #include <linux/kernel.h> 17 #include <linux/slab.h> 18 #include <linux/uaccess.h> 19 #include <linux/skbuff.h> 20 #include <linux/netdevice.h> 21 #include <linux/lwtunnel.h> 22 #include <linux/in.h> 23 #include <linux/init.h> 24 #include <linux/err.h> 25 26 #include <net/lwtunnel.h> 27 #include <net/rtnetlink.h> 28 #include <net/ip6_fib.h> 29 #include <net/nexthop.h> 30 31 #ifdef CONFIG_MODULES 32 33 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) 34 { 35 /* Only lwt encaps implemented without using an interface for 36 * the encap need to return a string here. 37 */ 38 switch (encap_type) { 39 case LWTUNNEL_ENCAP_MPLS: 40 return "MPLS"; 41 case LWTUNNEL_ENCAP_ILA: 42 return "ILA"; 43 case LWTUNNEL_ENCAP_SEG6: 44 return "SEG6"; 45 case LWTUNNEL_ENCAP_BPF: 46 return "BPF"; 47 case LWTUNNEL_ENCAP_IP6: 48 case LWTUNNEL_ENCAP_IP: 49 case LWTUNNEL_ENCAP_NONE: 50 case __LWTUNNEL_ENCAP_MAX: 51 /* should not have got here */ 52 WARN_ON(1); 53 break; 54 } 55 return NULL; 56 } 57 58 #endif /* CONFIG_MODULES */ 59 60 struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 61 { 62 struct lwtunnel_state *lws; 63 64 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); 65 66 return lws; 67 } 68 EXPORT_SYMBOL(lwtunnel_state_alloc); 69 70 static const struct lwtunnel_encap_ops __rcu * 71 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; 72 73 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, 74 unsigned int num) 75 { 76 if (num > LWTUNNEL_ENCAP_MAX) 77 return -ERANGE; 78 79 return !cmpxchg((const struct lwtunnel_encap_ops **) 80 &lwtun_encaps[num], 81 NULL, ops) ? 0 : -1; 82 } 83 EXPORT_SYMBOL(lwtunnel_encap_add_ops); 84 85 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, 86 unsigned int encap_type) 87 { 88 int ret; 89 90 if (encap_type == LWTUNNEL_ENCAP_NONE || 91 encap_type > LWTUNNEL_ENCAP_MAX) 92 return -ERANGE; 93 94 ret = (cmpxchg((const struct lwtunnel_encap_ops **) 95 &lwtun_encaps[encap_type], 96 ops, NULL) == ops) ? 0 : -1; 97 98 synchronize_net(); 99 100 return ret; 101 } 102 EXPORT_SYMBOL(lwtunnel_encap_del_ops); 103 104 int lwtunnel_build_state(u16 encap_type, 105 struct nlattr *encap, unsigned int family, 106 const void *cfg, struct lwtunnel_state **lws) 107 { 108 const struct lwtunnel_encap_ops *ops; 109 int ret = -EINVAL; 110 111 if (encap_type == LWTUNNEL_ENCAP_NONE || 112 encap_type > LWTUNNEL_ENCAP_MAX) 113 return ret; 114 115 ret = -EOPNOTSUPP; 116 rcu_read_lock(); 117 ops = rcu_dereference(lwtun_encaps[encap_type]); 118 if (likely(ops && ops->build_state && try_module_get(ops->owner))) { 119 ret = ops->build_state(encap, family, cfg, lws); 120 if (ret) 121 module_put(ops->owner); 122 } 123 rcu_read_unlock(); 124 125 return ret; 126 } 127 EXPORT_SYMBOL(lwtunnel_build_state); 128 129 int lwtunnel_valid_encap_type(u16 encap_type) 130 { 131 const struct lwtunnel_encap_ops *ops; 132 int ret = -EINVAL; 133 134 if (encap_type == LWTUNNEL_ENCAP_NONE || 135 encap_type > LWTUNNEL_ENCAP_MAX) 136 return ret; 137 138 rcu_read_lock(); 139 ops = rcu_dereference(lwtun_encaps[encap_type]); 140 rcu_read_unlock(); 141 #ifdef CONFIG_MODULES 142 if (!ops) { 143 const char *encap_type_str = lwtunnel_encap_str(encap_type); 144 145 if (encap_type_str) { 146 __rtnl_unlock(); 147 request_module("rtnl-lwt-%s", encap_type_str); 148 rtnl_lock(); 149 150 rcu_read_lock(); 151 ops = rcu_dereference(lwtun_encaps[encap_type]); 152 rcu_read_unlock(); 153 } 154 } 155 #endif 156 return ops ? 0 : -EOPNOTSUPP; 157 } 158 EXPORT_SYMBOL(lwtunnel_valid_encap_type); 159 160 int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) 161 { 162 struct rtnexthop *rtnh = (struct rtnexthop *)attr; 163 struct nlattr *nla_entype; 164 struct nlattr *attrs; 165 u16 encap_type; 166 int attrlen; 167 168 while (rtnh_ok(rtnh, remaining)) { 169 attrlen = rtnh_attrlen(rtnh); 170 if (attrlen > 0) { 171 attrs = rtnh_attrs(rtnh); 172 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 173 174 if (nla_entype) { 175 encap_type = nla_get_u16(nla_entype); 176 177 if (lwtunnel_valid_encap_type(encap_type) != 0) 178 return -EOPNOTSUPP; 179 } 180 } 181 rtnh = rtnh_next(rtnh, &remaining); 182 } 183 184 return 0; 185 } 186 EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr); 187 188 void lwtstate_free(struct lwtunnel_state *lws) 189 { 190 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; 191 192 if (ops->destroy_state) { 193 ops->destroy_state(lws); 194 kfree_rcu(lws, rcu); 195 } else { 196 kfree(lws); 197 } 198 module_put(ops->owner); 199 } 200 EXPORT_SYMBOL(lwtstate_free); 201 202 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) 203 { 204 const struct lwtunnel_encap_ops *ops; 205 struct nlattr *nest; 206 int ret = -EINVAL; 207 208 if (!lwtstate) 209 return 0; 210 211 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 212 lwtstate->type > LWTUNNEL_ENCAP_MAX) 213 return 0; 214 215 ret = -EOPNOTSUPP; 216 nest = nla_nest_start(skb, RTA_ENCAP); 217 rcu_read_lock(); 218 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 219 if (likely(ops && ops->fill_encap)) 220 ret = ops->fill_encap(skb, lwtstate); 221 rcu_read_unlock(); 222 223 if (ret) 224 goto nla_put_failure; 225 nla_nest_end(skb, nest); 226 ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type); 227 if (ret) 228 goto nla_put_failure; 229 230 return 0; 231 232 nla_put_failure: 233 nla_nest_cancel(skb, nest); 234 235 return (ret == -EOPNOTSUPP ? 0 : ret); 236 } 237 EXPORT_SYMBOL(lwtunnel_fill_encap); 238 239 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) 240 { 241 const struct lwtunnel_encap_ops *ops; 242 int ret = 0; 243 244 if (!lwtstate) 245 return 0; 246 247 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 248 lwtstate->type > LWTUNNEL_ENCAP_MAX) 249 return 0; 250 251 rcu_read_lock(); 252 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 253 if (likely(ops && ops->get_encap_size)) 254 ret = nla_total_size(ops->get_encap_size(lwtstate)); 255 rcu_read_unlock(); 256 257 return ret; 258 } 259 EXPORT_SYMBOL(lwtunnel_get_encap_size); 260 261 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) 262 { 263 const struct lwtunnel_encap_ops *ops; 264 int ret = 0; 265 266 if (!a && !b) 267 return 0; 268 269 if (!a || !b) 270 return 1; 271 272 if (a->type != b->type) 273 return 1; 274 275 if (a->type == LWTUNNEL_ENCAP_NONE || 276 a->type > LWTUNNEL_ENCAP_MAX) 277 return 0; 278 279 rcu_read_lock(); 280 ops = rcu_dereference(lwtun_encaps[a->type]); 281 if (likely(ops && ops->cmp_encap)) 282 ret = ops->cmp_encap(a, b); 283 rcu_read_unlock(); 284 285 return ret; 286 } 287 EXPORT_SYMBOL(lwtunnel_cmp_encap); 288 289 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) 290 { 291 struct dst_entry *dst = skb_dst(skb); 292 const struct lwtunnel_encap_ops *ops; 293 struct lwtunnel_state *lwtstate; 294 int ret = -EINVAL; 295 296 if (!dst) 297 goto drop; 298 lwtstate = dst->lwtstate; 299 300 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 301 lwtstate->type > LWTUNNEL_ENCAP_MAX) 302 return 0; 303 304 ret = -EOPNOTSUPP; 305 rcu_read_lock(); 306 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 307 if (likely(ops && ops->output)) 308 ret = ops->output(net, sk, skb); 309 rcu_read_unlock(); 310 311 if (ret == -EOPNOTSUPP) 312 goto drop; 313 314 return ret; 315 316 drop: 317 kfree_skb(skb); 318 319 return ret; 320 } 321 EXPORT_SYMBOL(lwtunnel_output); 322 323 int lwtunnel_xmit(struct sk_buff *skb) 324 { 325 struct dst_entry *dst = skb_dst(skb); 326 const struct lwtunnel_encap_ops *ops; 327 struct lwtunnel_state *lwtstate; 328 int ret = -EINVAL; 329 330 if (!dst) 331 goto drop; 332 333 lwtstate = dst->lwtstate; 334 335 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 336 lwtstate->type > LWTUNNEL_ENCAP_MAX) 337 return 0; 338 339 ret = -EOPNOTSUPP; 340 rcu_read_lock(); 341 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 342 if (likely(ops && ops->xmit)) 343 ret = ops->xmit(skb); 344 rcu_read_unlock(); 345 346 if (ret == -EOPNOTSUPP) 347 goto drop; 348 349 return ret; 350 351 drop: 352 kfree_skb(skb); 353 354 return ret; 355 } 356 EXPORT_SYMBOL(lwtunnel_xmit); 357 358 int lwtunnel_input(struct sk_buff *skb) 359 { 360 struct dst_entry *dst = skb_dst(skb); 361 const struct lwtunnel_encap_ops *ops; 362 struct lwtunnel_state *lwtstate; 363 int ret = -EINVAL; 364 365 if (!dst) 366 goto drop; 367 lwtstate = dst->lwtstate; 368 369 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 370 lwtstate->type > LWTUNNEL_ENCAP_MAX) 371 return 0; 372 373 ret = -EOPNOTSUPP; 374 rcu_read_lock(); 375 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 376 if (likely(ops && ops->input)) 377 ret = ops->input(skb); 378 rcu_read_unlock(); 379 380 if (ret == -EOPNOTSUPP) 381 goto drop; 382 383 return ret; 384 385 drop: 386 kfree_skb(skb); 387 388 return ret; 389 } 390 EXPORT_SYMBOL(lwtunnel_input); 391