// SPDX-License-Identifier: GPL-2.0
/*
 * xfrm_input.c
 *
 * Changes:
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific portion
 *
 */

#include <linux/bottom_half.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/percpu.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>

struct xfrm_trans_tasklet {
	struct tasklet_struct tasklet;
	struct sk_buff_head queue;
};

struct xfrm_trans_cb {
	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
};

#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))

static struct kmem_cache *secpath_cachep __read_mostly;

static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];

static struct gro_cells gro_cells;
static struct net_device xfrm_napi_dev;

static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);

int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
{
	int err = 0;

	if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo)))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
		err = -EEXIST;
	else
		rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_input_register_afinfo);

int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo)
{
	int err = 0;

	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
	}
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	synchronize_rcu();
	return err;
}
EXPORT_SYMBOL(xfrm_input_unregister_afinfo);

static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
{
	const struct xfrm_input_afinfo *afinfo;

	if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo)))
		return NULL;

	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_input_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
		       int err)
{
	int ret;
	const struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family);

	if (!afinfo)
		return -EAFNOSUPPORT;

	ret = afinfo->callback(skb, protocol, err);
	rcu_read_unlock();

	return ret;
}

void __secpath_destroy(struct sec_path *sp)
{
	int i;
	for (i = 0; i < sp->len; i++)
		xfrm_state_put(sp->xvec[i]);
	kmem_cache_free(secpath_cachep, sp);
}
EXPORT_SYMBOL(__secpath_destroy);

struct sec_path *secpath_dup(struct sec_path *src)
{
	struct sec_path *sp;

	sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC);
	if (!sp)
		return NULL;

	sp->len = 0;
	sp->olen = 0;

	memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH]));

	if (src) {
		int i;

		memcpy(sp, src, sizeof(*sp));
		for (i = 0; i < sp->len; i++)
			xfrm_state_hold(sp->xvec[i]);
	}
	refcount_set(&sp->refcnt, 1);
	return sp;
}
EXPORT_SYMBOL(secpath_dup);

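/*
 * Usage sketch (illustrative only; it mirrors the state-lookup loop in
 * xfrm_input() further below): secpath_set() makes skb->sp safe to modify,
 * then each matched state is appended to sp->xvec with its reference held:
 *
 *	if (secpath_set(skb))
 *		goto drop;
 *	...
 *	skb->sp->xvec[skb->sp->len++] = x;	// ref taken by xfrm_state_lookup()
 *
 * Those references are dropped in __secpath_destroy() once the final
 * secpath_put() releases the secpath.
 */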
int secpath_set(struct sk_buff *skb)
{
	struct sec_path *sp;

	/* Allocate new secpath or COW existing one. */
	if (!skb->sp || refcount_read(&skb->sp->refcnt) != 1) {
		sp = secpath_dup(skb->sp);
		if (!sp)
			return -ENOMEM;

		if (skb->sp)
			secpath_put(skb->sp);
		skb->sp = sp;
	}
	return 0;
}
EXPORT_SYMBOL(secpath_set);

/* Fetch spi and seq from ipsec header */

int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
{
	int offset, offset_seq;
	int hlen;

	switch (nexthdr) {
	case IPPROTO_AH:
		hlen = sizeof(struct ip_auth_hdr);
		offset = offsetof(struct ip_auth_hdr, spi);
		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
		break;
	case IPPROTO_ESP:
		hlen = sizeof(struct ip_esp_hdr);
		offset = offsetof(struct ip_esp_hdr, spi);
		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
		break;
	case IPPROTO_COMP:
		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
			return -EINVAL;
		*spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2)));
		*seq = 0;
		return 0;
	default:
		return 1;
	}

	if (!pskb_may_pull(skb, hlen))
		return -EINVAL;

	*spi = *(__be32 *)(skb_transport_header(skb) + offset);
	*seq = *(__be32 *)(skb_transport_header(skb) + offset_seq);
	return 0;
}
EXPORT_SYMBOL(xfrm_parse_spi);

int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
{
	struct xfrm_mode *inner_mode = x->inner_mode;
	int err;

	err = x->outer_mode->afinfo->extract_input(x, skb);
	if (err)
		return err;

	if (x->sel.family == AF_UNSPEC) {
		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
		if (inner_mode == NULL)
			return -EAFNOSUPPORT;
	}

	skb->protocol = inner_mode->afinfo->eth_proto;
	return inner_mode->input2(x, skb);
}
EXPORT_SYMBOL(xfrm_prepare_input);

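/*
 * xfrm_input - main IPsec receive path.
 *
 * How encap_type selects the entry mode (summary inferred from the checks
 * in the function body below):
 *
 *	encap_type >  0		UDP-encapsulated input (e.g. ESP-in-UDP);
 *				must match x->encap->encap_type.
 *	encap_type == 0		ordinary IPsec input.
 *	encap_type == -1	asynchronous resumption after crypto
 *				completion (see xfrm_input_resume()).
 *	encap_type <  -1	entered from the GRO code path.
 *
 * spi may be passed as 0, in which case it is parsed from the packet via
 * xfrm_parse_spi().
 */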
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
{
	struct net *net = dev_net(skb->dev);
	int err;
	__be32 seq;
	__be32 seq_hi;
	struct xfrm_state *x = NULL;
	xfrm_address_t *daddr;
	struct xfrm_mode *inner_mode;
	u32 mark = skb->mark;
	unsigned int family = AF_UNSPEC;
	int decaps = 0;
	int async = 0;
	bool xfrm_gro = false;
	bool crypto_done = false;
	struct xfrm_offload *xo = xfrm_offload(skb);

	if (encap_type < 0) {
		x = xfrm_input_state(skb);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);
			goto drop;
		}

		family = x->outer_mode->afinfo->family;

		/* An encap_type of -1 indicates async resumption. */
		if (encap_type == -1) {
			async = 1;
			seq = XFRM_SKB_CB(skb)->seq.input.low;
			goto resume;
		}

		/* encap_type < -1 indicates a GRO call. */
		encap_type = 0;
		seq = XFRM_SPI_SKB_CB(skb)->seq;

		if (xo && (xo->flags & CRYPTO_DONE)) {
			crypto_done = true;
			x = xfrm_input_state(skb);
			family = XFRM_SPI_SKB_CB(skb)->family;

			if (!(xo->status & CRYPTO_SUCCESS)) {
				if (xo->status &
				    (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
				     CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
				     CRYPTO_TUNNEL_AH_AUTH_FAILED |
				     CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {

					xfrm_audit_state_icvfail(x, skb,
								 x->type->proto);
					x->stats.integrity_failed++;
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
					goto drop;
				}

				if (xo->status & CRYPTO_INVALID_PROTOCOL) {
					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
					goto drop;
				}

				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto drop;
			}

			if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
				goto drop;
			}
		}

		goto lock;
	}

	family = XFRM_SPI_SKB_CB(skb)->family;

	/* if tunnel is present override skb->mark value with tunnel i_key */
	switch (family) {
	case AF_INET:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
		break;
	case AF_INET6:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
		break;
	}

	err = secpath_set(skb);
	if (err) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
		goto drop;
	}

	seq = 0;
	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		goto drop;
	}

	daddr = (xfrm_address_t *)(skb_network_header(skb) +
				   XFRM_SPI_SKB_CB(skb)->daddroff);
	do {
		if (skb->sp->len == XFRM_MAX_DEPTH) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
			goto drop;
		}

		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
		if (x == NULL) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
			xfrm_audit_state_notfound(skb, family, spi, seq);
			goto drop;
		}

		skb->sp->xvec[skb->sp->len++] = x;

lock:
		spin_lock(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);
			goto drop_unlock;
		}

		if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
			goto drop_unlock;
		}

		if (x->repl->check(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		if (xfrm_state_check_expire(x)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED);
			goto drop_unlock;
		}

		spin_unlock(&x->lock);

		if (xfrm_tunnel_check(skb, x, family)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		seq_hi = htonl(xfrm_replay_seqhi(x, seq));

		XFRM_SKB_CB(skb)->seq.input.low = seq;
		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;

		skb_dst_force(skb);
		dev_hold(skb->dev);

		if (crypto_done)
			nexthdr = x->type_offload->input_tail(x, skb);
		else
			nexthdr = x->type->input(x, skb);

		if (nexthdr == -EINPROGRESS)
			return 0;
resume:
		dev_put(skb->dev);

		spin_lock(&x->lock);
		if (nexthdr <= 0) {
			if (nexthdr == -EBADMSG) {
				xfrm_audit_state_icvfail(x, skb,
							 x->type->proto);
				x->stats.integrity_failed++;
			}
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
			goto drop_unlock;
		}

		/* only the first xfrm gets the encap type */
		encap_type = 0;

		if (async && x->repl->recheck(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		x->repl->advance(x, seq);

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock(&x->lock);

		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;

		inner_mode = x->inner_mode;

		if (x->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
			if (inner_mode == NULL) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
				goto drop;
			}
		}

		if (inner_mode->input(x, skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
			decaps = 1;
			break;
		}

		/*
		 * We need the inner address.  However, we only get here for
		 * transport mode so the outer address is identical.
		 */
		daddr = &x->id.daddr;
		family = x->outer_mode->afinfo->family;

		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
		if (err < 0) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
			goto drop;
		}
	} while (!err);

	err = xfrm_rcv_cb(skb, family, x->type->proto, 0);
	if (err)
		goto drop;

	nf_reset(skb);

	if (decaps) {
		if (skb->sp)
			skb->sp->olen = 0;
		skb_dst_drop(skb);
		gro_cells_receive(&gro_cells, skb);
		return 0;
	} else {
		xo = xfrm_offload(skb);
		if (xo)
			xfrm_gro = xo->flags & XFRM_GRO;

		err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
		if (xfrm_gro) {
			if (skb->sp)
				skb->sp->olen = 0;
			skb_dst_drop(skb);
			gro_cells_receive(&gro_cells, skb);
			return err;
		}

		return err;
	}

drop_unlock:
	spin_unlock(&x->lock);
drop:
	xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(xfrm_input);

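/*
 * Re-entry point for asynchronous crypto: when x->type->input() returns
 * -EINPROGRESS above, xfrm_input() bails out with 0 and the packet is
 * picked up again here, typically from the algorithm's completion
 * callback.  Passing encap_type == -1 makes xfrm_input() jump straight to
 * the resume: label with the state already attached to the skb.
 */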
int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
{
	return xfrm_input(skb, nexthdr, 0, -1);
}
EXPORT_SYMBOL(xfrm_input_resume);

static void xfrm_trans_reinject(unsigned long data)
{
	struct xfrm_trans_tasklet *trans = (void *)data;
	struct sk_buff_head queue;
	struct sk_buff *skb;

	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&trans->queue, &queue);

	while ((skb = __skb_dequeue(&queue)))
		XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
}

int xfrm_trans_queue(struct sk_buff *skb,
		     int (*finish)(struct net *, struct sock *,
				   struct sk_buff *))
{
	struct xfrm_trans_tasklet *trans;

	trans = this_cpu_ptr(&xfrm_trans_tasklet);

	if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
		return -ENOBUFS;

	XFRM_TRANS_SKB_CB(skb)->finish = finish;
	skb_queue_tail(&trans->queue, skb);
	tasklet_schedule(&trans->tasklet);
	return 0;
}
EXPORT_SYMBOL(xfrm_trans_queue);

void __init xfrm_input_init(void)
{
	int err;
	int i;

	init_dummy_netdev(&xfrm_napi_dev);
	err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
	if (err)
		gro_cells.cells = NULL;

	secpath_cachep = kmem_cache_create("secpath_cache",
					   sizeof(struct sec_path),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	for_each_possible_cpu(i) {
		struct xfrm_trans_tasklet *trans;

		trans = &per_cpu(xfrm_trans_tasklet, i);
		__skb_queue_head_init(&trans->queue);
		tasklet_init(&trans->tasklet, xfrm_trans_reinject,
			     (unsigned long)trans);
	}
}