1 /* 2 * xfrm_output.c - Common IPsec encapsulation code. 3 * 4 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #include <linux/errno.h> 13 #include <linux/module.h> 14 #include <linux/netdevice.h> 15 #include <linux/netfilter.h> 16 #include <linux/skbuff.h> 17 #include <linux/slab.h> 18 #include <linux/spinlock.h> 19 #include <net/dst.h> 20 #include <net/inet_ecn.h> 21 #include <net/xfrm.h> 22 23 #include "xfrm_inout.h" 24 25 static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); 26 static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); 27 28 static int xfrm_skb_check_space(struct sk_buff *skb) 29 { 30 struct dst_entry *dst = skb_dst(skb); 31 int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) 32 - skb_headroom(skb); 33 int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); 34 35 if (nhead <= 0) { 36 if (ntail <= 0) 37 return 0; 38 nhead = 0; 39 } else if (ntail < 0) 40 ntail = 0; 41 42 return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); 43 } 44 45 /* Children define the path of the packet through the 46 * Linux networking. Thus, destinations are stackable. 47 */ 48 49 static struct dst_entry *skb_dst_pop(struct sk_buff *skb) 50 { 51 struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb))); 52 53 skb_dst_drop(skb); 54 return child; 55 } 56 57 /* Add encapsulation header. 58 * 59 * The IP header will be moved forward to make space for the encapsulation 60 * header. 
61 */ 62 static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) 63 { 64 struct iphdr *iph = ip_hdr(skb); 65 int ihl = iph->ihl * 4; 66 67 skb_set_inner_transport_header(skb, skb_transport_offset(skb)); 68 69 skb_set_network_header(skb, -x->props.header_len); 70 skb->mac_header = skb->network_header + 71 offsetof(struct iphdr, protocol); 72 skb->transport_header = skb->network_header + ihl; 73 __skb_pull(skb, ihl); 74 memmove(skb_network_header(skb), iph, ihl); 75 return 0; 76 } 77 78 /* Add encapsulation header. 79 * 80 * The IP header and mutable extension headers will be moved forward to make 81 * space for the encapsulation header. 82 */ 83 static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) 84 { 85 #if IS_ENABLED(CONFIG_IPV6) 86 struct ipv6hdr *iph; 87 u8 *prevhdr; 88 int hdr_len; 89 90 iph = ipv6_hdr(skb); 91 skb_set_inner_transport_header(skb, skb_transport_offset(skb)); 92 93 hdr_len = x->type->hdr_offset(x, skb, &prevhdr); 94 if (hdr_len < 0) 95 return hdr_len; 96 skb_set_mac_header(skb, 97 (prevhdr - x->props.header_len) - skb->data); 98 skb_set_network_header(skb, -x->props.header_len); 99 skb->transport_header = skb->network_header + hdr_len; 100 __skb_pull(skb, hdr_len); 101 memmove(ipv6_hdr(skb), iph, hdr_len); 102 return 0; 103 #else 104 WARN_ON_ONCE(1); 105 return -EAFNOSUPPORT; 106 #endif 107 } 108 109 /* Add route optimization header space. 110 * 111 * The IP header and mutable extension headers will be moved forward to make 112 * space for the route optimization header. 
 */
static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	/* Remember the current header position; it is the memmove source. */
	iph = ipv6_hdr(skb);

	/* Negative results from hdr_offset() are passed straight back. */
	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	/* mac_header follows *prevhdr to where it will sit after the move. */
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);

	/* Stamp the state with the current wall-clock seconds. */
	x->lastused = ktime_get_real_seconds();

	return 0;
#else
	/* Built without IPv6: this path should not be reachable. */
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
 *
 * When the control block reports a non-zero optlen, a BEET pseudo header
 * (struct ip_beet_phdr) is filled in at the new skb->data and the top
 * header's protocol is switched to IPPROTO_BEETPH.
 *
 * Returns 0 on success, or -EINVAL (with a warning) if optlen is negative.
 */
static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ip_beet_phdr *ph;
	struct iphdr *top_iph;
	int hdrlen, optlen;

	/* Extra bytes needed in front of the payload for the pseudo header. */
	hdrlen = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdrlen +
			       (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);

	/* Here the header is built BEFORE the pull; the IPv6 variant
	 * (xfrm6_beet_encap_add) does it in the opposite order.
	 */
	xfrm4_beet_make_header(skb);

	/* ph is the new skb->data, i.e. where the pseudo header goes. */
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);

	top_iph = ip_hdr(skb);

	if (unlikely(optlen)) {
		/* NOTE(review): this sign check runs after optlen has already
		 * been used for hdrlen and the pull above.
		 */
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->protocol;
		/* Padding bytes after the pseudo header are IPOPT_NOP. */
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->protocol = IPPROTO_BEETPH;
		top_iph->ihl = sizeof(struct iphdr) / 4;
	}

	/* Outer addresses come from the state, not from the inner packet. */
	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;

	return 0;
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per RFC 2401.
 *
 * Always returns 0.
 */
static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct iphdr *top_iph;
	int flags;

	/* Save the current (inner) header offsets before they are rewritten. */
	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ip_hdr(skb);

	/* Outer header carries no options: 5 words, version 4. */
	top_iph->ihl = 5;
	top_iph->version = 4;

	/* Protocol number is derived from the dst's address family. */
	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		top_iph->tos = 0;
	else
		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
					    XFRM_MODE_SKB_CB(skb)->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	/* With NOPMTUDISC the outer header gets frag_off 0; otherwise only
	 * the DF bit of the saved frag_off is carried over.
	 */
	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));

	/* TTL is taken from the child dst's hop limit. */
	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;
	ip_select_ident(dev_net(dst->dev), skb, NULL);

	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 tunnel mode: build the outer IPv6 header in front of the payload.
 *
 * Flow label and traffic class are taken from the values saved in the
 * skb control block, subject to the DONT_ENCAP_DSCP and NOECN state
 * flags.  Always returns 0.
 */
static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *top_iph;
	int dsfield;

	/* Save the current (inner) header offsets before they are rewritten. */
	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ipv6_hdr(skb);

	top_iph->version = 6;

	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
	       sizeof(top_iph->flow_lbl));
	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* Same DSCP/ECN policy as the IPv4 tunnel path. */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		dsfield = 0;
	else
		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
	if (x->props.flags & XFRM_STATE_NOECN)
		dsfield &= ~INET_ECN_MASK;
	ipv6_change_dsfield(top_iph, 0, dsfield);
	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}

/* IPv6 BEET mode: build the outer IPv6 header, optionally preceded by a
 * BEET pseudo header when the control block reports a non-zero optlen.
 *
 * Returns 0 on success, or -EINVAL (with a warning) if optlen is negative.
 */
static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *top_iph;
	struct ip_beet_phdr *ph;
	int optlen, hdr_len;

	/* Extra bytes needed in front of the payload for the pseudo header. */
	hdr_len = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdr_len);
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	/* Pull first, then build the header: the reverse of the order used
	 * in xfrm4_beet_encap_add.
	 */
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);

	xfrm6_beet_make_header(skb);

	top_iph = ipv6_hdr(skb);
	if (unlikely(optlen)) {
		/* NOTE(review): this sign check runs after optlen has already
		 * been used for hdr_len and the pull above.
		 */
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->nexthdr;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->nexthdr = IPPROTO_BEETPH;
	}

	/* Outer addresses come from the state, not from the inner packet. */
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}
#endif

/* Add encapsulation header.
 *
 * On exit, the transport header will be set to the start of the
 * encapsulation header to be filled in by x->type->output and the mac
 * header will be set to the nextheader (protocol for IPv4) field of the
 * extension header directly preceding the encapsulation header, or in
 * its absence, that of the top IP header.
 * The value of the network header will always point to the top IP header
 * while skb->data will point to the payload.
 */
static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	/* Let the inner address family extract what it needs from the
	 * packet before the outer header is built.
	 */
	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
	skb->protocol = htons(ETH_P_IP);

	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
		return xfrm4_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm4_tunnel_encap_add(x, skb);
	}

	/* Any other encap value is unexpected here. */
	WARN_ON_ONCE(1);
	return -EOPNOTSUPP;
}

/* IPv6 counterpart of xfrm4_prepare_output.
 *
 * Returns the encap helper's result, -EOPNOTSUPP for an unexpected encap
 * mode, or -EAFNOSUPPORT when the kernel is built without IPv6.
 */
static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	skb->ignore_df = 1;
	skb->protocol = htons(ETH_P_IPV6);

	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
		return xfrm6_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm6_tunnel_encap_add(x, skb);
	default:
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}
#endif
	/* Only reachable when CONFIG_IPV6 is off: every switch arm above
	 * returns.
	 */
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
}

/* Dispatch on the state's outer encapsulation mode and address family.
 *
 * Returns the selected helper's result, or -EOPNOTSUPP when no helper
 * matches the (encap, family) pair.
 */
static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
	case XFRM_MODE_TUNNEL:
		if (x->outer_mode.family == AF_INET)
			return xfrm4_prepare_output(x, skb);
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_prepare_output(x, skb);
		break;
	case XFRM_MODE_TRANSPORT:
		if (x->outer_mode.family == AF_INET)
			return xfrm4_transport_output(x, skb);
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_transport_output(x, skb);
		break;
	case XFRM_MODE_ROUTEOPTIMIZATION:
		/* Route optimization is only handled for IPv6. */
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_ro_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	return -EOPNOTSUPP;
}

#if IS_ENABLED(CONFIG_NET_PKTGEN)
/* Exported wrapper around the static xfrm_outer_mode_output(); only built
 * when CONFIG_NET_PKTGEN is enabled.
 */
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	return xfrm_outer_mode_output(x, skb);
}
EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
#endif

/* Apply the transform(s) attached to the skb's dst, one state per loop
 * iteration, until the dst has no xfrm state left or the next state's
 * outer mode carries XFRM_MODE_FLAG_TUNNEL.
 *
 * @err: a value > 0 starts processing from the top of the loop; a value
 *       <= 0 jumps straight to the "resume" label, where it is treated as
 *       the result of the type->output step.
 *
 * Returns 0 on success.  On failure the skb is freed and a negative errno
 * is returned, except for -EINPROGRESS from x->type->output(), which is
 * returned WITHOUT freeing the skb.
 */
static int xfrm_output_one(struct sk_buff *skb, int err)
{
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_state *x = dst->xfrm;
	struct net *net = xs_net(x);

	/* Jumps into the middle of the loop body below. */
	if (err <= 0)
		goto resume;

	do {
		err = xfrm_skb_check_space(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			goto error_nolock;
		}

		skb->mark = xfrm_smark_get(skb->mark, x);

		err = xfrm_outer_mode_output(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
			goto error_nolock;
		}

		/* State validity, expiry, replay overflow and the lifetime
		 * counters are all handled under x->lock.
		 */
		spin_lock_bh(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
			err = -EINVAL;
			goto error;
		}

		err = xfrm_state_check_expire(x);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
			goto error;
		}

		err = x->repl->overflow(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
			goto error;
		}

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock_bh(&x->lock);

		/* skb_dst_force() may leave the skb without a dst; treat
		 * that as unreachable.
		 */
		skb_dst_force(skb);
		if (!skb_dst(skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}

		if (xfrm_offload(skb)) {
			x->type_offload->encap(x, skb);
		} else {
			/* Inner headers are invalid now. */
			skb->encapsulation = 0;

			err = x->type->output(x, skb);
			/* Leave without freeing the skb; presumably it is
			 * finished later via xfrm_output_resume() - confirm
			 * against the type->output implementations.
			 */
			if (err == -EINPROGRESS)
				goto out;
		}

resume:
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			goto error_nolock;
		}

		/* Move on to the child dst and its state, if any. */
		dst = skb_dst_pop(skb);
		if (!dst) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}
		skb_dst_set(skb, dst);
		x = dst->xfrm;
	} while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));

	return 0;

error:
	/* Only reached from the section that holds x->lock. */
	spin_unlock_bh(&x->lock);
error_nolock:
	kfree_skb(skb);
out:
	return err;
}

/* Drive xfrm_output_one() until the skb's dst has no transform left, then
 * hand the packet to dst_output().
 *
 * @err: forwarded to the first xfrm_output_one() call (see there).
 *
 * Between rounds the packet goes through the dst's local_out hook and, if
 * more transforms remain, the NF_INET_POST_ROUTING netfilter hook with
 * xfrm_output2 as the continuation.  Any hook result other than 1 ends
 * processing and is returned.  -EINPROGRESS from xfrm_output_one() is
 * reported to the caller as 0.
 */
int xfrm_output_resume(struct sk_buff *skb, int err)
{
	struct net *net = xs_net(skb_dst(skb)->xfrm);

	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
		nf_reset(skb);

		err = skb_dst(skb)->ops->local_out(net, skb->sk, skb);
		if (unlikely(err != 1))
			goto out;

		/* No transform left on this dst: plain output. */
		if (!skb_dst(skb)->xfrm)
			return dst_output(net, skb->sk, skb);

		err = nf_hook(skb_dst(skb)->ops->family,
			      NF_INET_POST_ROUTING, net, skb->sk, skb,
			      NULL, skb_dst(skb)->dev, xfrm_output2);
		if (unlikely(err != 1))
			goto out;
	}

	if (err == -EINPROGRESS)
		err = 0;

out:
	return err;
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);

/* Start (or continue from a hook) transform processing for @skb.  @net and
 * @sk are unused; err = 1 makes xfrm_output_one() start from the top of
 * its loop.
 */
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return xfrm_output_resume(skb, 1);
}

/* Segment a GSO skb and send each segment through xfrm_output2().
 *
 * The original skb is freed right after segmentation.  On the first
 * failing segment the remaining ones are freed and that error is returned.
 * Returns -EINVAL if segmentation yields no segments.
 */
static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *segs;

	/* Compile-time check: the IPv4/IPv6 control blocks must fit below
	 * SKB_SGO_CB_OFFSET.
	 */
	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
	BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET);
	segs = skb_gso_segment(skb, 0);
	kfree_skb(skb);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	do {
		/* Grab the successor before the segment is unlinked. */
		struct sk_buff *nskb = segs->next;
		int err;

		skb_mark_not_on_list(segs);
		err = xfrm_output2(net, sk, segs);

		if (unlikely(err)) {
			kfree_skb_list(nskb);
			return err;
		}

		segs = nskb;
	} while (segs);

	return 0;
}

/* Entry point for transform output of @skb.
 *
 * Resets the secpath, sets up the offload bookkeeping when
 * xfrm_dev_offload_ok() says so, resolves GSO and CHECKSUM_PARTIAL in
 * software where needed, then continues in xfrm_output2().
 *
 * Returns the result of the output path; on the local failure paths
 * (-ENOMEM from secpath_set(), skb_checksum_help() error) the skb is freed.
 */
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct xfrm_state *x = skb_dst(skb)->xfrm;
	int err;

	secpath_reset(skb);

	if (xfrm_dev_offload_ok(skb, x)) {
		struct sec_path *sp;

		sp = secpath_set(skb);
		if (!sp) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -ENOMEM;
		}
		skb->encapsulation = 1;

		/* Record the state in the secpath and hold it for as long as
		 * it is referenced there.
		 */
		sp->olen++;
		sp->xvec[sp->len++] = x;
		xfrm_state_hold(x);

		/* GSO packets on the offload path are tagged SKB_GSO_ESP
		 * and are not segmented here.
		 */
		if (skb_is_gso(skb)) {
			skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;

			return xfrm_output2(net, sk, skb);
		}

		/* Device advertises NETIF_F_HW_ESP_TX_CSUM: skip the
		 * software checksum below.
		 */
		if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
			goto out;
	}

	if (skb_is_gso(skb))
		return xfrm_output_gso(net, sk, skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		err = skb_checksum_help(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return err;
		}
	}

out:
	return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);

/* Run the inner address family's extract_output() callback on @skb.
 *
 * The inner mode is x->inner_mode, unless the selector family is
 * AF_UNSPEC, in which case it is looked up from the dst's family.
 *
 * Returns the callback's result, or -EAFNOSUPPORT when no inner mode or
 * no afinfo is available.
 */
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	const struct xfrm_state_afinfo *afinfo;
	const struct xfrm_mode *inner_mode;
	int err = -EAFNOSUPPORT;

	if (x->sel.family == AF_UNSPEC)
		inner_mode = xfrm_ip2inner_mode(x,
				xfrm_af2proto(skb_dst(skb)->ops->family));
	else
		inner_mode = &x->inner_mode;

	if (inner_mode == NULL)
		return -EAFNOSUPPORT;

	/* The afinfo pointer is only used inside this RCU read section. */
	rcu_read_lock();
	afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
	if (likely(afinfo))
		err = afinfo->extract_output(x, skb);
	rcu_read_unlock();

	return err;
}

/* Forward a local error with the given @mtu to the address family's
 * local_error() handler, chosen from skb->protocol.  Protocols other than
 * IPv4/IPv6 are silently ignored.
 */
void xfrm_local_error(struct sk_buff *skb, int mtu)
{
	unsigned int proto;
	struct xfrm_state_afinfo *afinfo;

	if (skb->protocol == htons(ETH_P_IP))
		proto = AF_INET;
	else if (skb->protocol == htons(ETH_P_IPV6))
		proto = AF_INET6;
	else
		return;

	afinfo = xfrm_state_get_afinfo(proto);
	if (afinfo) {
		afinfo->local_error(skb, mtu);
		/* NOTE(review): no matching rcu_read_lock() is visible here;
		 * presumably xfrm_state_get_afinfo() returns with the RCU
		 * read lock held when it succeeds - confirm against its
		 * definition.
		 */
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(xfrm_local_error);