1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * xfrm_output.c - Common IPsec encapsulation code. 4 * 5 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> 6 */ 7 8 #include <linux/errno.h> 9 #include <linux/module.h> 10 #include <linux/netdevice.h> 11 #include <linux/netfilter.h> 12 #include <linux/skbuff.h> 13 #include <linux/slab.h> 14 #include <linux/spinlock.h> 15 #include <net/dst.h> 16 #include <net/icmp.h> 17 #include <net/inet_ecn.h> 18 #include <net/xfrm.h> 19 20 #if IS_ENABLED(CONFIG_IPV6) 21 #include <net/ip6_route.h> 22 #include <net/ipv6_stubs.h> 23 #endif 24 25 #include "xfrm_inout.h" 26 27 static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); 28 static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); 29 30 static int xfrm_skb_check_space(struct sk_buff *skb) 31 { 32 struct dst_entry *dst = skb_dst(skb); 33 int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) 34 - skb_headroom(skb); 35 int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); 36 37 if (nhead <= 0) { 38 if (ntail <= 0) 39 return 0; 40 nhead = 0; 41 } else if (ntail < 0) 42 ntail = 0; 43 44 return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); 45 } 46 47 /* Children define the path of the packet through the 48 * Linux networking. Thus, destinations are stackable. 49 */ 50 51 static struct dst_entry *skb_dst_pop(struct sk_buff *skb) 52 { 53 struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb))); 54 55 skb_dst_drop(skb); 56 return child; 57 } 58 59 /* Add encapsulation header. 60 * 61 * The IP header will be moved forward to make space for the encapsulation 62 * header. 
 */
static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int ihl = iph->ihl * 4;

	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	/* Open up props.header_len bytes in front of the IP header and
	 * slide the IP header (including options) into the gap.
	 * mac_header is pointed at the protocol field of the moved header
	 * so x->type->output can rewrite it; skb->data is left at the
	 * payload.
	 */
	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + ihl;
	__skb_pull(skb, ihl);
	memmove(skb_network_header(skb), iph, ihl);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Find the offset at which a Mobile IPv6 DSTOPTS/ROUTING header has to be
 * inserted, walking the extension header chain from the IPv6 header.
 * On success *nexthdr points at the nexthdr field of the header that
 * precedes the insertion point.  Returns -EINVAL on a malformed chain.
 */
static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
{
	const unsigned char *nh = skb_network_header(skb);
	unsigned int offset = sizeof(struct ipv6hdr);
	unsigned int packet_len;
	int found_rhdr = 0;

	packet_len = skb_tail_pointer(skb) - nh;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset <= packet_len) {
		struct ipv6_opt_hdr *exthdr;

		switch (**nexthdr) {
		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			/* A non-type-0 routing header stays in front of
			 * an inserted ROUTING transform header.
			 */
			if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) {
				struct ipv6_rt_hdr *rt;

				rt = (struct ipv6_rt_hdr *)(nh + offset);
				if (rt->type != 0)
					return offset;
			}
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
			/* HAO MUST NOT appear more than once.
			 * XXX: It is better to try to find by the end of
			 * XXX: packet if HAO exists.
			 */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
				net_dbg_ratelimited("mip6: hao exists already, override\n");
				return offset;
			}

			if (found_rhdr)
				return offset;

			break;
		default:
			return offset;
		}

		/* Advance to the next extension header, bounds-checked
		 * against both the packet tail and IPV6_MAXPLEN.
		 */
		if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
			return -EINVAL;

		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
		offset += ipv6_optlen(exthdr);
		if (offset > IPV6_MAXPLEN)
			return -EINVAL;
		*nexthdr = &exthdr->nexthdr;
	}

	return -EINVAL;
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
/* Pick the insertion offset for the encapsulation header in an IPv6
 * packet: the MIPv6-mandated position for DSTOPTS/ROUTING transforms,
 * otherwise after the mutable extension headers.
 */
static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
{
	switch (x->type->proto) {
#if IS_ENABLED(CONFIG_IPV6_MIP6)
	case IPPROTO_DSTOPTS:
	case IPPROTO_ROUTING:
		return mip6_rthdr_offset(skb, prevhdr, x->type->proto);
#endif
	default:
		break;
	}

	return ip6_find_1stfragopt(skb, prevhdr);
}
#endif

/* Add encapsulation header.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the encapsulation header.
 */
static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	/* mac_header marks the nexthdr byte preceding the insertion point
	 * so the caller can patch in the transform protocol.
	 */
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add route optimization header space.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the route optimization header.
 */
static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);

	/* Record use of the state for route optimization. */
	x->lastused = ktime_get_real_seconds();

	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
 */
static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ip_beet_phdr *ph;
	struct iphdr *top_iph;
	int hdrlen, optlen;

	/* Inner IPv4 options are carried in a BEET pseudo header in front
	 * of the payload; hdrlen is the room reserved for it.
	 */
	hdrlen = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdrlen +
			       (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);

	xfrm4_beet_make_header(skb);

	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);

	top_iph = ip_hdr(skb);

	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->protocol;
		if (ph->padlen)
			memset(ph + 1,
			       IPOPT_NOP, ph->padlen);

		top_iph->protocol = IPPROTO_BEETPH;
		top_iph->ihl = sizeof(struct iphdr) / 4;
	}

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;

	return 0;
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per RFC 2401.
 */
static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	/* An encapsulated IPv6 packet that fits the IPv6 minimum MTU may
	 * be fragmented by the IPv4 tunnel, so DF is not copied for it.
	 */
	bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU);
	struct dst_entry *dst = skb_dst(skb);
	struct iphdr *top_iph;
	int flags;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ip_hdr(skb);

	top_iph->ihl = 5;
	top_iph->version = 4;

	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		top_iph->tos = 0;
	else
		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
					    XFRM_MODE_SKB_CB(skb)->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ?
			    0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));

	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;
	ip_select_ident(dev_net(dst->dev), skb, NULL);

	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
/* Add encapsulation header: construct the outer IPv6 tunnel header. */
static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *top_iph;
	int dsfield;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ipv6_hdr(skb);

	top_iph->version = 6;

	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
	       sizeof(top_iph->flow_lbl));
	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* Copying the inner DSCP into the outer header is optional per SA;
	 * ECN bits are still folded in via INET_ECN_encapsulate().
	 */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		dsfield = 0;
	else
		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
	if (x->props.flags & XFRM_STATE_NOECN)
		dsfield &= ~INET_ECN_MASK;
	ipv6_change_dsfield(top_iph, 0, dsfield);
	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}

/* Add encapsulation header per the BEET draft with an IPv6 outer header. */
static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *top_iph;
	struct ip_beet_phdr *ph;
	int optlen, hdr_len;

	hdr_len = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdr_len);
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);

	xfrm6_beet_make_header(skb);

	top_iph = ipv6_hdr(skb);
	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		/* Inner IPv4 options ride in a BEET pseudo header,
		 * padded to a 4-byte boundary with IPOPT_NOP.
		 */
		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->nexthdr;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->nexthdr = IPPROTO_BEETPH;
	}

	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}
#endif

/* Add encapsulation header.
 *
 * On exit, the transport header will be set to the start of the
 * encapsulation header to be filled in by x->type->output and the mac
 * header will be set to the nextheader (protocol for IPv4) field of the
 * extension header directly preceding the encapsulation header, or in
 * its absence, that of the top IP header.
 * The value of the network header will always point to the top IP header
 * while skb->data will point to the payload.
 */
static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
	skb->protocol = htons(ETH_P_IP);

	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
		return xfrm4_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm4_tunnel_encap_add(x, skb);
	}

	/* Only BEET and TUNNEL reach this function (see dispatcher). */
	WARN_ON_ONCE(1);
	return -EOPNOTSUPP;
}

static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	skb->ignore_df = 1;
	skb->protocol = htons(ETH_P_IPV6);

	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
		return xfrm6_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm6_tunnel_encap_add(x, skb);
	default:
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}
#endif
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
}

/* Dispatch to the per-family, per-mode encapsulation routine. */
static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
	case XFRM_MODE_TUNNEL:
		if (x->outer_mode.family == AF_INET)
			return xfrm4_prepare_output(x, skb);
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_prepare_output(x, skb);
		break;
	case XFRM_MODE_TRANSPORT:
		if (x->outer_mode.family == AF_INET)
			return xfrm4_transport_output(x, skb);
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_transport_output(x, skb);
		break;
	case XFRM_MODE_ROUTEOPTIMIZATION:
		/* Route optimization is IPv6-only. */
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_ro_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	return -EOPNOTSUPP;
}

#if IS_ENABLED(CONFIG_NET_PKTGEN)
/* Test hook: lets pktgen exercise the encapsulation path directly. */
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct
				  sk_buff *skb)
{
	return xfrm_outer_mode_output(x, skb);
}
EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
#endif

/* Apply one transform (or several, for stacked non-tunnel states) to the
 * skb.  Called with err > 0 for a fresh packet; err <= 0 resumes after an
 * asynchronous crypto completion.  The skb is consumed on error.
 */
static int xfrm_output_one(struct sk_buff *skb, int err)
{
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_state *x = dst->xfrm;
	struct net *net = xs_net(x);

	if (err <= 0)
		goto resume;

	do {
		err = xfrm_skb_check_space(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			goto error_nolock;
		}

		skb->mark = xfrm_smark_get(skb->mark, x);

		err = xfrm_outer_mode_output(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
			goto error_nolock;
		}

		/* Validity, expiry, replay and lifetime accounting are all
		 * done under the state lock.
		 */
		spin_lock_bh(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
			err = -EINVAL;
			goto error;
		}

		err = xfrm_state_check_expire(x);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
			goto error;
		}

		err = xfrm_replay_overflow(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
			goto error;
		}

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock_bh(&x->lock);

		skb_dst_force(skb);
		if (!skb_dst(skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}

		if (xfrm_offload(skb)) {
			x->type_offload->encap(x, skb);
		} else {
			/* Inner headers are invalid now.
			 */
			skb->encapsulation = 0;

			/* -EINPROGRESS means async crypto will re-enter
			 * via xfrm_output_resume() later.
			 */
			err = x->type->output(x, skb);
			if (err == -EINPROGRESS)
				goto out;
		}

resume:
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			goto error_nolock;
		}

		dst = skb_dst_pop(skb);
		if (!dst) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}
		skb_dst_set(skb, dst);
		x = dst->xfrm;
	} while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));

	return 0;

error:
	spin_unlock_bh(&x->lock);
error_nolock:
	kfree_skb(skb);
out:
	return err;
}

/* Run the packet through every stacked xfrm dst, re-entering local-out and
 * the netfilter POST_ROUTING hook between transforms.  Also the resume
 * entry point for asynchronous crypto completions (err carries the async
 * result on resume, 1 for a fresh packet).
 */
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
{
	struct net *net = xs_net(skb_dst(skb)->xfrm);

	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
		nf_reset_ct(skb);

		err = skb_dst(skb)->ops->local_out(net, sk, skb);
		if (unlikely(err != 1))
			goto out;

		/* No further xfrm dst: hand off to the plain output path. */
		if (!skb_dst(skb)->xfrm)
			return dst_output(net, sk, skb);

		err = nf_hook(skb_dst(skb)->ops->family,
			      NF_INET_POST_ROUTING, net, sk, skb,
			      NULL, skb_dst(skb)->dev, xfrm_output2);
		if (unlikely(err != 1))
			goto out;
	}

	if (err == -EINPROGRESS)
		err = 0;

out:
	return err;
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);

static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return xfrm_output_resume(sk, skb, 1);
}

/* Software-segment a GSO skb and push each segment through the transform
 * path individually.  Consumes the original skb.
 */
static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *segs, *nskb;

	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET);
	BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = skb_gso_segment(skb, 0);
	kfree_skb(skb);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = xfrm_output2(net, sk,
				   segs);

		if (unlikely(err)) {
			/* Drop the not-yet-processed tail of the list. */
			kfree_skb_list(nskb);
			return err;
		}
	}

	return 0;
}

/* For partial checksum offload, the outer header checksum is calculated
 * by software and the inner header checksum is calculated by hardware.
 * This requires hardware to know the inner packet type to calculate
 * the inner header checksum.  Save inner ip protocol here to avoid
 * traversing the packet in the vendor's xmit code.
 * For IPsec tunnel mode save the ip protocol from the IP header of the
 * plain text packet.  Otherwise If the encap type is IPIP, just save
 * skb->inner_ipproto in any other case get the ip protocol from the IP
 * header.
 */
static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x)
{
	struct xfrm_offload *xo = xfrm_offload(skb);
	const struct ethhdr *eth;

	if (!xo)
		return;

	if (x->outer_mode.encap == XFRM_MODE_TUNNEL) {
		switch (x->outer_mode.family) {
		case AF_INET:
			xo->inner_ipproto = ip_hdr(skb)->protocol;
			break;
		case AF_INET6:
			xo->inner_ipproto = ipv6_hdr(skb)->nexthdr;
			break;
		default:
			break;
		}

		return;
	}

	/* non-Tunnel Mode */
	if (!skb->encapsulation)
		return;

	if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
		xo->inner_ipproto = skb->inner_ipproto;
		return;
	}

	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
		return;

	/* Ethernet-encapsulated inner packet: classify by ethertype. */
	eth = (struct ethhdr *)skb_inner_mac_header(skb);

	switch (ntohs(eth->h_proto)) {
	case ETH_P_IPV6:
		xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr;
		break;
	case ETH_P_IP:
		xo->inner_ipproto = inner_ip_hdr(skb)->protocol;
		break;
	}
}

/* Entry point from the protocol output paths.  Resets the control block,
 * sets up the offload secpath when the device can handle this state, and
 * hands the skb (segmented in software if necessary) to xfrm_output2().
 */
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct xfrm_state *x = skb_dst(skb)->xfrm;
	int err;

	switch (x->outer_mode.family) {
	case AF_INET:
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
		IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
		break;
	case AF_INET6:
		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));

		IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
		break;
	}

	secpath_reset(skb);

	if (xfrm_dev_offload_ok(skb, x)) {
		struct sec_path *sp;

		/* Record the state in a fresh secpath so the driver can
		 * find it at transmit time.
		 */
		sp = secpath_set(skb);
		if (!sp) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -ENOMEM;
		}

		sp->olen++;
		sp->xvec[sp->len++] = x;
		xfrm_state_hold(x);

		xfrm_get_inner_ipproto(skb, x);
		skb->encapsulation = 1;

		if (skb_is_gso(skb)) {
			if (skb->inner_protocol)
				return xfrm_output_gso(net, sk, skb);

			skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
			goto out;
		}

		if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
			goto out;
	} else {
		if (skb_is_gso(skb))
			return xfrm_output_gso(net, sk, skb);
	}

	/* No hardware checksum offload: resolve partial checksums now. */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		err = skb_checksum_help(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return err;
		}
	}

out:
	return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);

/* Enforce the tunnel path MTU for DF packets: report EMSGSIZE and send
 * either a local error (own socket) or ICMP FRAG_NEEDED (forwarded).
 */
static int xfrm4_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;

	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
		goto out;

	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
		goto out;

	mtu = dst_mtu(skb_dst(skb));
	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
		skb->protocol = htons(ETH_P_IP);

		if (skb->sk)
			xfrm_local_error(skb, mtu);
		else
			icmp_send(skb, ICMP_DEST_UNREACH,
				  ICMP_FRAG_NEEDED, htonl(mtu));
		ret = -EMSGSIZE;
	}
out:
	return ret;
}

static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	/* BEET has no room to carry inner IPv4 fragmentation state. */
	if (x->outer_mode.encap == XFRM_MODE_BEET &&
	    ip_is_fragment(ip_hdr(skb))) {
		net_warn_ratelimited("BEET mode doesn't support inner IPv4 fragments\n");
		return -EAFNOSUPPORT;
	}

	err = xfrm4_tunnel_check_size(skb);
	if (err)
		return err;

	/* Stash the inner protocol before the outer mode rewrites it. */
	XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;

	xfrm4_extract_header(skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of xfrm4_tunnel_check_size(): reports the path MTU
 * via RXPMTU, a local error or ICMPV6_PKT_TOOBIG depending on the socket.
 */
static int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;
	struct dst_entry *dst = skb_dst(skb);

	if (skb->ignore_df)
		goto out;

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
		skb->dev = dst->dev;
		skb->protocol = htons(ETH_P_IPV6);

		if (xfrm6_local_dontfrag(skb->sk))
			ipv6_stub->xfrm6_local_rxpmtu(skb, mtu);
		else if (skb->sk)
			xfrm_local_error(skb, mtu);
		else
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		ret = -EMSGSIZE;
	}
out:
	return ret;
}
#endif

static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm6_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;

	xfrm6_extract_header(skb);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Validate the inner packet and stash its header fields before the outer
 * mode rewrites them.  The inner family may differ from the outer one
 * (e.g. IPv6-over-IPv4 tunnels); with an AF_UNSPEC selector it is derived
 * from the dst family.
 */
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	const struct xfrm_mode *inner_mode;

	if (x->sel.family == AF_UNSPEC)
		inner_mode = xfrm_ip2inner_mode(x,
				xfrm_af2proto(skb_dst(skb)->ops->family));
	else
		inner_mode = &x->inner_mode;

	if (inner_mode == NULL)
		return -EAFNOSUPPORT;

	switch (inner_mode->family) {
	case AF_INET:
		return
xfrm4_extract_output(x, skb); 883 case AF_INET6: 884 return xfrm6_extract_output(x, skb); 885 } 886 887 return -EAFNOSUPPORT; 888 } 889 890 void xfrm_local_error(struct sk_buff *skb, int mtu) 891 { 892 unsigned int proto; 893 struct xfrm_state_afinfo *afinfo; 894 895 if (skb->protocol == htons(ETH_P_IP)) 896 proto = AF_INET; 897 else if (skb->protocol == htons(ETH_P_IPV6) && 898 skb->sk->sk_family == AF_INET6) 899 proto = AF_INET6; 900 else 901 return; 902 903 afinfo = xfrm_state_get_afinfo(proto); 904 if (afinfo) { 905 afinfo->local_error(skb, mtu); 906 rcu_read_unlock(); 907 } 908 } 909 EXPORT_SYMBOL_GPL(xfrm_local_error); 910