// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * xfrm_output.c - Common IPsec encapsulation code.
 *
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 */

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_route.h>
#include <net/ipv6_stubs.h>
#endif

#include "xfrm_inout.h"

static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);

static int xfrm_skb_check_space(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
		- skb_headroom(skb);
	int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);

	if (nhead <= 0) {
		if (ntail <= 0)
			return 0;
		nhead = 0;
	} else if (ntail < 0)
		ntail = 0;

	return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
}

/* Children define the path of the packet through the Linux networking
 * stack. Thus, destinations are stackable.
 */

static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
{
	struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb)));

	skb_dst_drop(skb);
	return child;
}

/* Add encapsulation header.
 *
 * The IP header will be moved forward to make space for the encapsulation
 * header.
 */
static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int ihl = iph->ihl * 4;

	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + ihl;
	__skb_pull(skb, ihl);
	memmove(skb_network_header(skb), iph, ihl);
	return 0;
}
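
/* Resulting layout after xfrm4_transport_output(), for a state that needs
 * x->props.header_len bytes of encapsulation:
 *
 *	skb->network_header	copy of the original IPv4 header (ihl bytes)
 *	skb->transport_header	start of the header_len-byte gap reserved
 *				for the ESP/AH header, filled in later by
 *				x->type->output()
 *	skb->data		original transport payload
 *
 * skb->mac_header is reused to point at the IPv4 protocol field so the
 * type output function can locate and rewrite the next-protocol value.
 */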
#if IS_ENABLED(CONFIG_IPV6_MIP6)
static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
{
	const unsigned char *nh = skb_network_header(skb);
	unsigned int offset = sizeof(struct ipv6hdr);
	unsigned int packet_len;
	int found_rhdr = 0;

	packet_len = skb_tail_pointer(skb) - nh;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset <= packet_len) {
		struct ipv6_opt_hdr *exthdr;

		switch (**nexthdr) {
		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) {
				struct ipv6_rt_hdr *rt;

				rt = (struct ipv6_rt_hdr *)(nh + offset);
				if (rt->type != 0)
					return offset;
			}
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
			/* HAO MUST NOT appear more than once.
			 * XXX: It would be better to scan to the end of the
			 * XXX: packet to check whether a HAO exists.
			 */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
				net_dbg_ratelimited("mip6: hao exists already, override\n");
				return offset;
			}

			if (found_rhdr)
				return offset;

			break;
		default:
			return offset;
		}

		if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
			return -EINVAL;

		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
		offset += ipv6_optlen(exthdr);
		if (offset > IPV6_MAXPLEN)
			return -EINVAL;
		*nexthdr = &exthdr->nexthdr;
	}

	return -EINVAL;
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
{
	switch (x->type->proto) {
#if IS_ENABLED(CONFIG_IPV6_MIP6)
	case IPPROTO_DSTOPTS:
	case IPPROTO_ROUTING:
		return mip6_rthdr_offset(skb, prevhdr, x->type->proto);
#endif
	default:
		break;
	}

	return ip6_find_1stfragopt(skb, prevhdr);
}
#endif

/* Add encapsulation header.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the encapsulation header.
 */
static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add route optimization header space.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the route optimization header.
 */
static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);

	x->lastused = ktime_get_real_seconds();

	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}
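
/* Both IPv6 transport and route-optimization encapsulation rely on
 * xfrm6_hdr_offset() to pick the insertion point: MIPv6 destination
 * options and routing headers scan past hop-by-hop and routing headers
 * (mip6_rthdr_offset() above), while every other type inserts at the end
 * of the unfragmentable part, as found by ip6_find_1stfragopt(). For
 * example, for IPv6|HbH|TCP both paths return the offset just behind the
 * hop-by-hop header.
 */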
/* Add encapsulation header.
 *
 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
 */
static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ip_beet_phdr *ph;
	struct iphdr *top_iph;
	int hdrlen, optlen;

	hdrlen = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdrlen +
			       (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);

	xfrm4_beet_make_header(skb);

	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);

	top_iph = ip_hdr(skb);

	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->protocol;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->protocol = IPPROTO_BEETPH;
		top_iph->ihl = sizeof(struct iphdr) / 4;
	}

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;

	return 0;
}
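
/* Worked example for the pseudo header arithmetic above: optlen is the
 * IPv4 options length and is always a multiple of 4. ph->padlen is 4 when
 * optlen % 8 == 0 and 0 when optlen % 8 == 4, so the 4-byte ip_beet_phdr
 * plus padlen NOP bytes plus the options always total a multiple of 8
 * (per the BEET draft, ph->hdrlen counts that length in 8-octet units,
 * not including the first 8 octets). The headroom reserved for it,
 * IPV4_BEET_PHMAXLEN - (optlen & 4), is exactly sizeof(*ph) + ph->padlen,
 * i.e. 8 or 4 bytes.
 */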
/* Add encapsulation header.
 *
 * The top IP header will be constructed per RFC 2401.
 */
static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct iphdr *top_iph;
	int flags;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ip_hdr(skb);

	top_iph->ihl = 5;
	top_iph->version = 4;

	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* Whether the inner DS field is copied into the outer header
	 * depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP.
	 */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		top_iph->tos = 0;
	else
		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
					    XFRM_MODE_SKB_CB(skb)->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
			    0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));

	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;
	ip_select_ident(dev_net(dst->dev), skb, NULL);

	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *top_iph;
	int dsfield;

	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ipv6_hdr(skb);

	top_iph->version = 6;

	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
	       sizeof(top_iph->flow_lbl));
	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);

	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		dsfield = 0;
	else
		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
	if (x->props.flags & XFRM_STATE_NOECN)
		dsfield &= ~INET_ECN_MASK;
	ipv6_change_dsfield(top_iph, 0, dsfield);
	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}

static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *top_iph;
	struct ip_beet_phdr *ph;
	int optlen, hdr_len;

	hdr_len = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdr_len);
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);

	xfrm6_beet_make_header(skb);

	top_iph = ipv6_hdr(skb);
	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->nexthdr;
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->nexthdr = IPPROTO_BEETPH;
	}

	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}
#endif
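
/* In both tunnel encapsulation paths above, INET_ECN_encapsulate() keeps
 * the outer DSCP chosen just before and copies the inner ECN bits into
 * the outer header, except that CE is replaced by ECT(0): an existing
 * congestion mark is not propagated into the outer header, and routers
 * can re-mark the outer packet if the tunnel path itself is congested.
 * XFRM_STATE_NOECN then clears the outer ECN bits entirely, and
 * XFRM_SA_XFLAG_DONT_ENCAP_DSCP keeps the inner DSCP out of the outer
 * header.
 */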
/* Add encapsulation header.
 *
 * On exit, the transport header will be set to the start of the
 * encapsulation header to be filled in by x->type->output and the mac
 * header will be set to the nextheader (protocol for IPv4) field of the
 * extension header directly preceding the encapsulation header, or in
 * its absence, that of the top IP header.
 * The value of the network header will always point to the top IP header
 * while skb->data will point to the payload.
 */
static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
	skb->protocol = htons(ETH_P_IP);

	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
		return xfrm4_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm4_tunnel_encap_add(x, skb);
	}

	WARN_ON_ONCE(1);
	return -EOPNOTSUPP;
}

static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	skb->ignore_df = 1;
	skb->protocol = htons(ETH_P_IPV6);

	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
		return xfrm6_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm6_tunnel_encap_add(x, skb);
	default:
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}
#endif
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
}

static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
	case XFRM_MODE_TUNNEL:
		if (x->outer_mode.family == AF_INET)
			return xfrm4_prepare_output(x, skb);
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_prepare_output(x, skb);
		break;
	case XFRM_MODE_TRANSPORT:
		if (x->outer_mode.family == AF_INET)
			return xfrm4_transport_output(x, skb);
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_transport_output(x, skb);
		break;
	case XFRM_MODE_ROUTEOPTIMIZATION:
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_ro_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	return -EOPNOTSUPP;
}

#if IS_ENABLED(CONFIG_NET_PKTGEN)
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	return xfrm_outer_mode_output(x, skb);
}
EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
#endif
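
/* Dispatch summary for xfrm_outer_mode_output():
 *
 *	BEET/TUNNEL		AF_INET		xfrm4_prepare_output()
 *	BEET/TUNNEL		AF_INET6	xfrm6_prepare_output()
 *	TRANSPORT		AF_INET		xfrm4_transport_output()
 *	TRANSPORT		AF_INET6	xfrm6_transport_output()
 *	ROUTEOPTIMIZATION	AF_INET6	xfrm6_ro_output()
 *
 * Any other combination is a bug (WARN_ON_ONCE) and fails with
 * -EOPNOTSUPP.
 */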
static int xfrm_output_one(struct sk_buff *skb, int err)
{
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_state *x = dst->xfrm;
	struct net *net = xs_net(x);

	if (err <= 0)
		goto resume;

	do {
		err = xfrm_skb_check_space(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			goto error_nolock;
		}

		skb->mark = xfrm_smark_get(skb->mark, x);

		err = xfrm_outer_mode_output(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
			goto error_nolock;
		}

		spin_lock_bh(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
			err = -EINVAL;
			goto error;
		}

		err = xfrm_state_check_expire(x);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
			goto error;
		}

		err = xfrm_replay_overflow(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
			goto error;
		}

		x->curlft.bytes += skb->len;
		x->curlft.packets++;
		x->curlft.use_time = ktime_get_real_seconds();

		spin_unlock_bh(&x->lock);

		skb_dst_force(skb);
		if (!skb_dst(skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}

		if (xfrm_offload(skb)) {
			x->type_offload->encap(x, skb);
		} else {
			/* Inner headers are invalid now. */
			skb->encapsulation = 0;

			err = x->type->output(x, skb);
			if (err == -EINPROGRESS)
				goto out;
		}

resume:
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			goto error_nolock;
		}

		dst = skb_dst_pop(skb);
		if (!dst) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}
		skb_dst_set(skb, dst);
		x = dst->xfrm;
	} while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));

	return 0;

error:
	spin_unlock_bh(&x->lock);
error_nolock:
	kfree_skb(skb);
out:
	return err;
}

int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
{
	struct net *net = xs_net(skb_dst(skb)->xfrm);

	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
		nf_reset_ct(skb);

		err = skb_dst(skb)->ops->local_out(net, sk, skb);
		if (unlikely(err != 1))
			goto out;

		if (!skb_dst(skb)->xfrm)
			return dst_output(net, sk, skb);

		err = nf_hook(skb_dst(skb)->ops->family,
			      NF_INET_POST_ROUTING, net, sk, skb,
			      NULL, skb_dst(skb)->dev, xfrm_output2);
		if (unlikely(err != 1))
			goto out;
	}

	if (err == -EINPROGRESS)
		err = 0;

out:
	return err;
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);

static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return xfrm_output_resume(sk, skb, 1);
}

static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *segs, *nskb;

	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET);
	BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = skb_gso_segment(skb, 0);
	kfree_skb(skb);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = xfrm_output2(net, sk, segs);

		if (unlikely(err)) {
			kfree_skb_list(nskb);
			return err;
		}
	}

	return 0;
}
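
/* Asynchronous transforms: if x->type->output() returns -EINPROGRESS,
 * xfrm_output_one() stops and the transform's completion callback must
 * re-enter the output path through xfrm_output_resume(). A minimal
 * sketch of such a callback (illustrative only; the real ESP callback is
 * esp_output_done() in net/ipv4/esp4.c):
 *
 *	static void example_output_done(void *data, int err)
 *	{
 *		struct sk_buff *skb = data;
 *
 *		xfrm_output_resume(skb->sk, skb, err);
 *	}
 *
 * xfrm_output_one() treats err <= 0 on entry as "resuming after async
 * completion" and jumps straight to its resume label.
 */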
/* For partial checksum offload, the outer header checksum is calculated
 * by software and the inner header checksum is calculated by hardware.
 * This requires hardware to know the inner packet type to calculate
 * the inner header checksum. Save the inner ip protocol here to avoid
 * traversing the packet in the vendor's xmit code.
 * For IPsec tunnel mode, save the ip protocol from the IP header of the
 * plain text packet. Otherwise, if the inner protocol type is
 * ENCAP_TYPE_IPPROTO, just save skb->inner_ipproto; in any other case,
 * get the ip protocol from the inner IP header.
 */
static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x)
{
	struct xfrm_offload *xo = xfrm_offload(skb);
	const struct ethhdr *eth;

	if (!xo)
		return;

	if (x->outer_mode.encap == XFRM_MODE_TUNNEL) {
		switch (x->outer_mode.family) {
		case AF_INET:
			xo->inner_ipproto = ip_hdr(skb)->protocol;
			break;
		case AF_INET6:
			xo->inner_ipproto = ipv6_hdr(skb)->nexthdr;
			break;
		default:
			break;
		}

		return;
	}

	/* non-Tunnel Mode */
	if (!skb->encapsulation)
		return;

	if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
		xo->inner_ipproto = skb->inner_ipproto;
		return;
	}

	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
		return;

	eth = (struct ethhdr *)skb_inner_mac_header(skb);

	switch (ntohs(eth->h_proto)) {
	case ETH_P_IPV6:
		xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr;
		break;
	case ETH_P_IP:
		xo->inner_ipproto = inner_ip_hdr(skb)->protocol;
		break;
	}
}

int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct xfrm_state *x = skb_dst(skb)->xfrm;
	int err;

	switch (x->outer_mode.family) {
	case AF_INET:
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
		IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
		break;
	case AF_INET6:
		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));

		IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
		break;
	}

	secpath_reset(skb);

	if (xfrm_dev_offload_ok(skb, x)) {
		struct sec_path *sp;

		sp = secpath_set(skb);
		if (!sp) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -ENOMEM;
		}

		sp->olen++;
		sp->xvec[sp->len++] = x;
		xfrm_state_hold(x);

		xfrm_get_inner_ipproto(skb, x);
		skb->encapsulation = 1;

		if (skb_is_gso(skb)) {
			if (skb->inner_protocol)
				return xfrm_output_gso(net, sk, skb);

			skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
			goto out;
		}

		if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
			goto out;
	} else {
		if (skb_is_gso(skb))
			return xfrm_output_gso(net, sk, skb);
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		err = skb_checksum_help(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return err;
		}
	}

out:
	return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);

static int xfrm4_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;

	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
		goto out;

	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
		goto out;

	mtu = dst_mtu(skb_dst(skb));
	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
		skb->protocol = htons(ETH_P_IP);

		if (skb->sk)
			xfrm_local_error(skb, mtu);
		else
			icmp_send(skb, ICMP_DEST_UNREACH,
				  ICMP_FRAG_NEEDED, htonl(mtu));
		ret = -EMSGSIZE;
	}
out:
	return ret;
}
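
/* Path MTU handling in xfrm4_tunnel_check_size() above: an oversized
 * packet with DF set is rejected with -EMSGSIZE, and the sender learns
 * the tunnel MTU either through the socket error queue
 * (xfrm_local_error(), for locally generated traffic) or through an ICMP
 * fragmentation-needed message (for forwarded traffic). Packets already
 * accounted against the tunnel (IPSKB_XFRM_TUNNEL_SIZE) or allowed to
 * fragment pass through unchanged.
 */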
static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	if (x->outer_mode.encap == XFRM_MODE_BEET &&
	    ip_is_fragment(ip_hdr(skb))) {
		net_warn_ratelimited("BEET mode doesn't support inner IPv4 fragments\n");
		return -EAFNOSUPPORT;
	}

	err = xfrm4_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;

	xfrm4_extract_header(skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
static int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;
	struct dst_entry *dst = skb_dst(skb);

	if (skb->ignore_df)
		goto out;

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
		skb->dev = dst->dev;
		skb->protocol = htons(ETH_P_IPV6);

		if (xfrm6_local_dontfrag(skb->sk))
			ipv6_stub->xfrm6_local_rxpmtu(skb, mtu);
		else if (skb->sk)
			xfrm_local_error(skb, mtu);
		else
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		ret = -EMSGSIZE;
	}
out:
	return ret;
}
#endif

static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm6_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;

	xfrm6_extract_header(skb);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	const struct xfrm_mode *inner_mode;

	if (x->sel.family == AF_UNSPEC)
		inner_mode = xfrm_ip2inner_mode(x,
				xfrm_af2proto(skb_dst(skb)->ops->family));
	else
		inner_mode = &x->inner_mode;

	if (inner_mode == NULL)
		return -EAFNOSUPPORT;

	switch (inner_mode->family) {
	case AF_INET:
		return xfrm4_extract_output(x, skb);
	case AF_INET6:
		return xfrm6_extract_output(x, skb);
	}

	return -EAFNOSUPPORT;
}

void xfrm_local_error(struct sk_buff *skb, int mtu)
{
	unsigned int proto;
	struct xfrm_state_afinfo *afinfo;

	if (skb->protocol == htons(ETH_P_IP))
		proto = AF_INET;
	else if (skb->protocol == htons(ETH_P_IPV6) &&
		 skb->sk->sk_family == AF_INET6)
		proto = AF_INET6;
	else
		return;

	afinfo = xfrm_state_get_afinfo(proto);
	if (afinfo) {
		/* xfrm_state_get_afinfo() returns with rcu_read_lock held
		 * on success; drop it once the af-specific handler is done.
		 */
		afinfo->local_error(skb, mtu);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(xfrm_local_error);
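
/* Overall transmit flow through this file:
 *
 *	xfrm_output()
 *	    -> xfrm_output_gso() (software segmentation), or directly
 *	    -> xfrm_output2()
 *	        -> xfrm_output_resume()
 *	            loop: xfrm_output_one()	apply one state (mode + type)
 *	                  then ->local_out() and NF_INET_POST_ROUTING
 *	            -> dst_output()		once no xfrm dst remains
 */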