1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * xfrm_output.c - Common IPsec encapsulation code. 4 * 5 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> 6 */ 7 8 #include <linux/errno.h> 9 #include <linux/module.h> 10 #include <linux/netdevice.h> 11 #include <linux/netfilter.h> 12 #include <linux/skbuff.h> 13 #include <linux/slab.h> 14 #include <linux/spinlock.h> 15 #include <net/dst.h> 16 #include <net/icmp.h> 17 #include <net/inet_ecn.h> 18 #include <net/xfrm.h> 19 20 #if IS_ENABLED(CONFIG_IPV6) 21 #include <net/ip6_route.h> 22 #include <net/ipv6_stubs.h> 23 #endif 24 25 #include "xfrm_inout.h" 26 27 static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); 28 static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); 29 30 static int xfrm_skb_check_space(struct sk_buff *skb) 31 { 32 struct dst_entry *dst = skb_dst(skb); 33 int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) 34 - skb_headroom(skb); 35 int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); 36 37 if (nhead <= 0) { 38 if (ntail <= 0) 39 return 0; 40 nhead = 0; 41 } else if (ntail < 0) 42 ntail = 0; 43 44 return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); 45 } 46 47 /* Children define the path of the packet through the 48 * Linux networking. Thus, destinations are stackable. 49 */ 50 51 static struct dst_entry *skb_dst_pop(struct sk_buff *skb) 52 { 53 struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb))); 54 55 skb_dst_drop(skb); 56 return child; 57 } 58 59 /* Add encapsulation header. 60 * 61 * The IP header will be moved forward to make space for the encapsulation 62 * header. 
 *
 * On return the network header sits x->props.header_len bytes in front of
 * the skb->data seen on entry, the transport header follows the relocated
 * IP header, the mac header addresses the protocol field of that IP header
 * and skb->data has been advanced by the length of the IP header.
 * Always returns 0.
 */
static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	int ihl = iph->ihl * 4;	/* IPv4 header length in bytes */

	/* Remember where the transport header was before it is moved. */
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	/* Offsets below are relative to skb->data, so they must be set
	 * before the pull.
	 */
	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + ihl;
	__skb_pull(skb, ihl);
	/* Copy the IP header to its new position at the network header. */
	memmove(skb_network_header(skb), iph, ihl);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Locate the insertion point for a Mobile IPv6 header of kind @type.
 *
 * Walks the headers that follow the fixed IPv6 header, starting at
 * sizeof(struct ipv6hdr) from the network header.  On success the return
 * value is the offset (from the network header) at which the walk stopped,
 * and *@nexthdr points at the next-header field of the header in front of
 * that offset.
 *
 * Returns -EINVAL when an option header would not fit in the packet, when
 * the offset grows beyond IPV6_MAXPLEN, or when the walk runs past the end
 * of the packet without reaching a stopping point.
 */
static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
{
	const unsigned char *nh = skb_network_header(skb);
	unsigned int offset = sizeof(struct ipv6hdr);
	unsigned int packet_len;
	int found_rhdr = 0;

	packet_len = skb_tail_pointer(skb) - nh;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset <= packet_len) {
		struct ipv6_opt_hdr *exthdr;

		switch (**nexthdr) {
		case NEXTHDR_HOP:
			/* Always step over a hop-by-hop header. */
			break;
		case NEXTHDR_ROUTING:
			/* For IPPROTO_ROUTING, stop in front of a routing
			 * header whose type is non-zero.  The length test
			 * keeps the read of rt->type inside the packet.
			 */
			if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) {
				struct ipv6_rt_hdr *rt;

				rt = (struct ipv6_rt_hdr *)(nh + offset);
				if (rt->type != 0)
					return offset;
			}
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
			/* HAO MUST NOT appear more than once.
			 * XXX: It is better to try to find by the end of
			 * XXX: packet if HAO exists.
			 */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
				net_dbg_ratelimited("mip6: hao exists already, override\n");
				return offset;
			}

			/* A destination options header that comes after a
			 * routing header ends the walk.
			 */
			if (found_rhdr)
				return offset;

			break;
		default:
			/* Any other header ends the walk. */
			return offset;
		}

		/* The option header itself must lie within the packet
		 * before its length is read.
		 */
		if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
			return -EINVAL;

		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
		offset += ipv6_optlen(exthdr);
		if (offset > IPV6_MAXPLEN)
			return -EINVAL;
		*nexthdr = &exthdr->nexthdr;
	}

	return -EINVAL;
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
/* Return the offset at which the header for state @x is to be inserted.
 *
 * With CONFIG_IPV6_MIP6, states whose type protocol is IPPROTO_DSTOPTS or
 * IPPROTO_ROUTING use mip6_rthdr_offset(); everything else uses
 * ip6_find_1stfragopt().  @prevhdr is passed through to the helper, and a
 * negative return value is an error code.
 */
static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
{
	switch (x->type->proto) {
#if IS_ENABLED(CONFIG_IPV6_MIP6)
	case IPPROTO_DSTOPTS:
	case IPPROTO_ROUTING:
		return mip6_rthdr_offset(skb, prevhdr, x->type->proto);
#endif
	default:
		break;
	}

	return ip6_find_1stfragopt(skb, prevhdr);
}
#endif

/* Add encapsulation header.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the encapsulation header.
 *
 * Returns 0 on success or the negative value reported by
 * xfrm6_hdr_offset().  Without CONFIG_IPV6 it warns once and returns
 * -EAFNOSUPPORT.
 */
static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);
	/* Remember where the transport header was before it is moved. */
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	/* hdr_len covers the IPv6 header plus the headers kept in front. */
	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	/* The mac header tracks the next-header byte found above, shifted
	 * by the same amount the headers are about to be moved.
	 */
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add route optimization header space.
 *
 * The IP header and mutable extension headers will be moved forward to make
 * space for the route optimization header.
 *
 * Also stores the result of ktime_get_real_seconds() in x->lastused.
 * Returns 0 on success or the negative value reported by
 * xfrm6_hdr_offset().  Without CONFIG_IPV6 it warns once and returns
 * -EAFNOSUPPORT.
 */
static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6hdr *iph;
	u8 *prevhdr;
	int hdr_len;

	iph = ipv6_hdr(skb);

	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
	if (hdr_len < 0)
		return hdr_len;
	/* Offsets are relative to skb->data and must be set before the
	 * pull below.
	 */
	skb_set_mac_header(skb,
			   (prevhdr - x->props.header_len) - skb->data);
	skb_set_network_header(skb, -x->props.header_len);
	skb->transport_header = skb->network_header + hdr_len;
	__skb_pull(skb, hdr_len);
	memmove(ipv6_hdr(skb), iph, hdr_len);

	x->lastused = ktime_get_real_seconds();

	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
 *
 * When the control block reports a non-zero optlen, a BEET pseudo header
 * (struct ip_beet_phdr) is filled in and the top header's protocol becomes
 * IPPROTO_BEETPH.  Returns 0, or -EINVAL if optlen is negative.
 */
static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ip_beet_phdr *ph;
	struct iphdr *top_iph;
	int hdrlen, optlen;

	/* Extra room is reserved only when optlen is non-zero. */
	hdrlen = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdrlen +
			       (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);

	xfrm4_beet_make_header(skb);

	/* ph is the new skb->data after the pull; it is only written to
	 * when optlen is non-zero.
	 */
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);

	top_iph = ip_hdr(skb);

	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->protocol;
		/* Fill the padding that follows the pseudo header. */
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->protocol = IPPROTO_BEETPH;
		top_iph->ihl = sizeof(struct iphdr) / 4;
	}

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;

	return 0;
}

/* Add encapsulation header.
 *
 * The top IP header will be constructed per RFC 2401.
 *
 * The outer header is written x->props.header_len bytes in front of
 * skb->data.  Its tos, frag_off and ttl are derived from the control
 * block, the state flags and the child dst; its addresses come from the
 * state.  Always returns 0.
 */
static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	/* An IPv6 packet no longer than IPV6_MIN_MTU never gets DF set on
	 * the outer header (see frag_off below).
	 */
	bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU);
	struct dst_entry *dst = skb_dst(skb);
	struct iphdr *top_iph;
	int flags;

	/* Keep the current header offsets as the inner ones. */
	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ip_hdr(skb);

	top_iph->ihl = 5;
	top_iph->version = 4;

	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		top_iph->tos = 0;
	else
		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
					    XFRM_MODE_SKB_CB(skb)->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	/* The saved DF bit is carried over unless the state has
	 * XFRM_STATE_NOPMTUDISC set or the packet is a small IPv6 one.
	 */
	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ?
		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));

	top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;
	ip_select_ident(dev_net(dst->dev), skb, NULL);

	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
/* Add the outer IPv6 header for tunnel mode.
 *
 * The outer header is written x->props.header_len bytes in front of
 * skb->data.  Flow label and traffic class are taken from the control
 * block (subject to the state's DSCP and ECN flags), the hop limit from
 * the child dst and the addresses from the state.  Always returns 0.
 */
static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *top_iph;
	int dsfield;

	/* Keep the current header offsets as the inner ones. */
	skb_set_inner_network_header(skb, skb_network_offset(skb));
	skb_set_inner_transport_header(skb, skb_transport_offset(skb));

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ipv6_hdr(skb);

	top_iph->version = 6;

	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
	       sizeof(top_iph->flow_lbl));
	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* Same DSCP/ECN policy as the IPv4 tunnel path above. */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		dsfield = 0;
	else
		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
	if (x->props.flags & XFRM_STATE_NOECN)
		dsfield &= ~INET_ECN_MASK;
	ipv6_change_dsfield(top_iph, 0, dsfield);
	top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}

/* Add the outer IPv6 header for BEET mode.
 *
 * Counterpart of xfrm4_beet_encap_add(): when the control block reports a
 * non-zero optlen, a BEET pseudo header is filled in and the top header's
 * nexthdr becomes IPPROTO_BEETPH.  Returns 0, or -EINVAL if optlen is
 * negative.
 */
static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *top_iph;
	struct ip_beet_phdr *ph;
	int optlen, hdr_len;

	/* Extra room is reserved only when optlen is non-zero. */
	hdr_len = 0;
	optlen = XFRM_MODE_SKB_CB(skb)->optlen;
	if (unlikely(optlen))
		hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);

	skb_set_network_header(skb, -x->props.header_len - hdr_len);
	if (x->sel.family != AF_INET6)
		skb->network_header += IPV4_BEET_PHMAXLEN;
	skb->mac_header = skb->network_header +
			  offsetof(struct ipv6hdr, nexthdr);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	/* Unlike the IPv4 variant, the pull happens before the header is
	 * built.
	 */
	ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);

	xfrm6_beet_make_header(skb);

	top_iph = ipv6_hdr(skb);
	if (unlikely(optlen)) {
		if (WARN_ON(optlen < 0))
			return -EINVAL;

		ph->padlen = 4 - (optlen & 4);
		ph->hdrlen = optlen / 8;
		ph->nexthdr = top_iph->nexthdr;
		/* Fill the padding that follows the pseudo header. */
		if (ph->padlen)
			memset(ph + 1, IPOPT_NOP, ph->padlen);

		top_iph->nexthdr = IPPROTO_BEETPH;
	}

	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
	return 0;
}
#endif

/* Add encapsulation header.
 *
 * On exit, the transport header will be set to the start of the
 * encapsulation header to be filled in by x->type->output and the mac
 * header will be set to the nextheader (protocol for IPv4) field of the
 * extension header directly preceding the encapsulation header, or in
 * its absence, that of the top IP header.
 * The value of the network header will always point to the top IP header
 * while skb->data will point to the payload.
 */
static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	/* Check the inner packet and save its header fields first. */
	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	/* xfrm4_tunnel_check_size() skips its check once this is set. */
	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
	skb->protocol = htons(ETH_P_IP);

	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
		return xfrm4_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm4_tunnel_encap_add(x, skb);
	}

	/* Any other encapsulation is unexpected here. */
	WARN_ON_ONCE(1);
	return -EOPNOTSUPP;
}

/* IPv6 counterpart of xfrm4_prepare_output().
 *
 * Returns the result of the BEET or tunnel encap helper, -EOPNOTSUPP for
 * any other encapsulation, or -EAFNOSUPPORT when CONFIG_IPV6 is disabled.
 */
static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm_inner_extract_output(x, skb);
	if (err)
		return err;

	/* xfrm6_tunnel_check_size() returns early once this is set. */
	skb->ignore_df = 1;
	skb->protocol = htons(ETH_P_IPV6);

	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
		return xfrm6_beet_encap_add(x, skb);
	case XFRM_MODE_TUNNEL:
		return xfrm6_tunnel_encap_add(x, skb);
	default:
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}
#endif
	/* Only reachable without CONFIG_IPV6: every arm of the switch
	 * above returns.
	 */
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
}

/* Dispatch to the output helper matching the state's outer mode.
 *
 * Selection is by x->outer_mode.encap and then x->outer_mode.family.
 * Returns the helper's result, or -EOPNOTSUPP when no helper matches.
 */
static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	switch (x->outer_mode.encap) {
	case XFRM_MODE_BEET:
	case XFRM_MODE_TUNNEL:
		if (x->outer_mode.family == AF_INET)
			return xfrm4_prepare_output(x, skb);
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_prepare_output(x, skb);
		break;
	case XFRM_MODE_TRANSPORT:
		if (x->outer_mode.family == AF_INET)
			return xfrm4_transport_output(x, skb);
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_transport_output(x, skb);
		break;
	case XFRM_MODE_ROUTEOPTIMIZATION:
		/* Route optimization is only handled for IPv6. */
		if (x->outer_mode.family == AF_INET6)
			return xfrm6_ro_output(x, skb);
		WARN_ON_ONCE(1);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	return -EOPNOTSUPP;
}

#if IS_ENABLED(CONFIG_NET_PKTGEN)
/* Exported wrapper around xfrm_outer_mode_output(), built only when
 * CONFIG_NET_PKTGEN is enabled.
 */
int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	return xfrm_outer_mode_output(x, skb);
}
EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
#endif

/* Apply the transformations at the top of the skb's dst stack.
 *
 * @err > 0 starts with a fresh transformation.  @err <= 0 enters at the
 * "resume" label, i.e. directly after the point where the type's output
 * handler is called, with @err treated as that handler's result.
 *
 * The loop repeats while the popped dst still carries a state whose outer
 * mode lacks XFRM_MODE_FLAG_TUNNEL.
 *
 * Returns 0 on success.  Returns -EINPROGRESS, without freeing the skb,
 * when the type's output handler reports it.  On every other error the
 * skb is freed and the error is returned.
 */
static int xfrm_output_one(struct sk_buff *skb, int err)
{
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_state *x = dst->xfrm;
	struct net *net = xs_net(x);

	if (err <= 0)
		goto resume;

	do {
		err = xfrm_skb_check_space(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			goto error_nolock;
		}

		skb->mark = xfrm_smark_get(skb->mark, x);

		err = xfrm_outer_mode_output(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
			goto error_nolock;
		}

		/* State validity, expiry, replay and lifetime counters are
		 * all handled under x->lock.
		 */
		spin_lock_bh(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
			err = -EINVAL;
			goto error;
		}

		err = xfrm_state_check_expire(x);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
			goto error;
		}

		err = xfrm_replay_overflow(x, skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
			goto error;
		}

		x->curlft.bytes += skb->len;
		x->curlft.packets++;
		x->curlft.use_time = ktime_get_real_seconds();

		spin_unlock_bh(&x->lock);

		/* skb_dst_force() may leave the skb without a dst. */
		skb_dst_force(skb);
		if (!skb_dst(skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}

		if (xfrm_offload(skb)) {
			x->type_offload->encap(x, skb);
		} else {
			/* Inner headers are invalid now. */
			skb->encapsulation = 0;

			err = x->type->output(x, skb);
			/* The skb must not be freed in this case. */
			if (err == -EINPROGRESS)
				goto out;
		}

resume:
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
			goto error_nolock;
		}

		/* Move on to the child dst for the next round. */
		dst = skb_dst_pop(skb);
		if (!dst) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			err = -EHOSTUNREACH;
			goto error_nolock;
		}
		skb_dst_set(skb, dst);
		x = dst->xfrm;
	} while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));

	return 0;

error:
	spin_unlock_bh(&x->lock);
error_nolock:
	kfree_skb(skb);
out:
	return err;
}

/* Continue output processing of @skb.
 *
 * Calls xfrm_output_one() repeatedly.  After each successful round the
 * dst's local_out() is invoked; if the resulting dst has no xfrm state
 * the packet is passed to dst_output(), otherwise it goes through the
 * NF_INET_POST_ROUTING hook with xfrm_output2() as continuation.  Any
 * result other than 1 from local_out() or nf_hook() is returned as is.
 *
 * -EINPROGRESS from xfrm_output_one() is reported to the caller as 0.
 */
int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
{
	struct net *net = xs_net(skb_dst(skb)->xfrm);

	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
		nf_reset_ct(skb);

		err = skb_dst(skb)->ops->local_out(net, sk, skb);
		if (unlikely(err != 1))
			goto out;

		/* No more transformations: hand the packet on. */
		if (!skb_dst(skb)->xfrm)
			return dst_output(net, sk, skb);

		err = nf_hook(skb_dst(skb)->ops->family,
			      NF_INET_POST_ROUTING, net, sk, skb,
			      NULL, skb_dst(skb)->dev, xfrm_output2);
		if (unlikely(err != 1))
			goto out;
	}

	if (err == -EINPROGRESS)
		err = 0;

out:
	return err;
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);

/* Start output processing from the beginning: xfrm_output_resume() with
 * err == 1.  @net is unused here; the signature matches what nf_hook() is
 * given as continuation in xfrm_output_resume().
 */
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return xfrm_output_resume(sk, skb, 1);
}

/* Segment a GSO skb and send each segment through xfrm_output2().
 *
 * The original skb is freed right after segmentation, whether or not it
 * succeeded.  If a segment fails, the segments not yet sent are freed and
 * the error is returned.  Returns 0 when all segments were sent, the
 * error from skb_gso_segment(), or -EINVAL if it produced no segments.
 */
static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *segs, *nskb;

	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET);
	BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = skb_gso_segment(skb, 0);
	kfree_skb(skb);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	/* segs doubles as the loop cursor; nskb is the following segment. */
	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = xfrm_output2(net, sk, segs);

		if (unlikely(err)) {
			kfree_skb_list(nskb);
			return err;
		}
	}

	return 0;
}

/* For partial checksum offload, the outer header checksum is calculated
 * by software and the inner header checksum is calculated by hardware.
 * This requires hardware to know the inner packet type to calculate
 * the inner header checksum. Save inner ip protocol here to avoid
 * traversing the packet in the vendor's xmit code.
 * For IPsec tunnel mode, save the IP protocol from the IP header of the
 * plain text packet. Otherwise, if the encap type is IPIP, just save
 * skb->inner_ipproto; in any other case get the IP protocol from the
 * inner IP header.
 *
 * Nothing is saved when the skb has no offload data, when a non-tunnel
 * skb is not marked as encapsulated, or when the inner protocol type is
 * neither ENCAP_TYPE_IPPROTO nor ENCAP_TYPE_ETHER.
 */
static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x)
{
	struct xfrm_offload *xo = xfrm_offload(skb);
	const struct ethhdr *eth;

	if (!xo)
		return;

	if (x->outer_mode.encap == XFRM_MODE_TUNNEL) {
		switch (x->outer_mode.family) {
		case AF_INET:
			xo->inner_ipproto = ip_hdr(skb)->protocol;
			break;
		case AF_INET6:
			xo->inner_ipproto = ipv6_hdr(skb)->nexthdr;
			break;
		default:
			break;
		}

		return;
	}

	/* non-Tunnel Mode */
	if (!skb->encapsulation)
		return;

	if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
		xo->inner_ipproto = skb->inner_ipproto;
		return;
	}

	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
		return;

	/* Ethernet encapsulation: pick the inner IP header by ethertype. */
	eth = (struct ethhdr *)skb_inner_mac_header(skb);

	switch (ntohs(eth->h_proto)) {
	case ETH_P_IPV6:
		xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr;
		break;
	case ETH_P_IP:
		xo->inner_ipproto = inner_ip_hdr(skb)->protocol;
		break;
	}
}

/* Entry point for transforming and sending @skb.
 *
 * Resets the IPv4/IPv6 control block and the secpath, then decides how
 * GSO and checksumming are handled depending on whether
 * xfrm_dev_offload_ok() reports the packet as offloadable, and finally
 * hands the packet to xfrm_output2() (or xfrm_output_gso()).
 *
 * Returns the result of that call, -ENOMEM if no secpath could be set, or
 * the error from skb_checksum_help(); the skb is freed in the latter two
 * cases.
 */
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct xfrm_state *x = skb_dst(skb)->xfrm;
	int err;

	switch (x->outer_mode.family) {
	case AF_INET:
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
		IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
		break;
	case AF_INET6:
		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));

		IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
		break;
	}

	secpath_reset(skb);

	if (xfrm_dev_offload_ok(skb, x)) {
		struct sec_path *sp;

		sp = secpath_set(skb);
		if (!sp) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return -ENOMEM;
		}

		/* Record the state in the secpath and hold a reference. */
		sp->olen++;
		sp->xvec[sp->len++] = x;
		xfrm_state_hold(x);

		xfrm_get_inner_ipproto(skb, x);
		skb->encapsulation = 1;

		if (skb_is_gso(skb)) {
			/* With an inner protocol set, segment here. */
			if (skb->inner_protocol)
				return xfrm_output_gso(net, sk, skb);

			skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
			goto out;
		}

		/* Skip the software checksum below when the offload
		 * device advertises NETIF_F_HW_ESP_TX_CSUM.
		 */
		if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
			goto out;
	} else {
		if (skb_is_gso(skb))
			return xfrm_output_gso(net, sk, skb);
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		err = skb_checksum_help(skb);
		if (err) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
			kfree_skb(skb);
			return err;
		}
	}

out:
	return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);

/* Check an IPv4 packet against the dst MTU.
 *
 * The check is skipped when IPSKB_XFRM_TUNNEL_SIZE is already set, when
 * the packet does not have DF set, or when skb->ignore_df is set.  If the
 * packet is too big, the sender is notified (xfrm_local_error() when the
 * skb has a socket, an ICMP "fragmentation needed" otherwise) and
 * -EMSGSIZE is returned.  Returns 0 otherwise.
 */
static int xfrm4_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;

	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
		goto out;

	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
		goto out;

	mtu = dst_mtu(skb_dst(skb));
	/* Non-GSO packets are compared by length, GSO packets are
	 * validated with skb_gso_validate_network_len().
	 */
	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
		skb->protocol = htons(ETH_P_IP);

		if (skb->sk)
			xfrm_local_error(skb, mtu);
		else
			icmp_send(skb, ICMP_DEST_UNREACH,
				  ICMP_FRAG_NEEDED, htonl(mtu));
		ret = -EMSGSIZE;
	}
out:
	return ret;
}

/* Validate an inner IPv4 packet and save its header fields.
 *
 * Returns -EAFNOSUPPORT for a fragment in BEET mode, the error from
 * xfrm4_tunnel_check_size(), or 0 after saving the protocol in the
 * control block and calling xfrm4_extract_header().
 */
static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	if (x->outer_mode.encap == XFRM_MODE_BEET &&
	    ip_is_fragment(ip_hdr(skb))) {
		net_warn_ratelimited("BEET mode doesn't support inner IPv4 fragments\n");
		return -EAFNOSUPPORT;
	}

	err = xfrm4_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;

	xfrm4_extract_header(skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
/* Check an IPv6 packet against the dst MTU.
 *
 * The check is skipped when skb->ignore_df is set.  The MTU used for the
 * length comparison is never below IPV6_MIN_MTU.  If the packet is too
 * big, the sender is notified through one of three paths and -EMSGSIZE is
 * returned.  Returns 0 otherwise.
 */
static int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
	int mtu, ret = 0;
	struct dst_entry *dst = skb_dst(skb);

	if (skb->ignore_df)
		goto out;

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb_is_gso(skb) && skb->len > mtu) ||
	    (skb_is_gso(skb) &&
	     !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
		skb->dev = dst->dev;
		skb->protocol = htons(ETH_P_IPV6);

		/* Notification path depends on the socket, if any. */
		if (xfrm6_local_dontfrag(skb->sk))
			ipv6_stub->xfrm6_local_rxpmtu(skb, mtu);
		else if (skb->sk)
			xfrm_local_error(skb, mtu);
		else
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		ret = -EMSGSIZE;
	}
out:
	return ret;
}
#endif

/* Validate an inner IPv6 packet and save its header fields.
 *
 * Returns the error from xfrm6_tunnel_check_size(), or 0 after saving the
 * next-header value in the control block and calling
 * xfrm6_extract_header().  Without CONFIG_IPV6 it warns once and returns
 * -EAFNOSUPPORT.
 */
static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	int err;

	err = xfrm6_tunnel_check_size(skb);
	if (err)
		return err;

	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;

	xfrm6_extract_header(skb);
	return 0;
#else
	WARN_ON_ONCE(1);
	return -EAFNOSUPPORT;
#endif
}

/* Run the extract helper that matches the inner mode's family.
 *
 * When the selector family is AF_UNSPEC the inner mode is looked up from
 * the dst's family; otherwise x->inner_mode is used.  Returns the
 * helper's result, or -EAFNOSUPPORT when there is no inner mode or its
 * family is neither AF_INET nor AF_INET6.
 */
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
	const struct xfrm_mode *inner_mode;

	if (x->sel.family == AF_UNSPEC)
		inner_mode = xfrm_ip2inner_mode(x,
				xfrm_af2proto(skb_dst(skb)->ops->family));
	else
		inner_mode = &x->inner_mode;

	if (inner_mode == NULL)
		return -EAFNOSUPPORT;

	switch (inner_mode->family) {
	case AF_INET:
		return xfrm4_extract_output(x, skb);
	case AF_INET6:
		return xfrm6_extract_output(x, skb);
	}

	return -EAFNOSUPPORT;
}

/* Report a local "packet too big" error through the per-family handler.
 *
 * The family is derived from skb->protocol; for IPv6 the socket family
 * must be AF_INET6 as well.  Does nothing for any other combination or
 * when no afinfo is registered for the family.
 *
 * skb->sk is dereferenced without a NULL check in the IPv6 branch; the
 * callers in this file only call in when skb->sk is set.
 */
void xfrm_local_error(struct sk_buff *skb, int mtu)
{
	unsigned int proto;
	struct xfrm_state_afinfo *afinfo;

	if (skb->protocol == htons(ETH_P_IP))
		proto = AF_INET;
	else if (skb->protocol == htons(ETH_P_IPV6) &&
		 skb->sk->sk_family == AF_INET6)
		proto = AF_INET6;
	else
		return;

	afinfo = xfrm_state_get_afinfo(proto);
	if (afinfo) {
		afinfo->local_error(skb, mtu);
		/* NOTE(review): presumably pairs with an RCU read lock
		 * taken by xfrm_state_get_afinfo() on success - confirm.
		 */
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(xfrm_local_error);