// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

/* Final transmit step after POST_ROUTING: loop a copy of multicast packets
 * back to local listeners where required, hand the skb to a lightweight
 * tunnel if one is attached to the dst, then resolve the IPv6 neighbour
 * and queue the packet on the egress device via neigh_output().
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *daddr, *nexthop;
	struct ipv6hdr *hdr;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		/* Make room for the link-layer header; skb_expand_head()
		 * frees the original skb on failure, so only the new skb
		 * must be accounted for below.
		 */
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (hdr->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local scope multicast never leaves the host. */
		if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);

	if (unlikely(IS_ERR_OR_NULL(neigh))) {
		if (unlikely(!neigh))
			neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
			return -EINVAL;
		}
	}
	sock_confirm_neigh(skb, neigh);
	ret = neigh_output(neigh, skb, false);
	rcu_read_unlock_bh();
	return ret;
}

/* GSO packet whose segments would not fit the egress MTU: software-segment
 * it and push every resulting skb through ip6_fragment(). The first error is
 * preserved while the remaining segments are still attempted.
 */
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = ip6_fragment(net, sk, segs, ip6_finish_output2);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

/* Dispatch between direct transmit, fragmentation and the GSO slow path
 * depending on packet size vs. the dst MTU.
 */
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IP6CB(skb)->flags |= IP6SKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

/* Run the cgroup BPF egress program, then continue with the normal output
 * path unless the program rejected the packet.
 */
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
	case NET_XMIT_CN:
		/* GNU ?: — propagate a non-zero error from the output path,
		 * otherwise keep the BPF verdict (SUCCESS or CN).
		 */
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
		return ret;
	}
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
		return 0;
	}

	/* Skip the POST_ROUTING hook when the packet was already rerouted
	 * through it (IP6SKB_REROUTED).
	 */
	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

/* Decide whether automatic flow labels are used: the per-socket setting,
 * when explicitly set, overrides the per-netns default.
 */
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Headroom for the IPv6 header, any extension headers and the
	 * link-layer header.
	 */
	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(head_room > skb_headroom(skb))) {
		/* skb_expand_head() frees the original skb on failure. */
		skb = skb_expand_head(skb, head_room);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOBUFS;
		}
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* May rewrite first_hop when a routing header is present. */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dev,
			       dst_output);
	}

	/* Packet exceeds the MTU and may not be sent: report EMSGSIZE. */
	skb->dev = dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

/* Deliver a Router Alert packet to every raw socket registered for the
 * given RA selector; returns 1 if at least one socket consumed the skb.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			/* Optionally isolate RA delivery to the netns of
			 * the receiving device.
			 */
			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			/* All but the last matching socket get a clone. */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	/* The last matching socket consumes the original skb. */
	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

/* Classify a packet destined to a proxied address: returns 1 when it is an
 * NDISC message that must go to the input path, 0 to continue forwarding,
 * and -1 to drop (link-local destination).
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Need at least the ICMPv6 type byte in the linear area. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

/* Last step of forwarding: bump forwarding counters and hand the skb to
 * the output path (unless it was already forwarded in hardware).
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	/* Already forwarded by the switch ASIC; nothing left to do. */
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb_clear_tstamp(skb);
	return dst_output(net, sk, skb);
}

/* Decide whether a forwarded packet exceeds the egress MTU and must be
 * rejected with an ICMPv6 Packet Too Big.
 */
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct inet6_dev *idev;
	SKB_DR(reason);
	u32 mtu;

	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    (!idev || !idev->cnf.disable_policy) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp?
	 */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0) {
			/* Proxied NDISC: consume it locally via the input
			 * path instead of forwarding.
			 */
			hdr->hop_limit--;
			return ip6_input(skb);
		} else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		SKB_DR_SET(reason, XFRM_POLICY);
		goto drop;
	}
	/* xfrm6_route_forward() may have replaced the dst. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical.
		 */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_maybe_forward(dst, true);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
	SKB_DR_SET(reason, IP_INADDRERRORS);
drop:
	kfree_skb_reason(skb, reason);
	return -EINVAL;
}

/* Copy per-packet metadata from @from to @to for a freshly built fragment. */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

/* Prepare the head skb of a frag-list for fast-path fragmentation: save a
 * copy of the unfragmentable headers, insert the fragment header and
 * initialise the iterator state. Returns 0 or -ENOMEM.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* Re-layout the head skb: headers, then the fragment header. */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

/* Turn the current frag-list member into a standalone fragment: prepend the
 * saved unfragmentable headers plus a fragment header and fix up lengths
 * and the fragment offset.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	/* More-fragments bit on everything but the last fragment. */
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

/* Initialise slow-path fragmentation state for @skb. */
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

/* Build and return the next slow-path fragment, or an ERR_PTR on allocation
 * failure. Advances @state so repeated calls walk the whole datagram.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Rewrite the next-header byte of the header preceding the fragment
	 * header, at the same offset as in the original packet.
	 */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

/* Fragment @skb to fit the path MTU and pass each fragment to @output.
 * Uses the fast (frag-list) path when the skb geometry allows it, otherwise
 * falls back to the copying slow path. Consumes @skb in all cases.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	bool mono_delivery_time = skb->mono_delivery_time;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Remember prevhdr as an offset; the header area may move later. */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on, mtu is the payload budget per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	/* Recompute prevhdr from the saved offset — presumably because
	 * skb_checksum_help() may have reallocated the header area;
	 * NOTE(review): confirm against upstream history.
	 */
	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* Fast path is only usable when the existing frag-list
		 * geometry already matches what fragmentation would build.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, mono_delivery_time);
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Error: release the fragments that were never sent. */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done above before falling
		 * back to the slow path.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		skb_set_delivery_time(frag, tstamp, mono_delivery_time);
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_gso_disable(skb->sk);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

/* Return non-zero when the cached route key no longer matches @fl_addr
 * (neither as an exact host route nor via the cached last-used address).
 */
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

/* Validate a socket-cached dst against @fl6; returns the dst if it is still
 * usable for this flow, otherwise releases it and returns NULL.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2.
	 *    oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

/* Core route lookup: fill *dst for @fl6, resolving a source address first
 * when none was given. On error *dst is released, set to NULL, and a
 * negative errno is returned.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	/* Retry the lookup now that a source address is known. */
	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source is only valid towards a v4-mapped (or
	 * unspecified) destination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 * ip6_dst_lookup - perform route lookup on flow
 * @net:
Network namespace to perform lookup in 1170 * @sk: socket which provides route info 1171 * @dst: pointer to dst_entry * for result 1172 * @fl6: flow to lookup 1173 * 1174 * This function performs a route lookup on the given flow. 1175 * 1176 * It returns zero on success, or a standard errno code on error. 1177 */ 1178 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, 1179 struct flowi6 *fl6) 1180 { 1181 *dst = NULL; 1182 return ip6_dst_lookup_tail(net, sk, dst, fl6); 1183 } 1184 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1185 1186 /** 1187 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 1188 * @net: Network namespace to perform lookup in 1189 * @sk: socket which provides route info 1190 * @fl6: flow to lookup 1191 * @final_dst: final destination address for ipsec lookup 1192 * 1193 * This function performs a route lookup on the given flow. 1194 * 1195 * It returns a valid dst pointer on success, or a pointer encoded 1196 * error code. 1197 */ 1198 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, 1199 const struct in6_addr *final_dst) 1200 { 1201 struct dst_entry *dst = NULL; 1202 int err; 1203 1204 err = ip6_dst_lookup_tail(net, sk, &dst, fl6); 1205 if (err) 1206 return ERR_PTR(err); 1207 if (final_dst) 1208 fl6->daddr = *final_dst; 1209 1210 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); 1211 } 1212 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 1213 1214 /** 1215 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 1216 * @sk: socket which provides the dst cache and route info 1217 * @fl6: flow to lookup 1218 * @final_dst: final destination address for ipsec lookup 1219 * @connected: whether @sk is connected or not 1220 * 1221 * This function performs a route lookup on the given flow with the 1222 * possibility of using the cached route in the socket if it is valid. 1223 * It will take the socket dst lock when operating on the dst cache. 
1224 * As a result, this function can only be used in process context. 1225 * 1226 * In addition, for a connected socket, cache the dst in the socket 1227 * if the current cache is not valid. 1228 * 1229 * It returns a valid dst pointer on success, or a pointer encoded 1230 * error code. 1231 */ 1232 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1233 const struct in6_addr *final_dst, 1234 bool connected) 1235 { 1236 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1237 1238 dst = ip6_sk_dst_check(sk, dst, fl6); 1239 if (dst) 1240 return dst; 1241 1242 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst); 1243 if (connected && !IS_ERR(dst)) 1244 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6); 1245 1246 return dst; 1247 } 1248 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1249 1250 /** 1251 * ip6_dst_lookup_tunnel - perform route lookup on tunnel 1252 * @skb: Packet for which lookup is done 1253 * @dev: Tunnel device 1254 * @net: Network namespace of tunnel device 1255 * @sock: Socket which provides route info 1256 * @saddr: Memory to store the src ip address 1257 * @info: Tunnel information 1258 * @protocol: IP protocol 1259 * @use_cache: Flag to enable cache usage 1260 * This function performs a route lookup on a tunnel 1261 * 1262 * It returns a valid dst pointer and stores src address to be used in 1263 * tunnel in param saddr on success, else a pointer encoded error code. 
 */

struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	/* Fast path: return the per-tunnel cached route (and cached
	 * source address) when the caller allows it.
	 */
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	/* Build the flow key from the tunnel metadata. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	/* Lookup goes through ipv6_stub so this also works when IPv6
	 * is built as a module.
	 */
	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	/* Refuse a route that loops back into the tunnel device itself. */
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	/* Report the source address chosen by the lookup. */
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

/* Duplicate an IPv6 extension header option block; copies
 * (src->hdrlen + 1) * 8 bytes. Returns NULL for a NULL @src or on
 * allocation failure.
 */
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

/* Same as ip6_opt_dup() but for a routing header. */
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ?
	       kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

/* Recompute *mtu and *maxfraglen for the next fragment. Only the
 * first fragment reserves rt->dst.header_len; later fragments treat
 * that headroom as data space. No-op for DST_XFRM_TUNNEL routes.
 */
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		/* Fragment payload must be a multiple of 8 bytes. */
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

/* Initialize the cork state for a new pending-frames sequence:
 * duplicate the tx options, capture hop limit/tclass, and compute the
 * effective MTU. Returns 0 or a negative errno; on error, anything
 * already duplicated into @v6_cork is freed later by ip6_cork_release().
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *nopt, *opt = ipc6->opt;

	/* callers pass dst together with a reference, set it first so
	 * ip6_cork_release() can put it down even in case of an error.
	 */
	cork->base.dst = &rt->dst;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!nopt))
			return -ENOBUFS;

		nopt->tot_len = sizeof(*opt);
		nopt->opt_flen = opt->opt_flen;
		nopt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header; a failed copy of a
		 * present option aborts the whole setup.
		 */
		nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
		if (opt->dst0opt && !nopt->dst0opt)
			return -ENOBUFS;

		nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
		if (opt->dst1opt && !nopt->dst1opt)
			return -ENOBUFS;

		nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
		if (opt->hopopt && !nopt->hopopt)
			return -ENOBUFS;

		nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
		if (opt->srcrt && !nopt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* MTU: with PMTU probing use the device MTU, otherwise the path
	 * MTU; for non-tunnel xfrm routes take the MTU of the underlying
	 * path so the inner fragmentation fits.
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A nonzero IPV6_MTU socket option below the route MTU wins. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

/* Core append engine shared by ip6_append_data() and ip6_make_skb():
 * copies @length bytes from @from (via @getfrag) onto @queue, growing
 * the tail skb or allocating new fragments as dictated by the corked
 * MTU. Returns 0 or a negative errno.
 */
static int __ip6_append_data(struct sock *sk,
			     struct sk_buff_head *queue,
			     struct inet_cork_full *cork_full,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	struct inet_cork *cork = &cork_full->base;
	struct flowi6 *fl6 = &cork_full->fl.u.ip6;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	/* Extension-header room is only accounted for the very first
	 * skb of the cork sequence.
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	/* GSO allows building a super-packet up to IP6_MAX_MTU. */
	paged = !!cork->gso_size;
	mtu = cork->gso_size ?
	      IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	/* Grab a timestamp key for OPT_ID before the first fragment. */
	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = atomic_inc_return(&sk->sk_tskey) - 1;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Per-fragment header: IPv6 header + nf headers + non-fragmentable
	 * extension headers.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	/* Total header chain length for the first fragment. */
	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* MTU too small to carry even a fragment header worth of data. */
	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* IPV6_DONTFRAG: report the path MTU to the app instead of
	 * fragmenting datagram sockets.
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_ICMPV6 ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	/* MSG_ZEROCOPY: pin the user pages instead of copying when the
	 * device can do SG + checksum offload; otherwise fall back to
	 * copying but keep the uarg for completion notification.
	 */
	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			/* Choose the linear allocation size: full MTU when
			 * more data is coming and the device cannot do SG,
			 * the exact fraglen when it fits a kmalloc slab,
			 * otherwise a small header area with the rest in
			 * page frags.
			 */
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			/* First skb may block on sndbuf; later ones are
			 * best-effort against 2 * sk_sndbuf.
			 */
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
							  (flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			/* Move the overhang of the previous fragment (data
			 * past its 8-byte-aligned boundary) into this one,
			 * fixing up the checksums of both.
			 */
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		/* Append into the tail skb: linear tailroom, page frags,
		 * or true zerocopy, in that order of preference.
		 */
		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	/* Charge all newly queued bytes to the socket in one shot. */
	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

int
ip6_append_data(struct sock *sk, 1781 int getfrag(void *from, char *to, int offset, int len, 1782 int odd, struct sk_buff *skb), 1783 void *from, int length, int transhdrlen, 1784 struct ipcm6_cookie *ipc6, struct flowi6 *fl6, 1785 struct rt6_info *rt, unsigned int flags) 1786 { 1787 struct inet_sock *inet = inet_sk(sk); 1788 struct ipv6_pinfo *np = inet6_sk(sk); 1789 int exthdrlen; 1790 int err; 1791 1792 if (flags&MSG_PROBE) 1793 return 0; 1794 if (skb_queue_empty(&sk->sk_write_queue)) { 1795 /* 1796 * setup for corking 1797 */ 1798 dst_hold(&rt->dst); 1799 err = ip6_setup_cork(sk, &inet->cork, &np->cork, 1800 ipc6, rt); 1801 if (err) 1802 return err; 1803 1804 inet->cork.fl.u.ip6 = *fl6; 1805 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); 1806 length += exthdrlen; 1807 transhdrlen += exthdrlen; 1808 } else { 1809 transhdrlen = 0; 1810 } 1811 1812 return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, 1813 &np->cork, sk_page_frag(sk), getfrag, 1814 from, length, transhdrlen, flags, ipc6); 1815 } 1816 EXPORT_SYMBOL_GPL(ip6_append_data); 1817 1818 static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) 1819 { 1820 struct dst_entry *dst = cork->base.dst; 1821 1822 cork->base.dst = NULL; 1823 cork->base.flags &= ~IPCORK_ALLFRAG; 1824 skb_dst_set(skb, dst); 1825 } 1826 1827 static void ip6_cork_release(struct inet_cork_full *cork, 1828 struct inet6_cork *v6_cork) 1829 { 1830 if (v6_cork->opt) { 1831 struct ipv6_txoptions *opt = v6_cork->opt; 1832 1833 kfree(opt->dst0opt); 1834 kfree(opt->dst1opt); 1835 kfree(opt->hopopt); 1836 kfree(opt->srcrt); 1837 kfree(opt); 1838 v6_cork->opt = NULL; 1839 } 1840 1841 if (cork->base.dst) { 1842 dst_release(cork->base.dst); 1843 cork->base.dst = NULL; 1844 cork->base.flags &= ~IPCORK_ALLFRAG; 1845 } 1846 } 1847 1848 struct sk_buff *__ip6_make_skb(struct sock *sk, 1849 struct sk_buff_head *queue, 1850 struct inet_cork_full *cork, 1851 struct inet6_cork *v6_cork) 1852 { 1853 struct sk_buff *skb, 
		       *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr *final_dst;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining queued skbs onto the first one's frag_list,
	 * stripping their per-fragment network headers and transferring
	 * their byte/truesize accounting to the head skb.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);
	__skb_pull(skb, skb_network_header_len(skb));

	/* Push extension headers; a routing header may rewrite final_dst. */
	final_dst = &fl6->daddr;
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;
	skb->tstamp = cork->base.transmit_time;

	ip6_cork_steal_dst(skb, cork);
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

/* Hand a finished cork skb to the output path, folding the congestion
 * return code and counting hard failures as output discards.
 */
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

/* Finish the pending write-queue frames and send the result; a NULL
 * finished skb (nothing pending) is treated as success.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

/* Drop every skb pending on @queue, counting each routed one as an
 * output discard, then release the cork state.
 */
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full
*cork, 1956 struct inet6_cork *v6_cork) 1957 { 1958 struct sk_buff *skb; 1959 1960 while ((skb = __skb_dequeue_tail(queue)) != NULL) { 1961 if (skb_dst(skb)) 1962 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1963 IPSTATS_MIB_OUTDISCARDS); 1964 kfree_skb(skb); 1965 } 1966 1967 ip6_cork_release(cork, v6_cork); 1968 } 1969 1970 void ip6_flush_pending_frames(struct sock *sk) 1971 { 1972 __ip6_flush_pending_frames(sk, &sk->sk_write_queue, 1973 &inet_sk(sk)->cork, &inet6_sk(sk)->cork); 1974 } 1975 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 1976 1977 struct sk_buff *ip6_make_skb(struct sock *sk, 1978 int getfrag(void *from, char *to, int offset, 1979 int len, int odd, struct sk_buff *skb), 1980 void *from, int length, int transhdrlen, 1981 struct ipcm6_cookie *ipc6, struct rt6_info *rt, 1982 unsigned int flags, struct inet_cork_full *cork) 1983 { 1984 struct inet6_cork v6_cork; 1985 struct sk_buff_head queue; 1986 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); 1987 int err; 1988 1989 if (flags & MSG_PROBE) { 1990 dst_release(&rt->dst); 1991 return NULL; 1992 } 1993 1994 __skb_queue_head_init(&queue); 1995 1996 cork->base.flags = 0; 1997 cork->base.addr = 0; 1998 cork->base.opt = NULL; 1999 v6_cork.opt = NULL; 2000 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt); 2001 if (err) { 2002 ip6_cork_release(cork, &v6_cork); 2003 return ERR_PTR(err); 2004 } 2005 if (ipc6->dontfrag < 0) 2006 ipc6->dontfrag = inet6_sk(sk)->dontfrag; 2007 2008 err = __ip6_append_data(sk, &queue, cork, &v6_cork, 2009 ¤t->task_frag, getfrag, from, 2010 length + exthdrlen, transhdrlen + exthdrlen, 2011 flags, ipc6); 2012 if (err) { 2013 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); 2014 return ERR_PTR(err); 2015 } 2016 2017 return __ip6_make_skb(sk, &queue, cork, &v6_cork); 2018 } 2019