// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = ip6_fragment(net, sk, segs, ip6_finish_output2);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
		return __ip6_finish_output(net, sk, skb);
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb(skb);
		return ret;
	}
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We do NOT process RA packets here; they are pushed to user
	 *	level AS IS, without any warranty that the application will
	 *	be able to interpret them.  The reason is that we cannot make
	 *	anything clever here.
	 *
	 *	We are not an end node, so if the packet contains AH/ESP we
	 *	cannot do anything with it.  Defragmentation would also be a
	 *	mistake: RA packets cannot be fragmented, because there is no
	 *	warranty that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
539 */ 540 if (IP6CB(skb)->iif == dst->dev->ifindex && 541 opt->srcrt == 0 && !skb_sec_path(skb)) { 542 struct in6_addr *target = NULL; 543 struct inet_peer *peer; 544 struct rt6_info *rt; 545 546 /* 547 * incoming and outgoing devices are the same 548 * send a redirect. 549 */ 550 551 rt = (struct rt6_info *) dst; 552 if (rt->rt6i_flags & RTF_GATEWAY) 553 target = &rt->rt6i_gateway; 554 else 555 target = &hdr->daddr; 556 557 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1); 558 559 /* Limit redirects both by destination (here) 560 and by source (inside ndisc_send_redirect) 561 */ 562 if (inet_peer_xrlim_allow(peer, 1*HZ)) 563 ndisc_send_redirect(skb, target); 564 if (peer) 565 inet_putpeer(peer); 566 } else { 567 int addrtype = ipv6_addr_type(&hdr->saddr); 568 569 /* This check is security critical. */ 570 if (addrtype == IPV6_ADDR_ANY || 571 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 572 goto error; 573 if (addrtype & IPV6_ADDR_LINKLOCAL) { 574 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 575 ICMPV6_NOT_NEIGHBOUR, 0); 576 goto error; 577 } 578 } 579 580 mtu = ip6_dst_mtu_forward(dst); 581 if (mtu < IPV6_MIN_MTU) 582 mtu = IPV6_MIN_MTU; 583 584 if (ip6_pkt_too_big(skb, mtu)) { 585 /* Again, force OUTPUT device used as source address */ 586 skb->dev = dst->dev; 587 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 588 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); 589 __IP6_INC_STATS(net, ip6_dst_idev(dst), 590 IPSTATS_MIB_FRAGFAILS); 591 kfree_skb(skb); 592 return -EMSGSIZE; 593 } 594 595 if (skb_cow(skb, dst->dev->hard_header_len)) { 596 __IP6_INC_STATS(net, ip6_dst_idev(dst), 597 IPSTATS_MIB_OUTDISCARDS); 598 goto drop; 599 } 600 601 hdr = ipv6_hdr(skb); 602 603 /* Mangling hops number delayed to point after skb COW */ 604 605 hdr->hop_limit--; 606 607 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 608 net, NULL, skb, skb->dev, dst->dev, 609 ip6_forward_finish); 610 611 error: 612 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); 613 drop: 614 kfree_skb(skb); 615 return -EINVAL; 616 } 617 618 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 619 { 620 to->pkt_type = from->pkt_type; 621 to->priority = from->priority; 622 to->protocol = from->protocol; 623 skb_dst_drop(to); 624 skb_dst_set(to, dst_clone(skb_dst(from))); 625 to->dev = from->dev; 626 to->mark = from->mark; 627 628 skb_copy_hash(to, from); 629 630 #ifdef CONFIG_NET_SCHED 631 to->tc_index = from->tc_index; 632 #endif 633 nf_copy(to, from); 634 skb_ext_copy(to, from); 635 skb_copy_secmark(to, from); 636 } 637 638 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, 639 u8 nexthdr, __be32 frag_id, 640 struct ip6_fraglist_iter *iter) 641 { 642 unsigned int first_len; 643 struct frag_hdr *fh; 644 645 /* BUILD HEADER */ 646 *prevhdr = NEXTHDR_FRAGMENT; 647 iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 648 if (!iter->tmp_hdr) 649 return -ENOMEM; 650 651 iter->frag = skb_shinfo(skb)->frag_list; 652 skb_frag_list_init(skb); 653 654 iter->offset = 0; 655 iter->hlen = hlen; 656 iter->frag_id = frag_id; 657 iter->nexthdr = nexthdr; 658 659 __skb_pull(skb, hlen); 660 fh = __skb_push(skb, sizeof(struct frag_hdr)); 661 __skb_push(skb, hlen); 662 skb_reset_network_header(skb); 663 memcpy(skb_network_header(skb), iter->tmp_hdr, hlen); 664 665 fh->nexthdr = nexthdr; 666 fh->reserved = 0; 667 fh->frag_off = htons(IP6_MF); 668 fh->identification = frag_id; 669 670 first_len = skb_pagelen(skb); 671 skb->data_len = first_len - skb_headlen(skb); 672 skb->len = 
first_len; 673 ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); 674 675 return 0; 676 } 677 EXPORT_SYMBOL(ip6_fraglist_init); 678 679 void ip6_fraglist_prepare(struct sk_buff *skb, 680 struct ip6_fraglist_iter *iter) 681 { 682 struct sk_buff *frag = iter->frag; 683 unsigned int hlen = iter->hlen; 684 struct frag_hdr *fh; 685 686 frag->ip_summed = CHECKSUM_NONE; 687 skb_reset_transport_header(frag); 688 fh = __skb_push(frag, sizeof(struct frag_hdr)); 689 __skb_push(frag, hlen); 690 skb_reset_network_header(frag); 691 memcpy(skb_network_header(frag), iter->tmp_hdr, hlen); 692 iter->offset += skb->len - hlen - sizeof(struct frag_hdr); 693 fh->nexthdr = iter->nexthdr; 694 fh->reserved = 0; 695 fh->frag_off = htons(iter->offset); 696 if (frag->next) 697 fh->frag_off |= htons(IP6_MF); 698 fh->identification = iter->frag_id; 699 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 700 ip6_copy_metadata(frag, skb); 701 } 702 EXPORT_SYMBOL(ip6_fraglist_prepare); 703 704 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu, 705 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, 706 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state) 707 { 708 state->prevhdr = prevhdr; 709 state->nexthdr = nexthdr; 710 state->frag_id = frag_id; 711 712 state->hlen = hlen; 713 state->mtu = mtu; 714 715 state->left = skb->len - hlen; /* Space per frame */ 716 state->ptr = hlen; /* Where to start from */ 717 718 state->hroom = hdr_room; 719 state->troom = needed_tailroom; 720 721 state->offset = 0; 722 } 723 EXPORT_SYMBOL(ip6_frag_init); 724 725 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state) 726 { 727 u8 *prevhdr = state->prevhdr, *fragnexthdr_offset; 728 struct sk_buff *frag; 729 struct frag_hdr *fh; 730 unsigned int len; 731 732 len = state->left; 733 /* IF: it doesn't fit, use 'mtu' - the data space left */ 734 if (len > state->mtu) 735 len = state->mtu; 736 /* IF: we are not sending up to and including the packet end 737 then align the next start on an eight byte boundary */ 738 if (len < state->left) 739 len &= ~7; 740 741 /* Allocate buffer */ 742 frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) + 743 state->hroom + state->troom, GFP_ATOMIC); 744 if (!frag) 745 return ERR_PTR(-ENOMEM); 746 747 /* 748 * Set up data on packet 749 */ 750 751 ip6_copy_metadata(frag, skb); 752 skb_reserve(frag, state->hroom); 753 skb_put(frag, len + state->hlen + sizeof(struct frag_hdr)); 754 skb_reset_network_header(frag); 755 fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen); 756 frag->transport_header = (frag->network_header + state->hlen + 757 sizeof(struct frag_hdr)); 758 759 /* 760 * Charge the memory for the fragment to any owner 761 * it might possess 762 */ 763 if (skb->sk) 764 skb_set_owner_w(frag, skb->sk); 765 766 /* 767 * Copy the packet header into the new buffer. 768 */ 769 skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen); 770 771 fragnexthdr_offset = skb_network_header(frag); 772 fragnexthdr_offset += prevhdr - skb_network_header(skb); 773 *fragnexthdr_offset = NEXTHDR_FRAGMENT; 774 775 /* 776 * Build fragment header. 777 */ 778 fh->nexthdr = state->nexthdr; 779 fh->reserved = 0; 780 fh->identification = state->frag_id; 781 782 /* 783 * Copy a block of the IP datagram. 
784 */ 785 BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag), 786 len)); 787 state->left -= len; 788 789 fh->frag_off = htons(state->offset); 790 if (state->left > 0) 791 fh->frag_off |= htons(IP6_MF); 792 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 793 794 state->ptr += len; 795 state->offset += len; 796 797 return frag; 798 } 799 EXPORT_SYMBOL(ip6_frag_next); 800 801 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 802 int (*output)(struct net *, struct sock *, struct sk_buff *)) 803 { 804 struct sk_buff *frag; 805 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 806 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 807 inet6_sk(skb->sk) : NULL; 808 struct ip6_frag_state state; 809 unsigned int mtu, hlen, nexthdr_offset; 810 ktime_t tstamp = skb->tstamp; 811 int hroom, err = 0; 812 __be32 frag_id; 813 u8 *prevhdr, nexthdr = 0; 814 815 err = ip6_find_1stfragopt(skb, &prevhdr); 816 if (err < 0) 817 goto fail; 818 hlen = err; 819 nexthdr = *prevhdr; 820 nexthdr_offset = prevhdr - skb_network_header(skb); 821 822 mtu = ip6_skb_dst_mtu(skb); 823 824 /* We must not fragment if the socket is set to force MTU discovery 825 * or if the skb it not generated by a local socket. 826 */ 827 if (unlikely(!skb->ignore_df && skb->len > mtu)) 828 goto fail_toobig; 829 830 if (IP6CB(skb)->frag_max_size) { 831 if (IP6CB(skb)->frag_max_size > mtu) 832 goto fail_toobig; 833 834 /* don't send fragments larger than what we received */ 835 mtu = IP6CB(skb)->frag_max_size; 836 if (mtu < IPV6_MIN_MTU) 837 mtu = IPV6_MIN_MTU; 838 } 839 840 if (np && np->frag_size < mtu) { 841 if (np->frag_size) 842 mtu = np->frag_size; 843 } 844 if (mtu < hlen + sizeof(struct frag_hdr) + 8) 845 goto fail_toobig; 846 mtu -= hlen + sizeof(struct frag_hdr); 847 848 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, 849 &ipv6_hdr(skb)->saddr); 850 851 if (skb->ip_summed == CHECKSUM_PARTIAL && 852 (err = skb_checksum_help(skb))) 853 goto fail; 854 855 prevhdr = skb_network_header(skb) + nexthdr_offset; 856 hroom = LL_RESERVED_SPACE(rt->dst.dev); 857 if (skb_has_frag_list(skb)) { 858 unsigned int first_len = skb_pagelen(skb); 859 struct ip6_fraglist_iter iter; 860 struct sk_buff *frag2; 861 862 if (first_len - hlen > mtu || 863 ((first_len - hlen) & 7) || 864 skb_cloned(skb) || 865 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) 866 goto slow_path; 867 868 skb_walk_frags(skb, frag) { 869 /* Correct geometry. */ 870 if (frag->len > mtu || 871 ((frag->len & 7) && frag->next) || 872 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr))) 873 goto slow_path_clean; 874 875 /* Partially cloned skb? */ 876 if (skb_shared(frag)) 877 goto slow_path_clean; 878 879 BUG_ON(frag->sk); 880 if (skb->sk) { 881 frag->sk = skb->sk; 882 frag->destructor = sock_wfree; 883 } 884 skb->truesize -= frag->truesize; 885 } 886 887 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, 888 &iter); 889 if (err < 0) 890 goto fail; 891 892 for (;;) { 893 /* Prepare header of the next frame, 894 * before previous one went down. 
*/ 895 if (iter.frag) 896 ip6_fraglist_prepare(skb, &iter); 897 898 skb->tstamp = tstamp; 899 err = output(net, sk, skb); 900 if (!err) 901 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 902 IPSTATS_MIB_FRAGCREATES); 903 904 if (err || !iter.frag) 905 break; 906 907 skb = ip6_fraglist_next(&iter); 908 } 909 910 kfree(iter.tmp_hdr); 911 912 if (err == 0) { 913 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 914 IPSTATS_MIB_FRAGOKS); 915 return 0; 916 } 917 918 kfree_skb_list(iter.frag); 919 920 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 921 IPSTATS_MIB_FRAGFAILS); 922 return err; 923 924 slow_path_clean: 925 skb_walk_frags(skb, frag2) { 926 if (frag2 == frag) 927 break; 928 frag2->sk = NULL; 929 frag2->destructor = NULL; 930 skb->truesize += frag2->truesize; 931 } 932 } 933 934 slow_path: 935 /* 936 * Fragment the datagram. 937 */ 938 939 ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom, 940 LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id, 941 &state); 942 943 /* 944 * Keep copying data until we run out. 945 */ 946 947 while (state.left > 0) { 948 frag = ip6_frag_next(skb, &state); 949 if (IS_ERR(frag)) { 950 err = PTR_ERR(frag); 951 goto fail; 952 } 953 954 /* 955 * Put this fragment into the sending queue. 956 */ 957 frag->tstamp = tstamp; 958 err = output(net, sk, frag); 959 if (err) 960 goto fail; 961 962 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 963 IPSTATS_MIB_FRAGCREATES); 964 } 965 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 966 IPSTATS_MIB_FRAGOKS); 967 consume_skb(skb); 968 return err; 969 970 fail_toobig: 971 if (skb->sk && dst_allfrag(skb_dst(skb))) 972 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); 973 974 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 975 err = -EMSGSIZE; 976 977 fail: 978 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 979 IPSTATS_MIB_FRAGFAILS); 980 kfree_skb(skb); 981 return err; 982 } 983 984 static inline int ip6_rt_check(const struct rt6key *rt_key, 985 const struct in6_addr *fl_addr, 986 const struct in6_addr *addr_cache) 987 { 988 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 989 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); 990 } 991 992 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 993 struct dst_entry *dst, 994 const struct flowi6 *fl6) 995 { 996 struct ipv6_pinfo *np = inet6_sk(sk); 997 struct rt6_info *rt; 998 999 if (!dst) 1000 goto out; 1001 1002 if (dst->ops->family != AF_INET6) { 1003 dst_release(dst); 1004 return NULL; 1005 } 1006 1007 rt = (struct rt6_info *)dst; 1008 /* Yes, checking route validity in not connected 1009 * case is not very simple. Take into account, 1010 * that we do not support routing by source, TOS, 1011 * and MSG_DONTROUTE --ANK (980726) 1012 * 1013 * 1. ip6_rt_check(): If route was host route, 1014 * check that cached destination is current. 1015 * If it is network route, we still may 1016 * check its validity using saved pointer 1017 * to the last used address: daddr_cache. 1018 * We do not want to save whole address now, 1019 * (because main consumer of this service 1020 * is tcp, which has not this problem), 1021 * so that the last trick works only on connected 1022 * sockets. 1023 * 2. oif also should be the same. 
1024 */ 1025 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 1026 #ifdef CONFIG_IPV6_SUBTREES 1027 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 1028 #endif 1029 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && 1030 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { 1031 dst_release(dst); 1032 dst = NULL; 1033 } 1034 1035 out: 1036 return dst; 1037 } 1038 1039 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, 1040 struct dst_entry **dst, struct flowi6 *fl6) 1041 { 1042 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 1043 struct neighbour *n; 1044 struct rt6_info *rt; 1045 #endif 1046 int err; 1047 int flags = 0; 1048 1049 /* The correct way to handle this would be to do 1050 * ip6_route_get_saddr, and then ip6_route_output; however, 1051 * the route-specific preferred source forces the 1052 * ip6_route_output call _before_ ip6_route_get_saddr. 1053 * 1054 * In source specific routing (no src=any default route), 1055 * ip6_route_output will fail given src=any saddr, though, so 1056 * that's why we try it again later. 1057 */ 1058 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) { 1059 struct fib6_info *from; 1060 struct rt6_info *rt; 1061 bool had_dst = *dst != NULL; 1062 1063 if (!had_dst) 1064 *dst = ip6_route_output(net, sk, fl6); 1065 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; 1066 1067 rcu_read_lock(); 1068 from = rt ? rcu_dereference(rt->from) : NULL; 1069 err = ip6_route_get_saddr(net, from, &fl6->daddr, 1070 sk ? inet6_sk(sk)->srcprefs : 0, 1071 &fl6->saddr); 1072 rcu_read_unlock(); 1073 1074 if (err) 1075 goto out_err_release; 1076 1077 /* If we had an erroneous initial result, pretend it 1078 * never existed and let the SA-enabled version take 1079 * over. 1080 */ 1081 if (!had_dst && (*dst)->error) { 1082 dst_release(*dst); 1083 *dst = NULL; 1084 } 1085 1086 if (fl6->flowi6_oif) 1087 flags |= RT6_LOOKUP_F_IFACE; 1088 } 1089 1090 if (!*dst) 1091 *dst = ip6_route_output_flags(net, sk, fl6, flags); 1092 1093 err = (*dst)->error; 1094 if (err) 1095 goto out_err_release; 1096 1097 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 1098 /* 1099 * Here if the dst entry we've looked up 1100 * has a neighbour entry that is in the INCOMPLETE 1101 * state and the src address from the flow is 1102 * marked as OPTIMISTIC, we release the found 1103 * dst entry and replace it instead with the 1104 * dst entry of the nexthop router 1105 */ 1106 rt = (struct rt6_info *) *dst; 1107 rcu_read_lock_bh(); 1108 n = __ipv6_neigh_lookup_noref(rt->dst.dev, 1109 rt6_nexthop(rt, &fl6->daddr)); 1110 err = n && !(n->nud_state & NUD_VALID) ? 
-EINVAL : 0; 1111 rcu_read_unlock_bh(); 1112 1113 if (err) { 1114 struct inet6_ifaddr *ifp; 1115 struct flowi6 fl_gw6; 1116 int redirect; 1117 1118 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 1119 (*dst)->dev, 1); 1120 1121 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 1122 if (ifp) 1123 in6_ifa_put(ifp); 1124 1125 if (redirect) { 1126 /* 1127 * We need to get the dst entry for the 1128 * default router instead 1129 */ 1130 dst_release(*dst); 1131 memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 1132 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 1133 *dst = ip6_route_output(net, sk, &fl_gw6); 1134 err = (*dst)->error; 1135 if (err) 1136 goto out_err_release; 1137 } 1138 } 1139 #endif 1140 if (ipv6_addr_v4mapped(&fl6->saddr) && 1141 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { 1142 err = -EAFNOSUPPORT; 1143 goto out_err_release; 1144 } 1145 1146 return 0; 1147 1148 out_err_release: 1149 dst_release(*dst); 1150 *dst = NULL; 1151 1152 if (err == -ENETUNREACH) 1153 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); 1154 return err; 1155 } 1156 1157 /** 1158 * ip6_dst_lookup - perform route lookup on flow 1159 * @net: Network namespace to perform lookup in 1160 * @sk: socket which provides route info 1161 * @dst: pointer to dst_entry * for result 1162 * @fl6: flow to lookup 1163 * 1164 * This function performs a route lookup on the given flow. 1165 * 1166 * It returns zero on success, or a standard errno code on error. 1167 */ 1168 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, 1169 struct flowi6 *fl6) 1170 { 1171 *dst = NULL; 1172 return ip6_dst_lookup_tail(net, sk, dst, fl6); 1173 } 1174 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1175 1176 /** 1177 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 1178 * @net: Network namespace to perform lookup in 1179 * @sk: socket which provides route info 1180 * @fl6: flow to lookup 1181 * @final_dst: final destination address for ipsec lookup 1182 * 1183 * This function performs a route lookup on the given flow. 1184 * 1185 * It returns a valid dst pointer on success, or a pointer encoded 1186 * error code. 1187 */ 1188 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, 1189 const struct in6_addr *final_dst) 1190 { 1191 struct dst_entry *dst = NULL; 1192 int err; 1193 1194 err = ip6_dst_lookup_tail(net, sk, &dst, fl6); 1195 if (err) 1196 return ERR_PTR(err); 1197 if (final_dst) 1198 fl6->daddr = *final_dst; 1199 1200 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); 1201 } 1202 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 1203 1204 /** 1205 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 1206 * @sk: socket which provides the dst cache and route info 1207 * @fl6: flow to lookup 1208 * @final_dst: final destination address for ipsec lookup 1209 * @connected: whether @sk is connected or not 1210 * 1211 * This function performs a route lookup on the given flow with the 1212 * possibility of using the cached route in the socket if it is valid. 1213 * It will take the socket dst lock when operating on the dst cache. 1214 * As a result, this function can only be used in process context. 1215 * 1216 * In addition, for a connected socket, cache the dst in the socket 1217 * if the current cache is not valid. 1218 * 1219 * It returns a valid dst pointer on success, or a pointer encoded 1220 * error code. 
1221 */ 1222 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1223 const struct in6_addr *final_dst, 1224 bool connected) 1225 { 1226 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1227 1228 dst = ip6_sk_dst_check(sk, dst, fl6); 1229 if (dst) 1230 return dst; 1231 1232 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst); 1233 if (connected && !IS_ERR(dst)) 1234 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6); 1235 1236 return dst; 1237 } 1238 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1239 1240 /** 1241 * ip6_dst_lookup_tunnel - perform route lookup on tunnel 1242 * @skb: Packet for which lookup is done 1243 * @dev: Tunnel device 1244 * @net: Network namespace of tunnel device 1245 * @sock: Socket which provides route info 1246 * @saddr: Memory to store the src ip address 1247 * @info: Tunnel information 1248 * @protocol: IP protocol 1249 * @use_cache: Flag to enable cache usage 1250 * This function performs a route lookup on a tunnel 1251 * 1252 * It returns a valid dst pointer and stores src address to be used in 1253 * tunnel in param saddr on success, else a pointer encoded error code. 1254 */ 1255 1256 struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, 1257 struct net_device *dev, 1258 struct net *net, 1259 struct socket *sock, 1260 struct in6_addr *saddr, 1261 const struct ip_tunnel_info *info, 1262 u8 protocol, 1263 bool use_cache) 1264 { 1265 struct dst_entry *dst = NULL; 1266 #ifdef CONFIG_DST_CACHE 1267 struct dst_cache *dst_cache; 1268 #endif 1269 struct flowi6 fl6; 1270 __u8 prio; 1271 1272 #ifdef CONFIG_DST_CACHE 1273 dst_cache = (struct dst_cache *)&info->dst_cache; 1274 if (use_cache) { 1275 dst = dst_cache_get_ip6(dst_cache, saddr); 1276 if (dst) 1277 return dst; 1278 } 1279 #endif 1280 memset(&fl6, 0, sizeof(fl6)); 1281 fl6.flowi6_mark = skb->mark; 1282 fl6.flowi6_proto = protocol; 1283 fl6.daddr = info->key.u.ipv6.dst; 1284 fl6.saddr = info->key.u.ipv6.src; 1285 prio = info->key.tos; 1286 fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio), 1287 info->key.label); 1288 1289 dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, 1290 NULL); 1291 if (IS_ERR(dst)) { 1292 netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); 1293 return ERR_PTR(-ENETUNREACH); 1294 } 1295 if (dst->dev == dev) { /* is this necessary? */ 1296 netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); 1297 dst_release(dst); 1298 return ERR_PTR(-ELOOP); 1299 } 1300 #ifdef CONFIG_DST_CACHE 1301 if (use_cache) 1302 dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); 1303 #endif 1304 *saddr = fl6.saddr; 1305 return dst; 1306 } 1307 EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); 1308 1309 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1310 gfp_t gfp) 1311 { 1312 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1313 } 1314 1315 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 1316 gfp_t gfp) 1317 { 1318 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1319 } 1320 1321 static void ip6_append_data_mtu(unsigned int *mtu, 1322 int *maxfraglen, 1323 unsigned int fragheaderlen, 1324 struct sk_buff *skb, 1325 struct rt6_info *rt, 1326 unsigned int orig_mtu) 1327 { 1328 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { 1329 if (!skb) { 1330 /* first fragment, reserve header_len */ 1331 *mtu = orig_mtu - rt->dst.header_len; 1332 1333 } else { 1334 /* 1335 * this fragment is not first, the headers 1336 * space is regarded as data space. 
1337 */ 1338 *mtu = orig_mtu; 1339 } 1340 *maxfraglen = ((*mtu - fragheaderlen) & ~7) 1341 + fragheaderlen - sizeof(struct frag_hdr); 1342 } 1343 } 1344 1345 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, 1346 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, 1347 struct rt6_info *rt, struct flowi6 *fl6) 1348 { 1349 struct ipv6_pinfo *np = inet6_sk(sk); 1350 unsigned int mtu; 1351 struct ipv6_txoptions *opt = ipc6->opt; 1352 1353 /* 1354 * setup for corking 1355 */ 1356 if (opt) { 1357 if (WARN_ON(v6_cork->opt)) 1358 return -EINVAL; 1359 1360 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); 1361 if (unlikely(!v6_cork->opt)) 1362 return -ENOBUFS; 1363 1364 v6_cork->opt->tot_len = sizeof(*opt); 1365 v6_cork->opt->opt_flen = opt->opt_flen; 1366 v6_cork->opt->opt_nflen = opt->opt_nflen; 1367 1368 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, 1369 sk->sk_allocation); 1370 if (opt->dst0opt && !v6_cork->opt->dst0opt) 1371 return -ENOBUFS; 1372 1373 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, 1374 sk->sk_allocation); 1375 if (opt->dst1opt && !v6_cork->opt->dst1opt) 1376 return -ENOBUFS; 1377 1378 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, 1379 sk->sk_allocation); 1380 if (opt->hopopt && !v6_cork->opt->hopopt) 1381 return -ENOBUFS; 1382 1383 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, 1384 sk->sk_allocation); 1385 if (opt->srcrt && !v6_cork->opt->srcrt) 1386 return -ENOBUFS; 1387 1388 /* need source address above miyazawa*/ 1389 } 1390 dst_hold(&rt->dst); 1391 cork->base.dst = &rt->dst; 1392 cork->fl.u.ip6 = *fl6; 1393 v6_cork->hop_limit = ipc6->hlimit; 1394 v6_cork->tclass = ipc6->tclass; 1395 if (rt->dst.flags & DST_XFRM_TUNNEL) 1396 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1397 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); 1398 else 1399 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1400 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); 1401 if (np->frag_size < mtu) { 1402 if (np->frag_size) 1403 mtu = np->frag_size; 1404 } 1405 if (mtu < IPV6_MIN_MTU) 1406 return -EINVAL; 1407 cork->base.fragsize = mtu; 1408 cork->base.gso_size = ipc6->gso_size; 1409 cork->base.tx_flags = 0; 1410 cork->base.mark = ipc6->sockc.mark; 1411 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); 1412 1413 if (dst_allfrag(xfrm_dst_path(&rt->dst))) 1414 cork->base.flags |= IPCORK_ALLFRAG; 1415 cork->base.length = 0; 1416 1417 cork->base.transmit_time = ipc6->sockc.transmit_time; 1418 1419 return 0; 1420 } 1421 1422 static int __ip6_append_data(struct sock *sk, 1423 struct flowi6 *fl6, 1424 struct sk_buff_head *queue, 1425 struct inet_cork *cork, 1426 struct inet6_cork *v6_cork, 1427 struct page_frag *pfrag, 1428 int getfrag(void *from, char *to, int offset, 1429 int len, int odd, struct sk_buff *skb), 1430 void *from, int length, int transhdrlen, 1431 unsigned int flags, struct ipcm6_cookie *ipc6) 1432 { 1433 struct sk_buff *skb, *skb_prev = NULL; 1434 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; 1435 struct ubuf_info *uarg = NULL; 1436 int exthdrlen = 0; 1437 int dst_exthdrlen = 0; 1438 int hh_len; 1439 int copy; 1440 int err; 1441 int offset = 0; 1442 u32 tskey = 0; 1443 struct rt6_info *rt = (struct rt6_info *)cork->dst; 1444 struct ipv6_txoptions *opt = v6_cork->opt; 1445 int csummode = CHECKSUM_NONE; 1446 unsigned int maxnonfragsize, headersize; 1447 unsigned int wmem_alloc_delta = 0; 1448 bool paged, extra_uref = false; 1449 1450 skb = skb_peek_tail(queue); 1451 if (!skb) { 1452 exthdrlen = opt ? 
opt->opt_flen : 0; 1453 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; 1454 } 1455 1456 paged = !!cork->gso_size; 1457 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; 1458 orig_mtu = mtu; 1459 1460 if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && 1461 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) 1462 tskey = sk->sk_tskey++; 1463 1464 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1465 1466 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1467 (opt ? opt->opt_nflen : 0); 1468 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - 1469 sizeof(struct frag_hdr); 1470 1471 headersize = sizeof(struct ipv6hdr) + 1472 (opt ? opt->opt_flen + opt->opt_nflen : 0) + 1473 (dst_allfrag(&rt->dst) ? 1474 sizeof(struct frag_hdr) : 0) + 1475 rt->rt6i_nfheader_len; 1476 1477 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit 1478 * the first fragment 1479 */ 1480 if (headersize + transhdrlen > mtu) 1481 goto emsgsize; 1482 1483 if (cork->length + length > mtu - headersize && ipc6->dontfrag && 1484 (sk->sk_protocol == IPPROTO_UDP || 1485 sk->sk_protocol == IPPROTO_RAW)) { 1486 ipv6_local_rxpmtu(sk, fl6, mtu - headersize + 1487 sizeof(struct ipv6hdr)); 1488 goto emsgsize; 1489 } 1490 1491 if (ip6_sk_ignore_df(sk)) 1492 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; 1493 else 1494 maxnonfragsize = mtu; 1495 1496 if (cork->length + length > maxnonfragsize - headersize) { 1497 emsgsize: 1498 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0); 1499 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu); 1500 return -EMSGSIZE; 1501 } 1502 1503 /* CHECKSUM_PARTIAL only with no extension headers and when 1504 * we are not going to fragment 1505 */ 1506 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && 1507 headersize == sizeof(struct ipv6hdr) && 1508 length <= mtu - headersize && 1509 (!(flags & MSG_MORE) || cork->gso_size) && 1510 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) 1511 csummode = CHECKSUM_PARTIAL; 1512 1513 if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { 1514 uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); 1515 if (!uarg) 1516 return -ENOBUFS; 1517 extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ 1518 if (rt->dst.dev->features & NETIF_F_SG && 1519 csummode == CHECKSUM_PARTIAL) { 1520 paged = true; 1521 } else { 1522 uarg->zerocopy = 0; 1523 skb_zcopy_set(skb, uarg, &extra_uref); 1524 } 1525 } 1526 1527 /* 1528 * Let's try using as much space as possible. 1529 * Use MTU if total length of the message fits into the MTU. 1530 * Otherwise, we need to reserve fragment header and 1531 * fragment alignment (= 8-15 octects, in total). 1532 * 1533 * Note that we may need to "move" the data from the tail 1534 * of the buffer to the new fragment when we split 1535 * the message. 1536 * 1537 * FIXME: It may be fragmented into multiple chunks 1538 * at once if non-fragmentable extension headers 1539 * are too large. 1540 * --yoshfuji 1541 */ 1542 1543 cork->length += length; 1544 if (!skb) 1545 goto alloc_new_skb; 1546 1547 while (length > 0) { 1548 /* Check if the remaining data fits into current packet. */ 1549 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? 
mtu : maxfraglen) - skb->len; 1550 if (copy < length) 1551 copy = maxfraglen - skb->len; 1552 1553 if (copy <= 0) { 1554 char *data; 1555 unsigned int datalen; 1556 unsigned int fraglen; 1557 unsigned int fraggap; 1558 unsigned int alloclen; 1559 unsigned int pagedlen; 1560 alloc_new_skb: 1561 /* There's no room in the current skb */ 1562 if (skb) 1563 fraggap = skb->len - maxfraglen; 1564 else 1565 fraggap = 0; 1566 /* update mtu and maxfraglen if necessary */ 1567 if (!skb || !skb_prev) 1568 ip6_append_data_mtu(&mtu, &maxfraglen, 1569 fragheaderlen, skb, rt, 1570 orig_mtu); 1571 1572 skb_prev = skb; 1573 1574 /* 1575 * If remaining data exceeds the mtu, 1576 * we know we need more fragment(s). 1577 */ 1578 datalen = length + fraggap; 1579 1580 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1581 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 1582 fraglen = datalen + fragheaderlen; 1583 pagedlen = 0; 1584 1585 if ((flags & MSG_MORE) && 1586 !(rt->dst.dev->features&NETIF_F_SG)) 1587 alloclen = mtu; 1588 else if (!paged) 1589 alloclen = fraglen; 1590 else { 1591 alloclen = min_t(int, fraglen, MAX_HEADER); 1592 pagedlen = fraglen - alloclen; 1593 } 1594 1595 alloclen += dst_exthdrlen; 1596 1597 if (datalen != length + fraggap) { 1598 /* 1599 * this is not the last fragment, the trailer 1600 * space is regarded as data space. 1601 */ 1602 datalen += rt->dst.trailer_len; 1603 } 1604 1605 alloclen += rt->dst.trailer_len; 1606 fraglen = datalen + fragheaderlen; 1607 1608 /* 1609 * We just reserve space for fragment header. 1610 * Note: this may be overallocation if the message 1611 * (without MSG_MORE) fits into the MTU. 1612 */ 1613 alloclen += sizeof(struct frag_hdr); 1614 1615 copy = datalen - transhdrlen - fraggap - pagedlen; 1616 if (copy < 0) { 1617 err = -EINVAL; 1618 goto error; 1619 } 1620 if (transhdrlen) { 1621 skb = sock_alloc_send_skb(sk, 1622 alloclen + hh_len, 1623 (flags & MSG_DONTWAIT), &err); 1624 } else { 1625 skb = NULL; 1626 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 1627 2 * sk->sk_sndbuf) 1628 skb = alloc_skb(alloclen + hh_len, 1629 sk->sk_allocation); 1630 if (unlikely(!skb)) 1631 err = -ENOBUFS; 1632 } 1633 if (!skb) 1634 goto error; 1635 /* 1636 * Fill in the control structures 1637 */ 1638 skb->protocol = htons(ETH_P_IPV6); 1639 skb->ip_summed = csummode; 1640 skb->csum = 0; 1641 /* reserve for fragmentation and ipsec header */ 1642 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 1643 dst_exthdrlen); 1644 1645 /* 1646 * Find where to start putting bytes 1647 */ 1648 data = skb_put(skb, fraglen - pagedlen); 1649 skb_set_network_header(skb, exthdrlen); 1650 data += fragheaderlen; 1651 skb->transport_header = (skb->network_header + 1652 fragheaderlen); 1653 if (fraggap) { 1654 skb->csum = skb_copy_and_csum_bits( 1655 skb_prev, maxfraglen, 1656 data + transhdrlen, fraggap); 1657 skb_prev->csum = csum_sub(skb_prev->csum, 1658 skb->csum); 1659 data += fraggap; 1660 pskb_trim_unique(skb_prev, maxfraglen); 1661 } 1662 if (copy > 0 && 1663 getfrag(from, data + transhdrlen, offset, 1664 copy, fraggap, skb) < 0) { 1665 err = -EFAULT; 1666 kfree_skb(skb); 1667 goto error; 1668 } 1669 1670 offset += copy; 1671 length -= copy + transhdrlen; 1672 transhdrlen = 0; 1673 exthdrlen = 0; 1674 dst_exthdrlen = 0; 1675 1676 /* Only the initial fragment is time stamped */ 1677 skb_shinfo(skb)->tx_flags = cork->tx_flags; 1678 cork->tx_flags = 0; 1679 skb_shinfo(skb)->tskey = tskey; 1680 tskey = 0; 1681 
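			/* Attach the MSG_ZEROCOPY context (if any) to the
			 * newly allocated skb (illustrative comment added for
			 * clarity; see msg_zerocopy_realloc() above for where
			 * uarg and extra_uref come from).
			 */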
skb_zcopy_set(skb, uarg, &extra_uref); 1682 1683 if ((flags & MSG_CONFIRM) && !skb_prev) 1684 skb_set_dst_pending_confirm(skb, 1); 1685 1686 /* 1687 * Put the packet on the pending queue 1688 */ 1689 if (!skb->destructor) { 1690 skb->destructor = sock_wfree; 1691 skb->sk = sk; 1692 wmem_alloc_delta += skb->truesize; 1693 } 1694 __skb_queue_tail(queue, skb); 1695 continue; 1696 } 1697 1698 if (copy > length) 1699 copy = length; 1700 1701 if (!(rt->dst.dev->features&NETIF_F_SG) && 1702 skb_tailroom(skb) >= copy) { 1703 unsigned int off; 1704 1705 off = skb->len; 1706 if (getfrag(from, skb_put(skb, copy), 1707 offset, copy, off, skb) < 0) { 1708 __skb_trim(skb, off); 1709 err = -EFAULT; 1710 goto error; 1711 } 1712 } else if (!uarg || !uarg->zerocopy) { 1713 int i = skb_shinfo(skb)->nr_frags; 1714 1715 err = -ENOMEM; 1716 if (!sk_page_frag_refill(sk, pfrag)) 1717 goto error; 1718 1719 if (!skb_can_coalesce(skb, i, pfrag->page, 1720 pfrag->offset)) { 1721 err = -EMSGSIZE; 1722 if (i == MAX_SKB_FRAGS) 1723 goto error; 1724 1725 __skb_fill_page_desc(skb, i, pfrag->page, 1726 pfrag->offset, 0); 1727 skb_shinfo(skb)->nr_frags = ++i; 1728 get_page(pfrag->page); 1729 } 1730 copy = min_t(int, copy, pfrag->size - pfrag->offset); 1731 if (getfrag(from, 1732 page_address(pfrag->page) + pfrag->offset, 1733 offset, copy, skb->len, skb) < 0) 1734 goto error_efault; 1735 1736 pfrag->offset += copy; 1737 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1738 skb->len += copy; 1739 skb->data_len += copy; 1740 skb->truesize += copy; 1741 wmem_alloc_delta += copy; 1742 } else { 1743 err = skb_zerocopy_iter_dgram(skb, from, copy); 1744 if (err < 0) 1745 goto error; 1746 } 1747 offset += copy; 1748 length -= copy; 1749 } 1750 1751 if (wmem_alloc_delta) 1752 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 1753 return 0; 1754 1755 error_efault: 1756 err = -EFAULT; 1757 error: 1758 net_zcopy_put_abort(uarg, extra_uref); 1759 cork->length -= length; 1760 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1761 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 1762 return err; 1763 } 1764 1765 int ip6_append_data(struct sock *sk, 1766 int getfrag(void *from, char *to, int offset, int len, 1767 int odd, struct sk_buff *skb), 1768 void *from, int length, int transhdrlen, 1769 struct ipcm6_cookie *ipc6, struct flowi6 *fl6, 1770 struct rt6_info *rt, unsigned int flags) 1771 { 1772 struct inet_sock *inet = inet_sk(sk); 1773 struct ipv6_pinfo *np = inet6_sk(sk); 1774 int exthdrlen; 1775 int err; 1776 1777 if (flags&MSG_PROBE) 1778 return 0; 1779 if (skb_queue_empty(&sk->sk_write_queue)) { 1780 /* 1781 * setup for corking 1782 */ 1783 err = ip6_setup_cork(sk, &inet->cork, &np->cork, 1784 ipc6, rt, fl6); 1785 if (err) 1786 return err; 1787 1788 exthdrlen = (ipc6->opt ? 
ipc6->opt->opt_flen : 0); 1789 length += exthdrlen; 1790 transhdrlen += exthdrlen; 1791 } else { 1792 fl6 = &inet->cork.fl.u.ip6; 1793 transhdrlen = 0; 1794 } 1795 1796 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, 1797 &np->cork, sk_page_frag(sk), getfrag, 1798 from, length, transhdrlen, flags, ipc6); 1799 } 1800 EXPORT_SYMBOL_GPL(ip6_append_data); 1801 1802 static void ip6_cork_release(struct inet_cork_full *cork, 1803 struct inet6_cork *v6_cork) 1804 { 1805 if (v6_cork->opt) { 1806 kfree(v6_cork->opt->dst0opt); 1807 kfree(v6_cork->opt->dst1opt); 1808 kfree(v6_cork->opt->hopopt); 1809 kfree(v6_cork->opt->srcrt); 1810 kfree(v6_cork->opt); 1811 v6_cork->opt = NULL; 1812 } 1813 1814 if (cork->base.dst) { 1815 dst_release(cork->base.dst); 1816 cork->base.dst = NULL; 1817 cork->base.flags &= ~IPCORK_ALLFRAG; 1818 } 1819 memset(&cork->fl, 0, sizeof(cork->fl)); 1820 } 1821 1822 struct sk_buff *__ip6_make_skb(struct sock *sk, 1823 struct sk_buff_head *queue, 1824 struct inet_cork_full *cork, 1825 struct inet6_cork *v6_cork) 1826 { 1827 struct sk_buff *skb, *tmp_skb; 1828 struct sk_buff **tail_skb; 1829 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1830 struct ipv6_pinfo *np = inet6_sk(sk); 1831 struct net *net = sock_net(sk); 1832 struct ipv6hdr *hdr; 1833 struct ipv6_txoptions *opt = v6_cork->opt; 1834 struct rt6_info *rt = (struct rt6_info *)cork->base.dst; 1835 struct flowi6 *fl6 = &cork->fl.u.ip6; 1836 unsigned char proto = fl6->flowi6_proto; 1837 1838 skb = __skb_dequeue(queue); 1839 if (!skb) 1840 goto out; 1841 tail_skb = &(skb_shinfo(skb)->frag_list); 1842 1843 /* move skb->data to ip header from ext header */ 1844 if (skb->data < skb_network_header(skb)) 1845 __skb_pull(skb, skb_network_offset(skb)); 1846 while ((tmp_skb = __skb_dequeue(queue)) != NULL) { 1847 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1848 *tail_skb = tmp_skb; 1849 tail_skb = &(tmp_skb->next); 1850 skb->len += tmp_skb->len; 1851 skb->data_len += tmp_skb->len; 1852 skb->truesize += tmp_skb->truesize; 1853 tmp_skb->destructor = NULL; 1854 tmp_skb->sk = NULL; 1855 } 1856 1857 /* Allow local fragmentation. 
*/ 1858 skb->ignore_df = ip6_sk_ignore_df(sk); 1859 1860 *final_dst = fl6->daddr; 1861 __skb_pull(skb, skb_network_header_len(skb)); 1862 if (opt && opt->opt_flen) 1863 ipv6_push_frag_opts(skb, opt, &proto); 1864 if (opt && opt->opt_nflen) 1865 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); 1866 1867 skb_push(skb, sizeof(struct ipv6hdr)); 1868 skb_reset_network_header(skb); 1869 hdr = ipv6_hdr(skb); 1870 1871 ip6_flow_hdr(hdr, v6_cork->tclass, 1872 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1873 ip6_autoflowlabel(net, np), fl6)); 1874 hdr->hop_limit = v6_cork->hop_limit; 1875 hdr->nexthdr = proto; 1876 hdr->saddr = fl6->saddr; 1877 hdr->daddr = *final_dst; 1878 1879 skb->priority = sk->sk_priority; 1880 skb->mark = cork->base.mark; 1881 1882 skb->tstamp = cork->base.transmit_time; 1883 1884 skb_dst_set(skb, dst_clone(&rt->dst)); 1885 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1886 if (proto == IPPROTO_ICMPV6) { 1887 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1888 1889 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); 1890 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1891 } 1892 1893 ip6_cork_release(cork, v6_cork); 1894 out: 1895 return skb; 1896 } 1897 1898 int ip6_send_skb(struct sk_buff *skb) 1899 { 1900 struct net *net = sock_net(skb->sk); 1901 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 1902 int err; 1903 1904 err = ip6_local_out(net, skb->sk, skb); 1905 if (err) { 1906 if (err > 0) 1907 err = net_xmit_errno(err); 1908 if (err) 1909 IP6_INC_STATS(net, rt->rt6i_idev, 1910 IPSTATS_MIB_OUTDISCARDS); 1911 } 1912 1913 return err; 1914 } 1915 1916 int ip6_push_pending_frames(struct sock *sk) 1917 { 1918 struct sk_buff *skb; 1919 1920 skb = ip6_finish_skb(sk); 1921 if (!skb) 1922 return 0; 1923 1924 return ip6_send_skb(skb); 1925 } 1926 EXPORT_SYMBOL_GPL(ip6_push_pending_frames); 1927 1928 static void __ip6_flush_pending_frames(struct sock *sk, 1929 struct sk_buff_head *queue, 1930 struct inet_cork_full *cork, 1931 struct inet6_cork *v6_cork) 1932 { 1933 struct sk_buff *skb; 1934 1935 while ((skb = __skb_dequeue_tail(queue)) != NULL) { 1936 if (skb_dst(skb)) 1937 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1938 IPSTATS_MIB_OUTDISCARDS); 1939 kfree_skb(skb); 1940 } 1941 1942 ip6_cork_release(cork, v6_cork); 1943 } 1944 1945 void ip6_flush_pending_frames(struct sock *sk) 1946 { 1947 __ip6_flush_pending_frames(sk, &sk->sk_write_queue, 1948 &inet_sk(sk)->cork, &inet6_sk(sk)->cork); 1949 } 1950 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 1951 1952 struct sk_buff *ip6_make_skb(struct sock *sk, 1953 int getfrag(void *from, char *to, int offset, 1954 int len, int odd, struct sk_buff *skb), 1955 void *from, int length, int transhdrlen, 1956 struct ipcm6_cookie *ipc6, struct flowi6 *fl6, 1957 struct rt6_info *rt, unsigned int flags, 1958 struct inet_cork_full *cork) 1959 { 1960 struct inet6_cork v6_cork; 1961 struct sk_buff_head queue; 1962 int exthdrlen = (ipc6->opt ? 
ipc6->opt->opt_flen : 0); 1963 int err; 1964 1965 if (flags & MSG_PROBE) 1966 return NULL; 1967 1968 __skb_queue_head_init(&queue); 1969 1970 cork->base.flags = 0; 1971 cork->base.addr = 0; 1972 cork->base.opt = NULL; 1973 cork->base.dst = NULL; 1974 v6_cork.opt = NULL; 1975 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6); 1976 if (err) { 1977 ip6_cork_release(cork, &v6_cork); 1978 return ERR_PTR(err); 1979 } 1980 if (ipc6->dontfrag < 0) 1981 ipc6->dontfrag = inet6_sk(sk)->dontfrag; 1982 1983 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork, 1984 ¤t->task_frag, getfrag, from, 1985 length + exthdrlen, transhdrlen + exthdrlen, 1986 flags, ipc6); 1987 if (err) { 1988 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); 1989 return ERR_PTR(err); 1990 } 1991 1992 return __ip6_make_skb(sk, &queue, cork, &v6_cork); 1993 } 1994
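/*
 * Usage sketch (illustrative only, not part of this file): the corked
 * transmit API above is normally driven by a datagram protocol in three
 * steps -- ip6_append_data() to queue payload, then either
 * ip6_push_pending_frames() to build and send the packet, or
 * ip6_flush_pending_frames() to discard the queue on error.  The fragment
 * below is a minimal sketch of that calling pattern, loosely modelled on
 * rawv6_sendmsg(); flow/route setup, cmsg handling and locking details are
 * omitted, and the surrounding variable names are assumptions.
 *
 *	struct ipcm6_cookie ipc6;
 *	int err;
 *
 *	ipcm6_init_sk(&ipc6, np);
 *
 *	lock_sock(sk);
 *	err = ip6_append_data(sk, ip_generic_getfrag, msg, len,
 *			      0, &ipc6, &fl6, (struct rt6_info *)dst,
 *			      msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *	release_sock(sk);
 */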