// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

/* Final transmit step: resolve the route's next hop to a neighbour entry
 * and hand the skb to neigh_output().  Multicast packets may first be
 * cloned and looped back to local listeners through the POST_ROUTING hook,
 * and are discarded when hop_limit is 0 or when their scope is at most
 * node-local on a non-loopback device.  An lwtunnel redirect, if present
 * on the dst, is honoured before neighbour resolution.
 * Returns 0, a negative errno, or the lwtunnel xmit result.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local multicast listeners when the
		 * socket asked for it (sk_mc_loop) and either a multicast
		 * router socket is interested or a local member of the
		 * group exists on the egress device.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means the packet must not leave the
			 * host; the loopback copy above is all that happens.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local scope traffic never goes on the wire. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* Segment an oversized GSO packet in software and push each resulting
 * segment through ip6_fragment().  Consumes @skb; returns the first
 * fragmentation error, if any.  (@mtu is currently unused here; the
 * per-segment size check happens inside ip6_fragment().)
 */
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = ip6_fragment(net, sk, segs, ip6_finish_output2);
		/* keep the first error, but keep draining the list */
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

/* Choose the transmit strategy after POST_ROUTING: re-enter dst_output()
 * when an XFRM policy was attached after SNAT, push oversized GSO packets
 * through the segmenting slow path, fragment when the packet exceeds the
 * path MTU (or allfrag/frag_max_size demand it), else transmit directly.
 */
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

/* Run the cgroup BPF egress program, then transmit.  NET_XMIT_CN still
 * transmits but reports the congestion verdict when the transmit itself
 * succeeds; any other non-success verdict drops the packet.
 */
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
		return __ip6_finish_output(net, sk, skb);
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb(skb);
		return ret;
	}
}

/* Output entry point for locally generated packets that have a dst set:
 * stamps protocol/device, drops when IPv6 is administratively disabled on
 * the egress device, then runs NF_INET_POST_ROUTING (skipped for packets
 * already re-routed, IP6SKB_REROUTED) before ip6_finish_output().
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

/* Use the per-socket auto-flowlabel choice when the socket explicitly set
 * one; otherwise fall back to the per-namespace default.
 */
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Make room for the IPv6 header, link-layer header and any
	 * extension headers carried in @opt.
	 */
	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* may rewrite first_hop when a routing header is present */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

/* Deliver a Router Alert packet to every raw socket registered for value
 * @sel on ip6_ra_chain.  Earlier matches receive clones; the last match
 * receives the original skb.  Sockets bound to another device, or isolated
 * from the skb's netns via rtalert_isolate, are skipped.
 * Returns 1 when at least one socket consumed the packet, else 0.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

/* Decide what to do with a packet whose destination we answer proxy NDP
 * for: 1 = hand to local input (unicast ND messages), 0 = keep forwarding,
 * -1 = drop (link-local destination cannot be proxied).
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if
 (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* need at least the type byte of the ICMPv6 header */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

/* Last step of forwarding: bump OutForwDatagrams/OutOctets, consume skbs
 * already forwarded in hardware (offload_l3_fwd_mark), clear the transmit
 * timestamp and hand the packet to dst_output().
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}

/* Decide whether a forwarded packet exceeds @mtu and must trigger an
 * ICMPV6_PKT_TOOBIG instead of being sent.  Defragmented conntrack
 * packets are judged by the largest original fragment (frag_max_size);
 * ignore_df and valid GSO packets are let through.
 */
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

/* Forward a received packet: policy and sanity checks, Router Alert
 * delivery, hop-limit handling, proxy NDP, redirect generation, MTU check,
 * then NF_INET_FORWARD into ip6_forward_finish().
 */
int ip6_forward(struct sk_buff *skb)
{
	struct
 inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have switched the dst */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

/* Copy the per-packet metadata a freshly built fragment must inherit from
 * the original skb (dst, device, mark, hash, tc index, nf/sec state).
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

/* Set up fast-path fragmentation that reuses an existing frag_list: the
 * head skb becomes the first fragment (a fragment header is inserted and
 * the original network headers are saved in iter->tmp_hdr for the
 * follow-on fragments).  Returns 0 or -ENOMEM.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr =
 NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* re-insert the saved headers with a fragment header in between */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	/* trim the head skb so it carries only the first fragment */
	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

/* Turn the next frag_list member (iter->frag) into a full fragment:
 * prepend the saved headers plus a fragment header and advance the
 * running fragment offset.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

/* Initialise slow-path fragmentation state for @skb (used with
 * ip6_frag_next() to emit one freshly allocated fragment at a time).
 */
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

/* Allocate and fill the next slow-path fragment, copying the headers and
 * the next chunk of payload out of @skb.  Returns the fragment or an
 * ERR_PTR on allocation failure.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* patch the copied headers to chain in the fragment header */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

/* Fragment @skb and pass every fragment to @output.  Uses the fast path
 * (reusing the skb's existing frag_list) when the geometry allows it,
 * otherwise the slow path that allocates a fresh skb per fragment.
 * Consumes @skb in all cases; returns 0 or a negative errno.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* mtu now holds the payload budget per fragment */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	/* skb_checksum_help() may have reallocated the header; recompute */
	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* undo the ownership transfers done above */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

/* Non-zero when the cached route key neither matches @fl_addr as a host
 * route (plen 128) nor equals the last destination cached on the socket.
 */
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

/* Validate a socket's cached dst against @fl6; releases and returns NULL
 * when the cache cannot be used for this flow.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

/* Core route lookup for @fl6: picks a source address when none is given,
 * optionally re-routes via the default router when the chosen nexthop
 * neighbour is unresolved and the source address is OPTIMISTIC (DAD),
 * and rejects v4-mapped source with non-v4-mapped destination.
 * On failure *dst is NULL and a negative errno is returned.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net:
Network namespace to perform lookup in 1158 * @sk: socket which provides route info 1159 * @dst: pointer to dst_entry * for result 1160 * @fl6: flow to lookup 1161 * 1162 * This function performs a route lookup on the given flow. 1163 * 1164 * It returns zero on success, or a standard errno code on error. 1165 */ 1166 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, 1167 struct flowi6 *fl6) 1168 { 1169 *dst = NULL; 1170 return ip6_dst_lookup_tail(net, sk, dst, fl6); 1171 } 1172 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1173 1174 /** 1175 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 1176 * @net: Network namespace to perform lookup in 1177 * @sk: socket which provides route info 1178 * @fl6: flow to lookup 1179 * @final_dst: final destination address for ipsec lookup 1180 * 1181 * This function performs a route lookup on the given flow. 1182 * 1183 * It returns a valid dst pointer on success, or a pointer encoded 1184 * error code. 1185 */ 1186 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, 1187 const struct in6_addr *final_dst) 1188 { 1189 struct dst_entry *dst = NULL; 1190 int err; 1191 1192 err = ip6_dst_lookup_tail(net, sk, &dst, fl6); 1193 if (err) 1194 return ERR_PTR(err); 1195 if (final_dst) 1196 fl6->daddr = *final_dst; 1197 1198 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); 1199 } 1200 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 1201 1202 /** 1203 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 1204 * @sk: socket which provides the dst cache and route info 1205 * @fl6: flow to lookup 1206 * @final_dst: final destination address for ipsec lookup 1207 * @connected: whether @sk is connected or not 1208 * 1209 * This function performs a route lookup on the given flow with the 1210 * possibility of using the cached route in the socket if it is valid. 1211 * It will take the socket dst lock when operating on the dst cache. 
1212 * As a result, this function can only be used in process context. 1213 * 1214 * In addition, for a connected socket, cache the dst in the socket 1215 * if the current cache is not valid. 1216 * 1217 * It returns a valid dst pointer on success, or a pointer encoded 1218 * error code. 1219 */ 1220 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1221 const struct in6_addr *final_dst, 1222 bool connected) 1223 { 1224 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1225 1226 dst = ip6_sk_dst_check(sk, dst, fl6); 1227 if (dst) 1228 return dst; 1229 1230 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst); 1231 if (connected && !IS_ERR(dst)) 1232 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6); 1233 1234 return dst; 1235 } 1236 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1237 1238 /** 1239 * ip6_dst_lookup_tunnel - perform route lookup on tunnel 1240 * @skb: Packet for which lookup is done 1241 * @dev: Tunnel device 1242 * @net: Network namespace of tunnel device 1243 * @sock: Socket which provides route info 1244 * @saddr: Memory to store the src ip address 1245 * @info: Tunnel information 1246 * @protocol: IP protocol 1247 * @use_cache: Flag to enable cache usage 1248 * This function performs a route lookup on a tunnel 1249 * 1250 * It returns a valid dst pointer and stores src address to be used in 1251 * tunnel in param saddr on success, else a pointer encoded error code. 
 */

struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	/* Fast path: reuse the route/source pair cached in the tunnel
	 * info when the caller allows it.
	 */
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	/* Build the flow key from the tunnel metadata. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	/* A route pointing back at the tunnel device itself would loop. */
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

/* Duplicate a destination/hop-by-hop options header.  Per the extension
 * header encoding, the on-wire size is (hdrlen + 1) * 8 octets.
 * Returns NULL when @src is NULL or the allocation fails.
 */
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

/* Same as ip6_opt_dup() but typed for a routing header. */
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

/* Recompute *mtu and *maxfraglen for a subsequent fragment.  Only the
 * first fragment reserves the route's header_len; later fragments treat
 * that space as payload.  No-op for XFRM tunnel routes.
 */
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		/* Fragment payload must be a multiple of 8 octets. */
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

/* Initialize cork state for a corked send: duplicate the tx options so
 * they outlive the caller, pin the route, and compute the path MTU used
 * while appending.  Returns 0 or a negative errno.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header; on partial failure the
		 * caller is expected to release via ip6_cork_release().
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* Pick the MTU: device MTU when the socket probes PMTU itself,
	 * otherwise the route's (or, outside XFRM tunnel mode, the inner
	 * xfrm path's) MTU.
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

/* Core of the corked-send path: append @length bytes obtained via
 * @getfrag to @queue, growing the tail skb or allocating new ones sized
 * to the (fragment) MTU.  Charges truesize to the socket's wmem in one
 * batch (wmem_alloc_delta) on success.  Returns 0 or a negative errno;
 * on error the queued skbs are left for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First buffer: account for fragmentable options and any
		 * extra (e.g. IPsec) header space the route requires.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			/* Device can't take page refs; fall back to copying
			 * but keep the uarg for completion notification.
			 */
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				/* Paged path: small linear head, rest of the
				 * fragment goes into page frags.
				 */
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				/* First skb: may block for send buffer space. */
				skb = sock_alloc_send_skb(sk, alloclen,
							  (flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the 8-byte-alignment overhang from the
				 * previous skb into this one, fixing up its
				 * checksum to match the shortened data.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			/* Non-SG device: copy into the skb's linear tail. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			/* SG device, no zerocopy: copy into page frags. */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			/* True zerocopy: attach the user pages directly. */
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

/* Public entry point for corked sends on a socket's write queue;
 * sets up cork state on the first call of a cork cycle.
 */
int
ip6_append_data(struct sock *sk,
		int getfrag(void *from, char *to, int offset, int len,
			    int odd, struct sk_buff *skb),
		void *from, int length, int transhdrlen,
		struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		/* Fragmentable option headers count toward the first
		 * buffer's length and transport header length.
		 */
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Appending to an existing cork: reuse the stored flow. */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

/* Free the duplicated tx options and drop the route reference held by
 * the cork, resetting it for the next cork cycle.
 */
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

/* Collapse the queued buffers into one skb (extras chained on the
 * frag_list), push the extension headers and the IPv6 header, and
 * release the cork.  Returns the skb ready for ip6_send_skb(), or NULL
 * when the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		/* Ownership moves to the head skb; avoid double wmem
		 * uncharge when the chained skbs are freed.
		 */
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		/* May rewrite final_dst when a routing header is present. */
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

/* Hand a fully built skb to the IPv6 output path, normalizing the
 * qdisc return code and accounting drops.
 */
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

/* Finalize and transmit everything queued on sk's write queue. */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

/* Drop all queued buffers (accounting each as a discard) and release
 * the cork state.
 */
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

/* Public wrapper: flush the socket's own write queue and cork. */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

/* One-shot (uncorked) variant: build the whole datagram on a private
 * queue with caller-provided cork state and return the finished skb,
 * NULL for MSG_PROBE, or an ERR_PTR on failure.
 */
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}