// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetics in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

/* Final output step: loop multicast back to local listeners when needed,
 * honour lwtunnel xmit redirects, then resolve the next-hop neighbour and
 * hand the packet to neigh_output().
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct inet6_dev *idev = ip6_dst_idev(dst);
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
        const struct in6_addr *daddr, *nexthop;
        struct ipv6hdr *hdr;
        struct neighbour *neigh;
        int ret;

        /* Be paranoid, rather than too clever. */
        if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
                skb = skb_expand_head(skb, hh_len);
                if (!skb) {
                        IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                        return -ENOMEM;
                }
        }

        hdr = ipv6_hdr(skb);
        daddr = &hdr->daddr;
        if (ipv6_addr_is_multicast(daddr)) {
                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                           is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (hdr->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
                if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
                int res = lwtunnel_xmit(skb);

                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
                        return res;
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
        neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
                                    struct sk_buff *skb, unsigned int mtu)
{
        struct sk_buff *segs, *nskb;
        netdev_features_t features;
        int ret = 0;

        /* Please see corresponding comment in ip_finish_output_gso
         * describing the cases where GSO segment length exceeds the
         * egress MTU.
         */
        features = netif_skb_features(skb);
        segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
        if (IS_ERR_OR_NULL(segs)) {
                kfree_skb(skb);
                return -ENOMEM;
        }

        consume_skb(skb);

        skb_list_walk_safe(segs, segs, nskb) {
                int err;

                skb_mark_not_on_list(segs);
                err = ip6_fragment(net, sk, segs, ip6_finish_output2);
                if (err && ret == 0)
                        ret = err;
        }

        return ret;
}

static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
                IPCB(skb)->flags |= IPSKB_REROUTED;
                return dst_output(net, sk, skb);
        }
#endif

        mtu = ip6_skb_dst_mtu(skb);
        if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
                return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

        if ((skb->len > mtu && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
                return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
                return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        int ret;

        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
        switch (ret) {
        case NET_XMIT_SUCCESS:
                return __ip6_finish_output(net, sk, skb);
        case NET_XMIT_CN:
                return __ip6_finish_output(net, sk, skb) ? : ret;
        default:
                kfree_skb(skb);
                return ret;
        }
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                            net, sk, skb, indev, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
        if (!np->autoflowlabel_set)
                return ip6_default_np_autolabel(net);
        else
                return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
        struct net *net = sock_net(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct inet6_dev *idev = ip6_dst_idev(dst);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8 proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;

        if (unlikely(head_room > skb_headroom(skb))) {
                skb = skb_expand_head(skb, head_room);
                if (!skb) {
                        IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                        return -ENOBUFS;
                }
        }

        if (opt) {
                seg_len += opt->opt_nflen + opt->opt_flen;

                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);

                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
                                             &fl6->saddr);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                ip6_autoflowlabel(net, np), fl6));

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = priority;
        skb->mark = mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

                /* if egress device is enslaved to an L3 master device pass the
                 * skb to its handler for processing
                 */
                skb = l3mdev_ip6_out((struct sock *)sk, skb);
                if (unlikely(!skb))
                        return 0;

                /* hooks should never assume socket lock is held.
                 * we promote our socket to non const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
                               net, (struct sock *)sk, skb, NULL, dev,
                               dst_output);
        }

        skb->dev = dev;
        /* ipv6_local_error() does not require socket lock,
         * we promote our socket to non const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

        IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        if (np && np->rtalert_isolate &&
                            !net_eq(sock_net(sk), dev_net(skb->dev))) {
                                continue;
                        }
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* For reaction involving unicast neighbor discovery
                         * message destined to the proxied address, pass it to
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
                                     struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
                consume_skb(skb);
                return 0;
        }
#endif

        skb->tstamp = 0;
        return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
                return true;

        if (skb->ignore_df)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}

int ip6_forward(struct sk_buff *skb)
{
        struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        if (unlikely(skb->sk))
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!net->ipv6.devconf_all->disable_policy &&
            !idev->cnf.disable_policy &&
            !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We DO NOT make any processing on
         *      RA packets, pushing them to user level AS IS
         *      without any WARRANTY that application will be able
         *      to interpret them. The reason is that we
         *      cannot make anything clever here.
         *
         *      We are not end-node, so that if packet contains
         *      AH/ESP, we cannot make anything.
         *      Defragmentation also would be a mistake, RA packets
         *      cannot be fragmented, because there is no warranty
         *      that different fragments will go along one path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0) {
                        hdr->hop_limit--;
                        return ip6_input(skb);
                } else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
           send redirects to source routed frames.
           We don't send redirects to frames decapsulated from IPsec.
         */
        if (IP6CB(skb)->iif == dst->dev->ifindex &&
            opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = ip6_dst_mtu_maybe_forward(dst, true);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

        skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_ext_copy(to, from);
        skb_copy_secmark(to, from);
}

/* Prepare the fast-path fragmentation walk: the skb's frag_list already
 * holds ready-made fragments, so duplicate the network headers, detach
 * the frag_list and turn the head skb into the first fragment.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
                      u8 nexthdr, __be32 frag_id,
                      struct ip6_fraglist_iter *iter)
{
        unsigned int first_len;
        struct frag_hdr *fh;

        /* BUILD HEADER */
        *prevhdr = NEXTHDR_FRAGMENT;
        iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
        if (!iter->tmp_hdr)
                return -ENOMEM;

        iter->frag = skb_shinfo(skb)->frag_list;
        skb_frag_list_init(skb);

        iter->offset = 0;
        iter->hlen = hlen;
        iter->frag_id = frag_id;
        iter->nexthdr = nexthdr;

        __skb_pull(skb, hlen);
        fh = __skb_push(skb, sizeof(struct frag_hdr));
        __skb_push(skb, hlen);
        skb_reset_network_header(skb);
        memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

        fh->nexthdr = nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(IP6_MF);
        fh->identification = frag_id;

        first_len = skb_pagelen(skb);
        skb->data_len = first_len - skb_headlen(skb);
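        /* The frag_list chain was detached above; trim the head skb's total
         * length so it now describes only this first fragment.
         */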
        skb->len = first_len;
        ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

        return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
                          struct ip6_fraglist_iter *iter)
{
        struct sk_buff *frag = iter->frag;
        unsigned int hlen = iter->hlen;
        struct frag_hdr *fh;

        frag->ip_summed = CHECKSUM_NONE;
        skb_reset_transport_header(frag);
        fh = __skb_push(frag, sizeof(struct frag_hdr));
        __skb_push(frag, hlen);
        skb_reset_network_header(frag);
        memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
        iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
        fh->nexthdr = iter->nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(iter->offset);
        if (frag->next)
                fh->frag_off |= htons(IP6_MF);
        fh->identification = iter->frag_id;
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
        ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
                   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
                   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
        state->prevhdr = prevhdr;
        state->nexthdr = nexthdr;
        state->frag_id = frag_id;

        state->hlen = hlen;
        state->mtu = mtu;

        state->left = skb->len - hlen;  /* Space per frame */
        state->ptr = hlen;              /* Where to start from */

        state->hroom = hdr_room;
        state->troom = needed_tailroom;

        state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

/* Slow-path fragmentation: allocate the next fragment and copy headers
 * plus up to state->mtu bytes of payload from the original skb.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
        u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
        struct sk_buff *frag;
        struct frag_hdr *fh;
        unsigned int len;

        len = state->left;
        /* IF: it doesn't fit, use 'mtu' - the data space left */
        if (len > state->mtu)
                len = state->mtu;
        /* IF: we are not sending up to and including the packet end
           then align the next start on an eight byte boundary */
        if (len < state->left)
                len &= ~7;

        /* Allocate buffer */
        frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
                         state->hroom + state->troom, GFP_ATOMIC);
        if (!frag)
                return ERR_PTR(-ENOMEM);

        /*
         *      Set up data on packet
         */

        ip6_copy_metadata(frag, skb);
        skb_reserve(frag, state->hroom);
        skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
        skb_reset_network_header(frag);
        fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
        frag->transport_header = (frag->network_header + state->hlen +
                                  sizeof(struct frag_hdr));

        /*
         *      Charge the memory for the fragment to any owner
         *      it might possess
         */
        if (skb->sk)
                skb_set_owner_w(frag, skb->sk);

        /*
         *      Copy the packet header into the new buffer.
         */
        skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

        fragnexthdr_offset = skb_network_header(frag);
        fragnexthdr_offset += prevhdr - skb_network_header(skb);
        *fragnexthdr_offset = NEXTHDR_FRAGMENT;

        /*
         *      Build fragment header.
         */
        fh->nexthdr = state->nexthdr;
        fh->reserved = 0;
        fh->identification = state->frag_id;

        /*
         *      Copy a block of the IP datagram.
         */
        BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
                             len));
        state->left -= len;

        fh->frag_off = htons(state->offset);
        if (state->left > 0)
                fh->frag_off |= htons(IP6_MF);
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

        state->ptr += len;
        state->offset += len;

        return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

/* Split an oversized datagram into fragments and pass each one to output(),
 * using the frag_list fast path when the fragment geometry allows it and
 * the copying slow path otherwise.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ip6_frag_state state;
        unsigned int mtu, hlen, nexthdr_offset;
        ktime_t tstamp = skb->tstamp;
        int hroom, err = 0;
        __be32 frag_id;
        u8 *prevhdr, nexthdr = 0;

        err = ip6_find_1stfragopt(skb, &prevhdr);
        if (err < 0)
                goto fail;
        hlen = err;
        nexthdr = *prevhdr;
        nexthdr_offset = prevhdr - skb_network_header(skb);

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < hlen + sizeof(struct frag_hdr) + 8)
                goto fail_toobig;
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);

        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto fail;

        prevhdr = skb_network_header(skb) + nexthdr_offset;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        if (skb_has_frag_list(skb)) {
                unsigned int first_len = skb_pagelen(skb);
                struct ip6_fraglist_iter iter;
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
                                        &iter);
                if (err < 0)
                        goto fail;

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down.
                         */
                        if (iter.frag)
                                ip6_fraglist_prepare(skb, &iter);

                        skb->tstamp = tstamp;
                        err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !iter.frag)
                                break;

                        skb = ip6_fraglist_next(&iter);
                }

                kfree(iter.tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                kfree_skb_list(iter.frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        /*
         *      Fragment the datagram.
         */

        ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
                      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
                      &state);

        /*
         *      Keep copying data until we run out.
         */

        while (state.left > 0) {
                frag = ip6_frag_next(skb, &state);
                if (IS_ERR(frag)) {
                        err = PTR_ERR(frag);
                        goto fail;
                }

                /*
                 *      Put this fragment into the sending queue.
                 */
                frag->tstamp = tstamp;
                err = output(net, sk, frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in not connected
         * case is not very simple. Take into account,
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE            --ANK (980726)
         *
         * 1. ip6_rt_check(): If route was host route,
         *    check that cached destination is current.
         *    If it is network route, we still may
         *    check its validity using saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save whole address now,
         *    (because main consumer of this service
         *    is tcp, which does not have this problem),
         *    so that the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
             (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;
        int flags = 0;

        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
         * ip6_route_output call _before_ ip6_route_get_saddr.
         *
         * In source specific routing (no src=any default route),
         * ip6_route_output will fail given src=any saddr, though, so
         * that's why we try it again later.
         */
        if (ipv6_addr_any(&fl6->saddr)) {
                struct fib6_info *from;
                struct rt6_info *rt;

                *dst = ip6_route_output(net, sk, fl6);
                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

                rcu_read_lock();
                from = rt ? rcu_dereference(rt->from) : NULL;
                err = ip6_route_get_saddr(net, from, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                rcu_read_unlock();

                if (err)
                        goto out_err_release;

                /* If we had an erroneous initial result, pretend it
                 * never existed and let the SA-enabled version take
                 * over.
                 */
                if ((*dst)->error) {
                        dst_release(*dst);
                        *dst = NULL;
                }

                if (fl6->flowi6_oif)
                        flags |= RT6_LOOKUP_F_IFACE;
        }

        if (!*dst)
                *dst = ip6_route_output_flags(net, sk, fl6, flags);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
                                      rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        }
#endif
        if (ipv6_addr_v4mapped(&fl6->saddr) &&
            !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
                err = -EAFNOSUPPORT;
                goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;

        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        return err;
}

/**
 * ip6_dst_lookup - perform route lookup on flow
 * @net: Network namespace to perform lookup in
 * @sk: socket which provides route info
 * @dst: pointer to dst_entry * for result
 * @fl6: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 * @net: Network namespace to perform lookup in
 * @sk: socket which provides route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;

        return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 * @sk: socket which provides the dst cache and route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 * @connected: whether @sk is connected or not
 *
 * This function performs a route lookup on the given flow with the
 * possibility of using the cached route in the socket if it is valid.
 * It will take the socket dst lock when operating on the dst cache.
 * As a result, this function can only be used in process context.
 *
 * In addition, for a connected socket, cache the dst in the socket
 * if the current cache is not valid.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool connected)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

        dst = ip6_sk_dst_check(sk, dst, fl6);
        if (dst)
                return dst;

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
        if (connected && !IS_ERR(dst))
                ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

        return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 * ip6_dst_lookup_tunnel - perform route lookup on tunnel
 * @skb: Packet for which lookup is done
 * @dev: Tunnel device
 * @net: Network namespace of tunnel device
 * @sock: Socket which provides route info
 * @saddr: Memory to store the src ip address
 * @info: Tunnel information
 * @protocol: IP protocol
 * @use_cache: Flag to enable cache usage
 *
 * This function performs a route lookup on a tunnel.
 *
 * It returns a valid dst pointer and stores src address to be used in
 * tunnel in param saddr on success, else a pointer encoded error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
                                        struct net_device *dev,
                                        struct net *net,
                                        struct socket *sock,
                                        struct in6_addr *saddr,
                                        const struct ip_tunnel_info *info,
                                        u8 protocol,
                                        bool use_cache)
{
        struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
        struct dst_cache *dst_cache;
#endif
        struct flowi6 fl6;
        __u8 prio;

#ifdef CONFIG_DST_CACHE
        dst_cache = (struct dst_cache *)&info->dst_cache;
        if (use_cache) {
                dst = dst_cache_get_ip6(dst_cache, saddr);
                if (dst)
                        return dst;
        }
#endif
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_mark = skb->mark;
        fl6.flowi6_proto = protocol;
        fl6.daddr = info->key.u.ipv6.dst;
        fl6.saddr = info->key.u.ipv6.src;
        prio = info->key.tos;
        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
                                          info->key.label);

        dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
                                              NULL);
        if (IS_ERR(dst)) {
                netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
                return ERR_PTR(-ENETUNREACH);
        }
        if (dst->dev == dev) { /* is this necessary? */
                netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
                dst_release(dst);
                return ERR_PTR(-ELOOP);
        }
#ifdef CONFIG_DST_CACHE
        if (use_cache)
                dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
        *saddr = fl6.saddr;
        return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                unsigned int orig_mtu)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (!skb) {
                        /* first fragment, reserve header_len */
                        *mtu = orig_mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not first, the headers
                         * space is regarded as data space.
                         */
                        *mtu = orig_mtu;
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
                          struct rt6_info *rt, struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;
        struct ipv6_txoptions *opt = ipc6->opt;

        /*
         * setup for corking
         */
        if (opt) {
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;

                v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
                if (unlikely(!v6_cork->opt))
                        return -ENOBUFS;

                v6_cork->opt->tot_len = sizeof(*opt);
                v6_cork->opt->opt_flen = opt->opt_flen;
                v6_cork->opt->opt_nflen = opt->opt_nflen;

                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                    sk->sk_allocation);
                if (opt->dst0opt && !v6_cork->opt->dst0opt)
                        return -ENOBUFS;

                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                    sk->sk_allocation);
                if (opt->dst1opt && !v6_cork->opt->dst1opt)
                        return -ENOBUFS;

                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                   sk->sk_allocation);
                if (opt->hopopt && !v6_cork->opt->hopopt)
                        return -ENOBUFS;

                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                    sk->sk_allocation);
                if (opt->srcrt && !v6_cork->opt->srcrt)
                        return -ENOBUFS;

                /* need source address above miyazawa */
        }
        dst_hold(&rt->dst);
        cork->base.dst = &rt->dst;
        cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
        cork->base.gso_size = ipc6->gso_size;
        cork->base.tx_flags = 0;
        cork->base.mark = ipc6->sockc.mark;
        sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;

        cork->base.transmit_time = ipc6->sockc.transmit_time;

        return 0;
}

static int __ip6_append_data(struct sock *sk,
                             struct flowi6 *fl6,
                             struct sk_buff_head *queue,
                             struct inet_cork *cork,
                             struct inet6_cork *v6_cork,
                             struct page_frag *pfrag,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
                             unsigned int flags, struct ipcm6_cookie *ipc6)
{
        struct sk_buff *skb, *skb_prev = NULL;
        unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
        struct ubuf_info *uarg = NULL;
        int exthdrlen = 0;
        int dst_exthdrlen = 0;
        int hh_len;
        int copy;
        int err;
        int offset = 0;
        u32 tskey = 0;
        struct rt6_info *rt = (struct rt6_info *)cork->dst;
        struct ipv6_txoptions *opt = v6_cork->opt;
        int csummode = CHECKSUM_NONE;
        unsigned int maxnonfragsize, headersize;
        unsigned int wmem_alloc_delta = 0;
        bool paged, extra_uref = false;

        skb = skb_peek_tail(queue);
        if (!skb) {
                exthdrlen = opt ? opt->opt_flen : 0;
                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
        }

        paged = !!cork->gso_size;
        mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
        orig_mtu = mtu;

        if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
            sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
                tskey = sk->sk_tskey++;

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
                     sizeof(struct frag_hdr);

        headersize = sizeof(struct ipv6hdr) +
                     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
                     (dst_allfrag(&rt->dst) ?
                      sizeof(struct frag_hdr) : 0) +
                     rt->rt6i_nfheader_len;

        /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
         * the first fragment
         */
        if (headersize + transhdrlen > mtu)
                goto emsgsize;

        if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
            (sk->sk_protocol == IPPROTO_UDP ||
             sk->sk_protocol == IPPROTO_RAW)) {
                ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
                                  sizeof(struct ipv6hdr));
                goto emsgsize;
        }

        if (ip6_sk_ignore_df(sk))
                maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
        else
                maxnonfragsize = mtu;

        if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
                pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
                ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
                return -EMSGSIZE;
        }

        /* CHECKSUM_PARTIAL only with no extension headers and when
         * we are not going to fragment
         */
        if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
            headersize == sizeof(struct ipv6hdr) &&
            length <= mtu - headersize &&
            (!(flags & MSG_MORE) || cork->gso_size) &&
            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                csummode = CHECKSUM_PARTIAL;

        if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
                uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
                if (!uarg)
                        return -ENOBUFS;
                extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
                if (rt->dst.dev->features & NETIF_F_SG &&
                    csummode == CHECKSUM_PARTIAL) {
                        paged = true;
                } else {
                        uarg->zerocopy = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        cork->length += length;
        if (!skb)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ?
                        mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen, alloc_extra;
                        unsigned int pagedlen;
alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
                                fraggap = skb->len - maxfraglen;
                        else
                                fraggap = 0;
                        /* update mtu and maxfraglen if necessary */
                        if (!skb || !skb_prev)
                                ip6_append_data_mtu(&mtu, &maxfraglen,
                                                    fragheaderlen, skb, rt,
                                                    orig_mtu);

                        skb_prev = skb;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;

                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;
                        pagedlen = 0;

                        alloc_extra = hh_len;
                        alloc_extra += dst_exthdrlen;
                        alloc_extra += rt->dst.trailer_len;

                        /* We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloc_extra += sizeof(struct frag_hdr);

                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
                        else if (!paged &&
                                 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
                                  !(rt->dst.dev->features & NETIF_F_SG)))
                                alloclen = fraglen;
                        else {
                                alloclen = min_t(int, fraglen, MAX_HEADER);
                                pagedlen = fraglen - alloclen;
                        }
                        alloclen += alloc_extra;

                        if (datalen != length + fraggap) {
                                /*
                                 * this is not the last fragment, the trailer
                                 * space is regarded as data space.
                                 */
                                datalen += rt->dst.trailer_len;
                        }

                        fraglen = datalen + fragheaderlen;

                        copy = datalen - transhdrlen - fraggap - pagedlen;
                        if (copy < 0) {
                                err = -EINVAL;
                                goto error;
                        }
                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk, alloclen,
                                                          (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
                                    2 * sk->sk_sndbuf)
                                        skb = alloc_skb(alloclen,
                                                        sk->sk_allocation);
                                if (unlikely(!skb))
                                        err = -ENOBUFS;
                        }
                        if (!skb)
                                goto error;
                        /*
                         *      Fill in the control structures
                         */
                        skb->protocol = htons(ETH_P_IPV6);
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation and ipsec header */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
                                    dst_exthdrlen);

                        /*
                         *      Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen - pagedlen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        if (copy > 0 &&
                            getfrag(from, data + transhdrlen, offset,
                                    copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= copy + transhdrlen;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;

                        /* Only the initial fragment is time stamped */
                        skb_shinfo(skb)->tx_flags = cork->tx_flags;
                        cork->tx_flags = 0;
                        skb_shinfo(skb)->tskey = tskey;
                        tskey = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);

                        if ((flags & MSG_CONFIRM) && !skb_prev)
                                skb_set_dst_pending_confirm(skb, 1);

                        /*
                         * Put the packet on the pending queue
                         */
                        if (!skb->destructor) {
                                skb->destructor = sock_wfree;
                                skb->sk = sk;
                                wmem_alloc_delta += skb->truesize;
                        }
                        __skb_queue_tail(queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features&NETIF_F_SG) &&
                    skb_tailroom(skb) >= copy) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else if (!uarg || !uarg->zerocopy) {
                        int i = skb_shinfo(skb)->nr_frags;

                        err = -ENOMEM;
                        if (!sk_page_frag_refill(sk, pfrag))
                                goto error;

                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
                                err = -EMSGSIZE;
                                if (i == MAX_SKB_FRAGS)
                                        goto error;

                                __skb_fill_page_desc(skb, i, pfrag->page,
                                                     pfrag->offset, 0);
                                skb_shinfo(skb)->nr_frags = ++i;
                                get_page(pfrag->page);
                        }
                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
                        if (getfrag(from,
                                    page_address(pfrag->page) + pfrag->offset,
                                    offset, copy, skb->len, skb) < 0)
                                goto error_efault;

                        pfrag->offset += copy;
                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        wmem_alloc_delta += copy;
                } else {
                        err = skb_zerocopy_iter_dgram(skb, from, copy);
                        if (err < 0)
                                goto error;
                }
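                /* A chunk was appended by one of the branches above; advance
                 * the source offset and shrink the remaining length.
                 */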
                offset += copy;
                length -= copy;
        }

        if (wmem_alloc_delta)
                refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return 0;

error_efault:
        err = -EFAULT;
error:
        net_zcopy_put_abort(uarg, extra_uref);
        cork->length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return err;
}

int ip6_append_data(struct sock *sk,
                    int getfrag(void *from, char *to, int offset, int len,
                                int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                    struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        int exthdrlen;
        int err;

        if (flags&MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                err = ip6_setup_cork(sk, &inet->cork, &np->cork,
                                     ipc6, rt, fl6);
                if (err)
                        return err;

                exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                fl6 = &inet->cork.fl.u.ip6;
                transhdrlen = 0;
        }

        return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
                                 &np->cork, sk_page_frag(sk), getfrag,
                                 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_cork_full *cork,
                             struct inet6_cork *v6_cork)
{
        if (v6_cork->opt) {
                kfree(v6_cork->opt->dst0opt);
                kfree(v6_cork->opt->dst1opt);
                kfree(v6_cork->opt->hopopt);
                kfree(v6_cork->opt->srcrt);
                kfree(v6_cork->opt);
                v6_cork->opt = NULL;
        }

        if (cork->base.dst) {
                dst_release(cork->base.dst);
                cork->base.dst = NULL;
                cork->base.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&cork->fl, 0, sizeof(cork->fl));
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
                               struct sk_buff_head *queue,
                               struct inet_cork_full *cork,
                               struct inet6_cork *v6_cork)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = v6_cork->opt;
        struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
        struct flowi6 *fl6 = &cork->fl.u.ip6;
        unsigned char proto = fl6->flowi6_proto;

        skb = __skb_dequeue(queue);
        if (!skb)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        skb->ignore_df = ip6_sk_ignore_df(sk);

        *final_dst = fl6->daddr;
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        ip6_flow_hdr(hdr, v6_cork->tclass,
                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                        ip6_autoflowlabel(net, np), fl6));
        hdr->hop_limit = v6_cork->hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
        hdr->daddr = *final_dst;

        skb->priority = sk->sk_priority;
        skb->mark = cork->base.mark;

        skb->tstamp = cork->base.transmit_time;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
        }

        ip6_cork_release(cork, v6_cork);
out:
        return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
        struct net *net = sock_net(skb->sk);
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        int err;

        err = ip6_local_out(net, skb->sk, skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        IP6_INC_STATS(net, rt->rt6i_idev,
                                      IPSTATS_MIB_OUTDISCARDS);
        }

        return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        skb = ip6_finish_skb(sk);
        if (!skb)
                return 0;

        return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
                                       struct sk_buff_head *queue,
                                       struct inet_cork_full *cork,
                                       struct inet6_cork *v6_cork)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
        __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
                                   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                             struct rt6_info *rt, unsigned int flags,
                             struct inet_cork_full *cork)
{
        struct inet6_cork v6_cork;
        struct sk_buff_head queue;
        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
        int err;

        if (flags & MSG_PROBE)
                return NULL;

        __skb_queue_head_init(&queue);

        cork->base.flags = 0;
        cork->base.addr = 0;
        cork->base.opt = NULL;
        cork->base.dst = NULL;
        v6_cork.opt = NULL;
        err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
        if (err) {
                ip6_cork_release(cork, &v6_cork);
                return ERR_PTR(err);
        }
        if (ipc6->dontfrag < 0)
                ipc6->dontfrag = inet6_sk(sk)->dontfrag;

        err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
                                &current->task_frag, getfrag, from,
                                length + exthdrlen, transhdrlen + exthdrlen,
                                flags, ipc6);
        if (err) {
                __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
                return ERR_PTR(err);
        }

        return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}