// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	int delta = hh_len - skb_headroom(skb);
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(delta > 0) && dev->header_ops) {
		/* pskb_expand_head() might crash, if skb is shared */
		if (skb_shared(skb)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			if (likely(nskb)) {
				if (skb->sk)
					skb_set_owner_w(nskb, skb->sk);
				consume_skb(skb);
			} else {
				kfree_skb(skb);
			}
			skb = nskb;
		}
		if (skb &&
		    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
			kfree_skb(skb);
			skb = NULL;
		}
		if (!skb) {
			IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
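			   The cloned skb is looped back via dev_loopback_xmit()
			   so that local sockets joined to this multicast group
			   also receive a copy of the packet.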
107 */ 108 if (newskb) 109 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 110 net, sk, newskb, NULL, newskb->dev, 111 dev_loopback_xmit); 112 113 if (ipv6_hdr(skb)->hop_limit == 0) { 114 IP6_INC_STATS(net, idev, 115 IPSTATS_MIB_OUTDISCARDS); 116 kfree_skb(skb); 117 return 0; 118 } 119 } 120 121 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); 122 123 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= 124 IPV6_ADDR_SCOPE_NODELOCAL && 125 !(dev->flags & IFF_LOOPBACK)) { 126 kfree_skb(skb); 127 return 0; 128 } 129 } 130 131 if (lwtunnel_xmit_redirect(dst->lwtstate)) { 132 int res = lwtunnel_xmit(skb); 133 134 if (res < 0 || res == LWTUNNEL_XMIT_DONE) 135 return res; 136 } 137 138 rcu_read_lock_bh(); 139 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); 140 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); 141 if (unlikely(!neigh)) 142 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); 143 if (!IS_ERR(neigh)) { 144 sock_confirm_neigh(skb, neigh); 145 ret = neigh_output(neigh, skb, false); 146 rcu_read_unlock_bh(); 147 return ret; 148 } 149 rcu_read_unlock_bh(); 150 151 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 152 kfree_skb(skb); 153 return -EINVAL; 154 } 155 156 static int 157 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, 158 struct sk_buff *skb, unsigned int mtu) 159 { 160 struct sk_buff *segs, *nskb; 161 netdev_features_t features; 162 int ret = 0; 163 164 /* Please see corresponding comment in ip_finish_output_gso 165 * describing the cases where GSO segment length exceeds the 166 * egress MTU. 167 */ 168 features = netif_skb_features(skb); 169 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 170 if (IS_ERR_OR_NULL(segs)) { 171 kfree_skb(skb); 172 return -ENOMEM; 173 } 174 175 consume_skb(skb); 176 177 skb_list_walk_safe(segs, segs, nskb) { 178 int err; 179 180 skb_mark_not_on_list(segs); 181 err = ip6_fragment(net, sk, segs, ip6_finish_output2); 182 if (err && ret == 0) 183 ret = err; 184 } 185 186 return ret; 187 } 188 189 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 190 { 191 unsigned int mtu; 192 193 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 194 /* Policy lookup after SNAT yielded a new policy */ 195 if (skb_dst(skb)->xfrm) { 196 IPCB(skb)->flags |= IPSKB_REROUTED; 197 return dst_output(net, sk, skb); 198 } 199 #endif 200 201 mtu = ip6_skb_dst_mtu(skb); 202 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)) 203 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); 204 205 if ((skb->len > mtu && !skb_is_gso(skb)) || 206 dst_allfrag(skb_dst(skb)) || 207 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) 208 return ip6_fragment(net, sk, skb, ip6_finish_output2); 209 else 210 return ip6_finish_output2(net, sk, skb); 211 } 212 213 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 214 { 215 int ret; 216 217 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); 218 switch (ret) { 219 case NET_XMIT_SUCCESS: 220 return __ip6_finish_output(net, sk, skb); 221 case NET_XMIT_CN: 222 return __ip6_finish_output(net, sk, skb) ? 
: ret; 223 default: 224 kfree_skb(skb); 225 return ret; 226 } 227 } 228 229 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 230 { 231 struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; 232 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 233 234 skb->protocol = htons(ETH_P_IPV6); 235 skb->dev = dev; 236 237 if (unlikely(idev->cnf.disable_ipv6)) { 238 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 239 kfree_skb(skb); 240 return 0; 241 } 242 243 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, 244 net, sk, skb, indev, dev, 245 ip6_finish_output, 246 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 247 } 248 EXPORT_SYMBOL(ip6_output); 249 250 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) 251 { 252 if (!np->autoflowlabel_set) 253 return ip6_default_np_autolabel(net); 254 else 255 return np->autoflowlabel; 256 } 257 258 /* 259 * xmit an sk_buff (used by TCP, SCTP and DCCP) 260 * Note : socket lock is not held for SYNACK packets, but might be modified 261 * by calls to skb_set_owner_w() and ipv6_local_error(), 262 * which are using proper atomic operations or spinlocks. 263 */ 264 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 265 __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority) 266 { 267 struct net *net = sock_net(sk); 268 const struct ipv6_pinfo *np = inet6_sk(sk); 269 struct in6_addr *first_hop = &fl6->daddr; 270 struct dst_entry *dst = skb_dst(skb); 271 unsigned int head_room; 272 struct ipv6hdr *hdr; 273 u8 proto = fl6->flowi6_proto; 274 int seg_len = skb->len; 275 int hlimit = -1; 276 u32 mtu; 277 278 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 279 if (opt) 280 head_room += opt->opt_nflen + opt->opt_flen; 281 282 if (unlikely(skb_headroom(skb) < head_room)) { 283 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 284 if (!skb2) { 285 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 286 IPSTATS_MIB_OUTDISCARDS); 287 kfree_skb(skb); 288 return -ENOBUFS; 289 } 290 if (skb->sk) 291 skb_set_owner_w(skb2, skb->sk); 292 consume_skb(skb); 293 skb = skb2; 294 } 295 296 if (opt) { 297 seg_len += opt->opt_nflen + opt->opt_flen; 298 299 if (opt->opt_flen) 300 ipv6_push_frag_opts(skb, opt, &proto); 301 302 if (opt->opt_nflen) 303 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, 304 &fl6->saddr); 305 } 306 307 skb_push(skb, sizeof(struct ipv6hdr)); 308 skb_reset_network_header(skb); 309 hdr = ipv6_hdr(skb); 310 311 /* 312 * Fill in the IPv6 header 313 */ 314 if (np) 315 hlimit = np->hop_limit; 316 if (hlimit < 0) 317 hlimit = ip6_dst_hoplimit(dst); 318 319 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 320 ip6_autoflowlabel(net, np), fl6)); 321 322 hdr->payload_len = htons(seg_len); 323 hdr->nexthdr = proto; 324 hdr->hop_limit = hlimit; 325 326 hdr->saddr = fl6->saddr; 327 hdr->daddr = *first_hop; 328 329 skb->protocol = htons(ETH_P_IPV6); 330 skb->priority = priority; 331 skb->mark = mark; 332 333 mtu = dst_mtu(dst); 334 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { 335 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 336 IPSTATS_MIB_OUT, skb->len); 337 338 /* if egress device is enslaved to an L3 master device pass the 339 * skb to its handler for processing 340 */ 341 skb = l3mdev_ip6_out((struct sock *)sk, skb); 342 if (unlikely(!skb)) 343 return 0; 344 345 /* hooks should never assume socket lock is held. 
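	 * (ip6_xmit() is also used for SYNACK replies, where the listening
	 *  socket is not locked, as noted in the comment above this function.)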
346 * we promote our socket to non const 347 */ 348 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 349 net, (struct sock *)sk, skb, NULL, dst->dev, 350 dst_output); 351 } 352 353 skb->dev = dst->dev; 354 /* ipv6_local_error() does not require socket lock, 355 * we promote our socket to non const 356 */ 357 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); 358 359 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 360 kfree_skb(skb); 361 return -EMSGSIZE; 362 } 363 EXPORT_SYMBOL(ip6_xmit); 364 365 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 366 { 367 struct ip6_ra_chain *ra; 368 struct sock *last = NULL; 369 370 read_lock(&ip6_ra_lock); 371 for (ra = ip6_ra_chain; ra; ra = ra->next) { 372 struct sock *sk = ra->sk; 373 if (sk && ra->sel == sel && 374 (!sk->sk_bound_dev_if || 375 sk->sk_bound_dev_if == skb->dev->ifindex)) { 376 struct ipv6_pinfo *np = inet6_sk(sk); 377 378 if (np && np->rtalert_isolate && 379 !net_eq(sock_net(sk), dev_net(skb->dev))) { 380 continue; 381 } 382 if (last) { 383 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 384 if (skb2) 385 rawv6_rcv(last, skb2); 386 } 387 last = sk; 388 } 389 } 390 391 if (last) { 392 rawv6_rcv(last, skb); 393 read_unlock(&ip6_ra_lock); 394 return 1; 395 } 396 read_unlock(&ip6_ra_lock); 397 return 0; 398 } 399 400 static int ip6_forward_proxy_check(struct sk_buff *skb) 401 { 402 struct ipv6hdr *hdr = ipv6_hdr(skb); 403 u8 nexthdr = hdr->nexthdr; 404 __be16 frag_off; 405 int offset; 406 407 if (ipv6_ext_hdr(nexthdr)) { 408 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); 409 if (offset < 0) 410 return 0; 411 } else 412 offset = sizeof(struct ipv6hdr); 413 414 if (nexthdr == IPPROTO_ICMPV6) { 415 struct icmp6hdr *icmp6; 416 417 if (!pskb_may_pull(skb, (skb_network_header(skb) + 418 offset + 1 - skb->data))) 419 return 0; 420 421 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 422 423 switch (icmp6->icmp6_type) { 424 case NDISC_ROUTER_SOLICITATION: 425 case NDISC_ROUTER_ADVERTISEMENT: 426 case NDISC_NEIGHBOUR_SOLICITATION: 427 case NDISC_NEIGHBOUR_ADVERTISEMENT: 428 case NDISC_REDIRECT: 429 /* For reaction involving unicast neighbor discovery 430 * message destined to the proxied address, pass it to 431 * input function. 432 */ 433 return 1; 434 default: 435 break; 436 } 437 } 438 439 /* 440 * The proxying router can't forward traffic sent to a link-local 441 * address, so signal the sender and discard the packet. This 442 * behavior is clarified by the MIPv6 specification. 
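	 * A link-local destination is only meaningful on the link the
	 * packet arrived on, so it cannot be proxied onto another link.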
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    !idev->cnf.disable_policy &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not the end node, so if the packet contains
	 *	AH/ESP we cannot do anything with it.
	 *	Defragmentation would also be a mistake: RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
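	   (A non-NULL skb_sec_path() marks packets that were received
	   through an IPsec transform.)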
569 */ 570 if (IP6CB(skb)->iif == dst->dev->ifindex && 571 opt->srcrt == 0 && !skb_sec_path(skb)) { 572 struct in6_addr *target = NULL; 573 struct inet_peer *peer; 574 struct rt6_info *rt; 575 576 /* 577 * incoming and outgoing devices are the same 578 * send a redirect. 579 */ 580 581 rt = (struct rt6_info *) dst; 582 if (rt->rt6i_flags & RTF_GATEWAY) 583 target = &rt->rt6i_gateway; 584 else 585 target = &hdr->daddr; 586 587 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1); 588 589 /* Limit redirects both by destination (here) 590 and by source (inside ndisc_send_redirect) 591 */ 592 if (inet_peer_xrlim_allow(peer, 1*HZ)) 593 ndisc_send_redirect(skb, target); 594 if (peer) 595 inet_putpeer(peer); 596 } else { 597 int addrtype = ipv6_addr_type(&hdr->saddr); 598 599 /* This check is security critical. */ 600 if (addrtype == IPV6_ADDR_ANY || 601 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 602 goto error; 603 if (addrtype & IPV6_ADDR_LINKLOCAL) { 604 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 605 ICMPV6_NOT_NEIGHBOUR, 0); 606 goto error; 607 } 608 } 609 610 mtu = ip6_dst_mtu_forward(dst); 611 if (mtu < IPV6_MIN_MTU) 612 mtu = IPV6_MIN_MTU; 613 614 if (ip6_pkt_too_big(skb, mtu)) { 615 /* Again, force OUTPUT device used as source address */ 616 skb->dev = dst->dev; 617 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 618 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); 619 __IP6_INC_STATS(net, ip6_dst_idev(dst), 620 IPSTATS_MIB_FRAGFAILS); 621 kfree_skb(skb); 622 return -EMSGSIZE; 623 } 624 625 if (skb_cow(skb, dst->dev->hard_header_len)) { 626 __IP6_INC_STATS(net, ip6_dst_idev(dst), 627 IPSTATS_MIB_OUTDISCARDS); 628 goto drop; 629 } 630 631 hdr = ipv6_hdr(skb); 632 633 /* Mangling hops number delayed to point after skb COW */ 634 635 hdr->hop_limit--; 636 637 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 638 net, NULL, skb, skb->dev, dst->dev, 639 ip6_forward_finish); 640 641 error: 642 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); 643 drop: 644 kfree_skb(skb); 645 return -EINVAL; 646 } 647 648 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 649 { 650 to->pkt_type = from->pkt_type; 651 to->priority = from->priority; 652 to->protocol = from->protocol; 653 skb_dst_drop(to); 654 skb_dst_set(to, dst_clone(skb_dst(from))); 655 to->dev = from->dev; 656 to->mark = from->mark; 657 658 skb_copy_hash(to, from); 659 660 #ifdef CONFIG_NET_SCHED 661 to->tc_index = from->tc_index; 662 #endif 663 nf_copy(to, from); 664 skb_ext_copy(to, from); 665 skb_copy_secmark(to, from); 666 } 667 668 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, 669 u8 nexthdr, __be32 frag_id, 670 struct ip6_fraglist_iter *iter) 671 { 672 unsigned int first_len; 673 struct frag_hdr *fh; 674 675 /* BUILD HEADER */ 676 *prevhdr = NEXTHDR_FRAGMENT; 677 iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 678 if (!iter->tmp_hdr) 679 return -ENOMEM; 680 681 iter->frag = skb_shinfo(skb)->frag_list; 682 skb_frag_list_init(skb); 683 684 iter->offset = 0; 685 iter->hlen = hlen; 686 iter->frag_id = frag_id; 687 iter->nexthdr = nexthdr; 688 689 __skb_pull(skb, hlen); 690 fh = __skb_push(skb, sizeof(struct frag_hdr)); 691 __skb_push(skb, hlen); 692 skb_reset_network_header(skb); 693 memcpy(skb_network_header(skb), iter->tmp_hdr, hlen); 694 695 fh->nexthdr = nexthdr; 696 fh->reserved = 0; 697 fh->frag_off = htons(IP6_MF); 698 fh->identification = frag_id; 699 700 first_len = skb_pagelen(skb); 701 skb->data_len = first_len - skb_headlen(skb); 702 skb->len = 
first_len; 703 ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); 704 705 return 0; 706 } 707 EXPORT_SYMBOL(ip6_fraglist_init); 708 709 void ip6_fraglist_prepare(struct sk_buff *skb, 710 struct ip6_fraglist_iter *iter) 711 { 712 struct sk_buff *frag = iter->frag; 713 unsigned int hlen = iter->hlen; 714 struct frag_hdr *fh; 715 716 frag->ip_summed = CHECKSUM_NONE; 717 skb_reset_transport_header(frag); 718 fh = __skb_push(frag, sizeof(struct frag_hdr)); 719 __skb_push(frag, hlen); 720 skb_reset_network_header(frag); 721 memcpy(skb_network_header(frag), iter->tmp_hdr, hlen); 722 iter->offset += skb->len - hlen - sizeof(struct frag_hdr); 723 fh->nexthdr = iter->nexthdr; 724 fh->reserved = 0; 725 fh->frag_off = htons(iter->offset); 726 if (frag->next) 727 fh->frag_off |= htons(IP6_MF); 728 fh->identification = iter->frag_id; 729 ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 730 ip6_copy_metadata(frag, skb); 731 } 732 EXPORT_SYMBOL(ip6_fraglist_prepare); 733 734 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu, 735 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, 736 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state) 737 { 738 state->prevhdr = prevhdr; 739 state->nexthdr = nexthdr; 740 state->frag_id = frag_id; 741 742 state->hlen = hlen; 743 state->mtu = mtu; 744 745 state->left = skb->len - hlen; /* Space per frame */ 746 state->ptr = hlen; /* Where to start from */ 747 748 state->hroom = hdr_room; 749 state->troom = needed_tailroom; 750 751 state->offset = 0; 752 } 753 EXPORT_SYMBOL(ip6_frag_init); 754 755 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state) 756 { 757 u8 *prevhdr = state->prevhdr, *fragnexthdr_offset; 758 struct sk_buff *frag; 759 struct frag_hdr *fh; 760 unsigned int len; 761 762 len = state->left; 763 /* IF: it doesn't fit, use 'mtu' - the data space left */ 764 if (len > state->mtu) 765 len = state->mtu; 766 /* IF: we are not sending up to and including the packet end 767 then align the next start on an eight byte boundary */ 768 if (len < state->left) 769 len &= ~7; 770 771 /* Allocate buffer */ 772 frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) + 773 state->hroom + state->troom, GFP_ATOMIC); 774 if (!frag) 775 return ERR_PTR(-ENOMEM); 776 777 /* 778 * Set up data on packet 779 */ 780 781 ip6_copy_metadata(frag, skb); 782 skb_reserve(frag, state->hroom); 783 skb_put(frag, len + state->hlen + sizeof(struct frag_hdr)); 784 skb_reset_network_header(frag); 785 fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen); 786 frag->transport_header = (frag->network_header + state->hlen + 787 sizeof(struct frag_hdr)); 788 789 /* 790 * Charge the memory for the fragment to any owner 791 * it might possess 792 */ 793 if (skb->sk) 794 skb_set_owner_w(frag, skb->sk); 795 796 /* 797 * Copy the packet header into the new buffer. 798 */ 799 skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen); 800 801 fragnexthdr_offset = skb_network_header(frag); 802 fragnexthdr_offset += prevhdr - skb_network_header(skb); 803 *fragnexthdr_offset = NEXTHDR_FRAGMENT; 804 805 /* 806 * Build fragment header. 807 */ 808 fh->nexthdr = state->nexthdr; 809 fh->reserved = 0; 810 fh->identification = state->frag_id; 811 812 /* 813 * Copy a block of the IP datagram. 
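	 * skb_copy_bits() starts at offset state->ptr in the original
	 * packet and handles both linear and paged skb data.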
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
*/ 925 if (iter.frag) 926 ip6_fraglist_prepare(skb, &iter); 927 928 skb->tstamp = tstamp; 929 err = output(net, sk, skb); 930 if (!err) 931 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 932 IPSTATS_MIB_FRAGCREATES); 933 934 if (err || !iter.frag) 935 break; 936 937 skb = ip6_fraglist_next(&iter); 938 } 939 940 kfree(iter.tmp_hdr); 941 942 if (err == 0) { 943 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 944 IPSTATS_MIB_FRAGOKS); 945 return 0; 946 } 947 948 kfree_skb_list(iter.frag); 949 950 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 951 IPSTATS_MIB_FRAGFAILS); 952 return err; 953 954 slow_path_clean: 955 skb_walk_frags(skb, frag2) { 956 if (frag2 == frag) 957 break; 958 frag2->sk = NULL; 959 frag2->destructor = NULL; 960 skb->truesize += frag2->truesize; 961 } 962 } 963 964 slow_path: 965 /* 966 * Fragment the datagram. 967 */ 968 969 ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom, 970 LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id, 971 &state); 972 973 /* 974 * Keep copying data until we run out. 975 */ 976 977 while (state.left > 0) { 978 frag = ip6_frag_next(skb, &state); 979 if (IS_ERR(frag)) { 980 err = PTR_ERR(frag); 981 goto fail; 982 } 983 984 /* 985 * Put this fragment into the sending queue. 986 */ 987 frag->tstamp = tstamp; 988 err = output(net, sk, frag); 989 if (err) 990 goto fail; 991 992 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 993 IPSTATS_MIB_FRAGCREATES); 994 } 995 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 996 IPSTATS_MIB_FRAGOKS); 997 consume_skb(skb); 998 return err; 999 1000 fail_toobig: 1001 if (skb->sk && dst_allfrag(skb_dst(skb))) 1002 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); 1003 1004 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1005 err = -EMSGSIZE; 1006 1007 fail: 1008 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 1009 IPSTATS_MIB_FRAGFAILS); 1010 kfree_skb(skb); 1011 return err; 1012 } 1013 1014 static inline int ip6_rt_check(const struct rt6key *rt_key, 1015 const struct in6_addr *fl_addr, 1016 const struct in6_addr *addr_cache) 1017 { 1018 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 1019 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); 1020 } 1021 1022 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 1023 struct dst_entry *dst, 1024 const struct flowi6 *fl6) 1025 { 1026 struct ipv6_pinfo *np = inet6_sk(sk); 1027 struct rt6_info *rt; 1028 1029 if (!dst) 1030 goto out; 1031 1032 if (dst->ops->family != AF_INET6) { 1033 dst_release(dst); 1034 return NULL; 1035 } 1036 1037 rt = (struct rt6_info *)dst; 1038 /* Yes, checking route validity in not connected 1039 * case is not very simple. Take into account, 1040 * that we do not support routing by source, TOS, 1041 * and MSG_DONTROUTE --ANK (980726) 1042 * 1043 * 1. ip6_rt_check(): If route was host route, 1044 * check that cached destination is current. 1045 * If it is network route, we still may 1046 * check its validity using saved pointer 1047 * to the last used address: daddr_cache. 1048 * We do not want to save whole address now, 1049 * (because main consumer of this service 1050 * is tcp, which has not this problem), 1051 * so that the last trick works only on connected 1052 * sockets. 1053 * 2. oif also should be the same. 
1054 */ 1055 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 1056 #ifdef CONFIG_IPV6_SUBTREES 1057 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 1058 #endif 1059 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && 1060 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { 1061 dst_release(dst); 1062 dst = NULL; 1063 } 1064 1065 out: 1066 return dst; 1067 } 1068 1069 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, 1070 struct dst_entry **dst, struct flowi6 *fl6) 1071 { 1072 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 1073 struct neighbour *n; 1074 struct rt6_info *rt; 1075 #endif 1076 int err; 1077 int flags = 0; 1078 1079 /* The correct way to handle this would be to do 1080 * ip6_route_get_saddr, and then ip6_route_output; however, 1081 * the route-specific preferred source forces the 1082 * ip6_route_output call _before_ ip6_route_get_saddr. 1083 * 1084 * In source specific routing (no src=any default route), 1085 * ip6_route_output will fail given src=any saddr, though, so 1086 * that's why we try it again later. 1087 */ 1088 if (ipv6_addr_any(&fl6->saddr)) { 1089 struct fib6_info *from; 1090 struct rt6_info *rt; 1091 1092 *dst = ip6_route_output(net, sk, fl6); 1093 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; 1094 1095 rcu_read_lock(); 1096 from = rt ? rcu_dereference(rt->from) : NULL; 1097 err = ip6_route_get_saddr(net, from, &fl6->daddr, 1098 sk ? inet6_sk(sk)->srcprefs : 0, 1099 &fl6->saddr); 1100 rcu_read_unlock(); 1101 1102 if (err) 1103 goto out_err_release; 1104 1105 /* If we had an erroneous initial result, pretend it 1106 * never existed and let the SA-enabled version take 1107 * over. 1108 */ 1109 if ((*dst)->error) { 1110 dst_release(*dst); 1111 *dst = NULL; 1112 } 1113 1114 if (fl6->flowi6_oif) 1115 flags |= RT6_LOOKUP_F_IFACE; 1116 } 1117 1118 if (!*dst) 1119 *dst = ip6_route_output_flags(net, sk, fl6, flags); 1120 1121 err = (*dst)->error; 1122 if (err) 1123 goto out_err_release; 1124 1125 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 1126 /* 1127 * Here if the dst entry we've looked up 1128 * has a neighbour entry that is in the INCOMPLETE 1129 * state and the src address from the flow is 1130 * marked as OPTIMISTIC, we release the found 1131 * dst entry and replace it instead with the 1132 * dst entry of the nexthop router 1133 */ 1134 rt = (struct rt6_info *) *dst; 1135 rcu_read_lock_bh(); 1136 n = __ipv6_neigh_lookup_noref(rt->dst.dev, 1137 rt6_nexthop(rt, &fl6->daddr)); 1138 err = n && !(n->nud_state & NUD_VALID) ? 
-EINVAL : 0; 1139 rcu_read_unlock_bh(); 1140 1141 if (err) { 1142 struct inet6_ifaddr *ifp; 1143 struct flowi6 fl_gw6; 1144 int redirect; 1145 1146 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 1147 (*dst)->dev, 1); 1148 1149 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 1150 if (ifp) 1151 in6_ifa_put(ifp); 1152 1153 if (redirect) { 1154 /* 1155 * We need to get the dst entry for the 1156 * default router instead 1157 */ 1158 dst_release(*dst); 1159 memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 1160 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 1161 *dst = ip6_route_output(net, sk, &fl_gw6); 1162 err = (*dst)->error; 1163 if (err) 1164 goto out_err_release; 1165 } 1166 } 1167 #endif 1168 if (ipv6_addr_v4mapped(&fl6->saddr) && 1169 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { 1170 err = -EAFNOSUPPORT; 1171 goto out_err_release; 1172 } 1173 1174 return 0; 1175 1176 out_err_release: 1177 dst_release(*dst); 1178 *dst = NULL; 1179 1180 if (err == -ENETUNREACH) 1181 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); 1182 return err; 1183 } 1184 1185 /** 1186 * ip6_dst_lookup - perform route lookup on flow 1187 * @net: Network namespace to perform lookup in 1188 * @sk: socket which provides route info 1189 * @dst: pointer to dst_entry * for result 1190 * @fl6: flow to lookup 1191 * 1192 * This function performs a route lookup on the given flow. 1193 * 1194 * It returns zero on success, or a standard errno code on error. 1195 */ 1196 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, 1197 struct flowi6 *fl6) 1198 { 1199 *dst = NULL; 1200 return ip6_dst_lookup_tail(net, sk, dst, fl6); 1201 } 1202 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1203 1204 /** 1205 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 1206 * @net: Network namespace to perform lookup in 1207 * @sk: socket which provides route info 1208 * @fl6: flow to lookup 1209 * @final_dst: final destination address for ipsec lookup 1210 * 1211 * This function performs a route lookup on the given flow. 1212 * 1213 * It returns a valid dst pointer on success, or a pointer encoded 1214 * error code. 1215 */ 1216 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, 1217 const struct in6_addr *final_dst) 1218 { 1219 struct dst_entry *dst = NULL; 1220 int err; 1221 1222 err = ip6_dst_lookup_tail(net, sk, &dst, fl6); 1223 if (err) 1224 return ERR_PTR(err); 1225 if (final_dst) 1226 fl6->daddr = *final_dst; 1227 1228 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); 1229 } 1230 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 1231 1232 /** 1233 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 1234 * @sk: socket which provides the dst cache and route info 1235 * @fl6: flow to lookup 1236 * @final_dst: final destination address for ipsec lookup 1237 * @connected: whether @sk is connected or not 1238 * 1239 * This function performs a route lookup on the given flow with the 1240 * possibility of using the cached route in the socket if it is valid. 1241 * It will take the socket dst lock when operating on the dst cache. 1242 * As a result, this function can only be used in process context. 1243 * 1244 * In addition, for a connected socket, cache the dst in the socket 1245 * if the current cache is not valid. 1246 * 1247 * It returns a valid dst pointer on success, or a pointer encoded 1248 * error code. 
1249 */ 1250 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1251 const struct in6_addr *final_dst, 1252 bool connected) 1253 { 1254 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1255 1256 dst = ip6_sk_dst_check(sk, dst, fl6); 1257 if (dst) 1258 return dst; 1259 1260 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst); 1261 if (connected && !IS_ERR(dst)) 1262 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6); 1263 1264 return dst; 1265 } 1266 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1267 1268 /** 1269 * ip6_dst_lookup_tunnel - perform route lookup on tunnel 1270 * @skb: Packet for which lookup is done 1271 * @dev: Tunnel device 1272 * @net: Network namespace of tunnel device 1273 * @sock: Socket which provides route info 1274 * @saddr: Memory to store the src ip address 1275 * @info: Tunnel information 1276 * @protocol: IP protocol 1277 * @use_cache: Flag to enable cache usage 1278 * This function performs a route lookup on a tunnel 1279 * 1280 * It returns a valid dst pointer and stores src address to be used in 1281 * tunnel in param saddr on success, else a pointer encoded error code. 1282 */ 1283 1284 struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, 1285 struct net_device *dev, 1286 struct net *net, 1287 struct socket *sock, 1288 struct in6_addr *saddr, 1289 const struct ip_tunnel_info *info, 1290 u8 protocol, 1291 bool use_cache) 1292 { 1293 struct dst_entry *dst = NULL; 1294 #ifdef CONFIG_DST_CACHE 1295 struct dst_cache *dst_cache; 1296 #endif 1297 struct flowi6 fl6; 1298 __u8 prio; 1299 1300 #ifdef CONFIG_DST_CACHE 1301 dst_cache = (struct dst_cache *)&info->dst_cache; 1302 if (use_cache) { 1303 dst = dst_cache_get_ip6(dst_cache, saddr); 1304 if (dst) 1305 return dst; 1306 } 1307 #endif 1308 memset(&fl6, 0, sizeof(fl6)); 1309 fl6.flowi6_mark = skb->mark; 1310 fl6.flowi6_proto = protocol; 1311 fl6.daddr = info->key.u.ipv6.dst; 1312 fl6.saddr = info->key.u.ipv6.src; 1313 prio = info->key.tos; 1314 fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio), 1315 info->key.label); 1316 1317 dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, 1318 NULL); 1319 if (IS_ERR(dst)) { 1320 netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); 1321 return ERR_PTR(-ENETUNREACH); 1322 } 1323 if (dst->dev == dev) { /* is this necessary? */ 1324 netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); 1325 dst_release(dst); 1326 return ERR_PTR(-ELOOP); 1327 } 1328 #ifdef CONFIG_DST_CACHE 1329 if (use_cache) 1330 dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); 1331 #endif 1332 *saddr = fl6.saddr; 1333 return dst; 1334 } 1335 EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); 1336 1337 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1338 gfp_t gfp) 1339 { 1340 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1341 } 1342 1343 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 1344 gfp_t gfp) 1345 { 1346 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1347 } 1348 1349 static void ip6_append_data_mtu(unsigned int *mtu, 1350 int *maxfraglen, 1351 unsigned int fragheaderlen, 1352 struct sk_buff *skb, 1353 struct rt6_info *rt, 1354 unsigned int orig_mtu) 1355 { 1356 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { 1357 if (!skb) { 1358 /* first fragment, reserve header_len */ 1359 *mtu = orig_mtu - rt->dst.header_len; 1360 1361 } else { 1362 /* 1363 * this fragment is not first, the headers 1364 * space is regarded as data space. 
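			 * (rt->dst.header_len was only reserved in front of
			 *  the first fragment.)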
1365 */ 1366 *mtu = orig_mtu; 1367 } 1368 *maxfraglen = ((*mtu - fragheaderlen) & ~7) 1369 + fragheaderlen - sizeof(struct frag_hdr); 1370 } 1371 } 1372 1373 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, 1374 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, 1375 struct rt6_info *rt, struct flowi6 *fl6) 1376 { 1377 struct ipv6_pinfo *np = inet6_sk(sk); 1378 unsigned int mtu; 1379 struct ipv6_txoptions *opt = ipc6->opt; 1380 1381 /* 1382 * setup for corking 1383 */ 1384 if (opt) { 1385 if (WARN_ON(v6_cork->opt)) 1386 return -EINVAL; 1387 1388 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); 1389 if (unlikely(!v6_cork->opt)) 1390 return -ENOBUFS; 1391 1392 v6_cork->opt->tot_len = sizeof(*opt); 1393 v6_cork->opt->opt_flen = opt->opt_flen; 1394 v6_cork->opt->opt_nflen = opt->opt_nflen; 1395 1396 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, 1397 sk->sk_allocation); 1398 if (opt->dst0opt && !v6_cork->opt->dst0opt) 1399 return -ENOBUFS; 1400 1401 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, 1402 sk->sk_allocation); 1403 if (opt->dst1opt && !v6_cork->opt->dst1opt) 1404 return -ENOBUFS; 1405 1406 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, 1407 sk->sk_allocation); 1408 if (opt->hopopt && !v6_cork->opt->hopopt) 1409 return -ENOBUFS; 1410 1411 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, 1412 sk->sk_allocation); 1413 if (opt->srcrt && !v6_cork->opt->srcrt) 1414 return -ENOBUFS; 1415 1416 /* need source address above miyazawa*/ 1417 } 1418 dst_hold(&rt->dst); 1419 cork->base.dst = &rt->dst; 1420 cork->fl.u.ip6 = *fl6; 1421 v6_cork->hop_limit = ipc6->hlimit; 1422 v6_cork->tclass = ipc6->tclass; 1423 if (rt->dst.flags & DST_XFRM_TUNNEL) 1424 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1425 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); 1426 else 1427 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1428 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); 1429 if (np->frag_size < mtu) { 1430 if (np->frag_size) 1431 mtu = np->frag_size; 1432 } 1433 if (mtu < IPV6_MIN_MTU) 1434 return -EINVAL; 1435 cork->base.fragsize = mtu; 1436 cork->base.gso_size = ipc6->gso_size; 1437 cork->base.tx_flags = 0; 1438 cork->base.mark = ipc6->sockc.mark; 1439 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); 1440 1441 if (dst_allfrag(xfrm_dst_path(&rt->dst))) 1442 cork->base.flags |= IPCORK_ALLFRAG; 1443 cork->base.length = 0; 1444 1445 cork->base.transmit_time = ipc6->sockc.transmit_time; 1446 1447 return 0; 1448 } 1449 1450 static int __ip6_append_data(struct sock *sk, 1451 struct flowi6 *fl6, 1452 struct sk_buff_head *queue, 1453 struct inet_cork *cork, 1454 struct inet6_cork *v6_cork, 1455 struct page_frag *pfrag, 1456 int getfrag(void *from, char *to, int offset, 1457 int len, int odd, struct sk_buff *skb), 1458 void *from, int length, int transhdrlen, 1459 unsigned int flags, struct ipcm6_cookie *ipc6) 1460 { 1461 struct sk_buff *skb, *skb_prev = NULL; 1462 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; 1463 struct ubuf_info *uarg = NULL; 1464 int exthdrlen = 0; 1465 int dst_exthdrlen = 0; 1466 int hh_len; 1467 int copy; 1468 int err; 1469 int offset = 0; 1470 u32 tskey = 0; 1471 struct rt6_info *rt = (struct rt6_info *)cork->dst; 1472 struct ipv6_txoptions *opt = v6_cork->opt; 1473 int csummode = CHECKSUM_NONE; 1474 unsigned int maxnonfragsize, headersize; 1475 unsigned int wmem_alloc_delta = 0; 1476 bool paged, extra_uref = false; 1477 1478 skb = skb_peek_tail(queue); 1479 if (!skb) { 1480 exthdrlen = opt ? 
opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ?
mtu : maxfraglen) - skb->len; 1578 if (copy < length) 1579 copy = maxfraglen - skb->len; 1580 1581 if (copy <= 0) { 1582 char *data; 1583 unsigned int datalen; 1584 unsigned int fraglen; 1585 unsigned int fraggap; 1586 unsigned int alloclen, alloc_extra; 1587 unsigned int pagedlen; 1588 alloc_new_skb: 1589 /* There's no room in the current skb */ 1590 if (skb) 1591 fraggap = skb->len - maxfraglen; 1592 else 1593 fraggap = 0; 1594 /* update mtu and maxfraglen if necessary */ 1595 if (!skb || !skb_prev) 1596 ip6_append_data_mtu(&mtu, &maxfraglen, 1597 fragheaderlen, skb, rt, 1598 orig_mtu); 1599 1600 skb_prev = skb; 1601 1602 /* 1603 * If remaining data exceeds the mtu, 1604 * we know we need more fragment(s). 1605 */ 1606 datalen = length + fraggap; 1607 1608 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1609 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 1610 fraglen = datalen + fragheaderlen; 1611 pagedlen = 0; 1612 1613 alloc_extra = hh_len; 1614 alloc_extra += dst_exthdrlen; 1615 alloc_extra += rt->dst.trailer_len; 1616 1617 /* We just reserve space for fragment header. 1618 * Note: this may be overallocation if the message 1619 * (without MSG_MORE) fits into the MTU. 1620 */ 1621 alloc_extra += sizeof(struct frag_hdr); 1622 1623 if ((flags & MSG_MORE) && 1624 !(rt->dst.dev->features&NETIF_F_SG)) 1625 alloclen = mtu; 1626 else if (!paged && 1627 (fraglen + alloc_extra < SKB_MAX_ALLOC || 1628 !(rt->dst.dev->features & NETIF_F_SG))) 1629 alloclen = fraglen; 1630 else { 1631 alloclen = min_t(int, fraglen, MAX_HEADER); 1632 pagedlen = fraglen - alloclen; 1633 } 1634 alloclen += alloc_extra; 1635 1636 if (datalen != length + fraggap) { 1637 /* 1638 * this is not the last fragment, the trailer 1639 * space is regarded as data space. 
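				 * (only the final fragment needs to leave room
				 *  for the dst trailer.)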
1640 */ 1641 datalen += rt->dst.trailer_len; 1642 } 1643 1644 fraglen = datalen + fragheaderlen; 1645 1646 copy = datalen - transhdrlen - fraggap - pagedlen; 1647 if (copy < 0) { 1648 err = -EINVAL; 1649 goto error; 1650 } 1651 if (transhdrlen) { 1652 skb = sock_alloc_send_skb(sk, alloclen, 1653 (flags & MSG_DONTWAIT), &err); 1654 } else { 1655 skb = NULL; 1656 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 1657 2 * sk->sk_sndbuf) 1658 skb = alloc_skb(alloclen, 1659 sk->sk_allocation); 1660 if (unlikely(!skb)) 1661 err = -ENOBUFS; 1662 } 1663 if (!skb) 1664 goto error; 1665 /* 1666 * Fill in the control structures 1667 */ 1668 skb->protocol = htons(ETH_P_IPV6); 1669 skb->ip_summed = csummode; 1670 skb->csum = 0; 1671 /* reserve for fragmentation and ipsec header */ 1672 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 1673 dst_exthdrlen); 1674 1675 /* 1676 * Find where to start putting bytes 1677 */ 1678 data = skb_put(skb, fraglen - pagedlen); 1679 skb_set_network_header(skb, exthdrlen); 1680 data += fragheaderlen; 1681 skb->transport_header = (skb->network_header + 1682 fragheaderlen); 1683 if (fraggap) { 1684 skb->csum = skb_copy_and_csum_bits( 1685 skb_prev, maxfraglen, 1686 data + transhdrlen, fraggap); 1687 skb_prev->csum = csum_sub(skb_prev->csum, 1688 skb->csum); 1689 data += fraggap; 1690 pskb_trim_unique(skb_prev, maxfraglen); 1691 } 1692 if (copy > 0 && 1693 getfrag(from, data + transhdrlen, offset, 1694 copy, fraggap, skb) < 0) { 1695 err = -EFAULT; 1696 kfree_skb(skb); 1697 goto error; 1698 } 1699 1700 offset += copy; 1701 length -= copy + transhdrlen; 1702 transhdrlen = 0; 1703 exthdrlen = 0; 1704 dst_exthdrlen = 0; 1705 1706 /* Only the initial fragment is time stamped */ 1707 skb_shinfo(skb)->tx_flags = cork->tx_flags; 1708 cork->tx_flags = 0; 1709 skb_shinfo(skb)->tskey = tskey; 1710 tskey = 0; 1711 skb_zcopy_set(skb, uarg, &extra_uref); 1712 1713 if ((flags & MSG_CONFIRM) && !skb_prev) 1714 skb_set_dst_pending_confirm(skb, 1); 1715 1716 /* 1717 * Put the packet on the pending queue 1718 */ 1719 if (!skb->destructor) { 1720 skb->destructor = sock_wfree; 1721 skb->sk = sk; 1722 wmem_alloc_delta += skb->truesize; 1723 } 1724 __skb_queue_tail(queue, skb); 1725 continue; 1726 } 1727 1728 if (copy > length) 1729 copy = length; 1730 1731 if (!(rt->dst.dev->features&NETIF_F_SG) && 1732 skb_tailroom(skb) >= copy) { 1733 unsigned int off; 1734 1735 off = skb->len; 1736 if (getfrag(from, skb_put(skb, copy), 1737 offset, copy, off, skb) < 0) { 1738 __skb_trim(skb, off); 1739 err = -EFAULT; 1740 goto error; 1741 } 1742 } else if (!uarg || !uarg->zerocopy) { 1743 int i = skb_shinfo(skb)->nr_frags; 1744 1745 err = -ENOMEM; 1746 if (!sk_page_frag_refill(sk, pfrag)) 1747 goto error; 1748 1749 if (!skb_can_coalesce(skb, i, pfrag->page, 1750 pfrag->offset)) { 1751 err = -EMSGSIZE; 1752 if (i == MAX_SKB_FRAGS) 1753 goto error; 1754 1755 __skb_fill_page_desc(skb, i, pfrag->page, 1756 pfrag->offset, 0); 1757 skb_shinfo(skb)->nr_frags = ++i; 1758 get_page(pfrag->page); 1759 } 1760 copy = min_t(int, copy, pfrag->size - pfrag->offset); 1761 if (getfrag(from, 1762 page_address(pfrag->page) + pfrag->offset, 1763 offset, copy, skb->len, skb) < 0) 1764 goto error_efault; 1765 1766 pfrag->offset += copy; 1767 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1768 skb->len += copy; 1769 skb->data_len += copy; 1770 skb->truesize += copy; 1771 wmem_alloc_delta += copy; 1772 } else { 1773 err = skb_zerocopy_iter_dgram(skb, from, copy); 1774 if (err < 0) 1775 goto error; 1776 } 1777 
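		/* Advance past the bytes consumed from the caller's data. */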
offset += copy; 1778 length -= copy; 1779 } 1780 1781 if (wmem_alloc_delta) 1782 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 1783 return 0; 1784 1785 error_efault: 1786 err = -EFAULT; 1787 error: 1788 net_zcopy_put_abort(uarg, extra_uref); 1789 cork->length -= length; 1790 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1791 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 1792 return err; 1793 } 1794 1795 int ip6_append_data(struct sock *sk, 1796 int getfrag(void *from, char *to, int offset, int len, 1797 int odd, struct sk_buff *skb), 1798 void *from, int length, int transhdrlen, 1799 struct ipcm6_cookie *ipc6, struct flowi6 *fl6, 1800 struct rt6_info *rt, unsigned int flags) 1801 { 1802 struct inet_sock *inet = inet_sk(sk); 1803 struct ipv6_pinfo *np = inet6_sk(sk); 1804 int exthdrlen; 1805 int err; 1806 1807 if (flags&MSG_PROBE) 1808 return 0; 1809 if (skb_queue_empty(&sk->sk_write_queue)) { 1810 /* 1811 * setup for corking 1812 */ 1813 err = ip6_setup_cork(sk, &inet->cork, &np->cork, 1814 ipc6, rt, fl6); 1815 if (err) 1816 return err; 1817 1818 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); 1819 length += exthdrlen; 1820 transhdrlen += exthdrlen; 1821 } else { 1822 fl6 = &inet->cork.fl.u.ip6; 1823 transhdrlen = 0; 1824 } 1825 1826 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, 1827 &np->cork, sk_page_frag(sk), getfrag, 1828 from, length, transhdrlen, flags, ipc6); 1829 } 1830 EXPORT_SYMBOL_GPL(ip6_append_data); 1831 1832 static void ip6_cork_release(struct inet_cork_full *cork, 1833 struct inet6_cork *v6_cork) 1834 { 1835 if (v6_cork->opt) { 1836 kfree(v6_cork->opt->dst0opt); 1837 kfree(v6_cork->opt->dst1opt); 1838 kfree(v6_cork->opt->hopopt); 1839 kfree(v6_cork->opt->srcrt); 1840 kfree(v6_cork->opt); 1841 v6_cork->opt = NULL; 1842 } 1843 1844 if (cork->base.dst) { 1845 dst_release(cork->base.dst); 1846 cork->base.dst = NULL; 1847 cork->base.flags &= ~IPCORK_ALLFRAG; 1848 } 1849 memset(&cork->fl, 0, sizeof(cork->fl)); 1850 } 1851 1852 struct sk_buff *__ip6_make_skb(struct sock *sk, 1853 struct sk_buff_head *queue, 1854 struct inet_cork_full *cork, 1855 struct inet6_cork *v6_cork) 1856 { 1857 struct sk_buff *skb, *tmp_skb; 1858 struct sk_buff **tail_skb; 1859 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1860 struct ipv6_pinfo *np = inet6_sk(sk); 1861 struct net *net = sock_net(sk); 1862 struct ipv6hdr *hdr; 1863 struct ipv6_txoptions *opt = v6_cork->opt; 1864 struct rt6_info *rt = (struct rt6_info *)cork->base.dst; 1865 struct flowi6 *fl6 = &cork->fl.u.ip6; 1866 unsigned char proto = fl6->flowi6_proto; 1867 1868 skb = __skb_dequeue(queue); 1869 if (!skb) 1870 goto out; 1871 tail_skb = &(skb_shinfo(skb)->frag_list); 1872 1873 /* move skb->data to ip header from ext header */ 1874 if (skb->data < skb_network_header(skb)) 1875 __skb_pull(skb, skb_network_offset(skb)); 1876 while ((tmp_skb = __skb_dequeue(queue)) != NULL) { 1877 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1878 *tail_skb = tmp_skb; 1879 tail_skb = &(tmp_skb->next); 1880 skb->len += tmp_skb->len; 1881 skb->data_len += tmp_skb->len; 1882 skb->truesize += tmp_skb->truesize; 1883 tmp_skb->destructor = NULL; 1884 tmp_skb->sk = NULL; 1885 } 1886 1887 /* Allow local fragmentation. 
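	   (ignore_df is derived from the socket's path MTU discovery mode.)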
*/ 1888 skb->ignore_df = ip6_sk_ignore_df(sk); 1889 1890 *final_dst = fl6->daddr; 1891 __skb_pull(skb, skb_network_header_len(skb)); 1892 if (opt && opt->opt_flen) 1893 ipv6_push_frag_opts(skb, opt, &proto); 1894 if (opt && opt->opt_nflen) 1895 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); 1896 1897 skb_push(skb, sizeof(struct ipv6hdr)); 1898 skb_reset_network_header(skb); 1899 hdr = ipv6_hdr(skb); 1900 1901 ip6_flow_hdr(hdr, v6_cork->tclass, 1902 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1903 ip6_autoflowlabel(net, np), fl6)); 1904 hdr->hop_limit = v6_cork->hop_limit; 1905 hdr->nexthdr = proto; 1906 hdr->saddr = fl6->saddr; 1907 hdr->daddr = *final_dst; 1908 1909 skb->priority = sk->sk_priority; 1910 skb->mark = cork->base.mark; 1911 1912 skb->tstamp = cork->base.transmit_time; 1913 1914 skb_dst_set(skb, dst_clone(&rt->dst)); 1915 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1916 if (proto == IPPROTO_ICMPV6) { 1917 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1918 1919 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); 1920 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1921 } 1922 1923 ip6_cork_release(cork, v6_cork); 1924 out: 1925 return skb; 1926 } 1927 1928 int ip6_send_skb(struct sk_buff *skb) 1929 { 1930 struct net *net = sock_net(skb->sk); 1931 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 1932 int err; 1933 1934 err = ip6_local_out(net, skb->sk, skb); 1935 if (err) { 1936 if (err > 0) 1937 err = net_xmit_errno(err); 1938 if (err) 1939 IP6_INC_STATS(net, rt->rt6i_idev, 1940 IPSTATS_MIB_OUTDISCARDS); 1941 } 1942 1943 return err; 1944 } 1945 1946 int ip6_push_pending_frames(struct sock *sk) 1947 { 1948 struct sk_buff *skb; 1949 1950 skb = ip6_finish_skb(sk); 1951 if (!skb) 1952 return 0; 1953 1954 return ip6_send_skb(skb); 1955 } 1956 EXPORT_SYMBOL_GPL(ip6_push_pending_frames); 1957 1958 static void __ip6_flush_pending_frames(struct sock *sk, 1959 struct sk_buff_head *queue, 1960 struct inet_cork_full *cork, 1961 struct inet6_cork *v6_cork) 1962 { 1963 struct sk_buff *skb; 1964 1965 while ((skb = __skb_dequeue_tail(queue)) != NULL) { 1966 if (skb_dst(skb)) 1967 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1968 IPSTATS_MIB_OUTDISCARDS); 1969 kfree_skb(skb); 1970 } 1971 1972 ip6_cork_release(cork, v6_cork); 1973 } 1974 1975 void ip6_flush_pending_frames(struct sock *sk) 1976 { 1977 __ip6_flush_pending_frames(sk, &sk->sk_write_queue, 1978 &inet_sk(sk)->cork, &inet6_sk(sk)->cork); 1979 } 1980 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 1981 1982 struct sk_buff *ip6_make_skb(struct sock *sk, 1983 int getfrag(void *from, char *to, int offset, 1984 int len, int odd, struct sk_buff *skb), 1985 void *from, int length, int transhdrlen, 1986 struct ipcm6_cookie *ipc6, struct flowi6 *fl6, 1987 struct rt6_info *rt, unsigned int flags, 1988 struct inet_cork_full *cork) 1989 { 1990 struct inet6_cork v6_cork; 1991 struct sk_buff_head queue; 1992 int exthdrlen = (ipc6->opt ? 
ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}