1 /* 2 * IPv6 output functions 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * Based on linux/net/ipv4/ip_output.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Changes: 16 * A.N.Kuznetsov : airthmetics in fragmentation. 17 * extension headers are implemented. 18 * route changes now work. 19 * ip6_forward does not confuse sniffers. 20 * etc. 21 * 22 * H. von Brand : Added missing #include <linux/string.h> 23 * Imran Patel : frag id should be in NBO 24 * Kazunori MIYAZAWA @USAGI 25 * : add ip6_append_data and related functions 26 * for datagram xmit 27 */ 28 29 #include <linux/errno.h> 30 #include <linux/kernel.h> 31 #include <linux/string.h> 32 #include <linux/socket.h> 33 #include <linux/net.h> 34 #include <linux/netdevice.h> 35 #include <linux/if_arp.h> 36 #include <linux/in6.h> 37 #include <linux/tcp.h> 38 #include <linux/route.h> 39 #include <linux/module.h> 40 41 #include <linux/netfilter.h> 42 #include <linux/netfilter_ipv6.h> 43 44 #include <net/sock.h> 45 #include <net/snmp.h> 46 47 #include <net/ipv6.h> 48 #include <net/ndisc.h> 49 #include <net/protocol.h> 50 #include <net/ip6_route.h> 51 #include <net/addrconf.h> 52 #include <net/rawv6.h> 53 #include <net/icmp.h> 54 #include <net/xfrm.h> 55 #include <net/checksum.h> 56 #include <linux/mroute6.h> 57 58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); 59 60 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr) 61 { 62 static u32 ipv6_fragmentation_id = 1; 63 static DEFINE_SPINLOCK(ip6_id_lock); 64 65 spin_lock_bh(&ip6_id_lock); 66 fhdr->identification = htonl(ipv6_fragmentation_id); 67 if (++ipv6_fragmentation_id == 0) 68 ipv6_fragmentation_id = 1; 69 spin_unlock_bh(&ip6_id_lock); 70 } 71 72 int __ip6_local_out(struct sk_buff *skb) 73 { 74 int len; 75 76 len = skb->len - sizeof(struct ipv6hdr); 77 if (len > IPV6_MAXPLEN) 78 len = 0; 79 ipv6_hdr(skb)->payload_len = htons(len); 80 81 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev, 82 dst_output); 83 } 84 85 int ip6_local_out(struct sk_buff *skb) 86 { 87 int err; 88 89 err = __ip6_local_out(skb); 90 if (likely(err == 1)) 91 err = dst_output(skb); 92 93 return err; 94 } 95 EXPORT_SYMBOL_GPL(ip6_local_out); 96 97 static int ip6_output_finish(struct sk_buff *skb) 98 { 99 struct dst_entry *dst = skb->dst; 100 101 if (dst->hh) 102 return neigh_hh_output(dst->hh, skb); 103 else if (dst->neighbour) 104 return dst->neighbour->output(skb); 105 106 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 107 kfree_skb(skb); 108 return -EINVAL; 109 110 } 111 112 /* dev_loopback_xmit for use with netfilter. */ 113 static int ip6_dev_loopback_xmit(struct sk_buff *newskb) 114 { 115 skb_reset_mac_header(newskb); 116 __skb_pull(newskb, skb_network_offset(newskb)); 117 newskb->pkt_type = PACKET_LOOPBACK; 118 newskb->ip_summed = CHECKSUM_UNNECESSARY; 119 WARN_ON(!newskb->dst); 120 121 netif_rx(newskb); 122 return 0; 123 } 124 125 126 static int ip6_output2(struct sk_buff *skb) 127 { 128 struct dst_entry *dst = skb->dst; 129 struct net_device *dev = dst->dev; 130 131 skb->protocol = htons(ETH_P_IPV6); 132 skb->dev = dev; 133 134 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 135 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; 136 struct inet6_dev *idev = ip6_dst_idev(skb->dst); 137 138 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && 139 ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 140 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 141 &ipv6_hdr(skb)->saddr))) { 142 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 143 144 /* Do not check for IFF_ALLMULTI; multicast routing 145 is not supported in any case. 146 */ 147 if (newskb) 148 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb, 149 NULL, newskb->dev, 150 ip6_dev_loopback_xmit); 151 152 if (ipv6_hdr(skb)->hop_limit == 0) { 153 IP6_INC_STATS(dev_net(dev), idev, 154 IPSTATS_MIB_OUTDISCARDS); 155 kfree_skb(skb); 156 return 0; 157 } 158 } 159 160 IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS); 161 } 162 163 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, 164 ip6_output_finish); 165 } 166 167 static inline int ip6_skb_dst_mtu(struct sk_buff *skb) 168 { 169 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 170 171 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? 172 skb->dst->dev->mtu : dst_mtu(skb->dst); 173 } 174 175 int ip6_output(struct sk_buff *skb) 176 { 177 struct inet6_dev *idev = ip6_dst_idev(skb->dst); 178 if (unlikely(idev->cnf.disable_ipv6)) { 179 IP6_INC_STATS(dev_net(skb->dst->dev), idev, 180 IPSTATS_MIB_OUTDISCARDS); 181 kfree_skb(skb); 182 return 0; 183 } 184 185 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 186 dst_allfrag(skb->dst)) 187 return ip6_fragment(skb, ip6_output2); 188 else 189 return ip6_output2(skb); 190 } 191 192 /* 193 * xmit an sk_buff (used by TCP) 194 */ 195 196 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, 197 struct ipv6_txoptions *opt, int ipfragok) 198 { 199 struct net *net = sock_net(sk); 200 struct ipv6_pinfo *np = inet6_sk(sk); 201 struct in6_addr *first_hop = &fl->fl6_dst; 202 struct dst_entry *dst = skb->dst; 203 struct ipv6hdr *hdr; 204 u8 proto = fl->proto; 205 int seg_len = skb->len; 206 int hlimit, tclass; 207 u32 mtu; 208 209 if (opt) { 210 unsigned int head_room; 211 212 /* First: exthdrs may take lots of space (~8K for now) 213 MAX_HEADER is not enough. 214 */ 215 head_room = opt->opt_nflen + opt->opt_flen; 216 seg_len += head_room; 217 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 218 219 if (skb_headroom(skb) < head_room) { 220 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 221 if (skb2 == NULL) { 222 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), 223 IPSTATS_MIB_OUTDISCARDS); 224 kfree_skb(skb); 225 return -ENOBUFS; 226 } 227 kfree_skb(skb); 228 skb = skb2; 229 if (sk) 230 skb_set_owner_w(skb, sk); 231 } 232 if (opt->opt_flen) 233 ipv6_push_frag_opts(skb, opt, &proto); 234 if (opt->opt_nflen) 235 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 236 } 237 238 skb_push(skb, sizeof(struct ipv6hdr)); 239 skb_reset_network_header(skb); 240 hdr = ipv6_hdr(skb); 241 242 /* Allow local fragmentation. */ 243 if (ipfragok) 244 skb->local_df = 1; 245 246 /* 247 * Fill in the IPv6 header 248 */ 249 250 hlimit = -1; 251 if (np) 252 hlimit = np->hop_limit; 253 if (hlimit < 0) 254 hlimit = ip6_dst_hoplimit(dst); 255 256 tclass = -1; 257 if (np) 258 tclass = np->tclass; 259 if (tclass < 0) 260 tclass = 0; 261 262 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; 263 264 hdr->payload_len = htons(seg_len); 265 hdr->nexthdr = proto; 266 hdr->hop_limit = hlimit; 267 268 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 269 ipv6_addr_copy(&hdr->daddr, first_hop); 270 271 skb->priority = sk->sk_priority; 272 skb->mark = sk->sk_mark; 273 274 mtu = dst_mtu(dst); 275 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { 276 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), 277 IPSTATS_MIB_OUTREQUESTS); 278 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, 279 dst_output); 280 } 281 282 if (net_ratelimit()) 283 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); 284 skb->dev = dst->dev; 285 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); 286 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); 287 kfree_skb(skb); 288 return -EMSGSIZE; 289 } 290 291 EXPORT_SYMBOL(ip6_xmit); 292 293 /* 294 * To avoid extra problems ND packets are send through this 295 * routine. It's code duplication but I really want to avoid 296 * extra checks since ipv6_build_header is used by TCP (which 297 * is for us performance critical) 298 */ 299 300 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, 301 const struct in6_addr *saddr, const struct in6_addr *daddr, 302 int proto, int len) 303 { 304 struct ipv6_pinfo *np = inet6_sk(sk); 305 struct ipv6hdr *hdr; 306 int totlen; 307 308 skb->protocol = htons(ETH_P_IPV6); 309 skb->dev = dev; 310 311 totlen = len + sizeof(struct ipv6hdr); 312 313 skb_reset_network_header(skb); 314 skb_put(skb, sizeof(struct ipv6hdr)); 315 hdr = ipv6_hdr(skb); 316 317 *(__be32*)hdr = htonl(0x60000000); 318 319 hdr->payload_len = htons(len); 320 hdr->nexthdr = proto; 321 hdr->hop_limit = np->hop_limit; 322 323 ipv6_addr_copy(&hdr->saddr, saddr); 324 ipv6_addr_copy(&hdr->daddr, daddr); 325 326 return 0; 327 } 328 329 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 330 { 331 struct ip6_ra_chain *ra; 332 struct sock *last = NULL; 333 334 read_lock(&ip6_ra_lock); 335 for (ra = ip6_ra_chain; ra; ra = ra->next) { 336 struct sock *sk = ra->sk; 337 if (sk && ra->sel == sel && 338 (!sk->sk_bound_dev_if || 339 sk->sk_bound_dev_if == skb->dev->ifindex)) { 340 if (last) { 341 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 342 if (skb2) 343 rawv6_rcv(last, skb2); 344 } 345 last = sk; 346 } 347 } 348 349 if (last) { 350 rawv6_rcv(last, skb); 351 read_unlock(&ip6_ra_lock); 352 return 1; 353 } 354 read_unlock(&ip6_ra_lock); 355 return 0; 356 } 357 358 static int ip6_forward_proxy_check(struct sk_buff *skb) 359 { 360 struct ipv6hdr *hdr = ipv6_hdr(skb); 361 u8 nexthdr = hdr->nexthdr; 362 int offset; 363 364 if (ipv6_ext_hdr(nexthdr)) { 365 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); 366 if (offset < 0) 367 return 0; 368 } else 369 offset = sizeof(struct ipv6hdr); 370 371 if (nexthdr == IPPROTO_ICMPV6) { 372 struct icmp6hdr *icmp6; 373 374 if (!pskb_may_pull(skb, (skb_network_header(skb) + 375 offset + 1 - skb->data))) 376 return 0; 377 378 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 379 380 switch (icmp6->icmp6_type) { 381 case NDISC_ROUTER_SOLICITATION: 382 case NDISC_ROUTER_ADVERTISEMENT: 383 case NDISC_NEIGHBOUR_SOLICITATION: 384 case NDISC_NEIGHBOUR_ADVERTISEMENT: 385 case NDISC_REDIRECT: 386 /* For reaction involving unicast neighbor discovery 387 * message destined to the proxied address, pass it to 388 * input function. 389 */ 390 return 1; 391 default: 392 break; 393 } 394 } 395 396 /* 397 * The proxying router can't forward traffic sent to a link-local 398 * address, so signal the sender and discard the packet. This 399 * behavior is clarified by the MIPv6 specification. 400 */ 401 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 402 dst_link_failure(skb); 403 return -1; 404 } 405 406 return 0; 407 } 408 409 static inline int ip6_forward_finish(struct sk_buff *skb) 410 { 411 return dst_output(skb); 412 } 413 414 int ip6_forward(struct sk_buff *skb) 415 { 416 struct dst_entry *dst = skb->dst; 417 struct ipv6hdr *hdr = ipv6_hdr(skb); 418 struct inet6_skb_parm *opt = IP6CB(skb); 419 struct net *net = dev_net(dst->dev); 420 421 if (net->ipv6.devconf_all->forwarding == 0) 422 goto error; 423 424 if (skb_warn_if_lro(skb)) 425 goto drop; 426 427 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 428 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 429 goto drop; 430 } 431 432 skb_forward_csum(skb); 433 434 /* 435 * We DO NOT make any processing on 436 * RA packets, pushing them to user level AS IS 437 * without ane WARRANTY that application will be able 438 * to interpret them. The reason is that we 439 * cannot make anything clever here. 440 * 441 * We are not end-node, so that if packet contains 442 * AH/ESP, we cannot make anything. 443 * Defragmentation also would be mistake, RA packets 444 * cannot be fragmented, because there is no warranty 445 * that different fragments will go along one path. --ANK 446 */ 447 if (opt->ra) { 448 u8 *ptr = skb_network_header(skb) + opt->ra; 449 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) 450 return 0; 451 } 452 453 /* 454 * check and decrement ttl 455 */ 456 if (hdr->hop_limit <= 1) { 457 /* Force OUTPUT device used as source address */ 458 skb->dev = dst->dev; 459 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 460 0, skb->dev); 461 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 462 463 kfree_skb(skb); 464 return -ETIMEDOUT; 465 } 466 467 /* XXX: idev->cnf.proxy_ndp? */ 468 if (net->ipv6.devconf_all->proxy_ndp && 469 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 470 int proxied = ip6_forward_proxy_check(skb); 471 if (proxied > 0) 472 return ip6_input(skb); 473 else if (proxied < 0) { 474 IP6_INC_STATS(net, ip6_dst_idev(dst), 475 IPSTATS_MIB_INDISCARDS); 476 goto drop; 477 } 478 } 479 480 if (!xfrm6_route_forward(skb)) { 481 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 482 goto drop; 483 } 484 dst = skb->dst; 485 486 /* IPv6 specs say nothing about it, but it is clear that we cannot 487 send redirects to source routed frames. 488 We don't send redirects to frames decapsulated from IPsec. 489 */ 490 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && 491 !skb->sp) { 492 struct in6_addr *target = NULL; 493 struct rt6_info *rt; 494 struct neighbour *n = dst->neighbour; 495 496 /* 497 * incoming and outgoing devices are the same 498 * send a redirect. 499 */ 500 501 rt = (struct rt6_info *) dst; 502 if ((rt->rt6i_flags & RTF_GATEWAY)) 503 target = (struct in6_addr*)&n->primary_key; 504 else 505 target = &hdr->daddr; 506 507 /* Limit redirects both by destination (here) 508 and by source (inside ndisc_send_redirect) 509 */ 510 if (xrlim_allow(dst, 1*HZ)) 511 ndisc_send_redirect(skb, n, target); 512 } else { 513 int addrtype = ipv6_addr_type(&hdr->saddr); 514 515 /* This check is security critical. */ 516 if (addrtype == IPV6_ADDR_ANY || 517 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 518 goto error; 519 if (addrtype & IPV6_ADDR_LINKLOCAL) { 520 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 521 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev); 522 goto error; 523 } 524 } 525 526 if (skb->len > dst_mtu(dst)) { 527 /* Again, force OUTPUT device used as source address */ 528 skb->dev = dst->dev; 529 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); 530 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); 531 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); 532 kfree_skb(skb); 533 return -EMSGSIZE; 534 } 535 536 if (skb_cow(skb, dst->dev->hard_header_len)) { 537 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); 538 goto drop; 539 } 540 541 hdr = ipv6_hdr(skb); 542 543 /* Mangling hops number delayed to point after skb COW */ 544 545 hdr->hop_limit--; 546 547 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 548 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 549 ip6_forward_finish); 550 551 error: 552 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 553 drop: 554 kfree_skb(skb); 555 return -EINVAL; 556 } 557 558 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 559 { 560 to->pkt_type = from->pkt_type; 561 to->priority = from->priority; 562 to->protocol = from->protocol; 563 dst_release(to->dst); 564 to->dst = dst_clone(from->dst); 565 to->dev = from->dev; 566 to->mark = from->mark; 567 568 #ifdef CONFIG_NET_SCHED 569 to->tc_index = from->tc_index; 570 #endif 571 nf_copy(to, from); 572 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 573 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 574 to->nf_trace = from->nf_trace; 575 #endif 576 skb_copy_secmark(to, from); 577 } 578 579 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) 580 { 581 u16 offset = sizeof(struct ipv6hdr); 582 struct ipv6_opt_hdr *exthdr = 583 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); 584 unsigned int packet_len = skb->tail - skb->network_header; 585 int found_rhdr = 0; 586 *nexthdr = &ipv6_hdr(skb)->nexthdr; 587 588 while (offset + 1 <= packet_len) { 589 590 switch (**nexthdr) { 591 592 case NEXTHDR_HOP: 593 break; 594 case NEXTHDR_ROUTING: 595 found_rhdr = 1; 596 break; 597 case NEXTHDR_DEST: 598 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 599 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) 600 break; 601 #endif 602 if (found_rhdr) 603 return offset; 604 break; 605 default : 606 return offset; 607 } 608 609 offset += ipv6_optlen(exthdr); 610 *nexthdr = &exthdr->nexthdr; 611 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + 612 offset); 613 } 614 615 return offset; 616 } 617 618 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 619 { 620 struct sk_buff *frag; 621 struct rt6_info *rt = (struct rt6_info*)skb->dst; 622 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 623 struct ipv6hdr *tmp_hdr; 624 struct frag_hdr *fh; 625 unsigned int mtu, hlen, left, len; 626 __be32 frag_id = 0; 627 int ptr, offset = 0, err=0; 628 u8 *prevhdr, nexthdr = 0; 629 struct net *net = dev_net(skb->dst->dev); 630 631 hlen = ip6_find_1stfragopt(skb, &prevhdr); 632 nexthdr = *prevhdr; 633 634 mtu = ip6_skb_dst_mtu(skb); 635 636 /* We must not fragment if the socket is set to force MTU discovery 637 * or if the skb it not generated by a local socket. (This last 638 * check should be redundant, but it's free.) 639 */ 640 if (!skb->local_df) { 641 skb->dev = skb->dst->dev; 642 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); 643 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), 644 IPSTATS_MIB_FRAGFAILS); 645 kfree_skb(skb); 646 return -EMSGSIZE; 647 } 648 649 if (np && np->frag_size < mtu) { 650 if (np->frag_size) 651 mtu = np->frag_size; 652 } 653 mtu -= hlen + sizeof(struct frag_hdr); 654 655 if (skb_shinfo(skb)->frag_list) { 656 int first_len = skb_pagelen(skb); 657 int truesizes = 0; 658 659 if (first_len - hlen > mtu || 660 ((first_len - hlen) & 7) || 661 skb_cloned(skb)) 662 goto slow_path; 663 664 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) { 665 /* Correct geometry. */ 666 if (frag->len > mtu || 667 ((frag->len & 7) && frag->next) || 668 skb_headroom(frag) < hlen) 669 goto slow_path; 670 671 /* Partially cloned skb? */ 672 if (skb_shared(frag)) 673 goto slow_path; 674 675 BUG_ON(frag->sk); 676 if (skb->sk) { 677 sock_hold(skb->sk); 678 frag->sk = skb->sk; 679 frag->destructor = sock_wfree; 680 truesizes += frag->truesize; 681 } 682 } 683 684 err = 0; 685 offset = 0; 686 frag = skb_shinfo(skb)->frag_list; 687 skb_shinfo(skb)->frag_list = NULL; 688 /* BUILD HEADER */ 689 690 *prevhdr = NEXTHDR_FRAGMENT; 691 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 692 if (!tmp_hdr) { 693 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), 694 IPSTATS_MIB_FRAGFAILS); 695 return -ENOMEM; 696 } 697 698 __skb_pull(skb, hlen); 699 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); 700 __skb_push(skb, hlen); 701 skb_reset_network_header(skb); 702 memcpy(skb_network_header(skb), tmp_hdr, hlen); 703 704 ipv6_select_ident(skb, fh); 705 fh->nexthdr = nexthdr; 706 fh->reserved = 0; 707 fh->frag_off = htons(IP6_MF); 708 frag_id = fh->identification; 709 710 first_len = skb_pagelen(skb); 711 skb->data_len = first_len - skb_headlen(skb); 712 skb->truesize -= truesizes; 713 skb->len = first_len; 714 ipv6_hdr(skb)->payload_len = htons(first_len - 715 sizeof(struct ipv6hdr)); 716 717 dst_hold(&rt->u.dst); 718 719 for (;;) { 720 /* Prepare header of the next frame, 721 * before previous one went down. */ 722 if (frag) { 723 frag->ip_summed = CHECKSUM_NONE; 724 skb_reset_transport_header(frag); 725 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); 726 __skb_push(frag, hlen); 727 skb_reset_network_header(frag); 728 memcpy(skb_network_header(frag), tmp_hdr, 729 hlen); 730 offset += skb->len - hlen - sizeof(struct frag_hdr); 731 fh->nexthdr = nexthdr; 732 fh->reserved = 0; 733 fh->frag_off = htons(offset); 734 if (frag->next != NULL) 735 fh->frag_off |= htons(IP6_MF); 736 fh->identification = frag_id; 737 ipv6_hdr(frag)->payload_len = 738 htons(frag->len - 739 sizeof(struct ipv6hdr)); 740 ip6_copy_metadata(frag, skb); 741 } 742 743 err = output(skb); 744 if(!err) 745 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 746 IPSTATS_MIB_FRAGCREATES); 747 748 if (err || !frag) 749 break; 750 751 skb = frag; 752 frag = skb->next; 753 skb->next = NULL; 754 } 755 756 kfree(tmp_hdr); 757 758 if (err == 0) { 759 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 760 IPSTATS_MIB_FRAGOKS); 761 dst_release(&rt->u.dst); 762 return 0; 763 } 764 765 while (frag) { 766 skb = frag->next; 767 kfree_skb(frag); 768 frag = skb; 769 } 770 771 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 772 IPSTATS_MIB_FRAGFAILS); 773 dst_release(&rt->u.dst); 774 return err; 775 } 776 777 slow_path: 778 left = skb->len - hlen; /* Space per frame */ 779 ptr = hlen; /* Where to start from */ 780 781 /* 782 * Fragment the datagram. 783 */ 784 785 *prevhdr = NEXTHDR_FRAGMENT; 786 787 /* 788 * Keep copying data until we run out. 789 */ 790 while(left > 0) { 791 len = left; 792 /* IF: it doesn't fit, use 'mtu' - the data space left */ 793 if (len > mtu) 794 len = mtu; 795 /* IF: we are not sending upto and including the packet end 796 then align the next start on an eight byte boundary */ 797 if (len < left) { 798 len &= ~7; 799 } 800 /* 801 * Allocate buffer. 802 */ 803 804 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { 805 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 806 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), 807 IPSTATS_MIB_FRAGFAILS); 808 err = -ENOMEM; 809 goto fail; 810 } 811 812 /* 813 * Set up data on packet 814 */ 815 816 ip6_copy_metadata(frag, skb); 817 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); 818 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 819 skb_reset_network_header(frag); 820 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 821 frag->transport_header = (frag->network_header + hlen + 822 sizeof(struct frag_hdr)); 823 824 /* 825 * Charge the memory for the fragment to any owner 826 * it might possess 827 */ 828 if (skb->sk) 829 skb_set_owner_w(frag, skb->sk); 830 831 /* 832 * Copy the packet header into the new buffer. 833 */ 834 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); 835 836 /* 837 * Build fragment header. 838 */ 839 fh->nexthdr = nexthdr; 840 fh->reserved = 0; 841 if (!frag_id) { 842 ipv6_select_ident(skb, fh); 843 frag_id = fh->identification; 844 } else 845 fh->identification = frag_id; 846 847 /* 848 * Copy a block of the IP datagram. 849 */ 850 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) 851 BUG(); 852 left -= len; 853 854 fh->frag_off = htons(offset); 855 if (left > 0) 856 fh->frag_off |= htons(IP6_MF); 857 ipv6_hdr(frag)->payload_len = htons(frag->len - 858 sizeof(struct ipv6hdr)); 859 860 ptr += len; 861 offset += len; 862 863 /* 864 * Put this fragment into the sending queue. 865 */ 866 err = output(frag); 867 if (err) 868 goto fail; 869 870 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), 871 IPSTATS_MIB_FRAGCREATES); 872 } 873 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), 874 IPSTATS_MIB_FRAGOKS); 875 kfree_skb(skb); 876 return err; 877 878 fail: 879 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), 880 IPSTATS_MIB_FRAGFAILS); 881 kfree_skb(skb); 882 return err; 883 } 884 885 static inline int ip6_rt_check(struct rt6key *rt_key, 886 struct in6_addr *fl_addr, 887 struct in6_addr *addr_cache) 888 { 889 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 890 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); 891 } 892 893 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 894 struct dst_entry *dst, 895 struct flowi *fl) 896 { 897 struct ipv6_pinfo *np = inet6_sk(sk); 898 struct rt6_info *rt = (struct rt6_info *)dst; 899 900 if (!dst) 901 goto out; 902 903 /* Yes, checking route validity in not connected 904 * case is not very simple. Take into account, 905 * that we do not support routing by source, TOS, 906 * and MSG_DONTROUTE --ANK (980726) 907 * 908 * 1. ip6_rt_check(): If route was host route, 909 * check that cached destination is current. 910 * If it is network route, we still may 911 * check its validity using saved pointer 912 * to the last used address: daddr_cache. 913 * We do not want to save whole address now, 914 * (because main consumer of this service 915 * is tcp, which has not this problem), 916 * so that the last trick works only on connected 917 * sockets. 918 * 2. oif also should be the same. 919 */ 920 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || 921 #ifdef CONFIG_IPV6_SUBTREES 922 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || 923 #endif 924 (fl->oif && fl->oif != dst->dev->ifindex)) { 925 dst_release(dst); 926 dst = NULL; 927 } 928 929 out: 930 return dst; 931 } 932 933 static int ip6_dst_lookup_tail(struct sock *sk, 934 struct dst_entry **dst, struct flowi *fl) 935 { 936 int err; 937 struct net *net = sock_net(sk); 938 939 if (*dst == NULL) 940 *dst = ip6_route_output(net, sk, fl); 941 942 if ((err = (*dst)->error)) 943 goto out_err_release; 944 945 if (ipv6_addr_any(&fl->fl6_src)) { 946 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, 947 &fl->fl6_dst, 948 sk ? inet6_sk(sk)->srcprefs : 0, 949 &fl->fl6_src); 950 if (err) 951 goto out_err_release; 952 } 953 954 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 955 /* 956 * Here if the dst entry we've looked up 957 * has a neighbour entry that is in the INCOMPLETE 958 * state and the src address from the flow is 959 * marked as OPTIMISTIC, we release the found 960 * dst entry and replace it instead with the 961 * dst entry of the nexthop router 962 */ 963 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { 964 struct inet6_ifaddr *ifp; 965 struct flowi fl_gw; 966 int redirect; 967 968 ifp = ipv6_get_ifaddr(net, &fl->fl6_src, 969 (*dst)->dev, 1); 970 971 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 972 if (ifp) 973 in6_ifa_put(ifp); 974 975 if (redirect) { 976 /* 977 * We need to get the dst entry for the 978 * default router instead 979 */ 980 dst_release(*dst); 981 memcpy(&fl_gw, fl, sizeof(struct flowi)); 982 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); 983 *dst = ip6_route_output(net, sk, &fl_gw); 984 if ((err = (*dst)->error)) 985 goto out_err_release; 986 } 987 } 988 #endif 989 990 return 0; 991 992 out_err_release: 993 if (err == -ENETUNREACH) 994 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES); 995 dst_release(*dst); 996 *dst = NULL; 997 return err; 998 } 999 1000 /** 1001 * ip6_dst_lookup - perform route lookup on flow 1002 * @sk: socket which provides route info 1003 * @dst: pointer to dst_entry * for result 1004 * @fl: flow to lookup 1005 * 1006 * This function performs a route lookup on the given flow. 1007 * 1008 * It returns zero on success, or a standard errno code on error. 1009 */ 1010 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 1011 { 1012 *dst = NULL; 1013 return ip6_dst_lookup_tail(sk, dst, fl); 1014 } 1015 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1016 1017 /** 1018 * ip6_sk_dst_lookup - perform socket cached route lookup on flow 1019 * @sk: socket which provides the dst cache and route info 1020 * @dst: pointer to dst_entry * for result 1021 * @fl: flow to lookup 1022 * 1023 * This function performs a route lookup on the given flow with the 1024 * possibility of using the cached route in the socket if it is valid. 1025 * It will take the socket dst lock when operating on the dst cache. 1026 * As a result, this function can only be used in process context. 1027 * 1028 * It returns zero on success, or a standard errno code on error. 1029 */ 1030 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 1031 { 1032 *dst = NULL; 1033 if (sk) { 1034 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1035 *dst = ip6_sk_dst_check(sk, *dst, fl); 1036 } 1037 1038 return ip6_dst_lookup_tail(sk, dst, fl); 1039 } 1040 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); 1041 1042 static inline int ip6_ufo_append_data(struct sock *sk, 1043 int getfrag(void *from, char *to, int offset, int len, 1044 int odd, struct sk_buff *skb), 1045 void *from, int length, int hh_len, int fragheaderlen, 1046 int transhdrlen, int mtu,unsigned int flags) 1047 1048 { 1049 struct sk_buff *skb; 1050 int err; 1051 1052 /* There is support for UDP large send offload by network 1053 * device, so create one single skb packet containing complete 1054 * udp datagram 1055 */ 1056 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 1057 skb = sock_alloc_send_skb(sk, 1058 hh_len + fragheaderlen + transhdrlen + 20, 1059 (flags & MSG_DONTWAIT), &err); 1060 if (skb == NULL) 1061 return -ENOMEM; 1062 1063 /* reserve space for Hardware header */ 1064 skb_reserve(skb, hh_len); 1065 1066 /* create space for UDP/IP header */ 1067 skb_put(skb,fragheaderlen + transhdrlen); 1068 1069 /* initialize network header pointer */ 1070 skb_reset_network_header(skb); 1071 1072 /* initialize protocol header pointer */ 1073 skb->transport_header = skb->network_header + fragheaderlen; 1074 1075 skb->ip_summed = CHECKSUM_PARTIAL; 1076 skb->csum = 0; 1077 sk->sk_sndmsg_off = 0; 1078 } 1079 1080 err = skb_append_datato_frags(sk,skb, getfrag, from, 1081 (length - transhdrlen)); 1082 if (!err) { 1083 struct frag_hdr fhdr; 1084 1085 /* specify the length of each IP datagram fragment*/ 1086 skb_shinfo(skb)->gso_size = mtu - fragheaderlen - 1087 sizeof(struct frag_hdr); 1088 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1089 ipv6_select_ident(skb, &fhdr); 1090 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1091 __skb_queue_tail(&sk->sk_write_queue, skb); 1092 1093 return 0; 1094 } 1095 /* There is not enough support do UPD LSO, 1096 * so follow normal path 1097 */ 1098 kfree_skb(skb); 1099 1100 return err; 1101 } 1102 1103 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 1104 int offset, int len, int odd, struct sk_buff *skb), 1105 void *from, int length, int transhdrlen, 1106 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, 1107 struct rt6_info *rt, unsigned int flags) 1108 { 1109 struct inet_sock *inet = inet_sk(sk); 1110 struct ipv6_pinfo *np = inet6_sk(sk); 1111 struct sk_buff *skb; 1112 unsigned int maxfraglen, fragheaderlen; 1113 int exthdrlen; 1114 int hh_len; 1115 int mtu; 1116 int copy; 1117 int err; 1118 int offset = 0; 1119 int csummode = CHECKSUM_NONE; 1120 1121 if (flags&MSG_PROBE) 1122 return 0; 1123 if (skb_queue_empty(&sk->sk_write_queue)) { 1124 /* 1125 * setup for corking 1126 */ 1127 if (opt) { 1128 if (np->cork.opt == NULL) { 1129 np->cork.opt = kmalloc(opt->tot_len, 1130 sk->sk_allocation); 1131 if (unlikely(np->cork.opt == NULL)) 1132 return -ENOBUFS; 1133 } else if (np->cork.opt->tot_len < opt->tot_len) { 1134 printk(KERN_DEBUG "ip6_append_data: invalid option length\n"); 1135 return -EINVAL; 1136 } 1137 memcpy(np->cork.opt, opt, opt->tot_len); 1138 inet->cork.flags |= IPCORK_OPT; 1139 /* need source address above miyazawa*/ 1140 } 1141 dst_hold(&rt->u.dst); 1142 inet->cork.dst = &rt->u.dst; 1143 inet->cork.fl = *fl; 1144 np->cork.hop_limit = hlimit; 1145 np->cork.tclass = tclass; 1146 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1147 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); 1148 if (np->frag_size < mtu) { 1149 if (np->frag_size) 1150 mtu = np->frag_size; 1151 } 1152 inet->cork.fragsize = mtu; 1153 if (dst_allfrag(rt->u.dst.path)) 1154 inet->cork.flags |= IPCORK_ALLFRAG; 1155 inet->cork.length = 0; 1156 sk->sk_sndmsg_page = NULL; 1157 sk->sk_sndmsg_off = 0; 1158 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - 1159 rt->rt6i_nfheader_len; 1160 length += exthdrlen; 1161 transhdrlen += exthdrlen; 1162 } else { 1163 rt = (struct rt6_info *)inet->cork.dst; 1164 fl = &inet->cork.fl; 1165 if (inet->cork.flags & IPCORK_OPT) 1166 opt = np->cork.opt; 1167 transhdrlen = 0; 1168 exthdrlen = 0; 1169 mtu = inet->cork.fragsize; 1170 } 1171 1172 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 1173 1174 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1175 (opt ? opt->opt_nflen : 0); 1176 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 1177 1178 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1179 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { 1180 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); 1181 return -EMSGSIZE; 1182 } 1183 } 1184 1185 /* 1186 * Let's try using as much space as possible. 1187 * Use MTU if total length of the message fits into the MTU. 1188 * Otherwise, we need to reserve fragment header and 1189 * fragment alignment (= 8-15 octects, in total). 1190 * 1191 * Note that we may need to "move" the data from the tail of 1192 * of the buffer to the new fragment when we split 1193 * the message. 1194 * 1195 * FIXME: It may be fragmented into multiple chunks 1196 * at once if non-fragmentable extension headers 1197 * are too large. 1198 * --yoshfuji 1199 */ 1200 1201 inet->cork.length += length; 1202 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && 1203 (rt->u.dst.dev->features & NETIF_F_UFO)) { 1204 1205 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, 1206 fragheaderlen, transhdrlen, mtu, 1207 flags); 1208 if (err) 1209 goto error; 1210 return 0; 1211 } 1212 1213 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1214 goto alloc_new_skb; 1215 1216 while (length > 0) { 1217 /* Check if the remaining data fits into current packet. */ 1218 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 1219 if (copy < length) 1220 copy = maxfraglen - skb->len; 1221 1222 if (copy <= 0) { 1223 char *data; 1224 unsigned int datalen; 1225 unsigned int fraglen; 1226 unsigned int fraggap; 1227 unsigned int alloclen; 1228 struct sk_buff *skb_prev; 1229 alloc_new_skb: 1230 skb_prev = skb; 1231 1232 /* There's no room in the current skb */ 1233 if (skb_prev) 1234 fraggap = skb_prev->len - maxfraglen; 1235 else 1236 fraggap = 0; 1237 1238 /* 1239 * If remaining data exceeds the mtu, 1240 * we know we need more fragment(s). 1241 */ 1242 datalen = length + fraggap; 1243 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1244 datalen = maxfraglen - fragheaderlen; 1245 1246 fraglen = datalen + fragheaderlen; 1247 if ((flags & MSG_MORE) && 1248 !(rt->u.dst.dev->features&NETIF_F_SG)) 1249 alloclen = mtu; 1250 else 1251 alloclen = datalen + fragheaderlen; 1252 1253 /* 1254 * The last fragment gets additional space at tail. 1255 * Note: we overallocate on fragments with MSG_MODE 1256 * because we have no idea if we're the last one. 1257 */ 1258 if (datalen == length + fraggap) 1259 alloclen += rt->u.dst.trailer_len; 1260 1261 /* 1262 * We just reserve space for fragment header. 1263 * Note: this may be overallocation if the message 1264 * (without MSG_MORE) fits into the MTU. 1265 */ 1266 alloclen += sizeof(struct frag_hdr); 1267 1268 if (transhdrlen) { 1269 skb = sock_alloc_send_skb(sk, 1270 alloclen + hh_len, 1271 (flags & MSG_DONTWAIT), &err); 1272 } else { 1273 skb = NULL; 1274 if (atomic_read(&sk->sk_wmem_alloc) <= 1275 2 * sk->sk_sndbuf) 1276 skb = sock_wmalloc(sk, 1277 alloclen + hh_len, 1, 1278 sk->sk_allocation); 1279 if (unlikely(skb == NULL)) 1280 err = -ENOBUFS; 1281 } 1282 if (skb == NULL) 1283 goto error; 1284 /* 1285 * Fill in the control structures 1286 */ 1287 skb->ip_summed = csummode; 1288 skb->csum = 0; 1289 /* reserve for fragmentation */ 1290 skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); 1291 1292 /* 1293 * Find where to start putting bytes 1294 */ 1295 data = skb_put(skb, fraglen); 1296 skb_set_network_header(skb, exthdrlen); 1297 data += fragheaderlen; 1298 skb->transport_header = (skb->network_header + 1299 fragheaderlen); 1300 if (fraggap) { 1301 skb->csum = skb_copy_and_csum_bits( 1302 skb_prev, maxfraglen, 1303 data + transhdrlen, fraggap, 0); 1304 skb_prev->csum = csum_sub(skb_prev->csum, 1305 skb->csum); 1306 data += fraggap; 1307 pskb_trim_unique(skb_prev, maxfraglen); 1308 } 1309 copy = datalen - transhdrlen - fraggap; 1310 if (copy < 0) { 1311 err = -EINVAL; 1312 kfree_skb(skb); 1313 goto error; 1314 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1315 err = -EFAULT; 1316 kfree_skb(skb); 1317 goto error; 1318 } 1319 1320 offset += copy; 1321 length -= datalen - fraggap; 1322 transhdrlen = 0; 1323 exthdrlen = 0; 1324 csummode = CHECKSUM_NONE; 1325 1326 /* 1327 * Put the packet on the pending queue 1328 */ 1329 __skb_queue_tail(&sk->sk_write_queue, skb); 1330 continue; 1331 } 1332 1333 if (copy > length) 1334 copy = length; 1335 1336 if (!(rt->u.dst.dev->features&NETIF_F_SG)) { 1337 unsigned int off; 1338 1339 off = skb->len; 1340 if (getfrag(from, skb_put(skb, copy), 1341 offset, copy, off, skb) < 0) { 1342 __skb_trim(skb, off); 1343 err = -EFAULT; 1344 goto error; 1345 } 1346 } else { 1347 int i = skb_shinfo(skb)->nr_frags; 1348 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 1349 struct page *page = sk->sk_sndmsg_page; 1350 int off = sk->sk_sndmsg_off; 1351 unsigned int left; 1352 1353 if (page && (left = PAGE_SIZE - off) > 0) { 1354 if (copy >= left) 1355 copy = left; 1356 if (page != frag->page) { 1357 if (i == MAX_SKB_FRAGS) { 1358 err = -EMSGSIZE; 1359 goto error; 1360 } 1361 get_page(page); 1362 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 1363 frag = &skb_shinfo(skb)->frags[i]; 1364 } 1365 } else if(i < MAX_SKB_FRAGS) { 1366 if (copy > PAGE_SIZE) 1367 copy = PAGE_SIZE; 1368 page = alloc_pages(sk->sk_allocation, 0); 1369 if (page == NULL) { 1370 err = -ENOMEM; 1371 goto error; 1372 } 1373 sk->sk_sndmsg_page = page; 1374 sk->sk_sndmsg_off = 0; 1375 1376 skb_fill_page_desc(skb, i, page, 0, 0); 1377 frag = &skb_shinfo(skb)->frags[i]; 1378 } else { 1379 err = -EMSGSIZE; 1380 goto error; 1381 } 1382 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { 1383 err = -EFAULT; 1384 goto error; 1385 } 1386 sk->sk_sndmsg_off += copy; 1387 frag->size += copy; 1388 skb->len += copy; 1389 skb->data_len += copy; 1390 skb->truesize += copy; 1391 atomic_add(copy, &sk->sk_wmem_alloc); 1392 } 1393 offset += copy; 1394 length -= copy; 1395 } 1396 return 0; 1397 error: 1398 inet->cork.length -= length; 1399 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1400 return err; 1401 } 1402 1403 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) 1404 { 1405 inet->cork.flags &= ~IPCORK_OPT; 1406 kfree(np->cork.opt); 1407 np->cork.opt = NULL; 1408 if (inet->cork.dst) { 1409 dst_release(inet->cork.dst); 1410 inet->cork.dst = NULL; 1411 inet->cork.flags &= ~IPCORK_ALLFRAG; 1412 } 1413 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1414 } 1415 1416 int ip6_push_pending_frames(struct sock *sk) 1417 { 1418 struct sk_buff *skb, *tmp_skb; 1419 struct sk_buff **tail_skb; 1420 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1421 struct inet_sock *inet = inet_sk(sk); 1422 struct ipv6_pinfo *np = inet6_sk(sk); 1423 struct net *net = sock_net(sk); 1424 struct ipv6hdr *hdr; 1425 struct ipv6_txoptions *opt = np->cork.opt; 1426 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; 1427 struct flowi *fl = &inet->cork.fl; 1428 unsigned char proto = fl->proto; 1429 int err = 0; 1430 1431 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1432 goto out; 1433 tail_skb = &(skb_shinfo(skb)->frag_list); 1434 1435 /* move skb->data to ip header from ext header */ 1436 if (skb->data < skb_network_header(skb)) 1437 __skb_pull(skb, skb_network_offset(skb)); 1438 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1439 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1440 *tail_skb = tmp_skb; 1441 tail_skb = &(tmp_skb->next); 1442 skb->len += tmp_skb->len; 1443 skb->data_len += tmp_skb->len; 1444 skb->truesize += tmp_skb->truesize; 1445 __sock_put(tmp_skb->sk); 1446 tmp_skb->destructor = NULL; 1447 tmp_skb->sk = NULL; 1448 } 1449 1450 /* Allow local fragmentation. */ 1451 if (np->pmtudisc < IPV6_PMTUDISC_DO) 1452 skb->local_df = 1; 1453 1454 ipv6_addr_copy(final_dst, &fl->fl6_dst); 1455 __skb_pull(skb, skb_network_header_len(skb)); 1456 if (opt && opt->opt_flen) 1457 ipv6_push_frag_opts(skb, opt, &proto); 1458 if (opt && opt->opt_nflen) 1459 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1460 1461 skb_push(skb, sizeof(struct ipv6hdr)); 1462 skb_reset_network_header(skb); 1463 hdr = ipv6_hdr(skb); 1464 1465 *(__be32*)hdr = fl->fl6_flowlabel | 1466 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1467 1468 hdr->hop_limit = np->cork.hop_limit; 1469 hdr->nexthdr = proto; 1470 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 1471 ipv6_addr_copy(&hdr->daddr, final_dst); 1472 1473 skb->priority = sk->sk_priority; 1474 skb->mark = sk->sk_mark; 1475 1476 skb->dst = dst_clone(&rt->u.dst); 1477 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); 1478 if (proto == IPPROTO_ICMPV6) { 1479 struct inet6_dev *idev = ip6_dst_idev(skb->dst); 1480 1481 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type); 1482 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); 1483 } 1484 1485 err = ip6_local_out(skb); 1486 if (err) { 1487 if (err > 0) 1488 err = np->recverr ? net_xmit_errno(err) : 0; 1489 if (err) 1490 goto error; 1491 } 1492 1493 out: 1494 ip6_cork_release(inet, np); 1495 return err; 1496 error: 1497 goto out; 1498 } 1499 1500 void ip6_flush_pending_frames(struct sock *sk) 1501 { 1502 struct sk_buff *skb; 1503 1504 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { 1505 if (skb->dst) 1506 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst), 1507 IPSTATS_MIB_OUTDISCARDS); 1508 kfree_skb(skb); 1509 } 1510 1511 ip6_cork_release(inet_sk(sk), inet6_sk(sk)); 1512 } 1513