1 /* 2 * IPv6 output functions 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * Based on linux/net/ipv4/ip_output.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Changes: 16 * A.N.Kuznetsov : airthmetics in fragmentation. 17 * extension headers are implemented. 18 * route changes now work. 19 * ip6_forward does not confuse sniffers. 20 * etc. 21 * 22 * H. von Brand : Added missing #include <linux/string.h> 23 * Imran Patel : frag id should be in NBO 24 * Kazunori MIYAZAWA @USAGI 25 * : add ip6_append_data and related functions 26 * for datagram xmit 27 */ 28 29 #include <linux/errno.h> 30 #include <linux/kernel.h> 31 #include <linux/string.h> 32 #include <linux/socket.h> 33 #include <linux/net.h> 34 #include <linux/netdevice.h> 35 #include <linux/if_arp.h> 36 #include <linux/in6.h> 37 #include <linux/tcp.h> 38 #include <linux/route.h> 39 #include <linux/module.h> 40 41 #include <linux/netfilter.h> 42 #include <linux/netfilter_ipv6.h> 43 44 #include <net/sock.h> 45 #include <net/snmp.h> 46 47 #include <net/ipv6.h> 48 #include <net/ndisc.h> 49 #include <net/protocol.h> 50 #include <net/ip6_route.h> 51 #include <net/addrconf.h> 52 #include <net/rawv6.h> 53 #include <net/icmp.h> 54 #include <net/xfrm.h> 55 #include <net/checksum.h> 56 #include <linux/mroute6.h> 57 58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); 59 60 int __ip6_local_out(struct sk_buff *skb) 61 { 62 int len; 63 64 len = skb->len - sizeof(struct ipv6hdr); 65 if (len > IPV6_MAXPLEN) 66 len = 0; 67 ipv6_hdr(skb)->payload_len = htons(len); 68 69 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, 70 dst_output); 71 } 72 73 int ip6_local_out(struct sk_buff *skb) 74 { 75 int err; 76 77 err = __ip6_local_out(skb); 78 if (likely(err == 1)) 79 err = dst_output(skb); 80 81 return err; 82 } 83 EXPORT_SYMBOL_GPL(ip6_local_out); 84 85 static int ip6_output_finish(struct sk_buff *skb) 86 { 87 struct dst_entry *dst = skb_dst(skb); 88 89 if (dst->hh) 90 return neigh_hh_output(dst->hh, skb); 91 else if (dst->neighbour) 92 return dst->neighbour->output(skb); 93 94 IP6_INC_STATS_BH(dev_net(dst->dev), 95 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 96 kfree_skb(skb); 97 return -EINVAL; 98 99 } 100 101 /* dev_loopback_xmit for use with netfilter. */ 102 static int ip6_dev_loopback_xmit(struct sk_buff *newskb) 103 { 104 skb_reset_mac_header(newskb); 105 __skb_pull(newskb, skb_network_offset(newskb)); 106 newskb->pkt_type = PACKET_LOOPBACK; 107 newskb->ip_summed = CHECKSUM_UNNECESSARY; 108 WARN_ON(!skb_dst(newskb)); 109 110 netif_rx(newskb); 111 return 0; 112 } 113 114 115 static int ip6_output2(struct sk_buff *skb) 116 { 117 struct dst_entry *dst = skb_dst(skb); 118 struct net_device *dev = dst->dev; 119 120 skb->protocol = htons(ETH_P_IPV6); 121 skb->dev = dev; 122 123 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 124 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; 125 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 126 127 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && 128 ((mroute6_socket(dev_net(dev)) && 129 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 130 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 131 &ipv6_hdr(skb)->saddr))) { 132 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 133 134 /* Do not check for IFF_ALLMULTI; multicast routing 135 is not supported in any case. 136 */ 137 if (newskb) 138 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb, 139 NULL, newskb->dev, 140 ip6_dev_loopback_xmit); 141 142 if (ipv6_hdr(skb)->hop_limit == 0) { 143 IP6_INC_STATS(dev_net(dev), idev, 144 IPSTATS_MIB_OUTDISCARDS); 145 kfree_skb(skb); 146 return 0; 147 } 148 } 149 150 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, 151 skb->len); 152 } 153 154 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, 155 ip6_output_finish); 156 } 157 158 static inline int ip6_skb_dst_mtu(struct sk_buff *skb) 159 { 160 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 161 162 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? 163 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); 164 } 165 166 int ip6_output(struct sk_buff *skb) 167 { 168 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 169 if (unlikely(idev->cnf.disable_ipv6)) { 170 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev, 171 IPSTATS_MIB_OUTDISCARDS); 172 kfree_skb(skb); 173 return 0; 174 } 175 176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 177 dst_allfrag(skb_dst(skb))) 178 return ip6_fragment(skb, ip6_output2); 179 else 180 return ip6_output2(skb); 181 } 182 183 /* 184 * xmit an sk_buff (used by TCP) 185 */ 186 187 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, 188 struct ipv6_txoptions *opt, int ipfragok) 189 { 190 struct net *net = sock_net(sk); 191 struct ipv6_pinfo *np = inet6_sk(sk); 192 struct in6_addr *first_hop = &fl->fl6_dst; 193 struct dst_entry *dst = skb_dst(skb); 194 struct ipv6hdr *hdr; 195 u8 proto = fl->proto; 196 int seg_len = skb->len; 197 int hlimit = -1; 198 int tclass = 0; 199 u32 mtu; 200 201 if (opt) { 202 unsigned int head_room; 203 204 /* First: exthdrs may take lots of space (~8K for now) 205 MAX_HEADER is not enough. 206 */ 207 head_room = opt->opt_nflen + opt->opt_flen; 208 seg_len += head_room; 209 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 210 211 if (skb_headroom(skb) < head_room) { 212 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 213 if (skb2 == NULL) { 214 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 215 IPSTATS_MIB_OUTDISCARDS); 216 kfree_skb(skb); 217 return -ENOBUFS; 218 } 219 kfree_skb(skb); 220 skb = skb2; 221 if (sk) 222 skb_set_owner_w(skb, sk); 223 } 224 if (opt->opt_flen) 225 ipv6_push_frag_opts(skb, opt, &proto); 226 if (opt->opt_nflen) 227 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 228 } 229 230 skb_push(skb, sizeof(struct ipv6hdr)); 231 skb_reset_network_header(skb); 232 hdr = ipv6_hdr(skb); 233 234 /* Allow local fragmentation. */ 235 if (ipfragok) 236 skb->local_df = 1; 237 238 /* 239 * Fill in the IPv6 header 240 */ 241 if (np) { 242 tclass = np->tclass; 243 hlimit = np->hop_limit; 244 } 245 if (hlimit < 0) 246 hlimit = ip6_dst_hoplimit(dst); 247 248 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; 249 250 hdr->payload_len = htons(seg_len); 251 hdr->nexthdr = proto; 252 hdr->hop_limit = hlimit; 253 254 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 255 ipv6_addr_copy(&hdr->daddr, first_hop); 256 257 skb->priority = sk->sk_priority; 258 skb->mark = sk->sk_mark; 259 260 mtu = dst_mtu(dst); 261 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { 262 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 263 IPSTATS_MIB_OUT, skb->len); 264 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, 265 dst_output); 266 } 267 268 if (net_ratelimit()) 269 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); 270 skb->dev = dst->dev; 271 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); 272 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 273 kfree_skb(skb); 274 return -EMSGSIZE; 275 } 276 277 EXPORT_SYMBOL(ip6_xmit); 278 279 /* 280 * To avoid extra problems ND packets are send through this 281 * routine. It's code duplication but I really want to avoid 282 * extra checks since ipv6_build_header is used by TCP (which 283 * is for us performance critical) 284 */ 285 286 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, 287 const struct in6_addr *saddr, const struct in6_addr *daddr, 288 int proto, int len) 289 { 290 struct ipv6_pinfo *np = inet6_sk(sk); 291 struct ipv6hdr *hdr; 292 int totlen; 293 294 skb->protocol = htons(ETH_P_IPV6); 295 skb->dev = dev; 296 297 totlen = len + sizeof(struct ipv6hdr); 298 299 skb_reset_network_header(skb); 300 skb_put(skb, sizeof(struct ipv6hdr)); 301 hdr = ipv6_hdr(skb); 302 303 *(__be32*)hdr = htonl(0x60000000); 304 305 hdr->payload_len = htons(len); 306 hdr->nexthdr = proto; 307 hdr->hop_limit = np->hop_limit; 308 309 ipv6_addr_copy(&hdr->saddr, saddr); 310 ipv6_addr_copy(&hdr->daddr, daddr); 311 312 return 0; 313 } 314 315 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 316 { 317 struct ip6_ra_chain *ra; 318 struct sock *last = NULL; 319 320 read_lock(&ip6_ra_lock); 321 for (ra = ip6_ra_chain; ra; ra = ra->next) { 322 struct sock *sk = ra->sk; 323 if (sk && ra->sel == sel && 324 (!sk->sk_bound_dev_if || 325 sk->sk_bound_dev_if == skb->dev->ifindex)) { 326 if (last) { 327 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 328 if (skb2) 329 rawv6_rcv(last, skb2); 330 } 331 last = sk; 332 } 333 } 334 335 if (last) { 336 rawv6_rcv(last, skb); 337 read_unlock(&ip6_ra_lock); 338 return 1; 339 } 340 read_unlock(&ip6_ra_lock); 341 return 0; 342 } 343 344 static int ip6_forward_proxy_check(struct sk_buff *skb) 345 { 346 struct ipv6hdr *hdr = ipv6_hdr(skb); 347 u8 nexthdr = hdr->nexthdr; 348 int offset; 349 350 if (ipv6_ext_hdr(nexthdr)) { 351 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); 352 if (offset < 0) 353 return 0; 354 } else 355 offset = sizeof(struct ipv6hdr); 356 357 if (nexthdr == IPPROTO_ICMPV6) { 358 struct icmp6hdr *icmp6; 359 360 if (!pskb_may_pull(skb, (skb_network_header(skb) + 361 offset + 1 - skb->data))) 362 return 0; 363 364 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 365 366 switch (icmp6->icmp6_type) { 367 case NDISC_ROUTER_SOLICITATION: 368 case NDISC_ROUTER_ADVERTISEMENT: 369 case NDISC_NEIGHBOUR_SOLICITATION: 370 case NDISC_NEIGHBOUR_ADVERTISEMENT: 371 case NDISC_REDIRECT: 372 /* For reaction involving unicast neighbor discovery 373 * message destined to the proxied address, pass it to 374 * input function. 375 */ 376 return 1; 377 default: 378 break; 379 } 380 } 381 382 /* 383 * The proxying router can't forward traffic sent to a link-local 384 * address, so signal the sender and discard the packet. This 385 * behavior is clarified by the MIPv6 specification. 386 */ 387 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 388 dst_link_failure(skb); 389 return -1; 390 } 391 392 return 0; 393 } 394 395 static inline int ip6_forward_finish(struct sk_buff *skb) 396 { 397 return dst_output(skb); 398 } 399 400 int ip6_forward(struct sk_buff *skb) 401 { 402 struct dst_entry *dst = skb_dst(skb); 403 struct ipv6hdr *hdr = ipv6_hdr(skb); 404 struct inet6_skb_parm *opt = IP6CB(skb); 405 struct net *net = dev_net(dst->dev); 406 407 if (net->ipv6.devconf_all->forwarding == 0) 408 goto error; 409 410 if (skb_warn_if_lro(skb)) 411 goto drop; 412 413 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 414 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 415 goto drop; 416 } 417 418 skb_forward_csum(skb); 419 420 /* 421 * We DO NOT make any processing on 422 * RA packets, pushing them to user level AS IS 423 * without ane WARRANTY that application will be able 424 * to interpret them. The reason is that we 425 * cannot make anything clever here. 426 * 427 * We are not end-node, so that if packet contains 428 * AH/ESP, we cannot make anything. 429 * Defragmentation also would be mistake, RA packets 430 * cannot be fragmented, because there is no warranty 431 * that different fragments will go along one path. --ANK 432 */ 433 if (opt->ra) { 434 u8 *ptr = skb_network_header(skb) + opt->ra; 435 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) 436 return 0; 437 } 438 439 /* 440 * check and decrement ttl 441 */ 442 if (hdr->hop_limit <= 1) { 443 /* Force OUTPUT device used as source address */ 444 skb->dev = dst->dev; 445 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 446 0, skb->dev); 447 IP6_INC_STATS_BH(net, 448 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 449 450 kfree_skb(skb); 451 return -ETIMEDOUT; 452 } 453 454 /* XXX: idev->cnf.proxy_ndp? */ 455 if (net->ipv6.devconf_all->proxy_ndp && 456 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 457 int proxied = ip6_forward_proxy_check(skb); 458 if (proxied > 0) 459 return ip6_input(skb); 460 else if (proxied < 0) { 461 IP6_INC_STATS(net, ip6_dst_idev(dst), 462 IPSTATS_MIB_INDISCARDS); 463 goto drop; 464 } 465 } 466 467 if (!xfrm6_route_forward(skb)) { 468 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 469 goto drop; 470 } 471 dst = skb_dst(skb); 472 473 /* IPv6 specs say nothing about it, but it is clear that we cannot 474 send redirects to source routed frames. 475 We don't send redirects to frames decapsulated from IPsec. 476 */ 477 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && 478 !skb_sec_path(skb)) { 479 struct in6_addr *target = NULL; 480 struct rt6_info *rt; 481 struct neighbour *n = dst->neighbour; 482 483 /* 484 * incoming and outgoing devices are the same 485 * send a redirect. 486 */ 487 488 rt = (struct rt6_info *) dst; 489 if ((rt->rt6i_flags & RTF_GATEWAY)) 490 target = (struct in6_addr*)&n->primary_key; 491 else 492 target = &hdr->daddr; 493 494 /* Limit redirects both by destination (here) 495 and by source (inside ndisc_send_redirect) 496 */ 497 if (xrlim_allow(dst, 1*HZ)) 498 ndisc_send_redirect(skb, n, target); 499 } else { 500 int addrtype = ipv6_addr_type(&hdr->saddr); 501 502 /* This check is security critical. */ 503 if (addrtype == IPV6_ADDR_ANY || 504 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 505 goto error; 506 if (addrtype & IPV6_ADDR_LINKLOCAL) { 507 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 508 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev); 509 goto error; 510 } 511 } 512 513 if (skb->len > dst_mtu(dst)) { 514 /* Again, force OUTPUT device used as source address */ 515 skb->dev = dst->dev; 516 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); 517 IP6_INC_STATS_BH(net, 518 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); 519 IP6_INC_STATS_BH(net, 520 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); 521 kfree_skb(skb); 522 return -EMSGSIZE; 523 } 524 525 if (skb_cow(skb, dst->dev->hard_header_len)) { 526 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); 527 goto drop; 528 } 529 530 hdr = ipv6_hdr(skb); 531 532 /* Mangling hops number delayed to point after skb COW */ 533 534 hdr->hop_limit--; 535 536 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 537 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 538 ip6_forward_finish); 539 540 error: 541 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 542 drop: 543 kfree_skb(skb); 544 return -EINVAL; 545 } 546 547 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 548 { 549 to->pkt_type = from->pkt_type; 550 to->priority = from->priority; 551 to->protocol = from->protocol; 552 skb_dst_drop(to); 553 skb_dst_set(to, dst_clone(skb_dst(from))); 554 to->dev = from->dev; 555 to->mark = from->mark; 556 557 #ifdef CONFIG_NET_SCHED 558 to->tc_index = from->tc_index; 559 #endif 560 nf_copy(to, from); 561 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 562 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 563 to->nf_trace = from->nf_trace; 564 #endif 565 skb_copy_secmark(to, from); 566 } 567 568 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) 569 { 570 u16 offset = sizeof(struct ipv6hdr); 571 struct ipv6_opt_hdr *exthdr = 572 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); 573 unsigned int packet_len = skb->tail - skb->network_header; 574 int found_rhdr = 0; 575 *nexthdr = &ipv6_hdr(skb)->nexthdr; 576 577 while (offset + 1 <= packet_len) { 578 579 switch (**nexthdr) { 580 581 case NEXTHDR_HOP: 582 break; 583 case NEXTHDR_ROUTING: 584 found_rhdr = 1; 585 break; 586 case NEXTHDR_DEST: 587 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 588 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) 589 break; 590 #endif 591 if (found_rhdr) 592 return offset; 593 break; 594 default : 595 return offset; 596 } 597 598 offset += ipv6_optlen(exthdr); 599 *nexthdr = &exthdr->nexthdr; 600 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + 601 offset); 602 } 603 604 return offset; 605 } 606 607 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 608 { 609 struct sk_buff *frag; 610 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); 611 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 612 struct ipv6hdr *tmp_hdr; 613 struct frag_hdr *fh; 614 unsigned int mtu, hlen, left, len; 615 __be32 frag_id = 0; 616 int ptr, offset = 0, err=0; 617 u8 *prevhdr, nexthdr = 0; 618 struct net *net = dev_net(skb_dst(skb)->dev); 619 620 hlen = ip6_find_1stfragopt(skb, &prevhdr); 621 nexthdr = *prevhdr; 622 623 mtu = ip6_skb_dst_mtu(skb); 624 625 /* We must not fragment if the socket is set to force MTU discovery 626 * or if the skb it not generated by a local socket. (This last 627 * check should be redundant, but it's free.) 628 */ 629 if (!skb->local_df) { 630 skb->dev = skb_dst(skb)->dev; 631 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); 632 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 633 IPSTATS_MIB_FRAGFAILS); 634 kfree_skb(skb); 635 return -EMSGSIZE; 636 } 637 638 if (np && np->frag_size < mtu) { 639 if (np->frag_size) 640 mtu = np->frag_size; 641 } 642 mtu -= hlen + sizeof(struct frag_hdr); 643 644 if (skb_has_frags(skb)) { 645 int first_len = skb_pagelen(skb); 646 int truesizes = 0; 647 648 if (first_len - hlen > mtu || 649 ((first_len - hlen) & 7) || 650 skb_cloned(skb)) 651 goto slow_path; 652 653 skb_walk_frags(skb, frag) { 654 /* Correct geometry. */ 655 if (frag->len > mtu || 656 ((frag->len & 7) && frag->next) || 657 skb_headroom(frag) < hlen) 658 goto slow_path; 659 660 /* Partially cloned skb? */ 661 if (skb_shared(frag)) 662 goto slow_path; 663 664 BUG_ON(frag->sk); 665 if (skb->sk) { 666 frag->sk = skb->sk; 667 frag->destructor = sock_wfree; 668 truesizes += frag->truesize; 669 } 670 } 671 672 err = 0; 673 offset = 0; 674 frag = skb_shinfo(skb)->frag_list; 675 skb_frag_list_init(skb); 676 /* BUILD HEADER */ 677 678 *prevhdr = NEXTHDR_FRAGMENT; 679 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 680 if (!tmp_hdr) { 681 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 682 IPSTATS_MIB_FRAGFAILS); 683 return -ENOMEM; 684 } 685 686 __skb_pull(skb, hlen); 687 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); 688 __skb_push(skb, hlen); 689 skb_reset_network_header(skb); 690 memcpy(skb_network_header(skb), tmp_hdr, hlen); 691 692 ipv6_select_ident(fh); 693 fh->nexthdr = nexthdr; 694 fh->reserved = 0; 695 fh->frag_off = htons(IP6_MF); 696 frag_id = fh->identification; 697 698 first_len = skb_pagelen(skb); 699 skb->data_len = first_len - skb_headlen(skb); 700 skb->truesize -= truesizes; 701 skb->len = first_len; 702 ipv6_hdr(skb)->payload_len = htons(first_len - 703 sizeof(struct ipv6hdr)); 704 705 dst_hold(&rt->u.dst); 706 707 for (;;) { 708 /* Prepare header of the next frame, 709 * before previous one went down. */ 710 if (frag) { 711 frag->ip_summed = CHECKSUM_NONE; 712 skb_reset_transport_header(frag); 713 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); 714 __skb_push(frag, hlen); 715 skb_reset_network_header(frag); 716 memcpy(skb_network_header(frag), tmp_hdr, 717 hlen); 718 offset += skb->len - hlen - sizeof(struct frag_hdr); 719 fh->nexthdr = nexthdr; 720 fh->reserved = 0; 721 fh->frag_off = htons(offset); 722 if (frag->next != NULL) 723 fh->frag_off |= htons(IP6_MF); 724 fh->identification = frag_id; 725 ipv6_hdr(frag)->payload_len = 726 htons(frag->len - 727 sizeof(struct ipv6hdr)); 728 ip6_copy_metadata(frag, skb); 729 } 730 731 err = output(skb); 732 if(!err) 733 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 734 IPSTATS_MIB_FRAGCREATES); 735 736 if (err || !frag) 737 break; 738 739 skb = frag; 740 frag = skb->next; 741 skb->next = NULL; 742 } 743 744 kfree(tmp_hdr); 745 746 if (err == 0) { 747 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 748 IPSTATS_MIB_FRAGOKS); 749 dst_release(&rt->u.dst); 750 return 0; 751 } 752 753 while (frag) { 754 skb = frag->next; 755 kfree_skb(frag); 756 frag = skb; 757 } 758 759 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 760 IPSTATS_MIB_FRAGFAILS); 761 dst_release(&rt->u.dst); 762 return err; 763 } 764 765 slow_path: 766 left = skb->len - hlen; /* Space per frame */ 767 ptr = hlen; /* Where to start from */ 768 769 /* 770 * Fragment the datagram. 771 */ 772 773 *prevhdr = NEXTHDR_FRAGMENT; 774 775 /* 776 * Keep copying data until we run out. 777 */ 778 while(left > 0) { 779 len = left; 780 /* IF: it doesn't fit, use 'mtu' - the data space left */ 781 if (len > mtu) 782 len = mtu; 783 /* IF: we are not sending upto and including the packet end 784 then align the next start on an eight byte boundary */ 785 if (len < left) { 786 len &= ~7; 787 } 788 /* 789 * Allocate buffer. 790 */ 791 792 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { 793 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 794 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 795 IPSTATS_MIB_FRAGFAILS); 796 err = -ENOMEM; 797 goto fail; 798 } 799 800 /* 801 * Set up data on packet 802 */ 803 804 ip6_copy_metadata(frag, skb); 805 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); 806 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 807 skb_reset_network_header(frag); 808 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 809 frag->transport_header = (frag->network_header + hlen + 810 sizeof(struct frag_hdr)); 811 812 /* 813 * Charge the memory for the fragment to any owner 814 * it might possess 815 */ 816 if (skb->sk) 817 skb_set_owner_w(frag, skb->sk); 818 819 /* 820 * Copy the packet header into the new buffer. 821 */ 822 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); 823 824 /* 825 * Build fragment header. 826 */ 827 fh->nexthdr = nexthdr; 828 fh->reserved = 0; 829 if (!frag_id) { 830 ipv6_select_ident(fh); 831 frag_id = fh->identification; 832 } else 833 fh->identification = frag_id; 834 835 /* 836 * Copy a block of the IP datagram. 837 */ 838 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) 839 BUG(); 840 left -= len; 841 842 fh->frag_off = htons(offset); 843 if (left > 0) 844 fh->frag_off |= htons(IP6_MF); 845 ipv6_hdr(frag)->payload_len = htons(frag->len - 846 sizeof(struct ipv6hdr)); 847 848 ptr += len; 849 offset += len; 850 851 /* 852 * Put this fragment into the sending queue. 853 */ 854 err = output(frag); 855 if (err) 856 goto fail; 857 858 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 859 IPSTATS_MIB_FRAGCREATES); 860 } 861 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 862 IPSTATS_MIB_FRAGOKS); 863 kfree_skb(skb); 864 return err; 865 866 fail: 867 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 868 IPSTATS_MIB_FRAGFAILS); 869 kfree_skb(skb); 870 return err; 871 } 872 873 static inline int ip6_rt_check(struct rt6key *rt_key, 874 struct in6_addr *fl_addr, 875 struct in6_addr *addr_cache) 876 { 877 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 878 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); 879 } 880 881 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 882 struct dst_entry *dst, 883 struct flowi *fl) 884 { 885 struct ipv6_pinfo *np = inet6_sk(sk); 886 struct rt6_info *rt = (struct rt6_info *)dst; 887 888 if (!dst) 889 goto out; 890 891 /* Yes, checking route validity in not connected 892 * case is not very simple. Take into account, 893 * that we do not support routing by source, TOS, 894 * and MSG_DONTROUTE --ANK (980726) 895 * 896 * 1. ip6_rt_check(): If route was host route, 897 * check that cached destination is current. 898 * If it is network route, we still may 899 * check its validity using saved pointer 900 * to the last used address: daddr_cache. 901 * We do not want to save whole address now, 902 * (because main consumer of this service 903 * is tcp, which has not this problem), 904 * so that the last trick works only on connected 905 * sockets. 906 * 2. oif also should be the same. 907 */ 908 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || 909 #ifdef CONFIG_IPV6_SUBTREES 910 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || 911 #endif 912 (fl->oif && fl->oif != dst->dev->ifindex)) { 913 dst_release(dst); 914 dst = NULL; 915 } 916 917 out: 918 return dst; 919 } 920 921 static int ip6_dst_lookup_tail(struct sock *sk, 922 struct dst_entry **dst, struct flowi *fl) 923 { 924 int err; 925 struct net *net = sock_net(sk); 926 927 if (*dst == NULL) 928 *dst = ip6_route_output(net, sk, fl); 929 930 if ((err = (*dst)->error)) 931 goto out_err_release; 932 933 if (ipv6_addr_any(&fl->fl6_src)) { 934 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, 935 &fl->fl6_dst, 936 sk ? inet6_sk(sk)->srcprefs : 0, 937 &fl->fl6_src); 938 if (err) 939 goto out_err_release; 940 } 941 942 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 943 /* 944 * Here if the dst entry we've looked up 945 * has a neighbour entry that is in the INCOMPLETE 946 * state and the src address from the flow is 947 * marked as OPTIMISTIC, we release the found 948 * dst entry and replace it instead with the 949 * dst entry of the nexthop router 950 */ 951 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { 952 struct inet6_ifaddr *ifp; 953 struct flowi fl_gw; 954 int redirect; 955 956 ifp = ipv6_get_ifaddr(net, &fl->fl6_src, 957 (*dst)->dev, 1); 958 959 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 960 if (ifp) 961 in6_ifa_put(ifp); 962 963 if (redirect) { 964 /* 965 * We need to get the dst entry for the 966 * default router instead 967 */ 968 dst_release(*dst); 969 memcpy(&fl_gw, fl, sizeof(struct flowi)); 970 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); 971 *dst = ip6_route_output(net, sk, &fl_gw); 972 if ((err = (*dst)->error)) 973 goto out_err_release; 974 } 975 } 976 #endif 977 978 return 0; 979 980 out_err_release: 981 if (err == -ENETUNREACH) 982 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); 983 dst_release(*dst); 984 *dst = NULL; 985 return err; 986 } 987 988 /** 989 * ip6_dst_lookup - perform route lookup on flow 990 * @sk: socket which provides route info 991 * @dst: pointer to dst_entry * for result 992 * @fl: flow to lookup 993 * 994 * This function performs a route lookup on the given flow. 995 * 996 * It returns zero on success, or a standard errno code on error. 997 */ 998 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 999 { 1000 *dst = NULL; 1001 return ip6_dst_lookup_tail(sk, dst, fl); 1002 } 1003 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1004 1005 /** 1006 * ip6_sk_dst_lookup - perform socket cached route lookup on flow 1007 * @sk: socket which provides the dst cache and route info 1008 * @dst: pointer to dst_entry * for result 1009 * @fl: flow to lookup 1010 * 1011 * This function performs a route lookup on the given flow with the 1012 * possibility of using the cached route in the socket if it is valid. 1013 * It will take the socket dst lock when operating on the dst cache. 1014 * As a result, this function can only be used in process context. 1015 * 1016 * It returns zero on success, or a standard errno code on error. 1017 */ 1018 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 1019 { 1020 *dst = NULL; 1021 if (sk) { 1022 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1023 *dst = ip6_sk_dst_check(sk, *dst, fl); 1024 } 1025 1026 return ip6_dst_lookup_tail(sk, dst, fl); 1027 } 1028 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); 1029 1030 static inline int ip6_ufo_append_data(struct sock *sk, 1031 int getfrag(void *from, char *to, int offset, int len, 1032 int odd, struct sk_buff *skb), 1033 void *from, int length, int hh_len, int fragheaderlen, 1034 int transhdrlen, int mtu,unsigned int flags) 1035 1036 { 1037 struct sk_buff *skb; 1038 int err; 1039 1040 /* There is support for UDP large send offload by network 1041 * device, so create one single skb packet containing complete 1042 * udp datagram 1043 */ 1044 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 1045 skb = sock_alloc_send_skb(sk, 1046 hh_len + fragheaderlen + transhdrlen + 20, 1047 (flags & MSG_DONTWAIT), &err); 1048 if (skb == NULL) 1049 return -ENOMEM; 1050 1051 /* reserve space for Hardware header */ 1052 skb_reserve(skb, hh_len); 1053 1054 /* create space for UDP/IP header */ 1055 skb_put(skb,fragheaderlen + transhdrlen); 1056 1057 /* initialize network header pointer */ 1058 skb_reset_network_header(skb); 1059 1060 /* initialize protocol header pointer */ 1061 skb->transport_header = skb->network_header + fragheaderlen; 1062 1063 skb->ip_summed = CHECKSUM_PARTIAL; 1064 skb->csum = 0; 1065 sk->sk_sndmsg_off = 0; 1066 } 1067 1068 err = skb_append_datato_frags(sk,skb, getfrag, from, 1069 (length - transhdrlen)); 1070 if (!err) { 1071 struct frag_hdr fhdr; 1072 1073 /* Specify the length of each IPv6 datagram fragment. 1074 * It has to be a multiple of 8. 1075 */ 1076 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - 1077 sizeof(struct frag_hdr)) & ~7; 1078 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1079 ipv6_select_ident(&fhdr); 1080 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1081 __skb_queue_tail(&sk->sk_write_queue, skb); 1082 1083 return 0; 1084 } 1085 /* There is not enough support do UPD LSO, 1086 * so follow normal path 1087 */ 1088 kfree_skb(skb); 1089 1090 return err; 1091 } 1092 1093 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1094 gfp_t gfp) 1095 { 1096 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1097 } 1098 1099 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 1100 gfp_t gfp) 1101 { 1102 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1103 } 1104 1105 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 1106 int offset, int len, int odd, struct sk_buff *skb), 1107 void *from, int length, int transhdrlen, 1108 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, 1109 struct rt6_info *rt, unsigned int flags) 1110 { 1111 struct inet_sock *inet = inet_sk(sk); 1112 struct ipv6_pinfo *np = inet6_sk(sk); 1113 struct sk_buff *skb; 1114 unsigned int maxfraglen, fragheaderlen; 1115 int exthdrlen; 1116 int hh_len; 1117 int mtu; 1118 int copy; 1119 int err; 1120 int offset = 0; 1121 int csummode = CHECKSUM_NONE; 1122 1123 if (flags&MSG_PROBE) 1124 return 0; 1125 if (skb_queue_empty(&sk->sk_write_queue)) { 1126 /* 1127 * setup for corking 1128 */ 1129 if (opt) { 1130 if (WARN_ON(np->cork.opt)) 1131 return -EINVAL; 1132 1133 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); 1134 if (unlikely(np->cork.opt == NULL)) 1135 return -ENOBUFS; 1136 1137 np->cork.opt->tot_len = opt->tot_len; 1138 np->cork.opt->opt_flen = opt->opt_flen; 1139 np->cork.opt->opt_nflen = opt->opt_nflen; 1140 1141 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, 1142 sk->sk_allocation); 1143 if (opt->dst0opt && !np->cork.opt->dst0opt) 1144 return -ENOBUFS; 1145 1146 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, 1147 sk->sk_allocation); 1148 if (opt->dst1opt && !np->cork.opt->dst1opt) 1149 return -ENOBUFS; 1150 1151 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, 1152 sk->sk_allocation); 1153 if (opt->hopopt && !np->cork.opt->hopopt) 1154 return -ENOBUFS; 1155 1156 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, 1157 sk->sk_allocation); 1158 if (opt->srcrt && !np->cork.opt->srcrt) 1159 return -ENOBUFS; 1160 1161 /* need source address above miyazawa*/ 1162 } 1163 dst_hold(&rt->u.dst); 1164 inet->cork.dst = &rt->u.dst; 1165 inet->cork.fl = *fl; 1166 np->cork.hop_limit = hlimit; 1167 np->cork.tclass = tclass; 1168 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1169 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); 1170 if (np->frag_size < mtu) { 1171 if (np->frag_size) 1172 mtu = np->frag_size; 1173 } 1174 inet->cork.fragsize = mtu; 1175 if (dst_allfrag(rt->u.dst.path)) 1176 inet->cork.flags |= IPCORK_ALLFRAG; 1177 inet->cork.length = 0; 1178 sk->sk_sndmsg_page = NULL; 1179 sk->sk_sndmsg_off = 0; 1180 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - 1181 rt->rt6i_nfheader_len; 1182 length += exthdrlen; 1183 transhdrlen += exthdrlen; 1184 } else { 1185 rt = (struct rt6_info *)inet->cork.dst; 1186 fl = &inet->cork.fl; 1187 opt = np->cork.opt; 1188 transhdrlen = 0; 1189 exthdrlen = 0; 1190 mtu = inet->cork.fragsize; 1191 } 1192 1193 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 1194 1195 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1196 (opt ? opt->opt_nflen : 0); 1197 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 1198 1199 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1200 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { 1201 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); 1202 return -EMSGSIZE; 1203 } 1204 } 1205 1206 /* 1207 * Let's try using as much space as possible. 1208 * Use MTU if total length of the message fits into the MTU. 1209 * Otherwise, we need to reserve fragment header and 1210 * fragment alignment (= 8-15 octects, in total). 1211 * 1212 * Note that we may need to "move" the data from the tail of 1213 * of the buffer to the new fragment when we split 1214 * the message. 1215 * 1216 * FIXME: It may be fragmented into multiple chunks 1217 * at once if non-fragmentable extension headers 1218 * are too large. 1219 * --yoshfuji 1220 */ 1221 1222 inet->cork.length += length; 1223 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && 1224 (rt->u.dst.dev->features & NETIF_F_UFO)) { 1225 1226 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, 1227 fragheaderlen, transhdrlen, mtu, 1228 flags); 1229 if (err) 1230 goto error; 1231 return 0; 1232 } 1233 1234 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1235 goto alloc_new_skb; 1236 1237 while (length > 0) { 1238 /* Check if the remaining data fits into current packet. */ 1239 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 1240 if (copy < length) 1241 copy = maxfraglen - skb->len; 1242 1243 if (copy <= 0) { 1244 char *data; 1245 unsigned int datalen; 1246 unsigned int fraglen; 1247 unsigned int fraggap; 1248 unsigned int alloclen; 1249 struct sk_buff *skb_prev; 1250 alloc_new_skb: 1251 skb_prev = skb; 1252 1253 /* There's no room in the current skb */ 1254 if (skb_prev) 1255 fraggap = skb_prev->len - maxfraglen; 1256 else 1257 fraggap = 0; 1258 1259 /* 1260 * If remaining data exceeds the mtu, 1261 * we know we need more fragment(s). 1262 */ 1263 datalen = length + fraggap; 1264 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1265 datalen = maxfraglen - fragheaderlen; 1266 1267 fraglen = datalen + fragheaderlen; 1268 if ((flags & MSG_MORE) && 1269 !(rt->u.dst.dev->features&NETIF_F_SG)) 1270 alloclen = mtu; 1271 else 1272 alloclen = datalen + fragheaderlen; 1273 1274 /* 1275 * The last fragment gets additional space at tail. 1276 * Note: we overallocate on fragments with MSG_MODE 1277 * because we have no idea if we're the last one. 1278 */ 1279 if (datalen == length + fraggap) 1280 alloclen += rt->u.dst.trailer_len; 1281 1282 /* 1283 * We just reserve space for fragment header. 1284 * Note: this may be overallocation if the message 1285 * (without MSG_MORE) fits into the MTU. 1286 */ 1287 alloclen += sizeof(struct frag_hdr); 1288 1289 if (transhdrlen) { 1290 skb = sock_alloc_send_skb(sk, 1291 alloclen + hh_len, 1292 (flags & MSG_DONTWAIT), &err); 1293 } else { 1294 skb = NULL; 1295 if (atomic_read(&sk->sk_wmem_alloc) <= 1296 2 * sk->sk_sndbuf) 1297 skb = sock_wmalloc(sk, 1298 alloclen + hh_len, 1, 1299 sk->sk_allocation); 1300 if (unlikely(skb == NULL)) 1301 err = -ENOBUFS; 1302 } 1303 if (skb == NULL) 1304 goto error; 1305 /* 1306 * Fill in the control structures 1307 */ 1308 skb->ip_summed = csummode; 1309 skb->csum = 0; 1310 /* reserve for fragmentation */ 1311 skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); 1312 1313 /* 1314 * Find where to start putting bytes 1315 */ 1316 data = skb_put(skb, fraglen); 1317 skb_set_network_header(skb, exthdrlen); 1318 data += fragheaderlen; 1319 skb->transport_header = (skb->network_header + 1320 fragheaderlen); 1321 if (fraggap) { 1322 skb->csum = skb_copy_and_csum_bits( 1323 skb_prev, maxfraglen, 1324 data + transhdrlen, fraggap, 0); 1325 skb_prev->csum = csum_sub(skb_prev->csum, 1326 skb->csum); 1327 data += fraggap; 1328 pskb_trim_unique(skb_prev, maxfraglen); 1329 } 1330 copy = datalen - transhdrlen - fraggap; 1331 if (copy < 0) { 1332 err = -EINVAL; 1333 kfree_skb(skb); 1334 goto error; 1335 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1336 err = -EFAULT; 1337 kfree_skb(skb); 1338 goto error; 1339 } 1340 1341 offset += copy; 1342 length -= datalen - fraggap; 1343 transhdrlen = 0; 1344 exthdrlen = 0; 1345 csummode = CHECKSUM_NONE; 1346 1347 /* 1348 * Put the packet on the pending queue 1349 */ 1350 __skb_queue_tail(&sk->sk_write_queue, skb); 1351 continue; 1352 } 1353 1354 if (copy > length) 1355 copy = length; 1356 1357 if (!(rt->u.dst.dev->features&NETIF_F_SG)) { 1358 unsigned int off; 1359 1360 off = skb->len; 1361 if (getfrag(from, skb_put(skb, copy), 1362 offset, copy, off, skb) < 0) { 1363 __skb_trim(skb, off); 1364 err = -EFAULT; 1365 goto error; 1366 } 1367 } else { 1368 int i = skb_shinfo(skb)->nr_frags; 1369 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 1370 struct page *page = sk->sk_sndmsg_page; 1371 int off = sk->sk_sndmsg_off; 1372 unsigned int left; 1373 1374 if (page && (left = PAGE_SIZE - off) > 0) { 1375 if (copy >= left) 1376 copy = left; 1377 if (page != frag->page) { 1378 if (i == MAX_SKB_FRAGS) { 1379 err = -EMSGSIZE; 1380 goto error; 1381 } 1382 get_page(page); 1383 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 1384 frag = &skb_shinfo(skb)->frags[i]; 1385 } 1386 } else if(i < MAX_SKB_FRAGS) { 1387 if (copy > PAGE_SIZE) 1388 copy = PAGE_SIZE; 1389 page = alloc_pages(sk->sk_allocation, 0); 1390 if (page == NULL) { 1391 err = -ENOMEM; 1392 goto error; 1393 } 1394 sk->sk_sndmsg_page = page; 1395 sk->sk_sndmsg_off = 0; 1396 1397 skb_fill_page_desc(skb, i, page, 0, 0); 1398 frag = &skb_shinfo(skb)->frags[i]; 1399 } else { 1400 err = -EMSGSIZE; 1401 goto error; 1402 } 1403 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { 1404 err = -EFAULT; 1405 goto error; 1406 } 1407 sk->sk_sndmsg_off += copy; 1408 frag->size += copy; 1409 skb->len += copy; 1410 skb->data_len += copy; 1411 skb->truesize += copy; 1412 atomic_add(copy, &sk->sk_wmem_alloc); 1413 } 1414 offset += copy; 1415 length -= copy; 1416 } 1417 return 0; 1418 error: 1419 inet->cork.length -= length; 1420 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1421 return err; 1422 } 1423 1424 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) 1425 { 1426 if (np->cork.opt) { 1427 kfree(np->cork.opt->dst0opt); 1428 kfree(np->cork.opt->dst1opt); 1429 kfree(np->cork.opt->hopopt); 1430 kfree(np->cork.opt->srcrt); 1431 kfree(np->cork.opt); 1432 np->cork.opt = NULL; 1433 } 1434 1435 if (inet->cork.dst) { 1436 dst_release(inet->cork.dst); 1437 inet->cork.dst = NULL; 1438 inet->cork.flags &= ~IPCORK_ALLFRAG; 1439 } 1440 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1441 } 1442 1443 int ip6_push_pending_frames(struct sock *sk) 1444 { 1445 struct sk_buff *skb, *tmp_skb; 1446 struct sk_buff **tail_skb; 1447 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1448 struct inet_sock *inet = inet_sk(sk); 1449 struct ipv6_pinfo *np = inet6_sk(sk); 1450 struct net *net = sock_net(sk); 1451 struct ipv6hdr *hdr; 1452 struct ipv6_txoptions *opt = np->cork.opt; 1453 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; 1454 struct flowi *fl = &inet->cork.fl; 1455 unsigned char proto = fl->proto; 1456 int err = 0; 1457 1458 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1459 goto out; 1460 tail_skb = &(skb_shinfo(skb)->frag_list); 1461 1462 /* move skb->data to ip header from ext header */ 1463 if (skb->data < skb_network_header(skb)) 1464 __skb_pull(skb, skb_network_offset(skb)); 1465 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1466 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1467 *tail_skb = tmp_skb; 1468 tail_skb = &(tmp_skb->next); 1469 skb->len += tmp_skb->len; 1470 skb->data_len += tmp_skb->len; 1471 skb->truesize += tmp_skb->truesize; 1472 tmp_skb->destructor = NULL; 1473 tmp_skb->sk = NULL; 1474 } 1475 1476 /* Allow local fragmentation. */ 1477 if (np->pmtudisc < IPV6_PMTUDISC_DO) 1478 skb->local_df = 1; 1479 1480 ipv6_addr_copy(final_dst, &fl->fl6_dst); 1481 __skb_pull(skb, skb_network_header_len(skb)); 1482 if (opt && opt->opt_flen) 1483 ipv6_push_frag_opts(skb, opt, &proto); 1484 if (opt && opt->opt_nflen) 1485 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1486 1487 skb_push(skb, sizeof(struct ipv6hdr)); 1488 skb_reset_network_header(skb); 1489 hdr = ipv6_hdr(skb); 1490 1491 *(__be32*)hdr = fl->fl6_flowlabel | 1492 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1493 1494 hdr->hop_limit = np->cork.hop_limit; 1495 hdr->nexthdr = proto; 1496 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 1497 ipv6_addr_copy(&hdr->daddr, final_dst); 1498 1499 skb->priority = sk->sk_priority; 1500 skb->mark = sk->sk_mark; 1501 1502 skb_dst_set(skb, dst_clone(&rt->u.dst)); 1503 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1504 if (proto == IPPROTO_ICMPV6) { 1505 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1506 1507 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); 1508 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); 1509 } 1510 1511 err = ip6_local_out(skb); 1512 if (err) { 1513 if (err > 0) 1514 err = net_xmit_errno(err); 1515 if (err) 1516 goto error; 1517 } 1518 1519 out: 1520 ip6_cork_release(inet, np); 1521 return err; 1522 error: 1523 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1524 goto out; 1525 } 1526 1527 void ip6_flush_pending_frames(struct sock *sk) 1528 { 1529 struct sk_buff *skb; 1530 1531 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { 1532 if (skb_dst(skb)) 1533 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1534 IPSTATS_MIB_OUTDISCARDS); 1535 kfree_skb(skb); 1536 } 1537 1538 ip6_cork_release(inet_sk(sk), inet6_sk(sk)); 1539 } 1540