/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
		       dst_output);
}
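
/*
 * Note on the return convention assumed here: nf_hook() returns 1 when
 * the LOCAL_OUT hook chain accepted the packet without queueing or
 * stealing it, in which case the caller is expected to invoke the okfn
 * (dst_output) itself.  That is why ip6_local_out() below only calls
 * dst_output() on "err == 1".
 */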
int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!newskb->dst);

	netif_rx(newskb);
	return 0;
}


static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}
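
/*
 * A note on the fragmentation decision below: dst_allfrag() is set when
 * a Packet Too Big message reported an MTU below the IPv6 minimum of
 * 1280 bytes.  Per RFC 2460 the path MTU then stays at 1280, but every
 * packet to that destination must carry a fragment header, so such
 * dsts are always pushed through ip6_fragment().
 */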
int ip6_output(struct sk_buff *skb)
{
	struct inet6_dev *idev = ip6_dst_idev(skb->dst);
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Allow local fragmentation. */
	if (ipfragok)
		skb->local_df = 1;

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;
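
	/*
	 * The first 32 bits of the IPv6 header pack version (4 bits),
	 * traffic class (8 bits) and flow label (20 bits):
	 *
	 *	0x60000000	version 6, bits 28-31
	 *	tclass << 20	traffic class, bits 20-27
	 *	flow label	bits 0-19 (fl6_flowlabel is already
	 *			kept in network byte order)
	 *
	 * e.g. tclass 0x10 with no flow label yields htonl(0x61000000).
	 */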
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}
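
/*
 * ip6_call_ra_chain() delivers packets carrying a Router Alert
 * hop-by-hop option (RFC 2711) to interested raw sockets.
 * IP6CB(skb)->ra holds the offset of that option within the network
 * header; its layout is type (0x05), length (2), then a 16-bit value
 * (0 designates MLD), which is why the caller in ip6_forward() reads
 * the selector as (ptr[2] << 8) + ptr[3].
 */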
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT process RA packets; we push them to user
	 *	level AS IS without any WARRANTY that the application
	 *	will be able to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not the end node, so if the packet contains
	 *	AH/ESP we cannot do anything.
	 *	Defragmentation would also be a mistake; RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}
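
/*
 * Find the offset at which a Fragment header may be inserted.
 * RFC 2460 recommends the extension header order
 *
 *	IPv6 | Hop-by-Hop | Dest Opts | Routing | Fragment | ...
 *
 * so the scan below keeps in the unfragmentable part the Hop-by-Hop
 * header, the Routing header, and any Destination Options header that
 * either precedes a Routing header or carries a Home Address option,
 * and returns the first offset where the fragmentable part begins.
 */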
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket. (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);
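
	/*
	 * Fragment offsets are expressed in 8-octet units, so every
	 * fragment except the last must carry a multiple of 8 bytes of
	 * payload.  For illustration: with a 1500 byte MTU and a 40 byte
	 * unfragmentable part, mtu is now 1500 - 40 - 8 = 1452 bytes of
	 * payload capacity per fragment, and the fast path below insists
	 * that every queued fragment already fits that geometry.
	 */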
	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}
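
	/*
	 * The fast path above reused the skbs already queued on frag_list
	 * and only prepended headers.  The slow path below instead
	 * allocates a fresh skb per fragment and copies the payload into
	 * it, truncating every fragment but the last to a multiple of
	 * 8 bytes (len &= ~7) so that the 8-octet-unit fragment offsets
	 * stay exact.
	 */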
slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
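
/*
 * ip6_rt_check() returns nonzero when a cached route can no longer be
 * trusted for this flow: a host route (plen == 128) must match the
 * flow's address exactly, while for anything wider we can only compare
 * against the last address the socket actually used (addr_cache),
 * which is why that fallback works for connected sockets only.
 */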
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account that
	 * we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
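int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

/*
 * A note on UDP fragmentation offload: the datagram is built as one
 * large gso skb and the device (or the software GSO layer) emits the
 * actual fragments, so gso_size below is the per-fragment payload
 * capacity, mtu - fragheaderlen - sizeof(struct frag_hdr); e.g.
 * 1500 - 40 - 8 = 1452 bytes for a plain IPv6 header on a 1500 byte
 * MTU link.
 */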
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing the complete
	 * UDP datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path
	 */
	kfree_skb(skb);

	return err;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
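
	/*
	 * Worked example: with mtu 1500 and fragheaderlen 40,
	 * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1488, i.e. each
	 * non-final fragment holds the 40 header bytes plus 1448 payload
	 * bytes (a multiple of 8), leaving 8 bytes of the link MTU for
	 * the fragment header itself.
	 */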
	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
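			/*
			 * fraggap is the overhang beyond maxfraglen in the
			 * previous skb (it was allowed to fill up to mtu in
			 * case it turned out to be the last fragment).
			 * Move those bytes into this skb and trim the
			 * previous one back so every non-final fragment
			 * stays 8-byte aligned; the checksum follows the
			 * moved bytes.
			 */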
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
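		/*
		 * Note: the page carried over in sk_sndmsg_page is shared
		 * across calls so successive small appends keep filling
		 * the same page instead of allocating a fresh one each
		 * time.
		 */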
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

/*
 * ip6_push_pending_frames() coalesces everything queued on
 * sk_write_queue into the first skb, chaining the rest onto its
 * frag_list, then prepends the extension headers and the IPv6 header
 * and hands the result to ip6_local_out().  If the packet exceeds the
 * path MTU, ip6_fragment()'s fast path can split that frag_list back
 * up without copying the payload again.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}