1 /* 2 * IPv6 output functions 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * Based on linux/net/ipv4/ip_output.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Changes: 16 * A.N.Kuznetsov : airthmetics in fragmentation. 17 * extension headers are implemented. 18 * route changes now work. 19 * ip6_forward does not confuse sniffers. 20 * etc. 21 * 22 * H. von Brand : Added missing #include <linux/string.h> 23 * Imran Patel : frag id should be in NBO 24 * Kazunori MIYAZAWA @USAGI 25 * : add ip6_append_data and related functions 26 * for datagram xmit 27 */ 28 29 #include <linux/errno.h> 30 #include <linux/kernel.h> 31 #include <linux/string.h> 32 #include <linux/socket.h> 33 #include <linux/net.h> 34 #include <linux/netdevice.h> 35 #include <linux/if_arp.h> 36 #include <linux/in6.h> 37 #include <linux/tcp.h> 38 #include <linux/route.h> 39 #include <linux/module.h> 40 #include <linux/slab.h> 41 42 #include <linux/netfilter.h> 43 #include <linux/netfilter_ipv6.h> 44 45 #include <net/sock.h> 46 #include <net/snmp.h> 47 48 #include <net/ipv6.h> 49 #include <net/ndisc.h> 50 #include <net/protocol.h> 51 #include <net/ip6_route.h> 52 #include <net/addrconf.h> 53 #include <net/rawv6.h> 54 #include <net/icmp.h> 55 #include <net/xfrm.h> 56 #include <net/checksum.h> 57 #include <linux/mroute6.h> 58 59 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); 60 61 int __ip6_local_out(struct sk_buff *skb) 62 { 63 int len; 64 65 len = skb->len - sizeof(struct ipv6hdr); 66 if (len > IPV6_MAXPLEN) 67 len = 0; 68 ipv6_hdr(skb)->payload_len = htons(len); 69 70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 71 skb_dst(skb)->dev, dst_output); 72 } 73 74 int ip6_local_out(struct sk_buff *skb) 75 { 76 int err; 77 78 err = __ip6_local_out(skb); 79 if (likely(err == 1)) 80 err = dst_output(skb); 81 82 return err; 83 } 84 EXPORT_SYMBOL_GPL(ip6_local_out); 85 86 /* dev_loopback_xmit for use with netfilter. */ 87 static int ip6_dev_loopback_xmit(struct sk_buff *newskb) 88 { 89 skb_reset_mac_header(newskb); 90 __skb_pull(newskb, skb_network_offset(newskb)); 91 newskb->pkt_type = PACKET_LOOPBACK; 92 newskb->ip_summed = CHECKSUM_UNNECESSARY; 93 WARN_ON(!skb_dst(newskb)); 94 95 netif_rx_ni(newskb); 96 return 0; 97 } 98 99 static int ip6_finish_output2(struct sk_buff *skb) 100 { 101 struct dst_entry *dst = skb_dst(skb); 102 struct net_device *dev = dst->dev; 103 104 skb->protocol = htons(ETH_P_IPV6); 105 skb->dev = dev; 106 107 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 108 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 109 110 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && 111 ((mroute6_socket(dev_net(dev), skb) && 112 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 113 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 114 &ipv6_hdr(skb)->saddr))) { 115 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 116 117 /* Do not check for IFF_ALLMULTI; multicast routing 118 is not supported in any case. 119 */ 120 if (newskb) 121 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 122 newskb, NULL, newskb->dev, 123 ip6_dev_loopback_xmit); 124 125 if (ipv6_hdr(skb)->hop_limit == 0) { 126 IP6_INC_STATS(dev_net(dev), idev, 127 IPSTATS_MIB_OUTDISCARDS); 128 kfree_skb(skb); 129 return 0; 130 } 131 } 132 133 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, 134 skb->len); 135 } 136 137 if (dst->hh) 138 return neigh_hh_output(dst->hh, skb); 139 else if (dst->neighbour) 140 return dst->neighbour->output(skb); 141 142 IP6_INC_STATS_BH(dev_net(dst->dev), 143 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 144 kfree_skb(skb); 145 return -EINVAL; 146 } 147 148 static int ip6_finish_output(struct sk_buff *skb) 149 { 150 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 151 dst_allfrag(skb_dst(skb))) 152 return ip6_fragment(skb, ip6_finish_output2); 153 else 154 return ip6_finish_output2(skb); 155 } 156 157 int ip6_output(struct sk_buff *skb) 158 { 159 struct net_device *dev = skb_dst(skb)->dev; 160 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 161 if (unlikely(idev->cnf.disable_ipv6)) { 162 IP6_INC_STATS(dev_net(dev), idev, 163 IPSTATS_MIB_OUTDISCARDS); 164 kfree_skb(skb); 165 return 0; 166 } 167 168 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, 169 ip6_finish_output, 170 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 171 } 172 173 /* 174 * xmit an sk_buff (used by TCP, SCTP and DCCP) 175 */ 176 177 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, 178 struct ipv6_txoptions *opt) 179 { 180 struct net *net = sock_net(sk); 181 struct ipv6_pinfo *np = inet6_sk(sk); 182 struct in6_addr *first_hop = &fl->fl6_dst; 183 struct dst_entry *dst = skb_dst(skb); 184 struct ipv6hdr *hdr; 185 u8 proto = fl->proto; 186 int seg_len = skb->len; 187 int hlimit = -1; 188 int tclass = 0; 189 u32 mtu; 190 191 if (opt) { 192 unsigned int head_room; 193 194 /* First: exthdrs may take lots of space (~8K for now) 195 MAX_HEADER is not enough. 196 */ 197 head_room = opt->opt_nflen + opt->opt_flen; 198 seg_len += head_room; 199 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 200 201 if (skb_headroom(skb) < head_room) { 202 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 203 if (skb2 == NULL) { 204 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 205 IPSTATS_MIB_OUTDISCARDS); 206 kfree_skb(skb); 207 return -ENOBUFS; 208 } 209 kfree_skb(skb); 210 skb = skb2; 211 skb_set_owner_w(skb, sk); 212 } 213 if (opt->opt_flen) 214 ipv6_push_frag_opts(skb, opt, &proto); 215 if (opt->opt_nflen) 216 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 217 } 218 219 skb_push(skb, sizeof(struct ipv6hdr)); 220 skb_reset_network_header(skb); 221 hdr = ipv6_hdr(skb); 222 223 /* 224 * Fill in the IPv6 header 225 */ 226 if (np) { 227 tclass = np->tclass; 228 hlimit = np->hop_limit; 229 } 230 if (hlimit < 0) 231 hlimit = ip6_dst_hoplimit(dst); 232 233 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; 234 235 hdr->payload_len = htons(seg_len); 236 hdr->nexthdr = proto; 237 hdr->hop_limit = hlimit; 238 239 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 240 ipv6_addr_copy(&hdr->daddr, first_hop); 241 242 skb->priority = sk->sk_priority; 243 skb->mark = sk->sk_mark; 244 245 mtu = dst_mtu(dst); 246 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { 247 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 248 IPSTATS_MIB_OUT, skb->len); 249 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 250 dst->dev, dst_output); 251 } 252 253 if (net_ratelimit()) 254 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); 255 skb->dev = dst->dev; 256 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 257 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 258 kfree_skb(skb); 259 return -EMSGSIZE; 260 } 261 262 EXPORT_SYMBOL(ip6_xmit); 263 264 /* 265 * To avoid extra problems ND packets are send through this 266 * routine. It's code duplication but I really want to avoid 267 * extra checks since ipv6_build_header is used by TCP (which 268 * is for us performance critical) 269 */ 270 271 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, 272 const struct in6_addr *saddr, const struct in6_addr *daddr, 273 int proto, int len) 274 { 275 struct ipv6_pinfo *np = inet6_sk(sk); 276 struct ipv6hdr *hdr; 277 int totlen; 278 279 skb->protocol = htons(ETH_P_IPV6); 280 skb->dev = dev; 281 282 totlen = len + sizeof(struct ipv6hdr); 283 284 skb_reset_network_header(skb); 285 skb_put(skb, sizeof(struct ipv6hdr)); 286 hdr = ipv6_hdr(skb); 287 288 *(__be32*)hdr = htonl(0x60000000); 289 290 hdr->payload_len = htons(len); 291 hdr->nexthdr = proto; 292 hdr->hop_limit = np->hop_limit; 293 294 ipv6_addr_copy(&hdr->saddr, saddr); 295 ipv6_addr_copy(&hdr->daddr, daddr); 296 297 return 0; 298 } 299 300 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 301 { 302 struct ip6_ra_chain *ra; 303 struct sock *last = NULL; 304 305 read_lock(&ip6_ra_lock); 306 for (ra = ip6_ra_chain; ra; ra = ra->next) { 307 struct sock *sk = ra->sk; 308 if (sk && ra->sel == sel && 309 (!sk->sk_bound_dev_if || 310 sk->sk_bound_dev_if == skb->dev->ifindex)) { 311 if (last) { 312 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 313 if (skb2) 314 rawv6_rcv(last, skb2); 315 } 316 last = sk; 317 } 318 } 319 320 if (last) { 321 rawv6_rcv(last, skb); 322 read_unlock(&ip6_ra_lock); 323 return 1; 324 } 325 read_unlock(&ip6_ra_lock); 326 return 0; 327 } 328 329 static int ip6_forward_proxy_check(struct sk_buff *skb) 330 { 331 struct ipv6hdr *hdr = ipv6_hdr(skb); 332 u8 nexthdr = hdr->nexthdr; 333 int offset; 334 335 if (ipv6_ext_hdr(nexthdr)) { 336 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); 337 if (offset < 0) 338 return 0; 339 } else 340 offset = sizeof(struct ipv6hdr); 341 342 if (nexthdr == IPPROTO_ICMPV6) { 343 struct icmp6hdr *icmp6; 344 345 if (!pskb_may_pull(skb, (skb_network_header(skb) + 346 offset + 1 - skb->data))) 347 return 0; 348 349 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 350 351 switch (icmp6->icmp6_type) { 352 case NDISC_ROUTER_SOLICITATION: 353 case NDISC_ROUTER_ADVERTISEMENT: 354 case NDISC_NEIGHBOUR_SOLICITATION: 355 case NDISC_NEIGHBOUR_ADVERTISEMENT: 356 case NDISC_REDIRECT: 357 /* For reaction involving unicast neighbor discovery 358 * message destined to the proxied address, pass it to 359 * input function. 360 */ 361 return 1; 362 default: 363 break; 364 } 365 } 366 367 /* 368 * The proxying router can't forward traffic sent to a link-local 369 * address, so signal the sender and discard the packet. This 370 * behavior is clarified by the MIPv6 specification. 371 */ 372 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 373 dst_link_failure(skb); 374 return -1; 375 } 376 377 return 0; 378 } 379 380 static inline int ip6_forward_finish(struct sk_buff *skb) 381 { 382 return dst_output(skb); 383 } 384 385 int ip6_forward(struct sk_buff *skb) 386 { 387 struct dst_entry *dst = skb_dst(skb); 388 struct ipv6hdr *hdr = ipv6_hdr(skb); 389 struct inet6_skb_parm *opt = IP6CB(skb); 390 struct net *net = dev_net(dst->dev); 391 u32 mtu; 392 393 if (net->ipv6.devconf_all->forwarding == 0) 394 goto error; 395 396 if (skb_warn_if_lro(skb)) 397 goto drop; 398 399 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 400 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 401 goto drop; 402 } 403 404 if (skb->pkt_type != PACKET_HOST) 405 goto drop; 406 407 skb_forward_csum(skb); 408 409 /* 410 * We DO NOT make any processing on 411 * RA packets, pushing them to user level AS IS 412 * without ane WARRANTY that application will be able 413 * to interpret them. The reason is that we 414 * cannot make anything clever here. 415 * 416 * We are not end-node, so that if packet contains 417 * AH/ESP, we cannot make anything. 418 * Defragmentation also would be mistake, RA packets 419 * cannot be fragmented, because there is no warranty 420 * that different fragments will go along one path. --ANK 421 */ 422 if (opt->ra) { 423 u8 *ptr = skb_network_header(skb) + opt->ra; 424 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) 425 return 0; 426 } 427 428 /* 429 * check and decrement ttl 430 */ 431 if (hdr->hop_limit <= 1) { 432 /* Force OUTPUT device used as source address */ 433 skb->dev = dst->dev; 434 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 435 IP6_INC_STATS_BH(net, 436 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 437 438 kfree_skb(skb); 439 return -ETIMEDOUT; 440 } 441 442 /* XXX: idev->cnf.proxy_ndp? */ 443 if (net->ipv6.devconf_all->proxy_ndp && 444 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 445 int proxied = ip6_forward_proxy_check(skb); 446 if (proxied > 0) 447 return ip6_input(skb); 448 else if (proxied < 0) { 449 IP6_INC_STATS(net, ip6_dst_idev(dst), 450 IPSTATS_MIB_INDISCARDS); 451 goto drop; 452 } 453 } 454 455 if (!xfrm6_route_forward(skb)) { 456 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 457 goto drop; 458 } 459 dst = skb_dst(skb); 460 461 /* IPv6 specs say nothing about it, but it is clear that we cannot 462 send redirects to source routed frames. 463 We don't send redirects to frames decapsulated from IPsec. 464 */ 465 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && 466 !skb_sec_path(skb)) { 467 struct in6_addr *target = NULL; 468 struct rt6_info *rt; 469 struct neighbour *n = dst->neighbour; 470 471 /* 472 * incoming and outgoing devices are the same 473 * send a redirect. 474 */ 475 476 rt = (struct rt6_info *) dst; 477 if ((rt->rt6i_flags & RTF_GATEWAY)) 478 target = (struct in6_addr*)&n->primary_key; 479 else 480 target = &hdr->daddr; 481 482 /* Limit redirects both by destination (here) 483 and by source (inside ndisc_send_redirect) 484 */ 485 if (xrlim_allow(dst, 1*HZ)) 486 ndisc_send_redirect(skb, n, target); 487 } else { 488 int addrtype = ipv6_addr_type(&hdr->saddr); 489 490 /* This check is security critical. */ 491 if (addrtype == IPV6_ADDR_ANY || 492 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 493 goto error; 494 if (addrtype & IPV6_ADDR_LINKLOCAL) { 495 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 496 ICMPV6_NOT_NEIGHBOUR, 0); 497 goto error; 498 } 499 } 500 501 mtu = dst_mtu(dst); 502 if (mtu < IPV6_MIN_MTU) 503 mtu = IPV6_MIN_MTU; 504 505 if (skb->len > mtu && !skb_is_gso(skb)) { 506 /* Again, force OUTPUT device used as source address */ 507 skb->dev = dst->dev; 508 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 509 IP6_INC_STATS_BH(net, 510 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); 511 IP6_INC_STATS_BH(net, 512 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); 513 kfree_skb(skb); 514 return -EMSGSIZE; 515 } 516 517 if (skb_cow(skb, dst->dev->hard_header_len)) { 518 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); 519 goto drop; 520 } 521 522 hdr = ipv6_hdr(skb); 523 524 /* Mangling hops number delayed to point after skb COW */ 525 526 hdr->hop_limit--; 527 528 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 529 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 530 ip6_forward_finish); 531 532 error: 533 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 534 drop: 535 kfree_skb(skb); 536 return -EINVAL; 537 } 538 539 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 540 { 541 to->pkt_type = from->pkt_type; 542 to->priority = from->priority; 543 to->protocol = from->protocol; 544 skb_dst_drop(to); 545 skb_dst_set(to, dst_clone(skb_dst(from))); 546 to->dev = from->dev; 547 to->mark = from->mark; 548 549 #ifdef CONFIG_NET_SCHED 550 to->tc_index = from->tc_index; 551 #endif 552 nf_copy(to, from); 553 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 554 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 555 to->nf_trace = from->nf_trace; 556 #endif 557 skb_copy_secmark(to, from); 558 } 559 560 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) 561 { 562 u16 offset = sizeof(struct ipv6hdr); 563 struct ipv6_opt_hdr *exthdr = 564 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); 565 unsigned int packet_len = skb->tail - skb->network_header; 566 int found_rhdr = 0; 567 *nexthdr = &ipv6_hdr(skb)->nexthdr; 568 569 while (offset + 1 <= packet_len) { 570 571 switch (**nexthdr) { 572 573 case NEXTHDR_HOP: 574 break; 575 case NEXTHDR_ROUTING: 576 found_rhdr = 1; 577 break; 578 case NEXTHDR_DEST: 579 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 580 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) 581 break; 582 #endif 583 if (found_rhdr) 584 return offset; 585 break; 586 default : 587 return offset; 588 } 589 590 offset += ipv6_optlen(exthdr); 591 *nexthdr = &exthdr->nexthdr; 592 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + 593 offset); 594 } 595 596 return offset; 597 } 598 599 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 600 { 601 struct sk_buff *frag; 602 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); 603 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 604 struct ipv6hdr *tmp_hdr; 605 struct frag_hdr *fh; 606 unsigned int mtu, hlen, left, len; 607 __be32 frag_id = 0; 608 int ptr, offset = 0, err=0; 609 u8 *prevhdr, nexthdr = 0; 610 struct net *net = dev_net(skb_dst(skb)->dev); 611 612 hlen = ip6_find_1stfragopt(skb, &prevhdr); 613 nexthdr = *prevhdr; 614 615 mtu = ip6_skb_dst_mtu(skb); 616 617 /* We must not fragment if the socket is set to force MTU discovery 618 * or if the skb it not generated by a local socket. 619 */ 620 if (!skb->local_df && skb->len > mtu) { 621 skb->dev = skb_dst(skb)->dev; 622 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 623 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 624 IPSTATS_MIB_FRAGFAILS); 625 kfree_skb(skb); 626 return -EMSGSIZE; 627 } 628 629 if (np && np->frag_size < mtu) { 630 if (np->frag_size) 631 mtu = np->frag_size; 632 } 633 mtu -= hlen + sizeof(struct frag_hdr); 634 635 if (skb_has_frag_list(skb)) { 636 int first_len = skb_pagelen(skb); 637 struct sk_buff *frag2; 638 639 if (first_len - hlen > mtu || 640 ((first_len - hlen) & 7) || 641 skb_cloned(skb)) 642 goto slow_path; 643 644 skb_walk_frags(skb, frag) { 645 /* Correct geometry. */ 646 if (frag->len > mtu || 647 ((frag->len & 7) && frag->next) || 648 skb_headroom(frag) < hlen) 649 goto slow_path_clean; 650 651 /* Partially cloned skb? */ 652 if (skb_shared(frag)) 653 goto slow_path_clean; 654 655 BUG_ON(frag->sk); 656 if (skb->sk) { 657 frag->sk = skb->sk; 658 frag->destructor = sock_wfree; 659 } 660 skb->truesize -= frag->truesize; 661 } 662 663 err = 0; 664 offset = 0; 665 frag = skb_shinfo(skb)->frag_list; 666 skb_frag_list_init(skb); 667 /* BUILD HEADER */ 668 669 *prevhdr = NEXTHDR_FRAGMENT; 670 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 671 if (!tmp_hdr) { 672 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 673 IPSTATS_MIB_FRAGFAILS); 674 return -ENOMEM; 675 } 676 677 __skb_pull(skb, hlen); 678 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); 679 __skb_push(skb, hlen); 680 skb_reset_network_header(skb); 681 memcpy(skb_network_header(skb), tmp_hdr, hlen); 682 683 ipv6_select_ident(fh); 684 fh->nexthdr = nexthdr; 685 fh->reserved = 0; 686 fh->frag_off = htons(IP6_MF); 687 frag_id = fh->identification; 688 689 first_len = skb_pagelen(skb); 690 skb->data_len = first_len - skb_headlen(skb); 691 skb->len = first_len; 692 ipv6_hdr(skb)->payload_len = htons(first_len - 693 sizeof(struct ipv6hdr)); 694 695 dst_hold(&rt->dst); 696 697 for (;;) { 698 /* Prepare header of the next frame, 699 * before previous one went down. */ 700 if (frag) { 701 frag->ip_summed = CHECKSUM_NONE; 702 skb_reset_transport_header(frag); 703 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); 704 __skb_push(frag, hlen); 705 skb_reset_network_header(frag); 706 memcpy(skb_network_header(frag), tmp_hdr, 707 hlen); 708 offset += skb->len - hlen - sizeof(struct frag_hdr); 709 fh->nexthdr = nexthdr; 710 fh->reserved = 0; 711 fh->frag_off = htons(offset); 712 if (frag->next != NULL) 713 fh->frag_off |= htons(IP6_MF); 714 fh->identification = frag_id; 715 ipv6_hdr(frag)->payload_len = 716 htons(frag->len - 717 sizeof(struct ipv6hdr)); 718 ip6_copy_metadata(frag, skb); 719 } 720 721 err = output(skb); 722 if(!err) 723 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 724 IPSTATS_MIB_FRAGCREATES); 725 726 if (err || !frag) 727 break; 728 729 skb = frag; 730 frag = skb->next; 731 skb->next = NULL; 732 } 733 734 kfree(tmp_hdr); 735 736 if (err == 0) { 737 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 738 IPSTATS_MIB_FRAGOKS); 739 dst_release(&rt->dst); 740 return 0; 741 } 742 743 while (frag) { 744 skb = frag->next; 745 kfree_skb(frag); 746 frag = skb; 747 } 748 749 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 750 IPSTATS_MIB_FRAGFAILS); 751 dst_release(&rt->dst); 752 return err; 753 754 slow_path_clean: 755 skb_walk_frags(skb, frag2) { 756 if (frag2 == frag) 757 break; 758 frag2->sk = NULL; 759 frag2->destructor = NULL; 760 skb->truesize += frag2->truesize; 761 } 762 } 763 764 slow_path: 765 left = skb->len - hlen; /* Space per frame */ 766 ptr = hlen; /* Where to start from */ 767 768 /* 769 * Fragment the datagram. 770 */ 771 772 *prevhdr = NEXTHDR_FRAGMENT; 773 774 /* 775 * Keep copying data until we run out. 776 */ 777 while(left > 0) { 778 len = left; 779 /* IF: it doesn't fit, use 'mtu' - the data space left */ 780 if (len > mtu) 781 len = mtu; 782 /* IF: we are not sending upto and including the packet end 783 then align the next start on an eight byte boundary */ 784 if (len < left) { 785 len &= ~7; 786 } 787 /* 788 * Allocate buffer. 789 */ 790 791 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { 792 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 793 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 794 IPSTATS_MIB_FRAGFAILS); 795 err = -ENOMEM; 796 goto fail; 797 } 798 799 /* 800 * Set up data on packet 801 */ 802 803 ip6_copy_metadata(frag, skb); 804 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); 805 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 806 skb_reset_network_header(frag); 807 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 808 frag->transport_header = (frag->network_header + hlen + 809 sizeof(struct frag_hdr)); 810 811 /* 812 * Charge the memory for the fragment to any owner 813 * it might possess 814 */ 815 if (skb->sk) 816 skb_set_owner_w(frag, skb->sk); 817 818 /* 819 * Copy the packet header into the new buffer. 820 */ 821 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); 822 823 /* 824 * Build fragment header. 825 */ 826 fh->nexthdr = nexthdr; 827 fh->reserved = 0; 828 if (!frag_id) { 829 ipv6_select_ident(fh); 830 frag_id = fh->identification; 831 } else 832 fh->identification = frag_id; 833 834 /* 835 * Copy a block of the IP datagram. 836 */ 837 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) 838 BUG(); 839 left -= len; 840 841 fh->frag_off = htons(offset); 842 if (left > 0) 843 fh->frag_off |= htons(IP6_MF); 844 ipv6_hdr(frag)->payload_len = htons(frag->len - 845 sizeof(struct ipv6hdr)); 846 847 ptr += len; 848 offset += len; 849 850 /* 851 * Put this fragment into the sending queue. 852 */ 853 err = output(frag); 854 if (err) 855 goto fail; 856 857 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 858 IPSTATS_MIB_FRAGCREATES); 859 } 860 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 861 IPSTATS_MIB_FRAGOKS); 862 kfree_skb(skb); 863 return err; 864 865 fail: 866 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 867 IPSTATS_MIB_FRAGFAILS); 868 kfree_skb(skb); 869 return err; 870 } 871 872 static inline int ip6_rt_check(struct rt6key *rt_key, 873 struct in6_addr *fl_addr, 874 struct in6_addr *addr_cache) 875 { 876 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 877 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); 878 } 879 880 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 881 struct dst_entry *dst, 882 struct flowi *fl) 883 { 884 struct ipv6_pinfo *np = inet6_sk(sk); 885 struct rt6_info *rt = (struct rt6_info *)dst; 886 887 if (!dst) 888 goto out; 889 890 /* Yes, checking route validity in not connected 891 * case is not very simple. Take into account, 892 * that we do not support routing by source, TOS, 893 * and MSG_DONTROUTE --ANK (980726) 894 * 895 * 1. ip6_rt_check(): If route was host route, 896 * check that cached destination is current. 897 * If it is network route, we still may 898 * check its validity using saved pointer 899 * to the last used address: daddr_cache. 900 * We do not want to save whole address now, 901 * (because main consumer of this service 902 * is tcp, which has not this problem), 903 * so that the last trick works only on connected 904 * sockets. 905 * 2. oif also should be the same. 906 */ 907 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || 908 #ifdef CONFIG_IPV6_SUBTREES 909 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || 910 #endif 911 (fl->oif && fl->oif != dst->dev->ifindex)) { 912 dst_release(dst); 913 dst = NULL; 914 } 915 916 out: 917 return dst; 918 } 919 920 static int ip6_dst_lookup_tail(struct sock *sk, 921 struct dst_entry **dst, struct flowi *fl) 922 { 923 int err; 924 struct net *net = sock_net(sk); 925 926 if (*dst == NULL) 927 *dst = ip6_route_output(net, sk, fl); 928 929 if ((err = (*dst)->error)) 930 goto out_err_release; 931 932 if (ipv6_addr_any(&fl->fl6_src)) { 933 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, 934 &fl->fl6_dst, 935 sk ? inet6_sk(sk)->srcprefs : 0, 936 &fl->fl6_src); 937 if (err) 938 goto out_err_release; 939 } 940 941 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 942 /* 943 * Here if the dst entry we've looked up 944 * has a neighbour entry that is in the INCOMPLETE 945 * state and the src address from the flow is 946 * marked as OPTIMISTIC, we release the found 947 * dst entry and replace it instead with the 948 * dst entry of the nexthop router 949 */ 950 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { 951 struct inet6_ifaddr *ifp; 952 struct flowi fl_gw; 953 int redirect; 954 955 ifp = ipv6_get_ifaddr(net, &fl->fl6_src, 956 (*dst)->dev, 1); 957 958 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 959 if (ifp) 960 in6_ifa_put(ifp); 961 962 if (redirect) { 963 /* 964 * We need to get the dst entry for the 965 * default router instead 966 */ 967 dst_release(*dst); 968 memcpy(&fl_gw, fl, sizeof(struct flowi)); 969 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); 970 *dst = ip6_route_output(net, sk, &fl_gw); 971 if ((err = (*dst)->error)) 972 goto out_err_release; 973 } 974 } 975 #endif 976 977 return 0; 978 979 out_err_release: 980 if (err == -ENETUNREACH) 981 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); 982 dst_release(*dst); 983 *dst = NULL; 984 return err; 985 } 986 987 /** 988 * ip6_dst_lookup - perform route lookup on flow 989 * @sk: socket which provides route info 990 * @dst: pointer to dst_entry * for result 991 * @fl: flow to lookup 992 * 993 * This function performs a route lookup on the given flow. 994 * 995 * It returns zero on success, or a standard errno code on error. 996 */ 997 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 998 { 999 *dst = NULL; 1000 return ip6_dst_lookup_tail(sk, dst, fl); 1001 } 1002 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 1003 1004 /** 1005 * ip6_sk_dst_lookup - perform socket cached route lookup on flow 1006 * @sk: socket which provides the dst cache and route info 1007 * @dst: pointer to dst_entry * for result 1008 * @fl: flow to lookup 1009 * 1010 * This function performs a route lookup on the given flow with the 1011 * possibility of using the cached route in the socket if it is valid. 1012 * It will take the socket dst lock when operating on the dst cache. 1013 * As a result, this function can only be used in process context. 1014 * 1015 * It returns zero on success, or a standard errno code on error. 1016 */ 1017 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 1018 { 1019 *dst = NULL; 1020 if (sk) { 1021 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1022 *dst = ip6_sk_dst_check(sk, *dst, fl); 1023 } 1024 1025 return ip6_dst_lookup_tail(sk, dst, fl); 1026 } 1027 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); 1028 1029 static inline int ip6_ufo_append_data(struct sock *sk, 1030 int getfrag(void *from, char *to, int offset, int len, 1031 int odd, struct sk_buff *skb), 1032 void *from, int length, int hh_len, int fragheaderlen, 1033 int transhdrlen, int mtu,unsigned int flags) 1034 1035 { 1036 struct sk_buff *skb; 1037 int err; 1038 1039 /* There is support for UDP large send offload by network 1040 * device, so create one single skb packet containing complete 1041 * udp datagram 1042 */ 1043 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 1044 skb = sock_alloc_send_skb(sk, 1045 hh_len + fragheaderlen + transhdrlen + 20, 1046 (flags & MSG_DONTWAIT), &err); 1047 if (skb == NULL) 1048 return -ENOMEM; 1049 1050 /* reserve space for Hardware header */ 1051 skb_reserve(skb, hh_len); 1052 1053 /* create space for UDP/IP header */ 1054 skb_put(skb,fragheaderlen + transhdrlen); 1055 1056 /* initialize network header pointer */ 1057 skb_reset_network_header(skb); 1058 1059 /* initialize protocol header pointer */ 1060 skb->transport_header = skb->network_header + fragheaderlen; 1061 1062 skb->ip_summed = CHECKSUM_PARTIAL; 1063 skb->csum = 0; 1064 sk->sk_sndmsg_off = 0; 1065 } 1066 1067 err = skb_append_datato_frags(sk,skb, getfrag, from, 1068 (length - transhdrlen)); 1069 if (!err) { 1070 struct frag_hdr fhdr; 1071 1072 /* Specify the length of each IPv6 datagram fragment. 1073 * It has to be a multiple of 8. 1074 */ 1075 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - 1076 sizeof(struct frag_hdr)) & ~7; 1077 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1078 ipv6_select_ident(&fhdr); 1079 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1080 __skb_queue_tail(&sk->sk_write_queue, skb); 1081 1082 return 0; 1083 } 1084 /* There is not enough support do UPD LSO, 1085 * so follow normal path 1086 */ 1087 kfree_skb(skb); 1088 1089 return err; 1090 } 1091 1092 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1093 gfp_t gfp) 1094 { 1095 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1096 } 1097 1098 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 1099 gfp_t gfp) 1100 { 1101 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1102 } 1103 1104 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 1105 int offset, int len, int odd, struct sk_buff *skb), 1106 void *from, int length, int transhdrlen, 1107 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, 1108 struct rt6_info *rt, unsigned int flags, int dontfrag) 1109 { 1110 struct inet_sock *inet = inet_sk(sk); 1111 struct ipv6_pinfo *np = inet6_sk(sk); 1112 struct sk_buff *skb; 1113 unsigned int maxfraglen, fragheaderlen; 1114 int exthdrlen; 1115 int hh_len; 1116 int mtu; 1117 int copy; 1118 int err; 1119 int offset = 0; 1120 int csummode = CHECKSUM_NONE; 1121 1122 if (flags&MSG_PROBE) 1123 return 0; 1124 if (skb_queue_empty(&sk->sk_write_queue)) { 1125 /* 1126 * setup for corking 1127 */ 1128 if (opt) { 1129 if (WARN_ON(np->cork.opt)) 1130 return -EINVAL; 1131 1132 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); 1133 if (unlikely(np->cork.opt == NULL)) 1134 return -ENOBUFS; 1135 1136 np->cork.opt->tot_len = opt->tot_len; 1137 np->cork.opt->opt_flen = opt->opt_flen; 1138 np->cork.opt->opt_nflen = opt->opt_nflen; 1139 1140 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, 1141 sk->sk_allocation); 1142 if (opt->dst0opt && !np->cork.opt->dst0opt) 1143 return -ENOBUFS; 1144 1145 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, 1146 sk->sk_allocation); 1147 if (opt->dst1opt && !np->cork.opt->dst1opt) 1148 return -ENOBUFS; 1149 1150 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, 1151 sk->sk_allocation); 1152 if (opt->hopopt && !np->cork.opt->hopopt) 1153 return -ENOBUFS; 1154 1155 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, 1156 sk->sk_allocation); 1157 if (opt->srcrt && !np->cork.opt->srcrt) 1158 return -ENOBUFS; 1159 1160 /* need source address above miyazawa*/ 1161 } 1162 dst_hold(&rt->dst); 1163 inet->cork.dst = &rt->dst; 1164 inet->cork.fl = *fl; 1165 np->cork.hop_limit = hlimit; 1166 np->cork.tclass = tclass; 1167 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1168 rt->dst.dev->mtu : dst_mtu(rt->dst.path); 1169 if (np->frag_size < mtu) { 1170 if (np->frag_size) 1171 mtu = np->frag_size; 1172 } 1173 inet->cork.fragsize = mtu; 1174 if (dst_allfrag(rt->dst.path)) 1175 inet->cork.flags |= IPCORK_ALLFRAG; 1176 inet->cork.length = 0; 1177 sk->sk_sndmsg_page = NULL; 1178 sk->sk_sndmsg_off = 0; 1179 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - 1180 rt->rt6i_nfheader_len; 1181 length += exthdrlen; 1182 transhdrlen += exthdrlen; 1183 } else { 1184 rt = (struct rt6_info *)inet->cork.dst; 1185 fl = &inet->cork.fl; 1186 opt = np->cork.opt; 1187 transhdrlen = 0; 1188 exthdrlen = 0; 1189 mtu = inet->cork.fragsize; 1190 } 1191 1192 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1193 1194 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1195 (opt ? opt->opt_nflen : 0); 1196 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 1197 1198 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1199 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { 1200 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); 1201 return -EMSGSIZE; 1202 } 1203 } 1204 1205 /* 1206 * Let's try using as much space as possible. 1207 * Use MTU if total length of the message fits into the MTU. 1208 * Otherwise, we need to reserve fragment header and 1209 * fragment alignment (= 8-15 octects, in total). 1210 * 1211 * Note that we may need to "move" the data from the tail of 1212 * of the buffer to the new fragment when we split 1213 * the message. 1214 * 1215 * FIXME: It may be fragmented into multiple chunks 1216 * at once if non-fragmentable extension headers 1217 * are too large. 1218 * --yoshfuji 1219 */ 1220 1221 inet->cork.length += length; 1222 if (length > mtu) { 1223 int proto = sk->sk_protocol; 1224 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ 1225 ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); 1226 return -EMSGSIZE; 1227 } 1228 1229 if (proto == IPPROTO_UDP && 1230 (rt->dst.dev->features & NETIF_F_UFO)) { 1231 1232 err = ip6_ufo_append_data(sk, getfrag, from, length, 1233 hh_len, fragheaderlen, 1234 transhdrlen, mtu, flags); 1235 if (err) 1236 goto error; 1237 return 0; 1238 } 1239 } 1240 1241 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1242 goto alloc_new_skb; 1243 1244 while (length > 0) { 1245 /* Check if the remaining data fits into current packet. */ 1246 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 1247 if (copy < length) 1248 copy = maxfraglen - skb->len; 1249 1250 if (copy <= 0) { 1251 char *data; 1252 unsigned int datalen; 1253 unsigned int fraglen; 1254 unsigned int fraggap; 1255 unsigned int alloclen; 1256 struct sk_buff *skb_prev; 1257 alloc_new_skb: 1258 skb_prev = skb; 1259 1260 /* There's no room in the current skb */ 1261 if (skb_prev) 1262 fraggap = skb_prev->len - maxfraglen; 1263 else 1264 fraggap = 0; 1265 1266 /* 1267 * If remaining data exceeds the mtu, 1268 * we know we need more fragment(s). 1269 */ 1270 datalen = length + fraggap; 1271 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1272 datalen = maxfraglen - fragheaderlen; 1273 1274 fraglen = datalen + fragheaderlen; 1275 if ((flags & MSG_MORE) && 1276 !(rt->dst.dev->features&NETIF_F_SG)) 1277 alloclen = mtu; 1278 else 1279 alloclen = datalen + fragheaderlen; 1280 1281 /* 1282 * The last fragment gets additional space at tail. 1283 * Note: we overallocate on fragments with MSG_MODE 1284 * because we have no idea if we're the last one. 1285 */ 1286 if (datalen == length + fraggap) 1287 alloclen += rt->dst.trailer_len; 1288 1289 /* 1290 * We just reserve space for fragment header. 1291 * Note: this may be overallocation if the message 1292 * (without MSG_MORE) fits into the MTU. 1293 */ 1294 alloclen += sizeof(struct frag_hdr); 1295 1296 if (transhdrlen) { 1297 skb = sock_alloc_send_skb(sk, 1298 alloclen + hh_len, 1299 (flags & MSG_DONTWAIT), &err); 1300 } else { 1301 skb = NULL; 1302 if (atomic_read(&sk->sk_wmem_alloc) <= 1303 2 * sk->sk_sndbuf) 1304 skb = sock_wmalloc(sk, 1305 alloclen + hh_len, 1, 1306 sk->sk_allocation); 1307 if (unlikely(skb == NULL)) 1308 err = -ENOBUFS; 1309 } 1310 if (skb == NULL) 1311 goto error; 1312 /* 1313 * Fill in the control structures 1314 */ 1315 skb->ip_summed = csummode; 1316 skb->csum = 0; 1317 /* reserve for fragmentation */ 1318 skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); 1319 1320 /* 1321 * Find where to start putting bytes 1322 */ 1323 data = skb_put(skb, fraglen); 1324 skb_set_network_header(skb, exthdrlen); 1325 data += fragheaderlen; 1326 skb->transport_header = (skb->network_header + 1327 fragheaderlen); 1328 if (fraggap) { 1329 skb->csum = skb_copy_and_csum_bits( 1330 skb_prev, maxfraglen, 1331 data + transhdrlen, fraggap, 0); 1332 skb_prev->csum = csum_sub(skb_prev->csum, 1333 skb->csum); 1334 data += fraggap; 1335 pskb_trim_unique(skb_prev, maxfraglen); 1336 } 1337 copy = datalen - transhdrlen - fraggap; 1338 if (copy < 0) { 1339 err = -EINVAL; 1340 kfree_skb(skb); 1341 goto error; 1342 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1343 err = -EFAULT; 1344 kfree_skb(skb); 1345 goto error; 1346 } 1347 1348 offset += copy; 1349 length -= datalen - fraggap; 1350 transhdrlen = 0; 1351 exthdrlen = 0; 1352 csummode = CHECKSUM_NONE; 1353 1354 /* 1355 * Put the packet on the pending queue 1356 */ 1357 __skb_queue_tail(&sk->sk_write_queue, skb); 1358 continue; 1359 } 1360 1361 if (copy > length) 1362 copy = length; 1363 1364 if (!(rt->dst.dev->features&NETIF_F_SG)) { 1365 unsigned int off; 1366 1367 off = skb->len; 1368 if (getfrag(from, skb_put(skb, copy), 1369 offset, copy, off, skb) < 0) { 1370 __skb_trim(skb, off); 1371 err = -EFAULT; 1372 goto error; 1373 } 1374 } else { 1375 int i = skb_shinfo(skb)->nr_frags; 1376 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 1377 struct page *page = sk->sk_sndmsg_page; 1378 int off = sk->sk_sndmsg_off; 1379 unsigned int left; 1380 1381 if (page && (left = PAGE_SIZE - off) > 0) { 1382 if (copy >= left) 1383 copy = left; 1384 if (page != frag->page) { 1385 if (i == MAX_SKB_FRAGS) { 1386 err = -EMSGSIZE; 1387 goto error; 1388 } 1389 get_page(page); 1390 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 1391 frag = &skb_shinfo(skb)->frags[i]; 1392 } 1393 } else if(i < MAX_SKB_FRAGS) { 1394 if (copy > PAGE_SIZE) 1395 copy = PAGE_SIZE; 1396 page = alloc_pages(sk->sk_allocation, 0); 1397 if (page == NULL) { 1398 err = -ENOMEM; 1399 goto error; 1400 } 1401 sk->sk_sndmsg_page = page; 1402 sk->sk_sndmsg_off = 0; 1403 1404 skb_fill_page_desc(skb, i, page, 0, 0); 1405 frag = &skb_shinfo(skb)->frags[i]; 1406 } else { 1407 err = -EMSGSIZE; 1408 goto error; 1409 } 1410 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { 1411 err = -EFAULT; 1412 goto error; 1413 } 1414 sk->sk_sndmsg_off += copy; 1415 frag->size += copy; 1416 skb->len += copy; 1417 skb->data_len += copy; 1418 skb->truesize += copy; 1419 atomic_add(copy, &sk->sk_wmem_alloc); 1420 } 1421 offset += copy; 1422 length -= copy; 1423 } 1424 return 0; 1425 error: 1426 inet->cork.length -= length; 1427 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1428 return err; 1429 } 1430 1431 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) 1432 { 1433 if (np->cork.opt) { 1434 kfree(np->cork.opt->dst0opt); 1435 kfree(np->cork.opt->dst1opt); 1436 kfree(np->cork.opt->hopopt); 1437 kfree(np->cork.opt->srcrt); 1438 kfree(np->cork.opt); 1439 np->cork.opt = NULL; 1440 } 1441 1442 if (inet->cork.dst) { 1443 dst_release(inet->cork.dst); 1444 inet->cork.dst = NULL; 1445 inet->cork.flags &= ~IPCORK_ALLFRAG; 1446 } 1447 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1448 } 1449 1450 int ip6_push_pending_frames(struct sock *sk) 1451 { 1452 struct sk_buff *skb, *tmp_skb; 1453 struct sk_buff **tail_skb; 1454 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1455 struct inet_sock *inet = inet_sk(sk); 1456 struct ipv6_pinfo *np = inet6_sk(sk); 1457 struct net *net = sock_net(sk); 1458 struct ipv6hdr *hdr; 1459 struct ipv6_txoptions *opt = np->cork.opt; 1460 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; 1461 struct flowi *fl = &inet->cork.fl; 1462 unsigned char proto = fl->proto; 1463 int err = 0; 1464 1465 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1466 goto out; 1467 tail_skb = &(skb_shinfo(skb)->frag_list); 1468 1469 /* move skb->data to ip header from ext header */ 1470 if (skb->data < skb_network_header(skb)) 1471 __skb_pull(skb, skb_network_offset(skb)); 1472 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1473 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1474 *tail_skb = tmp_skb; 1475 tail_skb = &(tmp_skb->next); 1476 skb->len += tmp_skb->len; 1477 skb->data_len += tmp_skb->len; 1478 skb->truesize += tmp_skb->truesize; 1479 tmp_skb->destructor = NULL; 1480 tmp_skb->sk = NULL; 1481 } 1482 1483 /* Allow local fragmentation. */ 1484 if (np->pmtudisc < IPV6_PMTUDISC_DO) 1485 skb->local_df = 1; 1486 1487 ipv6_addr_copy(final_dst, &fl->fl6_dst); 1488 __skb_pull(skb, skb_network_header_len(skb)); 1489 if (opt && opt->opt_flen) 1490 ipv6_push_frag_opts(skb, opt, &proto); 1491 if (opt && opt->opt_nflen) 1492 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1493 1494 skb_push(skb, sizeof(struct ipv6hdr)); 1495 skb_reset_network_header(skb); 1496 hdr = ipv6_hdr(skb); 1497 1498 *(__be32*)hdr = fl->fl6_flowlabel | 1499 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1500 1501 hdr->hop_limit = np->cork.hop_limit; 1502 hdr->nexthdr = proto; 1503 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 1504 ipv6_addr_copy(&hdr->daddr, final_dst); 1505 1506 skb->priority = sk->sk_priority; 1507 skb->mark = sk->sk_mark; 1508 1509 skb_dst_set(skb, dst_clone(&rt->dst)); 1510 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1511 if (proto == IPPROTO_ICMPV6) { 1512 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1513 1514 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); 1515 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); 1516 } 1517 1518 err = ip6_local_out(skb); 1519 if (err) { 1520 if (err > 0) 1521 err = net_xmit_errno(err); 1522 if (err) 1523 goto error; 1524 } 1525 1526 out: 1527 ip6_cork_release(inet, np); 1528 return err; 1529 error: 1530 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1531 goto out; 1532 } 1533 1534 void ip6_flush_pending_frames(struct sock *sk) 1535 { 1536 struct sk_buff *skb; 1537 1538 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { 1539 if (skb_dst(skb)) 1540 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1541 IPSTATS_MIB_OUTDISCARDS); 1542 kfree_skb(skb); 1543 } 1544 1545 ip6_cork_release(inet_sk(sk), inet6_sk(sk)); 1546 } 1547