/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx_ni(newskb);
	return 0;
}

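/*
 * Note on the hook convention used above: nf_hook() returns 1 when the
 * LOCAL_OUT verdict is NF_ACCEPT and no hook consumed the packet, which
 * is why ip6_local_out() only calls dst_output() itself in that case.
 * A minimal caller sketch (hypothetical; assumes skb already carries a
 * dst and a complete IPv6 header):
 *
 *	skb_dst_set(skb, dst);
 *	err = ip6_local_out(skb);	/\* fixes up payload_len, runs
 *					   LOCAL_OUT, then dst_output() *\/
 */
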
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	neigh = dst->neighbour;
	if (neigh) {
		struct hh_cache *hh = &neigh->hh;
		if (hh->hh_len)
			return neigh_hh_output(hh, skb);
		else
			return dst->neighbour->output(skb);
	}
	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

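/*
 * The decision in ip6_finish_output() boils down to: fragment only when
 * the packet is larger than the path MTU and cannot be segmented in
 * hardware (GSO), or when the route demands a fragment header on every
 * packet (dst_allfrag(), typically because the peer reported a path MTU
 * below IPV6_MIN_MTU).  Everything else goes straight to neighbour
 * output via ip6_finish_output2().
 */
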
/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

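/*
 * A worked example of the header word built above: the first 32 bits of
 * an IPv6 header are version (4 bits), traffic class (8 bits) and flow
 * label (20 bits).  With tclass = 0x28 and a zero flow label,
 *
 *	htonl(0x60000000 | (0x28 << 20)) == htonl(0x62800000)
 *
 * i.e. version 6, traffic class 0x28.  fl6->flowlabel is already stored
 * in network byte order and only the low 20 bits are expected to be
 * set, so it can be OR-ed in directly.
 */
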
/*
 * To avoid extra problems, ND packets are sent through this routine.
 * It's a bit of code duplication, but we really want to avoid extra
 * checks, since ipv6_build_header is used by TCP (which is
 * performance-critical for us).
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

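/*
 * ip6_call_ra_chain() delivers the packet to every raw socket that
 * registered for this Router Alert value (via the IPV6_ROUTER_ALERT
 * sockopt).  All matching sockets but the last receive a clone; the
 * last one consumes the original skb, which is why a return of 1 tells
 * ip6_forward() to stop processing.  The selector itself is the 16-bit
 * value of the Router Alert hop-by-hop option, read in ip6_forward()
 * below as (ptr[2] << 8) + ptr[3].
 */
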
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * messages destined to the proxied address, pass
			 * them to the input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 * We do no processing on RA packets: we push them to user level
	 * AS IS, with no warranty that the application will be able to
	 * interpret them, because there is nothing clever we can do here.
	 *
	 * We are not an end node, so if the packet contains AH/ESP we
	 * cannot do anything with it either.  Defragmenting would also
	 * be a mistake: RA packets must not be fragmented, because there
	 * is no guarantee that different fragments follow the same
	 * path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

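/*
 * Summary of the redirect policy above: a redirect is only generated
 * when the packet would leave on the interface it arrived on, the route
 * has a neighbour entry, the packet carries no routing header and was
 * not decapsulated from IPsec.  The advertised target is the gateway
 * for indirect routes, or the destination itself when it is on-link,
 * and transmission is rate-limited per destination through the
 * inet_peer cache (roughly one redirect per second here).
 */
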
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

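/*
 * ip6_find_1stfragopt() computes where a fragment header may be
 * inserted: it walks the extension-header chain and returns the offset
 * just past the "unfragmentable part" (RFC 2460, sec. 4.5), i.e. past
 * hop-by-hop and routing headers (and, with Mobile IPv6, a destination
 * options header carrying a Home Address option).  For a packet with no
 * extension headers this is simply sizeof(struct ipv6hdr) == 40.
 * *nexthdr is left pointing at the nexthdr byte that must be rewritten
 * to NEXTHDR_FRAGMENT.
 */
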
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (!skb->local_df && skb->len > mtu) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

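		/* Wire-encoding note: fh->frag_off packs the 13-bit
		 * fragment offset (in 8-octet units) into the upper bits
		 * and the M ("more fragments") flag into bit 0.  Because
		 * every per-fragment byte offset computed below is a
		 * multiple of 8, htons(offset) | htons(IP6_MF) already
		 * yields the correct on-wire field without an explicit
		 * shift.
		 */
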
		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

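	/*
	 * Slow path: the fast path above could reuse the existing
	 * frag_list geometry.  Here we instead allocate a fresh skb per
	 * fragment and copy the data into it, which works for any input
	 * skb at the cost of one full copy of the payload.
	 */
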
slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not-connected case is not
	 * very simple.  Take into account that we do not support routing
	 * by source, TOS, or MSG_DONTROUTE. --ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route,
	 *    check that the cached destination is current.
	 *    If it is a network route, we can still
	 *    check its validity using the saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save the whole address now
	 *    (the main consumer of this service is TCP,
	 *    which does not have this problem), so the
	 *    trick works only on connected sockets.
	 * 2. oif should also be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

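/*
 * ip6_dst_lookup_tail() below does the heavy lifting shared by all the
 * lookup helpers: it obtains a route when the caller did not supply
 * one, picks a source address when fl6->saddr is unspecified, and (with
 * optimistic DAD) falls back to the default router's dst while our
 * tentative source address might still turn out to be a duplicate.
 */
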
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl6->daddr, final_dst);
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

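/*
 * Because ip6_dst_lookup_flow() reports failure through ERR_PTR()
 * rather than NULL, a hypothetical caller sketch looks like:
 *
 *	dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 *	skb_dst_set(skb, dst);
 */
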
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl6->daddr, final_dst);
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* When the network device supports UDP large send offload,
	 * create a single skb containing the complete UDP datagram.
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path.
	 */
	kfree_skb(skb);

	return err;
}

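/*
 * Worked example for the gso_size computation above: with a 1500-byte
 * MTU and fragheaderlen == 40 (no extension headers), each fragment may
 * carry (1500 - 40 - 8) & ~7 == 1448 bytes of payload.  The 8 accounts
 * for the fragment header, and the mask keeps fragment payloads on the
 * 8-octet boundary that the fragment offset field requires.
 */
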
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

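	/*
	 * Worked example: with mtu == 1500 and no extension headers
	 * (fragheaderlen == 40), maxfraglen is
	 * ((1500 - 40) & ~7) + 40 - 8 == 1488, so each non-final
	 * fragment carries 1448 bytes of transport payload once the
	 * 8-byte fragment header is inserted, keeping payloads 8-octet
	 * aligned and the on-wire fragment within the MTU.
	 */
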
	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen,
						       offset, copy, fraggap,
						       skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from,
				    page_address(frag->page) +
				    frag->page_offset + frag->size,
				    offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.base.dst) {
		dst_release(inet->cork.base.dst);
		inet->cork.base.dst = NULL;
		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl6->daddr);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl6->flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}