1 /* 2 * IPv6 output functions 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $ 9 * 10 * Based on linux/net/ipv4/ip_output.c 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 * 17 * Changes: 18 * A.N.Kuznetsov : airthmetics in fragmentation. 19 * extension headers are implemented. 20 * route changes now work. 21 * ip6_forward does not confuse sniffers. 22 * etc. 23 * 24 * H. von Brand : Added missing #include <linux/string.h> 25 * Imran Patel : frag id should be in NBO 26 * Kazunori MIYAZAWA @USAGI 27 * : add ip6_append_data and related functions 28 * for datagram xmit 29 */ 30 31 #include <linux/errno.h> 32 #include <linux/kernel.h> 33 #include <linux/string.h> 34 #include <linux/socket.h> 35 #include <linux/net.h> 36 #include <linux/netdevice.h> 37 #include <linux/if_arp.h> 38 #include <linux/in6.h> 39 #include <linux/tcp.h> 40 #include <linux/route.h> 41 #include <linux/module.h> 42 43 #include <linux/netfilter.h> 44 #include <linux/netfilter_ipv6.h> 45 46 #include <net/sock.h> 47 #include <net/snmp.h> 48 49 #include <net/ipv6.h> 50 #include <net/ndisc.h> 51 #include <net/protocol.h> 52 #include <net/ip6_route.h> 53 #include <net/addrconf.h> 54 #include <net/rawv6.h> 55 #include <net/icmp.h> 56 #include <net/xfrm.h> 57 #include <net/checksum.h> 58 59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); 60 61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr) 62 { 63 static u32 ipv6_fragmentation_id = 1; 64 static DEFINE_SPINLOCK(ip6_id_lock); 65 66 spin_lock_bh(&ip6_id_lock); 67 fhdr->identification = htonl(ipv6_fragmentation_id); 68 if (++ipv6_fragmentation_id == 0) 69 ipv6_fragmentation_id = 1; 70 spin_unlock_bh(&ip6_id_lock); 71 } 72 73 int __ip6_local_out(struct sk_buff *skb) 74 { 75 int len; 76 77 len = skb->len - sizeof(struct ipv6hdr); 78 if (len > IPV6_MAXPLEN) 79 len = 0; 80 ipv6_hdr(skb)->payload_len = htons(len); 81 82 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev, 83 dst_output); 84 } 85 86 int ip6_local_out(struct sk_buff *skb) 87 { 88 int err; 89 90 err = __ip6_local_out(skb); 91 if (likely(err == 1)) 92 err = dst_output(skb); 93 94 return err; 95 } 96 EXPORT_SYMBOL_GPL(ip6_local_out); 97 98 static int ip6_output_finish(struct sk_buff *skb) 99 { 100 struct dst_entry *dst = skb->dst; 101 102 if (dst->hh) 103 return neigh_hh_output(dst->hh, skb); 104 else if (dst->neighbour) 105 return dst->neighbour->output(skb); 106 107 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 108 kfree_skb(skb); 109 return -EINVAL; 110 111 } 112 113 /* dev_loopback_xmit for use with netfilter. */ 114 static int ip6_dev_loopback_xmit(struct sk_buff *newskb) 115 { 116 skb_reset_mac_header(newskb); 117 __skb_pull(newskb, skb_network_offset(newskb)); 118 newskb->pkt_type = PACKET_LOOPBACK; 119 newskb->ip_summed = CHECKSUM_UNNECESSARY; 120 BUG_TRAP(newskb->dst); 121 122 netif_rx(newskb); 123 return 0; 124 } 125 126 127 static int ip6_output2(struct sk_buff *skb) 128 { 129 struct dst_entry *dst = skb->dst; 130 struct net_device *dev = dst->dev; 131 132 skb->protocol = htons(ETH_P_IPV6); 133 skb->dev = dev; 134 135 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 136 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; 137 struct inet6_dev *idev = ip6_dst_idev(skb->dst); 138 139 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && 140 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 141 &ipv6_hdr(skb)->saddr)) { 142 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 143 144 /* Do not check for IFF_ALLMULTI; multicast routing 145 is not supported in any case. 146 */ 147 if (newskb) 148 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb, 149 NULL, newskb->dev, 150 ip6_dev_loopback_xmit); 151 152 if (ipv6_hdr(skb)->hop_limit == 0) { 153 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); 154 kfree_skb(skb); 155 return 0; 156 } 157 } 158 159 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); 160 } 161 162 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, 163 ip6_output_finish); 164 } 165 166 static inline int ip6_skb_dst_mtu(struct sk_buff *skb) 167 { 168 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 169 170 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? 171 skb->dst->dev->mtu : dst_mtu(skb->dst); 172 } 173 174 int ip6_output(struct sk_buff *skb) 175 { 176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 177 dst_allfrag(skb->dst)) 178 return ip6_fragment(skb, ip6_output2); 179 else 180 return ip6_output2(skb); 181 } 182 183 /* 184 * xmit an sk_buff (used by TCP) 185 */ 186 187 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, 188 struct ipv6_txoptions *opt, int ipfragok) 189 { 190 struct ipv6_pinfo *np = inet6_sk(sk); 191 struct in6_addr *first_hop = &fl->fl6_dst; 192 struct dst_entry *dst = skb->dst; 193 struct ipv6hdr *hdr; 194 u8 proto = fl->proto; 195 int seg_len = skb->len; 196 int hlimit, tclass; 197 u32 mtu; 198 199 if (opt) { 200 unsigned int head_room; 201 202 /* First: exthdrs may take lots of space (~8K for now) 203 MAX_HEADER is not enough. 204 */ 205 head_room = opt->opt_nflen + opt->opt_flen; 206 seg_len += head_room; 207 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 208 209 if (skb_headroom(skb) < head_room) { 210 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 211 if (skb2 == NULL) { 212 IP6_INC_STATS(ip6_dst_idev(skb->dst), 213 IPSTATS_MIB_OUTDISCARDS); 214 kfree_skb(skb); 215 return -ENOBUFS; 216 } 217 kfree_skb(skb); 218 skb = skb2; 219 if (sk) 220 skb_set_owner_w(skb, sk); 221 } 222 if (opt->opt_flen) 223 ipv6_push_frag_opts(skb, opt, &proto); 224 if (opt->opt_nflen) 225 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 226 } 227 228 skb_push(skb, sizeof(struct ipv6hdr)); 229 skb_reset_network_header(skb); 230 hdr = ipv6_hdr(skb); 231 232 /* 233 * Fill in the IPv6 header 234 */ 235 236 hlimit = -1; 237 if (np) 238 hlimit = np->hop_limit; 239 if (hlimit < 0) 240 hlimit = dst_metric(dst, RTAX_HOPLIMIT); 241 if (hlimit < 0) 242 hlimit = ipv6_get_hoplimit(dst->dev); 243 244 tclass = -1; 245 if (np) 246 tclass = np->tclass; 247 if (tclass < 0) 248 tclass = 0; 249 250 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; 251 252 hdr->payload_len = htons(seg_len); 253 hdr->nexthdr = proto; 254 hdr->hop_limit = hlimit; 255 256 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 257 ipv6_addr_copy(&hdr->daddr, first_hop); 258 259 skb->priority = sk->sk_priority; 260 skb->mark = sk->sk_mark; 261 262 mtu = dst_mtu(dst); 263 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { 264 IP6_INC_STATS(ip6_dst_idev(skb->dst), 265 IPSTATS_MIB_OUTREQUESTS); 266 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, 267 dst_output); 268 } 269 270 if (net_ratelimit()) 271 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); 272 skb->dev = dst->dev; 273 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); 274 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); 275 kfree_skb(skb); 276 return -EMSGSIZE; 277 } 278 279 EXPORT_SYMBOL(ip6_xmit); 280 281 /* 282 * To avoid extra problems ND packets are send through this 283 * routine. It's code duplication but I really want to avoid 284 * extra checks since ipv6_build_header is used by TCP (which 285 * is for us performance critical) 286 */ 287 288 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, 289 struct in6_addr *saddr, struct in6_addr *daddr, 290 int proto, int len) 291 { 292 struct ipv6_pinfo *np = inet6_sk(sk); 293 struct ipv6hdr *hdr; 294 int totlen; 295 296 skb->protocol = htons(ETH_P_IPV6); 297 skb->dev = dev; 298 299 totlen = len + sizeof(struct ipv6hdr); 300 301 skb_reset_network_header(skb); 302 skb_put(skb, sizeof(struct ipv6hdr)); 303 hdr = ipv6_hdr(skb); 304 305 *(__be32*)hdr = htonl(0x60000000); 306 307 hdr->payload_len = htons(len); 308 hdr->nexthdr = proto; 309 hdr->hop_limit = np->hop_limit; 310 311 ipv6_addr_copy(&hdr->saddr, saddr); 312 ipv6_addr_copy(&hdr->daddr, daddr); 313 314 return 0; 315 } 316 317 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 318 { 319 struct ip6_ra_chain *ra; 320 struct sock *last = NULL; 321 322 read_lock(&ip6_ra_lock); 323 for (ra = ip6_ra_chain; ra; ra = ra->next) { 324 struct sock *sk = ra->sk; 325 if (sk && ra->sel == sel && 326 (!sk->sk_bound_dev_if || 327 sk->sk_bound_dev_if == skb->dev->ifindex)) { 328 if (last) { 329 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 330 if (skb2) 331 rawv6_rcv(last, skb2); 332 } 333 last = sk; 334 } 335 } 336 337 if (last) { 338 rawv6_rcv(last, skb); 339 read_unlock(&ip6_ra_lock); 340 return 1; 341 } 342 read_unlock(&ip6_ra_lock); 343 return 0; 344 } 345 346 static int ip6_forward_proxy_check(struct sk_buff *skb) 347 { 348 struct ipv6hdr *hdr = ipv6_hdr(skb); 349 u8 nexthdr = hdr->nexthdr; 350 int offset; 351 352 if (ipv6_ext_hdr(nexthdr)) { 353 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); 354 if (offset < 0) 355 return 0; 356 } else 357 offset = sizeof(struct ipv6hdr); 358 359 if (nexthdr == IPPROTO_ICMPV6) { 360 struct icmp6hdr *icmp6; 361 362 if (!pskb_may_pull(skb, (skb_network_header(skb) + 363 offset + 1 - skb->data))) 364 return 0; 365 366 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 367 368 switch (icmp6->icmp6_type) { 369 case NDISC_ROUTER_SOLICITATION: 370 case NDISC_ROUTER_ADVERTISEMENT: 371 case NDISC_NEIGHBOUR_SOLICITATION: 372 case NDISC_NEIGHBOUR_ADVERTISEMENT: 373 case NDISC_REDIRECT: 374 /* For reaction involving unicast neighbor discovery 375 * message destined to the proxied address, pass it to 376 * input function. 377 */ 378 return 1; 379 default: 380 break; 381 } 382 } 383 384 /* 385 * The proxying router can't forward traffic sent to a link-local 386 * address, so signal the sender and discard the packet. This 387 * behavior is clarified by the MIPv6 specification. 388 */ 389 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 390 dst_link_failure(skb); 391 return -1; 392 } 393 394 return 0; 395 } 396 397 static inline int ip6_forward_finish(struct sk_buff *skb) 398 { 399 return dst_output(skb); 400 } 401 402 int ip6_forward(struct sk_buff *skb) 403 { 404 struct dst_entry *dst = skb->dst; 405 struct ipv6hdr *hdr = ipv6_hdr(skb); 406 struct inet6_skb_parm *opt = IP6CB(skb); 407 408 if (ipv6_devconf.forwarding == 0) 409 goto error; 410 411 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 412 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 413 goto drop; 414 } 415 416 skb_forward_csum(skb); 417 418 /* 419 * We DO NOT make any processing on 420 * RA packets, pushing them to user level AS IS 421 * without ane WARRANTY that application will be able 422 * to interpret them. The reason is that we 423 * cannot make anything clever here. 424 * 425 * We are not end-node, so that if packet contains 426 * AH/ESP, we cannot make anything. 427 * Defragmentation also would be mistake, RA packets 428 * cannot be fragmented, because there is no warranty 429 * that different fragments will go along one path. --ANK 430 */ 431 if (opt->ra) { 432 u8 *ptr = skb_network_header(skb) + opt->ra; 433 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) 434 return 0; 435 } 436 437 /* 438 * check and decrement ttl 439 */ 440 if (hdr->hop_limit <= 1) { 441 /* Force OUTPUT device used as source address */ 442 skb->dev = dst->dev; 443 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 444 0, skb->dev); 445 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 446 447 kfree_skb(skb); 448 return -ETIMEDOUT; 449 } 450 451 /* XXX: idev->cnf.proxy_ndp? */ 452 if (ipv6_devconf.proxy_ndp && 453 pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) { 454 int proxied = ip6_forward_proxy_check(skb); 455 if (proxied > 0) 456 return ip6_input(skb); 457 else if (proxied < 0) { 458 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 459 goto drop; 460 } 461 } 462 463 if (!xfrm6_route_forward(skb)) { 464 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 465 goto drop; 466 } 467 dst = skb->dst; 468 469 /* IPv6 specs say nothing about it, but it is clear that we cannot 470 send redirects to source routed frames. 471 We don't send redirects to frames decapsulated from IPsec. 472 */ 473 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && 474 !skb->sp) { 475 struct in6_addr *target = NULL; 476 struct rt6_info *rt; 477 struct neighbour *n = dst->neighbour; 478 479 /* 480 * incoming and outgoing devices are the same 481 * send a redirect. 482 */ 483 484 rt = (struct rt6_info *) dst; 485 if ((rt->rt6i_flags & RTF_GATEWAY)) 486 target = (struct in6_addr*)&n->primary_key; 487 else 488 target = &hdr->daddr; 489 490 /* Limit redirects both by destination (here) 491 and by source (inside ndisc_send_redirect) 492 */ 493 if (xrlim_allow(dst, 1*HZ)) 494 ndisc_send_redirect(skb, n, target); 495 } else { 496 int addrtype = ipv6_addr_type(&hdr->saddr); 497 498 /* This check is security critical. */ 499 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK)) 500 goto error; 501 if (addrtype & IPV6_ADDR_LINKLOCAL) { 502 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 503 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev); 504 goto error; 505 } 506 } 507 508 if (skb->len > dst_mtu(dst)) { 509 /* Again, force OUTPUT device used as source address */ 510 skb->dev = dst->dev; 511 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); 512 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); 513 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); 514 kfree_skb(skb); 515 return -EMSGSIZE; 516 } 517 518 if (skb_cow(skb, dst->dev->hard_header_len)) { 519 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); 520 goto drop; 521 } 522 523 hdr = ipv6_hdr(skb); 524 525 /* Mangling hops number delayed to point after skb COW */ 526 527 hdr->hop_limit--; 528 529 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 530 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 531 ip6_forward_finish); 532 533 error: 534 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 535 drop: 536 kfree_skb(skb); 537 return -EINVAL; 538 } 539 540 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 541 { 542 to->pkt_type = from->pkt_type; 543 to->priority = from->priority; 544 to->protocol = from->protocol; 545 dst_release(to->dst); 546 to->dst = dst_clone(from->dst); 547 to->dev = from->dev; 548 to->mark = from->mark; 549 550 #ifdef CONFIG_NET_SCHED 551 to->tc_index = from->tc_index; 552 #endif 553 nf_copy(to, from); 554 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 555 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 556 to->nf_trace = from->nf_trace; 557 #endif 558 skb_copy_secmark(to, from); 559 } 560 561 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) 562 { 563 u16 offset = sizeof(struct ipv6hdr); 564 struct ipv6_opt_hdr *exthdr = 565 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); 566 unsigned int packet_len = skb->tail - skb->network_header; 567 int found_rhdr = 0; 568 *nexthdr = &ipv6_hdr(skb)->nexthdr; 569 570 while (offset + 1 <= packet_len) { 571 572 switch (**nexthdr) { 573 574 case NEXTHDR_HOP: 575 break; 576 case NEXTHDR_ROUTING: 577 found_rhdr = 1; 578 break; 579 case NEXTHDR_DEST: 580 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 581 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) 582 break; 583 #endif 584 if (found_rhdr) 585 return offset; 586 break; 587 default : 588 return offset; 589 } 590 591 offset += ipv6_optlen(exthdr); 592 *nexthdr = &exthdr->nexthdr; 593 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + 594 offset); 595 } 596 597 return offset; 598 } 599 EXPORT_SYMBOL_GPL(ip6_find_1stfragopt); 600 601 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 602 { 603 struct net_device *dev; 604 struct sk_buff *frag; 605 struct rt6_info *rt = (struct rt6_info*)skb->dst; 606 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 607 struct ipv6hdr *tmp_hdr; 608 struct frag_hdr *fh; 609 unsigned int mtu, hlen, left, len; 610 __be32 frag_id = 0; 611 int ptr, offset = 0, err=0; 612 u8 *prevhdr, nexthdr = 0; 613 614 dev = rt->u.dst.dev; 615 hlen = ip6_find_1stfragopt(skb, &prevhdr); 616 nexthdr = *prevhdr; 617 618 mtu = ip6_skb_dst_mtu(skb); 619 620 /* We must not fragment if the socket is set to force MTU discovery 621 * or if the skb it not generated by a local socket. (This last 622 * check should be redundant, but it's free.) 623 */ 624 if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) { 625 skb->dev = skb->dst->dev; 626 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); 627 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); 628 kfree_skb(skb); 629 return -EMSGSIZE; 630 } 631 632 if (np && np->frag_size < mtu) { 633 if (np->frag_size) 634 mtu = np->frag_size; 635 } 636 mtu -= hlen + sizeof(struct frag_hdr); 637 638 if (skb_shinfo(skb)->frag_list) { 639 int first_len = skb_pagelen(skb); 640 int truesizes = 0; 641 642 if (first_len - hlen > mtu || 643 ((first_len - hlen) & 7) || 644 skb_cloned(skb)) 645 goto slow_path; 646 647 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) { 648 /* Correct geometry. */ 649 if (frag->len > mtu || 650 ((frag->len & 7) && frag->next) || 651 skb_headroom(frag) < hlen) 652 goto slow_path; 653 654 /* Partially cloned skb? */ 655 if (skb_shared(frag)) 656 goto slow_path; 657 658 BUG_ON(frag->sk); 659 if (skb->sk) { 660 sock_hold(skb->sk); 661 frag->sk = skb->sk; 662 frag->destructor = sock_wfree; 663 truesizes += frag->truesize; 664 } 665 } 666 667 err = 0; 668 offset = 0; 669 frag = skb_shinfo(skb)->frag_list; 670 skb_shinfo(skb)->frag_list = NULL; 671 /* BUILD HEADER */ 672 673 *prevhdr = NEXTHDR_FRAGMENT; 674 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 675 if (!tmp_hdr) { 676 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); 677 return -ENOMEM; 678 } 679 680 __skb_pull(skb, hlen); 681 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); 682 __skb_push(skb, hlen); 683 skb_reset_network_header(skb); 684 memcpy(skb_network_header(skb), tmp_hdr, hlen); 685 686 ipv6_select_ident(skb, fh); 687 fh->nexthdr = nexthdr; 688 fh->reserved = 0; 689 fh->frag_off = htons(IP6_MF); 690 frag_id = fh->identification; 691 692 first_len = skb_pagelen(skb); 693 skb->data_len = first_len - skb_headlen(skb); 694 skb->truesize -= truesizes; 695 skb->len = first_len; 696 ipv6_hdr(skb)->payload_len = htons(first_len - 697 sizeof(struct ipv6hdr)); 698 699 dst_hold(&rt->u.dst); 700 701 for (;;) { 702 /* Prepare header of the next frame, 703 * before previous one went down. */ 704 if (frag) { 705 frag->ip_summed = CHECKSUM_NONE; 706 skb_reset_transport_header(frag); 707 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); 708 __skb_push(frag, hlen); 709 skb_reset_network_header(frag); 710 memcpy(skb_network_header(frag), tmp_hdr, 711 hlen); 712 offset += skb->len - hlen - sizeof(struct frag_hdr); 713 fh->nexthdr = nexthdr; 714 fh->reserved = 0; 715 fh->frag_off = htons(offset); 716 if (frag->next != NULL) 717 fh->frag_off |= htons(IP6_MF); 718 fh->identification = frag_id; 719 ipv6_hdr(frag)->payload_len = 720 htons(frag->len - 721 sizeof(struct ipv6hdr)); 722 ip6_copy_metadata(frag, skb); 723 } 724 725 err = output(skb); 726 if(!err) 727 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES); 728 729 if (err || !frag) 730 break; 731 732 skb = frag; 733 frag = skb->next; 734 skb->next = NULL; 735 } 736 737 kfree(tmp_hdr); 738 739 if (err == 0) { 740 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS); 741 dst_release(&rt->u.dst); 742 return 0; 743 } 744 745 while (frag) { 746 skb = frag->next; 747 kfree_skb(frag); 748 frag = skb; 749 } 750 751 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS); 752 dst_release(&rt->u.dst); 753 return err; 754 } 755 756 slow_path: 757 left = skb->len - hlen; /* Space per frame */ 758 ptr = hlen; /* Where to start from */ 759 760 /* 761 * Fragment the datagram. 762 */ 763 764 *prevhdr = NEXTHDR_FRAGMENT; 765 766 /* 767 * Keep copying data until we run out. 768 */ 769 while(left > 0) { 770 len = left; 771 /* IF: it doesn't fit, use 'mtu' - the data space left */ 772 if (len > mtu) 773 len = mtu; 774 /* IF: we are not sending upto and including the packet end 775 then align the next start on an eight byte boundary */ 776 if (len < left) { 777 len &= ~7; 778 } 779 /* 780 * Allocate buffer. 781 */ 782 783 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { 784 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 785 IP6_INC_STATS(ip6_dst_idev(skb->dst), 786 IPSTATS_MIB_FRAGFAILS); 787 err = -ENOMEM; 788 goto fail; 789 } 790 791 /* 792 * Set up data on packet 793 */ 794 795 ip6_copy_metadata(frag, skb); 796 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); 797 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 798 skb_reset_network_header(frag); 799 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 800 frag->transport_header = (frag->network_header + hlen + 801 sizeof(struct frag_hdr)); 802 803 /* 804 * Charge the memory for the fragment to any owner 805 * it might possess 806 */ 807 if (skb->sk) 808 skb_set_owner_w(frag, skb->sk); 809 810 /* 811 * Copy the packet header into the new buffer. 812 */ 813 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); 814 815 /* 816 * Build fragment header. 817 */ 818 fh->nexthdr = nexthdr; 819 fh->reserved = 0; 820 if (!frag_id) { 821 ipv6_select_ident(skb, fh); 822 frag_id = fh->identification; 823 } else 824 fh->identification = frag_id; 825 826 /* 827 * Copy a block of the IP datagram. 828 */ 829 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) 830 BUG(); 831 left -= len; 832 833 fh->frag_off = htons(offset); 834 if (left > 0) 835 fh->frag_off |= htons(IP6_MF); 836 ipv6_hdr(frag)->payload_len = htons(frag->len - 837 sizeof(struct ipv6hdr)); 838 839 ptr += len; 840 offset += len; 841 842 /* 843 * Put this fragment into the sending queue. 844 */ 845 err = output(frag); 846 if (err) 847 goto fail; 848 849 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES); 850 } 851 IP6_INC_STATS(ip6_dst_idev(skb->dst), 852 IPSTATS_MIB_FRAGOKS); 853 kfree_skb(skb); 854 return err; 855 856 fail: 857 IP6_INC_STATS(ip6_dst_idev(skb->dst), 858 IPSTATS_MIB_FRAGFAILS); 859 kfree_skb(skb); 860 return err; 861 } 862 863 static inline int ip6_rt_check(struct rt6key *rt_key, 864 struct in6_addr *fl_addr, 865 struct in6_addr *addr_cache) 866 { 867 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 868 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); 869 } 870 871 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 872 struct dst_entry *dst, 873 struct flowi *fl) 874 { 875 struct ipv6_pinfo *np = inet6_sk(sk); 876 struct rt6_info *rt = (struct rt6_info *)dst; 877 878 if (!dst) 879 goto out; 880 881 /* Yes, checking route validity in not connected 882 * case is not very simple. Take into account, 883 * that we do not support routing by source, TOS, 884 * and MSG_DONTROUTE --ANK (980726) 885 * 886 * 1. ip6_rt_check(): If route was host route, 887 * check that cached destination is current. 888 * If it is network route, we still may 889 * check its validity using saved pointer 890 * to the last used address: daddr_cache. 891 * We do not want to save whole address now, 892 * (because main consumer of this service 893 * is tcp, which has not this problem), 894 * so that the last trick works only on connected 895 * sockets. 896 * 2. oif also should be the same. 897 */ 898 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || 899 #ifdef CONFIG_IPV6_SUBTREES 900 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || 901 #endif 902 (fl->oif && fl->oif != dst->dev->ifindex)) { 903 dst_release(dst); 904 dst = NULL; 905 } 906 907 out: 908 return dst; 909 } 910 911 static int ip6_dst_lookup_tail(struct sock *sk, 912 struct dst_entry **dst, struct flowi *fl) 913 { 914 int err; 915 916 if (*dst == NULL) 917 *dst = ip6_route_output(sk, fl); 918 919 if ((err = (*dst)->error)) 920 goto out_err_release; 921 922 if (ipv6_addr_any(&fl->fl6_src)) { 923 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); 924 if (err) 925 goto out_err_release; 926 } 927 928 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 929 /* 930 * Here if the dst entry we've looked up 931 * has a neighbour entry that is in the INCOMPLETE 932 * state and the src address from the flow is 933 * marked as OPTIMISTIC, we release the found 934 * dst entry and replace it instead with the 935 * dst entry of the nexthop router 936 */ 937 if (!((*dst)->neighbour->nud_state & NUD_VALID)) { 938 struct inet6_ifaddr *ifp; 939 struct flowi fl_gw; 940 int redirect; 941 942 ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src, 943 (*dst)->dev, 1); 944 945 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 946 if (ifp) 947 in6_ifa_put(ifp); 948 949 if (redirect) { 950 /* 951 * We need to get the dst entry for the 952 * default router instead 953 */ 954 dst_release(*dst); 955 memcpy(&fl_gw, fl, sizeof(struct flowi)); 956 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); 957 *dst = ip6_route_output(sk, &fl_gw); 958 if ((err = (*dst)->error)) 959 goto out_err_release; 960 } 961 } 962 #endif 963 964 return 0; 965 966 out_err_release: 967 if (err == -ENETUNREACH) 968 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES); 969 dst_release(*dst); 970 *dst = NULL; 971 return err; 972 } 973 974 /** 975 * ip6_dst_lookup - perform route lookup on flow 976 * @sk: socket which provides route info 977 * @dst: pointer to dst_entry * for result 978 * @fl: flow to lookup 979 * 980 * This function performs a route lookup on the given flow. 981 * 982 * It returns zero on success, or a standard errno code on error. 983 */ 984 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 985 { 986 *dst = NULL; 987 return ip6_dst_lookup_tail(sk, dst, fl); 988 } 989 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 990 991 /** 992 * ip6_sk_dst_lookup - perform socket cached route lookup on flow 993 * @sk: socket which provides the dst cache and route info 994 * @dst: pointer to dst_entry * for result 995 * @fl: flow to lookup 996 * 997 * This function performs a route lookup on the given flow with the 998 * possibility of using the cached route in the socket if it is valid. 999 * It will take the socket dst lock when operating on the dst cache. 1000 * As a result, this function can only be used in process context. 1001 * 1002 * It returns zero on success, or a standard errno code on error. 1003 */ 1004 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 1005 { 1006 *dst = NULL; 1007 if (sk) { 1008 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1009 *dst = ip6_sk_dst_check(sk, *dst, fl); 1010 } 1011 1012 return ip6_dst_lookup_tail(sk, dst, fl); 1013 } 1014 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); 1015 1016 static inline int ip6_ufo_append_data(struct sock *sk, 1017 int getfrag(void *from, char *to, int offset, int len, 1018 int odd, struct sk_buff *skb), 1019 void *from, int length, int hh_len, int fragheaderlen, 1020 int transhdrlen, int mtu,unsigned int flags) 1021 1022 { 1023 struct sk_buff *skb; 1024 int err; 1025 1026 /* There is support for UDP large send offload by network 1027 * device, so create one single skb packet containing complete 1028 * udp datagram 1029 */ 1030 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 1031 skb = sock_alloc_send_skb(sk, 1032 hh_len + fragheaderlen + transhdrlen + 20, 1033 (flags & MSG_DONTWAIT), &err); 1034 if (skb == NULL) 1035 return -ENOMEM; 1036 1037 /* reserve space for Hardware header */ 1038 skb_reserve(skb, hh_len); 1039 1040 /* create space for UDP/IP header */ 1041 skb_put(skb,fragheaderlen + transhdrlen); 1042 1043 /* initialize network header pointer */ 1044 skb_reset_network_header(skb); 1045 1046 /* initialize protocol header pointer */ 1047 skb->transport_header = skb->network_header + fragheaderlen; 1048 1049 skb->ip_summed = CHECKSUM_PARTIAL; 1050 skb->csum = 0; 1051 sk->sk_sndmsg_off = 0; 1052 } 1053 1054 err = skb_append_datato_frags(sk,skb, getfrag, from, 1055 (length - transhdrlen)); 1056 if (!err) { 1057 struct frag_hdr fhdr; 1058 1059 /* specify the length of each IP datagram fragment*/ 1060 skb_shinfo(skb)->gso_size = mtu - fragheaderlen - 1061 sizeof(struct frag_hdr); 1062 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1063 ipv6_select_ident(skb, &fhdr); 1064 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1065 __skb_queue_tail(&sk->sk_write_queue, skb); 1066 1067 return 0; 1068 } 1069 /* There is not enough support do UPD LSO, 1070 * so follow normal path 1071 */ 1072 kfree_skb(skb); 1073 1074 return err; 1075 } 1076 1077 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 1078 int offset, int len, int odd, struct sk_buff *skb), 1079 void *from, int length, int transhdrlen, 1080 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, 1081 struct rt6_info *rt, unsigned int flags) 1082 { 1083 struct inet_sock *inet = inet_sk(sk); 1084 struct ipv6_pinfo *np = inet6_sk(sk); 1085 struct sk_buff *skb; 1086 unsigned int maxfraglen, fragheaderlen; 1087 int exthdrlen; 1088 int hh_len; 1089 int mtu; 1090 int copy; 1091 int err; 1092 int offset = 0; 1093 int csummode = CHECKSUM_NONE; 1094 1095 if (flags&MSG_PROBE) 1096 return 0; 1097 if (skb_queue_empty(&sk->sk_write_queue)) { 1098 /* 1099 * setup for corking 1100 */ 1101 if (opt) { 1102 if (np->cork.opt == NULL) { 1103 np->cork.opt = kmalloc(opt->tot_len, 1104 sk->sk_allocation); 1105 if (unlikely(np->cork.opt == NULL)) 1106 return -ENOBUFS; 1107 } else if (np->cork.opt->tot_len < opt->tot_len) { 1108 printk(KERN_DEBUG "ip6_append_data: invalid option length\n"); 1109 return -EINVAL; 1110 } 1111 memcpy(np->cork.opt, opt, opt->tot_len); 1112 inet->cork.flags |= IPCORK_OPT; 1113 /* need source address above miyazawa*/ 1114 } 1115 dst_hold(&rt->u.dst); 1116 np->cork.rt = rt; 1117 inet->cork.fl = *fl; 1118 np->cork.hop_limit = hlimit; 1119 np->cork.tclass = tclass; 1120 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1121 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); 1122 if (np->frag_size < mtu) { 1123 if (np->frag_size) 1124 mtu = np->frag_size; 1125 } 1126 inet->cork.fragsize = mtu; 1127 if (dst_allfrag(rt->u.dst.path)) 1128 inet->cork.flags |= IPCORK_ALLFRAG; 1129 inet->cork.length = 0; 1130 sk->sk_sndmsg_page = NULL; 1131 sk->sk_sndmsg_off = 0; 1132 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - 1133 rt->rt6i_nfheader_len; 1134 length += exthdrlen; 1135 transhdrlen += exthdrlen; 1136 } else { 1137 rt = np->cork.rt; 1138 fl = &inet->cork.fl; 1139 if (inet->cork.flags & IPCORK_OPT) 1140 opt = np->cork.opt; 1141 transhdrlen = 0; 1142 exthdrlen = 0; 1143 mtu = inet->cork.fragsize; 1144 } 1145 1146 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 1147 1148 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1149 (opt ? opt->opt_nflen : 0); 1150 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 1151 1152 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1153 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { 1154 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); 1155 return -EMSGSIZE; 1156 } 1157 } 1158 1159 /* 1160 * Let's try using as much space as possible. 1161 * Use MTU if total length of the message fits into the MTU. 1162 * Otherwise, we need to reserve fragment header and 1163 * fragment alignment (= 8-15 octects, in total). 1164 * 1165 * Note that we may need to "move" the data from the tail of 1166 * of the buffer to the new fragment when we split 1167 * the message. 1168 * 1169 * FIXME: It may be fragmented into multiple chunks 1170 * at once if non-fragmentable extension headers 1171 * are too large. 1172 * --yoshfuji 1173 */ 1174 1175 inet->cork.length += length; 1176 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && 1177 (rt->u.dst.dev->features & NETIF_F_UFO)) { 1178 1179 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, 1180 fragheaderlen, transhdrlen, mtu, 1181 flags); 1182 if (err) 1183 goto error; 1184 return 0; 1185 } 1186 1187 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1188 goto alloc_new_skb; 1189 1190 while (length > 0) { 1191 /* Check if the remaining data fits into current packet. */ 1192 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 1193 if (copy < length) 1194 copy = maxfraglen - skb->len; 1195 1196 if (copy <= 0) { 1197 char *data; 1198 unsigned int datalen; 1199 unsigned int fraglen; 1200 unsigned int fraggap; 1201 unsigned int alloclen; 1202 struct sk_buff *skb_prev; 1203 alloc_new_skb: 1204 skb_prev = skb; 1205 1206 /* There's no room in the current skb */ 1207 if (skb_prev) 1208 fraggap = skb_prev->len - maxfraglen; 1209 else 1210 fraggap = 0; 1211 1212 /* 1213 * If remaining data exceeds the mtu, 1214 * we know we need more fragment(s). 1215 */ 1216 datalen = length + fraggap; 1217 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1218 datalen = maxfraglen - fragheaderlen; 1219 1220 fraglen = datalen + fragheaderlen; 1221 if ((flags & MSG_MORE) && 1222 !(rt->u.dst.dev->features&NETIF_F_SG)) 1223 alloclen = mtu; 1224 else 1225 alloclen = datalen + fragheaderlen; 1226 1227 /* 1228 * The last fragment gets additional space at tail. 1229 * Note: we overallocate on fragments with MSG_MODE 1230 * because we have no idea if we're the last one. 1231 */ 1232 if (datalen == length + fraggap) 1233 alloclen += rt->u.dst.trailer_len; 1234 1235 /* 1236 * We just reserve space for fragment header. 1237 * Note: this may be overallocation if the message 1238 * (without MSG_MORE) fits into the MTU. 1239 */ 1240 alloclen += sizeof(struct frag_hdr); 1241 1242 if (transhdrlen) { 1243 skb = sock_alloc_send_skb(sk, 1244 alloclen + hh_len, 1245 (flags & MSG_DONTWAIT), &err); 1246 } else { 1247 skb = NULL; 1248 if (atomic_read(&sk->sk_wmem_alloc) <= 1249 2 * sk->sk_sndbuf) 1250 skb = sock_wmalloc(sk, 1251 alloclen + hh_len, 1, 1252 sk->sk_allocation); 1253 if (unlikely(skb == NULL)) 1254 err = -ENOBUFS; 1255 } 1256 if (skb == NULL) 1257 goto error; 1258 /* 1259 * Fill in the control structures 1260 */ 1261 skb->ip_summed = csummode; 1262 skb->csum = 0; 1263 /* reserve for fragmentation */ 1264 skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); 1265 1266 /* 1267 * Find where to start putting bytes 1268 */ 1269 data = skb_put(skb, fraglen); 1270 skb_set_network_header(skb, exthdrlen); 1271 data += fragheaderlen; 1272 skb->transport_header = (skb->network_header + 1273 fragheaderlen); 1274 if (fraggap) { 1275 skb->csum = skb_copy_and_csum_bits( 1276 skb_prev, maxfraglen, 1277 data + transhdrlen, fraggap, 0); 1278 skb_prev->csum = csum_sub(skb_prev->csum, 1279 skb->csum); 1280 data += fraggap; 1281 pskb_trim_unique(skb_prev, maxfraglen); 1282 } 1283 copy = datalen - transhdrlen - fraggap; 1284 if (copy < 0) { 1285 err = -EINVAL; 1286 kfree_skb(skb); 1287 goto error; 1288 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1289 err = -EFAULT; 1290 kfree_skb(skb); 1291 goto error; 1292 } 1293 1294 offset += copy; 1295 length -= datalen - fraggap; 1296 transhdrlen = 0; 1297 exthdrlen = 0; 1298 csummode = CHECKSUM_NONE; 1299 1300 /* 1301 * Put the packet on the pending queue 1302 */ 1303 __skb_queue_tail(&sk->sk_write_queue, skb); 1304 continue; 1305 } 1306 1307 if (copy > length) 1308 copy = length; 1309 1310 if (!(rt->u.dst.dev->features&NETIF_F_SG)) { 1311 unsigned int off; 1312 1313 off = skb->len; 1314 if (getfrag(from, skb_put(skb, copy), 1315 offset, copy, off, skb) < 0) { 1316 __skb_trim(skb, off); 1317 err = -EFAULT; 1318 goto error; 1319 } 1320 } else { 1321 int i = skb_shinfo(skb)->nr_frags; 1322 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 1323 struct page *page = sk->sk_sndmsg_page; 1324 int off = sk->sk_sndmsg_off; 1325 unsigned int left; 1326 1327 if (page && (left = PAGE_SIZE - off) > 0) { 1328 if (copy >= left) 1329 copy = left; 1330 if (page != frag->page) { 1331 if (i == MAX_SKB_FRAGS) { 1332 err = -EMSGSIZE; 1333 goto error; 1334 } 1335 get_page(page); 1336 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 1337 frag = &skb_shinfo(skb)->frags[i]; 1338 } 1339 } else if(i < MAX_SKB_FRAGS) { 1340 if (copy > PAGE_SIZE) 1341 copy = PAGE_SIZE; 1342 page = alloc_pages(sk->sk_allocation, 0); 1343 if (page == NULL) { 1344 err = -ENOMEM; 1345 goto error; 1346 } 1347 sk->sk_sndmsg_page = page; 1348 sk->sk_sndmsg_off = 0; 1349 1350 skb_fill_page_desc(skb, i, page, 0, 0); 1351 frag = &skb_shinfo(skb)->frags[i]; 1352 } else { 1353 err = -EMSGSIZE; 1354 goto error; 1355 } 1356 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { 1357 err = -EFAULT; 1358 goto error; 1359 } 1360 sk->sk_sndmsg_off += copy; 1361 frag->size += copy; 1362 skb->len += copy; 1363 skb->data_len += copy; 1364 skb->truesize += copy; 1365 atomic_add(copy, &sk->sk_wmem_alloc); 1366 } 1367 offset += copy; 1368 length -= copy; 1369 } 1370 return 0; 1371 error: 1372 inet->cork.length -= length; 1373 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1374 return err; 1375 } 1376 1377 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) 1378 { 1379 inet->cork.flags &= ~IPCORK_OPT; 1380 kfree(np->cork.opt); 1381 np->cork.opt = NULL; 1382 if (np->cork.rt) { 1383 dst_release(&np->cork.rt->u.dst); 1384 np->cork.rt = NULL; 1385 inet->cork.flags &= ~IPCORK_ALLFRAG; 1386 } 1387 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1388 } 1389 1390 int ip6_push_pending_frames(struct sock *sk) 1391 { 1392 struct sk_buff *skb, *tmp_skb; 1393 struct sk_buff **tail_skb; 1394 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1395 struct inet_sock *inet = inet_sk(sk); 1396 struct ipv6_pinfo *np = inet6_sk(sk); 1397 struct ipv6hdr *hdr; 1398 struct ipv6_txoptions *opt = np->cork.opt; 1399 struct rt6_info *rt = np->cork.rt; 1400 struct flowi *fl = &inet->cork.fl; 1401 unsigned char proto = fl->proto; 1402 int err = 0; 1403 1404 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1405 goto out; 1406 tail_skb = &(skb_shinfo(skb)->frag_list); 1407 1408 /* move skb->data to ip header from ext header */ 1409 if (skb->data < skb_network_header(skb)) 1410 __skb_pull(skb, skb_network_offset(skb)); 1411 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1412 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1413 *tail_skb = tmp_skb; 1414 tail_skb = &(tmp_skb->next); 1415 skb->len += tmp_skb->len; 1416 skb->data_len += tmp_skb->len; 1417 skb->truesize += tmp_skb->truesize; 1418 __sock_put(tmp_skb->sk); 1419 tmp_skb->destructor = NULL; 1420 tmp_skb->sk = NULL; 1421 } 1422 1423 ipv6_addr_copy(final_dst, &fl->fl6_dst); 1424 __skb_pull(skb, skb_network_header_len(skb)); 1425 if (opt && opt->opt_flen) 1426 ipv6_push_frag_opts(skb, opt, &proto); 1427 if (opt && opt->opt_nflen) 1428 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1429 1430 skb_push(skb, sizeof(struct ipv6hdr)); 1431 skb_reset_network_header(skb); 1432 hdr = ipv6_hdr(skb); 1433 1434 *(__be32*)hdr = fl->fl6_flowlabel | 1435 htonl(0x60000000 | ((int)np->cork.tclass << 20)); 1436 1437 hdr->hop_limit = np->cork.hop_limit; 1438 hdr->nexthdr = proto; 1439 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 1440 ipv6_addr_copy(&hdr->daddr, final_dst); 1441 1442 skb->priority = sk->sk_priority; 1443 skb->mark = sk->sk_mark; 1444 1445 skb->dst = dst_clone(&rt->u.dst); 1446 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); 1447 if (proto == IPPROTO_ICMPV6) { 1448 struct inet6_dev *idev = ip6_dst_idev(skb->dst); 1449 1450 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type); 1451 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); 1452 } 1453 1454 err = ip6_local_out(skb); 1455 if (err) { 1456 if (err > 0) 1457 err = np->recverr ? net_xmit_errno(err) : 0; 1458 if (err) 1459 goto error; 1460 } 1461 1462 out: 1463 ip6_cork_release(inet, np); 1464 return err; 1465 error: 1466 goto out; 1467 } 1468 1469 void ip6_flush_pending_frames(struct sock *sk) 1470 { 1471 struct sk_buff *skb; 1472 1473 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { 1474 if (skb->dst) 1475 IP6_INC_STATS(ip6_dst_idev(skb->dst), 1476 IPSTATS_MIB_OUTDISCARDS); 1477 kfree_skb(skb); 1478 } 1479 1480 ip6_cork_release(inet_sk(sk), inet6_sk(sk)); 1481 } 1482