1 /* 2 * IPv6 output functions 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $ 9 * 10 * Based on linux/net/ipv4/ip_output.c 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 * 17 * Changes: 18 * A.N.Kuznetsov : airthmetics in fragmentation. 19 * extension headers are implemented. 20 * route changes now work. 21 * ip6_forward does not confuse sniffers. 22 * etc. 23 * 24 * H. von Brand : Added missing #include <linux/string.h> 25 * Imran Patel : frag id should be in NBO 26 * Kazunori MIYAZAWA @USAGI 27 * : add ip6_append_data and related functions 28 * for datagram xmit 29 */ 30 31 #include <linux/config.h> 32 #include <linux/errno.h> 33 #include <linux/types.h> 34 #include <linux/string.h> 35 #include <linux/socket.h> 36 #include <linux/net.h> 37 #include <linux/netdevice.h> 38 #include <linux/if_arp.h> 39 #include <linux/in6.h> 40 #include <linux/tcp.h> 41 #include <linux/route.h> 42 43 #include <linux/netfilter.h> 44 #include <linux/netfilter_ipv6.h> 45 46 #include <net/sock.h> 47 #include <net/snmp.h> 48 49 #include <net/ipv6.h> 50 #include <net/ndisc.h> 51 #include <net/protocol.h> 52 #include <net/ip6_route.h> 53 #include <net/addrconf.h> 54 #include <net/rawv6.h> 55 #include <net/icmp.h> 56 #include <net/xfrm.h> 57 #include <net/checksum.h> 58 59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); 60 61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr) 62 { 63 static u32 ipv6_fragmentation_id = 1; 64 static DEFINE_SPINLOCK(ip6_id_lock); 65 66 spin_lock_bh(&ip6_id_lock); 67 fhdr->identification = htonl(ipv6_fragmentation_id); 68 if (++ipv6_fragmentation_id == 0) 69 ipv6_fragmentation_id = 1; 70 spin_unlock_bh(&ip6_id_lock); 71 } 72 73 static inline int ip6_output_finish(struct sk_buff *skb) 74 { 75 76 struct dst_entry *dst = skb->dst; 77 struct hh_cache *hh = dst->hh; 78 79 if (hh) { 80 int hh_alen; 81 82 read_lock_bh(&hh->hh_lock); 83 hh_alen = HH_DATA_ALIGN(hh->hh_len); 84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); 85 read_unlock_bh(&hh->hh_lock); 86 skb_push(skb, hh->hh_len); 87 return hh->hh_output(skb); 88 } else if (dst->neighbour) 89 return dst->neighbour->output(skb); 90 91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 92 kfree_skb(skb); 93 return -EINVAL; 94 95 } 96 97 /* dev_loopback_xmit for use with netfilter. */ 98 static int ip6_dev_loopback_xmit(struct sk_buff *newskb) 99 { 100 newskb->mac.raw = newskb->data; 101 __skb_pull(newskb, newskb->nh.raw - newskb->data); 102 newskb->pkt_type = PACKET_LOOPBACK; 103 newskb->ip_summed = CHECKSUM_UNNECESSARY; 104 BUG_TRAP(newskb->dst); 105 106 netif_rx(newskb); 107 return 0; 108 } 109 110 111 static int ip6_output2(struct sk_buff *skb) 112 { 113 struct dst_entry *dst = skb->dst; 114 struct net_device *dev = dst->dev; 115 116 skb->protocol = htons(ETH_P_IPV6); 117 skb->dev = dev; 118 119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) { 120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; 121 122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && 123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr, 124 &skb->nh.ipv6h->saddr)) { 125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 126 127 /* Do not check for IFF_ALLMULTI; multicast routing 128 is not supported in any case. 129 */ 130 if (newskb) 131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL, 132 newskb->dev, 133 ip6_dev_loopback_xmit); 134 135 if (skb->nh.ipv6h->hop_limit == 0) { 136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); 137 kfree_skb(skb); 138 return 0; 139 } 140 } 141 142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); 143 } 144 145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); 146 } 147 148 int ip6_output(struct sk_buff *skb) 149 { 150 if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst)) 151 return ip6_fragment(skb, ip6_output2); 152 else 153 return ip6_output2(skb); 154 } 155 156 #ifdef CONFIG_NETFILTER 157 int ip6_route_me_harder(struct sk_buff *skb) 158 { 159 struct ipv6hdr *iph = skb->nh.ipv6h; 160 struct dst_entry *dst; 161 struct flowi fl = { 162 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, 163 .nl_u = 164 { .ip6_u = 165 { .daddr = iph->daddr, 166 .saddr = iph->saddr, } }, 167 .proto = iph->nexthdr, 168 }; 169 170 dst = ip6_route_output(skb->sk, &fl); 171 172 if (dst->error) { 173 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); 174 LIMIT_NETDEBUG( 175 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); 176 dst_release(dst); 177 return -EINVAL; 178 } 179 180 /* Drop old route. */ 181 dst_release(skb->dst); 182 183 skb->dst = dst; 184 return 0; 185 } 186 #endif 187 188 static inline int ip6_maybe_reroute(struct sk_buff *skb) 189 { 190 #ifdef CONFIG_NETFILTER 191 if (skb->nfcache & NFC_ALTERED){ 192 if (ip6_route_me_harder(skb) != 0){ 193 kfree_skb(skb); 194 return -EINVAL; 195 } 196 } 197 #endif /* CONFIG_NETFILTER */ 198 return dst_output(skb); 199 } 200 201 /* 202 * xmit an sk_buff (used by TCP) 203 */ 204 205 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, 206 struct ipv6_txoptions *opt, int ipfragok) 207 { 208 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL; 209 struct in6_addr *first_hop = &fl->fl6_dst; 210 struct dst_entry *dst = skb->dst; 211 struct ipv6hdr *hdr; 212 u8 proto = fl->proto; 213 int seg_len = skb->len; 214 int hlimit; 215 u32 mtu; 216 217 if (opt) { 218 int head_room; 219 220 /* First: exthdrs may take lots of space (~8K for now) 221 MAX_HEADER is not enough. 222 */ 223 head_room = opt->opt_nflen + opt->opt_flen; 224 seg_len += head_room; 225 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 226 227 if (skb_headroom(skb) < head_room) { 228 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 229 kfree_skb(skb); 230 skb = skb2; 231 if (skb == NULL) { 232 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); 233 return -ENOBUFS; 234 } 235 if (sk) 236 skb_set_owner_w(skb, sk); 237 } 238 if (opt->opt_flen) 239 ipv6_push_frag_opts(skb, opt, &proto); 240 if (opt->opt_nflen) 241 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 242 } 243 244 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr)); 245 246 /* 247 * Fill in the IPv6 header 248 */ 249 250 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel; 251 hlimit = -1; 252 if (np) 253 hlimit = np->hop_limit; 254 if (hlimit < 0) 255 hlimit = dst_metric(dst, RTAX_HOPLIMIT); 256 if (hlimit < 0) 257 hlimit = ipv6_get_hoplimit(dst->dev); 258 259 hdr->payload_len = htons(seg_len); 260 hdr->nexthdr = proto; 261 hdr->hop_limit = hlimit; 262 263 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 264 ipv6_addr_copy(&hdr->daddr, first_hop); 265 266 mtu = dst_mtu(dst); 267 if ((skb->len <= mtu) || ipfragok) { 268 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 269 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute); 270 } 271 272 if (net_ratelimit()) 273 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); 274 skb->dev = dst->dev; 275 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); 276 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); 277 kfree_skb(skb); 278 return -EMSGSIZE; 279 } 280 281 /* 282 * To avoid extra problems ND packets are send through this 283 * routine. It's code duplication but I really want to avoid 284 * extra checks since ipv6_build_header is used by TCP (which 285 * is for us performance critical) 286 */ 287 288 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, 289 struct in6_addr *saddr, struct in6_addr *daddr, 290 int proto, int len) 291 { 292 struct ipv6_pinfo *np = inet6_sk(sk); 293 struct ipv6hdr *hdr; 294 int totlen; 295 296 skb->protocol = htons(ETH_P_IPV6); 297 skb->dev = dev; 298 299 totlen = len + sizeof(struct ipv6hdr); 300 301 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); 302 skb->nh.ipv6h = hdr; 303 304 *(u32*)hdr = htonl(0x60000000); 305 306 hdr->payload_len = htons(len); 307 hdr->nexthdr = proto; 308 hdr->hop_limit = np->hop_limit; 309 310 ipv6_addr_copy(&hdr->saddr, saddr); 311 ipv6_addr_copy(&hdr->daddr, daddr); 312 313 return 0; 314 } 315 316 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 317 { 318 struct ip6_ra_chain *ra; 319 struct sock *last = NULL; 320 321 read_lock(&ip6_ra_lock); 322 for (ra = ip6_ra_chain; ra; ra = ra->next) { 323 struct sock *sk = ra->sk; 324 if (sk && ra->sel == sel) { 325 if (last) { 326 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 327 if (skb2) 328 rawv6_rcv(last, skb2); 329 } 330 last = sk; 331 } 332 } 333 334 if (last) { 335 rawv6_rcv(last, skb); 336 read_unlock(&ip6_ra_lock); 337 return 1; 338 } 339 read_unlock(&ip6_ra_lock); 340 return 0; 341 } 342 343 static inline int ip6_forward_finish(struct sk_buff *skb) 344 { 345 return dst_output(skb); 346 } 347 348 int ip6_forward(struct sk_buff *skb) 349 { 350 struct dst_entry *dst = skb->dst; 351 struct ipv6hdr *hdr = skb->nh.ipv6h; 352 struct inet6_skb_parm *opt = IP6CB(skb); 353 354 if (ipv6_devconf.forwarding == 0) 355 goto error; 356 357 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 358 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); 359 goto drop; 360 } 361 362 skb->ip_summed = CHECKSUM_NONE; 363 364 /* 365 * We DO NOT make any processing on 366 * RA packets, pushing them to user level AS IS 367 * without ane WARRANTY that application will be able 368 * to interpret them. The reason is that we 369 * cannot make anything clever here. 370 * 371 * We are not end-node, so that if packet contains 372 * AH/ESP, we cannot make anything. 373 * Defragmentation also would be mistake, RA packets 374 * cannot be fragmented, because there is no warranty 375 * that different fragments will go along one path. --ANK 376 */ 377 if (opt->ra) { 378 u8 *ptr = skb->nh.raw + opt->ra; 379 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) 380 return 0; 381 } 382 383 /* 384 * check and decrement ttl 385 */ 386 if (hdr->hop_limit <= 1) { 387 /* Force OUTPUT device used as source address */ 388 skb->dev = dst->dev; 389 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 390 0, skb->dev); 391 392 kfree_skb(skb); 393 return -ETIMEDOUT; 394 } 395 396 if (!xfrm6_route_forward(skb)) { 397 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); 398 goto drop; 399 } 400 dst = skb->dst; 401 402 /* IPv6 specs say nothing about it, but it is clear that we cannot 403 send redirects to source routed frames. 404 */ 405 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) { 406 struct in6_addr *target = NULL; 407 struct rt6_info *rt; 408 struct neighbour *n = dst->neighbour; 409 410 /* 411 * incoming and outgoing devices are the same 412 * send a redirect. 413 */ 414 415 rt = (struct rt6_info *) dst; 416 if ((rt->rt6i_flags & RTF_GATEWAY)) 417 target = (struct in6_addr*)&n->primary_key; 418 else 419 target = &hdr->daddr; 420 421 /* Limit redirects both by destination (here) 422 and by source (inside ndisc_send_redirect) 423 */ 424 if (xrlim_allow(dst, 1*HZ)) 425 ndisc_send_redirect(skb, n, target); 426 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK 427 |IPV6_ADDR_LINKLOCAL)) { 428 /* This check is security critical. */ 429 goto error; 430 } 431 432 if (skb->len > dst_mtu(dst)) { 433 /* Again, force OUTPUT device used as source address */ 434 skb->dev = dst->dev; 435 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); 436 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS); 437 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS); 438 kfree_skb(skb); 439 return -EMSGSIZE; 440 } 441 442 if (skb_cow(skb, dst->dev->hard_header_len)) { 443 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); 444 goto drop; 445 } 446 447 hdr = skb->nh.ipv6h; 448 449 /* Mangling hops number delayed to point after skb COW */ 450 451 hdr->hop_limit--; 452 453 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); 454 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); 455 456 error: 457 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); 458 drop: 459 kfree_skb(skb); 460 return -EINVAL; 461 } 462 463 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 464 { 465 to->pkt_type = from->pkt_type; 466 to->priority = from->priority; 467 to->protocol = from->protocol; 468 to->security = from->security; 469 dst_release(to->dst); 470 to->dst = dst_clone(from->dst); 471 to->dev = from->dev; 472 473 #ifdef CONFIG_NET_SCHED 474 to->tc_index = from->tc_index; 475 #endif 476 #ifdef CONFIG_NETFILTER 477 to->nfmark = from->nfmark; 478 /* Connection association is same as pre-frag packet */ 479 to->nfct = from->nfct; 480 nf_conntrack_get(to->nfct); 481 to->nfctinfo = from->nfctinfo; 482 #ifdef CONFIG_BRIDGE_NETFILTER 483 nf_bridge_put(to->nf_bridge); 484 to->nf_bridge = from->nf_bridge; 485 nf_bridge_get(to->nf_bridge); 486 #endif 487 #ifdef CONFIG_NETFILTER_DEBUG 488 to->nf_debug = from->nf_debug; 489 #endif 490 #endif 491 } 492 493 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) 494 { 495 u16 offset = sizeof(struct ipv6hdr); 496 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); 497 unsigned int packet_len = skb->tail - skb->nh.raw; 498 int found_rhdr = 0; 499 *nexthdr = &skb->nh.ipv6h->nexthdr; 500 501 while (offset + 1 <= packet_len) { 502 503 switch (**nexthdr) { 504 505 case NEXTHDR_HOP: 506 case NEXTHDR_ROUTING: 507 case NEXTHDR_DEST: 508 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1; 509 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset; 510 offset += ipv6_optlen(exthdr); 511 *nexthdr = &exthdr->nexthdr; 512 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); 513 break; 514 default : 515 return offset; 516 } 517 } 518 519 return offset; 520 } 521 522 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 523 { 524 struct net_device *dev; 525 struct sk_buff *frag; 526 struct rt6_info *rt = (struct rt6_info*)skb->dst; 527 struct ipv6hdr *tmp_hdr; 528 struct frag_hdr *fh; 529 unsigned int mtu, hlen, left, len; 530 u32 frag_id = 0; 531 int ptr, offset = 0, err=0; 532 u8 *prevhdr, nexthdr = 0; 533 534 dev = rt->u.dst.dev; 535 hlen = ip6_find_1stfragopt(skb, &prevhdr); 536 nexthdr = *prevhdr; 537 538 mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr); 539 540 if (skb_shinfo(skb)->frag_list) { 541 int first_len = skb_pagelen(skb); 542 543 if (first_len - hlen > mtu || 544 ((first_len - hlen) & 7) || 545 skb_cloned(skb)) 546 goto slow_path; 547 548 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) { 549 /* Correct geometry. */ 550 if (frag->len > mtu || 551 ((frag->len & 7) && frag->next) || 552 skb_headroom(frag) < hlen) 553 goto slow_path; 554 555 /* Correct socket ownership. */ 556 if (frag->sk == NULL) 557 goto slow_path; 558 559 /* Partially cloned skb? */ 560 if (skb_shared(frag)) 561 goto slow_path; 562 } 563 564 err = 0; 565 offset = 0; 566 frag = skb_shinfo(skb)->frag_list; 567 skb_shinfo(skb)->frag_list = NULL; 568 /* BUILD HEADER */ 569 570 tmp_hdr = kmalloc(hlen, GFP_ATOMIC); 571 if (!tmp_hdr) { 572 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); 573 return -ENOMEM; 574 } 575 576 *prevhdr = NEXTHDR_FRAGMENT; 577 memcpy(tmp_hdr, skb->nh.raw, hlen); 578 __skb_pull(skb, hlen); 579 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); 580 skb->nh.raw = __skb_push(skb, hlen); 581 memcpy(skb->nh.raw, tmp_hdr, hlen); 582 583 ipv6_select_ident(skb, fh); 584 fh->nexthdr = nexthdr; 585 fh->reserved = 0; 586 fh->frag_off = htons(IP6_MF); 587 frag_id = fh->identification; 588 589 first_len = skb_pagelen(skb); 590 skb->data_len = first_len - skb_headlen(skb); 591 skb->len = first_len; 592 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr)); 593 594 595 for (;;) { 596 /* Prepare header of the next frame, 597 * before previous one went down. */ 598 if (frag) { 599 frag->ip_summed = CHECKSUM_NONE; 600 frag->h.raw = frag->data; 601 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); 602 frag->nh.raw = __skb_push(frag, hlen); 603 memcpy(frag->nh.raw, tmp_hdr, hlen); 604 offset += skb->len - hlen - sizeof(struct frag_hdr); 605 fh->nexthdr = nexthdr; 606 fh->reserved = 0; 607 fh->frag_off = htons(offset); 608 if (frag->next != NULL) 609 fh->frag_off |= htons(IP6_MF); 610 fh->identification = frag_id; 611 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 612 ip6_copy_metadata(frag, skb); 613 } 614 615 err = output(skb); 616 if (err || !frag) 617 break; 618 619 skb = frag; 620 frag = skb->next; 621 skb->next = NULL; 622 } 623 624 if (tmp_hdr) 625 kfree(tmp_hdr); 626 627 if (err == 0) { 628 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); 629 return 0; 630 } 631 632 while (frag) { 633 skb = frag->next; 634 kfree_skb(frag); 635 frag = skb; 636 } 637 638 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); 639 return err; 640 } 641 642 slow_path: 643 left = skb->len - hlen; /* Space per frame */ 644 ptr = hlen; /* Where to start from */ 645 646 /* 647 * Fragment the datagram. 648 */ 649 650 *prevhdr = NEXTHDR_FRAGMENT; 651 652 /* 653 * Keep copying data until we run out. 654 */ 655 while(left > 0) { 656 len = left; 657 /* IF: it doesn't fit, use 'mtu' - the data space left */ 658 if (len > mtu) 659 len = mtu; 660 /* IF: we are not sending upto and including the packet end 661 then align the next start on an eight byte boundary */ 662 if (len < left) { 663 len &= ~7; 664 } 665 /* 666 * Allocate buffer. 667 */ 668 669 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { 670 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n")); 671 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); 672 err = -ENOMEM; 673 goto fail; 674 } 675 676 /* 677 * Set up data on packet 678 */ 679 680 ip6_copy_metadata(frag, skb); 681 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); 682 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 683 frag->nh.raw = frag->data; 684 fh = (struct frag_hdr*)(frag->data + hlen); 685 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr); 686 687 /* 688 * Charge the memory for the fragment to any owner 689 * it might possess 690 */ 691 if (skb->sk) 692 skb_set_owner_w(frag, skb->sk); 693 694 /* 695 * Copy the packet header into the new buffer. 696 */ 697 memcpy(frag->nh.raw, skb->data, hlen); 698 699 /* 700 * Build fragment header. 701 */ 702 fh->nexthdr = nexthdr; 703 fh->reserved = 0; 704 if (frag_id) { 705 ipv6_select_ident(skb, fh); 706 frag_id = fh->identification; 707 } else 708 fh->identification = frag_id; 709 710 /* 711 * Copy a block of the IP datagram. 712 */ 713 if (skb_copy_bits(skb, ptr, frag->h.raw, len)) 714 BUG(); 715 left -= len; 716 717 fh->frag_off = htons(offset); 718 if (left > 0) 719 fh->frag_off |= htons(IP6_MF); 720 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 721 722 ptr += len; 723 offset += len; 724 725 /* 726 * Put this fragment into the sending queue. 727 */ 728 729 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); 730 731 err = output(frag); 732 if (err) 733 goto fail; 734 } 735 kfree_skb(skb); 736 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); 737 return err; 738 739 fail: 740 kfree_skb(skb); 741 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); 742 return err; 743 } 744 745 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) 746 { 747 int err = 0; 748 749 *dst = NULL; 750 if (sk) { 751 struct ipv6_pinfo *np = inet6_sk(sk); 752 753 *dst = sk_dst_check(sk, np->dst_cookie); 754 if (*dst) { 755 struct rt6_info *rt = (struct rt6_info*)*dst; 756 757 /* Yes, checking route validity in not connected 758 case is not very simple. Take into account, 759 that we do not support routing by source, TOS, 760 and MSG_DONTROUTE --ANK (980726) 761 762 1. If route was host route, check that 763 cached destination is current. 764 If it is network route, we still may 765 check its validity using saved pointer 766 to the last used address: daddr_cache. 767 We do not want to save whole address now, 768 (because main consumer of this service 769 is tcp, which has not this problem), 770 so that the last trick works only on connected 771 sockets. 772 2. oif also should be the same. 773 */ 774 775 if (((rt->rt6i_dst.plen != 128 || 776 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr)) 777 && (np->daddr_cache == NULL || 778 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache))) 779 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) { 780 dst_release(*dst); 781 *dst = NULL; 782 } 783 } 784 } 785 786 if (*dst == NULL) 787 *dst = ip6_route_output(sk, fl); 788 789 if ((err = (*dst)->error)) 790 goto out_err_release; 791 792 if (ipv6_addr_any(&fl->fl6_src)) { 793 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); 794 795 if (err) { 796 #if IP6_DEBUG >= 2 797 printk(KERN_DEBUG "ip6_dst_lookup: " 798 "no available source address\n"); 799 #endif 800 goto out_err_release; 801 } 802 } 803 804 return 0; 805 806 out_err_release: 807 dst_release(*dst); 808 *dst = NULL; 809 return err; 810 } 811 812 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), 813 void *from, int length, int transhdrlen, 814 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, 815 unsigned int flags) 816 { 817 struct inet_sock *inet = inet_sk(sk); 818 struct ipv6_pinfo *np = inet6_sk(sk); 819 struct sk_buff *skb; 820 unsigned int maxfraglen, fragheaderlen; 821 int exthdrlen; 822 int hh_len; 823 int mtu; 824 int copy; 825 int err; 826 int offset = 0; 827 int csummode = CHECKSUM_NONE; 828 829 if (flags&MSG_PROBE) 830 return 0; 831 if (skb_queue_empty(&sk->sk_write_queue)) { 832 /* 833 * setup for corking 834 */ 835 if (opt) { 836 if (np->cork.opt == NULL) { 837 np->cork.opt = kmalloc(opt->tot_len, 838 sk->sk_allocation); 839 if (unlikely(np->cork.opt == NULL)) 840 return -ENOBUFS; 841 } else if (np->cork.opt->tot_len < opt->tot_len) { 842 printk(KERN_DEBUG "ip6_append_data: invalid option length\n"); 843 return -EINVAL; 844 } 845 memcpy(np->cork.opt, opt, opt->tot_len); 846 inet->cork.flags |= IPCORK_OPT; 847 /* need source address above miyazawa*/ 848 } 849 dst_hold(&rt->u.dst); 850 np->cork.rt = rt; 851 inet->cork.fl = *fl; 852 np->cork.hop_limit = hlimit; 853 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); 854 if (dst_allfrag(rt->u.dst.path)) 855 inet->cork.flags |= IPCORK_ALLFRAG; 856 inet->cork.length = 0; 857 sk->sk_sndmsg_page = NULL; 858 sk->sk_sndmsg_off = 0; 859 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0); 860 length += exthdrlen; 861 transhdrlen += exthdrlen; 862 } else { 863 rt = np->cork.rt; 864 fl = &inet->cork.fl; 865 if (inet->cork.flags & IPCORK_OPT) 866 opt = np->cork.opt; 867 transhdrlen = 0; 868 exthdrlen = 0; 869 mtu = inet->cork.fragsize; 870 } 871 872 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 873 874 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0); 875 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 876 877 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 878 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { 879 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); 880 return -EMSGSIZE; 881 } 882 } 883 884 /* 885 * Let's try using as much space as possible. 886 * Use MTU if total length of the message fits into the MTU. 887 * Otherwise, we need to reserve fragment header and 888 * fragment alignment (= 8-15 octects, in total). 889 * 890 * Note that we may need to "move" the data from the tail of 891 * of the buffer to the new fragment when we split 892 * the message. 893 * 894 * FIXME: It may be fragmented into multiple chunks 895 * at once if non-fragmentable extension headers 896 * are too large. 897 * --yoshfuji 898 */ 899 900 inet->cork.length += length; 901 902 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 903 goto alloc_new_skb; 904 905 while (length > 0) { 906 /* Check if the remaining data fits into current packet. */ 907 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 908 if (copy < length) 909 copy = maxfraglen - skb->len; 910 911 if (copy <= 0) { 912 char *data; 913 unsigned int datalen; 914 unsigned int fraglen; 915 unsigned int fraggap; 916 unsigned int alloclen; 917 struct sk_buff *skb_prev; 918 alloc_new_skb: 919 skb_prev = skb; 920 921 /* There's no room in the current skb */ 922 if (skb_prev) 923 fraggap = skb_prev->len - maxfraglen; 924 else 925 fraggap = 0; 926 927 /* 928 * If remaining data exceeds the mtu, 929 * we know we need more fragment(s). 930 */ 931 datalen = length + fraggap; 932 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 933 datalen = maxfraglen - fragheaderlen; 934 935 fraglen = datalen + fragheaderlen; 936 if ((flags & MSG_MORE) && 937 !(rt->u.dst.dev->features&NETIF_F_SG)) 938 alloclen = mtu; 939 else 940 alloclen = datalen + fragheaderlen; 941 942 /* 943 * The last fragment gets additional space at tail. 944 * Note: we overallocate on fragments with MSG_MODE 945 * because we have no idea if we're the last one. 946 */ 947 if (datalen == length + fraggap) 948 alloclen += rt->u.dst.trailer_len; 949 950 /* 951 * We just reserve space for fragment header. 952 * Note: this may be overallocation if the message 953 * (without MSG_MORE) fits into the MTU. 954 */ 955 alloclen += sizeof(struct frag_hdr); 956 957 if (transhdrlen) { 958 skb = sock_alloc_send_skb(sk, 959 alloclen + hh_len, 960 (flags & MSG_DONTWAIT), &err); 961 } else { 962 skb = NULL; 963 if (atomic_read(&sk->sk_wmem_alloc) <= 964 2 * sk->sk_sndbuf) 965 skb = sock_wmalloc(sk, 966 alloclen + hh_len, 1, 967 sk->sk_allocation); 968 if (unlikely(skb == NULL)) 969 err = -ENOBUFS; 970 } 971 if (skb == NULL) 972 goto error; 973 /* 974 * Fill in the control structures 975 */ 976 skb->ip_summed = csummode; 977 skb->csum = 0; 978 /* reserve for fragmentation */ 979 skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); 980 981 /* 982 * Find where to start putting bytes 983 */ 984 data = skb_put(skb, fraglen); 985 skb->nh.raw = data + exthdrlen; 986 data += fragheaderlen; 987 skb->h.raw = data + exthdrlen; 988 989 if (fraggap) { 990 skb->csum = skb_copy_and_csum_bits( 991 skb_prev, maxfraglen, 992 data + transhdrlen, fraggap, 0); 993 skb_prev->csum = csum_sub(skb_prev->csum, 994 skb->csum); 995 data += fraggap; 996 skb_trim(skb_prev, maxfraglen); 997 } 998 copy = datalen - transhdrlen - fraggap; 999 if (copy < 0) { 1000 err = -EINVAL; 1001 kfree_skb(skb); 1002 goto error; 1003 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1004 err = -EFAULT; 1005 kfree_skb(skb); 1006 goto error; 1007 } 1008 1009 offset += copy; 1010 length -= datalen - fraggap; 1011 transhdrlen = 0; 1012 exthdrlen = 0; 1013 csummode = CHECKSUM_NONE; 1014 1015 /* 1016 * Put the packet on the pending queue 1017 */ 1018 __skb_queue_tail(&sk->sk_write_queue, skb); 1019 continue; 1020 } 1021 1022 if (copy > length) 1023 copy = length; 1024 1025 if (!(rt->u.dst.dev->features&NETIF_F_SG)) { 1026 unsigned int off; 1027 1028 off = skb->len; 1029 if (getfrag(from, skb_put(skb, copy), 1030 offset, copy, off, skb) < 0) { 1031 __skb_trim(skb, off); 1032 err = -EFAULT; 1033 goto error; 1034 } 1035 } else { 1036 int i = skb_shinfo(skb)->nr_frags; 1037 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 1038 struct page *page = sk->sk_sndmsg_page; 1039 int off = sk->sk_sndmsg_off; 1040 unsigned int left; 1041 1042 if (page && (left = PAGE_SIZE - off) > 0) { 1043 if (copy >= left) 1044 copy = left; 1045 if (page != frag->page) { 1046 if (i == MAX_SKB_FRAGS) { 1047 err = -EMSGSIZE; 1048 goto error; 1049 } 1050 get_page(page); 1051 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 1052 frag = &skb_shinfo(skb)->frags[i]; 1053 } 1054 } else if(i < MAX_SKB_FRAGS) { 1055 if (copy > PAGE_SIZE) 1056 copy = PAGE_SIZE; 1057 page = alloc_pages(sk->sk_allocation, 0); 1058 if (page == NULL) { 1059 err = -ENOMEM; 1060 goto error; 1061 } 1062 sk->sk_sndmsg_page = page; 1063 sk->sk_sndmsg_off = 0; 1064 1065 skb_fill_page_desc(skb, i, page, 0, 0); 1066 frag = &skb_shinfo(skb)->frags[i]; 1067 skb->truesize += PAGE_SIZE; 1068 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); 1069 } else { 1070 err = -EMSGSIZE; 1071 goto error; 1072 } 1073 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { 1074 err = -EFAULT; 1075 goto error; 1076 } 1077 sk->sk_sndmsg_off += copy; 1078 frag->size += copy; 1079 skb->len += copy; 1080 skb->data_len += copy; 1081 } 1082 offset += copy; 1083 length -= copy; 1084 } 1085 return 0; 1086 error: 1087 inet->cork.length -= length; 1088 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); 1089 return err; 1090 } 1091 1092 int ip6_push_pending_frames(struct sock *sk) 1093 { 1094 struct sk_buff *skb, *tmp_skb; 1095 struct sk_buff **tail_skb; 1096 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1097 struct inet_sock *inet = inet_sk(sk); 1098 struct ipv6_pinfo *np = inet6_sk(sk); 1099 struct ipv6hdr *hdr; 1100 struct ipv6_txoptions *opt = np->cork.opt; 1101 struct rt6_info *rt = np->cork.rt; 1102 struct flowi *fl = &inet->cork.fl; 1103 unsigned char proto = fl->proto; 1104 int err = 0; 1105 1106 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 1107 goto out; 1108 tail_skb = &(skb_shinfo(skb)->frag_list); 1109 1110 /* move skb->data to ip header from ext header */ 1111 if (skb->data < skb->nh.raw) 1112 __skb_pull(skb, skb->nh.raw - skb->data); 1113 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1114 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw); 1115 *tail_skb = tmp_skb; 1116 tail_skb = &(tmp_skb->next); 1117 skb->len += tmp_skb->len; 1118 skb->data_len += tmp_skb->len; 1119 #if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */ 1120 skb->truesize += tmp_skb->truesize; 1121 __sock_put(tmp_skb->sk); 1122 tmp_skb->destructor = NULL; 1123 tmp_skb->sk = NULL; 1124 #endif 1125 } 1126 1127 ipv6_addr_copy(final_dst, &fl->fl6_dst); 1128 __skb_pull(skb, skb->h.raw - skb->nh.raw); 1129 if (opt && opt->opt_flen) 1130 ipv6_push_frag_opts(skb, opt, &proto); 1131 if (opt && opt->opt_nflen) 1132 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1133 1134 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr)); 1135 1136 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000); 1137 1138 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) 1139 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 1140 else 1141 hdr->payload_len = 0; 1142 hdr->hop_limit = np->cork.hop_limit; 1143 hdr->nexthdr = proto; 1144 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); 1145 ipv6_addr_copy(&hdr->daddr, final_dst); 1146 1147 skb->dst = dst_clone(&rt->u.dst); 1148 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 1149 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); 1150 if (err) { 1151 if (err > 0) 1152 err = inet->recverr ? net_xmit_errno(err) : 0; 1153 if (err) 1154 goto error; 1155 } 1156 1157 out: 1158 inet->cork.flags &= ~IPCORK_OPT; 1159 if (np->cork.opt) { 1160 kfree(np->cork.opt); 1161 np->cork.opt = NULL; 1162 } 1163 if (np->cork.rt) { 1164 dst_release(&np->cork.rt->u.dst); 1165 np->cork.rt = NULL; 1166 inet->cork.flags &= ~IPCORK_ALLFRAG; 1167 } 1168 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1169 return err; 1170 error: 1171 goto out; 1172 } 1173 1174 void ip6_flush_pending_frames(struct sock *sk) 1175 { 1176 struct inet_sock *inet = inet_sk(sk); 1177 struct ipv6_pinfo *np = inet6_sk(sk); 1178 struct sk_buff *skb; 1179 1180 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { 1181 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS); 1182 kfree_skb(skb); 1183 } 1184 1185 inet->cork.flags &= ~IPCORK_OPT; 1186 1187 if (np->cork.opt) { 1188 kfree(np->cork.opt); 1189 np->cork.opt = NULL; 1190 } 1191 if (np->cork.rt) { 1192 dst_release(&np->cork.rt->u.dst); 1193 np->cork.rt = NULL; 1194 inet->cork.flags &= ~IPCORK_ALLFRAG; 1195 } 1196 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1197 } 1198