1 /* 2 * IPv6 output functions 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * Based on linux/net/ipv4/ip_output.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Changes: 16 * A.N.Kuznetsov : airthmetics in fragmentation. 17 * extension headers are implemented. 18 * route changes now work. 19 * ip6_forward does not confuse sniffers. 20 * etc. 21 * 22 * H. von Brand : Added missing #include <linux/string.h> 23 * Imran Patel : frag id should be in NBO 24 * Kazunori MIYAZAWA @USAGI 25 * : add ip6_append_data and related functions 26 * for datagram xmit 27 */ 28 29 #include <linux/errno.h> 30 #include <linux/kernel.h> 31 #include <linux/string.h> 32 #include <linux/socket.h> 33 #include <linux/net.h> 34 #include <linux/netdevice.h> 35 #include <linux/if_arp.h> 36 #include <linux/in6.h> 37 #include <linux/tcp.h> 38 #include <linux/route.h> 39 #include <linux/module.h> 40 #include <linux/slab.h> 41 42 #include <linux/netfilter.h> 43 #include <linux/netfilter_ipv6.h> 44 45 #include <net/sock.h> 46 #include <net/snmp.h> 47 48 #include <net/ipv6.h> 49 #include <net/ndisc.h> 50 #include <net/protocol.h> 51 #include <net/ip6_route.h> 52 #include <net/addrconf.h> 53 #include <net/rawv6.h> 54 #include <net/icmp.h> 55 #include <net/xfrm.h> 56 #include <net/checksum.h> 57 #include <linux/mroute6.h> 58 59 static int ip6_finish_output2(struct sk_buff *skb) 60 { 61 struct dst_entry *dst = skb_dst(skb); 62 struct net_device *dev = dst->dev; 63 struct neighbour *neigh; 64 struct in6_addr *nexthop; 65 int ret; 66 67 skb->protocol = htons(ETH_P_IPV6); 68 skb->dev = dev; 69 70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 72 73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && 74 ((mroute6_socket(dev_net(dev), skb) && 75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 77 &ipv6_hdr(skb)->saddr))) { 78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 79 80 /* Do not check for IFF_ALLMULTI; multicast routing 81 is not supported in any case. 82 */ 83 if (newskb) 84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 85 newskb, NULL, newskb->dev, 86 dev_loopback_xmit); 87 88 if (ipv6_hdr(skb)->hop_limit == 0) { 89 IP6_INC_STATS(dev_net(dev), idev, 90 IPSTATS_MIB_OUTDISCARDS); 91 kfree_skb(skb); 92 return 0; 93 } 94 } 95 96 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, 97 skb->len); 98 99 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= 100 IPV6_ADDR_SCOPE_NODELOCAL && 101 !(dev->flags & IFF_LOOPBACK)) { 102 kfree_skb(skb); 103 return 0; 104 } 105 } 106 107 rcu_read_lock_bh(); 108 nexthop = rt6_nexthop((struct rt6_info *)dst); 109 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); 110 if (unlikely(!neigh)) 111 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); 112 if (!IS_ERR(neigh)) { 113 ret = dst_neigh_output(dst, neigh, skb); 114 rcu_read_unlock_bh(); 115 return ret; 116 } 117 rcu_read_unlock_bh(); 118 119 IP6_INC_STATS(dev_net(dst->dev), 120 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 121 kfree_skb(skb); 122 return -EINVAL; 123 } 124 125 static int ip6_finish_output(struct sk_buff *skb) 126 { 127 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 128 dst_allfrag(skb_dst(skb)) || 129 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) 130 return ip6_fragment(skb, ip6_finish_output2); 131 else 132 return ip6_finish_output2(skb); 133 } 134 135 int ip6_output(struct sock *sk, struct sk_buff *skb) 136 { 137 struct net_device *dev = skb_dst(skb)->dev; 138 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 139 if (unlikely(idev->cnf.disable_ipv6)) { 140 IP6_INC_STATS(dev_net(dev), idev, 141 IPSTATS_MIB_OUTDISCARDS); 142 kfree_skb(skb); 143 return 0; 144 } 145 146 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, 147 ip6_finish_output, 148 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 149 } 150 151 /* 152 * xmit an sk_buff (used by TCP, SCTP and DCCP) 153 */ 154 155 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 156 struct ipv6_txoptions *opt, int tclass) 157 { 158 struct net *net = sock_net(sk); 159 struct ipv6_pinfo *np = inet6_sk(sk); 160 struct in6_addr *first_hop = &fl6->daddr; 161 struct dst_entry *dst = skb_dst(skb); 162 struct ipv6hdr *hdr; 163 u8 proto = fl6->flowi6_proto; 164 int seg_len = skb->len; 165 int hlimit = -1; 166 u32 mtu; 167 168 if (opt) { 169 unsigned int head_room; 170 171 /* First: exthdrs may take lots of space (~8K for now) 172 MAX_HEADER is not enough. 173 */ 174 head_room = opt->opt_nflen + opt->opt_flen; 175 seg_len += head_room; 176 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 177 178 if (skb_headroom(skb) < head_room) { 179 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 180 if (!skb2) { 181 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 182 IPSTATS_MIB_OUTDISCARDS); 183 kfree_skb(skb); 184 return -ENOBUFS; 185 } 186 consume_skb(skb); 187 skb = skb2; 188 skb_set_owner_w(skb, sk); 189 } 190 if (opt->opt_flen) 191 ipv6_push_frag_opts(skb, opt, &proto); 192 if (opt->opt_nflen) 193 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 194 } 195 196 skb_push(skb, sizeof(struct ipv6hdr)); 197 skb_reset_network_header(skb); 198 hdr = ipv6_hdr(skb); 199 200 /* 201 * Fill in the IPv6 header 202 */ 203 if (np) 204 hlimit = np->hop_limit; 205 if (hlimit < 0) 206 hlimit = ip6_dst_hoplimit(dst); 207 208 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 209 np->autoflowlabel)); 210 211 hdr->payload_len = htons(seg_len); 212 hdr->nexthdr = proto; 213 hdr->hop_limit = hlimit; 214 215 hdr->saddr = fl6->saddr; 216 hdr->daddr = *first_hop; 217 218 skb->protocol = htons(ETH_P_IPV6); 219 skb->priority = sk->sk_priority; 220 skb->mark = sk->sk_mark; 221 222 mtu = dst_mtu(dst); 223 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { 224 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 225 IPSTATS_MIB_OUT, skb->len); 226 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 227 dst->dev, dst_output); 228 } 229 230 skb->dev = dst->dev; 231 ipv6_local_error(sk, EMSGSIZE, fl6, mtu); 232 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 233 kfree_skb(skb); 234 return -EMSGSIZE; 235 } 236 EXPORT_SYMBOL(ip6_xmit); 237 238 static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 239 { 240 struct ip6_ra_chain *ra; 241 struct sock *last = NULL; 242 243 read_lock(&ip6_ra_lock); 244 for (ra = ip6_ra_chain; ra; ra = ra->next) { 245 struct sock *sk = ra->sk; 246 if (sk && ra->sel == sel && 247 (!sk->sk_bound_dev_if || 248 sk->sk_bound_dev_if == skb->dev->ifindex)) { 249 if (last) { 250 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 251 if (skb2) 252 rawv6_rcv(last, skb2); 253 } 254 last = sk; 255 } 256 } 257 258 if (last) { 259 rawv6_rcv(last, skb); 260 read_unlock(&ip6_ra_lock); 261 return 1; 262 } 263 read_unlock(&ip6_ra_lock); 264 return 0; 265 } 266 267 static int ip6_forward_proxy_check(struct sk_buff *skb) 268 { 269 struct ipv6hdr *hdr = ipv6_hdr(skb); 270 u8 nexthdr = hdr->nexthdr; 271 __be16 frag_off; 272 int offset; 273 274 if (ipv6_ext_hdr(nexthdr)) { 275 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); 276 if (offset < 0) 277 return 0; 278 } else 279 offset = sizeof(struct ipv6hdr); 280 281 if (nexthdr == IPPROTO_ICMPV6) { 282 struct icmp6hdr *icmp6; 283 284 if (!pskb_may_pull(skb, (skb_network_header(skb) + 285 offset + 1 - skb->data))) 286 return 0; 287 288 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 289 290 switch (icmp6->icmp6_type) { 291 case NDISC_ROUTER_SOLICITATION: 292 case NDISC_ROUTER_ADVERTISEMENT: 293 case NDISC_NEIGHBOUR_SOLICITATION: 294 case NDISC_NEIGHBOUR_ADVERTISEMENT: 295 case NDISC_REDIRECT: 296 /* For reaction involving unicast neighbor discovery 297 * message destined to the proxied address, pass it to 298 * input function. 299 */ 300 return 1; 301 default: 302 break; 303 } 304 } 305 306 /* 307 * The proxying router can't forward traffic sent to a link-local 308 * address, so signal the sender and discard the packet. This 309 * behavior is clarified by the MIPv6 specification. 310 */ 311 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 312 dst_link_failure(skb); 313 return -1; 314 } 315 316 return 0; 317 } 318 319 static inline int ip6_forward_finish(struct sk_buff *skb) 320 { 321 skb_sender_cpu_clear(skb); 322 return dst_output(skb); 323 } 324 325 static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) 326 { 327 unsigned int mtu; 328 struct inet6_dev *idev; 329 330 if (dst_metric_locked(dst, RTAX_MTU)) { 331 mtu = dst_metric_raw(dst, RTAX_MTU); 332 if (mtu) 333 return mtu; 334 } 335 336 mtu = IPV6_MIN_MTU; 337 rcu_read_lock(); 338 idev = __in6_dev_get(dst->dev); 339 if (idev) 340 mtu = idev->cnf.mtu6; 341 rcu_read_unlock(); 342 343 return mtu; 344 } 345 346 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) 347 { 348 if (skb->len <= mtu) 349 return false; 350 351 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ 352 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) 353 return true; 354 355 if (skb->ignore_df) 356 return false; 357 358 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) 359 return false; 360 361 return true; 362 } 363 364 int ip6_forward(struct sk_buff *skb) 365 { 366 struct dst_entry *dst = skb_dst(skb); 367 struct ipv6hdr *hdr = ipv6_hdr(skb); 368 struct inet6_skb_parm *opt = IP6CB(skb); 369 struct net *net = dev_net(dst->dev); 370 u32 mtu; 371 372 if (net->ipv6.devconf_all->forwarding == 0) 373 goto error; 374 375 if (skb->pkt_type != PACKET_HOST) 376 goto drop; 377 378 if (skb_warn_if_lro(skb)) 379 goto drop; 380 381 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 382 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 383 IPSTATS_MIB_INDISCARDS); 384 goto drop; 385 } 386 387 skb_forward_csum(skb); 388 389 /* 390 * We DO NOT make any processing on 391 * RA packets, pushing them to user level AS IS 392 * without ane WARRANTY that application will be able 393 * to interpret them. The reason is that we 394 * cannot make anything clever here. 395 * 396 * We are not end-node, so that if packet contains 397 * AH/ESP, we cannot make anything. 398 * Defragmentation also would be mistake, RA packets 399 * cannot be fragmented, because there is no warranty 400 * that different fragments will go along one path. --ANK 401 */ 402 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { 403 if (ip6_call_ra_chain(skb, ntohs(opt->ra))) 404 return 0; 405 } 406 407 /* 408 * check and decrement ttl 409 */ 410 if (hdr->hop_limit <= 1) { 411 /* Force OUTPUT device used as source address */ 412 skb->dev = dst->dev; 413 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 414 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 415 IPSTATS_MIB_INHDRERRORS); 416 417 kfree_skb(skb); 418 return -ETIMEDOUT; 419 } 420 421 /* XXX: idev->cnf.proxy_ndp? */ 422 if (net->ipv6.devconf_all->proxy_ndp && 423 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 424 int proxied = ip6_forward_proxy_check(skb); 425 if (proxied > 0) 426 return ip6_input(skb); 427 else if (proxied < 0) { 428 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 429 IPSTATS_MIB_INDISCARDS); 430 goto drop; 431 } 432 } 433 434 if (!xfrm6_route_forward(skb)) { 435 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 436 IPSTATS_MIB_INDISCARDS); 437 goto drop; 438 } 439 dst = skb_dst(skb); 440 441 /* IPv6 specs say nothing about it, but it is clear that we cannot 442 send redirects to source routed frames. 443 We don't send redirects to frames decapsulated from IPsec. 444 */ 445 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { 446 struct in6_addr *target = NULL; 447 struct inet_peer *peer; 448 struct rt6_info *rt; 449 450 /* 451 * incoming and outgoing devices are the same 452 * send a redirect. 453 */ 454 455 rt = (struct rt6_info *) dst; 456 if (rt->rt6i_flags & RTF_GATEWAY) 457 target = &rt->rt6i_gateway; 458 else 459 target = &hdr->daddr; 460 461 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); 462 463 /* Limit redirects both by destination (here) 464 and by source (inside ndisc_send_redirect) 465 */ 466 if (inet_peer_xrlim_allow(peer, 1*HZ)) 467 ndisc_send_redirect(skb, target); 468 if (peer) 469 inet_putpeer(peer); 470 } else { 471 int addrtype = ipv6_addr_type(&hdr->saddr); 472 473 /* This check is security critical. */ 474 if (addrtype == IPV6_ADDR_ANY || 475 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 476 goto error; 477 if (addrtype & IPV6_ADDR_LINKLOCAL) { 478 icmpv6_send(skb, ICMPV6_DEST_UNREACH, 479 ICMPV6_NOT_NEIGHBOUR, 0); 480 goto error; 481 } 482 } 483 484 mtu = ip6_dst_mtu_forward(dst); 485 if (mtu < IPV6_MIN_MTU) 486 mtu = IPV6_MIN_MTU; 487 488 if (ip6_pkt_too_big(skb, mtu)) { 489 /* Again, force OUTPUT device used as source address */ 490 skb->dev = dst->dev; 491 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 492 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 493 IPSTATS_MIB_INTOOBIGERRORS); 494 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 495 IPSTATS_MIB_FRAGFAILS); 496 kfree_skb(skb); 497 return -EMSGSIZE; 498 } 499 500 if (skb_cow(skb, dst->dev->hard_header_len)) { 501 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), 502 IPSTATS_MIB_OUTDISCARDS); 503 goto drop; 504 } 505 506 hdr = ipv6_hdr(skb); 507 508 /* Mangling hops number delayed to point after skb COW */ 509 510 hdr->hop_limit--; 511 512 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 513 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); 514 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 515 ip6_forward_finish); 516 517 error: 518 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 519 drop: 520 kfree_skb(skb); 521 return -EINVAL; 522 } 523 524 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 525 { 526 to->pkt_type = from->pkt_type; 527 to->priority = from->priority; 528 to->protocol = from->protocol; 529 skb_dst_drop(to); 530 skb_dst_set(to, dst_clone(skb_dst(from))); 531 to->dev = from->dev; 532 to->mark = from->mark; 533 534 #ifdef CONFIG_NET_SCHED 535 to->tc_index = from->tc_index; 536 #endif 537 nf_copy(to, from); 538 skb_copy_secmark(to, from); 539 } 540 541 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 542 { 543 struct sk_buff *frag; 544 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 545 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 546 struct ipv6hdr *tmp_hdr; 547 struct frag_hdr *fh; 548 unsigned int mtu, hlen, left, len; 549 int hroom, troom; 550 __be32 frag_id = 0; 551 int ptr, offset = 0, err = 0; 552 u8 *prevhdr, nexthdr = 0; 553 struct net *net = dev_net(skb_dst(skb)->dev); 554 555 hlen = ip6_find_1stfragopt(skb, &prevhdr); 556 nexthdr = *prevhdr; 557 558 mtu = ip6_skb_dst_mtu(skb); 559 560 /* We must not fragment if the socket is set to force MTU discovery 561 * or if the skb it not generated by a local socket. 562 */ 563 if (unlikely(!skb->ignore_df && skb->len > mtu) || 564 (IP6CB(skb)->frag_max_size && 565 IP6CB(skb)->frag_max_size > mtu)) { 566 if (skb->sk && dst_allfrag(skb_dst(skb))) 567 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); 568 569 skb->dev = skb_dst(skb)->dev; 570 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 571 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 572 IPSTATS_MIB_FRAGFAILS); 573 kfree_skb(skb); 574 return -EMSGSIZE; 575 } 576 577 if (np && np->frag_size < mtu) { 578 if (np->frag_size) 579 mtu = np->frag_size; 580 } 581 mtu -= hlen + sizeof(struct frag_hdr); 582 583 if (skb_has_frag_list(skb)) { 584 int first_len = skb_pagelen(skb); 585 struct sk_buff *frag2; 586 587 if (first_len - hlen > mtu || 588 ((first_len - hlen) & 7) || 589 skb_cloned(skb)) 590 goto slow_path; 591 592 skb_walk_frags(skb, frag) { 593 /* Correct geometry. */ 594 if (frag->len > mtu || 595 ((frag->len & 7) && frag->next) || 596 skb_headroom(frag) < hlen) 597 goto slow_path_clean; 598 599 /* Partially cloned skb? */ 600 if (skb_shared(frag)) 601 goto slow_path_clean; 602 603 BUG_ON(frag->sk); 604 if (skb->sk) { 605 frag->sk = skb->sk; 606 frag->destructor = sock_wfree; 607 } 608 skb->truesize -= frag->truesize; 609 } 610 611 err = 0; 612 offset = 0; 613 frag = skb_shinfo(skb)->frag_list; 614 skb_frag_list_init(skb); 615 /* BUILD HEADER */ 616 617 *prevhdr = NEXTHDR_FRAGMENT; 618 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 619 if (!tmp_hdr) { 620 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 621 IPSTATS_MIB_FRAGFAILS); 622 return -ENOMEM; 623 } 624 625 __skb_pull(skb, hlen); 626 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr)); 627 __skb_push(skb, hlen); 628 skb_reset_network_header(skb); 629 memcpy(skb_network_header(skb), tmp_hdr, hlen); 630 631 ipv6_select_ident(net, fh, rt); 632 fh->nexthdr = nexthdr; 633 fh->reserved = 0; 634 fh->frag_off = htons(IP6_MF); 635 frag_id = fh->identification; 636 637 first_len = skb_pagelen(skb); 638 skb->data_len = first_len - skb_headlen(skb); 639 skb->len = first_len; 640 ipv6_hdr(skb)->payload_len = htons(first_len - 641 sizeof(struct ipv6hdr)); 642 643 dst_hold(&rt->dst); 644 645 for (;;) { 646 /* Prepare header of the next frame, 647 * before previous one went down. */ 648 if (frag) { 649 frag->ip_summed = CHECKSUM_NONE; 650 skb_reset_transport_header(frag); 651 fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr)); 652 __skb_push(frag, hlen); 653 skb_reset_network_header(frag); 654 memcpy(skb_network_header(frag), tmp_hdr, 655 hlen); 656 offset += skb->len - hlen - sizeof(struct frag_hdr); 657 fh->nexthdr = nexthdr; 658 fh->reserved = 0; 659 fh->frag_off = htons(offset); 660 if (frag->next != NULL) 661 fh->frag_off |= htons(IP6_MF); 662 fh->identification = frag_id; 663 ipv6_hdr(frag)->payload_len = 664 htons(frag->len - 665 sizeof(struct ipv6hdr)); 666 ip6_copy_metadata(frag, skb); 667 } 668 669 err = output(skb); 670 if (!err) 671 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 672 IPSTATS_MIB_FRAGCREATES); 673 674 if (err || !frag) 675 break; 676 677 skb = frag; 678 frag = skb->next; 679 skb->next = NULL; 680 } 681 682 kfree(tmp_hdr); 683 684 if (err == 0) { 685 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 686 IPSTATS_MIB_FRAGOKS); 687 ip6_rt_put(rt); 688 return 0; 689 } 690 691 kfree_skb_list(frag); 692 693 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 694 IPSTATS_MIB_FRAGFAILS); 695 ip6_rt_put(rt); 696 return err; 697 698 slow_path_clean: 699 skb_walk_frags(skb, frag2) { 700 if (frag2 == frag) 701 break; 702 frag2->sk = NULL; 703 frag2->destructor = NULL; 704 skb->truesize += frag2->truesize; 705 } 706 } 707 708 slow_path: 709 if ((skb->ip_summed == CHECKSUM_PARTIAL) && 710 skb_checksum_help(skb)) 711 goto fail; 712 713 left = skb->len - hlen; /* Space per frame */ 714 ptr = hlen; /* Where to start from */ 715 716 /* 717 * Fragment the datagram. 718 */ 719 720 *prevhdr = NEXTHDR_FRAGMENT; 721 hroom = LL_RESERVED_SPACE(rt->dst.dev); 722 troom = rt->dst.dev->needed_tailroom; 723 724 /* 725 * Keep copying data until we run out. 726 */ 727 while (left > 0) { 728 len = left; 729 /* IF: it doesn't fit, use 'mtu' - the data space left */ 730 if (len > mtu) 731 len = mtu; 732 /* IF: we are not sending up to and including the packet end 733 then align the next start on an eight byte boundary */ 734 if (len < left) { 735 len &= ~7; 736 } 737 738 /* Allocate buffer */ 739 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + 740 hroom + troom, GFP_ATOMIC); 741 if (!frag) { 742 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 743 IPSTATS_MIB_FRAGFAILS); 744 err = -ENOMEM; 745 goto fail; 746 } 747 748 /* 749 * Set up data on packet 750 */ 751 752 ip6_copy_metadata(frag, skb); 753 skb_reserve(frag, hroom); 754 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 755 skb_reset_network_header(frag); 756 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 757 frag->transport_header = (frag->network_header + hlen + 758 sizeof(struct frag_hdr)); 759 760 /* 761 * Charge the memory for the fragment to any owner 762 * it might possess 763 */ 764 if (skb->sk) 765 skb_set_owner_w(frag, skb->sk); 766 767 /* 768 * Copy the packet header into the new buffer. 769 */ 770 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); 771 772 /* 773 * Build fragment header. 774 */ 775 fh->nexthdr = nexthdr; 776 fh->reserved = 0; 777 if (!frag_id) { 778 ipv6_select_ident(net, fh, rt); 779 frag_id = fh->identification; 780 } else 781 fh->identification = frag_id; 782 783 /* 784 * Copy a block of the IP datagram. 785 */ 786 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag), 787 len)); 788 left -= len; 789 790 fh->frag_off = htons(offset); 791 if (left > 0) 792 fh->frag_off |= htons(IP6_MF); 793 ipv6_hdr(frag)->payload_len = htons(frag->len - 794 sizeof(struct ipv6hdr)); 795 796 ptr += len; 797 offset += len; 798 799 /* 800 * Put this fragment into the sending queue. 801 */ 802 err = output(frag); 803 if (err) 804 goto fail; 805 806 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 807 IPSTATS_MIB_FRAGCREATES); 808 } 809 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 810 IPSTATS_MIB_FRAGOKS); 811 consume_skb(skb); 812 return err; 813 814 fail: 815 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 816 IPSTATS_MIB_FRAGFAILS); 817 kfree_skb(skb); 818 return err; 819 } 820 821 static inline int ip6_rt_check(const struct rt6key *rt_key, 822 const struct in6_addr *fl_addr, 823 const struct in6_addr *addr_cache) 824 { 825 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 826 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); 827 } 828 829 static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 830 struct dst_entry *dst, 831 const struct flowi6 *fl6) 832 { 833 struct ipv6_pinfo *np = inet6_sk(sk); 834 struct rt6_info *rt; 835 836 if (!dst) 837 goto out; 838 839 if (dst->ops->family != AF_INET6) { 840 dst_release(dst); 841 return NULL; 842 } 843 844 rt = (struct rt6_info *)dst; 845 /* Yes, checking route validity in not connected 846 * case is not very simple. Take into account, 847 * that we do not support routing by source, TOS, 848 * and MSG_DONTROUTE --ANK (980726) 849 * 850 * 1. ip6_rt_check(): If route was host route, 851 * check that cached destination is current. 852 * If it is network route, we still may 853 * check its validity using saved pointer 854 * to the last used address: daddr_cache. 855 * We do not want to save whole address now, 856 * (because main consumer of this service 857 * is tcp, which has not this problem), 858 * so that the last trick works only on connected 859 * sockets. 860 * 2. oif also should be the same. 861 */ 862 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 863 #ifdef CONFIG_IPV6_SUBTREES 864 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 865 #endif 866 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { 867 dst_release(dst); 868 dst = NULL; 869 } 870 871 out: 872 return dst; 873 } 874 875 static int ip6_dst_lookup_tail(struct sock *sk, 876 struct dst_entry **dst, struct flowi6 *fl6) 877 { 878 struct net *net = sock_net(sk); 879 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 880 struct neighbour *n; 881 struct rt6_info *rt; 882 #endif 883 int err; 884 885 if (!*dst) 886 *dst = ip6_route_output(net, sk, fl6); 887 888 err = (*dst)->error; 889 if (err) 890 goto out_err_release; 891 892 if (ipv6_addr_any(&fl6->saddr)) { 893 struct rt6_info *rt = (struct rt6_info *) *dst; 894 err = ip6_route_get_saddr(net, rt, &fl6->daddr, 895 sk ? inet6_sk(sk)->srcprefs : 0, 896 &fl6->saddr); 897 if (err) 898 goto out_err_release; 899 } 900 901 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 902 /* 903 * Here if the dst entry we've looked up 904 * has a neighbour entry that is in the INCOMPLETE 905 * state and the src address from the flow is 906 * marked as OPTIMISTIC, we release the found 907 * dst entry and replace it instead with the 908 * dst entry of the nexthop router 909 */ 910 rt = (struct rt6_info *) *dst; 911 rcu_read_lock_bh(); 912 n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); 913 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; 914 rcu_read_unlock_bh(); 915 916 if (err) { 917 struct inet6_ifaddr *ifp; 918 struct flowi6 fl_gw6; 919 int redirect; 920 921 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 922 (*dst)->dev, 1); 923 924 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 925 if (ifp) 926 in6_ifa_put(ifp); 927 928 if (redirect) { 929 /* 930 * We need to get the dst entry for the 931 * default router instead 932 */ 933 dst_release(*dst); 934 memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 935 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 936 *dst = ip6_route_output(net, sk, &fl_gw6); 937 err = (*dst)->error; 938 if (err) 939 goto out_err_release; 940 } 941 } 942 #endif 943 944 return 0; 945 946 out_err_release: 947 if (err == -ENETUNREACH) 948 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); 949 dst_release(*dst); 950 *dst = NULL; 951 return err; 952 } 953 954 /** 955 * ip6_dst_lookup - perform route lookup on flow 956 * @sk: socket which provides route info 957 * @dst: pointer to dst_entry * for result 958 * @fl6: flow to lookup 959 * 960 * This function performs a route lookup on the given flow. 961 * 962 * It returns zero on success, or a standard errno code on error. 963 */ 964 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) 965 { 966 *dst = NULL; 967 return ip6_dst_lookup_tail(sk, dst, fl6); 968 } 969 EXPORT_SYMBOL_GPL(ip6_dst_lookup); 970 971 /** 972 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 973 * @sk: socket which provides route info 974 * @fl6: flow to lookup 975 * @final_dst: final destination address for ipsec lookup 976 * 977 * This function performs a route lookup on the given flow. 978 * 979 * It returns a valid dst pointer on success, or a pointer encoded 980 * error code. 981 */ 982 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 983 const struct in6_addr *final_dst) 984 { 985 struct dst_entry *dst = NULL; 986 int err; 987 988 err = ip6_dst_lookup_tail(sk, &dst, fl6); 989 if (err) 990 return ERR_PTR(err); 991 if (final_dst) 992 fl6->daddr = *final_dst; 993 994 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); 995 } 996 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 997 998 /** 999 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 1000 * @sk: socket which provides the dst cache and route info 1001 * @fl6: flow to lookup 1002 * @final_dst: final destination address for ipsec lookup 1003 * 1004 * This function performs a route lookup on the given flow with the 1005 * possibility of using the cached route in the socket if it is valid. 1006 * It will take the socket dst lock when operating on the dst cache. 1007 * As a result, this function can only be used in process context. 1008 * 1009 * It returns a valid dst pointer on success, or a pointer encoded 1010 * error code. 1011 */ 1012 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1013 const struct in6_addr *final_dst) 1014 { 1015 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1016 int err; 1017 1018 dst = ip6_sk_dst_check(sk, dst, fl6); 1019 1020 err = ip6_dst_lookup_tail(sk, &dst, fl6); 1021 if (err) 1022 return ERR_PTR(err); 1023 if (final_dst) 1024 fl6->daddr = *final_dst; 1025 1026 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); 1027 } 1028 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1029 1030 static inline int ip6_ufo_append_data(struct sock *sk, 1031 struct sk_buff_head *queue, 1032 int getfrag(void *from, char *to, int offset, int len, 1033 int odd, struct sk_buff *skb), 1034 void *from, int length, int hh_len, int fragheaderlen, 1035 int transhdrlen, int mtu, unsigned int flags, 1036 struct rt6_info *rt) 1037 1038 { 1039 struct sk_buff *skb; 1040 struct frag_hdr fhdr; 1041 int err; 1042 1043 /* There is support for UDP large send offload by network 1044 * device, so create one single skb packet containing complete 1045 * udp datagram 1046 */ 1047 skb = skb_peek_tail(queue); 1048 if (!skb) { 1049 skb = sock_alloc_send_skb(sk, 1050 hh_len + fragheaderlen + transhdrlen + 20, 1051 (flags & MSG_DONTWAIT), &err); 1052 if (!skb) 1053 return err; 1054 1055 /* reserve space for Hardware header */ 1056 skb_reserve(skb, hh_len); 1057 1058 /* create space for UDP/IP header */ 1059 skb_put(skb, fragheaderlen + transhdrlen); 1060 1061 /* initialize network header pointer */ 1062 skb_reset_network_header(skb); 1063 1064 /* initialize protocol header pointer */ 1065 skb->transport_header = skb->network_header + fragheaderlen; 1066 1067 skb->protocol = htons(ETH_P_IPV6); 1068 skb->csum = 0; 1069 1070 __skb_queue_tail(queue, skb); 1071 } else if (skb_is_gso(skb)) { 1072 goto append; 1073 } 1074 1075 skb->ip_summed = CHECKSUM_PARTIAL; 1076 /* Specify the length of each IPv6 datagram fragment. 1077 * It has to be a multiple of 8. 1078 */ 1079 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - 1080 sizeof(struct frag_hdr)) & ~7; 1081 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1082 ipv6_select_ident(sock_net(sk), &fhdr, rt); 1083 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1084 1085 append: 1086 return skb_append_datato_frags(sk, skb, getfrag, from, 1087 (length - transhdrlen)); 1088 } 1089 1090 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1091 gfp_t gfp) 1092 { 1093 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1094 } 1095 1096 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 1097 gfp_t gfp) 1098 { 1099 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1100 } 1101 1102 static void ip6_append_data_mtu(unsigned int *mtu, 1103 int *maxfraglen, 1104 unsigned int fragheaderlen, 1105 struct sk_buff *skb, 1106 struct rt6_info *rt, 1107 unsigned int orig_mtu) 1108 { 1109 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { 1110 if (!skb) { 1111 /* first fragment, reserve header_len */ 1112 *mtu = orig_mtu - rt->dst.header_len; 1113 1114 } else { 1115 /* 1116 * this fragment is not first, the headers 1117 * space is regarded as data space. 1118 */ 1119 *mtu = orig_mtu; 1120 } 1121 *maxfraglen = ((*mtu - fragheaderlen) & ~7) 1122 + fragheaderlen - sizeof(struct frag_hdr); 1123 } 1124 } 1125 1126 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, 1127 struct inet6_cork *v6_cork, 1128 int hlimit, int tclass, struct ipv6_txoptions *opt, 1129 struct rt6_info *rt, struct flowi6 *fl6) 1130 { 1131 struct ipv6_pinfo *np = inet6_sk(sk); 1132 unsigned int mtu; 1133 1134 /* 1135 * setup for corking 1136 */ 1137 if (opt) { 1138 if (WARN_ON(v6_cork->opt)) 1139 return -EINVAL; 1140 1141 v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); 1142 if (unlikely(!v6_cork->opt)) 1143 return -ENOBUFS; 1144 1145 v6_cork->opt->tot_len = opt->tot_len; 1146 v6_cork->opt->opt_flen = opt->opt_flen; 1147 v6_cork->opt->opt_nflen = opt->opt_nflen; 1148 1149 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, 1150 sk->sk_allocation); 1151 if (opt->dst0opt && !v6_cork->opt->dst0opt) 1152 return -ENOBUFS; 1153 1154 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, 1155 sk->sk_allocation); 1156 if (opt->dst1opt && !v6_cork->opt->dst1opt) 1157 return -ENOBUFS; 1158 1159 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, 1160 sk->sk_allocation); 1161 if (opt->hopopt && !v6_cork->opt->hopopt) 1162 return -ENOBUFS; 1163 1164 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, 1165 sk->sk_allocation); 1166 if (opt->srcrt && !v6_cork->opt->srcrt) 1167 return -ENOBUFS; 1168 1169 /* need source address above miyazawa*/ 1170 } 1171 dst_hold(&rt->dst); 1172 cork->base.dst = &rt->dst; 1173 cork->fl.u.ip6 = *fl6; 1174 v6_cork->hop_limit = hlimit; 1175 v6_cork->tclass = tclass; 1176 if (rt->dst.flags & DST_XFRM_TUNNEL) 1177 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1178 rt->dst.dev->mtu : dst_mtu(&rt->dst); 1179 else 1180 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1181 rt->dst.dev->mtu : dst_mtu(rt->dst.path); 1182 if (np->frag_size < mtu) { 1183 if (np->frag_size) 1184 mtu = np->frag_size; 1185 } 1186 cork->base.fragsize = mtu; 1187 if (dst_allfrag(rt->dst.path)) 1188 cork->base.flags |= IPCORK_ALLFRAG; 1189 cork->base.length = 0; 1190 1191 return 0; 1192 } 1193 1194 static int __ip6_append_data(struct sock *sk, 1195 struct flowi6 *fl6, 1196 struct sk_buff_head *queue, 1197 struct inet_cork *cork, 1198 struct inet6_cork *v6_cork, 1199 struct page_frag *pfrag, 1200 int getfrag(void *from, char *to, int offset, 1201 int len, int odd, struct sk_buff *skb), 1202 void *from, int length, int transhdrlen, 1203 unsigned int flags, int dontfrag) 1204 { 1205 struct sk_buff *skb, *skb_prev = NULL; 1206 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; 1207 int exthdrlen = 0; 1208 int dst_exthdrlen = 0; 1209 int hh_len; 1210 int copy; 1211 int err; 1212 int offset = 0; 1213 __u8 tx_flags = 0; 1214 u32 tskey = 0; 1215 struct rt6_info *rt = (struct rt6_info *)cork->dst; 1216 struct ipv6_txoptions *opt = v6_cork->opt; 1217 int csummode = CHECKSUM_NONE; 1218 1219 skb = skb_peek_tail(queue); 1220 if (!skb) { 1221 exthdrlen = opt ? opt->opt_flen : 0; 1222 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; 1223 } 1224 1225 mtu = cork->fragsize; 1226 orig_mtu = mtu; 1227 1228 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 1229 1230 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1231 (opt ? opt->opt_nflen : 0); 1232 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - 1233 sizeof(struct frag_hdr); 1234 1235 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1236 unsigned int maxnonfragsize, headersize; 1237 1238 headersize = sizeof(struct ipv6hdr) + 1239 (opt ? opt->opt_flen + opt->opt_nflen : 0) + 1240 (dst_allfrag(&rt->dst) ? 1241 sizeof(struct frag_hdr) : 0) + 1242 rt->rt6i_nfheader_len; 1243 1244 if (ip6_sk_ignore_df(sk)) 1245 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; 1246 else 1247 maxnonfragsize = mtu; 1248 1249 /* dontfrag active */ 1250 if ((cork->length + length > mtu - headersize) && dontfrag && 1251 (sk->sk_protocol == IPPROTO_UDP || 1252 sk->sk_protocol == IPPROTO_RAW)) { 1253 ipv6_local_rxpmtu(sk, fl6, mtu - headersize + 1254 sizeof(struct ipv6hdr)); 1255 goto emsgsize; 1256 } 1257 1258 if (cork->length + length > maxnonfragsize - headersize) { 1259 emsgsize: 1260 ipv6_local_error(sk, EMSGSIZE, fl6, 1261 mtu - headersize + 1262 sizeof(struct ipv6hdr)); 1263 return -EMSGSIZE; 1264 } 1265 } 1266 1267 if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { 1268 sock_tx_timestamp(sk, &tx_flags); 1269 if (tx_flags & SKBTX_ANY_SW_TSTAMP && 1270 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) 1271 tskey = sk->sk_tskey++; 1272 } 1273 1274 /* If this is the first and only packet and device 1275 * supports checksum offloading, let's use it. 1276 */ 1277 if (!skb && sk->sk_protocol == IPPROTO_UDP && 1278 length + fragheaderlen < mtu && 1279 rt->dst.dev->features & NETIF_F_V6_CSUM && 1280 !exthdrlen) 1281 csummode = CHECKSUM_PARTIAL; 1282 /* 1283 * Let's try using as much space as possible. 1284 * Use MTU if total length of the message fits into the MTU. 1285 * Otherwise, we need to reserve fragment header and 1286 * fragment alignment (= 8-15 octects, in total). 1287 * 1288 * Note that we may need to "move" the data from the tail of 1289 * of the buffer to the new fragment when we split 1290 * the message. 1291 * 1292 * FIXME: It may be fragmented into multiple chunks 1293 * at once if non-fragmentable extension headers 1294 * are too large. 1295 * --yoshfuji 1296 */ 1297 1298 cork->length += length; 1299 if (((length > mtu) || 1300 (skb && skb_is_gso(skb))) && 1301 (sk->sk_protocol == IPPROTO_UDP) && 1302 (rt->dst.dev->features & NETIF_F_UFO) && 1303 (sk->sk_type == SOCK_DGRAM)) { 1304 err = ip6_ufo_append_data(sk, queue, getfrag, from, length, 1305 hh_len, fragheaderlen, 1306 transhdrlen, mtu, flags, rt); 1307 if (err) 1308 goto error; 1309 return 0; 1310 } 1311 1312 if (!skb) 1313 goto alloc_new_skb; 1314 1315 while (length > 0) { 1316 /* Check if the remaining data fits into current packet. */ 1317 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 1318 if (copy < length) 1319 copy = maxfraglen - skb->len; 1320 1321 if (copy <= 0) { 1322 char *data; 1323 unsigned int datalen; 1324 unsigned int fraglen; 1325 unsigned int fraggap; 1326 unsigned int alloclen; 1327 alloc_new_skb: 1328 /* There's no room in the current skb */ 1329 if (skb) 1330 fraggap = skb->len - maxfraglen; 1331 else 1332 fraggap = 0; 1333 /* update mtu and maxfraglen if necessary */ 1334 if (!skb || !skb_prev) 1335 ip6_append_data_mtu(&mtu, &maxfraglen, 1336 fragheaderlen, skb, rt, 1337 orig_mtu); 1338 1339 skb_prev = skb; 1340 1341 /* 1342 * If remaining data exceeds the mtu, 1343 * we know we need more fragment(s). 1344 */ 1345 datalen = length + fraggap; 1346 1347 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1348 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 1349 if ((flags & MSG_MORE) && 1350 !(rt->dst.dev->features&NETIF_F_SG)) 1351 alloclen = mtu; 1352 else 1353 alloclen = datalen + fragheaderlen; 1354 1355 alloclen += dst_exthdrlen; 1356 1357 if (datalen != length + fraggap) { 1358 /* 1359 * this is not the last fragment, the trailer 1360 * space is regarded as data space. 1361 */ 1362 datalen += rt->dst.trailer_len; 1363 } 1364 1365 alloclen += rt->dst.trailer_len; 1366 fraglen = datalen + fragheaderlen; 1367 1368 /* 1369 * We just reserve space for fragment header. 1370 * Note: this may be overallocation if the message 1371 * (without MSG_MORE) fits into the MTU. 1372 */ 1373 alloclen += sizeof(struct frag_hdr); 1374 1375 if (transhdrlen) { 1376 skb = sock_alloc_send_skb(sk, 1377 alloclen + hh_len, 1378 (flags & MSG_DONTWAIT), &err); 1379 } else { 1380 skb = NULL; 1381 if (atomic_read(&sk->sk_wmem_alloc) <= 1382 2 * sk->sk_sndbuf) 1383 skb = sock_wmalloc(sk, 1384 alloclen + hh_len, 1, 1385 sk->sk_allocation); 1386 if (unlikely(!skb)) 1387 err = -ENOBUFS; 1388 } 1389 if (!skb) 1390 goto error; 1391 /* 1392 * Fill in the control structures 1393 */ 1394 skb->protocol = htons(ETH_P_IPV6); 1395 skb->ip_summed = csummode; 1396 skb->csum = 0; 1397 /* reserve for fragmentation and ipsec header */ 1398 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 1399 dst_exthdrlen); 1400 1401 /* Only the initial fragment is time stamped */ 1402 skb_shinfo(skb)->tx_flags = tx_flags; 1403 tx_flags = 0; 1404 skb_shinfo(skb)->tskey = tskey; 1405 tskey = 0; 1406 1407 /* 1408 * Find where to start putting bytes 1409 */ 1410 data = skb_put(skb, fraglen); 1411 skb_set_network_header(skb, exthdrlen); 1412 data += fragheaderlen; 1413 skb->transport_header = (skb->network_header + 1414 fragheaderlen); 1415 if (fraggap) { 1416 skb->csum = skb_copy_and_csum_bits( 1417 skb_prev, maxfraglen, 1418 data + transhdrlen, fraggap, 0); 1419 skb_prev->csum = csum_sub(skb_prev->csum, 1420 skb->csum); 1421 data += fraggap; 1422 pskb_trim_unique(skb_prev, maxfraglen); 1423 } 1424 copy = datalen - transhdrlen - fraggap; 1425 1426 if (copy < 0) { 1427 err = -EINVAL; 1428 kfree_skb(skb); 1429 goto error; 1430 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 1431 err = -EFAULT; 1432 kfree_skb(skb); 1433 goto error; 1434 } 1435 1436 offset += copy; 1437 length -= datalen - fraggap; 1438 transhdrlen = 0; 1439 exthdrlen = 0; 1440 dst_exthdrlen = 0; 1441 1442 /* 1443 * Put the packet on the pending queue 1444 */ 1445 __skb_queue_tail(queue, skb); 1446 continue; 1447 } 1448 1449 if (copy > length) 1450 copy = length; 1451 1452 if (!(rt->dst.dev->features&NETIF_F_SG)) { 1453 unsigned int off; 1454 1455 off = skb->len; 1456 if (getfrag(from, skb_put(skb, copy), 1457 offset, copy, off, skb) < 0) { 1458 __skb_trim(skb, off); 1459 err = -EFAULT; 1460 goto error; 1461 } 1462 } else { 1463 int i = skb_shinfo(skb)->nr_frags; 1464 1465 err = -ENOMEM; 1466 if (!sk_page_frag_refill(sk, pfrag)) 1467 goto error; 1468 1469 if (!skb_can_coalesce(skb, i, pfrag->page, 1470 pfrag->offset)) { 1471 err = -EMSGSIZE; 1472 if (i == MAX_SKB_FRAGS) 1473 goto error; 1474 1475 __skb_fill_page_desc(skb, i, pfrag->page, 1476 pfrag->offset, 0); 1477 skb_shinfo(skb)->nr_frags = ++i; 1478 get_page(pfrag->page); 1479 } 1480 copy = min_t(int, copy, pfrag->size - pfrag->offset); 1481 if (getfrag(from, 1482 page_address(pfrag->page) + pfrag->offset, 1483 offset, copy, skb->len, skb) < 0) 1484 goto error_efault; 1485 1486 pfrag->offset += copy; 1487 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 1488 skb->len += copy; 1489 skb->data_len += copy; 1490 skb->truesize += copy; 1491 atomic_add(copy, &sk->sk_wmem_alloc); 1492 } 1493 offset += copy; 1494 length -= copy; 1495 } 1496 1497 return 0; 1498 1499 error_efault: 1500 err = -EFAULT; 1501 error: 1502 cork->length -= length; 1503 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1504 return err; 1505 } 1506 1507 int ip6_append_data(struct sock *sk, 1508 int getfrag(void *from, char *to, int offset, int len, 1509 int odd, struct sk_buff *skb), 1510 void *from, int length, int transhdrlen, int hlimit, 1511 int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, 1512 struct rt6_info *rt, unsigned int flags, int dontfrag) 1513 { 1514 struct inet_sock *inet = inet_sk(sk); 1515 struct ipv6_pinfo *np = inet6_sk(sk); 1516 int exthdrlen; 1517 int err; 1518 1519 if (flags&MSG_PROBE) 1520 return 0; 1521 if (skb_queue_empty(&sk->sk_write_queue)) { 1522 /* 1523 * setup for corking 1524 */ 1525 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, 1526 tclass, opt, rt, fl6); 1527 if (err) 1528 return err; 1529 1530 exthdrlen = (opt ? opt->opt_flen : 0); 1531 length += exthdrlen; 1532 transhdrlen += exthdrlen; 1533 } else { 1534 fl6 = &inet->cork.fl.u.ip6; 1535 transhdrlen = 0; 1536 } 1537 1538 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, 1539 &np->cork, sk_page_frag(sk), getfrag, 1540 from, length, transhdrlen, flags, dontfrag); 1541 } 1542 EXPORT_SYMBOL_GPL(ip6_append_data); 1543 1544 static void ip6_cork_release(struct inet_cork_full *cork, 1545 struct inet6_cork *v6_cork) 1546 { 1547 if (v6_cork->opt) { 1548 kfree(v6_cork->opt->dst0opt); 1549 kfree(v6_cork->opt->dst1opt); 1550 kfree(v6_cork->opt->hopopt); 1551 kfree(v6_cork->opt->srcrt); 1552 kfree(v6_cork->opt); 1553 v6_cork->opt = NULL; 1554 } 1555 1556 if (cork->base.dst) { 1557 dst_release(cork->base.dst); 1558 cork->base.dst = NULL; 1559 cork->base.flags &= ~IPCORK_ALLFRAG; 1560 } 1561 memset(&cork->fl, 0, sizeof(cork->fl)); 1562 } 1563 1564 struct sk_buff *__ip6_make_skb(struct sock *sk, 1565 struct sk_buff_head *queue, 1566 struct inet_cork_full *cork, 1567 struct inet6_cork *v6_cork) 1568 { 1569 struct sk_buff *skb, *tmp_skb; 1570 struct sk_buff **tail_skb; 1571 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 1572 struct ipv6_pinfo *np = inet6_sk(sk); 1573 struct net *net = sock_net(sk); 1574 struct ipv6hdr *hdr; 1575 struct ipv6_txoptions *opt = v6_cork->opt; 1576 struct rt6_info *rt = (struct rt6_info *)cork->base.dst; 1577 struct flowi6 *fl6 = &cork->fl.u.ip6; 1578 unsigned char proto = fl6->flowi6_proto; 1579 1580 skb = __skb_dequeue(queue); 1581 if (!skb) 1582 goto out; 1583 tail_skb = &(skb_shinfo(skb)->frag_list); 1584 1585 /* move skb->data to ip header from ext header */ 1586 if (skb->data < skb_network_header(skb)) 1587 __skb_pull(skb, skb_network_offset(skb)); 1588 while ((tmp_skb = __skb_dequeue(queue)) != NULL) { 1589 __skb_pull(tmp_skb, skb_network_header_len(skb)); 1590 *tail_skb = tmp_skb; 1591 tail_skb = &(tmp_skb->next); 1592 skb->len += tmp_skb->len; 1593 skb->data_len += tmp_skb->len; 1594 skb->truesize += tmp_skb->truesize; 1595 tmp_skb->destructor = NULL; 1596 tmp_skb->sk = NULL; 1597 } 1598 1599 /* Allow local fragmentation. */ 1600 skb->ignore_df = ip6_sk_ignore_df(sk); 1601 1602 *final_dst = fl6->daddr; 1603 __skb_pull(skb, skb_network_header_len(skb)); 1604 if (opt && opt->opt_flen) 1605 ipv6_push_frag_opts(skb, opt, &proto); 1606 if (opt && opt->opt_nflen) 1607 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 1608 1609 skb_push(skb, sizeof(struct ipv6hdr)); 1610 skb_reset_network_header(skb); 1611 hdr = ipv6_hdr(skb); 1612 1613 ip6_flow_hdr(hdr, v6_cork->tclass, 1614 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1615 np->autoflowlabel)); 1616 hdr->hop_limit = v6_cork->hop_limit; 1617 hdr->nexthdr = proto; 1618 hdr->saddr = fl6->saddr; 1619 hdr->daddr = *final_dst; 1620 1621 skb->priority = sk->sk_priority; 1622 skb->mark = sk->sk_mark; 1623 1624 skb_dst_set(skb, dst_clone(&rt->dst)); 1625 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1626 if (proto == IPPROTO_ICMPV6) { 1627 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1628 1629 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); 1630 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1631 } 1632 1633 ip6_cork_release(cork, v6_cork); 1634 out: 1635 return skb; 1636 } 1637 1638 int ip6_send_skb(struct sk_buff *skb) 1639 { 1640 struct net *net = sock_net(skb->sk); 1641 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 1642 int err; 1643 1644 err = ip6_local_out(skb); 1645 if (err) { 1646 if (err > 0) 1647 err = net_xmit_errno(err); 1648 if (err) 1649 IP6_INC_STATS(net, rt->rt6i_idev, 1650 IPSTATS_MIB_OUTDISCARDS); 1651 } 1652 1653 return err; 1654 } 1655 1656 int ip6_push_pending_frames(struct sock *sk) 1657 { 1658 struct sk_buff *skb; 1659 1660 skb = ip6_finish_skb(sk); 1661 if (!skb) 1662 return 0; 1663 1664 return ip6_send_skb(skb); 1665 } 1666 EXPORT_SYMBOL_GPL(ip6_push_pending_frames); 1667 1668 static void __ip6_flush_pending_frames(struct sock *sk, 1669 struct sk_buff_head *queue, 1670 struct inet_cork_full *cork, 1671 struct inet6_cork *v6_cork) 1672 { 1673 struct sk_buff *skb; 1674 1675 while ((skb = __skb_dequeue_tail(queue)) != NULL) { 1676 if (skb_dst(skb)) 1677 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1678 IPSTATS_MIB_OUTDISCARDS); 1679 kfree_skb(skb); 1680 } 1681 1682 ip6_cork_release(cork, v6_cork); 1683 } 1684 1685 void ip6_flush_pending_frames(struct sock *sk) 1686 { 1687 __ip6_flush_pending_frames(sk, &sk->sk_write_queue, 1688 &inet_sk(sk)->cork, &inet6_sk(sk)->cork); 1689 } 1690 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 1691 1692 struct sk_buff *ip6_make_skb(struct sock *sk, 1693 int getfrag(void *from, char *to, int offset, 1694 int len, int odd, struct sk_buff *skb), 1695 void *from, int length, int transhdrlen, 1696 int hlimit, int tclass, 1697 struct ipv6_txoptions *opt, struct flowi6 *fl6, 1698 struct rt6_info *rt, unsigned int flags, 1699 int dontfrag) 1700 { 1701 struct inet_cork_full cork; 1702 struct inet6_cork v6_cork; 1703 struct sk_buff_head queue; 1704 int exthdrlen = (opt ? opt->opt_flen : 0); 1705 int err; 1706 1707 if (flags & MSG_PROBE) 1708 return NULL; 1709 1710 __skb_queue_head_init(&queue); 1711 1712 cork.base.flags = 0; 1713 cork.base.addr = 0; 1714 cork.base.opt = NULL; 1715 v6_cork.opt = NULL; 1716 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); 1717 if (err) 1718 return ERR_PTR(err); 1719 1720 if (dontfrag < 0) 1721 dontfrag = inet6_sk(sk)->dontfrag; 1722 1723 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, 1724 ¤t->task_frag, getfrag, from, 1725 length + exthdrlen, transhdrlen + exthdrlen, 1726 flags, dontfrag); 1727 if (err) { 1728 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); 1729 return ERR_PTR(err); 1730 } 1731 1732 return __ip6_make_skb(sk, &queue, &cork, &v6_cork); 1733 } 1734