11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * IPv6 output functions 31da177e4SLinus Torvalds * Linux INET6 implementation 41da177e4SLinus Torvalds * 51da177e4SLinus Torvalds * Authors: 61da177e4SLinus Torvalds * Pedro Roque <roque@di.fc.ul.pt> 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * Based on linux/net/ipv4/ip_output.c 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 111da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 121da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 131da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 141da177e4SLinus Torvalds * 151da177e4SLinus Torvalds * Changes: 161da177e4SLinus Torvalds * A.N.Kuznetsov : airthmetics in fragmentation. 171da177e4SLinus Torvalds * extension headers are implemented. 181da177e4SLinus Torvalds * route changes now work. 191da177e4SLinus Torvalds * ip6_forward does not confuse sniffers. 201da177e4SLinus Torvalds * etc. 211da177e4SLinus Torvalds * 221da177e4SLinus Torvalds * H. von Brand : Added missing #include <linux/string.h> 231da177e4SLinus Torvalds * Imran Patel : frag id should be in NBO 241da177e4SLinus Torvalds * Kazunori MIYAZAWA @USAGI 251da177e4SLinus Torvalds * : add ip6_append_data and related functions 261da177e4SLinus Torvalds * for datagram xmit 271da177e4SLinus Torvalds */ 281da177e4SLinus Torvalds 291da177e4SLinus Torvalds #include <linux/errno.h> 30ef76bc23SHerbert Xu #include <linux/kernel.h> 311da177e4SLinus Torvalds #include <linux/string.h> 321da177e4SLinus Torvalds #include <linux/socket.h> 331da177e4SLinus Torvalds #include <linux/net.h> 341da177e4SLinus Torvalds #include <linux/netdevice.h> 351da177e4SLinus Torvalds #include <linux/if_arp.h> 361da177e4SLinus Torvalds #include <linux/in6.h> 371da177e4SLinus Torvalds #include <linux/tcp.h> 381da177e4SLinus Torvalds #include <linux/route.h> 39b59f45d0SHerbert Xu #include <linux/module.h> 405a0e3ad6STejun Heo #include <linux/slab.h> 411da177e4SLinus Torvalds 421da177e4SLinus Torvalds #include <linux/netfilter.h> 431da177e4SLinus Torvalds #include <linux/netfilter_ipv6.h> 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds #include <net/sock.h> 461da177e4SLinus Torvalds #include <net/snmp.h> 471da177e4SLinus Torvalds 481da177e4SLinus Torvalds #include <net/ipv6.h> 491da177e4SLinus Torvalds #include <net/ndisc.h> 501da177e4SLinus Torvalds #include <net/protocol.h> 511da177e4SLinus Torvalds #include <net/ip6_route.h> 521da177e4SLinus Torvalds #include <net/addrconf.h> 531da177e4SLinus Torvalds #include <net/rawv6.h> 541da177e4SLinus Torvalds #include <net/icmp.h> 551da177e4SLinus Torvalds #include <net/xfrm.h> 561da177e4SLinus Torvalds #include <net/checksum.h> 577bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h> 581da177e4SLinus Torvalds 59ad0081e4SDavid Stevens int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); 601da177e4SLinus Torvalds 61ef76bc23SHerbert Xu int __ip6_local_out(struct sk_buff *skb) 62ef76bc23SHerbert Xu { 63ef76bc23SHerbert Xu int len; 64ef76bc23SHerbert Xu 65ef76bc23SHerbert Xu len = skb->len - sizeof(struct ipv6hdr); 66ef76bc23SHerbert Xu if (len > IPV6_MAXPLEN) 67ef76bc23SHerbert Xu len = 0; 68ef76bc23SHerbert Xu ipv6_hdr(skb)->payload_len = htons(len); 69ef76bc23SHerbert Xu 70b2e0b385SJan Engelhardt return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 71b2e0b385SJan Engelhardt skb_dst(skb)->dev, dst_output); 72ef76bc23SHerbert Xu } 73ef76bc23SHerbert Xu 74ef76bc23SHerbert Xu int ip6_local_out(struct sk_buff *skb) 75ef76bc23SHerbert Xu { 76ef76bc23SHerbert Xu int err; 77ef76bc23SHerbert Xu 78ef76bc23SHerbert Xu err = __ip6_local_out(skb); 79ef76bc23SHerbert Xu if (likely(err == 1)) 80ef76bc23SHerbert Xu err = dst_output(skb); 81ef76bc23SHerbert Xu 82ef76bc23SHerbert Xu return err; 83ef76bc23SHerbert Xu } 84ef76bc23SHerbert Xu EXPORT_SYMBOL_GPL(ip6_local_out); 85ef76bc23SHerbert Xu 861da177e4SLinus Torvalds /* dev_loopback_xmit for use with netfilter. */ 871da177e4SLinus Torvalds static int ip6_dev_loopback_xmit(struct sk_buff *newskb) 881da177e4SLinus Torvalds { 89459a98edSArnaldo Carvalho de Melo skb_reset_mac_header(newskb); 90bbe735e4SArnaldo Carvalho de Melo __skb_pull(newskb, skb_network_offset(newskb)); 911da177e4SLinus Torvalds newskb->pkt_type = PACKET_LOOPBACK; 921da177e4SLinus Torvalds newskb->ip_summed = CHECKSUM_UNNECESSARY; 93adf30907SEric Dumazet WARN_ON(!skb_dst(newskb)); 941da177e4SLinus Torvalds 95e30b38c2SEric Dumazet netif_rx_ni(newskb); 961da177e4SLinus Torvalds return 0; 971da177e4SLinus Torvalds } 981da177e4SLinus Torvalds 999e508490SJan Engelhardt static int ip6_finish_output2(struct sk_buff *skb) 1001da177e4SLinus Torvalds { 101adf30907SEric Dumazet struct dst_entry *dst = skb_dst(skb); 1021da177e4SLinus Torvalds struct net_device *dev = dst->dev; 103f6b72b62SDavid S. Miller struct neighbour *neigh; 1041da177e4SLinus Torvalds 1051da177e4SLinus Torvalds skb->protocol = htons(ETH_P_IPV6); 1061da177e4SLinus Torvalds skb->dev = dev; 1071da177e4SLinus Torvalds 1080660e03fSArnaldo Carvalho de Melo if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 109adf30907SEric Dumazet struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1101da177e4SLinus Torvalds 1117ad6848cSOctavian Purdila if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && 112d1db275dSPatrick McHardy ((mroute6_socket(dev_net(dev), skb) && 113bd91b8bfSBenjamin Thery !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 1140660e03fSArnaldo Carvalho de Melo ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 1157bc570c8SYOSHIFUJI Hideaki &ipv6_hdr(skb)->saddr))) { 1161da177e4SLinus Torvalds struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 1171da177e4SLinus Torvalds 1181da177e4SLinus Torvalds /* Do not check for IFF_ALLMULTI; multicast routing 1191da177e4SLinus Torvalds is not supported in any case. 1201da177e4SLinus Torvalds */ 1211da177e4SLinus Torvalds if (newskb) 122b2e0b385SJan Engelhardt NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 123b2e0b385SJan Engelhardt newskb, NULL, newskb->dev, 1241da177e4SLinus Torvalds ip6_dev_loopback_xmit); 1251da177e4SLinus Torvalds 1260660e03fSArnaldo Carvalho de Melo if (ipv6_hdr(skb)->hop_limit == 0) { 1273bd653c8SDenis V. Lunev IP6_INC_STATS(dev_net(dev), idev, 1283bd653c8SDenis V. Lunev IPSTATS_MIB_OUTDISCARDS); 1291da177e4SLinus Torvalds kfree_skb(skb); 1301da177e4SLinus Torvalds return 0; 1311da177e4SLinus Torvalds } 1321da177e4SLinus Torvalds } 1331da177e4SLinus Torvalds 134edf391ffSNeil Horman IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, 135edf391ffSNeil Horman skb->len); 1361da177e4SLinus Torvalds } 1371da177e4SLinus Torvalds 138f2c31e32SEric Dumazet rcu_read_lock(); 13969cce1d1SDavid S. Miller neigh = dst_get_neighbour(dst); 140f2c31e32SEric Dumazet if (neigh) { 141f2c31e32SEric Dumazet int res = neigh_output(neigh, skb); 14205e3aa09SDavid S. Miller 143f2c31e32SEric Dumazet rcu_read_unlock(); 144f2c31e32SEric Dumazet return res; 145f2c31e32SEric Dumazet } 146f2c31e32SEric Dumazet rcu_read_unlock(); 1479e508490SJan Engelhardt IP6_INC_STATS_BH(dev_net(dst->dev), 1489e508490SJan Engelhardt ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 1499e508490SJan Engelhardt kfree_skb(skb); 1509e508490SJan Engelhardt return -EINVAL; 1511da177e4SLinus Torvalds } 1521da177e4SLinus Torvalds 1539e508490SJan Engelhardt static int ip6_finish_output(struct sk_buff *skb) 1549e508490SJan Engelhardt { 1559e508490SJan Engelhardt if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 1569e508490SJan Engelhardt dst_allfrag(skb_dst(skb))) 1579e508490SJan Engelhardt return ip6_fragment(skb, ip6_finish_output2); 1589e508490SJan Engelhardt else 1599e508490SJan Engelhardt return ip6_finish_output2(skb); 1609e508490SJan Engelhardt } 1619e508490SJan Engelhardt 1621da177e4SLinus Torvalds int ip6_output(struct sk_buff *skb) 1631da177e4SLinus Torvalds { 1649e508490SJan Engelhardt struct net_device *dev = skb_dst(skb)->dev; 165adf30907SEric Dumazet struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 166778d80beSYOSHIFUJI Hideaki if (unlikely(idev->cnf.disable_ipv6)) { 1679e508490SJan Engelhardt IP6_INC_STATS(dev_net(dev), idev, 1683bd653c8SDenis V. Lunev IPSTATS_MIB_OUTDISCARDS); 169778d80beSYOSHIFUJI Hideaki kfree_skb(skb); 170778d80beSYOSHIFUJI Hideaki return 0; 171778d80beSYOSHIFUJI Hideaki } 172778d80beSYOSHIFUJI Hideaki 1739c6eb28aSJan Engelhardt return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, 1749c6eb28aSJan Engelhardt ip6_finish_output, 1759c6eb28aSJan Engelhardt !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 1761da177e4SLinus Torvalds } 1771da177e4SLinus Torvalds 1781da177e4SLinus Torvalds /* 179b5d43998SShan Wei * xmit an sk_buff (used by TCP, SCTP and DCCP) 1801da177e4SLinus Torvalds */ 1811da177e4SLinus Torvalds 1824c9483b2SDavid S. Miller int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 1834e15ed4dSShan Wei struct ipv6_txoptions *opt) 1841da177e4SLinus Torvalds { 1853bd653c8SDenis V. Lunev struct net *net = sock_net(sk); 186b30bd282SPatrick McHardy struct ipv6_pinfo *np = inet6_sk(sk); 1874c9483b2SDavid S. Miller struct in6_addr *first_hop = &fl6->daddr; 188adf30907SEric Dumazet struct dst_entry *dst = skb_dst(skb); 1891da177e4SLinus Torvalds struct ipv6hdr *hdr; 1904c9483b2SDavid S. Miller u8 proto = fl6->flowi6_proto; 1911da177e4SLinus Torvalds int seg_len = skb->len; 192e651f03aSGerrit Renker int hlimit = -1; 193e651f03aSGerrit Renker int tclass = 0; 1941da177e4SLinus Torvalds u32 mtu; 1951da177e4SLinus Torvalds 1961da177e4SLinus Torvalds if (opt) { 197c2636b4dSChuck Lever unsigned int head_room; 1981da177e4SLinus Torvalds 1991da177e4SLinus Torvalds /* First: exthdrs may take lots of space (~8K for now) 2001da177e4SLinus Torvalds MAX_HEADER is not enough. 2011da177e4SLinus Torvalds */ 2021da177e4SLinus Torvalds head_room = opt->opt_nflen + opt->opt_flen; 2031da177e4SLinus Torvalds seg_len += head_room; 2041da177e4SLinus Torvalds head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 2051da177e4SLinus Torvalds 2061da177e4SLinus Torvalds if (skb_headroom(skb) < head_room) { 2071da177e4SLinus Torvalds struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 208a11d206dSYOSHIFUJI Hideaki if (skb2 == NULL) { 209adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 210a11d206dSYOSHIFUJI Hideaki IPSTATS_MIB_OUTDISCARDS); 2111da177e4SLinus Torvalds kfree_skb(skb); 2121da177e4SLinus Torvalds return -ENOBUFS; 2131da177e4SLinus Torvalds } 214a11d206dSYOSHIFUJI Hideaki kfree_skb(skb); 215a11d206dSYOSHIFUJI Hideaki skb = skb2; 2161da177e4SLinus Torvalds skb_set_owner_w(skb, sk); 2171da177e4SLinus Torvalds } 2181da177e4SLinus Torvalds if (opt->opt_flen) 2191da177e4SLinus Torvalds ipv6_push_frag_opts(skb, opt, &proto); 2201da177e4SLinus Torvalds if (opt->opt_nflen) 2211da177e4SLinus Torvalds ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); 2221da177e4SLinus Torvalds } 2231da177e4SLinus Torvalds 224e2d1bca7SArnaldo Carvalho de Melo skb_push(skb, sizeof(struct ipv6hdr)); 225e2d1bca7SArnaldo Carvalho de Melo skb_reset_network_header(skb); 2260660e03fSArnaldo Carvalho de Melo hdr = ipv6_hdr(skb); 2271da177e4SLinus Torvalds 2281da177e4SLinus Torvalds /* 2291da177e4SLinus Torvalds * Fill in the IPv6 header 2301da177e4SLinus Torvalds */ 231e651f03aSGerrit Renker if (np) { 232e651f03aSGerrit Renker tclass = np->tclass; 2331da177e4SLinus Torvalds hlimit = np->hop_limit; 234e651f03aSGerrit Renker } 2351da177e4SLinus Torvalds if (hlimit < 0) 2366b75d090SYOSHIFUJI Hideaki hlimit = ip6_dst_hoplimit(dst); 2371da177e4SLinus Torvalds 2384c9483b2SDavid S. Miller *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; 23941a1f8eaSYOSHIFUJI Hideaki 2401da177e4SLinus Torvalds hdr->payload_len = htons(seg_len); 2411da177e4SLinus Torvalds hdr->nexthdr = proto; 2421da177e4SLinus Torvalds hdr->hop_limit = hlimit; 2431da177e4SLinus Torvalds 2444c9483b2SDavid S. Miller ipv6_addr_copy(&hdr->saddr, &fl6->saddr); 2451da177e4SLinus Torvalds ipv6_addr_copy(&hdr->daddr, first_hop); 2461da177e4SLinus Torvalds 247a2c2064fSPatrick McHardy skb->priority = sk->sk_priority; 2484a19ec58SLaszlo Attila Toth skb->mark = sk->sk_mark; 249a2c2064fSPatrick McHardy 2501da177e4SLinus Torvalds mtu = dst_mtu(dst); 251283d07acSWei Yongjun if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { 252adf30907SEric Dumazet IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 253edf391ffSNeil Horman IPSTATS_MIB_OUT, skb->len); 254b2e0b385SJan Engelhardt return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 255b2e0b385SJan Engelhardt dst->dev, dst_output); 2561da177e4SLinus Torvalds } 2571da177e4SLinus Torvalds 2581da177e4SLinus Torvalds if (net_ratelimit()) 2591da177e4SLinus Torvalds printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); 2601da177e4SLinus Torvalds skb->dev = dst->dev; 2613ffe533cSAlexey Dobriyan icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 262adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 2631da177e4SLinus Torvalds kfree_skb(skb); 2641da177e4SLinus Torvalds return -EMSGSIZE; 2651da177e4SLinus Torvalds } 2661da177e4SLinus Torvalds 2677159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(ip6_xmit); 2687159039aSYOSHIFUJI Hideaki 2691da177e4SLinus Torvalds /* 2701da177e4SLinus Torvalds * To avoid extra problems ND packets are send through this 2711da177e4SLinus Torvalds * routine. It's code duplication but I really want to avoid 2721da177e4SLinus Torvalds * extra checks since ipv6_build_header is used by TCP (which 2731da177e4SLinus Torvalds * is for us performance critical) 2741da177e4SLinus Torvalds */ 2751da177e4SLinus Torvalds 2761da177e4SLinus Torvalds int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, 2779acd9f3aSYOSHIFUJI Hideaki const struct in6_addr *saddr, const struct in6_addr *daddr, 2781da177e4SLinus Torvalds int proto, int len) 2791da177e4SLinus Torvalds { 2801da177e4SLinus Torvalds struct ipv6_pinfo *np = inet6_sk(sk); 2811da177e4SLinus Torvalds struct ipv6hdr *hdr; 2821da177e4SLinus Torvalds 2831da177e4SLinus Torvalds skb->protocol = htons(ETH_P_IPV6); 2841da177e4SLinus Torvalds skb->dev = dev; 2851da177e4SLinus Torvalds 28655f79cc0SArnaldo Carvalho de Melo skb_reset_network_header(skb); 28755f79cc0SArnaldo Carvalho de Melo skb_put(skb, sizeof(struct ipv6hdr)); 2880660e03fSArnaldo Carvalho de Melo hdr = ipv6_hdr(skb); 2891da177e4SLinus Torvalds 290ae08e1f0SAl Viro *(__be32*)hdr = htonl(0x60000000); 2911da177e4SLinus Torvalds 2921da177e4SLinus Torvalds hdr->payload_len = htons(len); 2931da177e4SLinus Torvalds hdr->nexthdr = proto; 2941da177e4SLinus Torvalds hdr->hop_limit = np->hop_limit; 2951da177e4SLinus Torvalds 2961da177e4SLinus Torvalds ipv6_addr_copy(&hdr->saddr, saddr); 2971da177e4SLinus Torvalds ipv6_addr_copy(&hdr->daddr, daddr); 2981da177e4SLinus Torvalds 2991da177e4SLinus Torvalds return 0; 3001da177e4SLinus Torvalds } 3011da177e4SLinus Torvalds 3021da177e4SLinus Torvalds static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 3031da177e4SLinus Torvalds { 3041da177e4SLinus Torvalds struct ip6_ra_chain *ra; 3051da177e4SLinus Torvalds struct sock *last = NULL; 3061da177e4SLinus Torvalds 3071da177e4SLinus Torvalds read_lock(&ip6_ra_lock); 3081da177e4SLinus Torvalds for (ra = ip6_ra_chain; ra; ra = ra->next) { 3091da177e4SLinus Torvalds struct sock *sk = ra->sk; 3100bd1b59bSAndrew McDonald if (sk && ra->sel == sel && 3110bd1b59bSAndrew McDonald (!sk->sk_bound_dev_if || 3120bd1b59bSAndrew McDonald sk->sk_bound_dev_if == skb->dev->ifindex)) { 3131da177e4SLinus Torvalds if (last) { 3141da177e4SLinus Torvalds struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 3151da177e4SLinus Torvalds if (skb2) 3161da177e4SLinus Torvalds rawv6_rcv(last, skb2); 3171da177e4SLinus Torvalds } 3181da177e4SLinus Torvalds last = sk; 3191da177e4SLinus Torvalds } 3201da177e4SLinus Torvalds } 3211da177e4SLinus Torvalds 3221da177e4SLinus Torvalds if (last) { 3231da177e4SLinus Torvalds rawv6_rcv(last, skb); 3241da177e4SLinus Torvalds read_unlock(&ip6_ra_lock); 3251da177e4SLinus Torvalds return 1; 3261da177e4SLinus Torvalds } 3271da177e4SLinus Torvalds read_unlock(&ip6_ra_lock); 3281da177e4SLinus Torvalds return 0; 3291da177e4SLinus Torvalds } 3301da177e4SLinus Torvalds 331e21e0b5fSVille Nuorvala static int ip6_forward_proxy_check(struct sk_buff *skb) 332e21e0b5fSVille Nuorvala { 3330660e03fSArnaldo Carvalho de Melo struct ipv6hdr *hdr = ipv6_hdr(skb); 334e21e0b5fSVille Nuorvala u8 nexthdr = hdr->nexthdr; 335e21e0b5fSVille Nuorvala int offset; 336e21e0b5fSVille Nuorvala 337e21e0b5fSVille Nuorvala if (ipv6_ext_hdr(nexthdr)) { 338e21e0b5fSVille Nuorvala offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); 339e21e0b5fSVille Nuorvala if (offset < 0) 340e21e0b5fSVille Nuorvala return 0; 341e21e0b5fSVille Nuorvala } else 342e21e0b5fSVille Nuorvala offset = sizeof(struct ipv6hdr); 343e21e0b5fSVille Nuorvala 344e21e0b5fSVille Nuorvala if (nexthdr == IPPROTO_ICMPV6) { 345e21e0b5fSVille Nuorvala struct icmp6hdr *icmp6; 346e21e0b5fSVille Nuorvala 347d56f90a7SArnaldo Carvalho de Melo if (!pskb_may_pull(skb, (skb_network_header(skb) + 348d56f90a7SArnaldo Carvalho de Melo offset + 1 - skb->data))) 349e21e0b5fSVille Nuorvala return 0; 350e21e0b5fSVille Nuorvala 351d56f90a7SArnaldo Carvalho de Melo icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 352e21e0b5fSVille Nuorvala 353e21e0b5fSVille Nuorvala switch (icmp6->icmp6_type) { 354e21e0b5fSVille Nuorvala case NDISC_ROUTER_SOLICITATION: 355e21e0b5fSVille Nuorvala case NDISC_ROUTER_ADVERTISEMENT: 356e21e0b5fSVille Nuorvala case NDISC_NEIGHBOUR_SOLICITATION: 357e21e0b5fSVille Nuorvala case NDISC_NEIGHBOUR_ADVERTISEMENT: 358e21e0b5fSVille Nuorvala case NDISC_REDIRECT: 359e21e0b5fSVille Nuorvala /* For reaction involving unicast neighbor discovery 360e21e0b5fSVille Nuorvala * message destined to the proxied address, pass it to 361e21e0b5fSVille Nuorvala * input function. 362e21e0b5fSVille Nuorvala */ 363e21e0b5fSVille Nuorvala return 1; 364e21e0b5fSVille Nuorvala default: 365e21e0b5fSVille Nuorvala break; 366e21e0b5fSVille Nuorvala } 367e21e0b5fSVille Nuorvala } 368e21e0b5fSVille Nuorvala 36974553b09SVille Nuorvala /* 37074553b09SVille Nuorvala * The proxying router can't forward traffic sent to a link-local 37174553b09SVille Nuorvala * address, so signal the sender and discard the packet. This 37274553b09SVille Nuorvala * behavior is clarified by the MIPv6 specification. 37374553b09SVille Nuorvala */ 37474553b09SVille Nuorvala if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 37574553b09SVille Nuorvala dst_link_failure(skb); 37674553b09SVille Nuorvala return -1; 37774553b09SVille Nuorvala } 37874553b09SVille Nuorvala 379e21e0b5fSVille Nuorvala return 0; 380e21e0b5fSVille Nuorvala } 381e21e0b5fSVille Nuorvala 3821da177e4SLinus Torvalds static inline int ip6_forward_finish(struct sk_buff *skb) 3831da177e4SLinus Torvalds { 3841da177e4SLinus Torvalds return dst_output(skb); 3851da177e4SLinus Torvalds } 3861da177e4SLinus Torvalds 3871da177e4SLinus Torvalds int ip6_forward(struct sk_buff *skb) 3881da177e4SLinus Torvalds { 389adf30907SEric Dumazet struct dst_entry *dst = skb_dst(skb); 3900660e03fSArnaldo Carvalho de Melo struct ipv6hdr *hdr = ipv6_hdr(skb); 3911da177e4SLinus Torvalds struct inet6_skb_parm *opt = IP6CB(skb); 392c346dca1SYOSHIFUJI Hideaki struct net *net = dev_net(dst->dev); 39369cce1d1SDavid S. Miller struct neighbour *n; 39414f3ad6fSUlrich Weber u32 mtu; 3951da177e4SLinus Torvalds 39653b7997fSYOSHIFUJI Hideaki if (net->ipv6.devconf_all->forwarding == 0) 3971da177e4SLinus Torvalds goto error; 3981da177e4SLinus Torvalds 3994497b076SBen Hutchings if (skb_warn_if_lro(skb)) 4004497b076SBen Hutchings goto drop; 4014497b076SBen Hutchings 4021da177e4SLinus Torvalds if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 4033bd653c8SDenis V. Lunev IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 4041da177e4SLinus Torvalds goto drop; 4051da177e4SLinus Torvalds } 4061da177e4SLinus Torvalds 40772b43d08SAlexey Kuznetsov if (skb->pkt_type != PACKET_HOST) 40872b43d08SAlexey Kuznetsov goto drop; 40972b43d08SAlexey Kuznetsov 41035fc92a9SHerbert Xu skb_forward_csum(skb); 4111da177e4SLinus Torvalds 4121da177e4SLinus Torvalds /* 4131da177e4SLinus Torvalds * We DO NOT make any processing on 4141da177e4SLinus Torvalds * RA packets, pushing them to user level AS IS 4151da177e4SLinus Torvalds * without ane WARRANTY that application will be able 4161da177e4SLinus Torvalds * to interpret them. The reason is that we 4171da177e4SLinus Torvalds * cannot make anything clever here. 4181da177e4SLinus Torvalds * 4191da177e4SLinus Torvalds * We are not end-node, so that if packet contains 4201da177e4SLinus Torvalds * AH/ESP, we cannot make anything. 4211da177e4SLinus Torvalds * Defragmentation also would be mistake, RA packets 4221da177e4SLinus Torvalds * cannot be fragmented, because there is no warranty 4231da177e4SLinus Torvalds * that different fragments will go along one path. --ANK 4241da177e4SLinus Torvalds */ 4251da177e4SLinus Torvalds if (opt->ra) { 426d56f90a7SArnaldo Carvalho de Melo u8 *ptr = skb_network_header(skb) + opt->ra; 4271da177e4SLinus Torvalds if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) 4281da177e4SLinus Torvalds return 0; 4291da177e4SLinus Torvalds } 4301da177e4SLinus Torvalds 4311da177e4SLinus Torvalds /* 4321da177e4SLinus Torvalds * check and decrement ttl 4331da177e4SLinus Torvalds */ 4341da177e4SLinus Torvalds if (hdr->hop_limit <= 1) { 4351da177e4SLinus Torvalds /* Force OUTPUT device used as source address */ 4361da177e4SLinus Torvalds skb->dev = dst->dev; 4373ffe533cSAlexey Dobriyan icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 438483a47d2SDenis V. Lunev IP6_INC_STATS_BH(net, 439483a47d2SDenis V. Lunev ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 4401da177e4SLinus Torvalds 4411da177e4SLinus Torvalds kfree_skb(skb); 4421da177e4SLinus Torvalds return -ETIMEDOUT; 4431da177e4SLinus Torvalds } 4441da177e4SLinus Torvalds 445fbea49e1SYOSHIFUJI Hideaki /* XXX: idev->cnf.proxy_ndp? */ 44653b7997fSYOSHIFUJI Hideaki if (net->ipv6.devconf_all->proxy_ndp && 4478a3edd80SDaniel Lezcano pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 44874553b09SVille Nuorvala int proxied = ip6_forward_proxy_check(skb); 44974553b09SVille Nuorvala if (proxied > 0) 450e21e0b5fSVille Nuorvala return ip6_input(skb); 45174553b09SVille Nuorvala else if (proxied < 0) { 4523bd653c8SDenis V. Lunev IP6_INC_STATS(net, ip6_dst_idev(dst), 4533bd653c8SDenis V. Lunev IPSTATS_MIB_INDISCARDS); 45474553b09SVille Nuorvala goto drop; 45574553b09SVille Nuorvala } 456e21e0b5fSVille Nuorvala } 457e21e0b5fSVille Nuorvala 4581da177e4SLinus Torvalds if (!xfrm6_route_forward(skb)) { 4593bd653c8SDenis V. Lunev IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); 4601da177e4SLinus Torvalds goto drop; 4611da177e4SLinus Torvalds } 462adf30907SEric Dumazet dst = skb_dst(skb); 4631da177e4SLinus Torvalds 4641da177e4SLinus Torvalds /* IPv6 specs say nothing about it, but it is clear that we cannot 4651da177e4SLinus Torvalds send redirects to source routed frames. 4661e5dc146SMasahide NAKAMURA We don't send redirects to frames decapsulated from IPsec. 4671da177e4SLinus Torvalds */ 46869cce1d1SDavid S. Miller n = dst_get_neighbour(dst); 46969cce1d1SDavid S. Miller if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) { 4701da177e4SLinus Torvalds struct in6_addr *target = NULL; 4711da177e4SLinus Torvalds struct rt6_info *rt; 4721da177e4SLinus Torvalds 4731da177e4SLinus Torvalds /* 4741da177e4SLinus Torvalds * incoming and outgoing devices are the same 4751da177e4SLinus Torvalds * send a redirect. 4761da177e4SLinus Torvalds */ 4771da177e4SLinus Torvalds 4781da177e4SLinus Torvalds rt = (struct rt6_info *) dst; 4791da177e4SLinus Torvalds if ((rt->rt6i_flags & RTF_GATEWAY)) 4801da177e4SLinus Torvalds target = (struct in6_addr*)&n->primary_key; 4811da177e4SLinus Torvalds else 4821da177e4SLinus Torvalds target = &hdr->daddr; 4831da177e4SLinus Torvalds 48492d86829SDavid S. Miller if (!rt->rt6i_peer) 48592d86829SDavid S. Miller rt6_bind_peer(rt, 1); 48692d86829SDavid S. Miller 4871da177e4SLinus Torvalds /* Limit redirects both by destination (here) 4881da177e4SLinus Torvalds and by source (inside ndisc_send_redirect) 4891da177e4SLinus Torvalds */ 49092d86829SDavid S. Miller if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) 4911da177e4SLinus Torvalds ndisc_send_redirect(skb, n, target); 4925bb1ab09SDavid L Stevens } else { 4935bb1ab09SDavid L Stevens int addrtype = ipv6_addr_type(&hdr->saddr); 4945bb1ab09SDavid L Stevens 4951da177e4SLinus Torvalds /* This check is security critical. */ 496f81b2e7dSYOSHIFUJI Hideaki if (addrtype == IPV6_ADDR_ANY || 497f81b2e7dSYOSHIFUJI Hideaki addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 4981da177e4SLinus Torvalds goto error; 4995bb1ab09SDavid L Stevens if (addrtype & IPV6_ADDR_LINKLOCAL) { 5005bb1ab09SDavid L Stevens icmpv6_send(skb, ICMPV6_DEST_UNREACH, 5013ffe533cSAlexey Dobriyan ICMPV6_NOT_NEIGHBOUR, 0); 5025bb1ab09SDavid L Stevens goto error; 5035bb1ab09SDavid L Stevens } 5041da177e4SLinus Torvalds } 5051da177e4SLinus Torvalds 50614f3ad6fSUlrich Weber mtu = dst_mtu(dst); 50714f3ad6fSUlrich Weber if (mtu < IPV6_MIN_MTU) 50814f3ad6fSUlrich Weber mtu = IPV6_MIN_MTU; 50914f3ad6fSUlrich Weber 5100aa68271SHerbert Xu if (skb->len > mtu && !skb_is_gso(skb)) { 5111da177e4SLinus Torvalds /* Again, force OUTPUT device used as source address */ 5121da177e4SLinus Torvalds skb->dev = dst->dev; 51314f3ad6fSUlrich Weber icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 514483a47d2SDenis V. Lunev IP6_INC_STATS_BH(net, 515483a47d2SDenis V. Lunev ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); 516483a47d2SDenis V. Lunev IP6_INC_STATS_BH(net, 517483a47d2SDenis V. Lunev ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); 5181da177e4SLinus Torvalds kfree_skb(skb); 5191da177e4SLinus Torvalds return -EMSGSIZE; 5201da177e4SLinus Torvalds } 5211da177e4SLinus Torvalds 5221da177e4SLinus Torvalds if (skb_cow(skb, dst->dev->hard_header_len)) { 5233bd653c8SDenis V. Lunev IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); 5241da177e4SLinus Torvalds goto drop; 5251da177e4SLinus Torvalds } 5261da177e4SLinus Torvalds 5270660e03fSArnaldo Carvalho de Melo hdr = ipv6_hdr(skb); 5281da177e4SLinus Torvalds 5291da177e4SLinus Torvalds /* Mangling hops number delayed to point after skb COW */ 5301da177e4SLinus Torvalds 5311da177e4SLinus Torvalds hdr->hop_limit--; 5321da177e4SLinus Torvalds 533483a47d2SDenis V. Lunev IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 534b2e0b385SJan Engelhardt return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 5356e23ae2aSPatrick McHardy ip6_forward_finish); 5361da177e4SLinus Torvalds 5371da177e4SLinus Torvalds error: 538483a47d2SDenis V. Lunev IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); 5391da177e4SLinus Torvalds drop: 5401da177e4SLinus Torvalds kfree_skb(skb); 5411da177e4SLinus Torvalds return -EINVAL; 5421da177e4SLinus Torvalds } 5431da177e4SLinus Torvalds 5441da177e4SLinus Torvalds static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 5451da177e4SLinus Torvalds { 5461da177e4SLinus Torvalds to->pkt_type = from->pkt_type; 5471da177e4SLinus Torvalds to->priority = from->priority; 5481da177e4SLinus Torvalds to->protocol = from->protocol; 549adf30907SEric Dumazet skb_dst_drop(to); 550adf30907SEric Dumazet skb_dst_set(to, dst_clone(skb_dst(from))); 5511da177e4SLinus Torvalds to->dev = from->dev; 55282e91ffeSThomas Graf to->mark = from->mark; 5531da177e4SLinus Torvalds 5541da177e4SLinus Torvalds #ifdef CONFIG_NET_SCHED 5551da177e4SLinus Torvalds to->tc_index = from->tc_index; 5561da177e4SLinus Torvalds #endif 557e7ac05f3SYasuyuki Kozakai nf_copy(to, from); 558ba9dda3aSJozsef Kadlecsik #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 559ba9dda3aSJozsef Kadlecsik defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 560ba9dda3aSJozsef Kadlecsik to->nf_trace = from->nf_trace; 561ba9dda3aSJozsef Kadlecsik #endif 562984bc16cSJames Morris skb_copy_secmark(to, from); 5631da177e4SLinus Torvalds } 5641da177e4SLinus Torvalds 5651da177e4SLinus Torvalds int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) 5661da177e4SLinus Torvalds { 5671da177e4SLinus Torvalds u16 offset = sizeof(struct ipv6hdr); 5680660e03fSArnaldo Carvalho de Melo struct ipv6_opt_hdr *exthdr = 5690660e03fSArnaldo Carvalho de Melo (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); 57027a884dcSArnaldo Carvalho de Melo unsigned int packet_len = skb->tail - skb->network_header; 5711da177e4SLinus Torvalds int found_rhdr = 0; 5720660e03fSArnaldo Carvalho de Melo *nexthdr = &ipv6_hdr(skb)->nexthdr; 5731da177e4SLinus Torvalds 5741da177e4SLinus Torvalds while (offset + 1 <= packet_len) { 5751da177e4SLinus Torvalds 5761da177e4SLinus Torvalds switch (**nexthdr) { 5771da177e4SLinus Torvalds 5781da177e4SLinus Torvalds case NEXTHDR_HOP: 57927637df9SMasahide NAKAMURA break; 5801da177e4SLinus Torvalds case NEXTHDR_ROUTING: 58127637df9SMasahide NAKAMURA found_rhdr = 1; 58227637df9SMasahide NAKAMURA break; 5831da177e4SLinus Torvalds case NEXTHDR_DEST: 58459fbb3a6SMasahide NAKAMURA #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 58527637df9SMasahide NAKAMURA if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) 58627637df9SMasahide NAKAMURA break; 58727637df9SMasahide NAKAMURA #endif 58827637df9SMasahide NAKAMURA if (found_rhdr) 58927637df9SMasahide NAKAMURA return offset; 5901da177e4SLinus Torvalds break; 5911da177e4SLinus Torvalds default : 5921da177e4SLinus Torvalds return offset; 5931da177e4SLinus Torvalds } 59427637df9SMasahide NAKAMURA 59527637df9SMasahide NAKAMURA offset += ipv6_optlen(exthdr); 59627637df9SMasahide NAKAMURA *nexthdr = &exthdr->nexthdr; 597d56f90a7SArnaldo Carvalho de Melo exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + 598d56f90a7SArnaldo Carvalho de Melo offset); 5991da177e4SLinus Torvalds } 6001da177e4SLinus Torvalds 6011da177e4SLinus Torvalds return offset; 6021da177e4SLinus Torvalds } 6031da177e4SLinus Torvalds 60487c48fa3SEric Dumazet void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) 60587c48fa3SEric Dumazet { 60687c48fa3SEric Dumazet static atomic_t ipv6_fragmentation_id; 60787c48fa3SEric Dumazet int old, new; 60887c48fa3SEric Dumazet 60987c48fa3SEric Dumazet if (rt) { 61087c48fa3SEric Dumazet struct inet_peer *peer; 61187c48fa3SEric Dumazet 61287c48fa3SEric Dumazet if (!rt->rt6i_peer) 61387c48fa3SEric Dumazet rt6_bind_peer(rt, 1); 61487c48fa3SEric Dumazet peer = rt->rt6i_peer; 61587c48fa3SEric Dumazet if (peer) { 61687c48fa3SEric Dumazet fhdr->identification = htonl(inet_getid(peer, 0)); 61787c48fa3SEric Dumazet return; 61887c48fa3SEric Dumazet } 61987c48fa3SEric Dumazet } 62087c48fa3SEric Dumazet do { 62187c48fa3SEric Dumazet old = atomic_read(&ipv6_fragmentation_id); 62287c48fa3SEric Dumazet new = old + 1; 62387c48fa3SEric Dumazet if (!new) 62487c48fa3SEric Dumazet new = 1; 62587c48fa3SEric Dumazet } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); 62687c48fa3SEric Dumazet fhdr->identification = htonl(new); 62787c48fa3SEric Dumazet } 62887c48fa3SEric Dumazet 629ad0081e4SDavid Stevens int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 6301da177e4SLinus Torvalds { 6311da177e4SLinus Torvalds struct sk_buff *frag; 632adf30907SEric Dumazet struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); 633d91675f9SYOSHIFUJI Hideaki struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; 6341da177e4SLinus Torvalds struct ipv6hdr *tmp_hdr; 6351da177e4SLinus Torvalds struct frag_hdr *fh; 6361da177e4SLinus Torvalds unsigned int mtu, hlen, left, len; 637ae08e1f0SAl Viro __be32 frag_id = 0; 6381da177e4SLinus Torvalds int ptr, offset = 0, err=0; 6391da177e4SLinus Torvalds u8 *prevhdr, nexthdr = 0; 640adf30907SEric Dumazet struct net *net = dev_net(skb_dst(skb)->dev); 6411da177e4SLinus Torvalds 6421da177e4SLinus Torvalds hlen = ip6_find_1stfragopt(skb, &prevhdr); 6431da177e4SLinus Torvalds nexthdr = *prevhdr; 6441da177e4SLinus Torvalds 645628a5c56SJohn Heffner mtu = ip6_skb_dst_mtu(skb); 646b881ef76SJohn Heffner 647b881ef76SJohn Heffner /* We must not fragment if the socket is set to force MTU discovery 64814f3ad6fSUlrich Weber * or if the skb it not generated by a local socket. 649b881ef76SJohn Heffner */ 650f2228f78SShan Wei if (!skb->local_df && skb->len > mtu) { 651adf30907SEric Dumazet skb->dev = skb_dst(skb)->dev; 6523ffe533cSAlexey Dobriyan icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 653adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 6543bd653c8SDenis V. Lunev IPSTATS_MIB_FRAGFAILS); 655b881ef76SJohn Heffner kfree_skb(skb); 656b881ef76SJohn Heffner return -EMSGSIZE; 657b881ef76SJohn Heffner } 658b881ef76SJohn Heffner 659d91675f9SYOSHIFUJI Hideaki if (np && np->frag_size < mtu) { 660d91675f9SYOSHIFUJI Hideaki if (np->frag_size) 661d91675f9SYOSHIFUJI Hideaki mtu = np->frag_size; 662d91675f9SYOSHIFUJI Hideaki } 663d91675f9SYOSHIFUJI Hideaki mtu -= hlen + sizeof(struct frag_hdr); 6641da177e4SLinus Torvalds 66521dc3301SDavid S. Miller if (skb_has_frag_list(skb)) { 6661da177e4SLinus Torvalds int first_len = skb_pagelen(skb); 6673d13008eSEric Dumazet struct sk_buff *frag2; 6681da177e4SLinus Torvalds 6691da177e4SLinus Torvalds if (first_len - hlen > mtu || 6701da177e4SLinus Torvalds ((first_len - hlen) & 7) || 6711da177e4SLinus Torvalds skb_cloned(skb)) 6721da177e4SLinus Torvalds goto slow_path; 6731da177e4SLinus Torvalds 6744d9092bbSDavid S. Miller skb_walk_frags(skb, frag) { 6751da177e4SLinus Torvalds /* Correct geometry. */ 6761da177e4SLinus Torvalds if (frag->len > mtu || 6771da177e4SLinus Torvalds ((frag->len & 7) && frag->next) || 6781da177e4SLinus Torvalds skb_headroom(frag) < hlen) 6793d13008eSEric Dumazet goto slow_path_clean; 6801da177e4SLinus Torvalds 6811da177e4SLinus Torvalds /* Partially cloned skb? */ 6821da177e4SLinus Torvalds if (skb_shared(frag)) 6833d13008eSEric Dumazet goto slow_path_clean; 6842fdba6b0SHerbert Xu 6852fdba6b0SHerbert Xu BUG_ON(frag->sk); 6862fdba6b0SHerbert Xu if (skb->sk) { 6872fdba6b0SHerbert Xu frag->sk = skb->sk; 6882fdba6b0SHerbert Xu frag->destructor = sock_wfree; 6892fdba6b0SHerbert Xu } 6903d13008eSEric Dumazet skb->truesize -= frag->truesize; 6911da177e4SLinus Torvalds } 6921da177e4SLinus Torvalds 6931da177e4SLinus Torvalds err = 0; 6941da177e4SLinus Torvalds offset = 0; 6951da177e4SLinus Torvalds frag = skb_shinfo(skb)->frag_list; 6964d9092bbSDavid S. Miller skb_frag_list_init(skb); 6971da177e4SLinus Torvalds /* BUILD HEADER */ 6981da177e4SLinus Torvalds 6999a217a1cSYOSHIFUJI Hideaki *prevhdr = NEXTHDR_FRAGMENT; 700d56f90a7SArnaldo Carvalho de Melo tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 7011da177e4SLinus Torvalds if (!tmp_hdr) { 702adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 7033bd653c8SDenis V. Lunev IPSTATS_MIB_FRAGFAILS); 7041da177e4SLinus Torvalds return -ENOMEM; 7051da177e4SLinus Torvalds } 7061da177e4SLinus Torvalds 7071da177e4SLinus Torvalds __skb_pull(skb, hlen); 7081da177e4SLinus Torvalds fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); 709e2d1bca7SArnaldo Carvalho de Melo __skb_push(skb, hlen); 710e2d1bca7SArnaldo Carvalho de Melo skb_reset_network_header(skb); 711d56f90a7SArnaldo Carvalho de Melo memcpy(skb_network_header(skb), tmp_hdr, hlen); 7121da177e4SLinus Torvalds 71387c48fa3SEric Dumazet ipv6_select_ident(fh, rt); 7141da177e4SLinus Torvalds fh->nexthdr = nexthdr; 7151da177e4SLinus Torvalds fh->reserved = 0; 7161da177e4SLinus Torvalds fh->frag_off = htons(IP6_MF); 7171da177e4SLinus Torvalds frag_id = fh->identification; 7181da177e4SLinus Torvalds 7191da177e4SLinus Torvalds first_len = skb_pagelen(skb); 7201da177e4SLinus Torvalds skb->data_len = first_len - skb_headlen(skb); 7211da177e4SLinus Torvalds skb->len = first_len; 7220660e03fSArnaldo Carvalho de Melo ipv6_hdr(skb)->payload_len = htons(first_len - 7230660e03fSArnaldo Carvalho de Melo sizeof(struct ipv6hdr)); 7241da177e4SLinus Torvalds 725d8d1f30bSChangli Gao dst_hold(&rt->dst); 7261da177e4SLinus Torvalds 7271da177e4SLinus Torvalds for (;;) { 7281da177e4SLinus Torvalds /* Prepare header of the next frame, 7291da177e4SLinus Torvalds * before previous one went down. */ 7301da177e4SLinus Torvalds if (frag) { 7311da177e4SLinus Torvalds frag->ip_summed = CHECKSUM_NONE; 732badff6d0SArnaldo Carvalho de Melo skb_reset_transport_header(frag); 7331da177e4SLinus Torvalds fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); 734e2d1bca7SArnaldo Carvalho de Melo __skb_push(frag, hlen); 735e2d1bca7SArnaldo Carvalho de Melo skb_reset_network_header(frag); 736d56f90a7SArnaldo Carvalho de Melo memcpy(skb_network_header(frag), tmp_hdr, 737d56f90a7SArnaldo Carvalho de Melo hlen); 7381da177e4SLinus Torvalds offset += skb->len - hlen - sizeof(struct frag_hdr); 7391da177e4SLinus Torvalds fh->nexthdr = nexthdr; 7401da177e4SLinus Torvalds fh->reserved = 0; 7411da177e4SLinus Torvalds fh->frag_off = htons(offset); 7421da177e4SLinus Torvalds if (frag->next != NULL) 7431da177e4SLinus Torvalds fh->frag_off |= htons(IP6_MF); 7441da177e4SLinus Torvalds fh->identification = frag_id; 7450660e03fSArnaldo Carvalho de Melo ipv6_hdr(frag)->payload_len = 7460660e03fSArnaldo Carvalho de Melo htons(frag->len - 7470660e03fSArnaldo Carvalho de Melo sizeof(struct ipv6hdr)); 7481da177e4SLinus Torvalds ip6_copy_metadata(frag, skb); 7491da177e4SLinus Torvalds } 7501da177e4SLinus Torvalds 7511da177e4SLinus Torvalds err = output(skb); 752dafee490SWei Dong if(!err) 753d8d1f30bSChangli Gao IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 7543bd653c8SDenis V. Lunev IPSTATS_MIB_FRAGCREATES); 755dafee490SWei Dong 7561da177e4SLinus Torvalds if (err || !frag) 7571da177e4SLinus Torvalds break; 7581da177e4SLinus Torvalds 7591da177e4SLinus Torvalds skb = frag; 7601da177e4SLinus Torvalds frag = skb->next; 7611da177e4SLinus Torvalds skb->next = NULL; 7621da177e4SLinus Torvalds } 7631da177e4SLinus Torvalds 7641da177e4SLinus Torvalds kfree(tmp_hdr); 7651da177e4SLinus Torvalds 7661da177e4SLinus Torvalds if (err == 0) { 767d8d1f30bSChangli Gao IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 7683bd653c8SDenis V. Lunev IPSTATS_MIB_FRAGOKS); 769d8d1f30bSChangli Gao dst_release(&rt->dst); 7701da177e4SLinus Torvalds return 0; 7711da177e4SLinus Torvalds } 7721da177e4SLinus Torvalds 7731da177e4SLinus Torvalds while (frag) { 7741da177e4SLinus Torvalds skb = frag->next; 7751da177e4SLinus Torvalds kfree_skb(frag); 7761da177e4SLinus Torvalds frag = skb; 7771da177e4SLinus Torvalds } 7781da177e4SLinus Torvalds 779d8d1f30bSChangli Gao IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 7803bd653c8SDenis V. Lunev IPSTATS_MIB_FRAGFAILS); 781d8d1f30bSChangli Gao dst_release(&rt->dst); 7821da177e4SLinus Torvalds return err; 7833d13008eSEric Dumazet 7843d13008eSEric Dumazet slow_path_clean: 7853d13008eSEric Dumazet skb_walk_frags(skb, frag2) { 7863d13008eSEric Dumazet if (frag2 == frag) 7873d13008eSEric Dumazet break; 7883d13008eSEric Dumazet frag2->sk = NULL; 7893d13008eSEric Dumazet frag2->destructor = NULL; 7903d13008eSEric Dumazet skb->truesize += frag2->truesize; 7913d13008eSEric Dumazet } 7921da177e4SLinus Torvalds } 7931da177e4SLinus Torvalds 7941da177e4SLinus Torvalds slow_path: 7951da177e4SLinus Torvalds left = skb->len - hlen; /* Space per frame */ 7961da177e4SLinus Torvalds ptr = hlen; /* Where to start from */ 7971da177e4SLinus Torvalds 7981da177e4SLinus Torvalds /* 7991da177e4SLinus Torvalds * Fragment the datagram. 8001da177e4SLinus Torvalds */ 8011da177e4SLinus Torvalds 8021da177e4SLinus Torvalds *prevhdr = NEXTHDR_FRAGMENT; 8031da177e4SLinus Torvalds 8041da177e4SLinus Torvalds /* 8051da177e4SLinus Torvalds * Keep copying data until we run out. 8061da177e4SLinus Torvalds */ 8071da177e4SLinus Torvalds while(left > 0) { 8081da177e4SLinus Torvalds len = left; 8091da177e4SLinus Torvalds /* IF: it doesn't fit, use 'mtu' - the data space left */ 8101da177e4SLinus Torvalds if (len > mtu) 8111da177e4SLinus Torvalds len = mtu; 8121da177e4SLinus Torvalds /* IF: we are not sending up to and including the packet end 8131da177e4SLinus Torvalds then align the next start on an eight byte boundary */ 8141da177e4SLinus Torvalds if (len < left) { 8151da177e4SLinus Torvalds len &= ~7; 8161da177e4SLinus Torvalds } 8171da177e4SLinus Torvalds /* 8181da177e4SLinus Torvalds * Allocate buffer. 8191da177e4SLinus Torvalds */ 8201da177e4SLinus Torvalds 821d8d1f30bSChangli Gao if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { 82264ce2073SPatrick McHardy NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 823adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 824a11d206dSYOSHIFUJI Hideaki IPSTATS_MIB_FRAGFAILS); 8251da177e4SLinus Torvalds err = -ENOMEM; 8261da177e4SLinus Torvalds goto fail; 8271da177e4SLinus Torvalds } 8281da177e4SLinus Torvalds 8291da177e4SLinus Torvalds /* 8301da177e4SLinus Torvalds * Set up data on packet 8311da177e4SLinus Torvalds */ 8321da177e4SLinus Torvalds 8331da177e4SLinus Torvalds ip6_copy_metadata(frag, skb); 834d8d1f30bSChangli Gao skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); 8351da177e4SLinus Torvalds skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 836c1d2bbe1SArnaldo Carvalho de Melo skb_reset_network_header(frag); 837badff6d0SArnaldo Carvalho de Melo fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 838b0e380b1SArnaldo Carvalho de Melo frag->transport_header = (frag->network_header + hlen + 839b0e380b1SArnaldo Carvalho de Melo sizeof(struct frag_hdr)); 8401da177e4SLinus Torvalds 8411da177e4SLinus Torvalds /* 8421da177e4SLinus Torvalds * Charge the memory for the fragment to any owner 8431da177e4SLinus Torvalds * it might possess 8441da177e4SLinus Torvalds */ 8451da177e4SLinus Torvalds if (skb->sk) 8461da177e4SLinus Torvalds skb_set_owner_w(frag, skb->sk); 8471da177e4SLinus Torvalds 8481da177e4SLinus Torvalds /* 8491da177e4SLinus Torvalds * Copy the packet header into the new buffer. 8501da177e4SLinus Torvalds */ 851d626f62bSArnaldo Carvalho de Melo skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); 8521da177e4SLinus Torvalds 8531da177e4SLinus Torvalds /* 8541da177e4SLinus Torvalds * Build fragment header. 8551da177e4SLinus Torvalds */ 8561da177e4SLinus Torvalds fh->nexthdr = nexthdr; 8571da177e4SLinus Torvalds fh->reserved = 0; 858f36d6ab1SYan Zheng if (!frag_id) { 85987c48fa3SEric Dumazet ipv6_select_ident(fh, rt); 8601da177e4SLinus Torvalds frag_id = fh->identification; 8611da177e4SLinus Torvalds } else 8621da177e4SLinus Torvalds fh->identification = frag_id; 8631da177e4SLinus Torvalds 8641da177e4SLinus Torvalds /* 8651da177e4SLinus Torvalds * Copy a block of the IP datagram. 8661da177e4SLinus Torvalds */ 8678984e41dSWei Yongjun if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len)) 8681da177e4SLinus Torvalds BUG(); 8691da177e4SLinus Torvalds left -= len; 8701da177e4SLinus Torvalds 8711da177e4SLinus Torvalds fh->frag_off = htons(offset); 8721da177e4SLinus Torvalds if (left > 0) 8731da177e4SLinus Torvalds fh->frag_off |= htons(IP6_MF); 8740660e03fSArnaldo Carvalho de Melo ipv6_hdr(frag)->payload_len = htons(frag->len - 8750660e03fSArnaldo Carvalho de Melo sizeof(struct ipv6hdr)); 8761da177e4SLinus Torvalds 8771da177e4SLinus Torvalds ptr += len; 8781da177e4SLinus Torvalds offset += len; 8791da177e4SLinus Torvalds 8801da177e4SLinus Torvalds /* 8811da177e4SLinus Torvalds * Put this fragment into the sending queue. 8821da177e4SLinus Torvalds */ 8831da177e4SLinus Torvalds err = output(frag); 8841da177e4SLinus Torvalds if (err) 8851da177e4SLinus Torvalds goto fail; 886dafee490SWei Dong 887adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 8883bd653c8SDenis V. Lunev IPSTATS_MIB_FRAGCREATES); 8891da177e4SLinus Torvalds } 890adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 891a11d206dSYOSHIFUJI Hideaki IPSTATS_MIB_FRAGOKS); 8921da177e4SLinus Torvalds kfree_skb(skb); 8931da177e4SLinus Torvalds return err; 8941da177e4SLinus Torvalds 8951da177e4SLinus Torvalds fail: 896adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 897a11d206dSYOSHIFUJI Hideaki IPSTATS_MIB_FRAGFAILS); 8981da177e4SLinus Torvalds kfree_skb(skb); 8991da177e4SLinus Torvalds return err; 9001da177e4SLinus Torvalds } 9011da177e4SLinus Torvalds 902b71d1d42SEric Dumazet static inline int ip6_rt_check(const struct rt6key *rt_key, 903b71d1d42SEric Dumazet const struct in6_addr *fl_addr, 904b71d1d42SEric Dumazet const struct in6_addr *addr_cache) 905cf6b1982SYOSHIFUJI Hideaki { 906a02cec21SEric Dumazet return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 907a02cec21SEric Dumazet (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); 908cf6b1982SYOSHIFUJI Hideaki } 909cf6b1982SYOSHIFUJI Hideaki 910497c615aSHerbert Xu static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 911497c615aSHerbert Xu struct dst_entry *dst, 912b71d1d42SEric Dumazet const struct flowi6 *fl6) 9131da177e4SLinus Torvalds { 9141da177e4SLinus Torvalds struct ipv6_pinfo *np = inet6_sk(sk); 915497c615aSHerbert Xu struct rt6_info *rt = (struct rt6_info *)dst; 9161da177e4SLinus Torvalds 917497c615aSHerbert Xu if (!dst) 918497c615aSHerbert Xu goto out; 9191da177e4SLinus Torvalds 9201da177e4SLinus Torvalds /* Yes, checking route validity in not connected 921d76e60a5SDavid S. Miller * case is not very simple. Take into account, 922d76e60a5SDavid S. Miller * that we do not support routing by source, TOS, 923d76e60a5SDavid S. Miller * and MSG_DONTROUTE --ANK (980726) 924d76e60a5SDavid S. Miller * 925cf6b1982SYOSHIFUJI Hideaki * 1. ip6_rt_check(): If route was host route, 926cf6b1982SYOSHIFUJI Hideaki * check that cached destination is current. 927d76e60a5SDavid S. Miller * If it is network route, we still may 928d76e60a5SDavid S. Miller * check its validity using saved pointer 929d76e60a5SDavid S. Miller * to the last used address: daddr_cache. 930d76e60a5SDavid S. Miller * We do not want to save whole address now, 931d76e60a5SDavid S. Miller * (because main consumer of this service 932d76e60a5SDavid S. Miller * is tcp, which has not this problem), 933d76e60a5SDavid S. Miller * so that the last trick works only on connected 934d76e60a5SDavid S. Miller * sockets. 935d76e60a5SDavid S. Miller * 2. oif also should be the same. 9361da177e4SLinus Torvalds */ 9374c9483b2SDavid S. Miller if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 9388e1ef0a9SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_SUBTREES 9394c9483b2SDavid S. Miller ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 9408e1ef0a9SYOSHIFUJI Hideaki #endif 9414c9483b2SDavid S. Miller (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { 942497c615aSHerbert Xu dst_release(dst); 943497c615aSHerbert Xu dst = NULL; 9441da177e4SLinus Torvalds } 945497c615aSHerbert Xu 946497c615aSHerbert Xu out: 947497c615aSHerbert Xu return dst; 9481da177e4SLinus Torvalds } 949497c615aSHerbert Xu 950497c615aSHerbert Xu static int ip6_dst_lookup_tail(struct sock *sk, 9514c9483b2SDavid S. Miller struct dst_entry **dst, struct flowi6 *fl6) 952497c615aSHerbert Xu { 9533b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(sk); 95469cce1d1SDavid S. Miller #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 95569cce1d1SDavid S. Miller struct neighbour *n; 95669cce1d1SDavid S. Miller #endif 95769cce1d1SDavid S. Miller int err; 9581da177e4SLinus Torvalds 9591da177e4SLinus Torvalds if (*dst == NULL) 9604c9483b2SDavid S. Miller *dst = ip6_route_output(net, sk, fl6); 9611da177e4SLinus Torvalds 9621da177e4SLinus Torvalds if ((err = (*dst)->error)) 9631da177e4SLinus Torvalds goto out_err_release; 9641da177e4SLinus Torvalds 9654c9483b2SDavid S. Miller if (ipv6_addr_any(&fl6->saddr)) { 966c3968a85SDaniel Walter struct rt6_info *rt = (struct rt6_info *) *dst; 967c3968a85SDaniel Walter err = ip6_route_get_saddr(net, rt, &fl6->daddr, 9687cbca67cSYOSHIFUJI Hideaki sk ? inet6_sk(sk)->srcprefs : 0, 9694c9483b2SDavid S. Miller &fl6->saddr); 97044456d37SOlaf Hering if (err) 9711da177e4SLinus Torvalds goto out_err_release; 9721da177e4SLinus Torvalds } 9731da177e4SLinus Torvalds 97495c385b4SNeil Horman #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 97595c385b4SNeil Horman /* 97695c385b4SNeil Horman * Here if the dst entry we've looked up 97795c385b4SNeil Horman * has a neighbour entry that is in the INCOMPLETE 97895c385b4SNeil Horman * state and the src address from the flow is 97995c385b4SNeil Horman * marked as OPTIMISTIC, we release the found 98095c385b4SNeil Horman * dst entry and replace it instead with the 98195c385b4SNeil Horman * dst entry of the nexthop router 98295c385b4SNeil Horman */ 983f2c31e32SEric Dumazet rcu_read_lock(); 98469cce1d1SDavid S. Miller n = dst_get_neighbour(*dst); 98569cce1d1SDavid S. Miller if (n && !(n->nud_state & NUD_VALID)) { 98695c385b4SNeil Horman struct inet6_ifaddr *ifp; 9874c9483b2SDavid S. Miller struct flowi6 fl_gw6; 98895c385b4SNeil Horman int redirect; 98995c385b4SNeil Horman 990f2c31e32SEric Dumazet rcu_read_unlock(); 9914c9483b2SDavid S. Miller ifp = ipv6_get_ifaddr(net, &fl6->saddr, 9921cab3da6SDaniel Lezcano (*dst)->dev, 1); 99395c385b4SNeil Horman 99495c385b4SNeil Horman redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 99595c385b4SNeil Horman if (ifp) 99695c385b4SNeil Horman in6_ifa_put(ifp); 99795c385b4SNeil Horman 99895c385b4SNeil Horman if (redirect) { 99995c385b4SNeil Horman /* 100095c385b4SNeil Horman * We need to get the dst entry for the 100195c385b4SNeil Horman * default router instead 100295c385b4SNeil Horman */ 100395c385b4SNeil Horman dst_release(*dst); 10044c9483b2SDavid S. Miller memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 10054c9483b2SDavid S. Miller memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 10064c9483b2SDavid S. Miller *dst = ip6_route_output(net, sk, &fl_gw6); 100795c385b4SNeil Horman if ((err = (*dst)->error)) 100895c385b4SNeil Horman goto out_err_release; 100995c385b4SNeil Horman } 1010f2c31e32SEric Dumazet } else { 1011f2c31e32SEric Dumazet rcu_read_unlock(); 101295c385b4SNeil Horman } 101395c385b4SNeil Horman #endif 101495c385b4SNeil Horman 10151da177e4SLinus Torvalds return 0; 10161da177e4SLinus Torvalds 10171da177e4SLinus Torvalds out_err_release: 1018ca46f9c8SMitsuru Chinen if (err == -ENETUNREACH) 1019483a47d2SDenis V. Lunev IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); 10201da177e4SLinus Torvalds dst_release(*dst); 10211da177e4SLinus Torvalds *dst = NULL; 10221da177e4SLinus Torvalds return err; 10231da177e4SLinus Torvalds } 102434a0b3cdSAdrian Bunk 1025497c615aSHerbert Xu /** 1026497c615aSHerbert Xu * ip6_dst_lookup - perform route lookup on flow 1027497c615aSHerbert Xu * @sk: socket which provides route info 1028497c615aSHerbert Xu * @dst: pointer to dst_entry * for result 10294c9483b2SDavid S. Miller * @fl6: flow to lookup 1030497c615aSHerbert Xu * 1031497c615aSHerbert Xu * This function performs a route lookup on the given flow. 1032497c615aSHerbert Xu * 1033497c615aSHerbert Xu * It returns zero on success, or a standard errno code on error. 1034497c615aSHerbert Xu */ 10354c9483b2SDavid S. Miller int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) 1036497c615aSHerbert Xu { 1037497c615aSHerbert Xu *dst = NULL; 10384c9483b2SDavid S. Miller return ip6_dst_lookup_tail(sk, dst, fl6); 1039497c615aSHerbert Xu } 10403cf3dc6cSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(ip6_dst_lookup); 10413cf3dc6cSArnaldo Carvalho de Melo 1042497c615aSHerbert Xu /** 104368d0c6d3SDavid S. Miller * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 104468d0c6d3SDavid S. Miller * @sk: socket which provides route info 10454c9483b2SDavid S. Miller * @fl6: flow to lookup 104668d0c6d3SDavid S. Miller * @final_dst: final destination address for ipsec lookup 1047a1414715SDavid S. Miller * @can_sleep: we are in a sleepable context 104868d0c6d3SDavid S. Miller * 104968d0c6d3SDavid S. Miller * This function performs a route lookup on the given flow. 105068d0c6d3SDavid S. Miller * 105168d0c6d3SDavid S. Miller * It returns a valid dst pointer on success, or a pointer encoded 105268d0c6d3SDavid S. Miller * error code. 105368d0c6d3SDavid S. Miller */ 10544c9483b2SDavid S. Miller struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 105568d0c6d3SDavid S. Miller const struct in6_addr *final_dst, 1056a1414715SDavid S. Miller bool can_sleep) 105768d0c6d3SDavid S. Miller { 105868d0c6d3SDavid S. Miller struct dst_entry *dst = NULL; 105968d0c6d3SDavid S. Miller int err; 106068d0c6d3SDavid S. Miller 10614c9483b2SDavid S. Miller err = ip6_dst_lookup_tail(sk, &dst, fl6); 106268d0c6d3SDavid S. Miller if (err) 106368d0c6d3SDavid S. Miller return ERR_PTR(err); 106468d0c6d3SDavid S. Miller if (final_dst) 10654c9483b2SDavid S. Miller ipv6_addr_copy(&fl6->daddr, final_dst); 10662774c131SDavid S. Miller if (can_sleep) 10674c9483b2SDavid S. Miller fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; 10682774c131SDavid S. Miller 10694c9483b2SDavid S. Miller return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); 107068d0c6d3SDavid S. Miller } 107168d0c6d3SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 107268d0c6d3SDavid S. Miller 107368d0c6d3SDavid S. Miller /** 107468d0c6d3SDavid S. Miller * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 107568d0c6d3SDavid S. Miller * @sk: socket which provides the dst cache and route info 10764c9483b2SDavid S. Miller * @fl6: flow to lookup 107768d0c6d3SDavid S. Miller * @final_dst: final destination address for ipsec lookup 1078a1414715SDavid S. Miller * @can_sleep: we are in a sleepable context 1079497c615aSHerbert Xu * 1080497c615aSHerbert Xu * This function performs a route lookup on the given flow with the 1081497c615aSHerbert Xu * possibility of using the cached route in the socket if it is valid. 1082497c615aSHerbert Xu * It will take the socket dst lock when operating on the dst cache. 1083497c615aSHerbert Xu * As a result, this function can only be used in process context. 1084497c615aSHerbert Xu * 108568d0c6d3SDavid S. Miller * It returns a valid dst pointer on success, or a pointer encoded 108668d0c6d3SDavid S. Miller * error code. 1087497c615aSHerbert Xu */ 10884c9483b2SDavid S. Miller struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 108968d0c6d3SDavid S. Miller const struct in6_addr *final_dst, 1090a1414715SDavid S. Miller bool can_sleep) 1091497c615aSHerbert Xu { 109268d0c6d3SDavid S. Miller struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 109368d0c6d3SDavid S. Miller int err; 1094497c615aSHerbert Xu 10954c9483b2SDavid S. Miller dst = ip6_sk_dst_check(sk, dst, fl6); 109668d0c6d3SDavid S. Miller 10974c9483b2SDavid S. Miller err = ip6_dst_lookup_tail(sk, &dst, fl6); 109868d0c6d3SDavid S. Miller if (err) 109968d0c6d3SDavid S. Miller return ERR_PTR(err); 110068d0c6d3SDavid S. Miller if (final_dst) 11014c9483b2SDavid S. Miller ipv6_addr_copy(&fl6->daddr, final_dst); 11022774c131SDavid S. Miller if (can_sleep) 11034c9483b2SDavid S. Miller fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; 11042774c131SDavid S. Miller 11054c9483b2SDavid S. Miller return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); 110668d0c6d3SDavid S. Miller } 110768d0c6d3SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1108497c615aSHerbert Xu 110934a0b3cdSAdrian Bunk static inline int ip6_ufo_append_data(struct sock *sk, 1110e89e9cf5SAnanda Raju int getfrag(void *from, char *to, int offset, int len, 1111e89e9cf5SAnanda Raju int odd, struct sk_buff *skb), 1112e89e9cf5SAnanda Raju void *from, int length, int hh_len, int fragheaderlen, 111387c48fa3SEric Dumazet int transhdrlen, int mtu,unsigned int flags, 111487c48fa3SEric Dumazet struct rt6_info *rt) 1115e89e9cf5SAnanda Raju 1116e89e9cf5SAnanda Raju { 1117e89e9cf5SAnanda Raju struct sk_buff *skb; 1118e89e9cf5SAnanda Raju int err; 1119e89e9cf5SAnanda Raju 1120e89e9cf5SAnanda Raju /* There is support for UDP large send offload by network 1121e89e9cf5SAnanda Raju * device, so create one single skb packet containing complete 1122e89e9cf5SAnanda Raju * udp datagram 1123e89e9cf5SAnanda Raju */ 1124e89e9cf5SAnanda Raju if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 1125e89e9cf5SAnanda Raju skb = sock_alloc_send_skb(sk, 1126e89e9cf5SAnanda Raju hh_len + fragheaderlen + transhdrlen + 20, 1127e89e9cf5SAnanda Raju (flags & MSG_DONTWAIT), &err); 1128e89e9cf5SAnanda Raju if (skb == NULL) 1129e89e9cf5SAnanda Raju return -ENOMEM; 1130e89e9cf5SAnanda Raju 1131e89e9cf5SAnanda Raju /* reserve space for Hardware header */ 1132e89e9cf5SAnanda Raju skb_reserve(skb, hh_len); 1133e89e9cf5SAnanda Raju 1134e89e9cf5SAnanda Raju /* create space for UDP/IP header */ 1135e89e9cf5SAnanda Raju skb_put(skb,fragheaderlen + transhdrlen); 1136e89e9cf5SAnanda Raju 1137e89e9cf5SAnanda Raju /* initialize network header pointer */ 1138c1d2bbe1SArnaldo Carvalho de Melo skb_reset_network_header(skb); 1139e89e9cf5SAnanda Raju 1140e89e9cf5SAnanda Raju /* initialize protocol header pointer */ 1141b0e380b1SArnaldo Carvalho de Melo skb->transport_header = skb->network_header + fragheaderlen; 1142e89e9cf5SAnanda Raju 114384fa7933SPatrick McHardy skb->ip_summed = CHECKSUM_PARTIAL; 1144e89e9cf5SAnanda Raju skb->csum = 0; 1145e89e9cf5SAnanda Raju } 1146e89e9cf5SAnanda Raju 1147e89e9cf5SAnanda Raju err = skb_append_datato_frags(sk,skb, getfrag, from, 1148e89e9cf5SAnanda Raju (length - transhdrlen)); 1149e89e9cf5SAnanda Raju if (!err) { 1150e89e9cf5SAnanda Raju struct frag_hdr fhdr; 1151e89e9cf5SAnanda Raju 1152c31d5326SSridhar Samudrala /* Specify the length of each IPv6 datagram fragment. 1153c31d5326SSridhar Samudrala * It has to be a multiple of 8. 1154c31d5326SSridhar Samudrala */ 1155c31d5326SSridhar Samudrala skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - 1156c31d5326SSridhar Samudrala sizeof(struct frag_hdr)) & ~7; 1157f83ef8c0SHerbert Xu skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 115887c48fa3SEric Dumazet ipv6_select_ident(&fhdr, rt); 1159e89e9cf5SAnanda Raju skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1160e89e9cf5SAnanda Raju __skb_queue_tail(&sk->sk_write_queue, skb); 1161e89e9cf5SAnanda Raju 1162e89e9cf5SAnanda Raju return 0; 1163e89e9cf5SAnanda Raju } 1164e89e9cf5SAnanda Raju /* There is not enough support do UPD LSO, 1165e89e9cf5SAnanda Raju * so follow normal path 1166e89e9cf5SAnanda Raju */ 1167e89e9cf5SAnanda Raju kfree_skb(skb); 1168e89e9cf5SAnanda Raju 1169e89e9cf5SAnanda Raju return err; 1170e89e9cf5SAnanda Raju } 11711da177e4SLinus Torvalds 11720178b695SHerbert Xu static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 11730178b695SHerbert Xu gfp_t gfp) 11740178b695SHerbert Xu { 11750178b695SHerbert Xu return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 11760178b695SHerbert Xu } 11770178b695SHerbert Xu 11780178b695SHerbert Xu static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 11790178b695SHerbert Xu gfp_t gfp) 11800178b695SHerbert Xu { 11810178b695SHerbert Xu return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 11820178b695SHerbert Xu } 11830178b695SHerbert Xu 118441a1f8eaSYOSHIFUJI Hideaki int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 118541a1f8eaSYOSHIFUJI Hideaki int offset, int len, int odd, struct sk_buff *skb), 11861da177e4SLinus Torvalds void *from, int length, int transhdrlen, 11874c9483b2SDavid S. Miller int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, 118813b52cd4SBrian Haley struct rt6_info *rt, unsigned int flags, int dontfrag) 11891da177e4SLinus Torvalds { 11901da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 11911da177e4SLinus Torvalds struct ipv6_pinfo *np = inet6_sk(sk); 1192bdc712b4SDavid S. Miller struct inet_cork *cork; 11931da177e4SLinus Torvalds struct sk_buff *skb; 11941da177e4SLinus Torvalds unsigned int maxfraglen, fragheaderlen; 11951da177e4SLinus Torvalds int exthdrlen; 1196*299b0767SSteffen Klassert int dst_exthdrlen; 11971da177e4SLinus Torvalds int hh_len; 11981da177e4SLinus Torvalds int mtu; 11991da177e4SLinus Torvalds int copy; 12001da177e4SLinus Torvalds int err; 12011da177e4SLinus Torvalds int offset = 0; 12021da177e4SLinus Torvalds int csummode = CHECKSUM_NONE; 1203a693e698SAnders Berggren __u8 tx_flags = 0; 12041da177e4SLinus Torvalds 12051da177e4SLinus Torvalds if (flags&MSG_PROBE) 12061da177e4SLinus Torvalds return 0; 1207bdc712b4SDavid S. Miller cork = &inet->cork.base; 12081da177e4SLinus Torvalds if (skb_queue_empty(&sk->sk_write_queue)) { 12091da177e4SLinus Torvalds /* 12101da177e4SLinus Torvalds * setup for corking 12111da177e4SLinus Torvalds */ 12121da177e4SLinus Torvalds if (opt) { 12130178b695SHerbert Xu if (WARN_ON(np->cork.opt)) 12140178b695SHerbert Xu return -EINVAL; 12150178b695SHerbert Xu 12160178b695SHerbert Xu np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); 12171da177e4SLinus Torvalds if (unlikely(np->cork.opt == NULL)) 12181da177e4SLinus Torvalds return -ENOBUFS; 12190178b695SHerbert Xu 12200178b695SHerbert Xu np->cork.opt->tot_len = opt->tot_len; 12210178b695SHerbert Xu np->cork.opt->opt_flen = opt->opt_flen; 12220178b695SHerbert Xu np->cork.opt->opt_nflen = opt->opt_nflen; 12230178b695SHerbert Xu 12240178b695SHerbert Xu np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, 12250178b695SHerbert Xu sk->sk_allocation); 12260178b695SHerbert Xu if (opt->dst0opt && !np->cork.opt->dst0opt) 12270178b695SHerbert Xu return -ENOBUFS; 12280178b695SHerbert Xu 12290178b695SHerbert Xu np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, 12300178b695SHerbert Xu sk->sk_allocation); 12310178b695SHerbert Xu if (opt->dst1opt && !np->cork.opt->dst1opt) 12320178b695SHerbert Xu return -ENOBUFS; 12330178b695SHerbert Xu 12340178b695SHerbert Xu np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, 12350178b695SHerbert Xu sk->sk_allocation); 12360178b695SHerbert Xu if (opt->hopopt && !np->cork.opt->hopopt) 12370178b695SHerbert Xu return -ENOBUFS; 12380178b695SHerbert Xu 12390178b695SHerbert Xu np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, 12400178b695SHerbert Xu sk->sk_allocation); 12410178b695SHerbert Xu if (opt->srcrt && !np->cork.opt->srcrt) 12420178b695SHerbert Xu return -ENOBUFS; 12430178b695SHerbert Xu 12441da177e4SLinus Torvalds /* need source address above miyazawa*/ 12451da177e4SLinus Torvalds } 1246d8d1f30bSChangli Gao dst_hold(&rt->dst); 1247bdc712b4SDavid S. Miller cork->dst = &rt->dst; 12484c9483b2SDavid S. Miller inet->cork.fl.u.ip6 = *fl6; 12491da177e4SLinus Torvalds np->cork.hop_limit = hlimit; 125041a1f8eaSYOSHIFUJI Hideaki np->cork.tclass = tclass; 1251628a5c56SJohn Heffner mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1252*299b0767SSteffen Klassert rt->dst.dev->mtu : dst_mtu(&rt->dst); 1253c7503609SDave Jones if (np->frag_size < mtu) { 1254d91675f9SYOSHIFUJI Hideaki if (np->frag_size) 1255d91675f9SYOSHIFUJI Hideaki mtu = np->frag_size; 1256d91675f9SYOSHIFUJI Hideaki } 1257bdc712b4SDavid S. Miller cork->fragsize = mtu; 1258d8d1f30bSChangli Gao if (dst_allfrag(rt->dst.path)) 1259bdc712b4SDavid S. Miller cork->flags |= IPCORK_ALLFRAG; 1260bdc712b4SDavid S. Miller cork->length = 0; 12611da177e4SLinus Torvalds sk->sk_sndmsg_page = NULL; 12621da177e4SLinus Torvalds sk->sk_sndmsg_off = 0; 1263*299b0767SSteffen Klassert exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; 12641da177e4SLinus Torvalds length += exthdrlen; 12651da177e4SLinus Torvalds transhdrlen += exthdrlen; 1266*299b0767SSteffen Klassert dst_exthdrlen = rt->dst.header_len; 12671da177e4SLinus Torvalds } else { 1268bdc712b4SDavid S. Miller rt = (struct rt6_info *)cork->dst; 12694c9483b2SDavid S. Miller fl6 = &inet->cork.fl.u.ip6; 12701da177e4SLinus Torvalds opt = np->cork.opt; 12711da177e4SLinus Torvalds transhdrlen = 0; 12721da177e4SLinus Torvalds exthdrlen = 0; 1273*299b0767SSteffen Klassert dst_exthdrlen = 0; 1274bdc712b4SDavid S. Miller mtu = cork->fragsize; 12751da177e4SLinus Torvalds } 12761da177e4SLinus Torvalds 1277d8d1f30bSChangli Gao hh_len = LL_RESERVED_SPACE(rt->dst.dev); 12781da177e4SLinus Torvalds 1279a1b05140SMasahide NAKAMURA fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1280b4ce9277SHerbert Xu (opt ? opt->opt_nflen : 0); 12811da177e4SLinus Torvalds maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); 12821da177e4SLinus Torvalds 12831da177e4SLinus Torvalds if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { 1284bdc712b4SDavid S. Miller if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { 12854c9483b2SDavid S. Miller ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); 12861da177e4SLinus Torvalds return -EMSGSIZE; 12871da177e4SLinus Torvalds } 12881da177e4SLinus Torvalds } 12891da177e4SLinus Torvalds 1290a693e698SAnders Berggren /* For UDP, check if TX timestamp is enabled */ 1291a693e698SAnders Berggren if (sk->sk_type == SOCK_DGRAM) { 1292a693e698SAnders Berggren err = sock_tx_timestamp(sk, &tx_flags); 1293a693e698SAnders Berggren if (err) 1294a693e698SAnders Berggren goto error; 1295a693e698SAnders Berggren } 1296a693e698SAnders Berggren 12971da177e4SLinus Torvalds /* 12981da177e4SLinus Torvalds * Let's try using as much space as possible. 12991da177e4SLinus Torvalds * Use MTU if total length of the message fits into the MTU. 13001da177e4SLinus Torvalds * Otherwise, we need to reserve fragment header and 13011da177e4SLinus Torvalds * fragment alignment (= 8-15 octects, in total). 13021da177e4SLinus Torvalds * 13031da177e4SLinus Torvalds * Note that we may need to "move" the data from the tail of 13041da177e4SLinus Torvalds * of the buffer to the new fragment when we split 13051da177e4SLinus Torvalds * the message. 13061da177e4SLinus Torvalds * 13071da177e4SLinus Torvalds * FIXME: It may be fragmented into multiple chunks 13081da177e4SLinus Torvalds * at once if non-fragmentable extension headers 13091da177e4SLinus Torvalds * are too large. 13101da177e4SLinus Torvalds * --yoshfuji 13111da177e4SLinus Torvalds */ 13121da177e4SLinus Torvalds 1313bdc712b4SDavid S. Miller cork->length += length; 13144b340ae2SBrian Haley if (length > mtu) { 13154b340ae2SBrian Haley int proto = sk->sk_protocol; 13164b340ae2SBrian Haley if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ 13174c9483b2SDavid S. Miller ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); 13184b340ae2SBrian Haley return -EMSGSIZE; 13194b340ae2SBrian Haley } 13204b340ae2SBrian Haley 13214b340ae2SBrian Haley if (proto == IPPROTO_UDP && 1322d8d1f30bSChangli Gao (rt->dst.dev->features & NETIF_F_UFO)) { 1323e89e9cf5SAnanda Raju 13244b340ae2SBrian Haley err = ip6_ufo_append_data(sk, getfrag, from, length, 13254b340ae2SBrian Haley hh_len, fragheaderlen, 132687c48fa3SEric Dumazet transhdrlen, mtu, flags, rt); 1327baa829d8SPatrick McHardy if (err) 1328e89e9cf5SAnanda Raju goto error; 1329e89e9cf5SAnanda Raju return 0; 1330e89e9cf5SAnanda Raju } 13314b340ae2SBrian Haley } 13321da177e4SLinus Torvalds 13331da177e4SLinus Torvalds if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 13341da177e4SLinus Torvalds goto alloc_new_skb; 13351da177e4SLinus Torvalds 13361da177e4SLinus Torvalds while (length > 0) { 13371da177e4SLinus Torvalds /* Check if the remaining data fits into current packet. */ 1338bdc712b4SDavid S. Miller copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 13391da177e4SLinus Torvalds if (copy < length) 13401da177e4SLinus Torvalds copy = maxfraglen - skb->len; 13411da177e4SLinus Torvalds 13421da177e4SLinus Torvalds if (copy <= 0) { 13431da177e4SLinus Torvalds char *data; 13441da177e4SLinus Torvalds unsigned int datalen; 13451da177e4SLinus Torvalds unsigned int fraglen; 13461da177e4SLinus Torvalds unsigned int fraggap; 13471da177e4SLinus Torvalds unsigned int alloclen; 13481da177e4SLinus Torvalds struct sk_buff *skb_prev; 13491da177e4SLinus Torvalds alloc_new_skb: 13501da177e4SLinus Torvalds skb_prev = skb; 13511da177e4SLinus Torvalds 13521da177e4SLinus Torvalds /* There's no room in the current skb */ 13531da177e4SLinus Torvalds if (skb_prev) 13541da177e4SLinus Torvalds fraggap = skb_prev->len - maxfraglen; 13551da177e4SLinus Torvalds else 13561da177e4SLinus Torvalds fraggap = 0; 13571da177e4SLinus Torvalds 13581da177e4SLinus Torvalds /* 13591da177e4SLinus Torvalds * If remaining data exceeds the mtu, 13601da177e4SLinus Torvalds * we know we need more fragment(s). 13611da177e4SLinus Torvalds */ 13621da177e4SLinus Torvalds datalen = length + fraggap; 1363bdc712b4SDavid S. Miller if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 13641da177e4SLinus Torvalds datalen = maxfraglen - fragheaderlen; 13651da177e4SLinus Torvalds 13661da177e4SLinus Torvalds fraglen = datalen + fragheaderlen; 13671da177e4SLinus Torvalds if ((flags & MSG_MORE) && 1368d8d1f30bSChangli Gao !(rt->dst.dev->features&NETIF_F_SG)) 13691da177e4SLinus Torvalds alloclen = mtu; 13701da177e4SLinus Torvalds else 13711da177e4SLinus Torvalds alloclen = datalen + fragheaderlen; 13721da177e4SLinus Torvalds 1373*299b0767SSteffen Klassert alloclen += dst_exthdrlen; 1374*299b0767SSteffen Klassert 13751da177e4SLinus Torvalds /* 13761da177e4SLinus Torvalds * The last fragment gets additional space at tail. 13771da177e4SLinus Torvalds * Note: we overallocate on fragments with MSG_MODE 13781da177e4SLinus Torvalds * because we have no idea if we're the last one. 13791da177e4SLinus Torvalds */ 13801da177e4SLinus Torvalds if (datalen == length + fraggap) 1381d8d1f30bSChangli Gao alloclen += rt->dst.trailer_len; 13821da177e4SLinus Torvalds 13831da177e4SLinus Torvalds /* 13841da177e4SLinus Torvalds * We just reserve space for fragment header. 13851da177e4SLinus Torvalds * Note: this may be overallocation if the message 13861da177e4SLinus Torvalds * (without MSG_MORE) fits into the MTU. 13871da177e4SLinus Torvalds */ 13881da177e4SLinus Torvalds alloclen += sizeof(struct frag_hdr); 13891da177e4SLinus Torvalds 13901da177e4SLinus Torvalds if (transhdrlen) { 13911da177e4SLinus Torvalds skb = sock_alloc_send_skb(sk, 13921da177e4SLinus Torvalds alloclen + hh_len, 13931da177e4SLinus Torvalds (flags & MSG_DONTWAIT), &err); 13941da177e4SLinus Torvalds } else { 13951da177e4SLinus Torvalds skb = NULL; 13961da177e4SLinus Torvalds if (atomic_read(&sk->sk_wmem_alloc) <= 13971da177e4SLinus Torvalds 2 * sk->sk_sndbuf) 13981da177e4SLinus Torvalds skb = sock_wmalloc(sk, 13991da177e4SLinus Torvalds alloclen + hh_len, 1, 14001da177e4SLinus Torvalds sk->sk_allocation); 14011da177e4SLinus Torvalds if (unlikely(skb == NULL)) 14021da177e4SLinus Torvalds err = -ENOBUFS; 1403a693e698SAnders Berggren else { 1404a693e698SAnders Berggren /* Only the initial fragment 1405a693e698SAnders Berggren * is time stamped. 1406a693e698SAnders Berggren */ 1407a693e698SAnders Berggren tx_flags = 0; 1408a693e698SAnders Berggren } 14091da177e4SLinus Torvalds } 14101da177e4SLinus Torvalds if (skb == NULL) 14111da177e4SLinus Torvalds goto error; 14121da177e4SLinus Torvalds /* 14131da177e4SLinus Torvalds * Fill in the control structures 14141da177e4SLinus Torvalds */ 14151da177e4SLinus Torvalds skb->ip_summed = csummode; 14161da177e4SLinus Torvalds skb->csum = 0; 14171da177e4SLinus Torvalds /* reserve for fragmentation */ 14181da177e4SLinus Torvalds skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); 14191da177e4SLinus Torvalds 1420a693e698SAnders Berggren if (sk->sk_type == SOCK_DGRAM) 1421a693e698SAnders Berggren skb_shinfo(skb)->tx_flags = tx_flags; 1422a693e698SAnders Berggren 14231da177e4SLinus Torvalds /* 14241da177e4SLinus Torvalds * Find where to start putting bytes 14251da177e4SLinus Torvalds */ 1426*299b0767SSteffen Klassert data = skb_put(skb, fraglen + dst_exthdrlen); 1427*299b0767SSteffen Klassert skb_set_network_header(skb, exthdrlen + dst_exthdrlen); 1428*299b0767SSteffen Klassert data += fragheaderlen + dst_exthdrlen; 1429b0e380b1SArnaldo Carvalho de Melo skb->transport_header = (skb->network_header + 1430b0e380b1SArnaldo Carvalho de Melo fragheaderlen); 14311da177e4SLinus Torvalds if (fraggap) { 14321da177e4SLinus Torvalds skb->csum = skb_copy_and_csum_bits( 14331da177e4SLinus Torvalds skb_prev, maxfraglen, 14341da177e4SLinus Torvalds data + transhdrlen, fraggap, 0); 14351da177e4SLinus Torvalds skb_prev->csum = csum_sub(skb_prev->csum, 14361da177e4SLinus Torvalds skb->csum); 14371da177e4SLinus Torvalds data += fraggap; 1438e9fa4f7bSHerbert Xu pskb_trim_unique(skb_prev, maxfraglen); 14391da177e4SLinus Torvalds } 14401da177e4SLinus Torvalds copy = datalen - transhdrlen - fraggap; 1441*299b0767SSteffen Klassert 14421da177e4SLinus Torvalds if (copy < 0) { 14431da177e4SLinus Torvalds err = -EINVAL; 14441da177e4SLinus Torvalds kfree_skb(skb); 14451da177e4SLinus Torvalds goto error; 14461da177e4SLinus Torvalds } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { 14471da177e4SLinus Torvalds err = -EFAULT; 14481da177e4SLinus Torvalds kfree_skb(skb); 14491da177e4SLinus Torvalds goto error; 14501da177e4SLinus Torvalds } 14511da177e4SLinus Torvalds 14521da177e4SLinus Torvalds offset += copy; 14531da177e4SLinus Torvalds length -= datalen - fraggap; 14541da177e4SLinus Torvalds transhdrlen = 0; 14551da177e4SLinus Torvalds exthdrlen = 0; 1456*299b0767SSteffen Klassert dst_exthdrlen = 0; 14571da177e4SLinus Torvalds csummode = CHECKSUM_NONE; 14581da177e4SLinus Torvalds 14591da177e4SLinus Torvalds /* 14601da177e4SLinus Torvalds * Put the packet on the pending queue 14611da177e4SLinus Torvalds */ 14621da177e4SLinus Torvalds __skb_queue_tail(&sk->sk_write_queue, skb); 14631da177e4SLinus Torvalds continue; 14641da177e4SLinus Torvalds } 14651da177e4SLinus Torvalds 14661da177e4SLinus Torvalds if (copy > length) 14671da177e4SLinus Torvalds copy = length; 14681da177e4SLinus Torvalds 1469d8d1f30bSChangli Gao if (!(rt->dst.dev->features&NETIF_F_SG)) { 14701da177e4SLinus Torvalds unsigned int off; 14711da177e4SLinus Torvalds 14721da177e4SLinus Torvalds off = skb->len; 14731da177e4SLinus Torvalds if (getfrag(from, skb_put(skb, copy), 14741da177e4SLinus Torvalds offset, copy, off, skb) < 0) { 14751da177e4SLinus Torvalds __skb_trim(skb, off); 14761da177e4SLinus Torvalds err = -EFAULT; 14771da177e4SLinus Torvalds goto error; 14781da177e4SLinus Torvalds } 14791da177e4SLinus Torvalds } else { 14801da177e4SLinus Torvalds int i = skb_shinfo(skb)->nr_frags; 14811da177e4SLinus Torvalds skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 14821da177e4SLinus Torvalds struct page *page = sk->sk_sndmsg_page; 14831da177e4SLinus Torvalds int off = sk->sk_sndmsg_off; 14841da177e4SLinus Torvalds unsigned int left; 14851da177e4SLinus Torvalds 14861da177e4SLinus Torvalds if (page && (left = PAGE_SIZE - off) > 0) { 14871da177e4SLinus Torvalds if (copy >= left) 14881da177e4SLinus Torvalds copy = left; 1489408dadf0SIan Campbell if (page != skb_frag_page(frag)) { 14901da177e4SLinus Torvalds if (i == MAX_SKB_FRAGS) { 14911da177e4SLinus Torvalds err = -EMSGSIZE; 14921da177e4SLinus Torvalds goto error; 14931da177e4SLinus Torvalds } 14941da177e4SLinus Torvalds skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 1495408dadf0SIan Campbell skb_frag_ref(skb, i); 14961da177e4SLinus Torvalds frag = &skb_shinfo(skb)->frags[i]; 14971da177e4SLinus Torvalds } 14981da177e4SLinus Torvalds } else if(i < MAX_SKB_FRAGS) { 14991da177e4SLinus Torvalds if (copy > PAGE_SIZE) 15001da177e4SLinus Torvalds copy = PAGE_SIZE; 15011da177e4SLinus Torvalds page = alloc_pages(sk->sk_allocation, 0); 15021da177e4SLinus Torvalds if (page == NULL) { 15031da177e4SLinus Torvalds err = -ENOMEM; 15041da177e4SLinus Torvalds goto error; 15051da177e4SLinus Torvalds } 15061da177e4SLinus Torvalds sk->sk_sndmsg_page = page; 15071da177e4SLinus Torvalds sk->sk_sndmsg_off = 0; 15081da177e4SLinus Torvalds 15091da177e4SLinus Torvalds skb_fill_page_desc(skb, i, page, 0, 0); 15101da177e4SLinus Torvalds frag = &skb_shinfo(skb)->frags[i]; 15111da177e4SLinus Torvalds } else { 15121da177e4SLinus Torvalds err = -EMSGSIZE; 15131da177e4SLinus Torvalds goto error; 15141da177e4SLinus Torvalds } 1515408dadf0SIan Campbell if (getfrag(from, skb_frag_address(frag)+frag->size, 1516408dadf0SIan Campbell offset, copy, skb->len, skb) < 0) { 15171da177e4SLinus Torvalds err = -EFAULT; 15181da177e4SLinus Torvalds goto error; 15191da177e4SLinus Torvalds } 15201da177e4SLinus Torvalds sk->sk_sndmsg_off += copy; 15211da177e4SLinus Torvalds frag->size += copy; 15221da177e4SLinus Torvalds skb->len += copy; 15231da177e4SLinus Torvalds skb->data_len += copy; 1524f945fa7aSHerbert Xu skb->truesize += copy; 1525f945fa7aSHerbert Xu atomic_add(copy, &sk->sk_wmem_alloc); 15261da177e4SLinus Torvalds } 15271da177e4SLinus Torvalds offset += copy; 15281da177e4SLinus Torvalds length -= copy; 15291da177e4SLinus Torvalds } 15301da177e4SLinus Torvalds return 0; 15311da177e4SLinus Torvalds error: 1532bdc712b4SDavid S. Miller cork->length -= length; 15333bd653c8SDenis V. Lunev IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 15341da177e4SLinus Torvalds return err; 15351da177e4SLinus Torvalds } 15361da177e4SLinus Torvalds 1537bf138862SPavel Emelyanov static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) 1538bf138862SPavel Emelyanov { 15390178b695SHerbert Xu if (np->cork.opt) { 15400178b695SHerbert Xu kfree(np->cork.opt->dst0opt); 15410178b695SHerbert Xu kfree(np->cork.opt->dst1opt); 15420178b695SHerbert Xu kfree(np->cork.opt->hopopt); 15430178b695SHerbert Xu kfree(np->cork.opt->srcrt); 1544bf138862SPavel Emelyanov kfree(np->cork.opt); 1545bf138862SPavel Emelyanov np->cork.opt = NULL; 15460178b695SHerbert Xu } 15470178b695SHerbert Xu 1548bdc712b4SDavid S. Miller if (inet->cork.base.dst) { 1549bdc712b4SDavid S. Miller dst_release(inet->cork.base.dst); 1550bdc712b4SDavid S. Miller inet->cork.base.dst = NULL; 1551bdc712b4SDavid S. Miller inet->cork.base.flags &= ~IPCORK_ALLFRAG; 1552bf138862SPavel Emelyanov } 1553bf138862SPavel Emelyanov memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); 1554bf138862SPavel Emelyanov } 1555bf138862SPavel Emelyanov 15561da177e4SLinus Torvalds int ip6_push_pending_frames(struct sock *sk) 15571da177e4SLinus Torvalds { 15581da177e4SLinus Torvalds struct sk_buff *skb, *tmp_skb; 15591da177e4SLinus Torvalds struct sk_buff **tail_skb; 15601da177e4SLinus Torvalds struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 15611da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 15621da177e4SLinus Torvalds struct ipv6_pinfo *np = inet6_sk(sk); 15633bd653c8SDenis V. Lunev struct net *net = sock_net(sk); 15641da177e4SLinus Torvalds struct ipv6hdr *hdr; 15651da177e4SLinus Torvalds struct ipv6_txoptions *opt = np->cork.opt; 1566bdc712b4SDavid S. Miller struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; 15674c9483b2SDavid S. Miller struct flowi6 *fl6 = &inet->cork.fl.u.ip6; 15684c9483b2SDavid S. Miller unsigned char proto = fl6->flowi6_proto; 15691da177e4SLinus Torvalds int err = 0; 15701da177e4SLinus Torvalds 15711da177e4SLinus Torvalds if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) 15721da177e4SLinus Torvalds goto out; 15731da177e4SLinus Torvalds tail_skb = &(skb_shinfo(skb)->frag_list); 15741da177e4SLinus Torvalds 15751da177e4SLinus Torvalds /* move skb->data to ip header from ext header */ 1576d56f90a7SArnaldo Carvalho de Melo if (skb->data < skb_network_header(skb)) 1577bbe735e4SArnaldo Carvalho de Melo __skb_pull(skb, skb_network_offset(skb)); 15781da177e4SLinus Torvalds while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1579cfe1fc77SArnaldo Carvalho de Melo __skb_pull(tmp_skb, skb_network_header_len(skb)); 15801da177e4SLinus Torvalds *tail_skb = tmp_skb; 15811da177e4SLinus Torvalds tail_skb = &(tmp_skb->next); 15821da177e4SLinus Torvalds skb->len += tmp_skb->len; 15831da177e4SLinus Torvalds skb->data_len += tmp_skb->len; 15841da177e4SLinus Torvalds skb->truesize += tmp_skb->truesize; 15851da177e4SLinus Torvalds tmp_skb->destructor = NULL; 15861da177e4SLinus Torvalds tmp_skb->sk = NULL; 15871da177e4SLinus Torvalds } 15881da177e4SLinus Torvalds 158928a89453SHerbert Xu /* Allow local fragmentation. */ 1590b5c15fc0SHerbert Xu if (np->pmtudisc < IPV6_PMTUDISC_DO) 159128a89453SHerbert Xu skb->local_df = 1; 159228a89453SHerbert Xu 15934c9483b2SDavid S. Miller ipv6_addr_copy(final_dst, &fl6->daddr); 1594cfe1fc77SArnaldo Carvalho de Melo __skb_pull(skb, skb_network_header_len(skb)); 15951da177e4SLinus Torvalds if (opt && opt->opt_flen) 15961da177e4SLinus Torvalds ipv6_push_frag_opts(skb, opt, &proto); 15971da177e4SLinus Torvalds if (opt && opt->opt_nflen) 15981da177e4SLinus Torvalds ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); 15991da177e4SLinus Torvalds 1600e2d1bca7SArnaldo Carvalho de Melo skb_push(skb, sizeof(struct ipv6hdr)); 1601e2d1bca7SArnaldo Carvalho de Melo skb_reset_network_header(skb); 16020660e03fSArnaldo Carvalho de Melo hdr = ipv6_hdr(skb); 16031da177e4SLinus Torvalds 16044c9483b2SDavid S. Miller *(__be32*)hdr = fl6->flowlabel | 160541a1f8eaSYOSHIFUJI Hideaki htonl(0x60000000 | ((int)np->cork.tclass << 20)); 16061da177e4SLinus Torvalds 16071da177e4SLinus Torvalds hdr->hop_limit = np->cork.hop_limit; 16081da177e4SLinus Torvalds hdr->nexthdr = proto; 16094c9483b2SDavid S. Miller ipv6_addr_copy(&hdr->saddr, &fl6->saddr); 16101da177e4SLinus Torvalds ipv6_addr_copy(&hdr->daddr, final_dst); 16111da177e4SLinus Torvalds 1612a2c2064fSPatrick McHardy skb->priority = sk->sk_priority; 16134a19ec58SLaszlo Attila Toth skb->mark = sk->sk_mark; 1614a2c2064fSPatrick McHardy 1615d8d1f30bSChangli Gao skb_dst_set(skb, dst_clone(&rt->dst)); 1616edf391ffSNeil Horman IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 161714878f75SDavid L Stevens if (proto == IPPROTO_ICMPV6) { 1618adf30907SEric Dumazet struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 161914878f75SDavid L Stevens 16205a57d4c7SDenis V. Lunev ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); 1621e41b5368SDenis V. Lunev ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); 162214878f75SDavid L Stevens } 162314878f75SDavid L Stevens 1624ef76bc23SHerbert Xu err = ip6_local_out(skb); 16251da177e4SLinus Torvalds if (err) { 16261da177e4SLinus Torvalds if (err > 0) 16276ce9e7b5SEric Dumazet err = net_xmit_errno(err); 16281da177e4SLinus Torvalds if (err) 16291da177e4SLinus Torvalds goto error; 16301da177e4SLinus Torvalds } 16311da177e4SLinus Torvalds 16321da177e4SLinus Torvalds out: 1633bf138862SPavel Emelyanov ip6_cork_release(inet, np); 16341da177e4SLinus Torvalds return err; 16351da177e4SLinus Torvalds error: 163606254914SEric Dumazet IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 16371da177e4SLinus Torvalds goto out; 16381da177e4SLinus Torvalds } 16391da177e4SLinus Torvalds 16401da177e4SLinus Torvalds void ip6_flush_pending_frames(struct sock *sk) 16411da177e4SLinus Torvalds { 16421da177e4SLinus Torvalds struct sk_buff *skb; 16431da177e4SLinus Torvalds 16441da177e4SLinus Torvalds while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { 1645adf30907SEric Dumazet if (skb_dst(skb)) 1646adf30907SEric Dumazet IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1647a11d206dSYOSHIFUJI Hideaki IPSTATS_MIB_OUTDISCARDS); 16481da177e4SLinus Torvalds kfree_skb(skb); 16491da177e4SLinus Torvalds } 16501da177e4SLinus Torvalds 1651bf138862SPavel Emelyanov ip6_cork_release(inet_sk(sk), inet6_sk(sk)); 16521da177e4SLinus Torvalds } 1653