12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 21da177e4SLinus Torvalds /* 31da177e4SLinus Torvalds * IPv6 output functions 41da177e4SLinus Torvalds * Linux INET6 implementation 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Authors: 71da177e4SLinus Torvalds * Pedro Roque <roque@di.fc.ul.pt> 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * Based on linux/net/ipv4/ip_output.c 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * Changes: 121da177e4SLinus Torvalds * A.N.Kuznetsov : arithmetic in fragmentation. 131da177e4SLinus Torvalds * extension headers are implemented. 141da177e4SLinus Torvalds * route changes now work. 151da177e4SLinus Torvalds * ip6_forward does not confuse sniffers. 161da177e4SLinus Torvalds * etc. 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * H. von Brand : Added missing #include <linux/string.h> 191da177e4SLinus Torvalds * Imran Patel : frag id should be in NBO 201da177e4SLinus Torvalds * Kazunori MIYAZAWA @USAGI 211da177e4SLinus Torvalds * : add ip6_append_data and related functions 221da177e4SLinus Torvalds * for datagram xmit 231da177e4SLinus Torvalds */ 241da177e4SLinus Torvalds 251da177e4SLinus Torvalds #include <linux/errno.h> 26ef76bc23SHerbert Xu #include <linux/kernel.h> 271da177e4SLinus Torvalds #include <linux/string.h> 281da177e4SLinus Torvalds #include <linux/socket.h> 291da177e4SLinus Torvalds #include <linux/net.h> 301da177e4SLinus Torvalds #include <linux/netdevice.h> 311da177e4SLinus Torvalds #include <linux/if_arp.h> 321da177e4SLinus Torvalds #include <linux/in6.h> 331da177e4SLinus Torvalds #include <linux/tcp.h> 341da177e4SLinus Torvalds #include <linux/route.h> 35b59f45d0SHerbert Xu #include <linux/module.h> 365a0e3ad6STejun Heo #include <linux/slab.h> 371da177e4SLinus Torvalds 3833b48679SDaniel Mack #include <linux/bpf-cgroup.h> 391da177e4SLinus Torvalds #include <linux/netfilter.h> 401da177e4SLinus Torvalds #include <linux/netfilter_ipv6.h> 411da177e4SLinus Torvalds 
421da177e4SLinus Torvalds #include <net/sock.h> 431da177e4SLinus Torvalds #include <net/snmp.h> 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds #include <net/ipv6.h> 461da177e4SLinus Torvalds #include <net/ndisc.h> 471da177e4SLinus Torvalds #include <net/protocol.h> 481da177e4SLinus Torvalds #include <net/ip6_route.h> 491da177e4SLinus Torvalds #include <net/addrconf.h> 501da177e4SLinus Torvalds #include <net/rawv6.h> 511da177e4SLinus Torvalds #include <net/icmp.h> 521da177e4SLinus Torvalds #include <net/xfrm.h> 531da177e4SLinus Torvalds #include <net/checksum.h> 547bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h> 55ca254490SDavid Ahern #include <net/l3mdev.h> 5614972cbdSRoopa Prabhu #include <net/lwtunnel.h> 57571912c6SMartin Varghese #include <net/ip_tunnels.h> 581da177e4SLinus Torvalds 597d8c6e39SEric W. Biederman static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) 601da177e4SLinus Torvalds { 61adf30907SEric Dumazet struct dst_entry *dst = skb_dst(skb); 621da177e4SLinus Torvalds struct net_device *dev = dst->dev; 639b1c1ef1SNicolas Dichtel const struct in6_addr *nexthop; 64f6b72b62SDavid S. 
Miller struct neighbour *neigh; 656fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 int ret; 661da177e4SLinus Torvalds 670660e03fSArnaldo Carvalho de Melo if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { 68adf30907SEric Dumazet struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 691da177e4SLinus Torvalds 707026b1ddSDavid Miller if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && 718571ab47SYuval Mintz ((mroute6_is_socket(net, skb) && 72bd91b8bfSBenjamin Thery !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 730660e03fSArnaldo Carvalho de Melo ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 747bc570c8SYOSHIFUJI Hideaki &ipv6_hdr(skb)->saddr))) { 751da177e4SLinus Torvalds struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 761da177e4SLinus Torvalds 771da177e4SLinus Torvalds /* Do not check for IFF_ALLMULTI; multicast routing 781da177e4SLinus Torvalds is not supported in any case. 791da177e4SLinus Torvalds */ 801da177e4SLinus Torvalds if (newskb) 81b2e0b385SJan Engelhardt NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 8229a26a56SEric W. Biederman net, sk, newskb, NULL, newskb->dev, 8395603e22SMichel Machado dev_loopback_xmit); 841da177e4SLinus Torvalds 850660e03fSArnaldo Carvalho de Melo if (ipv6_hdr(skb)->hop_limit == 0) { 8678126c41SEric W. Biederman IP6_INC_STATS(net, idev, 873bd653c8SDenis V. Lunev IPSTATS_MIB_OUTDISCARDS); 881da177e4SLinus Torvalds kfree_skb(skb); 891da177e4SLinus Torvalds return 0; 901da177e4SLinus Torvalds } 911da177e4SLinus Torvalds } 921da177e4SLinus Torvalds 9378126c41SEric W. 
Biederman IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); 94dd408515SHannes Frederic Sowa 95dd408515SHannes Frederic Sowa if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= 96dd408515SHannes Frederic Sowa IPV6_ADDR_SCOPE_NODELOCAL && 97dd408515SHannes Frederic Sowa !(dev->flags & IFF_LOOPBACK)) { 98dd408515SHannes Frederic Sowa kfree_skb(skb); 99dd408515SHannes Frederic Sowa return 0; 100dd408515SHannes Frederic Sowa } 1011da177e4SLinus Torvalds } 1021da177e4SLinus Torvalds 10314972cbdSRoopa Prabhu if (lwtunnel_xmit_redirect(dst->lwtstate)) { 10414972cbdSRoopa Prabhu int res = lwtunnel_xmit(skb); 10514972cbdSRoopa Prabhu 10614972cbdSRoopa Prabhu if (res < 0 || res == LWTUNNEL_XMIT_DONE) 10714972cbdSRoopa Prabhu return res; 10814972cbdSRoopa Prabhu } 10914972cbdSRoopa Prabhu 1106fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 rcu_read_lock_bh(); 1112647a9b0SMartin KaFai Lau nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); 1126fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); 1136fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 if (unlikely(!neigh)) 1146fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); 1156fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 if (!IS_ERR(neigh)) { 1164ff06203SJulian Anastasov sock_confirm_neigh(skb, neigh); 1170353f282SDavid Ahern ret = neigh_output(neigh, skb, false); 1186fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 rcu_read_unlock_bh(); 1196fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 return ret; 1206fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 } 1216fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 rcu_read_unlock_bh(); 12205e3aa09SDavid S. Miller 12378126c41SEric W. 
Biederman IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 1249e508490SJan Engelhardt kfree_skb(skb); 1259e508490SJan Engelhardt return -EINVAL; 1261da177e4SLinus Torvalds } 1271da177e4SLinus Torvalds 128b210de4fSAya Levin static int 129b210de4fSAya Levin ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, 130b210de4fSAya Levin struct sk_buff *skb, unsigned int mtu) 131b210de4fSAya Levin { 132b210de4fSAya Levin struct sk_buff *segs, *nskb; 133b210de4fSAya Levin netdev_features_t features; 134b210de4fSAya Levin int ret = 0; 135b210de4fSAya Levin 136b210de4fSAya Levin /* Please see corresponding comment in ip_finish_output_gso 137b210de4fSAya Levin * describing the cases where GSO segment length exceeds the 138b210de4fSAya Levin * egress MTU. 139b210de4fSAya Levin */ 140b210de4fSAya Levin features = netif_skb_features(skb); 141b210de4fSAya Levin segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 142b210de4fSAya Levin if (IS_ERR_OR_NULL(segs)) { 143b210de4fSAya Levin kfree_skb(skb); 144b210de4fSAya Levin return -ENOMEM; 145b210de4fSAya Levin } 146b210de4fSAya Levin 147b210de4fSAya Levin consume_skb(skb); 148b210de4fSAya Levin 149b210de4fSAya Levin skb_list_walk_safe(segs, segs, nskb) { 150b210de4fSAya Levin int err; 151b210de4fSAya Levin 152b210de4fSAya Levin skb_mark_not_on_list(segs); 153b210de4fSAya Levin err = ip6_fragment(net, sk, segs, ip6_finish_output2); 154b210de4fSAya Levin if (err && ret == 0) 155b210de4fSAya Levin ret = err; 156b210de4fSAya Levin } 157b210de4fSAya Levin 158b210de4fSAya Levin return ret; 159b210de4fSAya Levin } 160b210de4fSAya Levin 161956fe219Sbrakmo static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 1629e508490SJan Engelhardt { 163b210de4fSAya Levin unsigned int mtu; 164b210de4fSAya Levin 16509ee9dbaSTobias Brunner #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 16609ee9dbaSTobias Brunner /* Policy lookup after SNAT yielded a new policy */ 16709ee9dbaSTobias 
Brunner if (skb_dst(skb)->xfrm) { 16809ee9dbaSTobias Brunner IPCB(skb)->flags |= IPSKB_REROUTED; 16909ee9dbaSTobias Brunner return dst_output(net, sk, skb); 17009ee9dbaSTobias Brunner } 17109ee9dbaSTobias Brunner #endif 17209ee9dbaSTobias Brunner 173b210de4fSAya Levin mtu = ip6_skb_dst_mtu(skb); 174b210de4fSAya Levin if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)) 175b210de4fSAya Levin return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); 176b210de4fSAya Levin 177b210de4fSAya Levin if ((skb->len > mtu && !skb_is_gso(skb)) || 1789037c357SJiri Pirko dst_allfrag(skb_dst(skb)) || 1799037c357SJiri Pirko (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) 1807d8c6e39SEric W. Biederman return ip6_fragment(net, sk, skb, ip6_finish_output2); 1819e508490SJan Engelhardt else 1827d8c6e39SEric W. Biederman return ip6_finish_output2(net, sk, skb); 1839e508490SJan Engelhardt } 1849e508490SJan Engelhardt 185956fe219Sbrakmo static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 186956fe219Sbrakmo { 187956fe219Sbrakmo int ret; 188956fe219Sbrakmo 189956fe219Sbrakmo ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); 190956fe219Sbrakmo switch (ret) { 191956fe219Sbrakmo case NET_XMIT_SUCCESS: 192956fe219Sbrakmo return __ip6_finish_output(net, sk, skb); 193956fe219Sbrakmo case NET_XMIT_CN: 194956fe219Sbrakmo return __ip6_finish_output(net, sk, skb) ? : ret; 195956fe219Sbrakmo default: 196956fe219Sbrakmo kfree_skb(skb); 197956fe219Sbrakmo return ret; 198956fe219Sbrakmo } 199956fe219Sbrakmo } 200956fe219Sbrakmo 201ede2059dSEric W. Biederman int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 2021da177e4SLinus Torvalds { 20328f8bfd1SPhil Sutter struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; 204adf30907SEric Dumazet struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 205be10de0aSEric W. 
Biederman 20697a7a37aSChenbo Feng skb->protocol = htons(ETH_P_IPV6); 20797a7a37aSChenbo Feng skb->dev = dev; 20897a7a37aSChenbo Feng 209778d80beSYOSHIFUJI Hideaki if (unlikely(idev->cnf.disable_ipv6)) { 21019a0644cSEric W. Biederman IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 211778d80beSYOSHIFUJI Hideaki kfree_skb(skb); 212778d80beSYOSHIFUJI Hideaki return 0; 213778d80beSYOSHIFUJI Hideaki } 214778d80beSYOSHIFUJI Hideaki 21529a26a56SEric W. Biederman return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, 21628f8bfd1SPhil Sutter net, sk, skb, indev, dev, 2179c6eb28aSJan Engelhardt ip6_finish_output, 2189c6eb28aSJan Engelhardt !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 2191da177e4SLinus Torvalds } 2206585d7dcSBrian Vazquez EXPORT_SYMBOL(ip6_output); 2211da177e4SLinus Torvalds 222e9191ffbSBen Hutchings bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) 223513674b5SShaohua Li { 224513674b5SShaohua Li if (!np->autoflowlabel_set) 225513674b5SShaohua Li return ip6_default_np_autolabel(net); 226513674b5SShaohua Li else 227513674b5SShaohua Li return np->autoflowlabel; 228513674b5SShaohua Li } 229513674b5SShaohua Li 2301da177e4SLinus Torvalds /* 231b5d43998SShan Wei * xmit an sk_buff (used by TCP, SCTP and DCCP) 2321c1e9d2bSEric Dumazet * Note : socket lock is not held for SYNACK packets, but might be modified 2331c1e9d2bSEric Dumazet * by calls to skb_set_owner_w() and ipv6_local_error(), 2341c1e9d2bSEric Dumazet * which are using proper atomic operations or spinlocks. 2351da177e4SLinus Torvalds */ 2361c1e9d2bSEric Dumazet int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 2374f6570d7SEric Dumazet __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority) 2381da177e4SLinus Torvalds { 2393bd653c8SDenis V. Lunev struct net *net = sock_net(sk); 2401c1e9d2bSEric Dumazet const struct ipv6_pinfo *np = inet6_sk(sk); 2414c9483b2SDavid S. 
Miller struct in6_addr *first_hop = &fl6->daddr; 242adf30907SEric Dumazet struct dst_entry *dst = skb_dst(skb); 24366033f47SStefano Brivio unsigned int head_room; 2441da177e4SLinus Torvalds struct ipv6hdr *hdr; 2454c9483b2SDavid S. Miller u8 proto = fl6->flowi6_proto; 2461da177e4SLinus Torvalds int seg_len = skb->len; 247e651f03aSGerrit Renker int hlimit = -1; 2481da177e4SLinus Torvalds u32 mtu; 2491da177e4SLinus Torvalds 25066033f47SStefano Brivio head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); 25166033f47SStefano Brivio if (opt) 25266033f47SStefano Brivio head_room += opt->opt_nflen + opt->opt_flen; 2531da177e4SLinus Torvalds 25466033f47SStefano Brivio if (unlikely(skb_headroom(skb) < head_room)) { 2551da177e4SLinus Torvalds struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 25663159f29SIan Morris if (!skb2) { 257adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 258a11d206dSYOSHIFUJI Hideaki IPSTATS_MIB_OUTDISCARDS); 2591da177e4SLinus Torvalds kfree_skb(skb); 2601da177e4SLinus Torvalds return -ENOBUFS; 2611da177e4SLinus Torvalds } 262bbd6528dSEric Dumazet if (skb->sk) 263bbd6528dSEric Dumazet skb_set_owner_w(skb2, skb->sk); 264808db80aSEric Dumazet consume_skb(skb); 265a11d206dSYOSHIFUJI Hideaki skb = skb2; 2661da177e4SLinus Torvalds } 26766033f47SStefano Brivio 26866033f47SStefano Brivio if (opt) { 26966033f47SStefano Brivio seg_len += opt->opt_nflen + opt->opt_flen; 27066033f47SStefano Brivio 2711da177e4SLinus Torvalds if (opt->opt_flen) 2721da177e4SLinus Torvalds ipv6_push_frag_opts(skb, opt, &proto); 27366033f47SStefano Brivio 2741da177e4SLinus Torvalds if (opt->opt_nflen) 275613fa3caSDavid Lebrun ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, 276613fa3caSDavid Lebrun &fl6->saddr); 2771da177e4SLinus Torvalds } 2781da177e4SLinus Torvalds 279e2d1bca7SArnaldo Carvalho de Melo skb_push(skb, sizeof(struct ipv6hdr)); 280e2d1bca7SArnaldo Carvalho de Melo skb_reset_network_header(skb); 2810660e03fSArnaldo Carvalho de 
Melo hdr = ipv6_hdr(skb); 2821da177e4SLinus Torvalds 2831da177e4SLinus Torvalds /* 2841da177e4SLinus Torvalds * Fill in the IPv6 header 2851da177e4SLinus Torvalds */ 286b903d324SEric Dumazet if (np) 2871da177e4SLinus Torvalds hlimit = np->hop_limit; 2881da177e4SLinus Torvalds if (hlimit < 0) 2896b75d090SYOSHIFUJI Hideaki hlimit = ip6_dst_hoplimit(dst); 2901da177e4SLinus Torvalds 291cb1ce2efSTom Herbert ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 292513674b5SShaohua Li ip6_autoflowlabel(net, np), fl6)); 29341a1f8eaSYOSHIFUJI Hideaki 2941da177e4SLinus Torvalds hdr->payload_len = htons(seg_len); 2951da177e4SLinus Torvalds hdr->nexthdr = proto; 2961da177e4SLinus Torvalds hdr->hop_limit = hlimit; 2971da177e4SLinus Torvalds 2984e3fd7a0SAlexey Dobriyan hdr->saddr = fl6->saddr; 2994e3fd7a0SAlexey Dobriyan hdr->daddr = *first_hop; 3001da177e4SLinus Torvalds 3019c9c9ad5SHannes Frederic Sowa skb->protocol = htons(ETH_P_IPV6); 3024f6570d7SEric Dumazet skb->priority = priority; 30392e55f41SPablo Neira skb->mark = mark; 304a2c2064fSPatrick McHardy 3051da177e4SLinus Torvalds mtu = dst_mtu(dst); 30660ff7467SWANG Cong if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { 307adf30907SEric Dumazet IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 308edf391ffSNeil Horman IPSTATS_MIB_OUT, skb->len); 309a8e3e1a9SDavid Ahern 310a8e3e1a9SDavid Ahern /* if egress device is enslaved to an L3 master device pass the 311a8e3e1a9SDavid Ahern * skb to its handler for processing 312a8e3e1a9SDavid Ahern */ 313a8e3e1a9SDavid Ahern skb = l3mdev_ip6_out((struct sock *)sk, skb); 314a8e3e1a9SDavid Ahern if (unlikely(!skb)) 315a8e3e1a9SDavid Ahern return 0; 316a8e3e1a9SDavid Ahern 3171c1e9d2bSEric Dumazet /* hooks should never assume socket lock is held. 3181c1e9d2bSEric Dumazet * we promote our socket to non const 3191c1e9d2bSEric Dumazet */ 32029a26a56SEric W. 
Biederman return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 3211c1e9d2bSEric Dumazet net, (struct sock *)sk, skb, NULL, dst->dev, 32213206b6bSEric W. Biederman dst_output); 3231da177e4SLinus Torvalds } 3241da177e4SLinus Torvalds 3251da177e4SLinus Torvalds skb->dev = dst->dev; 3261c1e9d2bSEric Dumazet /* ipv6_local_error() does not require socket lock, 3271c1e9d2bSEric Dumazet * we promote our socket to non const 3281c1e9d2bSEric Dumazet */ 3291c1e9d2bSEric Dumazet ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); 3301c1e9d2bSEric Dumazet 331adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 3321da177e4SLinus Torvalds kfree_skb(skb); 3331da177e4SLinus Torvalds return -EMSGSIZE; 3341da177e4SLinus Torvalds } 3357159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(ip6_xmit); 3367159039aSYOSHIFUJI Hideaki 3371da177e4SLinus Torvalds static int ip6_call_ra_chain(struct sk_buff *skb, int sel) 3381da177e4SLinus Torvalds { 3391da177e4SLinus Torvalds struct ip6_ra_chain *ra; 3401da177e4SLinus Torvalds struct sock *last = NULL; 3411da177e4SLinus Torvalds 3421da177e4SLinus Torvalds read_lock(&ip6_ra_lock); 3431da177e4SLinus Torvalds for (ra = ip6_ra_chain; ra; ra = ra->next) { 3441da177e4SLinus Torvalds struct sock *sk = ra->sk; 3450bd1b59bSAndrew McDonald if (sk && ra->sel == sel && 3460bd1b59bSAndrew McDonald (!sk->sk_bound_dev_if || 3470bd1b59bSAndrew McDonald sk->sk_bound_dev_if == skb->dev->ifindex)) { 3489036b2feSFrancesco Ruggeri struct ipv6_pinfo *np = inet6_sk(sk); 3499036b2feSFrancesco Ruggeri 3509036b2feSFrancesco Ruggeri if (np && np->rtalert_isolate && 3519036b2feSFrancesco Ruggeri !net_eq(sock_net(sk), dev_net(skb->dev))) { 3529036b2feSFrancesco Ruggeri continue; 3539036b2feSFrancesco Ruggeri } 3541da177e4SLinus Torvalds if (last) { 3551da177e4SLinus Torvalds struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 3561da177e4SLinus Torvalds if (skb2) 3571da177e4SLinus Torvalds rawv6_rcv(last, skb2); 3581da177e4SLinus Torvalds } 
3591da177e4SLinus Torvalds last = sk; 3601da177e4SLinus Torvalds } 3611da177e4SLinus Torvalds } 3621da177e4SLinus Torvalds 3631da177e4SLinus Torvalds if (last) { 3641da177e4SLinus Torvalds rawv6_rcv(last, skb); 3651da177e4SLinus Torvalds read_unlock(&ip6_ra_lock); 3661da177e4SLinus Torvalds return 1; 3671da177e4SLinus Torvalds } 3681da177e4SLinus Torvalds read_unlock(&ip6_ra_lock); 3691da177e4SLinus Torvalds return 0; 3701da177e4SLinus Torvalds } 3711da177e4SLinus Torvalds 372e21e0b5fSVille Nuorvala static int ip6_forward_proxy_check(struct sk_buff *skb) 373e21e0b5fSVille Nuorvala { 3740660e03fSArnaldo Carvalho de Melo struct ipv6hdr *hdr = ipv6_hdr(skb); 375e21e0b5fSVille Nuorvala u8 nexthdr = hdr->nexthdr; 37675f2811cSJesse Gross __be16 frag_off; 377e21e0b5fSVille Nuorvala int offset; 378e21e0b5fSVille Nuorvala 379e21e0b5fSVille Nuorvala if (ipv6_ext_hdr(nexthdr)) { 38075f2811cSJesse Gross offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); 381e21e0b5fSVille Nuorvala if (offset < 0) 382e21e0b5fSVille Nuorvala return 0; 383e21e0b5fSVille Nuorvala } else 384e21e0b5fSVille Nuorvala offset = sizeof(struct ipv6hdr); 385e21e0b5fSVille Nuorvala 386e21e0b5fSVille Nuorvala if (nexthdr == IPPROTO_ICMPV6) { 387e21e0b5fSVille Nuorvala struct icmp6hdr *icmp6; 388e21e0b5fSVille Nuorvala 389d56f90a7SArnaldo Carvalho de Melo if (!pskb_may_pull(skb, (skb_network_header(skb) + 390d56f90a7SArnaldo Carvalho de Melo offset + 1 - skb->data))) 391e21e0b5fSVille Nuorvala return 0; 392e21e0b5fSVille Nuorvala 393d56f90a7SArnaldo Carvalho de Melo icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); 394e21e0b5fSVille Nuorvala 395e21e0b5fSVille Nuorvala switch (icmp6->icmp6_type) { 396e21e0b5fSVille Nuorvala case NDISC_ROUTER_SOLICITATION: 397e21e0b5fSVille Nuorvala case NDISC_ROUTER_ADVERTISEMENT: 398e21e0b5fSVille Nuorvala case NDISC_NEIGHBOUR_SOLICITATION: 399e21e0b5fSVille Nuorvala case NDISC_NEIGHBOUR_ADVERTISEMENT: 400e21e0b5fSVille Nuorvala case 
NDISC_REDIRECT: 401e21e0b5fSVille Nuorvala /* For reaction involving unicast neighbor discovery 402e21e0b5fSVille Nuorvala * message destined to the proxied address, pass it to 403e21e0b5fSVille Nuorvala * input function. 404e21e0b5fSVille Nuorvala */ 405e21e0b5fSVille Nuorvala return 1; 406e21e0b5fSVille Nuorvala default: 407e21e0b5fSVille Nuorvala break; 408e21e0b5fSVille Nuorvala } 409e21e0b5fSVille Nuorvala } 410e21e0b5fSVille Nuorvala 41174553b09SVille Nuorvala /* 41274553b09SVille Nuorvala * The proxying router can't forward traffic sent to a link-local 41374553b09SVille Nuorvala * address, so signal the sender and discard the packet. This 41474553b09SVille Nuorvala * behavior is clarified by the MIPv6 specification. 41574553b09SVille Nuorvala */ 41674553b09SVille Nuorvala if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { 41774553b09SVille Nuorvala dst_link_failure(skb); 41874553b09SVille Nuorvala return -1; 41974553b09SVille Nuorvala } 42074553b09SVille Nuorvala 421e21e0b5fSVille Nuorvala return 0; 422e21e0b5fSVille Nuorvala } 423e21e0b5fSVille Nuorvala 4240c4b51f0SEric W. Biederman static inline int ip6_forward_finish(struct net *net, struct sock *sk, 4250c4b51f0SEric W. Biederman struct sk_buff *skb) 4261da177e4SLinus Torvalds { 42771a1c915SJeff Barnhill struct dst_entry *dst = skb_dst(skb); 42871a1c915SJeff Barnhill 42971a1c915SJeff Barnhill __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 43071a1c915SJeff Barnhill __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); 43171a1c915SJeff Barnhill 432f839a6c9SIdo Schimmel #ifdef CONFIG_NET_SWITCHDEV 433f839a6c9SIdo Schimmel if (skb->offload_l3_fwd_mark) { 434f839a6c9SIdo Schimmel consume_skb(skb); 435f839a6c9SIdo Schimmel return 0; 436f839a6c9SIdo Schimmel } 437f839a6c9SIdo Schimmel #endif 438f839a6c9SIdo Schimmel 4398203e2d8SEric Dumazet skb->tstamp = 0; 44013206b6bSEric W. 
Biederman return dst_output(net, sk, skb); 4411da177e4SLinus Torvalds } 4421da177e4SLinus Torvalds 443fe6cc55fSFlorian Westphal static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) 444fe6cc55fSFlorian Westphal { 445418a3156SFlorian Westphal if (skb->len <= mtu) 446fe6cc55fSFlorian Westphal return false; 447fe6cc55fSFlorian Westphal 44860ff7467SWANG Cong /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ 449fe6cc55fSFlorian Westphal if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) 450fe6cc55fSFlorian Westphal return true; 451fe6cc55fSFlorian Westphal 45260ff7467SWANG Cong if (skb->ignore_df) 453418a3156SFlorian Westphal return false; 454418a3156SFlorian Westphal 455779b7931SDaniel Axtens if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) 456fe6cc55fSFlorian Westphal return false; 457fe6cc55fSFlorian Westphal 458fe6cc55fSFlorian Westphal return true; 459fe6cc55fSFlorian Westphal } 460fe6cc55fSFlorian Westphal 4611da177e4SLinus Torvalds int ip6_forward(struct sk_buff *skb) 4621da177e4SLinus Torvalds { 463bdb7cc64SStephen Suryaputra struct inet6_dev *idev = __in6_dev_get_safely(skb->dev); 464adf30907SEric Dumazet struct dst_entry *dst = skb_dst(skb); 4650660e03fSArnaldo Carvalho de Melo struct ipv6hdr *hdr = ipv6_hdr(skb); 4661da177e4SLinus Torvalds struct inet6_skb_parm *opt = IP6CB(skb); 467c346dca1SYOSHIFUJI Hideaki struct net *net = dev_net(dst->dev); 46814f3ad6fSUlrich Weber u32 mtu; 4691da177e4SLinus Torvalds 47053b7997fSYOSHIFUJI Hideaki if (net->ipv6.devconf_all->forwarding == 0) 4711da177e4SLinus Torvalds goto error; 4721da177e4SLinus Torvalds 473090f1166SLi RongQing if (skb->pkt_type != PACKET_HOST) 474090f1166SLi RongQing goto drop; 475090f1166SLi RongQing 4769ef2e965SHannes Frederic Sowa if (unlikely(skb->sk)) 4779ef2e965SHannes Frederic Sowa goto drop; 4789ef2e965SHannes Frederic Sowa 4794497b076SBen Hutchings if (skb_warn_if_lro(skb)) 4804497b076SBen Hutchings goto drop; 4814497b076SBen Hutchings 
4821da177e4SLinus Torvalds if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { 483bdb7cc64SStephen Suryaputra __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); 4841da177e4SLinus Torvalds goto drop; 4851da177e4SLinus Torvalds } 4861da177e4SLinus Torvalds 48735fc92a9SHerbert Xu skb_forward_csum(skb); 4881da177e4SLinus Torvalds 4891da177e4SLinus Torvalds /* 4901da177e4SLinus Torvalds * We DO NOT make any processing on 4911da177e4SLinus Torvalds * RA packets, pushing them to user level AS IS 4921da177e4SLinus Torvalds * without ane WARRANTY that application will be able 4931da177e4SLinus Torvalds * to interpret them. The reason is that we 4941da177e4SLinus Torvalds * cannot make anything clever here. 4951da177e4SLinus Torvalds * 4961da177e4SLinus Torvalds * We are not end-node, so that if packet contains 4971da177e4SLinus Torvalds * AH/ESP, we cannot make anything. 4981da177e4SLinus Torvalds * Defragmentation also would be mistake, RA packets 4991da177e4SLinus Torvalds * cannot be fragmented, because there is no warranty 5001da177e4SLinus Torvalds * that different fragments will go along one path. --ANK 5011da177e4SLinus Torvalds */ 502ab4eb353SYOSHIFUJI Hideaki / 吉藤英明 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { 503ab4eb353SYOSHIFUJI Hideaki / 吉藤英明 if (ip6_call_ra_chain(skb, ntohs(opt->ra))) 5041da177e4SLinus Torvalds return 0; 5051da177e4SLinus Torvalds } 5061da177e4SLinus Torvalds 5071da177e4SLinus Torvalds /* 5081da177e4SLinus Torvalds * check and decrement ttl 5091da177e4SLinus Torvalds */ 5101da177e4SLinus Torvalds if (hdr->hop_limit <= 1) { 5113ffe533cSAlexey Dobriyan icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); 512bdb7cc64SStephen Suryaputra __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); 5131da177e4SLinus Torvalds 5141da177e4SLinus Torvalds kfree_skb(skb); 5151da177e4SLinus Torvalds return -ETIMEDOUT; 5161da177e4SLinus Torvalds } 5171da177e4SLinus Torvalds 518fbea49e1SYOSHIFUJI Hideaki /* XXX: idev->cnf.proxy_ndp? 
*/ 51953b7997fSYOSHIFUJI Hideaki if (net->ipv6.devconf_all->proxy_ndp && 5208a3edd80SDaniel Lezcano pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { 52174553b09SVille Nuorvala int proxied = ip6_forward_proxy_check(skb); 52274553b09SVille Nuorvala if (proxied > 0) 523e21e0b5fSVille Nuorvala return ip6_input(skb); 52474553b09SVille Nuorvala else if (proxied < 0) { 525bdb7cc64SStephen Suryaputra __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); 52674553b09SVille Nuorvala goto drop; 52774553b09SVille Nuorvala } 528e21e0b5fSVille Nuorvala } 529e21e0b5fSVille Nuorvala 5301da177e4SLinus Torvalds if (!xfrm6_route_forward(skb)) { 531bdb7cc64SStephen Suryaputra __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); 5321da177e4SLinus Torvalds goto drop; 5331da177e4SLinus Torvalds } 534adf30907SEric Dumazet dst = skb_dst(skb); 5351da177e4SLinus Torvalds 5361da177e4SLinus Torvalds /* IPv6 specs say nothing about it, but it is clear that we cannot 5371da177e4SLinus Torvalds send redirects to source routed frames. 5381e5dc146SMasahide NAKAMURA We don't send redirects to frames decapsulated from IPsec. 5391da177e4SLinus Torvalds */ 5402f17becfSStephen Suryaputra if (IP6CB(skb)->iif == dst->dev->ifindex && 5412f17becfSStephen Suryaputra opt->srcrt == 0 && !skb_sec_path(skb)) { 5421da177e4SLinus Torvalds struct in6_addr *target = NULL; 543fbfe95a4SDavid S. Miller struct inet_peer *peer; 5441da177e4SLinus Torvalds struct rt6_info *rt; 5451da177e4SLinus Torvalds 5461da177e4SLinus Torvalds /* 5471da177e4SLinus Torvalds * incoming and outgoing devices are the same 5481da177e4SLinus Torvalds * send a redirect. 5491da177e4SLinus Torvalds */ 5501da177e4SLinus Torvalds 5511da177e4SLinus Torvalds rt = (struct rt6_info *) dst; 552c45a3dfbSDavid S. Miller if (rt->rt6i_flags & RTF_GATEWAY) 553c45a3dfbSDavid S. 
Miller target = &rt->rt6i_gateway; 5541da177e4SLinus Torvalds else 5551da177e4SLinus Torvalds target = &hdr->daddr; 5561da177e4SLinus Torvalds 557fd0273d7SMartin KaFai Lau peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1); 55892d86829SDavid S. Miller 5591da177e4SLinus Torvalds /* Limit redirects both by destination (here) 5601da177e4SLinus Torvalds and by source (inside ndisc_send_redirect) 5611da177e4SLinus Torvalds */ 562fbfe95a4SDavid S. Miller if (inet_peer_xrlim_allow(peer, 1*HZ)) 5634991969aSDavid S. Miller ndisc_send_redirect(skb, target); 5641d861aa4SDavid S. Miller if (peer) 5651d861aa4SDavid S. Miller inet_putpeer(peer); 5665bb1ab09SDavid L Stevens } else { 5675bb1ab09SDavid L Stevens int addrtype = ipv6_addr_type(&hdr->saddr); 5685bb1ab09SDavid L Stevens 5691da177e4SLinus Torvalds /* This check is security critical. */ 570f81b2e7dSYOSHIFUJI Hideaki if (addrtype == IPV6_ADDR_ANY || 571f81b2e7dSYOSHIFUJI Hideaki addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) 5721da177e4SLinus Torvalds goto error; 5735bb1ab09SDavid L Stevens if (addrtype & IPV6_ADDR_LINKLOCAL) { 5745bb1ab09SDavid L Stevens icmpv6_send(skb, ICMPV6_DEST_UNREACH, 5753ffe533cSAlexey Dobriyan ICMPV6_NOT_NEIGHBOUR, 0); 5765bb1ab09SDavid L Stevens goto error; 5775bb1ab09SDavid L Stevens } 5781da177e4SLinus Torvalds } 5791da177e4SLinus Torvalds 5800954cf9cSHannes Frederic Sowa mtu = ip6_dst_mtu_forward(dst); 58114f3ad6fSUlrich Weber if (mtu < IPV6_MIN_MTU) 58214f3ad6fSUlrich Weber mtu = IPV6_MIN_MTU; 58314f3ad6fSUlrich Weber 584fe6cc55fSFlorian Westphal if (ip6_pkt_too_big(skb, mtu)) { 5851da177e4SLinus Torvalds /* Again, force OUTPUT device used as source address */ 5861da177e4SLinus Torvalds skb->dev = dst->dev; 58714f3ad6fSUlrich Weber icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 588bdb7cc64SStephen Suryaputra __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); 5891d015503SEric Dumazet __IP6_INC_STATS(net, ip6_dst_idev(dst), 59015c77d8bSEric Dumazet IPSTATS_MIB_FRAGFAILS); 
5911da177e4SLinus Torvalds kfree_skb(skb); 5921da177e4SLinus Torvalds return -EMSGSIZE; 5931da177e4SLinus Torvalds } 5941da177e4SLinus Torvalds 5951da177e4SLinus Torvalds if (skb_cow(skb, dst->dev->hard_header_len)) { 5961d015503SEric Dumazet __IP6_INC_STATS(net, ip6_dst_idev(dst), 59715c77d8bSEric Dumazet IPSTATS_MIB_OUTDISCARDS); 5981da177e4SLinus Torvalds goto drop; 5991da177e4SLinus Torvalds } 6001da177e4SLinus Torvalds 6010660e03fSArnaldo Carvalho de Melo hdr = ipv6_hdr(skb); 6021da177e4SLinus Torvalds 6031da177e4SLinus Torvalds /* Mangling hops number delayed to point after skb COW */ 6041da177e4SLinus Torvalds 6051da177e4SLinus Torvalds hdr->hop_limit--; 6061da177e4SLinus Torvalds 60729a26a56SEric W. Biederman return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, 60829a26a56SEric W. Biederman net, NULL, skb, skb->dev, dst->dev, 6096e23ae2aSPatrick McHardy ip6_forward_finish); 6101da177e4SLinus Torvalds 6111da177e4SLinus Torvalds error: 612bdb7cc64SStephen Suryaputra __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); 6131da177e4SLinus Torvalds drop: 6141da177e4SLinus Torvalds kfree_skb(skb); 6151da177e4SLinus Torvalds return -EINVAL; 6161da177e4SLinus Torvalds } 6171da177e4SLinus Torvalds 6181da177e4SLinus Torvalds static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) 6191da177e4SLinus Torvalds { 6201da177e4SLinus Torvalds to->pkt_type = from->pkt_type; 6211da177e4SLinus Torvalds to->priority = from->priority; 6221da177e4SLinus Torvalds to->protocol = from->protocol; 623adf30907SEric Dumazet skb_dst_drop(to); 624adf30907SEric Dumazet skb_dst_set(to, dst_clone(skb_dst(from))); 6251da177e4SLinus Torvalds to->dev = from->dev; 62682e91ffeSThomas Graf to->mark = from->mark; 6271da177e4SLinus Torvalds 6283dd1c9a1SPaolo Abeni skb_copy_hash(to, from); 6293dd1c9a1SPaolo Abeni 6301da177e4SLinus Torvalds #ifdef CONFIG_NET_SCHED 6311da177e4SLinus Torvalds to->tc_index = from->tc_index; 6321da177e4SLinus Torvalds #endif 633e7ac05f3SYasuyuki Kozakai 
nf_copy(to, from); 634df5042f4SFlorian Westphal skb_ext_copy(to, from); 635984bc16cSJames Morris skb_copy_secmark(to, from); 6361da177e4SLinus Torvalds } 6371da177e4SLinus Torvalds 6380feca619SPablo Neira Ayuso int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, 6390feca619SPablo Neira Ayuso u8 nexthdr, __be32 frag_id, 6400feca619SPablo Neira Ayuso struct ip6_fraglist_iter *iter) 6410feca619SPablo Neira Ayuso { 6420feca619SPablo Neira Ayuso unsigned int first_len; 6430feca619SPablo Neira Ayuso struct frag_hdr *fh; 6440feca619SPablo Neira Ayuso 6450feca619SPablo Neira Ayuso /* BUILD HEADER */ 6460feca619SPablo Neira Ayuso *prevhdr = NEXTHDR_FRAGMENT; 6470feca619SPablo Neira Ayuso iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); 6480feca619SPablo Neira Ayuso if (!iter->tmp_hdr) 6490feca619SPablo Neira Ayuso return -ENOMEM; 6500feca619SPablo Neira Ayuso 651b7034146SEric Dumazet iter->frag = skb_shinfo(skb)->frag_list; 6520feca619SPablo Neira Ayuso skb_frag_list_init(skb); 6530feca619SPablo Neira Ayuso 6540feca619SPablo Neira Ayuso iter->offset = 0; 6550feca619SPablo Neira Ayuso iter->hlen = hlen; 6560feca619SPablo Neira Ayuso iter->frag_id = frag_id; 6570feca619SPablo Neira Ayuso iter->nexthdr = nexthdr; 6580feca619SPablo Neira Ayuso 6590feca619SPablo Neira Ayuso __skb_pull(skb, hlen); 6600feca619SPablo Neira Ayuso fh = __skb_push(skb, sizeof(struct frag_hdr)); 6610feca619SPablo Neira Ayuso __skb_push(skb, hlen); 6620feca619SPablo Neira Ayuso skb_reset_network_header(skb); 6630feca619SPablo Neira Ayuso memcpy(skb_network_header(skb), iter->tmp_hdr, hlen); 6640feca619SPablo Neira Ayuso 6650feca619SPablo Neira Ayuso fh->nexthdr = nexthdr; 6660feca619SPablo Neira Ayuso fh->reserved = 0; 6670feca619SPablo Neira Ayuso fh->frag_off = htons(IP6_MF); 6680feca619SPablo Neira Ayuso fh->identification = frag_id; 6690feca619SPablo Neira Ayuso 6700feca619SPablo Neira Ayuso first_len = skb_pagelen(skb); 6710feca619SPablo Neira Ayuso 
skb->data_len = first_len - skb_headlen(skb); 6720feca619SPablo Neira Ayuso skb->len = first_len; 6730feca619SPablo Neira Ayuso ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); 6740feca619SPablo Neira Ayuso 6750feca619SPablo Neira Ayuso return 0; 6760feca619SPablo Neira Ayuso } 6770feca619SPablo Neira Ayuso EXPORT_SYMBOL(ip6_fraglist_init); 6780feca619SPablo Neira Ayuso 6790feca619SPablo Neira Ayuso void ip6_fraglist_prepare(struct sk_buff *skb, 6800feca619SPablo Neira Ayuso struct ip6_fraglist_iter *iter) 6810feca619SPablo Neira Ayuso { 6820feca619SPablo Neira Ayuso struct sk_buff *frag = iter->frag; 6830feca619SPablo Neira Ayuso unsigned int hlen = iter->hlen; 6840feca619SPablo Neira Ayuso struct frag_hdr *fh; 6850feca619SPablo Neira Ayuso 6860feca619SPablo Neira Ayuso frag->ip_summed = CHECKSUM_NONE; 6870feca619SPablo Neira Ayuso skb_reset_transport_header(frag); 6880feca619SPablo Neira Ayuso fh = __skb_push(frag, sizeof(struct frag_hdr)); 6890feca619SPablo Neira Ayuso __skb_push(frag, hlen); 6900feca619SPablo Neira Ayuso skb_reset_network_header(frag); 6910feca619SPablo Neira Ayuso memcpy(skb_network_header(frag), iter->tmp_hdr, hlen); 6920feca619SPablo Neira Ayuso iter->offset += skb->len - hlen - sizeof(struct frag_hdr); 6930feca619SPablo Neira Ayuso fh->nexthdr = iter->nexthdr; 6940feca619SPablo Neira Ayuso fh->reserved = 0; 6950feca619SPablo Neira Ayuso fh->frag_off = htons(iter->offset); 6960feca619SPablo Neira Ayuso if (frag->next) 6970feca619SPablo Neira Ayuso fh->frag_off |= htons(IP6_MF); 6980feca619SPablo Neira Ayuso fh->identification = iter->frag_id; 6990feca619SPablo Neira Ayuso ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 7000feca619SPablo Neira Ayuso ip6_copy_metadata(frag, skb); 7010feca619SPablo Neira Ayuso } 7020feca619SPablo Neira Ayuso EXPORT_SYMBOL(ip6_fraglist_prepare); 7030feca619SPablo Neira Ayuso 7048a6a1f17SPablo Neira Ayuso void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, 
unsigned int mtu, 7058a6a1f17SPablo Neira Ayuso unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, 7068a6a1f17SPablo Neira Ayuso u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state) 7078a6a1f17SPablo Neira Ayuso { 7088a6a1f17SPablo Neira Ayuso state->prevhdr = prevhdr; 7098a6a1f17SPablo Neira Ayuso state->nexthdr = nexthdr; 7108a6a1f17SPablo Neira Ayuso state->frag_id = frag_id; 7118a6a1f17SPablo Neira Ayuso 7128a6a1f17SPablo Neira Ayuso state->hlen = hlen; 7138a6a1f17SPablo Neira Ayuso state->mtu = mtu; 7148a6a1f17SPablo Neira Ayuso 7158a6a1f17SPablo Neira Ayuso state->left = skb->len - hlen; /* Space per frame */ 7168a6a1f17SPablo Neira Ayuso state->ptr = hlen; /* Where to start from */ 7178a6a1f17SPablo Neira Ayuso 7188a6a1f17SPablo Neira Ayuso state->hroom = hdr_room; 7198a6a1f17SPablo Neira Ayuso state->troom = needed_tailroom; 7208a6a1f17SPablo Neira Ayuso 7218a6a1f17SPablo Neira Ayuso state->offset = 0; 7228a6a1f17SPablo Neira Ayuso } 7238a6a1f17SPablo Neira Ayuso EXPORT_SYMBOL(ip6_frag_init); 7248a6a1f17SPablo Neira Ayuso 7258a6a1f17SPablo Neira Ayuso struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state) 7268a6a1f17SPablo Neira Ayuso { 7278a6a1f17SPablo Neira Ayuso u8 *prevhdr = state->prevhdr, *fragnexthdr_offset; 7288a6a1f17SPablo Neira Ayuso struct sk_buff *frag; 7298a6a1f17SPablo Neira Ayuso struct frag_hdr *fh; 7308a6a1f17SPablo Neira Ayuso unsigned int len; 7318a6a1f17SPablo Neira Ayuso 7328a6a1f17SPablo Neira Ayuso len = state->left; 7338a6a1f17SPablo Neira Ayuso /* IF: it doesn't fit, use 'mtu' - the data space left */ 7348a6a1f17SPablo Neira Ayuso if (len > state->mtu) 7358a6a1f17SPablo Neira Ayuso len = state->mtu; 7368a6a1f17SPablo Neira Ayuso /* IF: we are not sending up to and including the packet end 7378a6a1f17SPablo Neira Ayuso then align the next start on an eight byte boundary */ 7388a6a1f17SPablo Neira Ayuso if (len < state->left) 7398a6a1f17SPablo Neira Ayuso len &= ~7; 7408a6a1f17SPablo Neira 
Ayuso 7418a6a1f17SPablo Neira Ayuso /* Allocate buffer */ 7428a6a1f17SPablo Neira Ayuso frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) + 7438a6a1f17SPablo Neira Ayuso state->hroom + state->troom, GFP_ATOMIC); 7448a6a1f17SPablo Neira Ayuso if (!frag) 7458a6a1f17SPablo Neira Ayuso return ERR_PTR(-ENOMEM); 7468a6a1f17SPablo Neira Ayuso 7478a6a1f17SPablo Neira Ayuso /* 7488a6a1f17SPablo Neira Ayuso * Set up data on packet 7498a6a1f17SPablo Neira Ayuso */ 7508a6a1f17SPablo Neira Ayuso 7518a6a1f17SPablo Neira Ayuso ip6_copy_metadata(frag, skb); 7528a6a1f17SPablo Neira Ayuso skb_reserve(frag, state->hroom); 7538a6a1f17SPablo Neira Ayuso skb_put(frag, len + state->hlen + sizeof(struct frag_hdr)); 7548a6a1f17SPablo Neira Ayuso skb_reset_network_header(frag); 7558a6a1f17SPablo Neira Ayuso fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen); 7568a6a1f17SPablo Neira Ayuso frag->transport_header = (frag->network_header + state->hlen + 7578a6a1f17SPablo Neira Ayuso sizeof(struct frag_hdr)); 7588a6a1f17SPablo Neira Ayuso 7598a6a1f17SPablo Neira Ayuso /* 7608a6a1f17SPablo Neira Ayuso * Charge the memory for the fragment to any owner 7618a6a1f17SPablo Neira Ayuso * it might possess 7628a6a1f17SPablo Neira Ayuso */ 7638a6a1f17SPablo Neira Ayuso if (skb->sk) 7648a6a1f17SPablo Neira Ayuso skb_set_owner_w(frag, skb->sk); 7658a6a1f17SPablo Neira Ayuso 7668a6a1f17SPablo Neira Ayuso /* 7678a6a1f17SPablo Neira Ayuso * Copy the packet header into the new buffer. 
7688a6a1f17SPablo Neira Ayuso */ 7698a6a1f17SPablo Neira Ayuso skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen); 7708a6a1f17SPablo Neira Ayuso 7718a6a1f17SPablo Neira Ayuso fragnexthdr_offset = skb_network_header(frag); 7728a6a1f17SPablo Neira Ayuso fragnexthdr_offset += prevhdr - skb_network_header(skb); 7738a6a1f17SPablo Neira Ayuso *fragnexthdr_offset = NEXTHDR_FRAGMENT; 7748a6a1f17SPablo Neira Ayuso 7758a6a1f17SPablo Neira Ayuso /* 7768a6a1f17SPablo Neira Ayuso * Build fragment header. 7778a6a1f17SPablo Neira Ayuso */ 7788a6a1f17SPablo Neira Ayuso fh->nexthdr = state->nexthdr; 7798a6a1f17SPablo Neira Ayuso fh->reserved = 0; 7808a6a1f17SPablo Neira Ayuso fh->identification = state->frag_id; 7818a6a1f17SPablo Neira Ayuso 7828a6a1f17SPablo Neira Ayuso /* 7838a6a1f17SPablo Neira Ayuso * Copy a block of the IP datagram. 7848a6a1f17SPablo Neira Ayuso */ 7858a6a1f17SPablo Neira Ayuso BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag), 7868a6a1f17SPablo Neira Ayuso len)); 7878a6a1f17SPablo Neira Ayuso state->left -= len; 7888a6a1f17SPablo Neira Ayuso 7898a6a1f17SPablo Neira Ayuso fh->frag_off = htons(state->offset); 7908a6a1f17SPablo Neira Ayuso if (state->left > 0) 7918a6a1f17SPablo Neira Ayuso fh->frag_off |= htons(IP6_MF); 7928a6a1f17SPablo Neira Ayuso ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr)); 7938a6a1f17SPablo Neira Ayuso 7948a6a1f17SPablo Neira Ayuso state->ptr += len; 7958a6a1f17SPablo Neira Ayuso state->offset += len; 7968a6a1f17SPablo Neira Ayuso 7978a6a1f17SPablo Neira Ayuso return frag; 7988a6a1f17SPablo Neira Ayuso } 7998a6a1f17SPablo Neira Ayuso EXPORT_SYMBOL(ip6_frag_next); 8008a6a1f17SPablo Neira Ayuso 8017d8c6e39SEric W. Biederman int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 8027d8c6e39SEric W. 
Biederman int (*output)(struct net *, struct sock *, struct sk_buff *)) 8031da177e4SLinus Torvalds { 8041da177e4SLinus Torvalds struct sk_buff *frag; 805adf30907SEric Dumazet struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 806f60e5990Shannes@stressinduktion.org struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 807f60e5990Shannes@stressinduktion.org inet6_sk(skb->sk) : NULL; 8088a6a1f17SPablo Neira Ayuso struct ip6_frag_state state; 8098a6a1f17SPablo Neira Ayuso unsigned int mtu, hlen, nexthdr_offset; 8109669fffcSEric Dumazet ktime_t tstamp = skb->tstamp; 8118a6a1f17SPablo Neira Ayuso int hroom, err = 0; 812286c2349SMartin KaFai Lau __be32 frag_id; 8131da177e4SLinus Torvalds u8 *prevhdr, nexthdr = 0; 8141da177e4SLinus Torvalds 8157dd7eb95SDavid S. Miller err = ip6_find_1stfragopt(skb, &prevhdr); 8167dd7eb95SDavid S. Miller if (err < 0) 8172423496aSCraig Gallek goto fail; 8187dd7eb95SDavid S. Miller hlen = err; 8191da177e4SLinus Torvalds nexthdr = *prevhdr; 820ef0efcd3SJunwei Hu nexthdr_offset = prevhdr - skb_network_header(skb); 8211da177e4SLinus Torvalds 822628a5c56SJohn Heffner mtu = ip6_skb_dst_mtu(skb); 823b881ef76SJohn Heffner 824b881ef76SJohn Heffner /* We must not fragment if the socket is set to force MTU discovery 82514f3ad6fSUlrich Weber * or if the skb it not generated by a local socket. 
826b881ef76SJohn Heffner */ 827485fca66SFlorian Westphal if (unlikely(!skb->ignore_df && skb->len > mtu)) 828485fca66SFlorian Westphal goto fail_toobig; 829a34a101eSEric Dumazet 830485fca66SFlorian Westphal if (IP6CB(skb)->frag_max_size) { 831485fca66SFlorian Westphal if (IP6CB(skb)->frag_max_size > mtu) 832485fca66SFlorian Westphal goto fail_toobig; 833485fca66SFlorian Westphal 834485fca66SFlorian Westphal /* don't send fragments larger than what we received */ 835485fca66SFlorian Westphal mtu = IP6CB(skb)->frag_max_size; 836485fca66SFlorian Westphal if (mtu < IPV6_MIN_MTU) 837485fca66SFlorian Westphal mtu = IPV6_MIN_MTU; 838b881ef76SJohn Heffner } 839b881ef76SJohn Heffner 840d91675f9SYOSHIFUJI Hideaki if (np && np->frag_size < mtu) { 841d91675f9SYOSHIFUJI Hideaki if (np->frag_size) 842d91675f9SYOSHIFUJI Hideaki mtu = np->frag_size; 843d91675f9SYOSHIFUJI Hideaki } 84489bc7848SHannes Frederic Sowa if (mtu < hlen + sizeof(struct frag_hdr) + 8) 845b72a2b01SHannes Frederic Sowa goto fail_toobig; 8461e0d69a9SHannes Frederic Sowa mtu -= hlen + sizeof(struct frag_hdr); 8471da177e4SLinus Torvalds 848fd0273d7SMartin KaFai Lau frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, 849fd0273d7SMartin KaFai Lau &ipv6_hdr(skb)->saddr); 850286c2349SMartin KaFai Lau 851405c92f7SHannes Frederic Sowa if (skb->ip_summed == CHECKSUM_PARTIAL && 852405c92f7SHannes Frederic Sowa (err = skb_checksum_help(skb))) 853405c92f7SHannes Frederic Sowa goto fail; 854405c92f7SHannes Frederic Sowa 855ef0efcd3SJunwei Hu prevhdr = skb_network_header(skb) + nexthdr_offset; 8561d325d21SFlorian Westphal hroom = LL_RESERVED_SPACE(rt->dst.dev); 85721dc3301SDavid S. 
Miller if (skb_has_frag_list(skb)) { 858c72d8cdaSAlexey Dobriyan unsigned int first_len = skb_pagelen(skb); 8590feca619SPablo Neira Ayuso struct ip6_fraglist_iter iter; 8603d13008eSEric Dumazet struct sk_buff *frag2; 8611da177e4SLinus Torvalds 8621da177e4SLinus Torvalds if (first_len - hlen > mtu || 8631da177e4SLinus Torvalds ((first_len - hlen) & 7) || 8641d325d21SFlorian Westphal skb_cloned(skb) || 8651d325d21SFlorian Westphal skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) 8661da177e4SLinus Torvalds goto slow_path; 8671da177e4SLinus Torvalds 8684d9092bbSDavid S. Miller skb_walk_frags(skb, frag) { 8691da177e4SLinus Torvalds /* Correct geometry. */ 8701da177e4SLinus Torvalds if (frag->len > mtu || 8711da177e4SLinus Torvalds ((frag->len & 7) && frag->next) || 8721d325d21SFlorian Westphal skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr))) 8733d13008eSEric Dumazet goto slow_path_clean; 8741da177e4SLinus Torvalds 8751da177e4SLinus Torvalds /* Partially cloned skb? */ 8761da177e4SLinus Torvalds if (skb_shared(frag)) 8773d13008eSEric Dumazet goto slow_path_clean; 8782fdba6b0SHerbert Xu 8792fdba6b0SHerbert Xu BUG_ON(frag->sk); 8802fdba6b0SHerbert Xu if (skb->sk) { 8812fdba6b0SHerbert Xu frag->sk = skb->sk; 8822fdba6b0SHerbert Xu frag->destructor = sock_wfree; 8832fdba6b0SHerbert Xu } 8843d13008eSEric Dumazet skb->truesize -= frag->truesize; 8851da177e4SLinus Torvalds } 8861da177e4SLinus Torvalds 8870feca619SPablo Neira Ayuso err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, 8880feca619SPablo Neira Ayuso &iter); 8890feca619SPablo Neira Ayuso if (err < 0) 8901d325d21SFlorian Westphal goto fail; 8911da177e4SLinus Torvalds 8921da177e4SLinus Torvalds for (;;) { 8931da177e4SLinus Torvalds /* Prepare header of the next frame, 8941da177e4SLinus Torvalds * before previous one went down. 
*/ 8950feca619SPablo Neira Ayuso if (iter.frag) 8960feca619SPablo Neira Ayuso ip6_fraglist_prepare(skb, &iter); 8971da177e4SLinus Torvalds 8989669fffcSEric Dumazet skb->tstamp = tstamp; 8997d8c6e39SEric W. Biederman err = output(net, sk, skb); 900dafee490SWei Dong if (!err) 901d8d1f30bSChangli Gao IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 9023bd653c8SDenis V. Lunev IPSTATS_MIB_FRAGCREATES); 903dafee490SWei Dong 9040feca619SPablo Neira Ayuso if (err || !iter.frag) 9051da177e4SLinus Torvalds break; 9061da177e4SLinus Torvalds 9070feca619SPablo Neira Ayuso skb = ip6_fraglist_next(&iter); 9081da177e4SLinus Torvalds } 9091da177e4SLinus Torvalds 9100feca619SPablo Neira Ayuso kfree(iter.tmp_hdr); 9111da177e4SLinus Torvalds 9121da177e4SLinus Torvalds if (err == 0) { 913d8d1f30bSChangli Gao IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 9143bd653c8SDenis V. Lunev IPSTATS_MIB_FRAGOKS); 9151da177e4SLinus Torvalds return 0; 9161da177e4SLinus Torvalds } 9171da177e4SLinus Torvalds 918b7034146SEric Dumazet kfree_skb_list(iter.frag); 9191da177e4SLinus Torvalds 920d8d1f30bSChangli Gao IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 9213bd653c8SDenis V. Lunev IPSTATS_MIB_FRAGFAILS); 9221da177e4SLinus Torvalds return err; 9233d13008eSEric Dumazet 9243d13008eSEric Dumazet slow_path_clean: 9253d13008eSEric Dumazet skb_walk_frags(skb, frag2) { 9263d13008eSEric Dumazet if (frag2 == frag) 9273d13008eSEric Dumazet break; 9283d13008eSEric Dumazet frag2->sk = NULL; 9293d13008eSEric Dumazet frag2->destructor = NULL; 9303d13008eSEric Dumazet skb->truesize += frag2->truesize; 9313d13008eSEric Dumazet } 9321da177e4SLinus Torvalds } 9331da177e4SLinus Torvalds 9341da177e4SLinus Torvalds slow_path: 9351da177e4SLinus Torvalds /* 9361da177e4SLinus Torvalds * Fragment the datagram. 
9371da177e4SLinus Torvalds */ 9381da177e4SLinus Torvalds 9398a6a1f17SPablo Neira Ayuso ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom, 9408a6a1f17SPablo Neira Ayuso LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id, 9418a6a1f17SPablo Neira Ayuso &state); 9421da177e4SLinus Torvalds 9431da177e4SLinus Torvalds /* 9441da177e4SLinus Torvalds * Keep copying data until we run out. 9451da177e4SLinus Torvalds */ 94679e49503SFlorian Westphal 9478a6a1f17SPablo Neira Ayuso while (state.left > 0) { 9488a6a1f17SPablo Neira Ayuso frag = ip6_frag_next(skb, &state); 9498a6a1f17SPablo Neira Ayuso if (IS_ERR(frag)) { 9508a6a1f17SPablo Neira Ayuso err = PTR_ERR(frag); 9511da177e4SLinus Torvalds goto fail; 9521da177e4SLinus Torvalds } 9531da177e4SLinus Torvalds 9541da177e4SLinus Torvalds /* 9551da177e4SLinus Torvalds * Put this fragment into the sending queue. 9561da177e4SLinus Torvalds */ 9579669fffcSEric Dumazet frag->tstamp = tstamp; 9587d8c6e39SEric W. Biederman err = output(net, sk, frag); 9591da177e4SLinus Torvalds if (err) 9601da177e4SLinus Torvalds goto fail; 961dafee490SWei Dong 962adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 9633bd653c8SDenis V. 
Lunev IPSTATS_MIB_FRAGCREATES); 9641da177e4SLinus Torvalds } 965adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 966a11d206dSYOSHIFUJI Hideaki IPSTATS_MIB_FRAGOKS); 967808db80aSEric Dumazet consume_skb(skb); 9681da177e4SLinus Torvalds return err; 9691da177e4SLinus Torvalds 970485fca66SFlorian Westphal fail_toobig: 971485fca66SFlorian Westphal if (skb->sk && dst_allfrag(skb_dst(skb))) 972485fca66SFlorian Westphal sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); 973485fca66SFlorian Westphal 974485fca66SFlorian Westphal icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 975485fca66SFlorian Westphal err = -EMSGSIZE; 976485fca66SFlorian Westphal 9771da177e4SLinus Torvalds fail: 978adf30907SEric Dumazet IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 979a11d206dSYOSHIFUJI Hideaki IPSTATS_MIB_FRAGFAILS); 9801da177e4SLinus Torvalds kfree_skb(skb); 9811da177e4SLinus Torvalds return err; 9821da177e4SLinus Torvalds } 9831da177e4SLinus Torvalds 984b71d1d42SEric Dumazet static inline int ip6_rt_check(const struct rt6key *rt_key, 985b71d1d42SEric Dumazet const struct in6_addr *fl_addr, 986b71d1d42SEric Dumazet const struct in6_addr *addr_cache) 987cf6b1982SYOSHIFUJI Hideaki { 988a02cec21SEric Dumazet return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 98963159f29SIan Morris (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); 990cf6b1982SYOSHIFUJI Hideaki } 991cf6b1982SYOSHIFUJI Hideaki 992497c615aSHerbert Xu static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 993497c615aSHerbert Xu struct dst_entry *dst, 994b71d1d42SEric Dumazet const struct flowi6 *fl6) 9951da177e4SLinus Torvalds { 9961da177e4SLinus Torvalds struct ipv6_pinfo *np = inet6_sk(sk); 997a963a37dSEric Dumazet struct rt6_info *rt; 9981da177e4SLinus Torvalds 999497c615aSHerbert Xu if (!dst) 1000497c615aSHerbert Xu goto out; 10011da177e4SLinus Torvalds 1002a963a37dSEric Dumazet if (dst->ops->family != AF_INET6) { 1003a963a37dSEric Dumazet dst_release(dst); 1004a963a37dSEric 
Dumazet return NULL; 1005a963a37dSEric Dumazet } 1006a963a37dSEric Dumazet 1007a963a37dSEric Dumazet rt = (struct rt6_info *)dst; 10081da177e4SLinus Torvalds /* Yes, checking route validity in not connected 1009d76e60a5SDavid S. Miller * case is not very simple. Take into account, 1010d76e60a5SDavid S. Miller * that we do not support routing by source, TOS, 1011d76e60a5SDavid S. Miller * and MSG_DONTROUTE --ANK (980726) 1012d76e60a5SDavid S. Miller * 1013cf6b1982SYOSHIFUJI Hideaki * 1. ip6_rt_check(): If route was host route, 1014cf6b1982SYOSHIFUJI Hideaki * check that cached destination is current. 1015d76e60a5SDavid S. Miller * If it is network route, we still may 1016d76e60a5SDavid S. Miller * check its validity using saved pointer 1017d76e60a5SDavid S. Miller * to the last used address: daddr_cache. 1018d76e60a5SDavid S. Miller * We do not want to save whole address now, 1019d76e60a5SDavid S. Miller * (because main consumer of this service 1020d76e60a5SDavid S. Miller * is tcp, which has not this problem), 1021d76e60a5SDavid S. Miller * so that the last trick works only on connected 1022d76e60a5SDavid S. Miller * sockets. 1023d76e60a5SDavid S. Miller * 2. oif also should be the same. 10241da177e4SLinus Torvalds */ 10254c9483b2SDavid S. Miller if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || 10268e1ef0a9SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_SUBTREES 10274c9483b2SDavid S. 
Miller ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || 10288e1ef0a9SYOSHIFUJI Hideaki #endif 1029ca254490SDavid Ahern (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && 1030ca254490SDavid Ahern (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { 1031497c615aSHerbert Xu dst_release(dst); 1032497c615aSHerbert Xu dst = NULL; 10331da177e4SLinus Torvalds } 1034497c615aSHerbert Xu 1035497c615aSHerbert Xu out: 1036497c615aSHerbert Xu return dst; 10371da177e4SLinus Torvalds } 1038497c615aSHerbert Xu 10393aef934fSEric Dumazet static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, 10404c9483b2SDavid S. Miller struct dst_entry **dst, struct flowi6 *fl6) 1041497c615aSHerbert Xu { 104269cce1d1SDavid S. Miller #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 104369cce1d1SDavid S. Miller struct neighbour *n; 104497cac082SDavid S. Miller struct rt6_info *rt; 104569cce1d1SDavid S. Miller #endif 104669cce1d1SDavid S. Miller int err; 10476f21c96aSPaolo Abeni int flags = 0; 10481da177e4SLinus Torvalds 1049e16e888bSMarkus Stenberg /* The correct way to handle this would be to do 1050e16e888bSMarkus Stenberg * ip6_route_get_saddr, and then ip6_route_output; however, 1051e16e888bSMarkus Stenberg * the route-specific preferred source forces the 1052e16e888bSMarkus Stenberg * ip6_route_output call _before_ ip6_route_get_saddr. 1053e16e888bSMarkus Stenberg * 1054e16e888bSMarkus Stenberg * In source specific routing (no src=any default route), 1055e16e888bSMarkus Stenberg * ip6_route_output will fail given src=any saddr, though, so 1056e16e888bSMarkus Stenberg * that's why we try it again later. 1057e16e888bSMarkus Stenberg */ 1058*c305b9e6Szhang kai if (ipv6_addr_any(&fl6->saddr)) { 1059a68886a6SDavid Ahern struct fib6_info *from; 1060e16e888bSMarkus Stenberg struct rt6_info *rt; 1061e16e888bSMarkus Stenberg 1062e16e888bSMarkus Stenberg *dst = ip6_route_output(net, sk, fl6); 1063e16e888bSMarkus Stenberg rt = (*dst)->error ? 
NULL : (struct rt6_info *)*dst; 1064a68886a6SDavid Ahern 1065a68886a6SDavid Ahern rcu_read_lock(); 1066a68886a6SDavid Ahern from = rt ? rcu_dereference(rt->from) : NULL; 1067a68886a6SDavid Ahern err = ip6_route_get_saddr(net, from, &fl6->daddr, 1068e16e888bSMarkus Stenberg sk ? inet6_sk(sk)->srcprefs : 0, 1069e16e888bSMarkus Stenberg &fl6->saddr); 1070a68886a6SDavid Ahern rcu_read_unlock(); 1071a68886a6SDavid Ahern 1072e16e888bSMarkus Stenberg if (err) 1073e16e888bSMarkus Stenberg goto out_err_release; 1074e16e888bSMarkus Stenberg 1075e16e888bSMarkus Stenberg /* If we had an erroneous initial result, pretend it 1076e16e888bSMarkus Stenberg * never existed and let the SA-enabled version take 1077e16e888bSMarkus Stenberg * over. 1078e16e888bSMarkus Stenberg */ 1079*c305b9e6Szhang kai if ((*dst)->error) { 1080e16e888bSMarkus Stenberg dst_release(*dst); 1081e16e888bSMarkus Stenberg *dst = NULL; 1082e16e888bSMarkus Stenberg } 10836f21c96aSPaolo Abeni 10846f21c96aSPaolo Abeni if (fl6->flowi6_oif) 10856f21c96aSPaolo Abeni flags |= RT6_LOOKUP_F_IFACE; 1086e16e888bSMarkus Stenberg } 1087e16e888bSMarkus Stenberg 108863159f29SIan Morris if (!*dst) 10896f21c96aSPaolo Abeni *dst = ip6_route_output_flags(net, sk, fl6, flags); 10901da177e4SLinus Torvalds 1091e5d08d71SIan Morris err = (*dst)->error; 1092e5d08d71SIan Morris if (err) 10931da177e4SLinus Torvalds goto out_err_release; 10941da177e4SLinus Torvalds 109595c385b4SNeil Horman #ifdef CONFIG_IPV6_OPTIMISTIC_DAD 109695c385b4SNeil Horman /* 109795c385b4SNeil Horman * Here if the dst entry we've looked up 109895c385b4SNeil Horman * has a neighbour entry that is in the INCOMPLETE 109995c385b4SNeil Horman * state and the src address from the flow is 110095c385b4SNeil Horman * marked as OPTIMISTIC, we release the found 110195c385b4SNeil Horman * dst entry and replace it instead with the 110295c385b4SNeil Horman * dst entry of the nexthop router 110395c385b4SNeil Horman */ 1104c56bf6feSEric Dumazet rt = (struct rt6_info *) *dst; 
1105707be1ffSYOSHIFUJI Hideaki / 吉藤英明 rcu_read_lock_bh(); 11062647a9b0SMartin KaFai Lau n = __ipv6_neigh_lookup_noref(rt->dst.dev, 11072647a9b0SMartin KaFai Lau rt6_nexthop(rt, &fl6->daddr)); 1108707be1ffSYOSHIFUJI Hideaki / 吉藤英明 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; 1109707be1ffSYOSHIFUJI Hideaki / 吉藤英明 rcu_read_unlock_bh(); 1110707be1ffSYOSHIFUJI Hideaki / 吉藤英明 1111707be1ffSYOSHIFUJI Hideaki / 吉藤英明 if (err) { 111295c385b4SNeil Horman struct inet6_ifaddr *ifp; 11134c9483b2SDavid S. Miller struct flowi6 fl_gw6; 111495c385b4SNeil Horman int redirect; 111595c385b4SNeil Horman 11164c9483b2SDavid S. Miller ifp = ipv6_get_ifaddr(net, &fl6->saddr, 11171cab3da6SDaniel Lezcano (*dst)->dev, 1); 111895c385b4SNeil Horman 111995c385b4SNeil Horman redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); 112095c385b4SNeil Horman if (ifp) 112195c385b4SNeil Horman in6_ifa_put(ifp); 112295c385b4SNeil Horman 112395c385b4SNeil Horman if (redirect) { 112495c385b4SNeil Horman /* 112595c385b4SNeil Horman * We need to get the dst entry for the 112695c385b4SNeil Horman * default router instead 112795c385b4SNeil Horman */ 112895c385b4SNeil Horman dst_release(*dst); 11294c9483b2SDavid S. Miller memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); 11304c9483b2SDavid S. Miller memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); 11314c9483b2SDavid S. Miller *dst = ip6_route_output(net, sk, &fl_gw6); 1132e5d08d71SIan Morris err = (*dst)->error; 1133e5d08d71SIan Morris if (err) 113495c385b4SNeil Horman goto out_err_release; 113595c385b4SNeil Horman } 113695c385b4SNeil Horman } 113795c385b4SNeil Horman #endif 1138ec5e3b0aSJonathan T. 
Leighton if (ipv6_addr_v4mapped(&fl6->saddr) && 113900ea1ceeSWillem de Bruijn !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { 114000ea1ceeSWillem de Bruijn err = -EAFNOSUPPORT; 114100ea1ceeSWillem de Bruijn goto out_err_release; 114200ea1ceeSWillem de Bruijn } 114395c385b4SNeil Horman 11441da177e4SLinus Torvalds return 0; 11451da177e4SLinus Torvalds 11461da177e4SLinus Torvalds out_err_release: 11471da177e4SLinus Torvalds dst_release(*dst); 11481da177e4SLinus Torvalds *dst = NULL; 11498a966fc0SDavid Ahern 11500d240e78SDavid Ahern if (err == -ENETUNREACH) 11510d240e78SDavid Ahern IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); 11521da177e4SLinus Torvalds return err; 11531da177e4SLinus Torvalds } 115434a0b3cdSAdrian Bunk 1155497c615aSHerbert Xu /** 1156497c615aSHerbert Xu * ip6_dst_lookup - perform route lookup on flow 1157b51cd7c8SAndrew Lunn * @net: Network namespace to perform lookup in 1158497c615aSHerbert Xu * @sk: socket which provides route info 1159497c615aSHerbert Xu * @dst: pointer to dst_entry * for result 11604c9483b2SDavid S. Miller * @fl6: flow to lookup 1161497c615aSHerbert Xu * 1162497c615aSHerbert Xu * This function performs a route lookup on the given flow. 1163497c615aSHerbert Xu * 1164497c615aSHerbert Xu * It returns zero on success, or a standard errno code on error. 1165497c615aSHerbert Xu */ 1166343d60aaSRoopa Prabhu int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, 1167343d60aaSRoopa Prabhu struct flowi6 *fl6) 1168497c615aSHerbert Xu { 1169497c615aSHerbert Xu *dst = NULL; 1170343d60aaSRoopa Prabhu return ip6_dst_lookup_tail(net, sk, dst, fl6); 1171497c615aSHerbert Xu } 11723cf3dc6cSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(ip6_dst_lookup); 11733cf3dc6cSArnaldo Carvalho de Melo 1174497c615aSHerbert Xu /** 117568d0c6d3SDavid S. Miller * ip6_dst_lookup_flow - perform route lookup on flow with ipsec 1176b51cd7c8SAndrew Lunn * @net: Network namespace to perform lookup in 117768d0c6d3SDavid S. 
Miller * @sk: socket which provides route info 11784c9483b2SDavid S. Miller * @fl6: flow to lookup 117968d0c6d3SDavid S. Miller * @final_dst: final destination address for ipsec lookup 118068d0c6d3SDavid S. Miller * 118168d0c6d3SDavid S. Miller * This function performs a route lookup on the given flow. 118268d0c6d3SDavid S. Miller * 118368d0c6d3SDavid S. Miller * It returns a valid dst pointer on success, or a pointer encoded 118468d0c6d3SDavid S. Miller * error code. 118568d0c6d3SDavid S. Miller */ 1186c4e85f73SSabrina Dubroca struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, 11870e0d44abSSteffen Klassert const struct in6_addr *final_dst) 118868d0c6d3SDavid S. Miller { 118968d0c6d3SDavid S. Miller struct dst_entry *dst = NULL; 119068d0c6d3SDavid S. Miller int err; 119168d0c6d3SDavid S. Miller 1192c4e85f73SSabrina Dubroca err = ip6_dst_lookup_tail(net, sk, &dst, fl6); 119368d0c6d3SDavid S. Miller if (err) 119468d0c6d3SDavid S. Miller return ERR_PTR(err); 119568d0c6d3SDavid S. Miller if (final_dst) 11964e3fd7a0SAlexey Dobriyan fl6->daddr = *final_dst; 11972774c131SDavid S. Miller 1198c4e85f73SSabrina Dubroca return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); 119968d0c6d3SDavid S. Miller } 120068d0c6d3SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); 120168d0c6d3SDavid S. Miller 120268d0c6d3SDavid S. Miller /** 120368d0c6d3SDavid S. Miller * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow 120468d0c6d3SDavid S. Miller * @sk: socket which provides the dst cache and route info 12054c9483b2SDavid S. Miller * @fl6: flow to lookup 120668d0c6d3SDavid S. 
Miller * @final_dst: final destination address for ipsec lookup 120796818159SAlexey Kodanev * @connected: whether @sk is connected or not 1208497c615aSHerbert Xu * 1209497c615aSHerbert Xu * This function performs a route lookup on the given flow with the 1210497c615aSHerbert Xu * possibility of using the cached route in the socket if it is valid. 1211497c615aSHerbert Xu * It will take the socket dst lock when operating on the dst cache. 1212497c615aSHerbert Xu * As a result, this function can only be used in process context. 1213497c615aSHerbert Xu * 121496818159SAlexey Kodanev * In addition, for a connected socket, cache the dst in the socket 121596818159SAlexey Kodanev * if the current cache is not valid. 121696818159SAlexey Kodanev * 121768d0c6d3SDavid S. Miller * It returns a valid dst pointer on success, or a pointer encoded 121868d0c6d3SDavid S. Miller * error code. 1219497c615aSHerbert Xu */ 12204c9483b2SDavid S. Miller struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 122196818159SAlexey Kodanev const struct in6_addr *final_dst, 122296818159SAlexey Kodanev bool connected) 1223497c615aSHerbert Xu { 122468d0c6d3SDavid S. Miller struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); 1225497c615aSHerbert Xu 12264c9483b2SDavid S. Miller dst = ip6_sk_dst_check(sk, dst, fl6); 122796818159SAlexey Kodanev if (dst) 122896818159SAlexey Kodanev return dst; 122996818159SAlexey Kodanev 1230c4e85f73SSabrina Dubroca dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst); 123196818159SAlexey Kodanev if (connected && !IS_ERR(dst)) 123296818159SAlexey Kodanev ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6); 123368d0c6d3SDavid S. Miller 123400bc0ef5SJakub Sitnicki return dst; 123568d0c6d3SDavid S. Miller } 123668d0c6d3SDavid S. 
Miller EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1237497c615aSHerbert Xu 1238571912c6SMartin Varghese /** 1239571912c6SMartin Varghese * ip6_dst_lookup_tunnel - perform route lookup on tunnel 1240571912c6SMartin Varghese * @skb: Packet for which lookup is done 1241571912c6SMartin Varghese * @dev: Tunnel device 1242571912c6SMartin Varghese * @net: Network namespace of tunnel device 1243b51cd7c8SAndrew Lunn * @sock: Socket which provides route info 1244571912c6SMartin Varghese * @saddr: Memory to store the src ip address 1245571912c6SMartin Varghese * @info: Tunnel information 1246571912c6SMartin Varghese * @protocol: IP protocol 1247b51cd7c8SAndrew Lunn * @use_cache: Flag to enable cache usage 1248571912c6SMartin Varghese * This function performs a route lookup on a tunnel 1249571912c6SMartin Varghese * 1250571912c6SMartin Varghese * It returns a valid dst pointer and stores src address to be used in 1251571912c6SMartin Varghese * tunnel in param saddr on success, else a pointer encoded error code. 
1252571912c6SMartin Varghese */ 1253571912c6SMartin Varghese 1254571912c6SMartin Varghese struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, 1255571912c6SMartin Varghese struct net_device *dev, 1256571912c6SMartin Varghese struct net *net, 1257571912c6SMartin Varghese struct socket *sock, 1258571912c6SMartin Varghese struct in6_addr *saddr, 1259571912c6SMartin Varghese const struct ip_tunnel_info *info, 1260571912c6SMartin Varghese u8 protocol, 1261571912c6SMartin Varghese bool use_cache) 1262571912c6SMartin Varghese { 1263571912c6SMartin Varghese struct dst_entry *dst = NULL; 1264571912c6SMartin Varghese #ifdef CONFIG_DST_CACHE 1265571912c6SMartin Varghese struct dst_cache *dst_cache; 1266571912c6SMartin Varghese #endif 1267571912c6SMartin Varghese struct flowi6 fl6; 1268571912c6SMartin Varghese __u8 prio; 1269571912c6SMartin Varghese 1270571912c6SMartin Varghese #ifdef CONFIG_DST_CACHE 1271571912c6SMartin Varghese dst_cache = (struct dst_cache *)&info->dst_cache; 1272571912c6SMartin Varghese if (use_cache) { 1273571912c6SMartin Varghese dst = dst_cache_get_ip6(dst_cache, saddr); 1274571912c6SMartin Varghese if (dst) 1275571912c6SMartin Varghese return dst; 1276571912c6SMartin Varghese } 1277571912c6SMartin Varghese #endif 1278571912c6SMartin Varghese memset(&fl6, 0, sizeof(fl6)); 1279571912c6SMartin Varghese fl6.flowi6_mark = skb->mark; 1280571912c6SMartin Varghese fl6.flowi6_proto = protocol; 1281571912c6SMartin Varghese fl6.daddr = info->key.u.ipv6.dst; 1282571912c6SMartin Varghese fl6.saddr = info->key.u.ipv6.src; 1283571912c6SMartin Varghese prio = info->key.tos; 1284571912c6SMartin Varghese fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio), 1285571912c6SMartin Varghese info->key.label); 1286571912c6SMartin Varghese 1287571912c6SMartin Varghese dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, 1288571912c6SMartin Varghese NULL); 1289571912c6SMartin Varghese if (IS_ERR(dst)) { 1290571912c6SMartin Varghese netdev_dbg(dev, "no route to %pI6\n", 
&fl6.daddr); 1291571912c6SMartin Varghese return ERR_PTR(-ENETUNREACH); 1292571912c6SMartin Varghese } 1293571912c6SMartin Varghese if (dst->dev == dev) { /* is this necessary? */ 1294571912c6SMartin Varghese netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); 1295571912c6SMartin Varghese dst_release(dst); 1296571912c6SMartin Varghese return ERR_PTR(-ELOOP); 1297571912c6SMartin Varghese } 1298571912c6SMartin Varghese #ifdef CONFIG_DST_CACHE 1299571912c6SMartin Varghese if (use_cache) 1300571912c6SMartin Varghese dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); 1301571912c6SMartin Varghese #endif 1302571912c6SMartin Varghese *saddr = fl6.saddr; 1303571912c6SMartin Varghese return dst; 1304571912c6SMartin Varghese } 1305571912c6SMartin Varghese EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); 1306571912c6SMartin Varghese 13070178b695SHerbert Xu static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 13080178b695SHerbert Xu gfp_t gfp) 13090178b695SHerbert Xu { 13100178b695SHerbert Xu return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 13110178b695SHerbert Xu } 13120178b695SHerbert Xu 13130178b695SHerbert Xu static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, 13140178b695SHerbert Xu gfp_t gfp) 13150178b695SHerbert Xu { 13160178b695SHerbert Xu return src ? 
kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 13170178b695SHerbert Xu } 13180178b695SHerbert Xu 131975a493e6SHannes Frederic Sowa static void ip6_append_data_mtu(unsigned int *mtu, 13200c183379SGao feng int *maxfraglen, 13210c183379SGao feng unsigned int fragheaderlen, 13220c183379SGao feng struct sk_buff *skb, 132375a493e6SHannes Frederic Sowa struct rt6_info *rt, 1324e367c2d0Slucien unsigned int orig_mtu) 13250c183379SGao feng { 13260c183379SGao feng if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { 132763159f29SIan Morris if (!skb) { 13280c183379SGao feng /* first fragment, reserve header_len */ 1329e367c2d0Slucien *mtu = orig_mtu - rt->dst.header_len; 13300c183379SGao feng 13310c183379SGao feng } else { 13320c183379SGao feng /* 13330c183379SGao feng * this fragment is not first, the headers 13340c183379SGao feng * space is regarded as data space. 13350c183379SGao feng */ 1336e367c2d0Slucien *mtu = orig_mtu; 13370c183379SGao feng } 13380c183379SGao feng *maxfraglen = ((*mtu - fragheaderlen) & ~7) 13390c183379SGao feng + fragheaderlen - sizeof(struct frag_hdr); 13400c183379SGao feng } 13410c183379SGao feng } 13420c183379SGao feng 1343366e41d9SVlad Yasevich static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, 134426879da5SWei Wang struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6, 13455fdaa88dSWillem de Bruijn struct rt6_info *rt, struct flowi6 *fl6) 1346366e41d9SVlad Yasevich { 1347366e41d9SVlad Yasevich struct ipv6_pinfo *np = inet6_sk(sk); 1348366e41d9SVlad Yasevich unsigned int mtu; 134926879da5SWei Wang struct ipv6_txoptions *opt = ipc6->opt; 1350366e41d9SVlad Yasevich 1351366e41d9SVlad Yasevich /* 1352366e41d9SVlad Yasevich * setup for corking 1353366e41d9SVlad Yasevich */ 1354366e41d9SVlad Yasevich if (opt) { 1355366e41d9SVlad Yasevich if (WARN_ON(v6_cork->opt)) 1356366e41d9SVlad Yasevich return -EINVAL; 1357366e41d9SVlad Yasevich 1358864e2a1fSEric Dumazet v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); 135963159f29SIan Morris if 
(unlikely(!v6_cork->opt)) 1360366e41d9SVlad Yasevich return -ENOBUFS; 1361366e41d9SVlad Yasevich 1362864e2a1fSEric Dumazet v6_cork->opt->tot_len = sizeof(*opt); 1363366e41d9SVlad Yasevich v6_cork->opt->opt_flen = opt->opt_flen; 1364366e41d9SVlad Yasevich v6_cork->opt->opt_nflen = opt->opt_nflen; 1365366e41d9SVlad Yasevich 1366366e41d9SVlad Yasevich v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, 1367366e41d9SVlad Yasevich sk->sk_allocation); 1368366e41d9SVlad Yasevich if (opt->dst0opt && !v6_cork->opt->dst0opt) 1369366e41d9SVlad Yasevich return -ENOBUFS; 1370366e41d9SVlad Yasevich 1371366e41d9SVlad Yasevich v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, 1372366e41d9SVlad Yasevich sk->sk_allocation); 1373366e41d9SVlad Yasevich if (opt->dst1opt && !v6_cork->opt->dst1opt) 1374366e41d9SVlad Yasevich return -ENOBUFS; 1375366e41d9SVlad Yasevich 1376366e41d9SVlad Yasevich v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, 1377366e41d9SVlad Yasevich sk->sk_allocation); 1378366e41d9SVlad Yasevich if (opt->hopopt && !v6_cork->opt->hopopt) 1379366e41d9SVlad Yasevich return -ENOBUFS; 1380366e41d9SVlad Yasevich 1381366e41d9SVlad Yasevich v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, 1382366e41d9SVlad Yasevich sk->sk_allocation); 1383366e41d9SVlad Yasevich if (opt->srcrt && !v6_cork->opt->srcrt) 1384366e41d9SVlad Yasevich return -ENOBUFS; 1385366e41d9SVlad Yasevich 1386366e41d9SVlad Yasevich /* need source address above miyazawa*/ 1387366e41d9SVlad Yasevich } 1388366e41d9SVlad Yasevich dst_hold(&rt->dst); 1389366e41d9SVlad Yasevich cork->base.dst = &rt->dst; 1390366e41d9SVlad Yasevich cork->fl.u.ip6 = *fl6; 139126879da5SWei Wang v6_cork->hop_limit = ipc6->hlimit; 139226879da5SWei Wang v6_cork->tclass = ipc6->tclass; 1393366e41d9SVlad Yasevich if (rt->dst.flags & DST_XFRM_TUNNEL) 1394366e41d9SVlad Yasevich mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 
1395749439bfSMike Maloney READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); 1396366e41d9SVlad Yasevich else 1397366e41d9SVlad Yasevich mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 1398c02b3741SDavid S. Miller READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); 1399366e41d9SVlad Yasevich if (np->frag_size < mtu) { 1400366e41d9SVlad Yasevich if (np->frag_size) 1401366e41d9SVlad Yasevich mtu = np->frag_size; 1402366e41d9SVlad Yasevich } 1403749439bfSMike Maloney if (mtu < IPV6_MIN_MTU) 1404749439bfSMike Maloney return -EINVAL; 1405366e41d9SVlad Yasevich cork->base.fragsize = mtu; 1406fbf47813SWillem de Bruijn cork->base.gso_size = ipc6->gso_size; 1407678ca42dSWillem de Bruijn cork->base.tx_flags = 0; 1408c6af0c22SWillem de Bruijn cork->base.mark = ipc6->sockc.mark; 1409678ca42dSWillem de Bruijn sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); 1410bec1f6f6SWillem de Bruijn 14110f6c480fSDavid Miller if (dst_allfrag(xfrm_dst_path(&rt->dst))) 1412366e41d9SVlad Yasevich cork->base.flags |= IPCORK_ALLFRAG; 1413366e41d9SVlad Yasevich cork->base.length = 0; 1414366e41d9SVlad Yasevich 14155fdaa88dSWillem de Bruijn cork->base.transmit_time = ipc6->sockc.transmit_time; 1416a818f75eSJesus Sanchez-Palencia 1417366e41d9SVlad Yasevich return 0; 1418366e41d9SVlad Yasevich } 1419366e41d9SVlad Yasevich 14200bbe84a6SVlad Yasevich static int __ip6_append_data(struct sock *sk, 14210bbe84a6SVlad Yasevich struct flowi6 *fl6, 14220bbe84a6SVlad Yasevich struct sk_buff_head *queue, 14230bbe84a6SVlad Yasevich struct inet_cork *cork, 14240bbe84a6SVlad Yasevich struct inet6_cork *v6_cork, 14250bbe84a6SVlad Yasevich struct page_frag *pfrag, 14260bbe84a6SVlad Yasevich int getfrag(void *from, char *to, int offset, 14270bbe84a6SVlad Yasevich int len, int odd, struct sk_buff *skb), 14281da177e4SLinus Torvalds void *from, int length, int transhdrlen, 14295fdaa88dSWillem de Bruijn unsigned int flags, struct ipcm6_cookie *ipc6) 14301da177e4SLinus Torvalds { 14310c183379SGao 
feng struct sk_buff *skb, *skb_prev = NULL; 143210b8a3deSPaolo Abeni unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; 1433b5947e5dSWillem de Bruijn struct ubuf_info *uarg = NULL; 14340bbe84a6SVlad Yasevich int exthdrlen = 0; 14350bbe84a6SVlad Yasevich int dst_exthdrlen = 0; 14361da177e4SLinus Torvalds int hh_len; 14371da177e4SLinus Torvalds int copy; 14381da177e4SLinus Torvalds int err; 14391da177e4SLinus Torvalds int offset = 0; 144009c2d251SWillem de Bruijn u32 tskey = 0; 14410bbe84a6SVlad Yasevich struct rt6_info *rt = (struct rt6_info *)cork->dst; 14420bbe84a6SVlad Yasevich struct ipv6_txoptions *opt = v6_cork->opt; 144332dce968SVlad Yasevich int csummode = CHECKSUM_NONE; 1444682b1a9dSHannes Frederic Sowa unsigned int maxnonfragsize, headersize; 14451f4c6eb2SEric Dumazet unsigned int wmem_alloc_delta = 0; 1446100f6d8eSWillem de Bruijn bool paged, extra_uref = false; 14471da177e4SLinus Torvalds 14480bbe84a6SVlad Yasevich skb = skb_peek_tail(queue); 14490bbe84a6SVlad Yasevich if (!skb) { 14500bbe84a6SVlad Yasevich exthdrlen = opt ? opt->opt_flen : 0; 14517efdba5bSRomain KUNTZ dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; 14521da177e4SLinus Torvalds } 14530bbe84a6SVlad Yasevich 145415e36f5bSWillem de Bruijn paged = !!cork->gso_size; 1455bec1f6f6SWillem de Bruijn mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; 1456e367c2d0Slucien orig_mtu = mtu; 14571da177e4SLinus Torvalds 1458678ca42dSWillem de Bruijn if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && 1459678ca42dSWillem de Bruijn sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) 1460678ca42dSWillem de Bruijn tskey = sk->sk_tskey++; 1461678ca42dSWillem de Bruijn 1462d8d1f30bSChangli Gao hh_len = LL_RESERVED_SPACE(rt->dst.dev); 14631da177e4SLinus Torvalds 1464a1b05140SMasahide NAKAMURA fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1465b4ce9277SHerbert Xu (opt ? 
opt->opt_nflen : 0); 14664df98e76SHannes Frederic Sowa maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - 14674df98e76SHannes Frederic Sowa sizeof(struct frag_hdr); 14681da177e4SLinus Torvalds 14694df98e76SHannes Frederic Sowa headersize = sizeof(struct ipv6hdr) + 14703a1cebe7SHannes Frederic Sowa (opt ? opt->opt_flen + opt->opt_nflen : 0) + 14714df98e76SHannes Frederic Sowa (dst_allfrag(&rt->dst) ? 14724df98e76SHannes Frederic Sowa sizeof(struct frag_hdr) : 0) + 14734df98e76SHannes Frederic Sowa rt->rt6i_nfheader_len; 14744df98e76SHannes Frederic Sowa 147510b8a3deSPaolo Abeni /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit 147610b8a3deSPaolo Abeni * the first fragment 147710b8a3deSPaolo Abeni */ 147810b8a3deSPaolo Abeni if (headersize + transhdrlen > mtu) 147910b8a3deSPaolo Abeni goto emsgsize; 148010b8a3deSPaolo Abeni 148126879da5SWei Wang if (cork->length + length > mtu - headersize && ipc6->dontfrag && 14824df98e76SHannes Frederic Sowa (sk->sk_protocol == IPPROTO_UDP || 14834df98e76SHannes Frederic Sowa sk->sk_protocol == IPPROTO_RAW)) { 14844df98e76SHannes Frederic Sowa ipv6_local_rxpmtu(sk, fl6, mtu - headersize + 14854df98e76SHannes Frederic Sowa sizeof(struct ipv6hdr)); 14864df98e76SHannes Frederic Sowa goto emsgsize; 14874df98e76SHannes Frederic Sowa } 14884df98e76SHannes Frederic Sowa 1489682b1a9dSHannes Frederic Sowa if (ip6_sk_ignore_df(sk)) 1490682b1a9dSHannes Frederic Sowa maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; 1491682b1a9dSHannes Frederic Sowa else 1492682b1a9dSHannes Frederic Sowa maxnonfragsize = mtu; 1493682b1a9dSHannes Frederic Sowa 14944df98e76SHannes Frederic Sowa if (cork->length + length > maxnonfragsize - headersize) { 14954df98e76SHannes Frederic Sowa emsgsize: 149610b8a3deSPaolo Abeni pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0); 149710b8a3deSPaolo Abeni ipv6_local_error(sk, EMSGSIZE, fl6, pmtu); 14981da177e4SLinus Torvalds return -EMSGSIZE; 14991da177e4SLinus Torvalds } 
1500682b1a9dSHannes Frederic Sowa 1501682b1a9dSHannes Frederic Sowa /* CHECKSUM_PARTIAL only with no extension headers and when 1502682b1a9dSHannes Frederic Sowa * we are not going to fragment 1503682b1a9dSHannes Frederic Sowa */ 1504682b1a9dSHannes Frederic Sowa if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && 1505682b1a9dSHannes Frederic Sowa headersize == sizeof(struct ipv6hdr) && 15062b89ed65SVlad Yasevich length <= mtu - headersize && 1507bec1f6f6SWillem de Bruijn (!(flags & MSG_MORE) || cork->gso_size) && 1508c8cd0989STom Herbert rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) 1509682b1a9dSHannes Frederic Sowa csummode = CHECKSUM_PARTIAL; 15101da177e4SLinus Torvalds 1511b5947e5dSWillem de Bruijn if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { 15128c793822SJonathan Lemon uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); 1513b5947e5dSWillem de Bruijn if (!uarg) 1514b5947e5dSWillem de Bruijn return -ENOBUFS; 1515522924b5SWillem de Bruijn extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ 1516b5947e5dSWillem de Bruijn if (rt->dst.dev->features & NETIF_F_SG && 1517b5947e5dSWillem de Bruijn csummode == CHECKSUM_PARTIAL) { 1518b5947e5dSWillem de Bruijn paged = true; 1519b5947e5dSWillem de Bruijn } else { 1520b5947e5dSWillem de Bruijn uarg->zerocopy = 0; 152152900d22SWillem de Bruijn skb_zcopy_set(skb, uarg, &extra_uref); 1522b5947e5dSWillem de Bruijn } 1523b5947e5dSWillem de Bruijn } 1524b5947e5dSWillem de Bruijn 15251da177e4SLinus Torvalds /* 15261da177e4SLinus Torvalds * Let's try using as much space as possible. 15271da177e4SLinus Torvalds * Use MTU if total length of the message fits into the MTU. 15281da177e4SLinus Torvalds * Otherwise, we need to reserve fragment header and 15291da177e4SLinus Torvalds * fragment alignment (= 8-15 octects, in total). 
15301da177e4SLinus Torvalds * 1531634a63e7SRandy Dunlap * Note that we may need to "move" the data from the tail 15321da177e4SLinus Torvalds * of the buffer to the new fragment when we split 15331da177e4SLinus Torvalds * the message. 15341da177e4SLinus Torvalds * 15351da177e4SLinus Torvalds * FIXME: It may be fragmented into multiple chunks 15361da177e4SLinus Torvalds * at once if non-fragmentable extension headers 15371da177e4SLinus Torvalds * are too large. 15381da177e4SLinus Torvalds * --yoshfuji 15391da177e4SLinus Torvalds */ 15401da177e4SLinus Torvalds 15412811ebacSHannes Frederic Sowa cork->length += length; 15422811ebacSHannes Frederic Sowa if (!skb) 15431da177e4SLinus Torvalds goto alloc_new_skb; 15441da177e4SLinus Torvalds 15451da177e4SLinus Torvalds while (length > 0) { 15461da177e4SLinus Torvalds /* Check if the remaining data fits into current packet. */ 1547bdc712b4SDavid S. Miller copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; 15481da177e4SLinus Torvalds if (copy < length) 15491da177e4SLinus Torvalds copy = maxfraglen - skb->len; 15501da177e4SLinus Torvalds 15511da177e4SLinus Torvalds if (copy <= 0) { 15521da177e4SLinus Torvalds char *data; 15531da177e4SLinus Torvalds unsigned int datalen; 15541da177e4SLinus Torvalds unsigned int fraglen; 15551da177e4SLinus Torvalds unsigned int fraggap; 15566d123b81SJakub Kicinski unsigned int alloclen, alloc_extra; 1557aba36930SWillem de Bruijn unsigned int pagedlen; 15581da177e4SLinus Torvalds alloc_new_skb: 15591da177e4SLinus Torvalds /* There's no room in the current skb */ 15600c183379SGao feng if (skb) 15610c183379SGao feng fraggap = skb->len - maxfraglen; 15621da177e4SLinus Torvalds else 15631da177e4SLinus Torvalds fraggap = 0; 15640c183379SGao feng /* update mtu and maxfraglen if necessary */ 156563159f29SIan Morris if (!skb || !skb_prev) 15660c183379SGao feng ip6_append_data_mtu(&mtu, &maxfraglen, 156775a493e6SHannes Frederic Sowa fragheaderlen, skb, rt, 
1568e367c2d0Slucien orig_mtu); 15690c183379SGao feng 15700c183379SGao feng skb_prev = skb; 15711da177e4SLinus Torvalds 15721da177e4SLinus Torvalds /* 15731da177e4SLinus Torvalds * If remaining data exceeds the mtu, 15741da177e4SLinus Torvalds * we know we need more fragment(s). 15751da177e4SLinus Torvalds */ 15761da177e4SLinus Torvalds datalen = length + fraggap; 15771da177e4SLinus Torvalds 15780c183379SGao feng if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 15790c183379SGao feng datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 158015e36f5bSWillem de Bruijn fraglen = datalen + fragheaderlen; 1581aba36930SWillem de Bruijn pagedlen = 0; 158215e36f5bSWillem de Bruijn 15836d123b81SJakub Kicinski alloc_extra = hh_len; 15846d123b81SJakub Kicinski alloc_extra += dst_exthdrlen; 15856d123b81SJakub Kicinski alloc_extra += rt->dst.trailer_len; 15866d123b81SJakub Kicinski 15876d123b81SJakub Kicinski /* We just reserve space for fragment header. 15886d123b81SJakub Kicinski * Note: this may be overallocation if the message 15896d123b81SJakub Kicinski * (without MSG_MORE) fits into the MTU. 
15906d123b81SJakub Kicinski */ 15916d123b81SJakub Kicinski alloc_extra += sizeof(struct frag_hdr); 15926d123b81SJakub Kicinski 15931da177e4SLinus Torvalds if ((flags & MSG_MORE) && 1594d8d1f30bSChangli Gao !(rt->dst.dev->features&NETIF_F_SG)) 15951da177e4SLinus Torvalds alloclen = mtu; 15966d123b81SJakub Kicinski else if (!paged && 15976d123b81SJakub Kicinski (fraglen + alloc_extra < SKB_MAX_ALLOC || 15986d123b81SJakub Kicinski !(rt->dst.dev->features & NETIF_F_SG))) 159915e36f5bSWillem de Bruijn alloclen = fraglen; 160015e36f5bSWillem de Bruijn else { 160115e36f5bSWillem de Bruijn alloclen = min_t(int, fraglen, MAX_HEADER); 160215e36f5bSWillem de Bruijn pagedlen = fraglen - alloclen; 160315e36f5bSWillem de Bruijn } 16046d123b81SJakub Kicinski alloclen += alloc_extra; 1605299b0767SSteffen Klassert 16060c183379SGao feng if (datalen != length + fraggap) { 16071da177e4SLinus Torvalds /* 16080c183379SGao feng * this is not the last fragment, the trailer 16090c183379SGao feng * space is regarded as data space. 
16101da177e4SLinus Torvalds */ 16110c183379SGao feng datalen += rt->dst.trailer_len; 16120c183379SGao feng } 16130c183379SGao feng 16140c183379SGao feng fraglen = datalen + fragheaderlen; 16151da177e4SLinus Torvalds 161615e36f5bSWillem de Bruijn copy = datalen - transhdrlen - fraggap - pagedlen; 1617232cd35dSEric Dumazet if (copy < 0) { 1618232cd35dSEric Dumazet err = -EINVAL; 1619232cd35dSEric Dumazet goto error; 1620232cd35dSEric Dumazet } 16211da177e4SLinus Torvalds if (transhdrlen) { 16226d123b81SJakub Kicinski skb = sock_alloc_send_skb(sk, alloclen, 16231da177e4SLinus Torvalds (flags & MSG_DONTWAIT), &err); 16241da177e4SLinus Torvalds } else { 16251da177e4SLinus Torvalds skb = NULL; 16261f4c6eb2SEric Dumazet if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 16271da177e4SLinus Torvalds 2 * sk->sk_sndbuf) 16286d123b81SJakub Kicinski skb = alloc_skb(alloclen, 16291da177e4SLinus Torvalds sk->sk_allocation); 163063159f29SIan Morris if (unlikely(!skb)) 16311da177e4SLinus Torvalds err = -ENOBUFS; 16321da177e4SLinus Torvalds } 163363159f29SIan Morris if (!skb) 16341da177e4SLinus Torvalds goto error; 16351da177e4SLinus Torvalds /* 16361da177e4SLinus Torvalds * Fill in the control structures 16371da177e4SLinus Torvalds */ 16389c9c9ad5SHannes Frederic Sowa skb->protocol = htons(ETH_P_IPV6); 163932dce968SVlad Yasevich skb->ip_summed = csummode; 16401da177e4SLinus Torvalds skb->csum = 0; 16411f85851eSGao feng /* reserve for fragmentation and ipsec header */ 16421f85851eSGao feng skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 16431f85851eSGao feng dst_exthdrlen); 16441da177e4SLinus Torvalds 16451da177e4SLinus Torvalds /* 16461da177e4SLinus Torvalds * Find where to start putting bytes 16471da177e4SLinus Torvalds */ 164815e36f5bSWillem de Bruijn data = skb_put(skb, fraglen - pagedlen); 16491f85851eSGao feng skb_set_network_header(skb, exthdrlen); 16501f85851eSGao feng data += fragheaderlen; 1651b0e380b1SArnaldo Carvalho de Melo skb->transport_header = 
(skb->network_header + 1652b0e380b1SArnaldo Carvalho de Melo fragheaderlen); 16531da177e4SLinus Torvalds if (fraggap) { 16541da177e4SLinus Torvalds skb->csum = skb_copy_and_csum_bits( 16551da177e4SLinus Torvalds skb_prev, maxfraglen, 16568d5930dfSAl Viro data + transhdrlen, fraggap); 16571da177e4SLinus Torvalds skb_prev->csum = csum_sub(skb_prev->csum, 16581da177e4SLinus Torvalds skb->csum); 16591da177e4SLinus Torvalds data += fraggap; 1660e9fa4f7bSHerbert Xu pskb_trim_unique(skb_prev, maxfraglen); 16611da177e4SLinus Torvalds } 1662232cd35dSEric Dumazet if (copy > 0 && 1663232cd35dSEric Dumazet getfrag(from, data + transhdrlen, offset, 1664232cd35dSEric Dumazet copy, fraggap, skb) < 0) { 16651da177e4SLinus Torvalds err = -EFAULT; 16661da177e4SLinus Torvalds kfree_skb(skb); 16671da177e4SLinus Torvalds goto error; 16681da177e4SLinus Torvalds } 16691da177e4SLinus Torvalds 16701da177e4SLinus Torvalds offset += copy; 167115e36f5bSWillem de Bruijn length -= copy + transhdrlen; 16721da177e4SLinus Torvalds transhdrlen = 0; 16731da177e4SLinus Torvalds exthdrlen = 0; 1674299b0767SSteffen Klassert dst_exthdrlen = 0; 16751da177e4SLinus Torvalds 167652900d22SWillem de Bruijn /* Only the initial fragment is time stamped */ 167752900d22SWillem de Bruijn skb_shinfo(skb)->tx_flags = cork->tx_flags; 167852900d22SWillem de Bruijn cork->tx_flags = 0; 167952900d22SWillem de Bruijn skb_shinfo(skb)->tskey = tskey; 168052900d22SWillem de Bruijn tskey = 0; 168152900d22SWillem de Bruijn skb_zcopy_set(skb, uarg, &extra_uref); 168252900d22SWillem de Bruijn 16830dec879fSJulian Anastasov if ((flags & MSG_CONFIRM) && !skb_prev) 16840dec879fSJulian Anastasov skb_set_dst_pending_confirm(skb, 1); 16850dec879fSJulian Anastasov 16861da177e4SLinus Torvalds /* 16871da177e4SLinus Torvalds * Put the packet on the pending queue 16881da177e4SLinus Torvalds */ 16891f4c6eb2SEric Dumazet if (!skb->destructor) { 16901f4c6eb2SEric Dumazet skb->destructor = sock_wfree; 16911f4c6eb2SEric Dumazet skb->sk = sk; 
16921f4c6eb2SEric Dumazet wmem_alloc_delta += skb->truesize; 16931f4c6eb2SEric Dumazet } 16940bbe84a6SVlad Yasevich __skb_queue_tail(queue, skb); 16951da177e4SLinus Torvalds continue; 16961da177e4SLinus Torvalds } 16971da177e4SLinus Torvalds 16981da177e4SLinus Torvalds if (copy > length) 16991da177e4SLinus Torvalds copy = length; 17001da177e4SLinus Torvalds 1701113f99c3SWillem de Bruijn if (!(rt->dst.dev->features&NETIF_F_SG) && 1702113f99c3SWillem de Bruijn skb_tailroom(skb) >= copy) { 17031da177e4SLinus Torvalds unsigned int off; 17041da177e4SLinus Torvalds 17051da177e4SLinus Torvalds off = skb->len; 17061da177e4SLinus Torvalds if (getfrag(from, skb_put(skb, copy), 17071da177e4SLinus Torvalds offset, copy, off, skb) < 0) { 17081da177e4SLinus Torvalds __skb_trim(skb, off); 17091da177e4SLinus Torvalds err = -EFAULT; 17101da177e4SLinus Torvalds goto error; 17111da177e4SLinus Torvalds } 1712b5947e5dSWillem de Bruijn } else if (!uarg || !uarg->zerocopy) { 17131da177e4SLinus Torvalds int i = skb_shinfo(skb)->nr_frags; 17141da177e4SLinus Torvalds 17151da177e4SLinus Torvalds err = -ENOMEM; 17165640f768SEric Dumazet if (!sk_page_frag_refill(sk, pfrag)) 17171da177e4SLinus Torvalds goto error; 17181da177e4SLinus Torvalds 17195640f768SEric Dumazet if (!skb_can_coalesce(skb, i, pfrag->page, 17205640f768SEric Dumazet pfrag->offset)) { 17211da177e4SLinus Torvalds err = -EMSGSIZE; 17225640f768SEric Dumazet if (i == MAX_SKB_FRAGS) 17231da177e4SLinus Torvalds goto error; 17245640f768SEric Dumazet 17255640f768SEric Dumazet __skb_fill_page_desc(skb, i, pfrag->page, 17265640f768SEric Dumazet pfrag->offset, 0); 17275640f768SEric Dumazet skb_shinfo(skb)->nr_frags = ++i; 17285640f768SEric Dumazet get_page(pfrag->page); 17291da177e4SLinus Torvalds } 17305640f768SEric Dumazet copy = min_t(int, copy, pfrag->size - pfrag->offset); 17319e903e08SEric Dumazet if (getfrag(from, 17325640f768SEric Dumazet page_address(pfrag->page) + pfrag->offset, 17335640f768SEric Dumazet offset, copy, skb->len, 
skb) < 0) 17345640f768SEric Dumazet goto error_efault; 17355640f768SEric Dumazet 17365640f768SEric Dumazet pfrag->offset += copy; 17375640f768SEric Dumazet skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); 17381da177e4SLinus Torvalds skb->len += copy; 17391da177e4SLinus Torvalds skb->data_len += copy; 1740f945fa7aSHerbert Xu skb->truesize += copy; 17411f4c6eb2SEric Dumazet wmem_alloc_delta += copy; 1742b5947e5dSWillem de Bruijn } else { 1743b5947e5dSWillem de Bruijn err = skb_zerocopy_iter_dgram(skb, from, copy); 1744b5947e5dSWillem de Bruijn if (err < 0) 1745b5947e5dSWillem de Bruijn goto error; 17461da177e4SLinus Torvalds } 17471da177e4SLinus Torvalds offset += copy; 17481da177e4SLinus Torvalds length -= copy; 17491da177e4SLinus Torvalds } 17505640f768SEric Dumazet 17519e8445a5SPaolo Abeni if (wmem_alloc_delta) 17521f4c6eb2SEric Dumazet refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 17531da177e4SLinus Torvalds return 0; 17545640f768SEric Dumazet 17555640f768SEric Dumazet error_efault: 17565640f768SEric Dumazet err = -EFAULT; 17571da177e4SLinus Torvalds error: 17588e044917SJonathan Lemon net_zcopy_put_abort(uarg, extra_uref); 1759bdc712b4SDavid S. Miller cork->length -= length; 17603bd653c8SDenis V. 
Lunev IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 17611f4c6eb2SEric Dumazet refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 17621da177e4SLinus Torvalds return err; 17631da177e4SLinus Torvalds } 17640bbe84a6SVlad Yasevich 17650bbe84a6SVlad Yasevich int ip6_append_data(struct sock *sk, 17660bbe84a6SVlad Yasevich int getfrag(void *from, char *to, int offset, int len, 17670bbe84a6SVlad Yasevich int odd, struct sk_buff *skb), 176826879da5SWei Wang void *from, int length, int transhdrlen, 176926879da5SWei Wang struct ipcm6_cookie *ipc6, struct flowi6 *fl6, 17705fdaa88dSWillem de Bruijn struct rt6_info *rt, unsigned int flags) 17710bbe84a6SVlad Yasevich { 17720bbe84a6SVlad Yasevich struct inet_sock *inet = inet_sk(sk); 17730bbe84a6SVlad Yasevich struct ipv6_pinfo *np = inet6_sk(sk); 17740bbe84a6SVlad Yasevich int exthdrlen; 17750bbe84a6SVlad Yasevich int err; 17760bbe84a6SVlad Yasevich 17770bbe84a6SVlad Yasevich if (flags&MSG_PROBE) 17780bbe84a6SVlad Yasevich return 0; 17790bbe84a6SVlad Yasevich if (skb_queue_empty(&sk->sk_write_queue)) { 17800bbe84a6SVlad Yasevich /* 17810bbe84a6SVlad Yasevich * setup for corking 17820bbe84a6SVlad Yasevich */ 178326879da5SWei Wang err = ip6_setup_cork(sk, &inet->cork, &np->cork, 17845fdaa88dSWillem de Bruijn ipc6, rt, fl6); 17850bbe84a6SVlad Yasevich if (err) 17860bbe84a6SVlad Yasevich return err; 17870bbe84a6SVlad Yasevich 178826879da5SWei Wang exthdrlen = (ipc6->opt ? 
ipc6->opt->opt_flen : 0); 17890bbe84a6SVlad Yasevich length += exthdrlen; 17900bbe84a6SVlad Yasevich transhdrlen += exthdrlen; 17910bbe84a6SVlad Yasevich } else { 17920bbe84a6SVlad Yasevich fl6 = &inet->cork.fl.u.ip6; 17930bbe84a6SVlad Yasevich transhdrlen = 0; 17940bbe84a6SVlad Yasevich } 17950bbe84a6SVlad Yasevich 17960bbe84a6SVlad Yasevich return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, 17970bbe84a6SVlad Yasevich &np->cork, sk_page_frag(sk), getfrag, 17985fdaa88dSWillem de Bruijn from, length, transhdrlen, flags, ipc6); 17990bbe84a6SVlad Yasevich } 1800a495f836SChris Elston EXPORT_SYMBOL_GPL(ip6_append_data); 18011da177e4SLinus Torvalds 1802366e41d9SVlad Yasevich static void ip6_cork_release(struct inet_cork_full *cork, 1803366e41d9SVlad Yasevich struct inet6_cork *v6_cork) 1804bf138862SPavel Emelyanov { 1805366e41d9SVlad Yasevich if (v6_cork->opt) { 1806366e41d9SVlad Yasevich kfree(v6_cork->opt->dst0opt); 1807366e41d9SVlad Yasevich kfree(v6_cork->opt->dst1opt); 1808366e41d9SVlad Yasevich kfree(v6_cork->opt->hopopt); 1809366e41d9SVlad Yasevich kfree(v6_cork->opt->srcrt); 1810366e41d9SVlad Yasevich kfree(v6_cork->opt); 1811366e41d9SVlad Yasevich v6_cork->opt = NULL; 18120178b695SHerbert Xu } 18130178b695SHerbert Xu 1814366e41d9SVlad Yasevich if (cork->base.dst) { 1815366e41d9SVlad Yasevich dst_release(cork->base.dst); 1816366e41d9SVlad Yasevich cork->base.dst = NULL; 1817366e41d9SVlad Yasevich cork->base.flags &= ~IPCORK_ALLFRAG; 1818bf138862SPavel Emelyanov } 1819366e41d9SVlad Yasevich memset(&cork->fl, 0, sizeof(cork->fl)); 1820bf138862SPavel Emelyanov } 1821bf138862SPavel Emelyanov 18226422398cSVlad Yasevich struct sk_buff *__ip6_make_skb(struct sock *sk, 18236422398cSVlad Yasevich struct sk_buff_head *queue, 18246422398cSVlad Yasevich struct inet_cork_full *cork, 18256422398cSVlad Yasevich struct inet6_cork *v6_cork) 18261da177e4SLinus Torvalds { 18271da177e4SLinus Torvalds struct sk_buff *skb, *tmp_skb; 18281da177e4SLinus Torvalds 
struct sk_buff **tail_skb; 18291da177e4SLinus Torvalds struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; 18301da177e4SLinus Torvalds struct ipv6_pinfo *np = inet6_sk(sk); 18313bd653c8SDenis V. Lunev struct net *net = sock_net(sk); 18321da177e4SLinus Torvalds struct ipv6hdr *hdr; 18336422398cSVlad Yasevich struct ipv6_txoptions *opt = v6_cork->opt; 18346422398cSVlad Yasevich struct rt6_info *rt = (struct rt6_info *)cork->base.dst; 18356422398cSVlad Yasevich struct flowi6 *fl6 = &cork->fl.u.ip6; 18364c9483b2SDavid S. Miller unsigned char proto = fl6->flowi6_proto; 18371da177e4SLinus Torvalds 18386422398cSVlad Yasevich skb = __skb_dequeue(queue); 183963159f29SIan Morris if (!skb) 18401da177e4SLinus Torvalds goto out; 18411da177e4SLinus Torvalds tail_skb = &(skb_shinfo(skb)->frag_list); 18421da177e4SLinus Torvalds 18431da177e4SLinus Torvalds /* move skb->data to ip header from ext header */ 1844d56f90a7SArnaldo Carvalho de Melo if (skb->data < skb_network_header(skb)) 1845bbe735e4SArnaldo Carvalho de Melo __skb_pull(skb, skb_network_offset(skb)); 18466422398cSVlad Yasevich while ((tmp_skb = __skb_dequeue(queue)) != NULL) { 1847cfe1fc77SArnaldo Carvalho de Melo __skb_pull(tmp_skb, skb_network_header_len(skb)); 18481da177e4SLinus Torvalds *tail_skb = tmp_skb; 18491da177e4SLinus Torvalds tail_skb = &(tmp_skb->next); 18501da177e4SLinus Torvalds skb->len += tmp_skb->len; 18511da177e4SLinus Torvalds skb->data_len += tmp_skb->len; 18521da177e4SLinus Torvalds skb->truesize += tmp_skb->truesize; 18531da177e4SLinus Torvalds tmp_skb->destructor = NULL; 18541da177e4SLinus Torvalds tmp_skb->sk = NULL; 18551da177e4SLinus Torvalds } 18561da177e4SLinus Torvalds 185728a89453SHerbert Xu /* Allow local fragmentation. 
*/ 185860ff7467SWANG Cong skb->ignore_df = ip6_sk_ignore_df(sk); 185928a89453SHerbert Xu 18604e3fd7a0SAlexey Dobriyan *final_dst = fl6->daddr; 1861cfe1fc77SArnaldo Carvalho de Melo __skb_pull(skb, skb_network_header_len(skb)); 18621da177e4SLinus Torvalds if (opt && opt->opt_flen) 18631da177e4SLinus Torvalds ipv6_push_frag_opts(skb, opt, &proto); 18641da177e4SLinus Torvalds if (opt && opt->opt_nflen) 1865613fa3caSDavid Lebrun ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); 18661da177e4SLinus Torvalds 1867e2d1bca7SArnaldo Carvalho de Melo skb_push(skb, sizeof(struct ipv6hdr)); 1868e2d1bca7SArnaldo Carvalho de Melo skb_reset_network_header(skb); 18690660e03fSArnaldo Carvalho de Melo hdr = ipv6_hdr(skb); 18701da177e4SLinus Torvalds 18716422398cSVlad Yasevich ip6_flow_hdr(hdr, v6_cork->tclass, 1872cb1ce2efSTom Herbert ip6_make_flowlabel(net, skb, fl6->flowlabel, 1873513674b5SShaohua Li ip6_autoflowlabel(net, np), fl6)); 18746422398cSVlad Yasevich hdr->hop_limit = v6_cork->hop_limit; 18751da177e4SLinus Torvalds hdr->nexthdr = proto; 18764e3fd7a0SAlexey Dobriyan hdr->saddr = fl6->saddr; 18774e3fd7a0SAlexey Dobriyan hdr->daddr = *final_dst; 18781da177e4SLinus Torvalds 1879a2c2064fSPatrick McHardy skb->priority = sk->sk_priority; 1880c6af0c22SWillem de Bruijn skb->mark = cork->base.mark; 1881a2c2064fSPatrick McHardy 1882a818f75eSJesus Sanchez-Palencia skb->tstamp = cork->base.transmit_time; 1883a818f75eSJesus Sanchez-Palencia 1884d8d1f30bSChangli Gao skb_dst_set(skb, dst_clone(&rt->dst)); 1885edf391ffSNeil Horman IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 188614878f75SDavid L Stevens if (proto == IPPROTO_ICMPV6) { 1887adf30907SEric Dumazet struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 188814878f75SDavid L Stevens 188943a43b60SHannes Frederic Sowa ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); 189043a43b60SHannes Frederic Sowa ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 189114878f75SDavid L Stevens } 
189214878f75SDavid L Stevens 18936422398cSVlad Yasevich ip6_cork_release(cork, v6_cork); 18946422398cSVlad Yasevich out: 18956422398cSVlad Yasevich return skb; 18966422398cSVlad Yasevich } 18976422398cSVlad Yasevich 18986422398cSVlad Yasevich int ip6_send_skb(struct sk_buff *skb) 18996422398cSVlad Yasevich { 19006422398cSVlad Yasevich struct net *net = sock_net(skb->sk); 19016422398cSVlad Yasevich struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 19026422398cSVlad Yasevich int err; 19036422398cSVlad Yasevich 190433224b16SEric W. Biederman err = ip6_local_out(net, skb->sk, skb); 19051da177e4SLinus Torvalds if (err) { 19061da177e4SLinus Torvalds if (err > 0) 19076ce9e7b5SEric Dumazet err = net_xmit_errno(err); 19081da177e4SLinus Torvalds if (err) 19096422398cSVlad Yasevich IP6_INC_STATS(net, rt->rt6i_idev, 19106422398cSVlad Yasevich IPSTATS_MIB_OUTDISCARDS); 19111da177e4SLinus Torvalds } 19121da177e4SLinus Torvalds 19131da177e4SLinus Torvalds return err; 19146422398cSVlad Yasevich } 19156422398cSVlad Yasevich 19166422398cSVlad Yasevich int ip6_push_pending_frames(struct sock *sk) 19176422398cSVlad Yasevich { 19186422398cSVlad Yasevich struct sk_buff *skb; 19196422398cSVlad Yasevich 19206422398cSVlad Yasevich skb = ip6_finish_skb(sk); 19216422398cSVlad Yasevich if (!skb) 19226422398cSVlad Yasevich return 0; 19236422398cSVlad Yasevich 19246422398cSVlad Yasevich return ip6_send_skb(skb); 19251da177e4SLinus Torvalds } 1926a495f836SChris Elston EXPORT_SYMBOL_GPL(ip6_push_pending_frames); 19271da177e4SLinus Torvalds 19280bbe84a6SVlad Yasevich static void __ip6_flush_pending_frames(struct sock *sk, 19296422398cSVlad Yasevich struct sk_buff_head *queue, 19306422398cSVlad Yasevich struct inet_cork_full *cork, 19316422398cSVlad Yasevich struct inet6_cork *v6_cork) 19321da177e4SLinus Torvalds { 19331da177e4SLinus Torvalds struct sk_buff *skb; 19341da177e4SLinus Torvalds 19350bbe84a6SVlad Yasevich while ((skb = __skb_dequeue_tail(queue)) != NULL) { 1936adf30907SEric Dumazet 
if (skb_dst(skb)) 1937adf30907SEric Dumazet IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), 1938a11d206dSYOSHIFUJI Hideaki IPSTATS_MIB_OUTDISCARDS); 19391da177e4SLinus Torvalds kfree_skb(skb); 19401da177e4SLinus Torvalds } 19411da177e4SLinus Torvalds 19426422398cSVlad Yasevich ip6_cork_release(cork, v6_cork); 19431da177e4SLinus Torvalds } 19440bbe84a6SVlad Yasevich 19450bbe84a6SVlad Yasevich void ip6_flush_pending_frames(struct sock *sk) 19460bbe84a6SVlad Yasevich { 19476422398cSVlad Yasevich __ip6_flush_pending_frames(sk, &sk->sk_write_queue, 19486422398cSVlad Yasevich &inet_sk(sk)->cork, &inet6_sk(sk)->cork); 19490bbe84a6SVlad Yasevich } 1950a495f836SChris Elston EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); 19516422398cSVlad Yasevich 19526422398cSVlad Yasevich struct sk_buff *ip6_make_skb(struct sock *sk, 19536422398cSVlad Yasevich int getfrag(void *from, char *to, int offset, 19546422398cSVlad Yasevich int len, int odd, struct sk_buff *skb), 19556422398cSVlad Yasevich void *from, int length, int transhdrlen, 195626879da5SWei Wang struct ipcm6_cookie *ipc6, struct flowi6 *fl6, 19576422398cSVlad Yasevich struct rt6_info *rt, unsigned int flags, 19585fdaa88dSWillem de Bruijn struct inet_cork_full *cork) 19596422398cSVlad Yasevich { 19606422398cSVlad Yasevich struct inet6_cork v6_cork; 19616422398cSVlad Yasevich struct sk_buff_head queue; 196226879da5SWei Wang int exthdrlen = (ipc6->opt ? 
ipc6->opt->opt_flen : 0); 19636422398cSVlad Yasevich int err; 19646422398cSVlad Yasevich 19656422398cSVlad Yasevich if (flags & MSG_PROBE) 19666422398cSVlad Yasevich return NULL; 19676422398cSVlad Yasevich 19686422398cSVlad Yasevich __skb_queue_head_init(&queue); 19696422398cSVlad Yasevich 19701cd7884dSWillem de Bruijn cork->base.flags = 0; 19711cd7884dSWillem de Bruijn cork->base.addr = 0; 19721cd7884dSWillem de Bruijn cork->base.opt = NULL; 19731cd7884dSWillem de Bruijn cork->base.dst = NULL; 19746422398cSVlad Yasevich v6_cork.opt = NULL; 19755fdaa88dSWillem de Bruijn err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6); 1976862c03eeSEric Dumazet if (err) { 19771cd7884dSWillem de Bruijn ip6_cork_release(cork, &v6_cork); 19786422398cSVlad Yasevich return ERR_PTR(err); 1979862c03eeSEric Dumazet } 198026879da5SWei Wang if (ipc6->dontfrag < 0) 198126879da5SWei Wang ipc6->dontfrag = inet6_sk(sk)->dontfrag; 19826422398cSVlad Yasevich 19831cd7884dSWillem de Bruijn err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork, 19846422398cSVlad Yasevich ¤t->task_frag, getfrag, from, 19856422398cSVlad Yasevich length + exthdrlen, transhdrlen + exthdrlen, 19865fdaa88dSWillem de Bruijn flags, ipc6); 19876422398cSVlad Yasevich if (err) { 19881cd7884dSWillem de Bruijn __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); 19896422398cSVlad Yasevich return ERR_PTR(err); 19906422398cSVlad Yasevich } 19916422398cSVlad Yasevich 19921cd7884dSWillem de Bruijn return __ip6_make_skb(sk, &queue, cork, &v6_cork); 19936422398cSVlad Yasevich } 1994