xref: /openbmc/linux/net/ipv6/ip6_output.c (revision e7f3e5fb20c5b48dcab4011eab778d3353d0beb0)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *			:       add ip6_append_data and related functions
 *				for datagram xmit
 */
241da177e4SLinus Torvalds 
251da177e4SLinus Torvalds #include <linux/errno.h>
26ef76bc23SHerbert Xu #include <linux/kernel.h>
271da177e4SLinus Torvalds #include <linux/string.h>
281da177e4SLinus Torvalds #include <linux/socket.h>
291da177e4SLinus Torvalds #include <linux/net.h>
301da177e4SLinus Torvalds #include <linux/netdevice.h>
311da177e4SLinus Torvalds #include <linux/if_arp.h>
321da177e4SLinus Torvalds #include <linux/in6.h>
331da177e4SLinus Torvalds #include <linux/tcp.h>
341da177e4SLinus Torvalds #include <linux/route.h>
35b59f45d0SHerbert Xu #include <linux/module.h>
365a0e3ad6STejun Heo #include <linux/slab.h>
371da177e4SLinus Torvalds 
3833b48679SDaniel Mack #include <linux/bpf-cgroup.h>
391da177e4SLinus Torvalds #include <linux/netfilter.h>
401da177e4SLinus Torvalds #include <linux/netfilter_ipv6.h>
411da177e4SLinus Torvalds 
421da177e4SLinus Torvalds #include <net/sock.h>
431da177e4SLinus Torvalds #include <net/snmp.h>
441da177e4SLinus Torvalds 
45d457a0e3SEric Dumazet #include <net/gso.h>
461da177e4SLinus Torvalds #include <net/ipv6.h>
471da177e4SLinus Torvalds #include <net/ndisc.h>
481da177e4SLinus Torvalds #include <net/protocol.h>
491da177e4SLinus Torvalds #include <net/ip6_route.h>
501da177e4SLinus Torvalds #include <net/addrconf.h>
511da177e4SLinus Torvalds #include <net/rawv6.h>
521da177e4SLinus Torvalds #include <net/icmp.h>
531da177e4SLinus Torvalds #include <net/xfrm.h>
541da177e4SLinus Torvalds #include <net/checksum.h>
557bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
56ca254490SDavid Ahern #include <net/l3mdev.h>
5714972cbdSRoopa Prabhu #include <net/lwtunnel.h>
58571912c6SMartin Varghese #include <net/ip_tunnels.h>
591da177e4SLinus Torvalds 
607d8c6e39SEric W. Biederman static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
611da177e4SLinus Torvalds {
62adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
631da177e4SLinus Torvalds 	struct net_device *dev = dst->dev;
64e415ed3aSVasily Averin 	struct inet6_dev *idev = ip6_dst_idev(dst);
655796015fSVasily Averin 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
66e415ed3aSVasily Averin 	const struct in6_addr *daddr, *nexthop;
67e415ed3aSVasily Averin 	struct ipv6hdr *hdr;
68f6b72b62SDavid S. Miller 	struct neighbour *neigh;
696fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 	int ret;
701da177e4SLinus Torvalds 
715796015fSVasily Averin 	/* Be paranoid, rather than too clever. */
72e415ed3aSVasily Averin 	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
73e415ed3aSVasily Averin 		skb = skb_expand_head(skb, hh_len);
745796015fSVasily Averin 		if (!skb) {
75e415ed3aSVasily Averin 			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
765796015fSVasily Averin 			return -ENOMEM;
775796015fSVasily Averin 		}
785796015fSVasily Averin 	}
795796015fSVasily Averin 
80e415ed3aSVasily Averin 	hdr = ipv6_hdr(skb);
81e415ed3aSVasily Averin 	daddr = &hdr->daddr;
82e415ed3aSVasily Averin 	if (ipv6_addr_is_multicast(daddr)) {
837026b1ddSDavid Miller 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
848571ab47SYuval Mintz 		    ((mroute6_is_socket(net, skb) &&
85bd91b8bfSBenjamin Thery 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
86e415ed3aSVasily Averin 		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
871da177e4SLinus Torvalds 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
881da177e4SLinus Torvalds 
891da177e4SLinus Torvalds 			/* Do not check for IFF_ALLMULTI; multicast routing
901da177e4SLinus Torvalds 			   is not supported in any case.
911da177e4SLinus Torvalds 			 */
921da177e4SLinus Torvalds 			if (newskb)
93b2e0b385SJan Engelhardt 				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
9429a26a56SEric W. Biederman 					net, sk, newskb, NULL, newskb->dev,
9595603e22SMichel Machado 					dev_loopback_xmit);
961da177e4SLinus Torvalds 
97e415ed3aSVasily Averin 			if (hdr->hop_limit == 0) {
9878126c41SEric W. Biederman 				IP6_INC_STATS(net, idev,
993bd653c8SDenis V. Lunev 					      IPSTATS_MIB_OUTDISCARDS);
1001da177e4SLinus Torvalds 				kfree_skb(skb);
1011da177e4SLinus Torvalds 				return 0;
1021da177e4SLinus Torvalds 			}
1031da177e4SLinus Torvalds 		}
1041da177e4SLinus Torvalds 
10578126c41SEric W. Biederman 		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
106e415ed3aSVasily Averin 		if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
107dd408515SHannes Frederic Sowa 		    !(dev->flags & IFF_LOOPBACK)) {
108dd408515SHannes Frederic Sowa 			kfree_skb(skb);
109dd408515SHannes Frederic Sowa 			return 0;
110dd408515SHannes Frederic Sowa 		}
1111da177e4SLinus Torvalds 	}
1121da177e4SLinus Torvalds 
11314972cbdSRoopa Prabhu 	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
11414972cbdSRoopa Prabhu 		int res = lwtunnel_xmit(skb);
11514972cbdSRoopa Prabhu 
116a171fbecSYan Zhai 		if (res != LWTUNNEL_XMIT_CONTINUE)
11714972cbdSRoopa Prabhu 			return res;
11814972cbdSRoopa Prabhu 	}
11914972cbdSRoopa Prabhu 
12056712f74SHeng Guo 	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
12156712f74SHeng Guo 
12209eed119SEric Dumazet 	rcu_read_lock();
123e415ed3aSVasily Averin 	nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
124e415ed3aSVasily Averin 	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
12558f71be5SPavel Begunkov 
12658f71be5SPavel Begunkov 	if (unlikely(IS_ERR_OR_NULL(neigh))) {
1276fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 		if (unlikely(!neigh))
128e415ed3aSVasily Averin 			neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
12958f71be5SPavel Begunkov 		if (IS_ERR(neigh)) {
13009eed119SEric Dumazet 			rcu_read_unlock();
13158f71be5SPavel Begunkov 			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
13258f71be5SPavel Begunkov 			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
13358f71be5SPavel Begunkov 			return -EINVAL;
13458f71be5SPavel Begunkov 		}
13558f71be5SPavel Begunkov 	}
1364ff06203SJulian Anastasov 	sock_confirm_neigh(skb, neigh);
1370353f282SDavid Ahern 	ret = neigh_output(neigh, skb, false);
13809eed119SEric Dumazet 	rcu_read_unlock();
1396fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 	return ret;
1406fd6ce20SYOSHIFUJI Hideaki / 吉藤英明 }
1411da177e4SLinus Torvalds 
142b210de4fSAya Levin static int
143b210de4fSAya Levin ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
144b210de4fSAya Levin 				    struct sk_buff *skb, unsigned int mtu)
145b210de4fSAya Levin {
146b210de4fSAya Levin 	struct sk_buff *segs, *nskb;
147b210de4fSAya Levin 	netdev_features_t features;
148b210de4fSAya Levin 	int ret = 0;
149b210de4fSAya Levin 
150b210de4fSAya Levin 	/* Please see corresponding comment in ip_finish_output_gso
151b210de4fSAya Levin 	 * describing the cases where GSO segment length exceeds the
152b210de4fSAya Levin 	 * egress MTU.
153b210de4fSAya Levin 	 */
154b210de4fSAya Levin 	features = netif_skb_features(skb);
155b210de4fSAya Levin 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
156b210de4fSAya Levin 	if (IS_ERR_OR_NULL(segs)) {
157b210de4fSAya Levin 		kfree_skb(skb);
158b210de4fSAya Levin 		return -ENOMEM;
159b210de4fSAya Levin 	}
160b210de4fSAya Levin 
161b210de4fSAya Levin 	consume_skb(skb);
162b210de4fSAya Levin 
163b210de4fSAya Levin 	skb_list_walk_safe(segs, segs, nskb) {
164b210de4fSAya Levin 		int err;
165b210de4fSAya Levin 
166b210de4fSAya Levin 		skb_mark_not_on_list(segs);
167ee1c730fSYan Zhai 		/* Last GSO segment can be smaller than gso_size (and MTU).
168ee1c730fSYan Zhai 		 * Adding a fragment header would produce an "atomic fragment",
169ee1c730fSYan Zhai 		 * which is considered harmful (RFC-8021). Avoid that.
170ee1c730fSYan Zhai 		 */
171ee1c730fSYan Zhai 		err = segs->len > mtu ?
172ee1c730fSYan Zhai 			ip6_fragment(net, sk, segs, ip6_finish_output2) :
173ee1c730fSYan Zhai 			ip6_finish_output2(net, sk, segs);
174b210de4fSAya Levin 		if (err && ret == 0)
175b210de4fSAya Levin 			ret = err;
176b210de4fSAya Levin 	}
177b210de4fSAya Levin 
178b210de4fSAya Levin 	return ret;
179b210de4fSAya Levin }
180b210de4fSAya Levin 
181956fe219Sbrakmo static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1829e508490SJan Engelhardt {
183b210de4fSAya Levin 	unsigned int mtu;
184b210de4fSAya Levin 
18509ee9dbaSTobias Brunner #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
18609ee9dbaSTobias Brunner 	/* Policy lookup after SNAT yielded a new policy */
18709ee9dbaSTobias Brunner 	if (skb_dst(skb)->xfrm) {
18819d36c5fSEric Dumazet 		IP6CB(skb)->flags |= IP6SKB_REROUTED;
18909ee9dbaSTobias Brunner 		return dst_output(net, sk, skb);
19009ee9dbaSTobias Brunner 	}
19109ee9dbaSTobias Brunner #endif
19209ee9dbaSTobias Brunner 
193b210de4fSAya Levin 	mtu = ip6_skb_dst_mtu(skb);
19480e425b6SCoco Li 	if (skb_is_gso(skb) &&
19580e425b6SCoco Li 	    !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
19680e425b6SCoco Li 	    !skb_gso_validate_network_len(skb, mtu))
197b210de4fSAya Levin 		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
198b210de4fSAya Levin 
199b210de4fSAya Levin 	if ((skb->len > mtu && !skb_is_gso(skb)) ||
2009037c357SJiri Pirko 	    dst_allfrag(skb_dst(skb)) ||
2019037c357SJiri Pirko 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
2027d8c6e39SEric W. Biederman 		return ip6_fragment(net, sk, skb, ip6_finish_output2);
2039e508490SJan Engelhardt 	else
2047d8c6e39SEric W. Biederman 		return ip6_finish_output2(net, sk, skb);
2059e508490SJan Engelhardt }
2069e508490SJan Engelhardt 
207956fe219Sbrakmo static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
208956fe219Sbrakmo {
209956fe219Sbrakmo 	int ret;
210956fe219Sbrakmo 
211956fe219Sbrakmo 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
212956fe219Sbrakmo 	switch (ret) {
213956fe219Sbrakmo 	case NET_XMIT_SUCCESS:
214956fe219Sbrakmo 	case NET_XMIT_CN:
215956fe219Sbrakmo 		return __ip6_finish_output(net, sk, skb) ? : ret;
216956fe219Sbrakmo 	default:
2175e187189SMenglong Dong 		kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
218956fe219Sbrakmo 		return ret;
219956fe219Sbrakmo 	}
220956fe219Sbrakmo }
221956fe219Sbrakmo 
222ede2059dSEric W. Biederman int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
2231da177e4SLinus Torvalds {
22428f8bfd1SPhil Sutter 	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
225adf30907SEric Dumazet 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
226be10de0aSEric W. Biederman 
22797a7a37aSChenbo Feng 	skb->protocol = htons(ETH_P_IPV6);
22897a7a37aSChenbo Feng 	skb->dev = dev;
22997a7a37aSChenbo Feng 
230e31b25ccSEric Dumazet 	if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
23119a0644cSEric W. Biederman 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
2325e187189SMenglong Dong 		kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
233778d80beSYOSHIFUJI Hideaki 		return 0;
234778d80beSYOSHIFUJI Hideaki 	}
235778d80beSYOSHIFUJI Hideaki 
23629a26a56SEric W. Biederman 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
23728f8bfd1SPhil Sutter 			    net, sk, skb, indev, dev,
2389c6eb28aSJan Engelhardt 			    ip6_finish_output,
2399c6eb28aSJan Engelhardt 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
2401da177e4SLinus Torvalds }
2416585d7dcSBrian Vazquez EXPORT_SYMBOL(ip6_output);
2421da177e4SLinus Torvalds 
243e9191ffbSBen Hutchings bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
244513674b5SShaohua Li {
245513674b5SShaohua Li 	if (!np->autoflowlabel_set)
246513674b5SShaohua Li 		return ip6_default_np_autolabel(net);
247513674b5SShaohua Li 	else
248513674b5SShaohua Li 		return np->autoflowlabel;
249513674b5SShaohua Li }
250513674b5SShaohua Li 
2511da177e4SLinus Torvalds /*
252b5d43998SShan Wei  * xmit an sk_buff (used by TCP, SCTP and DCCP)
2531c1e9d2bSEric Dumazet  * Note : socket lock is not held for SYNACK packets, but might be modified
2541c1e9d2bSEric Dumazet  * by calls to skb_set_owner_w() and ipv6_local_error(),
2551c1e9d2bSEric Dumazet  * which are using proper atomic operations or spinlocks.
2561da177e4SLinus Torvalds  */
2571c1e9d2bSEric Dumazet int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
2584f6570d7SEric Dumazet 	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
2591da177e4SLinus Torvalds {
2603bd653c8SDenis V. Lunev 	struct net *net = sock_net(sk);
2611c1e9d2bSEric Dumazet 	const struct ipv6_pinfo *np = inet6_sk(sk);
2624c9483b2SDavid S. Miller 	struct in6_addr *first_hop = &fl6->daddr;
263adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
2640c9f227bSVasily Averin 	struct net_device *dev = dst->dev;
2650c9f227bSVasily Averin 	struct inet6_dev *idev = ip6_dst_idev(dst);
26680e425b6SCoco Li 	struct hop_jumbo_hdr *hop_jumbo;
26780e425b6SCoco Li 	int hoplen = sizeof(*hop_jumbo);
26866033f47SStefano Brivio 	unsigned int head_room;
2691da177e4SLinus Torvalds 	struct ipv6hdr *hdr;
2704c9483b2SDavid S. Miller 	u8  proto = fl6->flowi6_proto;
2711da177e4SLinus Torvalds 	int seg_len = skb->len;
272e651f03aSGerrit Renker 	int hlimit = -1;
2731da177e4SLinus Torvalds 	u32 mtu;
2741da177e4SLinus Torvalds 
27580e425b6SCoco Li 	head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
27666033f47SStefano Brivio 	if (opt)
27766033f47SStefano Brivio 		head_room += opt->opt_nflen + opt->opt_flen;
2781da177e4SLinus Torvalds 
2790c9f227bSVasily Averin 	if (unlikely(head_room > skb_headroom(skb))) {
2800c9f227bSVasily Averin 		skb = skb_expand_head(skb, head_room);
2810c9f227bSVasily Averin 		if (!skb) {
2820c9f227bSVasily Averin 			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
2831da177e4SLinus Torvalds 			return -ENOBUFS;
2841da177e4SLinus Torvalds 		}
2851da177e4SLinus Torvalds 	}
28666033f47SStefano Brivio 
28766033f47SStefano Brivio 	if (opt) {
28866033f47SStefano Brivio 		seg_len += opt->opt_nflen + opt->opt_flen;
28966033f47SStefano Brivio 
2901da177e4SLinus Torvalds 		if (opt->opt_flen)
2911da177e4SLinus Torvalds 			ipv6_push_frag_opts(skb, opt, &proto);
29266033f47SStefano Brivio 
2931da177e4SLinus Torvalds 		if (opt->opt_nflen)
294613fa3caSDavid Lebrun 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
295613fa3caSDavid Lebrun 					     &fl6->saddr);
2961da177e4SLinus Torvalds 	}
2971da177e4SLinus Torvalds 
29880e425b6SCoco Li 	if (unlikely(seg_len > IPV6_MAXPLEN)) {
29980e425b6SCoco Li 		hop_jumbo = skb_push(skb, hoplen);
30080e425b6SCoco Li 
30180e425b6SCoco Li 		hop_jumbo->nexthdr = proto;
30280e425b6SCoco Li 		hop_jumbo->hdrlen = 0;
30380e425b6SCoco Li 		hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
30480e425b6SCoco Li 		hop_jumbo->tlv_len = 4;
30580e425b6SCoco Li 		hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);
30680e425b6SCoco Li 
30780e425b6SCoco Li 		proto = IPPROTO_HOPOPTS;
30880e425b6SCoco Li 		seg_len = 0;
30980e425b6SCoco Li 		IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
31080e425b6SCoco Li 	}
31180e425b6SCoco Li 
312e2d1bca7SArnaldo Carvalho de Melo 	skb_push(skb, sizeof(struct ipv6hdr));
313e2d1bca7SArnaldo Carvalho de Melo 	skb_reset_network_header(skb);
3140660e03fSArnaldo Carvalho de Melo 	hdr = ipv6_hdr(skb);
3151da177e4SLinus Torvalds 
3161da177e4SLinus Torvalds 	/*
3171da177e4SLinus Torvalds 	 *	Fill in the IPv6 header
3181da177e4SLinus Torvalds 	 */
319b903d324SEric Dumazet 	if (np)
3201da177e4SLinus Torvalds 		hlimit = np->hop_limit;
3211da177e4SLinus Torvalds 	if (hlimit < 0)
3226b75d090SYOSHIFUJI Hideaki 		hlimit = ip6_dst_hoplimit(dst);
3231da177e4SLinus Torvalds 
324cb1ce2efSTom Herbert 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
325513674b5SShaohua Li 				ip6_autoflowlabel(net, np), fl6));
32641a1f8eaSYOSHIFUJI Hideaki 
3271da177e4SLinus Torvalds 	hdr->payload_len = htons(seg_len);
3281da177e4SLinus Torvalds 	hdr->nexthdr = proto;
3291da177e4SLinus Torvalds 	hdr->hop_limit = hlimit;
3301da177e4SLinus Torvalds 
3314e3fd7a0SAlexey Dobriyan 	hdr->saddr = fl6->saddr;
3324e3fd7a0SAlexey Dobriyan 	hdr->daddr = *first_hop;
3331da177e4SLinus Torvalds 
3349c9c9ad5SHannes Frederic Sowa 	skb->protocol = htons(ETH_P_IPV6);
3354f6570d7SEric Dumazet 	skb->priority = priority;
33692e55f41SPablo Neira 	skb->mark = mark;
337a2c2064fSPatrick McHardy 
3381da177e4SLinus Torvalds 	mtu = dst_mtu(dst);
33960ff7467SWANG Cong 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
34056712f74SHeng Guo 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
341a8e3e1a9SDavid Ahern 
342a8e3e1a9SDavid Ahern 		/* if egress device is enslaved to an L3 master device pass the
343a8e3e1a9SDavid Ahern 		 * skb to its handler for processing
344a8e3e1a9SDavid Ahern 		 */
345a8e3e1a9SDavid Ahern 		skb = l3mdev_ip6_out((struct sock *)sk, skb);
346a8e3e1a9SDavid Ahern 		if (unlikely(!skb))
347a8e3e1a9SDavid Ahern 			return 0;
348a8e3e1a9SDavid Ahern 
3491c1e9d2bSEric Dumazet 		/* hooks should never assume socket lock is held.
3501c1e9d2bSEric Dumazet 		 * we promote our socket to non const
3511c1e9d2bSEric Dumazet 		 */
35229a26a56SEric W. Biederman 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
3530c9f227bSVasily Averin 			       net, (struct sock *)sk, skb, NULL, dev,
35413206b6bSEric W. Biederman 			       dst_output);
3551da177e4SLinus Torvalds 	}
3561da177e4SLinus Torvalds 
3570c9f227bSVasily Averin 	skb->dev = dev;
3581c1e9d2bSEric Dumazet 	/* ipv6_local_error() does not require socket lock,
3591c1e9d2bSEric Dumazet 	 * we promote our socket to non const
3601c1e9d2bSEric Dumazet 	 */
3611c1e9d2bSEric Dumazet 	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
3621c1e9d2bSEric Dumazet 
3630c9f227bSVasily Averin 	IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
3641da177e4SLinus Torvalds 	kfree_skb(skb);
3651da177e4SLinus Torvalds 	return -EMSGSIZE;
3661da177e4SLinus Torvalds }
3677159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(ip6_xmit);
3687159039aSYOSHIFUJI Hideaki 
3691da177e4SLinus Torvalds static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
3701da177e4SLinus Torvalds {
3711da177e4SLinus Torvalds 	struct ip6_ra_chain *ra;
3721da177e4SLinus Torvalds 	struct sock *last = NULL;
3731da177e4SLinus Torvalds 
3741da177e4SLinus Torvalds 	read_lock(&ip6_ra_lock);
3751da177e4SLinus Torvalds 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
3761da177e4SLinus Torvalds 		struct sock *sk = ra->sk;
3770bd1b59bSAndrew McDonald 		if (sk && ra->sel == sel &&
3780bd1b59bSAndrew McDonald 		    (!sk->sk_bound_dev_if ||
3790bd1b59bSAndrew McDonald 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
3809036b2feSFrancesco Ruggeri 			struct ipv6_pinfo *np = inet6_sk(sk);
3819036b2feSFrancesco Ruggeri 
3829036b2feSFrancesco Ruggeri 			if (np && np->rtalert_isolate &&
3839036b2feSFrancesco Ruggeri 			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
3849036b2feSFrancesco Ruggeri 				continue;
3859036b2feSFrancesco Ruggeri 			}
3861da177e4SLinus Torvalds 			if (last) {
3871da177e4SLinus Torvalds 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
3881da177e4SLinus Torvalds 				if (skb2)
3891da177e4SLinus Torvalds 					rawv6_rcv(last, skb2);
3901da177e4SLinus Torvalds 			}
3911da177e4SLinus Torvalds 			last = sk;
3921da177e4SLinus Torvalds 		}
3931da177e4SLinus Torvalds 	}
3941da177e4SLinus Torvalds 
3951da177e4SLinus Torvalds 	if (last) {
3961da177e4SLinus Torvalds 		rawv6_rcv(last, skb);
3971da177e4SLinus Torvalds 		read_unlock(&ip6_ra_lock);
3981da177e4SLinus Torvalds 		return 1;
3991da177e4SLinus Torvalds 	}
4001da177e4SLinus Torvalds 	read_unlock(&ip6_ra_lock);
4011da177e4SLinus Torvalds 	return 0;
4021da177e4SLinus Torvalds }
4031da177e4SLinus Torvalds 
404e21e0b5fSVille Nuorvala static int ip6_forward_proxy_check(struct sk_buff *skb)
405e21e0b5fSVille Nuorvala {
4060660e03fSArnaldo Carvalho de Melo 	struct ipv6hdr *hdr = ipv6_hdr(skb);
407e21e0b5fSVille Nuorvala 	u8 nexthdr = hdr->nexthdr;
40875f2811cSJesse Gross 	__be16 frag_off;
409e21e0b5fSVille Nuorvala 	int offset;
410e21e0b5fSVille Nuorvala 
411e21e0b5fSVille Nuorvala 	if (ipv6_ext_hdr(nexthdr)) {
41275f2811cSJesse Gross 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
413e21e0b5fSVille Nuorvala 		if (offset < 0)
414e21e0b5fSVille Nuorvala 			return 0;
415e21e0b5fSVille Nuorvala 	} else
416e21e0b5fSVille Nuorvala 		offset = sizeof(struct ipv6hdr);
417e21e0b5fSVille Nuorvala 
418e21e0b5fSVille Nuorvala 	if (nexthdr == IPPROTO_ICMPV6) {
419e21e0b5fSVille Nuorvala 		struct icmp6hdr *icmp6;
420e21e0b5fSVille Nuorvala 
421d56f90a7SArnaldo Carvalho de Melo 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
422d56f90a7SArnaldo Carvalho de Melo 					 offset + 1 - skb->data)))
423e21e0b5fSVille Nuorvala 			return 0;
424e21e0b5fSVille Nuorvala 
425d56f90a7SArnaldo Carvalho de Melo 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
426e21e0b5fSVille Nuorvala 
427e21e0b5fSVille Nuorvala 		switch (icmp6->icmp6_type) {
428e21e0b5fSVille Nuorvala 		case NDISC_ROUTER_SOLICITATION:
429e21e0b5fSVille Nuorvala 		case NDISC_ROUTER_ADVERTISEMENT:
430e21e0b5fSVille Nuorvala 		case NDISC_NEIGHBOUR_SOLICITATION:
431e21e0b5fSVille Nuorvala 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
432e21e0b5fSVille Nuorvala 		case NDISC_REDIRECT:
433e21e0b5fSVille Nuorvala 			/* For reaction involving unicast neighbor discovery
434e21e0b5fSVille Nuorvala 			 * message destined to the proxied address, pass it to
435e21e0b5fSVille Nuorvala 			 * input function.
436e21e0b5fSVille Nuorvala 			 */
437e21e0b5fSVille Nuorvala 			return 1;
438e21e0b5fSVille Nuorvala 		default:
439e21e0b5fSVille Nuorvala 			break;
440e21e0b5fSVille Nuorvala 		}
441e21e0b5fSVille Nuorvala 	}
442e21e0b5fSVille Nuorvala 
44374553b09SVille Nuorvala 	/*
44474553b09SVille Nuorvala 	 * The proxying router can't forward traffic sent to a link-local
44574553b09SVille Nuorvala 	 * address, so signal the sender and discard the packet. This
44674553b09SVille Nuorvala 	 * behavior is clarified by the MIPv6 specification.
44774553b09SVille Nuorvala 	 */
44874553b09SVille Nuorvala 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
44974553b09SVille Nuorvala 		dst_link_failure(skb);
45074553b09SVille Nuorvala 		return -1;
45174553b09SVille Nuorvala 	}
45274553b09SVille Nuorvala 
453e21e0b5fSVille Nuorvala 	return 0;
454e21e0b5fSVille Nuorvala }
455e21e0b5fSVille Nuorvala 
4560c4b51f0SEric W. Biederman static inline int ip6_forward_finish(struct net *net, struct sock *sk,
4570c4b51f0SEric W. Biederman 				     struct sk_buff *skb)
4581da177e4SLinus Torvalds {
45971a1c915SJeff Barnhill 	struct dst_entry *dst = skb_dst(skb);
46071a1c915SJeff Barnhill 
46171a1c915SJeff Barnhill 	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
46271a1c915SJeff Barnhill 
463f839a6c9SIdo Schimmel #ifdef CONFIG_NET_SWITCHDEV
464f839a6c9SIdo Schimmel 	if (skb->offload_l3_fwd_mark) {
465f839a6c9SIdo Schimmel 		consume_skb(skb);
466f839a6c9SIdo Schimmel 		return 0;
467f839a6c9SIdo Schimmel 	}
468f839a6c9SIdo Schimmel #endif
469f839a6c9SIdo Schimmel 
470de799101SMartin KaFai Lau 	skb_clear_tstamp(skb);
47113206b6bSEric W. Biederman 	return dst_output(net, sk, skb);
4721da177e4SLinus Torvalds }
4731da177e4SLinus Torvalds 
474fe6cc55fSFlorian Westphal static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
475fe6cc55fSFlorian Westphal {
476418a3156SFlorian Westphal 	if (skb->len <= mtu)
477fe6cc55fSFlorian Westphal 		return false;
478fe6cc55fSFlorian Westphal 
47960ff7467SWANG Cong 	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
480fe6cc55fSFlorian Westphal 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
481fe6cc55fSFlorian Westphal 		return true;
482fe6cc55fSFlorian Westphal 
48360ff7467SWANG Cong 	if (skb->ignore_df)
484418a3156SFlorian Westphal 		return false;
485418a3156SFlorian Westphal 
486779b7931SDaniel Axtens 	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
487fe6cc55fSFlorian Westphal 		return false;
488fe6cc55fSFlorian Westphal 
489fe6cc55fSFlorian Westphal 	return true;
490fe6cc55fSFlorian Westphal }
491fe6cc55fSFlorian Westphal 
4921da177e4SLinus Torvalds int ip6_forward(struct sk_buff *skb)
4931da177e4SLinus Torvalds {
494adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
4950660e03fSArnaldo Carvalho de Melo 	struct ipv6hdr *hdr = ipv6_hdr(skb);
4961da177e4SLinus Torvalds 	struct inet6_skb_parm *opt = IP6CB(skb);
497c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dst->dev);
4980857d6f8SStephen Suryaputra 	struct inet6_dev *idev;
4992edc1a38SMenglong Dong 	SKB_DR(reason);
50014f3ad6fSUlrich Weber 	u32 mtu;
5011da177e4SLinus Torvalds 
5020857d6f8SStephen Suryaputra 	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
50353b7997fSYOSHIFUJI Hideaki 	if (net->ipv6.devconf_all->forwarding == 0)
5041da177e4SLinus Torvalds 		goto error;
5051da177e4SLinus Torvalds 
506090f1166SLi RongQing 	if (skb->pkt_type != PACKET_HOST)
507090f1166SLi RongQing 		goto drop;
508090f1166SLi RongQing 
5099ef2e965SHannes Frederic Sowa 	if (unlikely(skb->sk))
5109ef2e965SHannes Frederic Sowa 		goto drop;
5119ef2e965SHannes Frederic Sowa 
5124497b076SBen Hutchings 	if (skb_warn_if_lro(skb))
5134497b076SBen Hutchings 		goto drop;
5144497b076SBen Hutchings 
515ccd27f05SNicolas Dichtel 	if (!net->ipv6.devconf_all->disable_policy &&
516e3fa461dSNicolas Dichtel 	    (!idev || !idev->cnf.disable_policy) &&
517ccd27f05SNicolas Dichtel 	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
518bdb7cc64SStephen Suryaputra 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
5191da177e4SLinus Torvalds 		goto drop;
5201da177e4SLinus Torvalds 	}
5211da177e4SLinus Torvalds 
52235fc92a9SHerbert Xu 	skb_forward_csum(skb);
5231da177e4SLinus Torvalds 
5241da177e4SLinus Torvalds 	/*
5251da177e4SLinus Torvalds 	 *	We DO NOT make any processing on
5261da177e4SLinus Torvalds 	 *	RA packets, pushing them to user level AS IS
5271da177e4SLinus Torvalds 	 *	without ane WARRANTY that application will be able
5281da177e4SLinus Torvalds 	 *	to interpret them. The reason is that we
5291da177e4SLinus Torvalds 	 *	cannot make anything clever here.
5301da177e4SLinus Torvalds 	 *
5311da177e4SLinus Torvalds 	 *	We are not end-node, so that if packet contains
5321da177e4SLinus Torvalds 	 *	AH/ESP, we cannot make anything.
5331da177e4SLinus Torvalds 	 *	Defragmentation also would be mistake, RA packets
5341da177e4SLinus Torvalds 	 *	cannot be fragmented, because there is no warranty
5351da177e4SLinus Torvalds 	 *	that different fragments will go along one path. --ANK
5361da177e4SLinus Torvalds 	 */
537ab4eb353SYOSHIFUJI Hideaki / 吉藤英明 	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
538ab4eb353SYOSHIFUJI Hideaki / 吉藤英明 		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
5391da177e4SLinus Torvalds 			return 0;
5401da177e4SLinus Torvalds 	}
5411da177e4SLinus Torvalds 
5421da177e4SLinus Torvalds 	/*
5431da177e4SLinus Torvalds 	 *	check and decrement ttl
5441da177e4SLinus Torvalds 	 */
5451da177e4SLinus Torvalds 	if (hdr->hop_limit <= 1) {
5463ffe533cSAlexey Dobriyan 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
547bdb7cc64SStephen Suryaputra 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
5481da177e4SLinus Torvalds 
5492edc1a38SMenglong Dong 		kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
5501da177e4SLinus Torvalds 		return -ETIMEDOUT;
5511da177e4SLinus Torvalds 	}
5521da177e4SLinus Torvalds 
553fbea49e1SYOSHIFUJI Hideaki 	/* XXX: idev->cnf.proxy_ndp? */
55453b7997fSYOSHIFUJI Hideaki 	if (net->ipv6.devconf_all->proxy_ndp &&
5558a3edd80SDaniel Lezcano 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
55674553b09SVille Nuorvala 		int proxied = ip6_forward_proxy_check(skb);
55746c7655fSKangmin Park 		if (proxied > 0) {
5589f535c87SGergely Risko 			/* It's tempting to decrease the hop limit
5599f535c87SGergely Risko 			 * here by 1, as we do at the end of the
5609f535c87SGergely Risko 			 * function too.
5619f535c87SGergely Risko 			 *
5629f535c87SGergely Risko 			 * But that would be incorrect, as proxying is
5639f535c87SGergely Risko 			 * not forwarding.  The ip6_input function
5649f535c87SGergely Risko 			 * will handle this packet locally, and it
5659f535c87SGergely Risko 			 * depends on the hop limit being unchanged.
5669f535c87SGergely Risko 			 *
5679f535c87SGergely Risko 			 * One example is the NDP hop limit, that
5689f535c87SGergely Risko 			 * always has to stay 255, but other would be
5699f535c87SGergely Risko 			 * similar checks around RA packets, where the
5709f535c87SGergely Risko 			 * user can even change the desired limit.
5719f535c87SGergely Risko 			 */
572e21e0b5fSVille Nuorvala 			return ip6_input(skb);
57346c7655fSKangmin Park 		} else if (proxied < 0) {
574bdb7cc64SStephen Suryaputra 			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
57574553b09SVille Nuorvala 			goto drop;
57674553b09SVille Nuorvala 		}
577e21e0b5fSVille Nuorvala 	}
578e21e0b5fSVille Nuorvala 
5791da177e4SLinus Torvalds 	if (!xfrm6_route_forward(skb)) {
580bdb7cc64SStephen Suryaputra 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
5812edc1a38SMenglong Dong 		SKB_DR_SET(reason, XFRM_POLICY);
5821da177e4SLinus Torvalds 		goto drop;
5831da177e4SLinus Torvalds 	}
584adf30907SEric Dumazet 	dst = skb_dst(skb);
5851da177e4SLinus Torvalds 
5861da177e4SLinus Torvalds 	/* IPv6 specs say nothing about it, but it is clear that we cannot
5871da177e4SLinus Torvalds 	   send redirects to source routed frames.
5881e5dc146SMasahide NAKAMURA 	   We don't send redirects to frames decapsulated from IPsec.
5891da177e4SLinus Torvalds 	 */
5902f17becfSStephen Suryaputra 	if (IP6CB(skb)->iif == dst->dev->ifindex &&
5912f17becfSStephen Suryaputra 	    opt->srcrt == 0 && !skb_sec_path(skb)) {
5921da177e4SLinus Torvalds 		struct in6_addr *target = NULL;
593fbfe95a4SDavid S. Miller 		struct inet_peer *peer;
5941da177e4SLinus Torvalds 		struct rt6_info *rt;
5951da177e4SLinus Torvalds 
5961da177e4SLinus Torvalds 		/*
5971da177e4SLinus Torvalds 		 *	incoming and outgoing devices are the same
5981da177e4SLinus Torvalds 		 *	send a redirect.
5991da177e4SLinus Torvalds 		 */
6001da177e4SLinus Torvalds 
6011da177e4SLinus Torvalds 		rt = (struct rt6_info *) dst;
602c45a3dfbSDavid S. Miller 		if (rt->rt6i_flags & RTF_GATEWAY)
603c45a3dfbSDavid S. Miller 			target = &rt->rt6i_gateway;
6041da177e4SLinus Torvalds 		else
6051da177e4SLinus Torvalds 			target = &hdr->daddr;
6061da177e4SLinus Torvalds 
607fd0273d7SMartin KaFai Lau 		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
60892d86829SDavid S. Miller 
6091da177e4SLinus Torvalds 		/* Limit redirects both by destination (here)
6101da177e4SLinus Torvalds 		   and by source (inside ndisc_send_redirect)
6111da177e4SLinus Torvalds 		 */
612fbfe95a4SDavid S. Miller 		if (inet_peer_xrlim_allow(peer, 1*HZ))
6134991969aSDavid S. Miller 			ndisc_send_redirect(skb, target);
6141d861aa4SDavid S. Miller 		if (peer)
6151d861aa4SDavid S. Miller 			inet_putpeer(peer);
6165bb1ab09SDavid L Stevens 	} else {
6175bb1ab09SDavid L Stevens 		int addrtype = ipv6_addr_type(&hdr->saddr);
6185bb1ab09SDavid L Stevens 
6191da177e4SLinus Torvalds 		/* This check is security critical. */
620f81b2e7dSYOSHIFUJI Hideaki 		if (addrtype == IPV6_ADDR_ANY ||
621f81b2e7dSYOSHIFUJI Hideaki 		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
6221da177e4SLinus Torvalds 			goto error;
6235bb1ab09SDavid L Stevens 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
6245bb1ab09SDavid L Stevens 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
6253ffe533cSAlexey Dobriyan 				    ICMPV6_NOT_NEIGHBOUR, 0);
6265bb1ab09SDavid L Stevens 			goto error;
6275bb1ab09SDavid L Stevens 		}
6281da177e4SLinus Torvalds 	}
6291da177e4SLinus Torvalds 
630427faee1SVadim Fedorenko 	mtu = ip6_dst_mtu_maybe_forward(dst, true);
63114f3ad6fSUlrich Weber 	if (mtu < IPV6_MIN_MTU)
63214f3ad6fSUlrich Weber 		mtu = IPV6_MIN_MTU;
63314f3ad6fSUlrich Weber 
634fe6cc55fSFlorian Westphal 	if (ip6_pkt_too_big(skb, mtu)) {
6351da177e4SLinus Torvalds 		/* Again, force OUTPUT device used as source address */
6361da177e4SLinus Torvalds 		skb->dev = dst->dev;
63714f3ad6fSUlrich Weber 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
638bdb7cc64SStephen Suryaputra 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
6391d015503SEric Dumazet 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
64015c77d8bSEric Dumazet 				IPSTATS_MIB_FRAGFAILS);
6412edc1a38SMenglong Dong 		kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
6421da177e4SLinus Torvalds 		return -EMSGSIZE;
6431da177e4SLinus Torvalds 	}
6441da177e4SLinus Torvalds 
6451da177e4SLinus Torvalds 	if (skb_cow(skb, dst->dev->hard_header_len)) {
6461d015503SEric Dumazet 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
64715c77d8bSEric Dumazet 				IPSTATS_MIB_OUTDISCARDS);
6481da177e4SLinus Torvalds 		goto drop;
6491da177e4SLinus Torvalds 	}
6501da177e4SLinus Torvalds 
6510660e03fSArnaldo Carvalho de Melo 	hdr = ipv6_hdr(skb);
6521da177e4SLinus Torvalds 
6531da177e4SLinus Torvalds 	/* Mangling hops number delayed to point after skb COW */
6541da177e4SLinus Torvalds 
6551da177e4SLinus Torvalds 	hdr->hop_limit--;
6561da177e4SLinus Torvalds 
65729a26a56SEric W. Biederman 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
65829a26a56SEric W. Biederman 		       net, NULL, skb, skb->dev, dst->dev,
6596e23ae2aSPatrick McHardy 		       ip6_forward_finish);
6601da177e4SLinus Torvalds 
6611da177e4SLinus Torvalds error:
662bdb7cc64SStephen Suryaputra 	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
6632edc1a38SMenglong Dong 	SKB_DR_SET(reason, IP_INADDRERRORS);
6641da177e4SLinus Torvalds drop:
6652edc1a38SMenglong Dong 	kfree_skb_reason(skb, reason);
6661da177e4SLinus Torvalds 	return -EINVAL;
6671da177e4SLinus Torvalds }
6681da177e4SLinus Torvalds 
6691da177e4SLinus Torvalds static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
6701da177e4SLinus Torvalds {
6711da177e4SLinus Torvalds 	to->pkt_type = from->pkt_type;
6721da177e4SLinus Torvalds 	to->priority = from->priority;
6731da177e4SLinus Torvalds 	to->protocol = from->protocol;
674adf30907SEric Dumazet 	skb_dst_drop(to);
675adf30907SEric Dumazet 	skb_dst_set(to, dst_clone(skb_dst(from)));
6761da177e4SLinus Torvalds 	to->dev = from->dev;
67782e91ffeSThomas Graf 	to->mark = from->mark;
6781da177e4SLinus Torvalds 
6793dd1c9a1SPaolo Abeni 	skb_copy_hash(to, from);
6803dd1c9a1SPaolo Abeni 
6811da177e4SLinus Torvalds #ifdef CONFIG_NET_SCHED
6821da177e4SLinus Torvalds 	to->tc_index = from->tc_index;
6831da177e4SLinus Torvalds #endif
684e7ac05f3SYasuyuki Kozakai 	nf_copy(to, from);
685df5042f4SFlorian Westphal 	skb_ext_copy(to, from);
686984bc16cSJames Morris 	skb_copy_secmark(to, from);
6871da177e4SLinus Torvalds }
6881da177e4SLinus Torvalds 
6890feca619SPablo Neira Ayuso int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
6900feca619SPablo Neira Ayuso 		      u8 nexthdr, __be32 frag_id,
6910feca619SPablo Neira Ayuso 		      struct ip6_fraglist_iter *iter)
6920feca619SPablo Neira Ayuso {
6930feca619SPablo Neira Ayuso 	unsigned int first_len;
6940feca619SPablo Neira Ayuso 	struct frag_hdr *fh;
6950feca619SPablo Neira Ayuso 
6960feca619SPablo Neira Ayuso 	/* BUILD HEADER */
6970feca619SPablo Neira Ayuso 	*prevhdr = NEXTHDR_FRAGMENT;
6980feca619SPablo Neira Ayuso 	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
6990feca619SPablo Neira Ayuso 	if (!iter->tmp_hdr)
7000feca619SPablo Neira Ayuso 		return -ENOMEM;
7010feca619SPablo Neira Ayuso 
702b7034146SEric Dumazet 	iter->frag = skb_shinfo(skb)->frag_list;
7030feca619SPablo Neira Ayuso 	skb_frag_list_init(skb);
7040feca619SPablo Neira Ayuso 
7050feca619SPablo Neira Ayuso 	iter->offset = 0;
7060feca619SPablo Neira Ayuso 	iter->hlen = hlen;
7070feca619SPablo Neira Ayuso 	iter->frag_id = frag_id;
7080feca619SPablo Neira Ayuso 	iter->nexthdr = nexthdr;
7090feca619SPablo Neira Ayuso 
7100feca619SPablo Neira Ayuso 	__skb_pull(skb, hlen);
7110feca619SPablo Neira Ayuso 	fh = __skb_push(skb, sizeof(struct frag_hdr));
7120feca619SPablo Neira Ayuso 	__skb_push(skb, hlen);
7130feca619SPablo Neira Ayuso 	skb_reset_network_header(skb);
7140feca619SPablo Neira Ayuso 	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
7150feca619SPablo Neira Ayuso 
7160feca619SPablo Neira Ayuso 	fh->nexthdr = nexthdr;
7170feca619SPablo Neira Ayuso 	fh->reserved = 0;
7180feca619SPablo Neira Ayuso 	fh->frag_off = htons(IP6_MF);
7190feca619SPablo Neira Ayuso 	fh->identification = frag_id;
7200feca619SPablo Neira Ayuso 
7210feca619SPablo Neira Ayuso 	first_len = skb_pagelen(skb);
7220feca619SPablo Neira Ayuso 	skb->data_len = first_len - skb_headlen(skb);
7230feca619SPablo Neira Ayuso 	skb->len = first_len;
7240feca619SPablo Neira Ayuso 	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
7250feca619SPablo Neira Ayuso 
7260feca619SPablo Neira Ayuso 	return 0;
7270feca619SPablo Neira Ayuso }
7280feca619SPablo Neira Ayuso EXPORT_SYMBOL(ip6_fraglist_init);
7290feca619SPablo Neira Ayuso 
7300feca619SPablo Neira Ayuso void ip6_fraglist_prepare(struct sk_buff *skb,
7310feca619SPablo Neira Ayuso 			  struct ip6_fraglist_iter *iter)
7320feca619SPablo Neira Ayuso {
7330feca619SPablo Neira Ayuso 	struct sk_buff *frag = iter->frag;
7340feca619SPablo Neira Ayuso 	unsigned int hlen = iter->hlen;
7350feca619SPablo Neira Ayuso 	struct frag_hdr *fh;
7360feca619SPablo Neira Ayuso 
7370feca619SPablo Neira Ayuso 	frag->ip_summed = CHECKSUM_NONE;
7380feca619SPablo Neira Ayuso 	skb_reset_transport_header(frag);
7390feca619SPablo Neira Ayuso 	fh = __skb_push(frag, sizeof(struct frag_hdr));
7400feca619SPablo Neira Ayuso 	__skb_push(frag, hlen);
7410feca619SPablo Neira Ayuso 	skb_reset_network_header(frag);
7420feca619SPablo Neira Ayuso 	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
7430feca619SPablo Neira Ayuso 	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
7440feca619SPablo Neira Ayuso 	fh->nexthdr = iter->nexthdr;
7450feca619SPablo Neira Ayuso 	fh->reserved = 0;
7460feca619SPablo Neira Ayuso 	fh->frag_off = htons(iter->offset);
7470feca619SPablo Neira Ayuso 	if (frag->next)
7480feca619SPablo Neira Ayuso 		fh->frag_off |= htons(IP6_MF);
7490feca619SPablo Neira Ayuso 	fh->identification = iter->frag_id;
7500feca619SPablo Neira Ayuso 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
7510feca619SPablo Neira Ayuso 	ip6_copy_metadata(frag, skb);
7520feca619SPablo Neira Ayuso }
7530feca619SPablo Neira Ayuso EXPORT_SYMBOL(ip6_fraglist_prepare);
7540feca619SPablo Neira Ayuso 
7558a6a1f17SPablo Neira Ayuso void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
7568a6a1f17SPablo Neira Ayuso 		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
7578a6a1f17SPablo Neira Ayuso 		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
7588a6a1f17SPablo Neira Ayuso {
7598a6a1f17SPablo Neira Ayuso 	state->prevhdr = prevhdr;
7608a6a1f17SPablo Neira Ayuso 	state->nexthdr = nexthdr;
7618a6a1f17SPablo Neira Ayuso 	state->frag_id = frag_id;
7628a6a1f17SPablo Neira Ayuso 
7638a6a1f17SPablo Neira Ayuso 	state->hlen = hlen;
7648a6a1f17SPablo Neira Ayuso 	state->mtu = mtu;
7658a6a1f17SPablo Neira Ayuso 
7668a6a1f17SPablo Neira Ayuso 	state->left = skb->len - hlen;	/* Space per frame */
7678a6a1f17SPablo Neira Ayuso 	state->ptr = hlen;		/* Where to start from */
7688a6a1f17SPablo Neira Ayuso 
7698a6a1f17SPablo Neira Ayuso 	state->hroom = hdr_room;
7708a6a1f17SPablo Neira Ayuso 	state->troom = needed_tailroom;
7718a6a1f17SPablo Neira Ayuso 
7728a6a1f17SPablo Neira Ayuso 	state->offset = 0;
7738a6a1f17SPablo Neira Ayuso }
7748a6a1f17SPablo Neira Ayuso EXPORT_SYMBOL(ip6_frag_init);
7758a6a1f17SPablo Neira Ayuso 
7768a6a1f17SPablo Neira Ayuso struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
7778a6a1f17SPablo Neira Ayuso {
7788a6a1f17SPablo Neira Ayuso 	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
7798a6a1f17SPablo Neira Ayuso 	struct sk_buff *frag;
7808a6a1f17SPablo Neira Ayuso 	struct frag_hdr *fh;
7818a6a1f17SPablo Neira Ayuso 	unsigned int len;
7828a6a1f17SPablo Neira Ayuso 
7838a6a1f17SPablo Neira Ayuso 	len = state->left;
7848a6a1f17SPablo Neira Ayuso 	/* IF: it doesn't fit, use 'mtu' - the data space left */
7858a6a1f17SPablo Neira Ayuso 	if (len > state->mtu)
7868a6a1f17SPablo Neira Ayuso 		len = state->mtu;
7878a6a1f17SPablo Neira Ayuso 	/* IF: we are not sending up to and including the packet end
7888a6a1f17SPablo Neira Ayuso 	   then align the next start on an eight byte boundary */
7898a6a1f17SPablo Neira Ayuso 	if (len < state->left)
7908a6a1f17SPablo Neira Ayuso 		len &= ~7;
7918a6a1f17SPablo Neira Ayuso 
7928a6a1f17SPablo Neira Ayuso 	/* Allocate buffer */
7938a6a1f17SPablo Neira Ayuso 	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
7948a6a1f17SPablo Neira Ayuso 			 state->hroom + state->troom, GFP_ATOMIC);
7958a6a1f17SPablo Neira Ayuso 	if (!frag)
7968a6a1f17SPablo Neira Ayuso 		return ERR_PTR(-ENOMEM);
7978a6a1f17SPablo Neira Ayuso 
7988a6a1f17SPablo Neira Ayuso 	/*
7998a6a1f17SPablo Neira Ayuso 	 *	Set up data on packet
8008a6a1f17SPablo Neira Ayuso 	 */
8018a6a1f17SPablo Neira Ayuso 
8028a6a1f17SPablo Neira Ayuso 	ip6_copy_metadata(frag, skb);
8038a6a1f17SPablo Neira Ayuso 	skb_reserve(frag, state->hroom);
8048a6a1f17SPablo Neira Ayuso 	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
8058a6a1f17SPablo Neira Ayuso 	skb_reset_network_header(frag);
8068a6a1f17SPablo Neira Ayuso 	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
8078a6a1f17SPablo Neira Ayuso 	frag->transport_header = (frag->network_header + state->hlen +
8088a6a1f17SPablo Neira Ayuso 				  sizeof(struct frag_hdr));
8098a6a1f17SPablo Neira Ayuso 
8108a6a1f17SPablo Neira Ayuso 	/*
8118a6a1f17SPablo Neira Ayuso 	 *	Charge the memory for the fragment to any owner
8128a6a1f17SPablo Neira Ayuso 	 *	it might possess
8138a6a1f17SPablo Neira Ayuso 	 */
8148a6a1f17SPablo Neira Ayuso 	if (skb->sk)
8158a6a1f17SPablo Neira Ayuso 		skb_set_owner_w(frag, skb->sk);
8168a6a1f17SPablo Neira Ayuso 
8178a6a1f17SPablo Neira Ayuso 	/*
8188a6a1f17SPablo Neira Ayuso 	 *	Copy the packet header into the new buffer.
8198a6a1f17SPablo Neira Ayuso 	 */
8208a6a1f17SPablo Neira Ayuso 	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
8218a6a1f17SPablo Neira Ayuso 
8228a6a1f17SPablo Neira Ayuso 	fragnexthdr_offset = skb_network_header(frag);
8238a6a1f17SPablo Neira Ayuso 	fragnexthdr_offset += prevhdr - skb_network_header(skb);
8248a6a1f17SPablo Neira Ayuso 	*fragnexthdr_offset = NEXTHDR_FRAGMENT;
8258a6a1f17SPablo Neira Ayuso 
8268a6a1f17SPablo Neira Ayuso 	/*
8278a6a1f17SPablo Neira Ayuso 	 *	Build fragment header.
8288a6a1f17SPablo Neira Ayuso 	 */
8298a6a1f17SPablo Neira Ayuso 	fh->nexthdr = state->nexthdr;
8308a6a1f17SPablo Neira Ayuso 	fh->reserved = 0;
8318a6a1f17SPablo Neira Ayuso 	fh->identification = state->frag_id;
8328a6a1f17SPablo Neira Ayuso 
8338a6a1f17SPablo Neira Ayuso 	/*
8348a6a1f17SPablo Neira Ayuso 	 *	Copy a block of the IP datagram.
8358a6a1f17SPablo Neira Ayuso 	 */
8368a6a1f17SPablo Neira Ayuso 	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
8378a6a1f17SPablo Neira Ayuso 			     len));
8388a6a1f17SPablo Neira Ayuso 	state->left -= len;
8398a6a1f17SPablo Neira Ayuso 
8408a6a1f17SPablo Neira Ayuso 	fh->frag_off = htons(state->offset);
8418a6a1f17SPablo Neira Ayuso 	if (state->left > 0)
8428a6a1f17SPablo Neira Ayuso 		fh->frag_off |= htons(IP6_MF);
8438a6a1f17SPablo Neira Ayuso 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
8448a6a1f17SPablo Neira Ayuso 
8458a6a1f17SPablo Neira Ayuso 	state->ptr += len;
8468a6a1f17SPablo Neira Ayuso 	state->offset += len;
8478a6a1f17SPablo Neira Ayuso 
8488a6a1f17SPablo Neira Ayuso 	return frag;
8498a6a1f17SPablo Neira Ayuso }
8508a6a1f17SPablo Neira Ayuso EXPORT_SYMBOL(ip6_frag_next);
8518a6a1f17SPablo Neira Ayuso 
8527d8c6e39SEric W. Biederman int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
8537d8c6e39SEric W. Biederman 		 int (*output)(struct net *, struct sock *, struct sk_buff *))
8541da177e4SLinus Torvalds {
8551da177e4SLinus Torvalds 	struct sk_buff *frag;
856adf30907SEric Dumazet 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
857f60e5990Shannes@stressinduktion.org 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
858f60e5990Shannes@stressinduktion.org 				inet6_sk(skb->sk) : NULL;
859a1ac9c8aSMartin KaFai Lau 	bool mono_delivery_time = skb->mono_delivery_time;
8608a6a1f17SPablo Neira Ayuso 	struct ip6_frag_state state;
8618a6a1f17SPablo Neira Ayuso 	unsigned int mtu, hlen, nexthdr_offset;
8629669fffcSEric Dumazet 	ktime_t tstamp = skb->tstamp;
8638a6a1f17SPablo Neira Ayuso 	int hroom, err = 0;
864286c2349SMartin KaFai Lau 	__be32 frag_id;
8651da177e4SLinus Torvalds 	u8 *prevhdr, nexthdr = 0;
8661da177e4SLinus Torvalds 
8677dd7eb95SDavid S. Miller 	err = ip6_find_1stfragopt(skb, &prevhdr);
8687dd7eb95SDavid S. Miller 	if (err < 0)
8692423496aSCraig Gallek 		goto fail;
8707dd7eb95SDavid S. Miller 	hlen = err;
8711da177e4SLinus Torvalds 	nexthdr = *prevhdr;
872ef0efcd3SJunwei Hu 	nexthdr_offset = prevhdr - skb_network_header(skb);
8731da177e4SLinus Torvalds 
874628a5c56SJohn Heffner 	mtu = ip6_skb_dst_mtu(skb);
875b881ef76SJohn Heffner 
876b881ef76SJohn Heffner 	/* We must not fragment if the socket is set to force MTU discovery
87714f3ad6fSUlrich Weber 	 * or if the skb it not generated by a local socket.
878b881ef76SJohn Heffner 	 */
879485fca66SFlorian Westphal 	if (unlikely(!skb->ignore_df && skb->len > mtu))
880485fca66SFlorian Westphal 		goto fail_toobig;
881a34a101eSEric Dumazet 
882485fca66SFlorian Westphal 	if (IP6CB(skb)->frag_max_size) {
883485fca66SFlorian Westphal 		if (IP6CB(skb)->frag_max_size > mtu)
884485fca66SFlorian Westphal 			goto fail_toobig;
885485fca66SFlorian Westphal 
886485fca66SFlorian Westphal 		/* don't send fragments larger than what we received */
887485fca66SFlorian Westphal 		mtu = IP6CB(skb)->frag_max_size;
888485fca66SFlorian Westphal 		if (mtu < IPV6_MIN_MTU)
889485fca66SFlorian Westphal 			mtu = IPV6_MIN_MTU;
890b881ef76SJohn Heffner 	}
891b881ef76SJohn Heffner 
892d91675f9SYOSHIFUJI Hideaki 	if (np && np->frag_size < mtu) {
893d91675f9SYOSHIFUJI Hideaki 		if (np->frag_size)
894d91675f9SYOSHIFUJI Hideaki 			mtu = np->frag_size;
895d91675f9SYOSHIFUJI Hideaki 	}
89689bc7848SHannes Frederic Sowa 	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
897b72a2b01SHannes Frederic Sowa 		goto fail_toobig;
8981e0d69a9SHannes Frederic Sowa 	mtu -= hlen + sizeof(struct frag_hdr);
8991da177e4SLinus Torvalds 
900fd0273d7SMartin KaFai Lau 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
901fd0273d7SMartin KaFai Lau 				    &ipv6_hdr(skb)->saddr);
902286c2349SMartin KaFai Lau 
903405c92f7SHannes Frederic Sowa 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
904405c92f7SHannes Frederic Sowa 	    (err = skb_checksum_help(skb)))
905405c92f7SHannes Frederic Sowa 		goto fail;
906405c92f7SHannes Frederic Sowa 
907ef0efcd3SJunwei Hu 	prevhdr = skb_network_header(skb) + nexthdr_offset;
9081d325d21SFlorian Westphal 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
90921dc3301SDavid S. Miller 	if (skb_has_frag_list(skb)) {
910c72d8cdaSAlexey Dobriyan 		unsigned int first_len = skb_pagelen(skb);
9110feca619SPablo Neira Ayuso 		struct ip6_fraglist_iter iter;
9123d13008eSEric Dumazet 		struct sk_buff *frag2;
9131da177e4SLinus Torvalds 
9141da177e4SLinus Torvalds 		if (first_len - hlen > mtu ||
9151da177e4SLinus Torvalds 		    ((first_len - hlen) & 7) ||
9161d325d21SFlorian Westphal 		    skb_cloned(skb) ||
9171d325d21SFlorian Westphal 		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
9181da177e4SLinus Torvalds 			goto slow_path;
9191da177e4SLinus Torvalds 
9204d9092bbSDavid S. Miller 		skb_walk_frags(skb, frag) {
9211da177e4SLinus Torvalds 			/* Correct geometry. */
9221da177e4SLinus Torvalds 			if (frag->len > mtu ||
9231da177e4SLinus Torvalds 			    ((frag->len & 7) && frag->next) ||
9241d325d21SFlorian Westphal 			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
9253d13008eSEric Dumazet 				goto slow_path_clean;
9261da177e4SLinus Torvalds 
9271da177e4SLinus Torvalds 			/* Partially cloned skb? */
9281da177e4SLinus Torvalds 			if (skb_shared(frag))
9293d13008eSEric Dumazet 				goto slow_path_clean;
9302fdba6b0SHerbert Xu 
9312fdba6b0SHerbert Xu 			BUG_ON(frag->sk);
9322fdba6b0SHerbert Xu 			if (skb->sk) {
9332fdba6b0SHerbert Xu 				frag->sk = skb->sk;
9342fdba6b0SHerbert Xu 				frag->destructor = sock_wfree;
9352fdba6b0SHerbert Xu 			}
9363d13008eSEric Dumazet 			skb->truesize -= frag->truesize;
9371da177e4SLinus Torvalds 		}
9381da177e4SLinus Torvalds 
9390feca619SPablo Neira Ayuso 		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
9400feca619SPablo Neira Ayuso 					&iter);
9410feca619SPablo Neira Ayuso 		if (err < 0)
9421d325d21SFlorian Westphal 			goto fail;
9431da177e4SLinus Torvalds 
944803e8486SEric Dumazet 		/* We prevent @rt from being freed. */
945803e8486SEric Dumazet 		rcu_read_lock();
946803e8486SEric Dumazet 
9471da177e4SLinus Torvalds 		for (;;) {
9481da177e4SLinus Torvalds 			/* Prepare header of the next frame,
9491da177e4SLinus Torvalds 			 * before previous one went down. */
9500feca619SPablo Neira Ayuso 			if (iter.frag)
9510feca619SPablo Neira Ayuso 				ip6_fraglist_prepare(skb, &iter);
9521da177e4SLinus Torvalds 
953a1ac9c8aSMartin KaFai Lau 			skb_set_delivery_time(skb, tstamp, mono_delivery_time);
9547d8c6e39SEric W. Biederman 			err = output(net, sk, skb);
955dafee490SWei Dong 			if (!err)
956d8d1f30bSChangli Gao 				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
9573bd653c8SDenis V. Lunev 					      IPSTATS_MIB_FRAGCREATES);
958dafee490SWei Dong 
9590feca619SPablo Neira Ayuso 			if (err || !iter.frag)
9601da177e4SLinus Torvalds 				break;
9611da177e4SLinus Torvalds 
9620feca619SPablo Neira Ayuso 			skb = ip6_fraglist_next(&iter);
9631da177e4SLinus Torvalds 		}
9641da177e4SLinus Torvalds 
9650feca619SPablo Neira Ayuso 		kfree(iter.tmp_hdr);
9661da177e4SLinus Torvalds 
9671da177e4SLinus Torvalds 		if (err == 0) {
968d8d1f30bSChangli Gao 			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
9693bd653c8SDenis V. Lunev 				      IPSTATS_MIB_FRAGOKS);
970803e8486SEric Dumazet 			rcu_read_unlock();
9711da177e4SLinus Torvalds 			return 0;
9721da177e4SLinus Torvalds 		}
9731da177e4SLinus Torvalds 
974b7034146SEric Dumazet 		kfree_skb_list(iter.frag);
9751da177e4SLinus Torvalds 
976d8d1f30bSChangli Gao 		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
9773bd653c8SDenis V. Lunev 			      IPSTATS_MIB_FRAGFAILS);
978803e8486SEric Dumazet 		rcu_read_unlock();
9791da177e4SLinus Torvalds 		return err;
9803d13008eSEric Dumazet 
9813d13008eSEric Dumazet slow_path_clean:
9823d13008eSEric Dumazet 		skb_walk_frags(skb, frag2) {
9833d13008eSEric Dumazet 			if (frag2 == frag)
9843d13008eSEric Dumazet 				break;
9853d13008eSEric Dumazet 			frag2->sk = NULL;
9863d13008eSEric Dumazet 			frag2->destructor = NULL;
9873d13008eSEric Dumazet 			skb->truesize += frag2->truesize;
9883d13008eSEric Dumazet 		}
9891da177e4SLinus Torvalds 	}
9901da177e4SLinus Torvalds 
9911da177e4SLinus Torvalds slow_path:
9921da177e4SLinus Torvalds 	/*
9931da177e4SLinus Torvalds 	 *	Fragment the datagram.
9941da177e4SLinus Torvalds 	 */
9951da177e4SLinus Torvalds 
9968a6a1f17SPablo Neira Ayuso 	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
9978a6a1f17SPablo Neira Ayuso 		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
9988a6a1f17SPablo Neira Ayuso 		      &state);
9991da177e4SLinus Torvalds 
10001da177e4SLinus Torvalds 	/*
10011da177e4SLinus Torvalds 	 *	Keep copying data until we run out.
10021da177e4SLinus Torvalds 	 */
100379e49503SFlorian Westphal 
10048a6a1f17SPablo Neira Ayuso 	while (state.left > 0) {
10058a6a1f17SPablo Neira Ayuso 		frag = ip6_frag_next(skb, &state);
10068a6a1f17SPablo Neira Ayuso 		if (IS_ERR(frag)) {
10078a6a1f17SPablo Neira Ayuso 			err = PTR_ERR(frag);
10081da177e4SLinus Torvalds 			goto fail;
10091da177e4SLinus Torvalds 		}
10101da177e4SLinus Torvalds 
10111da177e4SLinus Torvalds 		/*
10121da177e4SLinus Torvalds 		 *	Put this fragment into the sending queue.
10131da177e4SLinus Torvalds 		 */
1014a1ac9c8aSMartin KaFai Lau 		skb_set_delivery_time(frag, tstamp, mono_delivery_time);
10157d8c6e39SEric W. Biederman 		err = output(net, sk, frag);
10161da177e4SLinus Torvalds 		if (err)
10171da177e4SLinus Torvalds 			goto fail;
1018dafee490SWei Dong 
1019adf30907SEric Dumazet 		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
10203bd653c8SDenis V. Lunev 			      IPSTATS_MIB_FRAGCREATES);
10211da177e4SLinus Torvalds 	}
1022adf30907SEric Dumazet 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1023a11d206dSYOSHIFUJI Hideaki 		      IPSTATS_MIB_FRAGOKS);
1024808db80aSEric Dumazet 	consume_skb(skb);
10251da177e4SLinus Torvalds 	return err;
10261da177e4SLinus Torvalds 
1027485fca66SFlorian Westphal fail_toobig:
1028485fca66SFlorian Westphal 	if (skb->sk && dst_allfrag(skb_dst(skb)))
1029aba54656SEric Dumazet 		sk_gso_disable(skb->sk);
1030485fca66SFlorian Westphal 
1031485fca66SFlorian Westphal 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1032485fca66SFlorian Westphal 	err = -EMSGSIZE;
1033485fca66SFlorian Westphal 
10341da177e4SLinus Torvalds fail:
1035adf30907SEric Dumazet 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1036a11d206dSYOSHIFUJI Hideaki 		      IPSTATS_MIB_FRAGFAILS);
10371da177e4SLinus Torvalds 	kfree_skb(skb);
10381da177e4SLinus Torvalds 	return err;
10391da177e4SLinus Torvalds }
10401da177e4SLinus Torvalds 
1041b71d1d42SEric Dumazet static inline int ip6_rt_check(const struct rt6key *rt_key,
1042b71d1d42SEric Dumazet 			       const struct in6_addr *fl_addr,
1043b71d1d42SEric Dumazet 			       const struct in6_addr *addr_cache)
1044cf6b1982SYOSHIFUJI Hideaki {
1045a02cec21SEric Dumazet 	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
104663159f29SIan Morris 		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1047cf6b1982SYOSHIFUJI Hideaki }
1048cf6b1982SYOSHIFUJI Hideaki 
1049497c615aSHerbert Xu static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
1050497c615aSHerbert Xu 					  struct dst_entry *dst,
1051b71d1d42SEric Dumazet 					  const struct flowi6 *fl6)
10521da177e4SLinus Torvalds {
10531da177e4SLinus Torvalds 	struct ipv6_pinfo *np = inet6_sk(sk);
1054a963a37dSEric Dumazet 	struct rt6_info *rt;
10551da177e4SLinus Torvalds 
1056497c615aSHerbert Xu 	if (!dst)
1057497c615aSHerbert Xu 		goto out;
10581da177e4SLinus Torvalds 
1059a963a37dSEric Dumazet 	if (dst->ops->family != AF_INET6) {
1060a963a37dSEric Dumazet 		dst_release(dst);
1061a963a37dSEric Dumazet 		return NULL;
1062a963a37dSEric Dumazet 	}
1063a963a37dSEric Dumazet 
1064a963a37dSEric Dumazet 	rt = (struct rt6_info *)dst;
10651da177e4SLinus Torvalds 	/* Yes, checking route validity in not connected
1066d76e60a5SDavid S. Miller 	 * case is not very simple. Take into account,
1067d76e60a5SDavid S. Miller 	 * that we do not support routing by source, TOS,
1068d76e60a5SDavid S. Miller 	 * and MSG_DONTROUTE		--ANK (980726)
1069d76e60a5SDavid S. Miller 	 *
1070cf6b1982SYOSHIFUJI Hideaki 	 * 1. ip6_rt_check(): If route was host route,
1071cf6b1982SYOSHIFUJI Hideaki 	 *    check that cached destination is current.
1072d76e60a5SDavid S. Miller 	 *    If it is network route, we still may
1073d76e60a5SDavid S. Miller 	 *    check its validity using saved pointer
1074d76e60a5SDavid S. Miller 	 *    to the last used address: daddr_cache.
1075d76e60a5SDavid S. Miller 	 *    We do not want to save whole address now,
1076d76e60a5SDavid S. Miller 	 *    (because main consumer of this service
1077d76e60a5SDavid S. Miller 	 *    is tcp, which has not this problem),
1078d76e60a5SDavid S. Miller 	 *    so that the last trick works only on connected
1079d76e60a5SDavid S. Miller 	 *    sockets.
1080d76e60a5SDavid S. Miller 	 * 2. oif also should be the same.
10811da177e4SLinus Torvalds 	 */
10824c9483b2SDavid S. Miller 	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
10838e1ef0a9SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_SUBTREES
10844c9483b2SDavid S. Miller 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
10858e1ef0a9SYOSHIFUJI Hideaki #endif
108640867d74SDavid Ahern 	   (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
1087497c615aSHerbert Xu 		dst_release(dst);
1088497c615aSHerbert Xu 		dst = NULL;
10891da177e4SLinus Torvalds 	}
1090497c615aSHerbert Xu 
1091497c615aSHerbert Xu out:
1092497c615aSHerbert Xu 	return dst;
10931da177e4SLinus Torvalds }
1094497c615aSHerbert Xu 
10953aef934fSEric Dumazet static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
10964c9483b2SDavid S. Miller 			       struct dst_entry **dst, struct flowi6 *fl6)
1097497c615aSHerbert Xu {
109869cce1d1SDavid S. Miller #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
109969cce1d1SDavid S. Miller 	struct neighbour *n;
110097cac082SDavid S. Miller 	struct rt6_info *rt;
110169cce1d1SDavid S. Miller #endif
110269cce1d1SDavid S. Miller 	int err;
11036f21c96aSPaolo Abeni 	int flags = 0;
11041da177e4SLinus Torvalds 
1105e16e888bSMarkus Stenberg 	/* The correct way to handle this would be to do
1106e16e888bSMarkus Stenberg 	 * ip6_route_get_saddr, and then ip6_route_output; however,
1107e16e888bSMarkus Stenberg 	 * the route-specific preferred source forces the
1108e16e888bSMarkus Stenberg 	 * ip6_route_output call _before_ ip6_route_get_saddr.
1109e16e888bSMarkus Stenberg 	 *
1110e16e888bSMarkus Stenberg 	 * In source specific routing (no src=any default route),
1111e16e888bSMarkus Stenberg 	 * ip6_route_output will fail given src=any saddr, though, so
1112e16e888bSMarkus Stenberg 	 * that's why we try it again later.
1113e16e888bSMarkus Stenberg 	 */
1114c305b9e6Szhang kai 	if (ipv6_addr_any(&fl6->saddr)) {
1115a68886a6SDavid Ahern 		struct fib6_info *from;
1116e16e888bSMarkus Stenberg 		struct rt6_info *rt;
1117e16e888bSMarkus Stenberg 
1118e16e888bSMarkus Stenberg 		*dst = ip6_route_output(net, sk, fl6);
1119e16e888bSMarkus Stenberg 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1120a68886a6SDavid Ahern 
1121a68886a6SDavid Ahern 		rcu_read_lock();
1122a68886a6SDavid Ahern 		from = rt ? rcu_dereference(rt->from) : NULL;
1123a68886a6SDavid Ahern 		err = ip6_route_get_saddr(net, from, &fl6->daddr,
1124e16e888bSMarkus Stenberg 					  sk ? inet6_sk(sk)->srcprefs : 0,
1125*e7f3e5fbSNicolas Dichtel 					  fl6->flowi6_l3mdev,
1126e16e888bSMarkus Stenberg 					  &fl6->saddr);
1127a68886a6SDavid Ahern 		rcu_read_unlock();
1128a68886a6SDavid Ahern 
1129e16e888bSMarkus Stenberg 		if (err)
1130e16e888bSMarkus Stenberg 			goto out_err_release;
1131e16e888bSMarkus Stenberg 
1132e16e888bSMarkus Stenberg 		/* If we had an erroneous initial result, pretend it
1133e16e888bSMarkus Stenberg 		 * never existed and let the SA-enabled version take
1134e16e888bSMarkus Stenberg 		 * over.
1135e16e888bSMarkus Stenberg 		 */
1136c305b9e6Szhang kai 		if ((*dst)->error) {
1137e16e888bSMarkus Stenberg 			dst_release(*dst);
1138e16e888bSMarkus Stenberg 			*dst = NULL;
1139e16e888bSMarkus Stenberg 		}
11406f21c96aSPaolo Abeni 
11416f21c96aSPaolo Abeni 		if (fl6->flowi6_oif)
11426f21c96aSPaolo Abeni 			flags |= RT6_LOOKUP_F_IFACE;
1143e16e888bSMarkus Stenberg 	}
1144e16e888bSMarkus Stenberg 
114563159f29SIan Morris 	if (!*dst)
11466f21c96aSPaolo Abeni 		*dst = ip6_route_output_flags(net, sk, fl6, flags);
11471da177e4SLinus Torvalds 
1148e5d08d71SIan Morris 	err = (*dst)->error;
1149e5d08d71SIan Morris 	if (err)
11501da177e4SLinus Torvalds 		goto out_err_release;
11511da177e4SLinus Torvalds 
115295c385b4SNeil Horman #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
115395c385b4SNeil Horman 	/*
115495c385b4SNeil Horman 	 * Here if the dst entry we've looked up
115595c385b4SNeil Horman 	 * has a neighbour entry that is in the INCOMPLETE
115695c385b4SNeil Horman 	 * state and the src address from the flow is
115795c385b4SNeil Horman 	 * marked as OPTIMISTIC, we release the found
115895c385b4SNeil Horman 	 * dst entry and replace it instead with the
115995c385b4SNeil Horman 	 * dst entry of the nexthop router
116095c385b4SNeil Horman 	 */
1161c56bf6feSEric Dumazet 	rt = (struct rt6_info *) *dst;
116209eed119SEric Dumazet 	rcu_read_lock();
11632647a9b0SMartin KaFai Lau 	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
11642647a9b0SMartin KaFai Lau 				      rt6_nexthop(rt, &fl6->daddr));
1165b071af52SEric Dumazet 	err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0;
116609eed119SEric Dumazet 	rcu_read_unlock();
1167707be1ffSYOSHIFUJI Hideaki / 吉藤英明 
1168707be1ffSYOSHIFUJI Hideaki / 吉藤英明 	if (err) {
116995c385b4SNeil Horman 		struct inet6_ifaddr *ifp;
11704c9483b2SDavid S. Miller 		struct flowi6 fl_gw6;
117195c385b4SNeil Horman 		int redirect;
117295c385b4SNeil Horman 
11734c9483b2SDavid S. Miller 		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
11741cab3da6SDaniel Lezcano 				      (*dst)->dev, 1);
117595c385b4SNeil Horman 
117695c385b4SNeil Horman 		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
117795c385b4SNeil Horman 		if (ifp)
117895c385b4SNeil Horman 			in6_ifa_put(ifp);
117995c385b4SNeil Horman 
118095c385b4SNeil Horman 		if (redirect) {
118195c385b4SNeil Horman 			/*
118295c385b4SNeil Horman 			 * We need to get the dst entry for the
118395c385b4SNeil Horman 			 * default router instead
118495c385b4SNeil Horman 			 */
118595c385b4SNeil Horman 			dst_release(*dst);
11864c9483b2SDavid S. Miller 			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
11874c9483b2SDavid S. Miller 			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
11884c9483b2SDavid S. Miller 			*dst = ip6_route_output(net, sk, &fl_gw6);
1189e5d08d71SIan Morris 			err = (*dst)->error;
1190e5d08d71SIan Morris 			if (err)
119195c385b4SNeil Horman 				goto out_err_release;
119295c385b4SNeil Horman 		}
119395c385b4SNeil Horman 	}
119495c385b4SNeil Horman #endif
1195ec5e3b0aSJonathan T. Leighton 	if (ipv6_addr_v4mapped(&fl6->saddr) &&
119600ea1ceeSWillem de Bruijn 	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
119700ea1ceeSWillem de Bruijn 		err = -EAFNOSUPPORT;
119800ea1ceeSWillem de Bruijn 		goto out_err_release;
119900ea1ceeSWillem de Bruijn 	}
120095c385b4SNeil Horman 
12011da177e4SLinus Torvalds 	return 0;
12021da177e4SLinus Torvalds 
12031da177e4SLinus Torvalds out_err_release:
12041da177e4SLinus Torvalds 	dst_release(*dst);
12051da177e4SLinus Torvalds 	*dst = NULL;
12068a966fc0SDavid Ahern 
12070d240e78SDavid Ahern 	if (err == -ENETUNREACH)
12080d240e78SDavid Ahern 		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
12091da177e4SLinus Torvalds 	return err;
12101da177e4SLinus Torvalds }
121134a0b3cdSAdrian Bunk 
1212497c615aSHerbert Xu /**
1213497c615aSHerbert Xu  *	ip6_dst_lookup - perform route lookup on flow
1214b51cd7c8SAndrew Lunn  *	@net: Network namespace to perform lookup in
1215497c615aSHerbert Xu  *	@sk: socket which provides route info
1216497c615aSHerbert Xu  *	@dst: pointer to dst_entry * for result
12174c9483b2SDavid S. Miller  *	@fl6: flow to lookup
1218497c615aSHerbert Xu  *
1219497c615aSHerbert Xu  *	This function performs a route lookup on the given flow.
1220497c615aSHerbert Xu  *
1221497c615aSHerbert Xu  *	It returns zero on success, or a standard errno code on error.
1222497c615aSHerbert Xu  */
1223343d60aaSRoopa Prabhu int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1224343d60aaSRoopa Prabhu 		   struct flowi6 *fl6)
1225497c615aSHerbert Xu {
1226497c615aSHerbert Xu 	*dst = NULL;
1227343d60aaSRoopa Prabhu 	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1228497c615aSHerbert Xu }
12293cf3dc6cSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(ip6_dst_lookup);
12303cf3dc6cSArnaldo Carvalho de Melo 
1231497c615aSHerbert Xu /**
123268d0c6d3SDavid S. Miller  *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1233b51cd7c8SAndrew Lunn  *	@net: Network namespace to perform lookup in
123468d0c6d3SDavid S. Miller  *	@sk: socket which provides route info
12354c9483b2SDavid S. Miller  *	@fl6: flow to lookup
123668d0c6d3SDavid S. Miller  *	@final_dst: final destination address for ipsec lookup
123768d0c6d3SDavid S. Miller  *
123868d0c6d3SDavid S. Miller  *	This function performs a route lookup on the given flow.
123968d0c6d3SDavid S. Miller  *
124068d0c6d3SDavid S. Miller  *	It returns a valid dst pointer on success, or a pointer encoded
124168d0c6d3SDavid S. Miller  *	error code.
124268d0c6d3SDavid S. Miller  */
1243c4e85f73SSabrina Dubroca struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
12440e0d44abSSteffen Klassert 				      const struct in6_addr *final_dst)
124568d0c6d3SDavid S. Miller {
124668d0c6d3SDavid S. Miller 	struct dst_entry *dst = NULL;
124768d0c6d3SDavid S. Miller 	int err;
124868d0c6d3SDavid S. Miller 
1249c4e85f73SSabrina Dubroca 	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
125068d0c6d3SDavid S. Miller 	if (err)
125168d0c6d3SDavid S. Miller 		return ERR_PTR(err);
125268d0c6d3SDavid S. Miller 	if (final_dst)
12534e3fd7a0SAlexey Dobriyan 		fl6->daddr = *final_dst;
12542774c131SDavid S. Miller 
1255c4e85f73SSabrina Dubroca 	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
125668d0c6d3SDavid S. Miller }
125768d0c6d3SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
125868d0c6d3SDavid S. Miller 
125968d0c6d3SDavid S. Miller /**
126068d0c6d3SDavid S. Miller  *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
126168d0c6d3SDavid S. Miller  *	@sk: socket which provides the dst cache and route info
12624c9483b2SDavid S. Miller  *	@fl6: flow to lookup
126368d0c6d3SDavid S. Miller  *	@final_dst: final destination address for ipsec lookup
126496818159SAlexey Kodanev  *	@connected: whether @sk is connected or not
1265497c615aSHerbert Xu  *
1266497c615aSHerbert Xu  *	This function performs a route lookup on the given flow with the
1267497c615aSHerbert Xu  *	possibility of using the cached route in the socket if it is valid.
1268497c615aSHerbert Xu  *	It will take the socket dst lock when operating on the dst cache.
1269497c615aSHerbert Xu  *	As a result, this function can only be used in process context.
1270497c615aSHerbert Xu  *
127196818159SAlexey Kodanev  *	In addition, for a connected socket, cache the dst in the socket
127296818159SAlexey Kodanev  *	if the current cache is not valid.
127396818159SAlexey Kodanev  *
127468d0c6d3SDavid S. Miller  *	It returns a valid dst pointer on success, or a pointer encoded
127568d0c6d3SDavid S. Miller  *	error code.
1276497c615aSHerbert Xu  */
12774c9483b2SDavid S. Miller struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
127896818159SAlexey Kodanev 					 const struct in6_addr *final_dst,
127996818159SAlexey Kodanev 					 bool connected)
1280497c615aSHerbert Xu {
128168d0c6d3SDavid S. Miller 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1282497c615aSHerbert Xu 
12834c9483b2SDavid S. Miller 	dst = ip6_sk_dst_check(sk, dst, fl6);
128496818159SAlexey Kodanev 	if (dst)
128596818159SAlexey Kodanev 		return dst;
128696818159SAlexey Kodanev 
1287c4e85f73SSabrina Dubroca 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
128896818159SAlexey Kodanev 	if (connected && !IS_ERR(dst))
128996818159SAlexey Kodanev 		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
129068d0c6d3SDavid S. Miller 
129100bc0ef5SJakub Sitnicki 	return dst;
129268d0c6d3SDavid S. Miller }
129368d0c6d3SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1294497c615aSHerbert Xu 
1295571912c6SMartin Varghese /**
1296571912c6SMartin Varghese  *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
1297571912c6SMartin Varghese  *      @skb: Packet for which lookup is done
1298571912c6SMartin Varghese  *      @dev: Tunnel device
1299571912c6SMartin Varghese  *      @net: Network namespace of tunnel device
1300b51cd7c8SAndrew Lunn  *      @sock: Socket which provides route info
1301571912c6SMartin Varghese  *      @saddr: Memory to store the src ip address
1302571912c6SMartin Varghese  *      @info: Tunnel information
1303571912c6SMartin Varghese  *      @protocol: IP protocol
1304b51cd7c8SAndrew Lunn  *      @use_cache: Flag to enable cache usage
1305571912c6SMartin Varghese  *      This function performs a route lookup on a tunnel
1306571912c6SMartin Varghese  *
1307571912c6SMartin Varghese  *      It returns a valid dst pointer and stores src address to be used in
1308571912c6SMartin Varghese  *      tunnel in param saddr on success, else a pointer encoded error code.
1309571912c6SMartin Varghese  */
1310571912c6SMartin Varghese 
1311571912c6SMartin Varghese struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1312571912c6SMartin Varghese 					struct net_device *dev,
1313571912c6SMartin Varghese 					struct net *net,
1314571912c6SMartin Varghese 					struct socket *sock,
1315571912c6SMartin Varghese 					struct in6_addr *saddr,
1316571912c6SMartin Varghese 					const struct ip_tunnel_info *info,
1317571912c6SMartin Varghese 					u8 protocol,
1318571912c6SMartin Varghese 					bool use_cache)
1319571912c6SMartin Varghese {
1320571912c6SMartin Varghese 	struct dst_entry *dst = NULL;
1321571912c6SMartin Varghese #ifdef CONFIG_DST_CACHE
1322571912c6SMartin Varghese 	struct dst_cache *dst_cache;
1323571912c6SMartin Varghese #endif
1324571912c6SMartin Varghese 	struct flowi6 fl6;
1325571912c6SMartin Varghese 	__u8 prio;
1326571912c6SMartin Varghese 
1327571912c6SMartin Varghese #ifdef CONFIG_DST_CACHE
1328571912c6SMartin Varghese 	dst_cache = (struct dst_cache *)&info->dst_cache;
1329571912c6SMartin Varghese 	if (use_cache) {
1330571912c6SMartin Varghese 		dst = dst_cache_get_ip6(dst_cache, saddr);
1331571912c6SMartin Varghese 		if (dst)
1332571912c6SMartin Varghese 			return dst;
1333571912c6SMartin Varghese 	}
1334571912c6SMartin Varghese #endif
1335571912c6SMartin Varghese 	memset(&fl6, 0, sizeof(fl6));
1336571912c6SMartin Varghese 	fl6.flowi6_mark = skb->mark;
1337571912c6SMartin Varghese 	fl6.flowi6_proto = protocol;
1338571912c6SMartin Varghese 	fl6.daddr = info->key.u.ipv6.dst;
1339571912c6SMartin Varghese 	fl6.saddr = info->key.u.ipv6.src;
1340571912c6SMartin Varghese 	prio = info->key.tos;
1341ab7e2e0dSMatthias May 	fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);
1342571912c6SMartin Varghese 
1343571912c6SMartin Varghese 	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1344571912c6SMartin Varghese 					      NULL);
1345571912c6SMartin Varghese 	if (IS_ERR(dst)) {
1346571912c6SMartin Varghese 		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1347571912c6SMartin Varghese 		return ERR_PTR(-ENETUNREACH);
1348571912c6SMartin Varghese 	}
1349571912c6SMartin Varghese 	if (dst->dev == dev) { /* is this necessary? */
1350571912c6SMartin Varghese 		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1351571912c6SMartin Varghese 		dst_release(dst);
1352571912c6SMartin Varghese 		return ERR_PTR(-ELOOP);
1353571912c6SMartin Varghese 	}
1354571912c6SMartin Varghese #ifdef CONFIG_DST_CACHE
1355571912c6SMartin Varghese 	if (use_cache)
1356571912c6SMartin Varghese 		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1357571912c6SMartin Varghese #endif
1358571912c6SMartin Varghese 	*saddr = fl6.saddr;
1359571912c6SMartin Varghese 	return dst;
1360571912c6SMartin Varghese }
1361571912c6SMartin Varghese EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1362571912c6SMartin Varghese 
13630178b695SHerbert Xu static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
13640178b695SHerbert Xu 					       gfp_t gfp)
13650178b695SHerbert Xu {
13660178b695SHerbert Xu 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
13670178b695SHerbert Xu }
13680178b695SHerbert Xu 
13690178b695SHerbert Xu static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
13700178b695SHerbert Xu 						gfp_t gfp)
13710178b695SHerbert Xu {
13720178b695SHerbert Xu 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
13730178b695SHerbert Xu }
13740178b695SHerbert Xu 
137575a493e6SHannes Frederic Sowa static void ip6_append_data_mtu(unsigned int *mtu,
13760c183379SGao feng 				int *maxfraglen,
13770c183379SGao feng 				unsigned int fragheaderlen,
13780c183379SGao feng 				struct sk_buff *skb,
137975a493e6SHannes Frederic Sowa 				struct rt6_info *rt,
1380e367c2d0Slucien 				unsigned int orig_mtu)
13810c183379SGao feng {
13820c183379SGao feng 	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
138363159f29SIan Morris 		if (!skb) {
13840c183379SGao feng 			/* first fragment, reserve header_len */
1385e367c2d0Slucien 			*mtu = orig_mtu - rt->dst.header_len;
13860c183379SGao feng 
13870c183379SGao feng 		} else {
13880c183379SGao feng 			/*
13890c183379SGao feng 			 * this fragment is not first, the headers
13900c183379SGao feng 			 * space is regarded as data space.
13910c183379SGao feng 			 */
1392e367c2d0Slucien 			*mtu = orig_mtu;
13930c183379SGao feng 		}
13940c183379SGao feng 		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
13950c183379SGao feng 			      + fragheaderlen - sizeof(struct frag_hdr);
13960c183379SGao feng 	}
13970c183379SGao feng }
13980c183379SGao feng 
1399366e41d9SVlad Yasevich static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
140026879da5SWei Wang 			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1401f37a4cc6SPavel Begunkov 			  struct rt6_info *rt)
1402366e41d9SVlad Yasevich {
1403366e41d9SVlad Yasevich 	struct ipv6_pinfo *np = inet6_sk(sk);
1404366e41d9SVlad Yasevich 	unsigned int mtu;
1405d656b2eaSPavel Begunkov 	struct ipv6_txoptions *nopt, *opt = ipc6->opt;
1406366e41d9SVlad Yasevich 
140740ac240cSPavel Begunkov 	/* callers pass dst together with a reference, set it first so
140840ac240cSPavel Begunkov 	 * ip6_cork_release() can put it down even in case of an error.
140940ac240cSPavel Begunkov 	 */
141040ac240cSPavel Begunkov 	cork->base.dst = &rt->dst;
141140ac240cSPavel Begunkov 
1412366e41d9SVlad Yasevich 	/*
1413366e41d9SVlad Yasevich 	 * setup for corking
1414366e41d9SVlad Yasevich 	 */
1415366e41d9SVlad Yasevich 	if (opt) {
1416366e41d9SVlad Yasevich 		if (WARN_ON(v6_cork->opt))
1417366e41d9SVlad Yasevich 			return -EINVAL;
1418366e41d9SVlad Yasevich 
1419d656b2eaSPavel Begunkov 		nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1420d656b2eaSPavel Begunkov 		if (unlikely(!nopt))
1421366e41d9SVlad Yasevich 			return -ENOBUFS;
1422366e41d9SVlad Yasevich 
1423d656b2eaSPavel Begunkov 		nopt->tot_len = sizeof(*opt);
1424d656b2eaSPavel Begunkov 		nopt->opt_flen = opt->opt_flen;
1425d656b2eaSPavel Begunkov 		nopt->opt_nflen = opt->opt_nflen;
1426366e41d9SVlad Yasevich 
1427d656b2eaSPavel Begunkov 		nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
1428d656b2eaSPavel Begunkov 		if (opt->dst0opt && !nopt->dst0opt)
1429366e41d9SVlad Yasevich 			return -ENOBUFS;
1430366e41d9SVlad Yasevich 
1431d656b2eaSPavel Begunkov 		nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
1432d656b2eaSPavel Begunkov 		if (opt->dst1opt && !nopt->dst1opt)
1433366e41d9SVlad Yasevich 			return -ENOBUFS;
1434366e41d9SVlad Yasevich 
1435d656b2eaSPavel Begunkov 		nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
1436d656b2eaSPavel Begunkov 		if (opt->hopopt && !nopt->hopopt)
1437366e41d9SVlad Yasevich 			return -ENOBUFS;
1438366e41d9SVlad Yasevich 
1439d656b2eaSPavel Begunkov 		nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
1440d656b2eaSPavel Begunkov 		if (opt->srcrt && !nopt->srcrt)
1441366e41d9SVlad Yasevich 			return -ENOBUFS;
1442366e41d9SVlad Yasevich 
1443366e41d9SVlad Yasevich 		/* need source address above miyazawa*/
1444366e41d9SVlad Yasevich 	}
144526879da5SWei Wang 	v6_cork->hop_limit = ipc6->hlimit;
144626879da5SWei Wang 	v6_cork->tclass = ipc6->tclass;
1447366e41d9SVlad Yasevich 	if (rt->dst.flags & DST_XFRM_TUNNEL)
1448366e41d9SVlad Yasevich 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1449749439bfSMike Maloney 		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1450366e41d9SVlad Yasevich 	else
1451366e41d9SVlad Yasevich 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1452c02b3741SDavid S. Miller 			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1453366e41d9SVlad Yasevich 	if (np->frag_size < mtu) {
1454366e41d9SVlad Yasevich 		if (np->frag_size)
1455366e41d9SVlad Yasevich 			mtu = np->frag_size;
1456366e41d9SVlad Yasevich 	}
1457366e41d9SVlad Yasevich 	cork->base.fragsize = mtu;
1458fbf47813SWillem de Bruijn 	cork->base.gso_size = ipc6->gso_size;
1459678ca42dSWillem de Bruijn 	cork->base.tx_flags = 0;
1460c6af0c22SWillem de Bruijn 	cork->base.mark = ipc6->sockc.mark;
1461678ca42dSWillem de Bruijn 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1462bec1f6f6SWillem de Bruijn 
14630f6c480fSDavid Miller 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1464366e41d9SVlad Yasevich 		cork->base.flags |= IPCORK_ALLFRAG;
1465366e41d9SVlad Yasevich 	cork->base.length = 0;
1466366e41d9SVlad Yasevich 
14675fdaa88dSWillem de Bruijn 	cork->base.transmit_time = ipc6->sockc.transmit_time;
1468a818f75eSJesus Sanchez-Palencia 
1469366e41d9SVlad Yasevich 	return 0;
1470366e41d9SVlad Yasevich }
1471366e41d9SVlad Yasevich 
14720bbe84a6SVlad Yasevich static int __ip6_append_data(struct sock *sk,
14730bbe84a6SVlad Yasevich 			     struct sk_buff_head *queue,
1474f3b46a3eSPavel Begunkov 			     struct inet_cork_full *cork_full,
14750bbe84a6SVlad Yasevich 			     struct inet6_cork *v6_cork,
14760bbe84a6SVlad Yasevich 			     struct page_frag *pfrag,
14770bbe84a6SVlad Yasevich 			     int getfrag(void *from, char *to, int offset,
14780bbe84a6SVlad Yasevich 					 int len, int odd, struct sk_buff *skb),
1479f93431c8SWang Yufen 			     void *from, size_t length, int transhdrlen,
14805fdaa88dSWillem de Bruijn 			     unsigned int flags, struct ipcm6_cookie *ipc6)
14811da177e4SLinus Torvalds {
14820c183379SGao feng 	struct sk_buff *skb, *skb_prev = NULL;
1483f3b46a3eSPavel Begunkov 	struct inet_cork *cork = &cork_full->base;
1484f37a4cc6SPavel Begunkov 	struct flowi6 *fl6 = &cork_full->fl.u.ip6;
148510b8a3deSPaolo Abeni 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1486b5947e5dSWillem de Bruijn 	struct ubuf_info *uarg = NULL;
14870bbe84a6SVlad Yasevich 	int exthdrlen = 0;
14880bbe84a6SVlad Yasevich 	int dst_exthdrlen = 0;
14891da177e4SLinus Torvalds 	int hh_len;
14901da177e4SLinus Torvalds 	int copy;
14911da177e4SLinus Torvalds 	int err;
14921da177e4SLinus Torvalds 	int offset = 0;
1493773ba4feSPavel Begunkov 	bool zc = false;
149409c2d251SWillem de Bruijn 	u32 tskey = 0;
14950bbe84a6SVlad Yasevich 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
14960bbe84a6SVlad Yasevich 	struct ipv6_txoptions *opt = v6_cork->opt;
149732dce968SVlad Yasevich 	int csummode = CHECKSUM_NONE;
1498682b1a9dSHannes Frederic Sowa 	unsigned int maxnonfragsize, headersize;
14991f4c6eb2SEric Dumazet 	unsigned int wmem_alloc_delta = 0;
1500100f6d8eSWillem de Bruijn 	bool paged, extra_uref = false;
15011da177e4SLinus Torvalds 
15020bbe84a6SVlad Yasevich 	skb = skb_peek_tail(queue);
15030bbe84a6SVlad Yasevich 	if (!skb) {
15040bbe84a6SVlad Yasevich 		exthdrlen = opt ? opt->opt_flen : 0;
15057efdba5bSRomain KUNTZ 		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
15061da177e4SLinus Torvalds 	}
15070bbe84a6SVlad Yasevich 
150815e36f5bSWillem de Bruijn 	paged = !!cork->gso_size;
1509bec1f6f6SWillem de Bruijn 	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1510e367c2d0Slucien 	orig_mtu = mtu;
15111da177e4SLinus Torvalds 
15128ca5a579SVadim Fedorenko 	if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
1513e3390b30SEric Dumazet 	    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
1514a1cdec57SEric Dumazet 		tskey = atomic_inc_return(&sk->sk_tskey) - 1;
1515678ca42dSWillem de Bruijn 
1516d8d1f30bSChangli Gao 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
15171da177e4SLinus Torvalds 
1518a1b05140SMasahide NAKAMURA 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1519b4ce9277SHerbert Xu 			(opt ? opt->opt_nflen : 0);
15201da177e4SLinus Torvalds 
15214df98e76SHannes Frederic Sowa 	headersize = sizeof(struct ipv6hdr) +
15223a1cebe7SHannes Frederic Sowa 		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
15234df98e76SHannes Frederic Sowa 		     (dst_allfrag(&rt->dst) ?
15244df98e76SHannes Frederic Sowa 		      sizeof(struct frag_hdr) : 0) +
15254df98e76SHannes Frederic Sowa 		     rt->rt6i_nfheader_len;
15264df98e76SHannes Frederic Sowa 
15275e34af41STadeusz Struk 	if (mtu <= fragheaderlen ||
15285e34af41STadeusz Struk 	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
15296596a022SJiri Bohac 		goto emsgsize;
15306596a022SJiri Bohac 
15316596a022SJiri Bohac 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
15326596a022SJiri Bohac 		     sizeof(struct frag_hdr);
15336596a022SJiri Bohac 
153410b8a3deSPaolo Abeni 	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
153510b8a3deSPaolo Abeni 	 * the first fragment
153610b8a3deSPaolo Abeni 	 */
153710b8a3deSPaolo Abeni 	if (headersize + transhdrlen > mtu)
153810b8a3deSPaolo Abeni 		goto emsgsize;
153910b8a3deSPaolo Abeni 
154026879da5SWei Wang 	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
15414df98e76SHannes Frederic Sowa 	    (sk->sk_protocol == IPPROTO_UDP ||
154213651224SJakub Kicinski 	     sk->sk_protocol == IPPROTO_ICMPV6 ||
15434df98e76SHannes Frederic Sowa 	     sk->sk_protocol == IPPROTO_RAW)) {
15444df98e76SHannes Frederic Sowa 		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
15454df98e76SHannes Frederic Sowa 				sizeof(struct ipv6hdr));
15464df98e76SHannes Frederic Sowa 		goto emsgsize;
15474df98e76SHannes Frederic Sowa 	}
15484df98e76SHannes Frederic Sowa 
1549682b1a9dSHannes Frederic Sowa 	if (ip6_sk_ignore_df(sk))
1550682b1a9dSHannes Frederic Sowa 		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1551682b1a9dSHannes Frederic Sowa 	else
1552682b1a9dSHannes Frederic Sowa 		maxnonfragsize = mtu;
1553682b1a9dSHannes Frederic Sowa 
15544df98e76SHannes Frederic Sowa 	if (cork->length + length > maxnonfragsize - headersize) {
15554df98e76SHannes Frederic Sowa emsgsize:
155610b8a3deSPaolo Abeni 		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
155710b8a3deSPaolo Abeni 		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
15581da177e4SLinus Torvalds 		return -EMSGSIZE;
15591da177e4SLinus Torvalds 	}
1560682b1a9dSHannes Frederic Sowa 
1561682b1a9dSHannes Frederic Sowa 	/* CHECKSUM_PARTIAL only with no extension headers and when
1562682b1a9dSHannes Frederic Sowa 	 * we are not going to fragment
1563682b1a9dSHannes Frederic Sowa 	 */
1564682b1a9dSHannes Frederic Sowa 	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1565682b1a9dSHannes Frederic Sowa 	    headersize == sizeof(struct ipv6hdr) &&
15662b89ed65SVlad Yasevich 	    length <= mtu - headersize &&
1567bec1f6f6SWillem de Bruijn 	    (!(flags & MSG_MORE) || cork->gso_size) &&
1568c8cd0989STom Herbert 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1569682b1a9dSHannes Frederic Sowa 		csummode = CHECKSUM_PARTIAL;
15701da177e4SLinus Torvalds 
15711fd3ae8cSPavel Begunkov 	if ((flags & MSG_ZEROCOPY) && length) {
15721fd3ae8cSPavel Begunkov 		struct msghdr *msg = from;
15731fd3ae8cSPavel Begunkov 
15741fd3ae8cSPavel Begunkov 		if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
15751fd3ae8cSPavel Begunkov 			if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
15761fd3ae8cSPavel Begunkov 				return -EINVAL;
15771fd3ae8cSPavel Begunkov 
15781fd3ae8cSPavel Begunkov 			/* Leave uarg NULL if can't zerocopy, callers should
15791fd3ae8cSPavel Begunkov 			 * be able to handle it.
15801fd3ae8cSPavel Begunkov 			 */
15811fd3ae8cSPavel Begunkov 			if ((rt->dst.dev->features & NETIF_F_SG) &&
15821fd3ae8cSPavel Begunkov 			    csummode == CHECKSUM_PARTIAL) {
15831fd3ae8cSPavel Begunkov 				paged = true;
15841fd3ae8cSPavel Begunkov 				zc = true;
15851fd3ae8cSPavel Begunkov 				uarg = msg->msg_ubuf;
15861fd3ae8cSPavel Begunkov 			}
15871fd3ae8cSPavel Begunkov 		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
15888c793822SJonathan Lemon 			uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
1589b5947e5dSWillem de Bruijn 			if (!uarg)
1590b5947e5dSWillem de Bruijn 				return -ENOBUFS;
1591522924b5SWillem de Bruijn 			extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
1592b5947e5dSWillem de Bruijn 			if (rt->dst.dev->features & NETIF_F_SG &&
1593b5947e5dSWillem de Bruijn 			    csummode == CHECKSUM_PARTIAL) {
1594b5947e5dSWillem de Bruijn 				paged = true;
1595773ba4feSPavel Begunkov 				zc = true;
1596b5947e5dSWillem de Bruijn 			} else {
1597e7d2b510SPavel Begunkov 				uarg_to_msgzc(uarg)->zerocopy = 0;
159852900d22SWillem de Bruijn 				skb_zcopy_set(skb, uarg, &extra_uref);
1599b5947e5dSWillem de Bruijn 			}
1600b5947e5dSWillem de Bruijn 		}
16016d8192bdSDavid Howells 	} else if ((flags & MSG_SPLICE_PAGES) && length) {
1602cafbe182SEric Dumazet 		if (inet_test_bit(HDRINCL, sk))
16036d8192bdSDavid Howells 			return -EPERM;
16045a6f6873SDavid Howells 		if (rt->dst.dev->features & NETIF_F_SG &&
16055a6f6873SDavid Howells 		    getfrag == ip_generic_getfrag)
16066d8192bdSDavid Howells 			/* We need an empty buffer to attach stuff to */
16076d8192bdSDavid Howells 			paged = true;
16086d8192bdSDavid Howells 		else
16096d8192bdSDavid Howells 			flags &= ~MSG_SPLICE_PAGES;
16101fd3ae8cSPavel Begunkov 	}
1611b5947e5dSWillem de Bruijn 
16121da177e4SLinus Torvalds 	/*
16131da177e4SLinus Torvalds 	 * Let's try using as much space as possible.
16141da177e4SLinus Torvalds 	 * Use MTU if total length of the message fits into the MTU.
16151da177e4SLinus Torvalds 	 * Otherwise, we need to reserve fragment header and
16161da177e4SLinus Torvalds 	 * fragment alignment (= 8-15 octets, in total).
16171da177e4SLinus Torvalds 	 *
1618634a63e7SRandy Dunlap 	 * Note that we may need to "move" the data from the tail
16191da177e4SLinus Torvalds 	 * of the buffer to the new fragment when we split
16201da177e4SLinus Torvalds 	 * the message.
16211da177e4SLinus Torvalds 	 *
16221da177e4SLinus Torvalds 	 * FIXME: It may be fragmented into multiple chunks
16231da177e4SLinus Torvalds 	 *        at once if non-fragmentable extension headers
16241da177e4SLinus Torvalds 	 *        are too large.
16251da177e4SLinus Torvalds 	 * --yoshfuji
16261da177e4SLinus Torvalds 	 */
16271da177e4SLinus Torvalds 
16282811ebacSHannes Frederic Sowa 	cork->length += length;
16292811ebacSHannes Frederic Sowa 	if (!skb)
16301da177e4SLinus Torvalds 		goto alloc_new_skb;
16311da177e4SLinus Torvalds 
16321da177e4SLinus Torvalds 	while (length > 0) {
16331da177e4SLinus Torvalds 		/* Check if the remaining data fits into current packet. */
1634bdc712b4SDavid S. Miller 		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
16351da177e4SLinus Torvalds 		if (copy < length)
16361da177e4SLinus Torvalds 			copy = maxfraglen - skb->len;
16371da177e4SLinus Torvalds 
16381da177e4SLinus Torvalds 		if (copy <= 0) {
16391da177e4SLinus Torvalds 			char *data;
16401da177e4SLinus Torvalds 			unsigned int datalen;
16411da177e4SLinus Torvalds 			unsigned int fraglen;
16421da177e4SLinus Torvalds 			unsigned int fraggap;
16436d123b81SJakub Kicinski 			unsigned int alloclen, alloc_extra;
1644aba36930SWillem de Bruijn 			unsigned int pagedlen;
16451da177e4SLinus Torvalds alloc_new_skb:
16461da177e4SLinus Torvalds 			/* There's no room in the current skb */
16470c183379SGao feng 			if (skb)
16480c183379SGao feng 				fraggap = skb->len - maxfraglen;
16491da177e4SLinus Torvalds 			else
16501da177e4SLinus Torvalds 				fraggap = 0;
16510c183379SGao feng 			/* update mtu and maxfraglen if necessary */
165263159f29SIan Morris 			if (!skb || !skb_prev)
16530c183379SGao feng 				ip6_append_data_mtu(&mtu, &maxfraglen,
165475a493e6SHannes Frederic Sowa 						    fragheaderlen, skb, rt,
1655e367c2d0Slucien 						    orig_mtu);
16560c183379SGao feng 
16570c183379SGao feng 			skb_prev = skb;
16581da177e4SLinus Torvalds 
16591da177e4SLinus Torvalds 			/*
16601da177e4SLinus Torvalds 			 * If remaining data exceeds the mtu,
16611da177e4SLinus Torvalds 			 * we know we need more fragment(s).
16621da177e4SLinus Torvalds 			 */
16631da177e4SLinus Torvalds 			datalen = length + fraggap;
16641da177e4SLinus Torvalds 
16650c183379SGao feng 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
16660c183379SGao feng 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
166715e36f5bSWillem de Bruijn 			fraglen = datalen + fragheaderlen;
1668aba36930SWillem de Bruijn 			pagedlen = 0;
166915e36f5bSWillem de Bruijn 
16706d123b81SJakub Kicinski 			alloc_extra = hh_len;
16716d123b81SJakub Kicinski 			alloc_extra += dst_exthdrlen;
16726d123b81SJakub Kicinski 			alloc_extra += rt->dst.trailer_len;
16736d123b81SJakub Kicinski 
16746d123b81SJakub Kicinski 			/* We just reserve space for fragment header.
16756d123b81SJakub Kicinski 			 * Note: this may be overallocation if the message
16766d123b81SJakub Kicinski 			 * (without MSG_MORE) fits into the MTU.
16776d123b81SJakub Kicinski 			 */
16786d123b81SJakub Kicinski 			alloc_extra += sizeof(struct frag_hdr);
16796d123b81SJakub Kicinski 
16801da177e4SLinus Torvalds 			if ((flags & MSG_MORE) &&
1681d8d1f30bSChangli Gao 			    !(rt->dst.dev->features&NETIF_F_SG))
16821da177e4SLinus Torvalds 				alloclen = mtu;
16836d123b81SJakub Kicinski 			else if (!paged &&
16846d123b81SJakub Kicinski 				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
16856d123b81SJakub Kicinski 				  !(rt->dst.dev->features & NETIF_F_SG)))
168615e36f5bSWillem de Bruijn 				alloclen = fraglen;
168747cf8899SPavel Begunkov 			else {
1688773ba4feSPavel Begunkov 				alloclen = fragheaderlen + transhdrlen;
1689773ba4feSPavel Begunkov 				pagedlen = datalen - transhdrlen;
169015e36f5bSWillem de Bruijn 			}
16916d123b81SJakub Kicinski 			alloclen += alloc_extra;
1692299b0767SSteffen Klassert 
16930c183379SGao feng 			if (datalen != length + fraggap) {
16941da177e4SLinus Torvalds 				/*
16950c183379SGao feng 				 * this is not the last fragment, the trailer
16960c183379SGao feng 				 * space is regarded as data space.
16971da177e4SLinus Torvalds 				 */
16980c183379SGao feng 				datalen += rt->dst.trailer_len;
16990c183379SGao feng 			}
17000c183379SGao feng 
17010c183379SGao feng 			fraglen = datalen + fragheaderlen;
17021da177e4SLinus Torvalds 
170315e36f5bSWillem de Bruijn 			copy = datalen - transhdrlen - fraggap - pagedlen;
1704ce650a16SDavid Howells 			/* [!] NOTE: copy may be negative if pagedlen>0
1705ce650a16SDavid Howells 			 * because then the equation may reduce to -fraggap.
1706ce650a16SDavid Howells 			 */
1707ce650a16SDavid Howells 			if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
1708232cd35dSEric Dumazet 				err = -EINVAL;
1709232cd35dSEric Dumazet 				goto error;
1710232cd35dSEric Dumazet 			}
17111da177e4SLinus Torvalds 			if (transhdrlen) {
17126d123b81SJakub Kicinski 				skb = sock_alloc_send_skb(sk, alloclen,
17131da177e4SLinus Torvalds 						(flags & MSG_DONTWAIT), &err);
17141da177e4SLinus Torvalds 			} else {
17151da177e4SLinus Torvalds 				skb = NULL;
17161f4c6eb2SEric Dumazet 				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
17171da177e4SLinus Torvalds 				    2 * sk->sk_sndbuf)
17186d123b81SJakub Kicinski 					skb = alloc_skb(alloclen,
17191da177e4SLinus Torvalds 							sk->sk_allocation);
172063159f29SIan Morris 				if (unlikely(!skb))
17211da177e4SLinus Torvalds 					err = -ENOBUFS;
17221da177e4SLinus Torvalds 			}
172363159f29SIan Morris 			if (!skb)
17241da177e4SLinus Torvalds 				goto error;
17251da177e4SLinus Torvalds 			/*
17261da177e4SLinus Torvalds 			 *	Fill in the control structures
17271da177e4SLinus Torvalds 			 */
17289c9c9ad5SHannes Frederic Sowa 			skb->protocol = htons(ETH_P_IPV6);
172932dce968SVlad Yasevich 			skb->ip_summed = csummode;
17301da177e4SLinus Torvalds 			skb->csum = 0;
17311f85851eSGao feng 			/* reserve for fragmentation and ipsec header */
17321f85851eSGao feng 			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
17331f85851eSGao feng 				    dst_exthdrlen);
17341da177e4SLinus Torvalds 
17351da177e4SLinus Torvalds 			/*
17361da177e4SLinus Torvalds 			 *	Find where to start putting bytes
17371da177e4SLinus Torvalds 			 */
173815e36f5bSWillem de Bruijn 			data = skb_put(skb, fraglen - pagedlen);
17391f85851eSGao feng 			skb_set_network_header(skb, exthdrlen);
17401f85851eSGao feng 			data += fragheaderlen;
1741b0e380b1SArnaldo Carvalho de Melo 			skb->transport_header = (skb->network_header +
1742b0e380b1SArnaldo Carvalho de Melo 						 fragheaderlen);
17431da177e4SLinus Torvalds 			if (fraggap) {
17441da177e4SLinus Torvalds 				skb->csum = skb_copy_and_csum_bits(
17451da177e4SLinus Torvalds 					skb_prev, maxfraglen,
17468d5930dfSAl Viro 					data + transhdrlen, fraggap);
17471da177e4SLinus Torvalds 				skb_prev->csum = csum_sub(skb_prev->csum,
17481da177e4SLinus Torvalds 							  skb->csum);
17491da177e4SLinus Torvalds 				data += fraggap;
1750e9fa4f7bSHerbert Xu 				pskb_trim_unique(skb_prev, maxfraglen);
17511da177e4SLinus Torvalds 			}
1752232cd35dSEric Dumazet 			if (copy > 0 &&
1753232cd35dSEric Dumazet 			    getfrag(from, data + transhdrlen, offset,
1754232cd35dSEric Dumazet 				    copy, fraggap, skb) < 0) {
17551da177e4SLinus Torvalds 				err = -EFAULT;
17561da177e4SLinus Torvalds 				kfree_skb(skb);
17571da177e4SLinus Torvalds 				goto error;
1758ce650a16SDavid Howells 			} else if (flags & MSG_SPLICE_PAGES) {
1759ce650a16SDavid Howells 				copy = 0;
17601da177e4SLinus Torvalds 			}
17611da177e4SLinus Torvalds 
17621da177e4SLinus Torvalds 			offset += copy;
176315e36f5bSWillem de Bruijn 			length -= copy + transhdrlen;
17641da177e4SLinus Torvalds 			transhdrlen = 0;
17651da177e4SLinus Torvalds 			exthdrlen = 0;
1766299b0767SSteffen Klassert 			dst_exthdrlen = 0;
17671da177e4SLinus Torvalds 
176852900d22SWillem de Bruijn 			/* Only the initial fragment is time stamped */
176952900d22SWillem de Bruijn 			skb_shinfo(skb)->tx_flags = cork->tx_flags;
177052900d22SWillem de Bruijn 			cork->tx_flags = 0;
177152900d22SWillem de Bruijn 			skb_shinfo(skb)->tskey = tskey;
177252900d22SWillem de Bruijn 			tskey = 0;
177352900d22SWillem de Bruijn 			skb_zcopy_set(skb, uarg, &extra_uref);
177452900d22SWillem de Bruijn 
17750dec879fSJulian Anastasov 			if ((flags & MSG_CONFIRM) && !skb_prev)
17760dec879fSJulian Anastasov 				skb_set_dst_pending_confirm(skb, 1);
17770dec879fSJulian Anastasov 
17781da177e4SLinus Torvalds 			/*
17791da177e4SLinus Torvalds 			 * Put the packet on the pending queue
17801da177e4SLinus Torvalds 			 */
17811f4c6eb2SEric Dumazet 			if (!skb->destructor) {
17821f4c6eb2SEric Dumazet 				skb->destructor = sock_wfree;
17831f4c6eb2SEric Dumazet 				skb->sk = sk;
17841f4c6eb2SEric Dumazet 				wmem_alloc_delta += skb->truesize;
17851f4c6eb2SEric Dumazet 			}
17860bbe84a6SVlad Yasevich 			__skb_queue_tail(queue, skb);
17871da177e4SLinus Torvalds 			continue;
17881da177e4SLinus Torvalds 		}
17891da177e4SLinus Torvalds 
17901da177e4SLinus Torvalds 		if (copy > length)
17911da177e4SLinus Torvalds 			copy = length;
17921da177e4SLinus Torvalds 
1793113f99c3SWillem de Bruijn 		if (!(rt->dst.dev->features&NETIF_F_SG) &&
1794113f99c3SWillem de Bruijn 		    skb_tailroom(skb) >= copy) {
17951da177e4SLinus Torvalds 			unsigned int off;
17961da177e4SLinus Torvalds 
17971da177e4SLinus Torvalds 			off = skb->len;
17981da177e4SLinus Torvalds 			if (getfrag(from, skb_put(skb, copy),
17991da177e4SLinus Torvalds 						offset, copy, off, skb) < 0) {
18001da177e4SLinus Torvalds 				__skb_trim(skb, off);
18011da177e4SLinus Torvalds 				err = -EFAULT;
18021da177e4SLinus Torvalds 				goto error;
18031da177e4SLinus Torvalds 			}
18046d8192bdSDavid Howells 		} else if (flags & MSG_SPLICE_PAGES) {
18056d8192bdSDavid Howells 			struct msghdr *msg = from;
18066d8192bdSDavid Howells 
1807ce650a16SDavid Howells 			err = -EIO;
1808ce650a16SDavid Howells 			if (WARN_ON_ONCE(copy > msg->msg_iter.count))
1809ce650a16SDavid Howells 				goto error;
1810ce650a16SDavid Howells 
18116d8192bdSDavid Howells 			err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
18126d8192bdSDavid Howells 						   sk->sk_allocation);
18136d8192bdSDavid Howells 			if (err < 0)
18146d8192bdSDavid Howells 				goto error;
18156d8192bdSDavid Howells 			copy = err;
18166d8192bdSDavid Howells 			wmem_alloc_delta += copy;
18171fd3ae8cSPavel Begunkov 		} else if (!zc) {
18181da177e4SLinus Torvalds 			int i = skb_shinfo(skb)->nr_frags;
18191da177e4SLinus Torvalds 
18201da177e4SLinus Torvalds 			err = -ENOMEM;
18215640f768SEric Dumazet 			if (!sk_page_frag_refill(sk, pfrag))
18221da177e4SLinus Torvalds 				goto error;
18231da177e4SLinus Torvalds 
18241fd3ae8cSPavel Begunkov 			skb_zcopy_downgrade_managed(skb);
18255640f768SEric Dumazet 			if (!skb_can_coalesce(skb, i, pfrag->page,
18265640f768SEric Dumazet 					      pfrag->offset)) {
18271da177e4SLinus Torvalds 				err = -EMSGSIZE;
18285640f768SEric Dumazet 				if (i == MAX_SKB_FRAGS)
18291da177e4SLinus Torvalds 					goto error;
18305640f768SEric Dumazet 
18315640f768SEric Dumazet 				__skb_fill_page_desc(skb, i, pfrag->page,
18325640f768SEric Dumazet 						     pfrag->offset, 0);
18335640f768SEric Dumazet 				skb_shinfo(skb)->nr_frags = ++i;
18345640f768SEric Dumazet 				get_page(pfrag->page);
18351da177e4SLinus Torvalds 			}
18365640f768SEric Dumazet 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
18379e903e08SEric Dumazet 			if (getfrag(from,
18385640f768SEric Dumazet 				    page_address(pfrag->page) + pfrag->offset,
18395640f768SEric Dumazet 				    offset, copy, skb->len, skb) < 0)
18405640f768SEric Dumazet 				goto error_efault;
18415640f768SEric Dumazet 
18425640f768SEric Dumazet 			pfrag->offset += copy;
18435640f768SEric Dumazet 			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
18441da177e4SLinus Torvalds 			skb->len += copy;
18451da177e4SLinus Torvalds 			skb->data_len += copy;
1846f945fa7aSHerbert Xu 			skb->truesize += copy;
18471f4c6eb2SEric Dumazet 			wmem_alloc_delta += copy;
1848b5947e5dSWillem de Bruijn 		} else {
1849b5947e5dSWillem de Bruijn 			err = skb_zerocopy_iter_dgram(skb, from, copy);
1850b5947e5dSWillem de Bruijn 			if (err < 0)
1851b5947e5dSWillem de Bruijn 				goto error;
18521da177e4SLinus Torvalds 		}
18531da177e4SLinus Torvalds 		offset += copy;
18541da177e4SLinus Torvalds 		length -= copy;
18551da177e4SLinus Torvalds 	}
18565640f768SEric Dumazet 
18579e8445a5SPaolo Abeni 	if (wmem_alloc_delta)
18581f4c6eb2SEric Dumazet 		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
18591da177e4SLinus Torvalds 	return 0;
18605640f768SEric Dumazet 
18615640f768SEric Dumazet error_efault:
18625640f768SEric Dumazet 	err = -EFAULT;
18631da177e4SLinus Torvalds error:
18648e044917SJonathan Lemon 	net_zcopy_put_abort(uarg, extra_uref);
1865bdc712b4SDavid S. Miller 	cork->length -= length;
18663bd653c8SDenis V. Lunev 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
18671f4c6eb2SEric Dumazet 	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
18681da177e4SLinus Torvalds 	return err;
18691da177e4SLinus Torvalds }
18700bbe84a6SVlad Yasevich 
18710bbe84a6SVlad Yasevich int ip6_append_data(struct sock *sk,
18720bbe84a6SVlad Yasevich 		    int getfrag(void *from, char *to, int offset, int len,
18730bbe84a6SVlad Yasevich 				int odd, struct sk_buff *skb),
1874f93431c8SWang Yufen 		    void *from, size_t length, int transhdrlen,
187526879da5SWei Wang 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
18765fdaa88dSWillem de Bruijn 		    struct rt6_info *rt, unsigned int flags)
18770bbe84a6SVlad Yasevich {
18780bbe84a6SVlad Yasevich 	struct inet_sock *inet = inet_sk(sk);
18790bbe84a6SVlad Yasevich 	struct ipv6_pinfo *np = inet6_sk(sk);
18800bbe84a6SVlad Yasevich 	int exthdrlen;
18810bbe84a6SVlad Yasevich 	int err;
18820bbe84a6SVlad Yasevich 
18830bbe84a6SVlad Yasevich 	if (flags&MSG_PROBE)
18840bbe84a6SVlad Yasevich 		return 0;
18850bbe84a6SVlad Yasevich 	if (skb_queue_empty(&sk->sk_write_queue)) {
18860bbe84a6SVlad Yasevich 		/*
18870bbe84a6SVlad Yasevich 		 * setup for corking
18880bbe84a6SVlad Yasevich 		 */
188940ac240cSPavel Begunkov 		dst_hold(&rt->dst);
189026879da5SWei Wang 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1891f37a4cc6SPavel Begunkov 				     ipc6, rt);
18920bbe84a6SVlad Yasevich 		if (err)
18930bbe84a6SVlad Yasevich 			return err;
18940bbe84a6SVlad Yasevich 
1895f37a4cc6SPavel Begunkov 		inet->cork.fl.u.ip6 = *fl6;
189626879da5SWei Wang 		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
18970bbe84a6SVlad Yasevich 		length += exthdrlen;
18980bbe84a6SVlad Yasevich 		transhdrlen += exthdrlen;
18990bbe84a6SVlad Yasevich 	} else {
19000bbe84a6SVlad Yasevich 		transhdrlen = 0;
19010bbe84a6SVlad Yasevich 	}
19020bbe84a6SVlad Yasevich 
1903f37a4cc6SPavel Begunkov 	return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
19040bbe84a6SVlad Yasevich 				 &np->cork, sk_page_frag(sk), getfrag,
19055fdaa88dSWillem de Bruijn 				 from, length, transhdrlen, flags, ipc6);
19060bbe84a6SVlad Yasevich }
1907a495f836SChris Elston EXPORT_SYMBOL_GPL(ip6_append_data);
19081da177e4SLinus Torvalds 
1909cd3c7480SPavel Begunkov static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
1910cd3c7480SPavel Begunkov {
1911cd3c7480SPavel Begunkov 	struct dst_entry *dst = cork->base.dst;
1912cd3c7480SPavel Begunkov 
1913cd3c7480SPavel Begunkov 	cork->base.dst = NULL;
1914cd3c7480SPavel Begunkov 	cork->base.flags &= ~IPCORK_ALLFRAG;
1915cd3c7480SPavel Begunkov 	skb_dst_set(skb, dst);
1916cd3c7480SPavel Begunkov }
1917cd3c7480SPavel Begunkov 
1918366e41d9SVlad Yasevich static void ip6_cork_release(struct inet_cork_full *cork,
1919366e41d9SVlad Yasevich 			     struct inet6_cork *v6_cork)
1920bf138862SPavel Emelyanov {
1921366e41d9SVlad Yasevich 	if (v6_cork->opt) {
1922d656b2eaSPavel Begunkov 		struct ipv6_txoptions *opt = v6_cork->opt;
1923d656b2eaSPavel Begunkov 
1924d656b2eaSPavel Begunkov 		kfree(opt->dst0opt);
1925d656b2eaSPavel Begunkov 		kfree(opt->dst1opt);
1926d656b2eaSPavel Begunkov 		kfree(opt->hopopt);
1927d656b2eaSPavel Begunkov 		kfree(opt->srcrt);
1928d656b2eaSPavel Begunkov 		kfree(opt);
1929366e41d9SVlad Yasevich 		v6_cork->opt = NULL;
19300178b695SHerbert Xu 	}
19310178b695SHerbert Xu 
1932366e41d9SVlad Yasevich 	if (cork->base.dst) {
1933366e41d9SVlad Yasevich 		dst_release(cork->base.dst);
1934366e41d9SVlad Yasevich 		cork->base.dst = NULL;
1935366e41d9SVlad Yasevich 		cork->base.flags &= ~IPCORK_ALLFRAG;
1936bf138862SPavel Emelyanov 	}
1937bf138862SPavel Emelyanov }
1938bf138862SPavel Emelyanov 
19396422398cSVlad Yasevich struct sk_buff *__ip6_make_skb(struct sock *sk,
19406422398cSVlad Yasevich 			       struct sk_buff_head *queue,
19416422398cSVlad Yasevich 			       struct inet_cork_full *cork,
19426422398cSVlad Yasevich 			       struct inet6_cork *v6_cork)
19431da177e4SLinus Torvalds {
19441da177e4SLinus Torvalds 	struct sk_buff *skb, *tmp_skb;
19451da177e4SLinus Torvalds 	struct sk_buff **tail_skb;
1946b60d4e58SPavel Begunkov 	struct in6_addr *final_dst;
19471da177e4SLinus Torvalds 	struct ipv6_pinfo *np = inet6_sk(sk);
19483bd653c8SDenis V. Lunev 	struct net *net = sock_net(sk);
19491da177e4SLinus Torvalds 	struct ipv6hdr *hdr;
19506422398cSVlad Yasevich 	struct ipv6_txoptions *opt = v6_cork->opt;
19516422398cSVlad Yasevich 	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
19526422398cSVlad Yasevich 	struct flowi6 *fl6 = &cork->fl.u.ip6;
19534c9483b2SDavid S. Miller 	unsigned char proto = fl6->flowi6_proto;
19541da177e4SLinus Torvalds 
19556422398cSVlad Yasevich 	skb = __skb_dequeue(queue);
195663159f29SIan Morris 	if (!skb)
19571da177e4SLinus Torvalds 		goto out;
19581da177e4SLinus Torvalds 	tail_skb = &(skb_shinfo(skb)->frag_list);
19591da177e4SLinus Torvalds 
19601da177e4SLinus Torvalds 	/* move skb->data to ip header from ext header */
1961d56f90a7SArnaldo Carvalho de Melo 	if (skb->data < skb_network_header(skb))
1962bbe735e4SArnaldo Carvalho de Melo 		__skb_pull(skb, skb_network_offset(skb));
19636422398cSVlad Yasevich 	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1964cfe1fc77SArnaldo Carvalho de Melo 		__skb_pull(tmp_skb, skb_network_header_len(skb));
19651da177e4SLinus Torvalds 		*tail_skb = tmp_skb;
19661da177e4SLinus Torvalds 		tail_skb = &(tmp_skb->next);
19671da177e4SLinus Torvalds 		skb->len += tmp_skb->len;
19681da177e4SLinus Torvalds 		skb->data_len += tmp_skb->len;
19691da177e4SLinus Torvalds 		skb->truesize += tmp_skb->truesize;
19701da177e4SLinus Torvalds 		tmp_skb->destructor = NULL;
19711da177e4SLinus Torvalds 		tmp_skb->sk = NULL;
19721da177e4SLinus Torvalds 	}
19731da177e4SLinus Torvalds 
197428a89453SHerbert Xu 	/* Allow local fragmentation. */
197560ff7467SWANG Cong 	skb->ignore_df = ip6_sk_ignore_df(sk);
1976cfe1fc77SArnaldo Carvalho de Melo 	__skb_pull(skb, skb_network_header_len(skb));
1977b60d4e58SPavel Begunkov 
1978b60d4e58SPavel Begunkov 	final_dst = &fl6->daddr;
19791da177e4SLinus Torvalds 	if (opt && opt->opt_flen)
19801da177e4SLinus Torvalds 		ipv6_push_frag_opts(skb, opt, &proto);
19811da177e4SLinus Torvalds 	if (opt && opt->opt_nflen)
1982613fa3caSDavid Lebrun 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
19831da177e4SLinus Torvalds 
1984e2d1bca7SArnaldo Carvalho de Melo 	skb_push(skb, sizeof(struct ipv6hdr));
1985e2d1bca7SArnaldo Carvalho de Melo 	skb_reset_network_header(skb);
19860660e03fSArnaldo Carvalho de Melo 	hdr = ipv6_hdr(skb);
19871da177e4SLinus Torvalds 
19886422398cSVlad Yasevich 	ip6_flow_hdr(hdr, v6_cork->tclass,
1989cb1ce2efSTom Herbert 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1990513674b5SShaohua Li 					ip6_autoflowlabel(net, np), fl6));
19916422398cSVlad Yasevich 	hdr->hop_limit = v6_cork->hop_limit;
19921da177e4SLinus Torvalds 	hdr->nexthdr = proto;
19934e3fd7a0SAlexey Dobriyan 	hdr->saddr = fl6->saddr;
19944e3fd7a0SAlexey Dobriyan 	hdr->daddr = *final_dst;
19951da177e4SLinus Torvalds 
1996a2c2064fSPatrick McHardy 	skb->priority = sk->sk_priority;
1997c6af0c22SWillem de Bruijn 	skb->mark = cork->base.mark;
1998a818f75eSJesus Sanchez-Palencia 	skb->tstamp = cork->base.transmit_time;
1999a818f75eSJesus Sanchez-Palencia 
2000cd3c7480SPavel Begunkov 	ip6_cork_steal_dst(skb, cork);
200156712f74SHeng Guo 	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
200214878f75SDavid L Stevens 	if (proto == IPPROTO_ICMPV6) {
2003adf30907SEric Dumazet 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
2004ea30388bSZiyang Xuan 		u8 icmp6_type;
200514878f75SDavid L Stevens 
2006cafbe182SEric Dumazet 		if (sk->sk_socket->type == SOCK_RAW &&
200768c8ba16SShigeru Yoshida 		   !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
2008ea30388bSZiyang Xuan 			icmp6_type = fl6->fl6_icmp_type;
2009ea30388bSZiyang Xuan 		else
2010ea30388bSZiyang Xuan 			icmp6_type = icmp6_hdr(skb)->icmp6_type;
2011ea30388bSZiyang Xuan 		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
201243a43b60SHannes Frederic Sowa 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
201314878f75SDavid L Stevens 	}
201414878f75SDavid L Stevens 
20156422398cSVlad Yasevich 	ip6_cork_release(cork, v6_cork);
20166422398cSVlad Yasevich out:
20176422398cSVlad Yasevich 	return skb;
20186422398cSVlad Yasevich }
20196422398cSVlad Yasevich 
20206422398cSVlad Yasevich int ip6_send_skb(struct sk_buff *skb)
20216422398cSVlad Yasevich {
20226422398cSVlad Yasevich 	struct net *net = sock_net(skb->sk);
20236422398cSVlad Yasevich 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
20246422398cSVlad Yasevich 	int err;
20256422398cSVlad Yasevich 
202633224b16SEric W. Biederman 	err = ip6_local_out(net, skb->sk, skb);
20271da177e4SLinus Torvalds 	if (err) {
20281da177e4SLinus Torvalds 		if (err > 0)
20296ce9e7b5SEric Dumazet 			err = net_xmit_errno(err);
20301da177e4SLinus Torvalds 		if (err)
20316422398cSVlad Yasevich 			IP6_INC_STATS(net, rt->rt6i_idev,
20326422398cSVlad Yasevich 				      IPSTATS_MIB_OUTDISCARDS);
20331da177e4SLinus Torvalds 	}
20341da177e4SLinus Torvalds 
20351da177e4SLinus Torvalds 	return err;
20366422398cSVlad Yasevich }
20376422398cSVlad Yasevich 
/*
 *	ip6_push_pending_frames - build and send the socket's pending data
 *
 *	Obtains the finished skb from ip6_finish_skb() and sends it with
 *	ip6_send_skb().  Returns 0 when there was nothing to send, otherwise
 *	the result of ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *pending = ip6_finish_skb(sk);

	return pending ? ip6_send_skb(pending) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
20491da177e4SLinus Torvalds 
20500bbe84a6SVlad Yasevich static void __ip6_flush_pending_frames(struct sock *sk,
20516422398cSVlad Yasevich 				       struct sk_buff_head *queue,
20526422398cSVlad Yasevich 				       struct inet_cork_full *cork,
20536422398cSVlad Yasevich 				       struct inet6_cork *v6_cork)
20541da177e4SLinus Torvalds {
20551da177e4SLinus Torvalds 	struct sk_buff *skb;
20561da177e4SLinus Torvalds 
20570bbe84a6SVlad Yasevich 	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
2058adf30907SEric Dumazet 		if (skb_dst(skb))
2059adf30907SEric Dumazet 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
2060a11d206dSYOSHIFUJI Hideaki 				      IPSTATS_MIB_OUTDISCARDS);
20611da177e4SLinus Torvalds 		kfree_skb(skb);
20621da177e4SLinus Torvalds 	}
20631da177e4SLinus Torvalds 
20646422398cSVlad Yasevich 	ip6_cork_release(cork, v6_cork);
20651da177e4SLinus Torvalds }
20660bbe84a6SVlad Yasevich 
20670bbe84a6SVlad Yasevich void ip6_flush_pending_frames(struct sock *sk)
20680bbe84a6SVlad Yasevich {
20696422398cSVlad Yasevich 	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
20706422398cSVlad Yasevich 				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
20710bbe84a6SVlad Yasevich }
2072a495f836SChris Elston EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
20736422398cSVlad Yasevich 
20746422398cSVlad Yasevich struct sk_buff *ip6_make_skb(struct sock *sk,
20756422398cSVlad Yasevich 			     int getfrag(void *from, char *to, int offset,
20766422398cSVlad Yasevich 					 int len, int odd, struct sk_buff *skb),
2077f93431c8SWang Yufen 			     void *from, size_t length, int transhdrlen,
2078f37a4cc6SPavel Begunkov 			     struct ipcm6_cookie *ipc6, struct rt6_info *rt,
2079f37a4cc6SPavel Begunkov 			     unsigned int flags, struct inet_cork_full *cork)
20806422398cSVlad Yasevich {
20816422398cSVlad Yasevich 	struct inet6_cork v6_cork;
20826422398cSVlad Yasevich 	struct sk_buff_head queue;
208326879da5SWei Wang 	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
20846422398cSVlad Yasevich 	int err;
20856422398cSVlad Yasevich 
208640ac240cSPavel Begunkov 	if (flags & MSG_PROBE) {
208740ac240cSPavel Begunkov 		dst_release(&rt->dst);
20886422398cSVlad Yasevich 		return NULL;
208940ac240cSPavel Begunkov 	}
20906422398cSVlad Yasevich 
20916422398cSVlad Yasevich 	__skb_queue_head_init(&queue);
20926422398cSVlad Yasevich 
20931cd7884dSWillem de Bruijn 	cork->base.flags = 0;
20941cd7884dSWillem de Bruijn 	cork->base.addr = 0;
20951cd7884dSWillem de Bruijn 	cork->base.opt = NULL;
20966422398cSVlad Yasevich 	v6_cork.opt = NULL;
2097f37a4cc6SPavel Begunkov 	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
2098862c03eeSEric Dumazet 	if (err) {
20991cd7884dSWillem de Bruijn 		ip6_cork_release(cork, &v6_cork);
21006422398cSVlad Yasevich 		return ERR_PTR(err);
2101862c03eeSEric Dumazet 	}
210226879da5SWei Wang 	if (ipc6->dontfrag < 0)
210326879da5SWei Wang 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
21046422398cSVlad Yasevich 
2105f37a4cc6SPavel Begunkov 	err = __ip6_append_data(sk, &queue, cork, &v6_cork,
21066422398cSVlad Yasevich 				&current->task_frag, getfrag, from,
21076422398cSVlad Yasevich 				length + exthdrlen, transhdrlen + exthdrlen,
21085fdaa88dSWillem de Bruijn 				flags, ipc6);
21096422398cSVlad Yasevich 	if (err) {
21101cd7884dSWillem de Bruijn 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
21116422398cSVlad Yasevich 		return ERR_PTR(err);
21126422398cSVlad Yasevich 	}
21136422398cSVlad Yasevich 
21141cd7884dSWillem de Bruijn 	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
21156422398cSVlad Yasevich }
2116