xref: /openbmc/linux/net/ipv6/ip6_output.c (revision 4464005a12b5c79e1a364e6272ee10a83413f928)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	IPv6 output functions
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on linux/net/ipv4/ip_output.c
10  *
11  *	Changes:
12  *	A.N.Kuznetsov	:	airthmetics in fragmentation.
13  *				extension headers are implemented.
14  *				route changes now work.
15  *				ip6_forward does not confuse sniffers.
16  *				etc.
17  *
18  *      H. von Brand    :       Added missing #include <linux/string.h>
19  *	Imran Patel	:	frag id should be in NBO
20  *      Kazunori MIYAZAWA @USAGI
21  *			:       add ip6_append_data and related functions
22  *				for datagram xmit
23  */
24 
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37 
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41 
42 #include <net/sock.h>
43 #include <net/snmp.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 #include <net/ip_tunnels.h>
58 
59 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
60 {
61 	struct dst_entry *dst = skb_dst(skb);
62 	struct net_device *dev = dst->dev;
63 	const struct in6_addr *nexthop;
64 	struct neighbour *neigh;
65 	int ret;
66 
67 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
68 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
69 
70 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
71 		    ((mroute6_is_socket(net, skb) &&
72 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
73 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
74 					 &ipv6_hdr(skb)->saddr))) {
75 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
76 
77 			/* Do not check for IFF_ALLMULTI; multicast routing
78 			   is not supported in any case.
79 			 */
80 			if (newskb)
81 				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
82 					net, sk, newskb, NULL, newskb->dev,
83 					dev_loopback_xmit);
84 
85 			if (ipv6_hdr(skb)->hop_limit == 0) {
86 				IP6_INC_STATS(net, idev,
87 					      IPSTATS_MIB_OUTDISCARDS);
88 				kfree_skb(skb);
89 				return 0;
90 			}
91 		}
92 
93 		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
94 
95 		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
96 		    IPV6_ADDR_SCOPE_NODELOCAL &&
97 		    !(dev->flags & IFF_LOOPBACK)) {
98 			kfree_skb(skb);
99 			return 0;
100 		}
101 	}
102 
103 	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
104 		int res = lwtunnel_xmit(skb);
105 
106 		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
107 			return res;
108 	}
109 
110 	rcu_read_lock_bh();
111 	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
112 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
113 	if (unlikely(!neigh))
114 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
115 	if (!IS_ERR(neigh)) {
116 		sock_confirm_neigh(skb, neigh);
117 		ret = neigh_output(neigh, skb, false);
118 		rcu_read_unlock_bh();
119 		return ret;
120 	}
121 	rcu_read_unlock_bh();
122 
123 	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
124 	kfree_skb(skb);
125 	return -EINVAL;
126 }
127 
128 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
129 {
130 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
131 	/* Policy lookup after SNAT yielded a new policy */
132 	if (skb_dst(skb)->xfrm) {
133 		IPCB(skb)->flags |= IPSKB_REROUTED;
134 		return dst_output(net, sk, skb);
135 	}
136 #endif
137 
138 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
139 	    dst_allfrag(skb_dst(skb)) ||
140 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
141 		return ip6_fragment(net, sk, skb, ip6_finish_output2);
142 	else
143 		return ip6_finish_output2(net, sk, skb);
144 }
145 
146 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
147 {
148 	int ret;
149 
150 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
151 	switch (ret) {
152 	case NET_XMIT_SUCCESS:
153 		return __ip6_finish_output(net, sk, skb);
154 	case NET_XMIT_CN:
155 		return __ip6_finish_output(net, sk, skb) ? : ret;
156 	default:
157 		kfree_skb(skb);
158 		return ret;
159 	}
160 }
161 
162 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
163 {
164 	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
165 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
166 
167 	skb->protocol = htons(ETH_P_IPV6);
168 	skb->dev = dev;
169 
170 	if (unlikely(idev->cnf.disable_ipv6)) {
171 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
172 		kfree_skb(skb);
173 		return 0;
174 	}
175 
176 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
177 			    net, sk, skb, indev, dev,
178 			    ip6_finish_output,
179 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
180 }
181 
182 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
183 {
184 	if (!np->autoflowlabel_set)
185 		return ip6_default_np_autolabel(net);
186 	else
187 		return np->autoflowlabel;
188 }
189 
190 /*
191  * xmit an sk_buff (used by TCP, SCTP and DCCP)
192  * Note : socket lock is not held for SYNACK packets, but might be modified
193  * by calls to skb_set_owner_w() and ipv6_local_error(),
194  * which are using proper atomic operations or spinlocks.
195  */
196 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
197 	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
198 {
199 	struct net *net = sock_net(sk);
200 	const struct ipv6_pinfo *np = inet6_sk(sk);
201 	struct in6_addr *first_hop = &fl6->daddr;
202 	struct dst_entry *dst = skb_dst(skb);
203 	unsigned int head_room;
204 	struct ipv6hdr *hdr;
205 	u8  proto = fl6->flowi6_proto;
206 	int seg_len = skb->len;
207 	int hlimit = -1;
208 	u32 mtu;
209 
210 	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
211 	if (opt)
212 		head_room += opt->opt_nflen + opt->opt_flen;
213 
214 	if (unlikely(skb_headroom(skb) < head_room)) {
215 		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
216 		if (!skb2) {
217 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
218 				      IPSTATS_MIB_OUTDISCARDS);
219 			kfree_skb(skb);
220 			return -ENOBUFS;
221 		}
222 		if (skb->sk)
223 			skb_set_owner_w(skb2, skb->sk);
224 		consume_skb(skb);
225 		skb = skb2;
226 	}
227 
228 	if (opt) {
229 		seg_len += opt->opt_nflen + opt->opt_flen;
230 
231 		if (opt->opt_flen)
232 			ipv6_push_frag_opts(skb, opt, &proto);
233 
234 		if (opt->opt_nflen)
235 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
236 					     &fl6->saddr);
237 	}
238 
239 	skb_push(skb, sizeof(struct ipv6hdr));
240 	skb_reset_network_header(skb);
241 	hdr = ipv6_hdr(skb);
242 
243 	/*
244 	 *	Fill in the IPv6 header
245 	 */
246 	if (np)
247 		hlimit = np->hop_limit;
248 	if (hlimit < 0)
249 		hlimit = ip6_dst_hoplimit(dst);
250 
251 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
252 				ip6_autoflowlabel(net, np), fl6));
253 
254 	hdr->payload_len = htons(seg_len);
255 	hdr->nexthdr = proto;
256 	hdr->hop_limit = hlimit;
257 
258 	hdr->saddr = fl6->saddr;
259 	hdr->daddr = *first_hop;
260 
261 	skb->protocol = htons(ETH_P_IPV6);
262 	skb->priority = priority;
263 	skb->mark = mark;
264 
265 	mtu = dst_mtu(dst);
266 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
267 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
268 			      IPSTATS_MIB_OUT, skb->len);
269 
270 		/* if egress device is enslaved to an L3 master device pass the
271 		 * skb to its handler for processing
272 		 */
273 		skb = l3mdev_ip6_out((struct sock *)sk, skb);
274 		if (unlikely(!skb))
275 			return 0;
276 
277 		/* hooks should never assume socket lock is held.
278 		 * we promote our socket to non const
279 		 */
280 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
281 			       net, (struct sock *)sk, skb, NULL, dst->dev,
282 			       dst_output);
283 	}
284 
285 	skb->dev = dst->dev;
286 	/* ipv6_local_error() does not require socket lock,
287 	 * we promote our socket to non const
288 	 */
289 	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
290 
291 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
292 	kfree_skb(skb);
293 	return -EMSGSIZE;
294 }
295 EXPORT_SYMBOL(ip6_xmit);
296 
297 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
298 {
299 	struct ip6_ra_chain *ra;
300 	struct sock *last = NULL;
301 
302 	read_lock(&ip6_ra_lock);
303 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
304 		struct sock *sk = ra->sk;
305 		if (sk && ra->sel == sel &&
306 		    (!sk->sk_bound_dev_if ||
307 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
308 			struct ipv6_pinfo *np = inet6_sk(sk);
309 
310 			if (np && np->rtalert_isolate &&
311 			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
312 				continue;
313 			}
314 			if (last) {
315 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
316 				if (skb2)
317 					rawv6_rcv(last, skb2);
318 			}
319 			last = sk;
320 		}
321 	}
322 
323 	if (last) {
324 		rawv6_rcv(last, skb);
325 		read_unlock(&ip6_ra_lock);
326 		return 1;
327 	}
328 	read_unlock(&ip6_ra_lock);
329 	return 0;
330 }
331 
332 static int ip6_forward_proxy_check(struct sk_buff *skb)
333 {
334 	struct ipv6hdr *hdr = ipv6_hdr(skb);
335 	u8 nexthdr = hdr->nexthdr;
336 	__be16 frag_off;
337 	int offset;
338 
339 	if (ipv6_ext_hdr(nexthdr)) {
340 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
341 		if (offset < 0)
342 			return 0;
343 	} else
344 		offset = sizeof(struct ipv6hdr);
345 
346 	if (nexthdr == IPPROTO_ICMPV6) {
347 		struct icmp6hdr *icmp6;
348 
349 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
350 					 offset + 1 - skb->data)))
351 			return 0;
352 
353 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
354 
355 		switch (icmp6->icmp6_type) {
356 		case NDISC_ROUTER_SOLICITATION:
357 		case NDISC_ROUTER_ADVERTISEMENT:
358 		case NDISC_NEIGHBOUR_SOLICITATION:
359 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
360 		case NDISC_REDIRECT:
361 			/* For reaction involving unicast neighbor discovery
362 			 * message destined to the proxied address, pass it to
363 			 * input function.
364 			 */
365 			return 1;
366 		default:
367 			break;
368 		}
369 	}
370 
371 	/*
372 	 * The proxying router can't forward traffic sent to a link-local
373 	 * address, so signal the sender and discard the packet. This
374 	 * behavior is clarified by the MIPv6 specification.
375 	 */
376 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
377 		dst_link_failure(skb);
378 		return -1;
379 	}
380 
381 	return 0;
382 }
383 
384 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
385 				     struct sk_buff *skb)
386 {
387 	struct dst_entry *dst = skb_dst(skb);
388 
389 	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
390 	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
391 
392 #ifdef CONFIG_NET_SWITCHDEV
393 	if (skb->offload_l3_fwd_mark) {
394 		consume_skb(skb);
395 		return 0;
396 	}
397 #endif
398 
399 	skb->tstamp = 0;
400 	return dst_output(net, sk, skb);
401 }
402 
403 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
404 {
405 	if (skb->len <= mtu)
406 		return false;
407 
408 	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
409 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
410 		return true;
411 
412 	if (skb->ignore_df)
413 		return false;
414 
415 	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
416 		return false;
417 
418 	return true;
419 }
420 
421 int ip6_forward(struct sk_buff *skb)
422 {
423 	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
424 	struct dst_entry *dst = skb_dst(skb);
425 	struct ipv6hdr *hdr = ipv6_hdr(skb);
426 	struct inet6_skb_parm *opt = IP6CB(skb);
427 	struct net *net = dev_net(dst->dev);
428 	u32 mtu;
429 
430 	if (net->ipv6.devconf_all->forwarding == 0)
431 		goto error;
432 
433 	if (skb->pkt_type != PACKET_HOST)
434 		goto drop;
435 
436 	if (unlikely(skb->sk))
437 		goto drop;
438 
439 	if (skb_warn_if_lro(skb))
440 		goto drop;
441 
442 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
443 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
444 		goto drop;
445 	}
446 
447 	skb_forward_csum(skb);
448 
449 	/*
450 	 *	We DO NOT make any processing on
451 	 *	RA packets, pushing them to user level AS IS
452 	 *	without ane WARRANTY that application will be able
453 	 *	to interpret them. The reason is that we
454 	 *	cannot make anything clever here.
455 	 *
456 	 *	We are not end-node, so that if packet contains
457 	 *	AH/ESP, we cannot make anything.
458 	 *	Defragmentation also would be mistake, RA packets
459 	 *	cannot be fragmented, because there is no warranty
460 	 *	that different fragments will go along one path. --ANK
461 	 */
462 	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
463 		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
464 			return 0;
465 	}
466 
467 	/*
468 	 *	check and decrement ttl
469 	 */
470 	if (hdr->hop_limit <= 1) {
471 		/* Force OUTPUT device used as source address */
472 		skb->dev = dst->dev;
473 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
474 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
475 
476 		kfree_skb(skb);
477 		return -ETIMEDOUT;
478 	}
479 
480 	/* XXX: idev->cnf.proxy_ndp? */
481 	if (net->ipv6.devconf_all->proxy_ndp &&
482 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
483 		int proxied = ip6_forward_proxy_check(skb);
484 		if (proxied > 0)
485 			return ip6_input(skb);
486 		else if (proxied < 0) {
487 			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
488 			goto drop;
489 		}
490 	}
491 
492 	if (!xfrm6_route_forward(skb)) {
493 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
494 		goto drop;
495 	}
496 	dst = skb_dst(skb);
497 
498 	/* IPv6 specs say nothing about it, but it is clear that we cannot
499 	   send redirects to source routed frames.
500 	   We don't send redirects to frames decapsulated from IPsec.
501 	 */
502 	if (IP6CB(skb)->iif == dst->dev->ifindex &&
503 	    opt->srcrt == 0 && !skb_sec_path(skb)) {
504 		struct in6_addr *target = NULL;
505 		struct inet_peer *peer;
506 		struct rt6_info *rt;
507 
508 		/*
509 		 *	incoming and outgoing devices are the same
510 		 *	send a redirect.
511 		 */
512 
513 		rt = (struct rt6_info *) dst;
514 		if (rt->rt6i_flags & RTF_GATEWAY)
515 			target = &rt->rt6i_gateway;
516 		else
517 			target = &hdr->daddr;
518 
519 		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
520 
521 		/* Limit redirects both by destination (here)
522 		   and by source (inside ndisc_send_redirect)
523 		 */
524 		if (inet_peer_xrlim_allow(peer, 1*HZ))
525 			ndisc_send_redirect(skb, target);
526 		if (peer)
527 			inet_putpeer(peer);
528 	} else {
529 		int addrtype = ipv6_addr_type(&hdr->saddr);
530 
531 		/* This check is security critical. */
532 		if (addrtype == IPV6_ADDR_ANY ||
533 		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
534 			goto error;
535 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
536 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
537 				    ICMPV6_NOT_NEIGHBOUR, 0);
538 			goto error;
539 		}
540 	}
541 
542 	mtu = ip6_dst_mtu_forward(dst);
543 	if (mtu < IPV6_MIN_MTU)
544 		mtu = IPV6_MIN_MTU;
545 
546 	if (ip6_pkt_too_big(skb, mtu)) {
547 		/* Again, force OUTPUT device used as source address */
548 		skb->dev = dst->dev;
549 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
550 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
551 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
552 				IPSTATS_MIB_FRAGFAILS);
553 		kfree_skb(skb);
554 		return -EMSGSIZE;
555 	}
556 
557 	if (skb_cow(skb, dst->dev->hard_header_len)) {
558 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
559 				IPSTATS_MIB_OUTDISCARDS);
560 		goto drop;
561 	}
562 
563 	hdr = ipv6_hdr(skb);
564 
565 	/* Mangling hops number delayed to point after skb COW */
566 
567 	hdr->hop_limit--;
568 
569 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
570 		       net, NULL, skb, skb->dev, dst->dev,
571 		       ip6_forward_finish);
572 
573 error:
574 	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
575 drop:
576 	kfree_skb(skb);
577 	return -EINVAL;
578 }
579 
580 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
581 {
582 	to->pkt_type = from->pkt_type;
583 	to->priority = from->priority;
584 	to->protocol = from->protocol;
585 	skb_dst_drop(to);
586 	skb_dst_set(to, dst_clone(skb_dst(from)));
587 	to->dev = from->dev;
588 	to->mark = from->mark;
589 
590 	skb_copy_hash(to, from);
591 
592 #ifdef CONFIG_NET_SCHED
593 	to->tc_index = from->tc_index;
594 #endif
595 	nf_copy(to, from);
596 	skb_ext_copy(to, from);
597 	skb_copy_secmark(to, from);
598 }
599 
600 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
601 		      u8 nexthdr, __be32 frag_id,
602 		      struct ip6_fraglist_iter *iter)
603 {
604 	unsigned int first_len;
605 	struct frag_hdr *fh;
606 
607 	/* BUILD HEADER */
608 	*prevhdr = NEXTHDR_FRAGMENT;
609 	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
610 	if (!iter->tmp_hdr)
611 		return -ENOMEM;
612 
613 	iter->frag = skb_shinfo(skb)->frag_list;
614 	skb_frag_list_init(skb);
615 
616 	iter->offset = 0;
617 	iter->hlen = hlen;
618 	iter->frag_id = frag_id;
619 	iter->nexthdr = nexthdr;
620 
621 	__skb_pull(skb, hlen);
622 	fh = __skb_push(skb, sizeof(struct frag_hdr));
623 	__skb_push(skb, hlen);
624 	skb_reset_network_header(skb);
625 	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
626 
627 	fh->nexthdr = nexthdr;
628 	fh->reserved = 0;
629 	fh->frag_off = htons(IP6_MF);
630 	fh->identification = frag_id;
631 
632 	first_len = skb_pagelen(skb);
633 	skb->data_len = first_len - skb_headlen(skb);
634 	skb->len = first_len;
635 	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
636 
637 	return 0;
638 }
639 EXPORT_SYMBOL(ip6_fraglist_init);
640 
641 void ip6_fraglist_prepare(struct sk_buff *skb,
642 			  struct ip6_fraglist_iter *iter)
643 {
644 	struct sk_buff *frag = iter->frag;
645 	unsigned int hlen = iter->hlen;
646 	struct frag_hdr *fh;
647 
648 	frag->ip_summed = CHECKSUM_NONE;
649 	skb_reset_transport_header(frag);
650 	fh = __skb_push(frag, sizeof(struct frag_hdr));
651 	__skb_push(frag, hlen);
652 	skb_reset_network_header(frag);
653 	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
654 	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
655 	fh->nexthdr = iter->nexthdr;
656 	fh->reserved = 0;
657 	fh->frag_off = htons(iter->offset);
658 	if (frag->next)
659 		fh->frag_off |= htons(IP6_MF);
660 	fh->identification = iter->frag_id;
661 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
662 	ip6_copy_metadata(frag, skb);
663 }
664 EXPORT_SYMBOL(ip6_fraglist_prepare);
665 
666 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
667 		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
668 		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
669 {
670 	state->prevhdr = prevhdr;
671 	state->nexthdr = nexthdr;
672 	state->frag_id = frag_id;
673 
674 	state->hlen = hlen;
675 	state->mtu = mtu;
676 
677 	state->left = skb->len - hlen;	/* Space per frame */
678 	state->ptr = hlen;		/* Where to start from */
679 
680 	state->hroom = hdr_room;
681 	state->troom = needed_tailroom;
682 
683 	state->offset = 0;
684 }
685 EXPORT_SYMBOL(ip6_frag_init);
686 
687 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
688 {
689 	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
690 	struct sk_buff *frag;
691 	struct frag_hdr *fh;
692 	unsigned int len;
693 
694 	len = state->left;
695 	/* IF: it doesn't fit, use 'mtu' - the data space left */
696 	if (len > state->mtu)
697 		len = state->mtu;
698 	/* IF: we are not sending up to and including the packet end
699 	   then align the next start on an eight byte boundary */
700 	if (len < state->left)
701 		len &= ~7;
702 
703 	/* Allocate buffer */
704 	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
705 			 state->hroom + state->troom, GFP_ATOMIC);
706 	if (!frag)
707 		return ERR_PTR(-ENOMEM);
708 
709 	/*
710 	 *	Set up data on packet
711 	 */
712 
713 	ip6_copy_metadata(frag, skb);
714 	skb_reserve(frag, state->hroom);
715 	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
716 	skb_reset_network_header(frag);
717 	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
718 	frag->transport_header = (frag->network_header + state->hlen +
719 				  sizeof(struct frag_hdr));
720 
721 	/*
722 	 *	Charge the memory for the fragment to any owner
723 	 *	it might possess
724 	 */
725 	if (skb->sk)
726 		skb_set_owner_w(frag, skb->sk);
727 
728 	/*
729 	 *	Copy the packet header into the new buffer.
730 	 */
731 	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
732 
733 	fragnexthdr_offset = skb_network_header(frag);
734 	fragnexthdr_offset += prevhdr - skb_network_header(skb);
735 	*fragnexthdr_offset = NEXTHDR_FRAGMENT;
736 
737 	/*
738 	 *	Build fragment header.
739 	 */
740 	fh->nexthdr = state->nexthdr;
741 	fh->reserved = 0;
742 	fh->identification = state->frag_id;
743 
744 	/*
745 	 *	Copy a block of the IP datagram.
746 	 */
747 	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
748 			     len));
749 	state->left -= len;
750 
751 	fh->frag_off = htons(state->offset);
752 	if (state->left > 0)
753 		fh->frag_off |= htons(IP6_MF);
754 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
755 
756 	state->ptr += len;
757 	state->offset += len;
758 
759 	return frag;
760 }
761 EXPORT_SYMBOL(ip6_frag_next);
762 
763 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
764 		 int (*output)(struct net *, struct sock *, struct sk_buff *))
765 {
766 	struct sk_buff *frag;
767 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
768 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
769 				inet6_sk(skb->sk) : NULL;
770 	struct ip6_frag_state state;
771 	unsigned int mtu, hlen, nexthdr_offset;
772 	ktime_t tstamp = skb->tstamp;
773 	int hroom, err = 0;
774 	__be32 frag_id;
775 	u8 *prevhdr, nexthdr = 0;
776 
777 	err = ip6_find_1stfragopt(skb, &prevhdr);
778 	if (err < 0)
779 		goto fail;
780 	hlen = err;
781 	nexthdr = *prevhdr;
782 	nexthdr_offset = prevhdr - skb_network_header(skb);
783 
784 	mtu = ip6_skb_dst_mtu(skb);
785 
786 	/* We must not fragment if the socket is set to force MTU discovery
787 	 * or if the skb it not generated by a local socket.
788 	 */
789 	if (unlikely(!skb->ignore_df && skb->len > mtu))
790 		goto fail_toobig;
791 
792 	if (IP6CB(skb)->frag_max_size) {
793 		if (IP6CB(skb)->frag_max_size > mtu)
794 			goto fail_toobig;
795 
796 		/* don't send fragments larger than what we received */
797 		mtu = IP6CB(skb)->frag_max_size;
798 		if (mtu < IPV6_MIN_MTU)
799 			mtu = IPV6_MIN_MTU;
800 	}
801 
802 	if (np && np->frag_size < mtu) {
803 		if (np->frag_size)
804 			mtu = np->frag_size;
805 	}
806 	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
807 		goto fail_toobig;
808 	mtu -= hlen + sizeof(struct frag_hdr);
809 
810 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
811 				    &ipv6_hdr(skb)->saddr);
812 
813 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
814 	    (err = skb_checksum_help(skb)))
815 		goto fail;
816 
817 	prevhdr = skb_network_header(skb) + nexthdr_offset;
818 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
819 	if (skb_has_frag_list(skb)) {
820 		unsigned int first_len = skb_pagelen(skb);
821 		struct ip6_fraglist_iter iter;
822 		struct sk_buff *frag2;
823 
824 		if (first_len - hlen > mtu ||
825 		    ((first_len - hlen) & 7) ||
826 		    skb_cloned(skb) ||
827 		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
828 			goto slow_path;
829 
830 		skb_walk_frags(skb, frag) {
831 			/* Correct geometry. */
832 			if (frag->len > mtu ||
833 			    ((frag->len & 7) && frag->next) ||
834 			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
835 				goto slow_path_clean;
836 
837 			/* Partially cloned skb? */
838 			if (skb_shared(frag))
839 				goto slow_path_clean;
840 
841 			BUG_ON(frag->sk);
842 			if (skb->sk) {
843 				frag->sk = skb->sk;
844 				frag->destructor = sock_wfree;
845 			}
846 			skb->truesize -= frag->truesize;
847 		}
848 
849 		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
850 					&iter);
851 		if (err < 0)
852 			goto fail;
853 
854 		for (;;) {
855 			/* Prepare header of the next frame,
856 			 * before previous one went down. */
857 			if (iter.frag)
858 				ip6_fraglist_prepare(skb, &iter);
859 
860 			skb->tstamp = tstamp;
861 			err = output(net, sk, skb);
862 			if (!err)
863 				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
864 					      IPSTATS_MIB_FRAGCREATES);
865 
866 			if (err || !iter.frag)
867 				break;
868 
869 			skb = ip6_fraglist_next(&iter);
870 		}
871 
872 		kfree(iter.tmp_hdr);
873 
874 		if (err == 0) {
875 			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
876 				      IPSTATS_MIB_FRAGOKS);
877 			return 0;
878 		}
879 
880 		kfree_skb_list(iter.frag);
881 
882 		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
883 			      IPSTATS_MIB_FRAGFAILS);
884 		return err;
885 
886 slow_path_clean:
887 		skb_walk_frags(skb, frag2) {
888 			if (frag2 == frag)
889 				break;
890 			frag2->sk = NULL;
891 			frag2->destructor = NULL;
892 			skb->truesize += frag2->truesize;
893 		}
894 	}
895 
896 slow_path:
897 	/*
898 	 *	Fragment the datagram.
899 	 */
900 
901 	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
902 		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
903 		      &state);
904 
905 	/*
906 	 *	Keep copying data until we run out.
907 	 */
908 
909 	while (state.left > 0) {
910 		frag = ip6_frag_next(skb, &state);
911 		if (IS_ERR(frag)) {
912 			err = PTR_ERR(frag);
913 			goto fail;
914 		}
915 
916 		/*
917 		 *	Put this fragment into the sending queue.
918 		 */
919 		frag->tstamp = tstamp;
920 		err = output(net, sk, frag);
921 		if (err)
922 			goto fail;
923 
924 		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
925 			      IPSTATS_MIB_FRAGCREATES);
926 	}
927 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
928 		      IPSTATS_MIB_FRAGOKS);
929 	consume_skb(skb);
930 	return err;
931 
932 fail_toobig:
933 	if (skb->sk && dst_allfrag(skb_dst(skb)))
934 		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
935 
936 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
937 	err = -EMSGSIZE;
938 
939 fail:
940 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
941 		      IPSTATS_MIB_FRAGFAILS);
942 	kfree_skb(skb);
943 	return err;
944 }
945 
946 static inline int ip6_rt_check(const struct rt6key *rt_key,
947 			       const struct in6_addr *fl_addr,
948 			       const struct in6_addr *addr_cache)
949 {
950 	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
951 		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
952 }
953 
954 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
955 					  struct dst_entry *dst,
956 					  const struct flowi6 *fl6)
957 {
958 	struct ipv6_pinfo *np = inet6_sk(sk);
959 	struct rt6_info *rt;
960 
961 	if (!dst)
962 		goto out;
963 
964 	if (dst->ops->family != AF_INET6) {
965 		dst_release(dst);
966 		return NULL;
967 	}
968 
969 	rt = (struct rt6_info *)dst;
970 	/* Yes, checking route validity in not connected
971 	 * case is not very simple. Take into account,
972 	 * that we do not support routing by source, TOS,
973 	 * and MSG_DONTROUTE		--ANK (980726)
974 	 *
975 	 * 1. ip6_rt_check(): If route was host route,
976 	 *    check that cached destination is current.
977 	 *    If it is network route, we still may
978 	 *    check its validity using saved pointer
979 	 *    to the last used address: daddr_cache.
980 	 *    We do not want to save whole address now,
981 	 *    (because main consumer of this service
982 	 *    is tcp, which has not this problem),
983 	 *    so that the last trick works only on connected
984 	 *    sockets.
985 	 * 2. oif also should be the same.
986 	 */
987 	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
988 #ifdef CONFIG_IPV6_SUBTREES
989 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
990 #endif
991 	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
992 	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
993 		dst_release(dst);
994 		dst = NULL;
995 	}
996 
997 out:
998 	return dst;
999 }
1000 
1001 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1002 			       struct dst_entry **dst, struct flowi6 *fl6)
1003 {
1004 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1005 	struct neighbour *n;
1006 	struct rt6_info *rt;
1007 #endif
1008 	int err;
1009 	int flags = 0;
1010 
1011 	/* The correct way to handle this would be to do
1012 	 * ip6_route_get_saddr, and then ip6_route_output; however,
1013 	 * the route-specific preferred source forces the
1014 	 * ip6_route_output call _before_ ip6_route_get_saddr.
1015 	 *
1016 	 * In source specific routing (no src=any default route),
1017 	 * ip6_route_output will fail given src=any saddr, though, so
1018 	 * that's why we try it again later.
1019 	 */
1020 	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1021 		struct fib6_info *from;
1022 		struct rt6_info *rt;
1023 		bool had_dst = *dst != NULL;
1024 
1025 		if (!had_dst)
1026 			*dst = ip6_route_output(net, sk, fl6);
1027 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1028 
1029 		rcu_read_lock();
1030 		from = rt ? rcu_dereference(rt->from) : NULL;
1031 		err = ip6_route_get_saddr(net, from, &fl6->daddr,
1032 					  sk ? inet6_sk(sk)->srcprefs : 0,
1033 					  &fl6->saddr);
1034 		rcu_read_unlock();
1035 
1036 		if (err)
1037 			goto out_err_release;
1038 
1039 		/* If we had an erroneous initial result, pretend it
1040 		 * never existed and let the SA-enabled version take
1041 		 * over.
1042 		 */
1043 		if (!had_dst && (*dst)->error) {
1044 			dst_release(*dst);
1045 			*dst = NULL;
1046 		}
1047 
1048 		if (fl6->flowi6_oif)
1049 			flags |= RT6_LOOKUP_F_IFACE;
1050 	}
1051 
1052 	if (!*dst)
1053 		*dst = ip6_route_output_flags(net, sk, fl6, flags);
1054 
1055 	err = (*dst)->error;
1056 	if (err)
1057 		goto out_err_release;
1058 
1059 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1060 	/*
1061 	 * Here if the dst entry we've looked up
1062 	 * has a neighbour entry that is in the INCOMPLETE
1063 	 * state and the src address from the flow is
1064 	 * marked as OPTIMISTIC, we release the found
1065 	 * dst entry and replace it instead with the
1066 	 * dst entry of the nexthop router
1067 	 */
1068 	rt = (struct rt6_info *) *dst;
1069 	rcu_read_lock_bh();
1070 	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1071 				      rt6_nexthop(rt, &fl6->daddr));
1072 	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1073 	rcu_read_unlock_bh();
1074 
1075 	if (err) {
1076 		struct inet6_ifaddr *ifp;
1077 		struct flowi6 fl_gw6;
1078 		int redirect;
1079 
1080 		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1081 				      (*dst)->dev, 1);
1082 
1083 		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1084 		if (ifp)
1085 			in6_ifa_put(ifp);
1086 
1087 		if (redirect) {
1088 			/*
1089 			 * We need to get the dst entry for the
1090 			 * default router instead
1091 			 */
1092 			dst_release(*dst);
1093 			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1094 			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1095 			*dst = ip6_route_output(net, sk, &fl_gw6);
1096 			err = (*dst)->error;
1097 			if (err)
1098 				goto out_err_release;
1099 		}
1100 	}
1101 #endif
1102 	if (ipv6_addr_v4mapped(&fl6->saddr) &&
1103 	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1104 		err = -EAFNOSUPPORT;
1105 		goto out_err_release;
1106 	}
1107 
1108 	return 0;
1109 
1110 out_err_release:
1111 	dst_release(*dst);
1112 	*dst = NULL;
1113 
1114 	if (err == -ENETUNREACH)
1115 		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1116 	return err;
1117 }
1118 
1119 /**
1120  *	ip6_dst_lookup - perform route lookup on flow
1121  *	@sk: socket which provides route info
1122  *	@dst: pointer to dst_entry * for result
1123  *	@fl6: flow to lookup
1124  *
1125  *	This function performs a route lookup on the given flow.
1126  *
1127  *	It returns zero on success, or a standard errno code on error.
1128  */
1129 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1130 		   struct flowi6 *fl6)
1131 {
1132 	*dst = NULL;
1133 	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1134 }
1135 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1136 
1137 /**
1138  *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1139  *	@sk: socket which provides route info
1140  *	@fl6: flow to lookup
1141  *	@final_dst: final destination address for ipsec lookup
1142  *
1143  *	This function performs a route lookup on the given flow.
1144  *
1145  *	It returns a valid dst pointer on success, or a pointer encoded
1146  *	error code.
1147  */
1148 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1149 				      const struct in6_addr *final_dst)
1150 {
1151 	struct dst_entry *dst = NULL;
1152 	int err;
1153 
1154 	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1155 	if (err)
1156 		return ERR_PTR(err);
1157 	if (final_dst)
1158 		fl6->daddr = *final_dst;
1159 
1160 	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1161 }
1162 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1163 
1164 /**
1165  *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1166  *	@sk: socket which provides the dst cache and route info
1167  *	@fl6: flow to lookup
1168  *	@final_dst: final destination address for ipsec lookup
1169  *	@connected: whether @sk is connected or not
1170  *
1171  *	This function performs a route lookup on the given flow with the
1172  *	possibility of using the cached route in the socket if it is valid.
1173  *	It will take the socket dst lock when operating on the dst cache.
1174  *	As a result, this function can only be used in process context.
1175  *
1176  *	In addition, for a connected socket, cache the dst in the socket
1177  *	if the current cache is not valid.
1178  *
1179  *	It returns a valid dst pointer on success, or a pointer encoded
1180  *	error code.
1181  */
1182 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1183 					 const struct in6_addr *final_dst,
1184 					 bool connected)
1185 {
1186 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1187 
1188 	dst = ip6_sk_dst_check(sk, dst, fl6);
1189 	if (dst)
1190 		return dst;
1191 
1192 	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1193 	if (connected && !IS_ERR(dst))
1194 		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1195 
1196 	return dst;
1197 }
1198 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1199 
1200 /**
1201  *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
1202  *      @skb: Packet for which lookup is done
1203  *      @dev: Tunnel device
1204  *      @net: Network namespace of tunnel device
1205  *      @sk: Socket which provides route info
1206  *      @saddr: Memory to store the src ip address
1207  *      @info: Tunnel information
1208  *      @protocol: IP protocol
1209  *      @use_cahce: Flag to enable cache usage
1210  *      This function performs a route lookup on a tunnel
1211  *
1212  *      It returns a valid dst pointer and stores src address to be used in
1213  *      tunnel in param saddr on success, else a pointer encoded error code.
1214  */
1215 
1216 struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1217 					struct net_device *dev,
1218 					struct net *net,
1219 					struct socket *sock,
1220 					struct in6_addr *saddr,
1221 					const struct ip_tunnel_info *info,
1222 					u8 protocol,
1223 					bool use_cache)
1224 {
1225 	struct dst_entry *dst = NULL;
1226 #ifdef CONFIG_DST_CACHE
1227 	struct dst_cache *dst_cache;
1228 #endif
1229 	struct flowi6 fl6;
1230 	__u8 prio;
1231 
1232 #ifdef CONFIG_DST_CACHE
1233 	dst_cache = (struct dst_cache *)&info->dst_cache;
1234 	if (use_cache) {
1235 		dst = dst_cache_get_ip6(dst_cache, saddr);
1236 		if (dst)
1237 			return dst;
1238 	}
1239 #endif
1240 	memset(&fl6, 0, sizeof(fl6));
1241 	fl6.flowi6_mark = skb->mark;
1242 	fl6.flowi6_proto = protocol;
1243 	fl6.daddr = info->key.u.ipv6.dst;
1244 	fl6.saddr = info->key.u.ipv6.src;
1245 	prio = info->key.tos;
1246 	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
1247 					  info->key.label);
1248 
1249 	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1250 					      NULL);
1251 	if (IS_ERR(dst)) {
1252 		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1253 		return ERR_PTR(-ENETUNREACH);
1254 	}
1255 	if (dst->dev == dev) { /* is this necessary? */
1256 		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1257 		dst_release(dst);
1258 		return ERR_PTR(-ELOOP);
1259 	}
1260 #ifdef CONFIG_DST_CACHE
1261 	if (use_cache)
1262 		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1263 #endif
1264 	*saddr = fl6.saddr;
1265 	return dst;
1266 }
1267 EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1268 
1269 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1270 					       gfp_t gfp)
1271 {
1272 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1273 }
1274 
1275 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1276 						gfp_t gfp)
1277 {
1278 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1279 }
1280 
1281 static void ip6_append_data_mtu(unsigned int *mtu,
1282 				int *maxfraglen,
1283 				unsigned int fragheaderlen,
1284 				struct sk_buff *skb,
1285 				struct rt6_info *rt,
1286 				unsigned int orig_mtu)
1287 {
1288 	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1289 		if (!skb) {
1290 			/* first fragment, reserve header_len */
1291 			*mtu = orig_mtu - rt->dst.header_len;
1292 
1293 		} else {
1294 			/*
1295 			 * this fragment is not first, the headers
1296 			 * space is regarded as data space.
1297 			 */
1298 			*mtu = orig_mtu;
1299 		}
1300 		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1301 			      + fragheaderlen - sizeof(struct frag_hdr);
1302 	}
1303 }
1304 
1305 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1306 			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1307 			  struct rt6_info *rt, struct flowi6 *fl6)
1308 {
1309 	struct ipv6_pinfo *np = inet6_sk(sk);
1310 	unsigned int mtu;
1311 	struct ipv6_txoptions *opt = ipc6->opt;
1312 
1313 	/*
1314 	 * setup for corking
1315 	 */
1316 	if (opt) {
1317 		if (WARN_ON(v6_cork->opt))
1318 			return -EINVAL;
1319 
1320 		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1321 		if (unlikely(!v6_cork->opt))
1322 			return -ENOBUFS;
1323 
1324 		v6_cork->opt->tot_len = sizeof(*opt);
1325 		v6_cork->opt->opt_flen = opt->opt_flen;
1326 		v6_cork->opt->opt_nflen = opt->opt_nflen;
1327 
1328 		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1329 						    sk->sk_allocation);
1330 		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1331 			return -ENOBUFS;
1332 
1333 		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1334 						    sk->sk_allocation);
1335 		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1336 			return -ENOBUFS;
1337 
1338 		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1339 						   sk->sk_allocation);
1340 		if (opt->hopopt && !v6_cork->opt->hopopt)
1341 			return -ENOBUFS;
1342 
1343 		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1344 						    sk->sk_allocation);
1345 		if (opt->srcrt && !v6_cork->opt->srcrt)
1346 			return -ENOBUFS;
1347 
1348 		/* need source address above miyazawa*/
1349 	}
1350 	dst_hold(&rt->dst);
1351 	cork->base.dst = &rt->dst;
1352 	cork->fl.u.ip6 = *fl6;
1353 	v6_cork->hop_limit = ipc6->hlimit;
1354 	v6_cork->tclass = ipc6->tclass;
1355 	if (rt->dst.flags & DST_XFRM_TUNNEL)
1356 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1357 		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1358 	else
1359 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1360 			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1361 	if (np->frag_size < mtu) {
1362 		if (np->frag_size)
1363 			mtu = np->frag_size;
1364 	}
1365 	if (mtu < IPV6_MIN_MTU)
1366 		return -EINVAL;
1367 	cork->base.fragsize = mtu;
1368 	cork->base.gso_size = ipc6->gso_size;
1369 	cork->base.tx_flags = 0;
1370 	cork->base.mark = ipc6->sockc.mark;
1371 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1372 
1373 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1374 		cork->base.flags |= IPCORK_ALLFRAG;
1375 	cork->base.length = 0;
1376 
1377 	cork->base.transmit_time = ipc6->sockc.transmit_time;
1378 
1379 	return 0;
1380 }
1381 
1382 static int __ip6_append_data(struct sock *sk,
1383 			     struct flowi6 *fl6,
1384 			     struct sk_buff_head *queue,
1385 			     struct inet_cork *cork,
1386 			     struct inet6_cork *v6_cork,
1387 			     struct page_frag *pfrag,
1388 			     int getfrag(void *from, char *to, int offset,
1389 					 int len, int odd, struct sk_buff *skb),
1390 			     void *from, int length, int transhdrlen,
1391 			     unsigned int flags, struct ipcm6_cookie *ipc6)
1392 {
1393 	struct sk_buff *skb, *skb_prev = NULL;
1394 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1395 	struct ubuf_info *uarg = NULL;
1396 	int exthdrlen = 0;
1397 	int dst_exthdrlen = 0;
1398 	int hh_len;
1399 	int copy;
1400 	int err;
1401 	int offset = 0;
1402 	u32 tskey = 0;
1403 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
1404 	struct ipv6_txoptions *opt = v6_cork->opt;
1405 	int csummode = CHECKSUM_NONE;
1406 	unsigned int maxnonfragsize, headersize;
1407 	unsigned int wmem_alloc_delta = 0;
1408 	bool paged, extra_uref = false;
1409 
1410 	skb = skb_peek_tail(queue);
1411 	if (!skb) {
1412 		exthdrlen = opt ? opt->opt_flen : 0;
1413 		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1414 	}
1415 
1416 	paged = !!cork->gso_size;
1417 	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1418 	orig_mtu = mtu;
1419 
1420 	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1421 	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1422 		tskey = sk->sk_tskey++;
1423 
1424 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1425 
1426 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1427 			(opt ? opt->opt_nflen : 0);
1428 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1429 		     sizeof(struct frag_hdr);
1430 
1431 	headersize = sizeof(struct ipv6hdr) +
1432 		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1433 		     (dst_allfrag(&rt->dst) ?
1434 		      sizeof(struct frag_hdr) : 0) +
1435 		     rt->rt6i_nfheader_len;
1436 
1437 	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1438 	 * the first fragment
1439 	 */
1440 	if (headersize + transhdrlen > mtu)
1441 		goto emsgsize;
1442 
1443 	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1444 	    (sk->sk_protocol == IPPROTO_UDP ||
1445 	     sk->sk_protocol == IPPROTO_RAW)) {
1446 		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1447 				sizeof(struct ipv6hdr));
1448 		goto emsgsize;
1449 	}
1450 
1451 	if (ip6_sk_ignore_df(sk))
1452 		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1453 	else
1454 		maxnonfragsize = mtu;
1455 
1456 	if (cork->length + length > maxnonfragsize - headersize) {
1457 emsgsize:
1458 		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1459 		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1460 		return -EMSGSIZE;
1461 	}
1462 
1463 	/* CHECKSUM_PARTIAL only with no extension headers and when
1464 	 * we are not going to fragment
1465 	 */
1466 	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1467 	    headersize == sizeof(struct ipv6hdr) &&
1468 	    length <= mtu - headersize &&
1469 	    (!(flags & MSG_MORE) || cork->gso_size) &&
1470 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1471 		csummode = CHECKSUM_PARTIAL;
1472 
1473 	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1474 		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1475 		if (!uarg)
1476 			return -ENOBUFS;
1477 		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
1478 		if (rt->dst.dev->features & NETIF_F_SG &&
1479 		    csummode == CHECKSUM_PARTIAL) {
1480 			paged = true;
1481 		} else {
1482 			uarg->zerocopy = 0;
1483 			skb_zcopy_set(skb, uarg, &extra_uref);
1484 		}
1485 	}
1486 
1487 	/*
1488 	 * Let's try using as much space as possible.
1489 	 * Use MTU if total length of the message fits into the MTU.
1490 	 * Otherwise, we need to reserve fragment header and
1491 	 * fragment alignment (= 8-15 octects, in total).
1492 	 *
1493 	 * Note that we may need to "move" the data from the tail of
1494 	 * of the buffer to the new fragment when we split
1495 	 * the message.
1496 	 *
1497 	 * FIXME: It may be fragmented into multiple chunks
1498 	 *        at once if non-fragmentable extension headers
1499 	 *        are too large.
1500 	 * --yoshfuji
1501 	 */
1502 
1503 	cork->length += length;
1504 	if (!skb)
1505 		goto alloc_new_skb;
1506 
1507 	while (length > 0) {
1508 		/* Check if the remaining data fits into current packet. */
1509 		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1510 		if (copy < length)
1511 			copy = maxfraglen - skb->len;
1512 
1513 		if (copy <= 0) {
1514 			char *data;
1515 			unsigned int datalen;
1516 			unsigned int fraglen;
1517 			unsigned int fraggap;
1518 			unsigned int alloclen;
1519 			unsigned int pagedlen;
1520 alloc_new_skb:
1521 			/* There's no room in the current skb */
1522 			if (skb)
1523 				fraggap = skb->len - maxfraglen;
1524 			else
1525 				fraggap = 0;
1526 			/* update mtu and maxfraglen if necessary */
1527 			if (!skb || !skb_prev)
1528 				ip6_append_data_mtu(&mtu, &maxfraglen,
1529 						    fragheaderlen, skb, rt,
1530 						    orig_mtu);
1531 
1532 			skb_prev = skb;
1533 
1534 			/*
1535 			 * If remaining data exceeds the mtu,
1536 			 * we know we need more fragment(s).
1537 			 */
1538 			datalen = length + fraggap;
1539 
1540 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1541 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1542 			fraglen = datalen + fragheaderlen;
1543 			pagedlen = 0;
1544 
1545 			if ((flags & MSG_MORE) &&
1546 			    !(rt->dst.dev->features&NETIF_F_SG))
1547 				alloclen = mtu;
1548 			else if (!paged)
1549 				alloclen = fraglen;
1550 			else {
1551 				alloclen = min_t(int, fraglen, MAX_HEADER);
1552 				pagedlen = fraglen - alloclen;
1553 			}
1554 
1555 			alloclen += dst_exthdrlen;
1556 
1557 			if (datalen != length + fraggap) {
1558 				/*
1559 				 * this is not the last fragment, the trailer
1560 				 * space is regarded as data space.
1561 				 */
1562 				datalen += rt->dst.trailer_len;
1563 			}
1564 
1565 			alloclen += rt->dst.trailer_len;
1566 			fraglen = datalen + fragheaderlen;
1567 
1568 			/*
1569 			 * We just reserve space for fragment header.
1570 			 * Note: this may be overallocation if the message
1571 			 * (without MSG_MORE) fits into the MTU.
1572 			 */
1573 			alloclen += sizeof(struct frag_hdr);
1574 
1575 			copy = datalen - transhdrlen - fraggap - pagedlen;
1576 			if (copy < 0) {
1577 				err = -EINVAL;
1578 				goto error;
1579 			}
1580 			if (transhdrlen) {
1581 				skb = sock_alloc_send_skb(sk,
1582 						alloclen + hh_len,
1583 						(flags & MSG_DONTWAIT), &err);
1584 			} else {
1585 				skb = NULL;
1586 				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1587 				    2 * sk->sk_sndbuf)
1588 					skb = alloc_skb(alloclen + hh_len,
1589 							sk->sk_allocation);
1590 				if (unlikely(!skb))
1591 					err = -ENOBUFS;
1592 			}
1593 			if (!skb)
1594 				goto error;
1595 			/*
1596 			 *	Fill in the control structures
1597 			 */
1598 			skb->protocol = htons(ETH_P_IPV6);
1599 			skb->ip_summed = csummode;
1600 			skb->csum = 0;
1601 			/* reserve for fragmentation and ipsec header */
1602 			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1603 				    dst_exthdrlen);
1604 
1605 			/*
1606 			 *	Find where to start putting bytes
1607 			 */
1608 			data = skb_put(skb, fraglen - pagedlen);
1609 			skb_set_network_header(skb, exthdrlen);
1610 			data += fragheaderlen;
1611 			skb->transport_header = (skb->network_header +
1612 						 fragheaderlen);
1613 			if (fraggap) {
1614 				skb->csum = skb_copy_and_csum_bits(
1615 					skb_prev, maxfraglen,
1616 					data + transhdrlen, fraggap, 0);
1617 				skb_prev->csum = csum_sub(skb_prev->csum,
1618 							  skb->csum);
1619 				data += fraggap;
1620 				pskb_trim_unique(skb_prev, maxfraglen);
1621 			}
1622 			if (copy > 0 &&
1623 			    getfrag(from, data + transhdrlen, offset,
1624 				    copy, fraggap, skb) < 0) {
1625 				err = -EFAULT;
1626 				kfree_skb(skb);
1627 				goto error;
1628 			}
1629 
1630 			offset += copy;
1631 			length -= copy + transhdrlen;
1632 			transhdrlen = 0;
1633 			exthdrlen = 0;
1634 			dst_exthdrlen = 0;
1635 
1636 			/* Only the initial fragment is time stamped */
1637 			skb_shinfo(skb)->tx_flags = cork->tx_flags;
1638 			cork->tx_flags = 0;
1639 			skb_shinfo(skb)->tskey = tskey;
1640 			tskey = 0;
1641 			skb_zcopy_set(skb, uarg, &extra_uref);
1642 
1643 			if ((flags & MSG_CONFIRM) && !skb_prev)
1644 				skb_set_dst_pending_confirm(skb, 1);
1645 
1646 			/*
1647 			 * Put the packet on the pending queue
1648 			 */
1649 			if (!skb->destructor) {
1650 				skb->destructor = sock_wfree;
1651 				skb->sk = sk;
1652 				wmem_alloc_delta += skb->truesize;
1653 			}
1654 			__skb_queue_tail(queue, skb);
1655 			continue;
1656 		}
1657 
1658 		if (copy > length)
1659 			copy = length;
1660 
1661 		if (!(rt->dst.dev->features&NETIF_F_SG) &&
1662 		    skb_tailroom(skb) >= copy) {
1663 			unsigned int off;
1664 
1665 			off = skb->len;
1666 			if (getfrag(from, skb_put(skb, copy),
1667 						offset, copy, off, skb) < 0) {
1668 				__skb_trim(skb, off);
1669 				err = -EFAULT;
1670 				goto error;
1671 			}
1672 		} else if (!uarg || !uarg->zerocopy) {
1673 			int i = skb_shinfo(skb)->nr_frags;
1674 
1675 			err = -ENOMEM;
1676 			if (!sk_page_frag_refill(sk, pfrag))
1677 				goto error;
1678 
1679 			if (!skb_can_coalesce(skb, i, pfrag->page,
1680 					      pfrag->offset)) {
1681 				err = -EMSGSIZE;
1682 				if (i == MAX_SKB_FRAGS)
1683 					goto error;
1684 
1685 				__skb_fill_page_desc(skb, i, pfrag->page,
1686 						     pfrag->offset, 0);
1687 				skb_shinfo(skb)->nr_frags = ++i;
1688 				get_page(pfrag->page);
1689 			}
1690 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1691 			if (getfrag(from,
1692 				    page_address(pfrag->page) + pfrag->offset,
1693 				    offset, copy, skb->len, skb) < 0)
1694 				goto error_efault;
1695 
1696 			pfrag->offset += copy;
1697 			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1698 			skb->len += copy;
1699 			skb->data_len += copy;
1700 			skb->truesize += copy;
1701 			wmem_alloc_delta += copy;
1702 		} else {
1703 			err = skb_zerocopy_iter_dgram(skb, from, copy);
1704 			if (err < 0)
1705 				goto error;
1706 		}
1707 		offset += copy;
1708 		length -= copy;
1709 	}
1710 
1711 	if (wmem_alloc_delta)
1712 		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1713 	return 0;
1714 
1715 error_efault:
1716 	err = -EFAULT;
1717 error:
1718 	if (uarg)
1719 		sock_zerocopy_put_abort(uarg, extra_uref);
1720 	cork->length -= length;
1721 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1722 	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1723 	return err;
1724 }
1725 
1726 int ip6_append_data(struct sock *sk,
1727 		    int getfrag(void *from, char *to, int offset, int len,
1728 				int odd, struct sk_buff *skb),
1729 		    void *from, int length, int transhdrlen,
1730 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1731 		    struct rt6_info *rt, unsigned int flags)
1732 {
1733 	struct inet_sock *inet = inet_sk(sk);
1734 	struct ipv6_pinfo *np = inet6_sk(sk);
1735 	int exthdrlen;
1736 	int err;
1737 
1738 	if (flags&MSG_PROBE)
1739 		return 0;
1740 	if (skb_queue_empty(&sk->sk_write_queue)) {
1741 		/*
1742 		 * setup for corking
1743 		 */
1744 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1745 				     ipc6, rt, fl6);
1746 		if (err)
1747 			return err;
1748 
1749 		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1750 		length += exthdrlen;
1751 		transhdrlen += exthdrlen;
1752 	} else {
1753 		fl6 = &inet->cork.fl.u.ip6;
1754 		transhdrlen = 0;
1755 	}
1756 
1757 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1758 				 &np->cork, sk_page_frag(sk), getfrag,
1759 				 from, length, transhdrlen, flags, ipc6);
1760 }
1761 EXPORT_SYMBOL_GPL(ip6_append_data);
1762 
1763 static void ip6_cork_release(struct inet_cork_full *cork,
1764 			     struct inet6_cork *v6_cork)
1765 {
1766 	if (v6_cork->opt) {
1767 		kfree(v6_cork->opt->dst0opt);
1768 		kfree(v6_cork->opt->dst1opt);
1769 		kfree(v6_cork->opt->hopopt);
1770 		kfree(v6_cork->opt->srcrt);
1771 		kfree(v6_cork->opt);
1772 		v6_cork->opt = NULL;
1773 	}
1774 
1775 	if (cork->base.dst) {
1776 		dst_release(cork->base.dst);
1777 		cork->base.dst = NULL;
1778 		cork->base.flags &= ~IPCORK_ALLFRAG;
1779 	}
1780 	memset(&cork->fl, 0, sizeof(cork->fl));
1781 }
1782 
1783 struct sk_buff *__ip6_make_skb(struct sock *sk,
1784 			       struct sk_buff_head *queue,
1785 			       struct inet_cork_full *cork,
1786 			       struct inet6_cork *v6_cork)
1787 {
1788 	struct sk_buff *skb, *tmp_skb;
1789 	struct sk_buff **tail_skb;
1790 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1791 	struct ipv6_pinfo *np = inet6_sk(sk);
1792 	struct net *net = sock_net(sk);
1793 	struct ipv6hdr *hdr;
1794 	struct ipv6_txoptions *opt = v6_cork->opt;
1795 	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1796 	struct flowi6 *fl6 = &cork->fl.u.ip6;
1797 	unsigned char proto = fl6->flowi6_proto;
1798 
1799 	skb = __skb_dequeue(queue);
1800 	if (!skb)
1801 		goto out;
1802 	tail_skb = &(skb_shinfo(skb)->frag_list);
1803 
1804 	/* move skb->data to ip header from ext header */
1805 	if (skb->data < skb_network_header(skb))
1806 		__skb_pull(skb, skb_network_offset(skb));
1807 	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1808 		__skb_pull(tmp_skb, skb_network_header_len(skb));
1809 		*tail_skb = tmp_skb;
1810 		tail_skb = &(tmp_skb->next);
1811 		skb->len += tmp_skb->len;
1812 		skb->data_len += tmp_skb->len;
1813 		skb->truesize += tmp_skb->truesize;
1814 		tmp_skb->destructor = NULL;
1815 		tmp_skb->sk = NULL;
1816 	}
1817 
1818 	/* Allow local fragmentation. */
1819 	skb->ignore_df = ip6_sk_ignore_df(sk);
1820 
1821 	*final_dst = fl6->daddr;
1822 	__skb_pull(skb, skb_network_header_len(skb));
1823 	if (opt && opt->opt_flen)
1824 		ipv6_push_frag_opts(skb, opt, &proto);
1825 	if (opt && opt->opt_nflen)
1826 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1827 
1828 	skb_push(skb, sizeof(struct ipv6hdr));
1829 	skb_reset_network_header(skb);
1830 	hdr = ipv6_hdr(skb);
1831 
1832 	ip6_flow_hdr(hdr, v6_cork->tclass,
1833 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1834 					ip6_autoflowlabel(net, np), fl6));
1835 	hdr->hop_limit = v6_cork->hop_limit;
1836 	hdr->nexthdr = proto;
1837 	hdr->saddr = fl6->saddr;
1838 	hdr->daddr = *final_dst;
1839 
1840 	skb->priority = sk->sk_priority;
1841 	skb->mark = cork->base.mark;
1842 
1843 	skb->tstamp = cork->base.transmit_time;
1844 
1845 	skb_dst_set(skb, dst_clone(&rt->dst));
1846 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1847 	if (proto == IPPROTO_ICMPV6) {
1848 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1849 
1850 		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1851 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1852 	}
1853 
1854 	ip6_cork_release(cork, v6_cork);
1855 out:
1856 	return skb;
1857 }
1858 
1859 int ip6_send_skb(struct sk_buff *skb)
1860 {
1861 	struct net *net = sock_net(skb->sk);
1862 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1863 	int err;
1864 
1865 	err = ip6_local_out(net, skb->sk, skb);
1866 	if (err) {
1867 		if (err > 0)
1868 			err = net_xmit_errno(err);
1869 		if (err)
1870 			IP6_INC_STATS(net, rt->rt6i_idev,
1871 				      IPSTATS_MIB_OUTDISCARDS);
1872 	}
1873 
1874 	return err;
1875 }
1876 
1877 int ip6_push_pending_frames(struct sock *sk)
1878 {
1879 	struct sk_buff *skb;
1880 
1881 	skb = ip6_finish_skb(sk);
1882 	if (!skb)
1883 		return 0;
1884 
1885 	return ip6_send_skb(skb);
1886 }
1887 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1888 
1889 static void __ip6_flush_pending_frames(struct sock *sk,
1890 				       struct sk_buff_head *queue,
1891 				       struct inet_cork_full *cork,
1892 				       struct inet6_cork *v6_cork)
1893 {
1894 	struct sk_buff *skb;
1895 
1896 	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1897 		if (skb_dst(skb))
1898 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1899 				      IPSTATS_MIB_OUTDISCARDS);
1900 		kfree_skb(skb);
1901 	}
1902 
1903 	ip6_cork_release(cork, v6_cork);
1904 }
1905 
1906 void ip6_flush_pending_frames(struct sock *sk)
1907 {
1908 	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1909 				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1910 }
1911 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1912 
1913 struct sk_buff *ip6_make_skb(struct sock *sk,
1914 			     int getfrag(void *from, char *to, int offset,
1915 					 int len, int odd, struct sk_buff *skb),
1916 			     void *from, int length, int transhdrlen,
1917 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1918 			     struct rt6_info *rt, unsigned int flags,
1919 			     struct inet_cork_full *cork)
1920 {
1921 	struct inet6_cork v6_cork;
1922 	struct sk_buff_head queue;
1923 	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1924 	int err;
1925 
1926 	if (flags & MSG_PROBE)
1927 		return NULL;
1928 
1929 	__skb_queue_head_init(&queue);
1930 
1931 	cork->base.flags = 0;
1932 	cork->base.addr = 0;
1933 	cork->base.opt = NULL;
1934 	cork->base.dst = NULL;
1935 	v6_cork.opt = NULL;
1936 	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1937 	if (err) {
1938 		ip6_cork_release(cork, &v6_cork);
1939 		return ERR_PTR(err);
1940 	}
1941 	if (ipc6->dontfrag < 0)
1942 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1943 
1944 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1945 				&current->task_frag, getfrag, from,
1946 				length + exthdrlen, transhdrlen + exthdrlen,
1947 				flags, ipc6);
1948 	if (err) {
1949 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1950 		return ERR_PTR(err);
1951 	}
1952 
1953 	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1954 }
1955