xref: /openbmc/linux/net/ipv6/ip6_output.c (revision ccd51b9f)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	IPv6 output functions
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on linux/net/ipv4/ip_output.c
10  *
11  *	Changes:
12  *	A.N.Kuznetsov	:	arithmetic in fragmentation.
13  *				extension headers are implemented.
14  *				route changes now work.
15  *				ip6_forward does not confuse sniffers.
16  *				etc.
17  *
18  *      H. von Brand    :       Added missing #include <linux/string.h>
19  *	Imran Patel	:	frag id should be in NBO
20  *      Kazunori MIYAZAWA @USAGI
21  *			:       add ip6_append_data and related functions
22  *				for datagram xmit
23  */
24 
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37 
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41 
42 #include <net/sock.h>
43 #include <net/snmp.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 
58 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
59 {
60 	struct dst_entry *dst = skb_dst(skb);
61 	struct net_device *dev = dst->dev;
62 	const struct in6_addr *nexthop;
63 	struct neighbour *neigh;
64 	int ret;
65 
66 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
67 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
68 
69 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
70 		    ((mroute6_is_socket(net, skb) &&
71 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
72 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
73 					 &ipv6_hdr(skb)->saddr))) {
74 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
75 
76 			/* Do not check for IFF_ALLMULTI; multicast routing
77 			   is not supported in any case.
78 			 */
79 			if (newskb)
80 				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
81 					net, sk, newskb, NULL, newskb->dev,
82 					dev_loopback_xmit);
83 
84 			if (ipv6_hdr(skb)->hop_limit == 0) {
85 				IP6_INC_STATS(net, idev,
86 					      IPSTATS_MIB_OUTDISCARDS);
87 				kfree_skb(skb);
88 				return 0;
89 			}
90 		}
91 
92 		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
93 
94 		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
95 		    IPV6_ADDR_SCOPE_NODELOCAL &&
96 		    !(dev->flags & IFF_LOOPBACK)) {
97 			kfree_skb(skb);
98 			return 0;
99 		}
100 	}
101 
102 	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
103 		int res = lwtunnel_xmit(skb);
104 
105 		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
106 			return res;
107 	}
108 
109 	rcu_read_lock_bh();
110 	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
111 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
112 	if (unlikely(!neigh))
113 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
114 	if (!IS_ERR(neigh)) {
115 		sock_confirm_neigh(skb, neigh);
116 		ret = neigh_output(neigh, skb, false);
117 		rcu_read_unlock_bh();
118 		return ret;
119 	}
120 	rcu_read_unlock_bh();
121 
122 	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
123 	kfree_skb(skb);
124 	return -EINVAL;
125 }
126 
127 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
128 {
129 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
130 	/* Policy lookup after SNAT yielded a new policy */
131 	if (skb_dst(skb)->xfrm) {
132 		IPCB(skb)->flags |= IPSKB_REROUTED;
133 		return dst_output(net, sk, skb);
134 	}
135 #endif
136 
137 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
138 	    dst_allfrag(skb_dst(skb)) ||
139 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
140 		return ip6_fragment(net, sk, skb, ip6_finish_output2);
141 	else
142 		return ip6_finish_output2(net, sk, skb);
143 }
144 
145 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
146 {
147 	int ret;
148 
149 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
150 	switch (ret) {
151 	case NET_XMIT_SUCCESS:
152 		return __ip6_finish_output(net, sk, skb);
153 	case NET_XMIT_CN:
154 		return __ip6_finish_output(net, sk, skb) ? : ret;
155 	default:
156 		kfree_skb(skb);
157 		return ret;
158 	}
159 }
160 
161 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
162 {
163 	struct net_device *dev = skb_dst(skb)->dev;
164 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
165 
166 	skb->protocol = htons(ETH_P_IPV6);
167 	skb->dev = dev;
168 
169 	if (unlikely(idev->cnf.disable_ipv6)) {
170 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
171 		kfree_skb(skb);
172 		return 0;
173 	}
174 
175 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
176 			    net, sk, skb, NULL, dev,
177 			    ip6_finish_output,
178 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
179 }
180 
181 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
182 {
183 	if (!np->autoflowlabel_set)
184 		return ip6_default_np_autolabel(net);
185 	else
186 		return np->autoflowlabel;
187 }
188 
189 /*
190  * xmit an sk_buff (used by TCP, SCTP and DCCP)
191  * Note : socket lock is not held for SYNACK packets, but might be modified
192  * by calls to skb_set_owner_w() and ipv6_local_error(),
193  * which are using proper atomic operations or spinlocks.
194  */
195 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
196 	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
197 {
198 	struct net *net = sock_net(sk);
199 	const struct ipv6_pinfo *np = inet6_sk(sk);
200 	struct in6_addr *first_hop = &fl6->daddr;
201 	struct dst_entry *dst = skb_dst(skb);
202 	unsigned int head_room;
203 	struct ipv6hdr *hdr;
204 	u8  proto = fl6->flowi6_proto;
205 	int seg_len = skb->len;
206 	int hlimit = -1;
207 	u32 mtu;
208 
209 	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
210 	if (opt)
211 		head_room += opt->opt_nflen + opt->opt_flen;
212 
213 	if (unlikely(skb_headroom(skb) < head_room)) {
214 		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
215 		if (!skb2) {
216 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
217 				      IPSTATS_MIB_OUTDISCARDS);
218 			kfree_skb(skb);
219 			return -ENOBUFS;
220 		}
221 		if (skb->sk)
222 			skb_set_owner_w(skb2, skb->sk);
223 		consume_skb(skb);
224 		skb = skb2;
225 	}
226 
227 	if (opt) {
228 		seg_len += opt->opt_nflen + opt->opt_flen;
229 
230 		if (opt->opt_flen)
231 			ipv6_push_frag_opts(skb, opt, &proto);
232 
233 		if (opt->opt_nflen)
234 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
235 					     &fl6->saddr);
236 	}
237 
238 	skb_push(skb, sizeof(struct ipv6hdr));
239 	skb_reset_network_header(skb);
240 	hdr = ipv6_hdr(skb);
241 
242 	/*
243 	 *	Fill in the IPv6 header
244 	 */
245 	if (np)
246 		hlimit = np->hop_limit;
247 	if (hlimit < 0)
248 		hlimit = ip6_dst_hoplimit(dst);
249 
250 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
251 				ip6_autoflowlabel(net, np), fl6));
252 
253 	hdr->payload_len = htons(seg_len);
254 	hdr->nexthdr = proto;
255 	hdr->hop_limit = hlimit;
256 
257 	hdr->saddr = fl6->saddr;
258 	hdr->daddr = *first_hop;
259 
260 	skb->protocol = htons(ETH_P_IPV6);
261 	skb->priority = sk->sk_priority;
262 	skb->mark = mark;
263 
264 	mtu = dst_mtu(dst);
265 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
266 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
267 			      IPSTATS_MIB_OUT, skb->len);
268 
269 		/* if egress device is enslaved to an L3 master device pass the
270 		 * skb to its handler for processing
271 		 */
272 		skb = l3mdev_ip6_out((struct sock *)sk, skb);
273 		if (unlikely(!skb))
274 			return 0;
275 
276 		/* hooks should never assume socket lock is held.
277 		 * we promote our socket to non const
278 		 */
279 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
280 			       net, (struct sock *)sk, skb, NULL, dst->dev,
281 			       dst_output);
282 	}
283 
284 	skb->dev = dst->dev;
285 	/* ipv6_local_error() does not require socket lock,
286 	 * we promote our socket to non const
287 	 */
288 	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
289 
290 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
291 	kfree_skb(skb);
292 	return -EMSGSIZE;
293 }
294 EXPORT_SYMBOL(ip6_xmit);
295 
296 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
297 {
298 	struct ip6_ra_chain *ra;
299 	struct sock *last = NULL;
300 
301 	read_lock(&ip6_ra_lock);
302 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
303 		struct sock *sk = ra->sk;
304 		if (sk && ra->sel == sel &&
305 		    (!sk->sk_bound_dev_if ||
306 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
307 			struct ipv6_pinfo *np = inet6_sk(sk);
308 
309 			if (np && np->rtalert_isolate &&
310 			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
311 				continue;
312 			}
313 			if (last) {
314 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
315 				if (skb2)
316 					rawv6_rcv(last, skb2);
317 			}
318 			last = sk;
319 		}
320 	}
321 
322 	if (last) {
323 		rawv6_rcv(last, skb);
324 		read_unlock(&ip6_ra_lock);
325 		return 1;
326 	}
327 	read_unlock(&ip6_ra_lock);
328 	return 0;
329 }
330 
331 static int ip6_forward_proxy_check(struct sk_buff *skb)
332 {
333 	struct ipv6hdr *hdr = ipv6_hdr(skb);
334 	u8 nexthdr = hdr->nexthdr;
335 	__be16 frag_off;
336 	int offset;
337 
338 	if (ipv6_ext_hdr(nexthdr)) {
339 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
340 		if (offset < 0)
341 			return 0;
342 	} else
343 		offset = sizeof(struct ipv6hdr);
344 
345 	if (nexthdr == IPPROTO_ICMPV6) {
346 		struct icmp6hdr *icmp6;
347 
348 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
349 					 offset + 1 - skb->data)))
350 			return 0;
351 
352 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
353 
354 		switch (icmp6->icmp6_type) {
355 		case NDISC_ROUTER_SOLICITATION:
356 		case NDISC_ROUTER_ADVERTISEMENT:
357 		case NDISC_NEIGHBOUR_SOLICITATION:
358 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
359 		case NDISC_REDIRECT:
360 			/* For reaction involving unicast neighbor discovery
361 			 * message destined to the proxied address, pass it to
362 			 * input function.
363 			 */
364 			return 1;
365 		default:
366 			break;
367 		}
368 	}
369 
370 	/*
371 	 * The proxying router can't forward traffic sent to a link-local
372 	 * address, so signal the sender and discard the packet. This
373 	 * behavior is clarified by the MIPv6 specification.
374 	 */
375 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
376 		dst_link_failure(skb);
377 		return -1;
378 	}
379 
380 	return 0;
381 }
382 
383 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
384 				     struct sk_buff *skb)
385 {
386 	struct dst_entry *dst = skb_dst(skb);
387 
388 	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
389 	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
390 
391 #ifdef CONFIG_NET_SWITCHDEV
392 	if (skb->offload_l3_fwd_mark) {
393 		consume_skb(skb);
394 		return 0;
395 	}
396 #endif
397 
398 	skb->tstamp = 0;
399 	return dst_output(net, sk, skb);
400 }
401 
402 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
403 {
404 	if (skb->len <= mtu)
405 		return false;
406 
407 	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
408 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
409 		return true;
410 
411 	if (skb->ignore_df)
412 		return false;
413 
414 	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
415 		return false;
416 
417 	return true;
418 }
419 
420 int ip6_forward(struct sk_buff *skb)
421 {
422 	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
423 	struct dst_entry *dst = skb_dst(skb);
424 	struct ipv6hdr *hdr = ipv6_hdr(skb);
425 	struct inet6_skb_parm *opt = IP6CB(skb);
426 	struct net *net = dev_net(dst->dev);
427 	u32 mtu;
428 
429 	if (net->ipv6.devconf_all->forwarding == 0)
430 		goto error;
431 
432 	if (skb->pkt_type != PACKET_HOST)
433 		goto drop;
434 
435 	if (unlikely(skb->sk))
436 		goto drop;
437 
438 	if (skb_warn_if_lro(skb))
439 		goto drop;
440 
441 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
442 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
443 		goto drop;
444 	}
445 
446 	skb_forward_csum(skb);
447 
448 	/*
449 	 *	We DO NOT make any processing on
450 	 *	RA packets, pushing them to user level AS IS
451 	 *	without ane WARRANTY that application will be able
452 	 *	to interpret them. The reason is that we
453 	 *	cannot make anything clever here.
454 	 *
455 	 *	We are not end-node, so that if packet contains
456 	 *	AH/ESP, we cannot make anything.
457 	 *	Defragmentation also would be mistake, RA packets
458 	 *	cannot be fragmented, because there is no warranty
459 	 *	that different fragments will go along one path. --ANK
460 	 */
461 	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
462 		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
463 			return 0;
464 	}
465 
466 	/*
467 	 *	check and decrement ttl
468 	 */
469 	if (hdr->hop_limit <= 1) {
470 		/* Force OUTPUT device used as source address */
471 		skb->dev = dst->dev;
472 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
473 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
474 
475 		kfree_skb(skb);
476 		return -ETIMEDOUT;
477 	}
478 
479 	/* XXX: idev->cnf.proxy_ndp? */
480 	if (net->ipv6.devconf_all->proxy_ndp &&
481 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
482 		int proxied = ip6_forward_proxy_check(skb);
483 		if (proxied > 0)
484 			return ip6_input(skb);
485 		else if (proxied < 0) {
486 			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
487 			goto drop;
488 		}
489 	}
490 
491 	if (!xfrm6_route_forward(skb)) {
492 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
493 		goto drop;
494 	}
495 	dst = skb_dst(skb);
496 
497 	/* IPv6 specs say nothing about it, but it is clear that we cannot
498 	   send redirects to source routed frames.
499 	   We don't send redirects to frames decapsulated from IPsec.
500 	 */
501 	if (IP6CB(skb)->iif == dst->dev->ifindex &&
502 	    opt->srcrt == 0 && !skb_sec_path(skb)) {
503 		struct in6_addr *target = NULL;
504 		struct inet_peer *peer;
505 		struct rt6_info *rt;
506 
507 		/*
508 		 *	incoming and outgoing devices are the same
509 		 *	send a redirect.
510 		 */
511 
512 		rt = (struct rt6_info *) dst;
513 		if (rt->rt6i_flags & RTF_GATEWAY)
514 			target = &rt->rt6i_gateway;
515 		else
516 			target = &hdr->daddr;
517 
518 		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
519 
520 		/* Limit redirects both by destination (here)
521 		   and by source (inside ndisc_send_redirect)
522 		 */
523 		if (inet_peer_xrlim_allow(peer, 1*HZ))
524 			ndisc_send_redirect(skb, target);
525 		if (peer)
526 			inet_putpeer(peer);
527 	} else {
528 		int addrtype = ipv6_addr_type(&hdr->saddr);
529 
530 		/* This check is security critical. */
531 		if (addrtype == IPV6_ADDR_ANY ||
532 		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
533 			goto error;
534 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
535 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
536 				    ICMPV6_NOT_NEIGHBOUR, 0);
537 			goto error;
538 		}
539 	}
540 
541 	mtu = ip6_dst_mtu_forward(dst);
542 	if (mtu < IPV6_MIN_MTU)
543 		mtu = IPV6_MIN_MTU;
544 
545 	if (ip6_pkt_too_big(skb, mtu)) {
546 		/* Again, force OUTPUT device used as source address */
547 		skb->dev = dst->dev;
548 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
549 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
550 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
551 				IPSTATS_MIB_FRAGFAILS);
552 		kfree_skb(skb);
553 		return -EMSGSIZE;
554 	}
555 
556 	if (skb_cow(skb, dst->dev->hard_header_len)) {
557 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
558 				IPSTATS_MIB_OUTDISCARDS);
559 		goto drop;
560 	}
561 
562 	hdr = ipv6_hdr(skb);
563 
564 	/* Mangling hops number delayed to point after skb COW */
565 
566 	hdr->hop_limit--;
567 
568 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
569 		       net, NULL, skb, skb->dev, dst->dev,
570 		       ip6_forward_finish);
571 
572 error:
573 	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
574 drop:
575 	kfree_skb(skb);
576 	return -EINVAL;
577 }
578 
579 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
580 {
581 	to->pkt_type = from->pkt_type;
582 	to->priority = from->priority;
583 	to->protocol = from->protocol;
584 	skb_dst_drop(to);
585 	skb_dst_set(to, dst_clone(skb_dst(from)));
586 	to->dev = from->dev;
587 	to->mark = from->mark;
588 
589 	skb_copy_hash(to, from);
590 
591 #ifdef CONFIG_NET_SCHED
592 	to->tc_index = from->tc_index;
593 #endif
594 	nf_copy(to, from);
595 	skb_ext_copy(to, from);
596 	skb_copy_secmark(to, from);
597 }
598 
599 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
600 		      u8 nexthdr, __be32 frag_id,
601 		      struct ip6_fraglist_iter *iter)
602 {
603 	unsigned int first_len;
604 	struct frag_hdr *fh;
605 
606 	/* BUILD HEADER */
607 	*prevhdr = NEXTHDR_FRAGMENT;
608 	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
609 	if (!iter->tmp_hdr)
610 		return -ENOMEM;
611 
612 	iter->frag = skb_shinfo(skb)->frag_list;
613 	skb_frag_list_init(skb);
614 
615 	iter->offset = 0;
616 	iter->hlen = hlen;
617 	iter->frag_id = frag_id;
618 	iter->nexthdr = nexthdr;
619 
620 	__skb_pull(skb, hlen);
621 	fh = __skb_push(skb, sizeof(struct frag_hdr));
622 	__skb_push(skb, hlen);
623 	skb_reset_network_header(skb);
624 	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
625 
626 	fh->nexthdr = nexthdr;
627 	fh->reserved = 0;
628 	fh->frag_off = htons(IP6_MF);
629 	fh->identification = frag_id;
630 
631 	first_len = skb_pagelen(skb);
632 	skb->data_len = first_len - skb_headlen(skb);
633 	skb->len = first_len;
634 	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
635 
636 	return 0;
637 }
638 EXPORT_SYMBOL(ip6_fraglist_init);
639 
640 void ip6_fraglist_prepare(struct sk_buff *skb,
641 			  struct ip6_fraglist_iter *iter)
642 {
643 	struct sk_buff *frag = iter->frag;
644 	unsigned int hlen = iter->hlen;
645 	struct frag_hdr *fh;
646 
647 	frag->ip_summed = CHECKSUM_NONE;
648 	skb_reset_transport_header(frag);
649 	fh = __skb_push(frag, sizeof(struct frag_hdr));
650 	__skb_push(frag, hlen);
651 	skb_reset_network_header(frag);
652 	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
653 	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
654 	fh->nexthdr = iter->nexthdr;
655 	fh->reserved = 0;
656 	fh->frag_off = htons(iter->offset);
657 	if (frag->next)
658 		fh->frag_off |= htons(IP6_MF);
659 	fh->identification = iter->frag_id;
660 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
661 	ip6_copy_metadata(frag, skb);
662 }
663 EXPORT_SYMBOL(ip6_fraglist_prepare);
664 
665 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
666 		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
667 		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
668 {
669 	state->prevhdr = prevhdr;
670 	state->nexthdr = nexthdr;
671 	state->frag_id = frag_id;
672 
673 	state->hlen = hlen;
674 	state->mtu = mtu;
675 
676 	state->left = skb->len - hlen;	/* Space per frame */
677 	state->ptr = hlen;		/* Where to start from */
678 
679 	state->hroom = hdr_room;
680 	state->troom = needed_tailroom;
681 
682 	state->offset = 0;
683 }
684 EXPORT_SYMBOL(ip6_frag_init);
685 
686 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
687 {
688 	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
689 	struct sk_buff *frag;
690 	struct frag_hdr *fh;
691 	unsigned int len;
692 
693 	len = state->left;
694 	/* IF: it doesn't fit, use 'mtu' - the data space left */
695 	if (len > state->mtu)
696 		len = state->mtu;
697 	/* IF: we are not sending up to and including the packet end
698 	   then align the next start on an eight byte boundary */
699 	if (len < state->left)
700 		len &= ~7;
701 
702 	/* Allocate buffer */
703 	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
704 			 state->hroom + state->troom, GFP_ATOMIC);
705 	if (!frag)
706 		return ERR_PTR(-ENOMEM);
707 
708 	/*
709 	 *	Set up data on packet
710 	 */
711 
712 	ip6_copy_metadata(frag, skb);
713 	skb_reserve(frag, state->hroom);
714 	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
715 	skb_reset_network_header(frag);
716 	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
717 	frag->transport_header = (frag->network_header + state->hlen +
718 				  sizeof(struct frag_hdr));
719 
720 	/*
721 	 *	Charge the memory for the fragment to any owner
722 	 *	it might possess
723 	 */
724 	if (skb->sk)
725 		skb_set_owner_w(frag, skb->sk);
726 
727 	/*
728 	 *	Copy the packet header into the new buffer.
729 	 */
730 	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
731 
732 	fragnexthdr_offset = skb_network_header(frag);
733 	fragnexthdr_offset += prevhdr - skb_network_header(skb);
734 	*fragnexthdr_offset = NEXTHDR_FRAGMENT;
735 
736 	/*
737 	 *	Build fragment header.
738 	 */
739 	fh->nexthdr = state->nexthdr;
740 	fh->reserved = 0;
741 	fh->identification = state->frag_id;
742 
743 	/*
744 	 *	Copy a block of the IP datagram.
745 	 */
746 	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
747 			     len));
748 	state->left -= len;
749 
750 	fh->frag_off = htons(state->offset);
751 	if (state->left > 0)
752 		fh->frag_off |= htons(IP6_MF);
753 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
754 
755 	state->ptr += len;
756 	state->offset += len;
757 
758 	return frag;
759 }
760 EXPORT_SYMBOL(ip6_frag_next);
761 
762 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
763 		 int (*output)(struct net *, struct sock *, struct sk_buff *))
764 {
765 	struct sk_buff *frag;
766 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
767 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
768 				inet6_sk(skb->sk) : NULL;
769 	struct ip6_frag_state state;
770 	unsigned int mtu, hlen, nexthdr_offset;
771 	int hroom, err = 0;
772 	__be32 frag_id;
773 	u8 *prevhdr, nexthdr = 0;
774 
775 	err = ip6_find_1stfragopt(skb, &prevhdr);
776 	if (err < 0)
777 		goto fail;
778 	hlen = err;
779 	nexthdr = *prevhdr;
780 	nexthdr_offset = prevhdr - skb_network_header(skb);
781 
782 	mtu = ip6_skb_dst_mtu(skb);
783 
784 	/* We must not fragment if the socket is set to force MTU discovery
785 	 * or if the skb it not generated by a local socket.
786 	 */
787 	if (unlikely(!skb->ignore_df && skb->len > mtu))
788 		goto fail_toobig;
789 
790 	if (IP6CB(skb)->frag_max_size) {
791 		if (IP6CB(skb)->frag_max_size > mtu)
792 			goto fail_toobig;
793 
794 		/* don't send fragments larger than what we received */
795 		mtu = IP6CB(skb)->frag_max_size;
796 		if (mtu < IPV6_MIN_MTU)
797 			mtu = IPV6_MIN_MTU;
798 	}
799 
800 	if (np && np->frag_size < mtu) {
801 		if (np->frag_size)
802 			mtu = np->frag_size;
803 	}
804 	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
805 		goto fail_toobig;
806 	mtu -= hlen + sizeof(struct frag_hdr);
807 
808 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
809 				    &ipv6_hdr(skb)->saddr);
810 
811 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
812 	    (err = skb_checksum_help(skb)))
813 		goto fail;
814 
815 	prevhdr = skb_network_header(skb) + nexthdr_offset;
816 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
817 	if (skb_has_frag_list(skb)) {
818 		unsigned int first_len = skb_pagelen(skb);
819 		struct ip6_fraglist_iter iter;
820 		struct sk_buff *frag2;
821 
822 		if (first_len - hlen > mtu ||
823 		    ((first_len - hlen) & 7) ||
824 		    skb_cloned(skb) ||
825 		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
826 			goto slow_path;
827 
828 		skb_walk_frags(skb, frag) {
829 			/* Correct geometry. */
830 			if (frag->len > mtu ||
831 			    ((frag->len & 7) && frag->next) ||
832 			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
833 				goto slow_path_clean;
834 
835 			/* Partially cloned skb? */
836 			if (skb_shared(frag))
837 				goto slow_path_clean;
838 
839 			BUG_ON(frag->sk);
840 			if (skb->sk) {
841 				frag->sk = skb->sk;
842 				frag->destructor = sock_wfree;
843 			}
844 			skb->truesize -= frag->truesize;
845 		}
846 
847 		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
848 					&iter);
849 		if (err < 0)
850 			goto fail;
851 
852 		for (;;) {
853 			/* Prepare header of the next frame,
854 			 * before previous one went down. */
855 			if (iter.frag)
856 				ip6_fraglist_prepare(skb, &iter);
857 
858 			err = output(net, sk, skb);
859 			if (!err)
860 				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
861 					      IPSTATS_MIB_FRAGCREATES);
862 
863 			if (err || !iter.frag)
864 				break;
865 
866 			skb = ip6_fraglist_next(&iter);
867 		}
868 
869 		kfree(iter.tmp_hdr);
870 
871 		if (err == 0) {
872 			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
873 				      IPSTATS_MIB_FRAGOKS);
874 			return 0;
875 		}
876 
877 		kfree_skb_list(iter.frag);
878 
879 		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
880 			      IPSTATS_MIB_FRAGFAILS);
881 		return err;
882 
883 slow_path_clean:
884 		skb_walk_frags(skb, frag2) {
885 			if (frag2 == frag)
886 				break;
887 			frag2->sk = NULL;
888 			frag2->destructor = NULL;
889 			skb->truesize += frag2->truesize;
890 		}
891 	}
892 
893 slow_path:
894 	/*
895 	 *	Fragment the datagram.
896 	 */
897 
898 	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
899 		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
900 		      &state);
901 
902 	/*
903 	 *	Keep copying data until we run out.
904 	 */
905 
906 	while (state.left > 0) {
907 		frag = ip6_frag_next(skb, &state);
908 		if (IS_ERR(frag)) {
909 			err = PTR_ERR(frag);
910 			goto fail;
911 		}
912 
913 		/*
914 		 *	Put this fragment into the sending queue.
915 		 */
916 		err = output(net, sk, frag);
917 		if (err)
918 			goto fail;
919 
920 		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
921 			      IPSTATS_MIB_FRAGCREATES);
922 	}
923 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
924 		      IPSTATS_MIB_FRAGOKS);
925 	consume_skb(skb);
926 	return err;
927 
928 fail_toobig:
929 	if (skb->sk && dst_allfrag(skb_dst(skb)))
930 		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
931 
932 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
933 	err = -EMSGSIZE;
934 
935 fail:
936 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
937 		      IPSTATS_MIB_FRAGFAILS);
938 	kfree_skb(skb);
939 	return err;
940 }
941 
942 static inline int ip6_rt_check(const struct rt6key *rt_key,
943 			       const struct in6_addr *fl_addr,
944 			       const struct in6_addr *addr_cache)
945 {
946 	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
947 		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
948 }
949 
950 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
951 					  struct dst_entry *dst,
952 					  const struct flowi6 *fl6)
953 {
954 	struct ipv6_pinfo *np = inet6_sk(sk);
955 	struct rt6_info *rt;
956 
957 	if (!dst)
958 		goto out;
959 
960 	if (dst->ops->family != AF_INET6) {
961 		dst_release(dst);
962 		return NULL;
963 	}
964 
965 	rt = (struct rt6_info *)dst;
966 	/* Yes, checking route validity in not connected
967 	 * case is not very simple. Take into account,
968 	 * that we do not support routing by source, TOS,
969 	 * and MSG_DONTROUTE		--ANK (980726)
970 	 *
971 	 * 1. ip6_rt_check(): If route was host route,
972 	 *    check that cached destination is current.
973 	 *    If it is network route, we still may
974 	 *    check its validity using saved pointer
975 	 *    to the last used address: daddr_cache.
976 	 *    We do not want to save whole address now,
977 	 *    (because main consumer of this service
978 	 *    is tcp, which has not this problem),
979 	 *    so that the last trick works only on connected
980 	 *    sockets.
981 	 * 2. oif also should be the same.
982 	 */
983 	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
984 #ifdef CONFIG_IPV6_SUBTREES
985 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
986 #endif
987 	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
988 	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
989 		dst_release(dst);
990 		dst = NULL;
991 	}
992 
993 out:
994 	return dst;
995 }
996 
997 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
998 			       struct dst_entry **dst, struct flowi6 *fl6)
999 {
1000 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1001 	struct neighbour *n;
1002 	struct rt6_info *rt;
1003 #endif
1004 	int err;
1005 	int flags = 0;
1006 
1007 	/* The correct way to handle this would be to do
1008 	 * ip6_route_get_saddr, and then ip6_route_output; however,
1009 	 * the route-specific preferred source forces the
1010 	 * ip6_route_output call _before_ ip6_route_get_saddr.
1011 	 *
1012 	 * In source specific routing (no src=any default route),
1013 	 * ip6_route_output will fail given src=any saddr, though, so
1014 	 * that's why we try it again later.
1015 	 */
1016 	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1017 		struct fib6_info *from;
1018 		struct rt6_info *rt;
1019 		bool had_dst = *dst != NULL;
1020 
1021 		if (!had_dst)
1022 			*dst = ip6_route_output(net, sk, fl6);
1023 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1024 
1025 		rcu_read_lock();
1026 		from = rt ? rcu_dereference(rt->from) : NULL;
1027 		err = ip6_route_get_saddr(net, from, &fl6->daddr,
1028 					  sk ? inet6_sk(sk)->srcprefs : 0,
1029 					  &fl6->saddr);
1030 		rcu_read_unlock();
1031 
1032 		if (err)
1033 			goto out_err_release;
1034 
1035 		/* If we had an erroneous initial result, pretend it
1036 		 * never existed and let the SA-enabled version take
1037 		 * over.
1038 		 */
1039 		if (!had_dst && (*dst)->error) {
1040 			dst_release(*dst);
1041 			*dst = NULL;
1042 		}
1043 
1044 		if (fl6->flowi6_oif)
1045 			flags |= RT6_LOOKUP_F_IFACE;
1046 	}
1047 
1048 	if (!*dst)
1049 		*dst = ip6_route_output_flags(net, sk, fl6, flags);
1050 
1051 	err = (*dst)->error;
1052 	if (err)
1053 		goto out_err_release;
1054 
1055 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1056 	/*
1057 	 * Here if the dst entry we've looked up
1058 	 * has a neighbour entry that is in the INCOMPLETE
1059 	 * state and the src address from the flow is
1060 	 * marked as OPTIMISTIC, we release the found
1061 	 * dst entry and replace it instead with the
1062 	 * dst entry of the nexthop router
1063 	 */
1064 	rt = (struct rt6_info *) *dst;
1065 	rcu_read_lock_bh();
1066 	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1067 				      rt6_nexthop(rt, &fl6->daddr));
1068 	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1069 	rcu_read_unlock_bh();
1070 
1071 	if (err) {
1072 		struct inet6_ifaddr *ifp;
1073 		struct flowi6 fl_gw6;
1074 		int redirect;
1075 
1076 		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1077 				      (*dst)->dev, 1);
1078 
1079 		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1080 		if (ifp)
1081 			in6_ifa_put(ifp);
1082 
1083 		if (redirect) {
1084 			/*
1085 			 * We need to get the dst entry for the
1086 			 * default router instead
1087 			 */
1088 			dst_release(*dst);
1089 			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1090 			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1091 			*dst = ip6_route_output(net, sk, &fl_gw6);
1092 			err = (*dst)->error;
1093 			if (err)
1094 				goto out_err_release;
1095 		}
1096 	}
1097 #endif
1098 	if (ipv6_addr_v4mapped(&fl6->saddr) &&
1099 	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1100 		err = -EAFNOSUPPORT;
1101 		goto out_err_release;
1102 	}
1103 
1104 	return 0;
1105 
1106 out_err_release:
1107 	dst_release(*dst);
1108 	*dst = NULL;
1109 
1110 	if (err == -ENETUNREACH)
1111 		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1112 	return err;
1113 }
1114 
1115 /**
1116  *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace handed through to ip6_dst_lookup_tail()
1117  *	@sk: socket which provides route info
1118  *	@dst: pointer to dst_entry * for result
1119  *	@fl6: flow to lookup
1120  *
1121  *	This function performs a route lookup on the given flow.
 *	Whatever *@dst held on entry is overwritten with NULL first, so
 *	ip6_dst_lookup_tail() never starts from a caller-supplied entry.
1122  *
1123  *	It returns zero on success, or a standard errno code on error.
 *	The error exit of ip6_dst_lookup_tail() releases the entry and
 *	resets *@dst to NULL before returning.
1124  */
1125 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1126 		   struct flowi6 *fl6)
1127 {
1128 	*dst = NULL;
1129 	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1130 }
1131 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1132 
1133 /**
1134  *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1135  *	@sk: socket which provides route info
1136  *	@fl6: flow to lookup
1137  *	@final_dst: final destination address for ipsec lookup
1138  *
1139  *	This function performs a route lookup on the given flow.
 *	The network namespace is taken from @sk via sock_net().
 *	When @final_dst is not NULL, @fl6->daddr is overwritten with it
 *	after the route lookup and before the xfrm lookup.
1140  *
1141  *	It returns a valid dst pointer on success, or a pointer encoded
1142  *	error code.
1143  */
1144 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1145 				      const struct in6_addr *final_dst)
1146 {
1147 	struct dst_entry *dst = NULL;
1148 	int err;
1149 
1150 	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1151 	if (err)
1152 		return ERR_PTR(err);
	/* The plain route was resolved for the original daddr; the xfrm
	 * lookup below sees the final destination instead.
	 */
1153 	if (final_dst)
1154 		fl6->daddr = *final_dst;
1155 
1156 	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1157 }
1158 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1159 
1160 /**
1161  *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1162  *	@sk: socket which provides the dst cache and route info
1163  *	@fl6: flow to lookup
1164  *	@final_dst: final destination address for ipsec lookup
1165  *	@connected: whether @sk is connected or not
1166  *
1167  *	This function performs a route lookup on the given flow with the
1168  *	possibility of using the cached route in the socket if it is valid.
1169  *	It will take the socket dst lock when operating on the dst cache.
1170  *	As a result, this function can only be used in process context.
1171  *
1172  *	In addition, for a connected socket, cache the dst in the socket
1173  *	if the current cache is not valid.
1174  *
1175  *	It returns a valid dst pointer on success, or a pointer encoded
1176  *	error code.
1177  */
1178 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1179 					 const struct in6_addr *final_dst,
1180 					 bool connected)
1181 {
	/* Fetch the socket's cached entry, checked against the stored cookie. */
1182 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1183 
	/* ip6_sk_dst_check() (defined outside this view) is expected to hand
	 * back NULL when the cached entry cannot be used for @fl6.
	 */
1184 	dst = ip6_sk_dst_check(sk, dst, fl6);
1185 	if (dst)
1186 		return dst;
1187 
	/* Cache miss: do a full lookup.  Only a successful result on a
	 * connected socket is stored, and the cache gets its own clone so
	 * the pointer returned to the caller is unaffected.
	 */
1188 	dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1189 	if (connected && !IS_ERR(dst))
1190 		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1191 
1192 	return dst;
1193 }
1194 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1195 
/*
 * ip6_opt_dup - duplicate an IPv6 option header with kmemdup()
 * @src: header to copy, may be NULL
 * @gfp: allocation flags passed to kmemdup()
 *
 * Copies (src->hdrlen + 1) * 8 bytes: hdrlen counts 8-octet units and
 * does not include the first 8 octets of the header.
 *
 * Returns NULL when @src is NULL; otherwise whatever kmemdup() returns,
 * so callers must tell "nothing to copy" from a failed copy by looking
 * at @src themselves.
 */
1196 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1197 					       gfp_t gfp)
1198 {
1199 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1200 }
1201 
/*
 * ip6_rthdr_dup - duplicate an IPv6 routing header with kmemdup()
 * @src: routing header to copy, may be NULL
 * @gfp: allocation flags passed to kmemdup()
 *
 * Same sizing rule as ip6_opt_dup(): (src->hdrlen + 1) * 8 bytes are
 * copied.  Returns NULL when @src is NULL; otherwise whatever kmemdup()
 * returns.
 */
1202 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1203 						gfp_t gfp)
1204 {
1205 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1206 }
1207 
/*
 * ip6_append_data_mtu - recompute the MTU and fragment limit for the next skb
 * @mtu: out, MTU to use for the skb about to be allocated
 * @maxfraglen: out, largest skb length for a non-final fragment
 * @fragheaderlen: length of the headers repeated in every fragment
 * @skb: current tail skb, NULL when the first skb is being created
 * @rt: route in use
 * @orig_mtu: MTU the caller computed on entry
 *
 * Nothing is written when the route is flagged DST_XFRM_TUNNEL.
 *
 * *@maxfraglen is the fragmentable part of *@mtu rounded down to a
 * multiple of 8, plus @fragheaderlen, minus room for a struct frag_hdr.
 *
 * NOTE(review): @maxfraglen is declared "int *" here while
 * __ip6_append_data() passes the address of an unsigned int.
 */
1208 static void ip6_append_data_mtu(unsigned int *mtu,
1209 				int *maxfraglen,
1210 				unsigned int fragheaderlen,
1211 				struct sk_buff *skb,
1212 				struct rt6_info *rt,
1213 				unsigned int orig_mtu)
1214 {
1215 	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1216 		if (!skb) {
1217 			/* first fragment, reserve header_len */
1218 			*mtu = orig_mtu - rt->dst.header_len;
1219 
1220 		} else {
1221 			/*
1222 			 * this fragment is not first, the headers
1223 			 * space is regarded as data space.
1224 			 */
1225 			*mtu = orig_mtu;
1226 		}
1227 		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1228 			      + fragheaderlen - sizeof(struct frag_hdr);
1229 	}
1230 }
1231 
/*
 * ip6_setup_cork - initialise cork state before data is appended
 * @sk: sending socket
 * @cork: generic cork state (dst, saved flow, fragsize, flags, ...)
 * @v6_cork: IPv6 specific cork state (options, hop limit, tclass)
 * @ipc6: per-call parameters (options, hlimit, tclass, gso_size, sockc)
 * @rt: route to use; a reference is taken with dst_hold()
 * @fl6: flow, copied into @cork
 *
 * If @ipc6 carries tx options, a private copy of the option block and of
 * each extension header is made so the cork does not point at the
 * caller's memory.
 *
 * Returns 0 on success, -EINVAL if @v6_cork already holds options or the
 * resulting MTU is below IPV6_MIN_MTU, -ENOBUFS if an allocation fails.
 *
 * Error returns do not undo earlier steps: option copies made so far
 * stay attached to @v6_cork->opt, and the MTU check fails only after the
 * dst reference was taken and stored.  The caller has to release that
 * state (ip6_make_skb() does so through ip6_cork_release()).
 */
1232 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1233 			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1234 			  struct rt6_info *rt, struct flowi6 *fl6)
1235 {
1236 	struct ipv6_pinfo *np = inet6_sk(sk);
1237 	unsigned int mtu;
1238 	struct ipv6_txoptions *opt = ipc6->opt;
1239 
1240 	/*
1241 	 * setup for corking
1242 	 */
1243 	if (opt) {
1244 		if (WARN_ON(v6_cork->opt))
1245 			return -EINVAL;
1246 
1247 		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1248 		if (unlikely(!v6_cork->opt))
1249 			return -ENOBUFS;
1250 
1251 		v6_cork->opt->tot_len = sizeof(*opt);
1252 		v6_cork->opt->opt_flen = opt->opt_flen;
1253 		v6_cork->opt->opt_nflen = opt->opt_nflen;
1254 
		/* The dup helpers return NULL both for "no header" and for
		 * a failed copy, hence the extra test on the source pointer.
		 */
1255 		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1256 						    sk->sk_allocation);
1257 		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1258 			return -ENOBUFS;
1259 
1260 		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1261 						    sk->sk_allocation);
1262 		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1263 			return -ENOBUFS;
1264 
1265 		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1266 						   sk->sk_allocation);
1267 		if (opt->hopopt && !v6_cork->opt->hopopt)
1268 			return -ENOBUFS;
1269 
1270 		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1271 						    sk->sk_allocation);
1272 		if (opt->srcrt && !v6_cork->opt->srcrt)
1273 			return -ENOBUFS;
1274 
1275 		/* need source address above miyazawa*/
1276 	}
1277 	dst_hold(&rt->dst);
1278 	cork->base.dst = &rt->dst;
1279 	cork->fl.u.ip6 = *fl6;
1280 	v6_cork->hop_limit = ipc6->hlimit;
1281 	v6_cork->tclass = ipc6->tclass;
	/* With pmtudisc at IPV6_PMTUDISC_PROBE or above the device MTU is
	 * used; otherwise the dst MTU, taken from the xfrm path dst unless
	 * the route is an xfrm tunnel.
	 */
1282 	if (rt->dst.flags & DST_XFRM_TUNNEL)
1283 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1284 		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1285 	else
1286 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1287 			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A non-zero socket frag_size may only lower the MTU. */
1288 	if (np->frag_size < mtu) {
1289 		if (np->frag_size)
1290 			mtu = np->frag_size;
1291 	}
1292 	if (mtu < IPV6_MIN_MTU)
1293 		return -EINVAL;
1294 	cork->base.fragsize = mtu;
1295 	cork->base.gso_size = ipc6->gso_size;
1296 	cork->base.tx_flags = 0;
1297 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1298 
1299 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1300 		cork->base.flags |= IPCORK_ALLFRAG;
1301 	cork->base.length = 0;
1302 
1303 	cork->base.transmit_time = ipc6->sockc.transmit_time;
1304 
1305 	return 0;
1306 }
1307 
/*
 * __ip6_append_data - append user data to a queue of pending skbs
 * @sk: sending socket
 * @fl6: flow, used here only for local error reporting
 * @queue: queue the skbs are appended to
 * @cork: generic cork state prepared by ip6_setup_cork()
 * @v6_cork: IPv6 cork state (tx options)
 * @pfrag: page fragment used when payload is placed in page frags
 * @getfrag: callback that copies @len bytes from @from at @offset to @to
 * @from: opaque source handed to @getfrag
 * @length: number of bytes to append, including @transhdrlen
 * @transhdrlen: bytes reserved in the first skb ahead of the copied data;
 *	reset to 0 once the first skb has been built
 * @flags: MSG_* flags of the send call
 * @ipc6: per-call parameters (only dontfrag is read here)
 *
 * Data is first packed into the tail skb of @queue up to the current
 * limit (mtu or maxfraglen); when that skb is full a new one is
 * allocated, and any bytes past maxfraglen in the previous skb are moved
 * into it so every non-final skb ends on an 8-octet fragment boundary.
 *
 * Returns 0 on success.  -EMSGSIZE is returned, after reporting through
 * ipv6_local_error()/ipv6_local_rxpmtu(), when the headers or the total
 * length do not fit.  Other failures (-ENOBUFS, -EINVAL, -EFAULT,
 * -ENOMEM, or the error from sock_alloc_send_skb() /
 * skb_zerocopy_iter_dgram()) leave the already queued skbs on @queue,
 * subtract the unconsumed @length from cork->length and count an
 * IPSTATS_MIB_OUTDISCARDS.
 */
1308 static int __ip6_append_data(struct sock *sk,
1309 			     struct flowi6 *fl6,
1310 			     struct sk_buff_head *queue,
1311 			     struct inet_cork *cork,
1312 			     struct inet6_cork *v6_cork,
1313 			     struct page_frag *pfrag,
1314 			     int getfrag(void *from, char *to, int offset,
1315 					 int len, int odd, struct sk_buff *skb),
1316 			     void *from, int length, int transhdrlen,
1317 			     unsigned int flags, struct ipcm6_cookie *ipc6)
1318 {
1319 	struct sk_buff *skb, *skb_prev = NULL;
1320 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1321 	struct ubuf_info *uarg = NULL;
1322 	int exthdrlen = 0;
1323 	int dst_exthdrlen = 0;
1324 	int hh_len;
1325 	int copy;
1326 	int err;
1327 	int offset = 0;
1328 	u32 tskey = 0;
1329 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
1330 	struct ipv6_txoptions *opt = v6_cork->opt;
1331 	int csummode = CHECKSUM_NONE;
1332 	unsigned int maxnonfragsize, headersize;
	/* Memory queued by this call; added to sk_wmem_alloc once at exit. */
1333 	unsigned int wmem_alloc_delta = 0;
1334 	bool paged, extra_uref = false;
1335 
1336 	skb = skb_peek_tail(queue);
	/* Extension header room is only needed in the first skb of a queue. */
1337 	if (!skb) {
1338 		exthdrlen = opt ? opt->opt_flen : 0;
1339 		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1340 	}
1341 
	/* With a GSO size set, payload goes to page frags and the limit is
	 * IP6_MAX_MTU rather than the corked fragsize.
	 */
1342 	paged = !!cork->gso_size;
1343 	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1344 	orig_mtu = mtu;
1345 
1346 	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1347 	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1348 		tskey = sk->sk_tskey++;
1349 
1350 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1351 
	/* fragheaderlen: headers repeated in every fragment.
	 * maxfraglen: largest skb length whose fragmentable part is a
	 * multiple of 8 and still leaves room for a fragment header.
	 */
1352 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1353 			(opt ? opt->opt_nflen : 0);
1354 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1355 		     sizeof(struct frag_hdr);
1356 
1357 	headersize = sizeof(struct ipv6hdr) +
1358 		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1359 		     (dst_allfrag(&rt->dst) ?
1360 		      sizeof(struct frag_hdr) : 0) +
1361 		     rt->rt6i_nfheader_len;
1362 
1363 	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1364 	 * the first fragment
1365 	 */
1366 	if (headersize + transhdrlen > mtu)
1367 		goto emsgsize;
1368 
	/* dontfrag on UDP/RAW sockets: report the usable size via
	 * ipv6_local_rxpmtu() before failing with -EMSGSIZE.
	 */
1369 	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1370 	    (sk->sk_protocol == IPPROTO_UDP ||
1371 	     sk->sk_protocol == IPPROTO_RAW)) {
1372 		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1373 				sizeof(struct ipv6hdr));
1374 		goto emsgsize;
1375 	}
1376 
1377 	if (ip6_sk_ignore_df(sk))
1378 		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1379 	else
1380 		maxnonfragsize = mtu;
1381 
1382 	if (cork->length + length > maxnonfragsize - headersize) {
1383 emsgsize:
1384 		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1385 		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1386 		return -EMSGSIZE;
1387 	}
1388 
1389 	/* CHECKSUM_PARTIAL only with no extension headers and when
1390 	 * we are not going to fragment
1391 	 */
1392 	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1393 	    headersize == sizeof(struct ipv6hdr) &&
1394 	    length <= mtu - headersize &&
1395 	    (!(flags & MSG_MORE) || cork->gso_size) &&
1396 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1397 		csummode = CHECKSUM_PARTIAL;
1398 
	/* Zerocopy is only kept when the device does scatter-gather and the
	 * checksum is offloaded; otherwise the uarg is still attached for
	 * the completion notification but data is copied.
	 */
1399 	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1400 		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1401 		if (!uarg)
1402 			return -ENOBUFS;
1403 		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
1404 		if (rt->dst.dev->features & NETIF_F_SG &&
1405 		    csummode == CHECKSUM_PARTIAL) {
1406 			paged = true;
1407 		} else {
1408 			uarg->zerocopy = 0;
1409 			skb_zcopy_set(skb, uarg, &extra_uref);
1410 		}
1411 	}
1412 
1413 	/*
1414 	 * Let's try using as much space as possible.
1415 	 * Use MTU if total length of the message fits into the MTU.
1416 	 * Otherwise, we need to reserve fragment header and
1417 	 * fragment alignment (= 8-15 octets, in total).
1418 	 *
1419 	 * Note that we may need to "move" the data from the tail
1420 	 * of the buffer to the new fragment when we split
1421 	 * the message.
1422 	 *
1423 	 * FIXME: It may be fragmented into multiple chunks
1424 	 *        at once if non-fragmentable extension headers
1425 	 *        are too large.
1426 	 * --yoshfuji
1427 	 */
1428 
	/* Accounted up front; the error path subtracts what was not used. */
1429 	cork->length += length;
1430 	if (!skb)
1431 		goto alloc_new_skb;
1432 
1433 	while (length > 0) {
1434 		/* Check if the remaining data fits into current packet. */
1435 		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1436 		if (copy < length)
1437 			copy = maxfraglen - skb->len;
1438 
1439 		if (copy <= 0) {
1440 			char *data;
1441 			unsigned int datalen;
1442 			unsigned int fraglen;
1443 			unsigned int fraggap;
1444 			unsigned int alloclen;
1445 			unsigned int pagedlen;
1446 alloc_new_skb:
1447 			/* There's no room in the current skb */
			/* fraggap: bytes of the current skb lying past
			 * maxfraglen; they are moved into the new skb below.
			 */
1448 			if (skb)
1449 				fraggap = skb->len - maxfraglen;
1450 			else
1451 				fraggap = 0;
1452 			/* update mtu and maxfraglen if necessary */
1453 			if (!skb || !skb_prev)
1454 				ip6_append_data_mtu(&mtu, &maxfraglen,
1455 						    fragheaderlen, skb, rt,
1456 						    orig_mtu);
1457 
1458 			skb_prev = skb;
1459 
1460 			/*
1461 			 * If remaining data exceeds the mtu,
1462 			 * we know we need more fragment(s).
1463 			 */
1464 			datalen = length + fraggap;
1465 
1466 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1467 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1468 			fraglen = datalen + fragheaderlen;
1469 			pagedlen = 0;
1470 
			/* Linear allocation size: a full mtu when more data
			 * is expected and the device cannot do SG, the exact
			 * fragment when not paged, else only up to MAX_HEADER
			 * with the rest (pagedlen) going to page frags.
			 */
1471 			if ((flags & MSG_MORE) &&
1472 			    !(rt->dst.dev->features&NETIF_F_SG))
1473 				alloclen = mtu;
1474 			else if (!paged)
1475 				alloclen = fraglen;
1476 			else {
1477 				alloclen = min_t(int, fraglen, MAX_HEADER);
1478 				pagedlen = fraglen - alloclen;
1479 			}
1480 
1481 			alloclen += dst_exthdrlen;
1482 
1483 			if (datalen != length + fraggap) {
1484 				/*
1485 				 * this is not the last fragment, the trailer
1486 				 * space is regarded as data space.
1487 				 */
1488 				datalen += rt->dst.trailer_len;
1489 			}
1490 
1491 			alloclen += rt->dst.trailer_len;
1492 			fraglen = datalen + fragheaderlen;
1493 
1494 			/*
1495 			 * We just reserve space for fragment header.
1496 			 * Note: this may be overallocation if the message
1497 			 * (without MSG_MORE) fits into the MTU.
1498 			 */
1499 			alloclen += sizeof(struct frag_hdr);
1500 
1501 			copy = datalen - transhdrlen - fraggap - pagedlen;
1502 			if (copy < 0) {
1503 				err = -EINVAL;
1504 				goto error;
1505 			}
			/* The first skb (transhdrlen != 0) is allocated with
			 * sock_alloc_send_skb(); later ones use alloc_skb()
			 * as long as the socket stays within 2 * sk_sndbuf,
			 * counting what this call queued but has not yet
			 * charged.
			 */
1506 			if (transhdrlen) {
1507 				skb = sock_alloc_send_skb(sk,
1508 						alloclen + hh_len,
1509 						(flags & MSG_DONTWAIT), &err);
1510 			} else {
1511 				skb = NULL;
1512 				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1513 				    2 * sk->sk_sndbuf)
1514 					skb = alloc_skb(alloclen + hh_len,
1515 							sk->sk_allocation);
1516 				if (unlikely(!skb))
1517 					err = -ENOBUFS;
1518 			}
1519 			if (!skb)
1520 				goto error;
1521 			/*
1522 			 *	Fill in the control structures
1523 			 */
1524 			skb->protocol = htons(ETH_P_IPV6);
1525 			skb->ip_summed = csummode;
1526 			skb->csum = 0;
1527 			/* reserve for fragmentation and ipsec header */
1528 			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1529 				    dst_exthdrlen);
1530 
1531 			/*
1532 			 *	Find where to start putting bytes
1533 			 */
1534 			data = skb_put(skb, fraglen - pagedlen);
1535 			skb_set_network_header(skb, exthdrlen);
1536 			data += fragheaderlen;
1537 			skb->transport_header = (skb->network_header +
1538 						 fragheaderlen);
			/* Move the overhang of the previous skb into this one
			 * and keep both software checksums consistent.
			 */
1539 			if (fraggap) {
1540 				skb->csum = skb_copy_and_csum_bits(
1541 					skb_prev, maxfraglen,
1542 					data + transhdrlen, fraggap, 0);
1543 				skb_prev->csum = csum_sub(skb_prev->csum,
1544 							  skb->csum);
1545 				data += fraggap;
1546 				pskb_trim_unique(skb_prev, maxfraglen);
1547 			}
1548 			if (copy > 0 &&
1549 			    getfrag(from, data + transhdrlen, offset,
1550 				    copy, fraggap, skb) < 0) {
1551 				err = -EFAULT;
1552 				kfree_skb(skb);
1553 				goto error;
1554 			}
1555 
			/* The transhdrlen bytes reserved in this skb count
			 * against @length as well; header room is a one-time
			 * cost, so it is cleared for later skbs.
			 */
1556 			offset += copy;
1557 			length -= copy + transhdrlen;
1558 			transhdrlen = 0;
1559 			exthdrlen = 0;
1560 			dst_exthdrlen = 0;
1561 
1562 			/* Only the initial fragment is time stamped */
1563 			skb_shinfo(skb)->tx_flags = cork->tx_flags;
1564 			cork->tx_flags = 0;
1565 			skb_shinfo(skb)->tskey = tskey;
1566 			tskey = 0;
1567 			skb_zcopy_set(skb, uarg, &extra_uref);
1568 
1569 			if ((flags & MSG_CONFIRM) && !skb_prev)
1570 				skb_set_dst_pending_confirm(skb, 1);
1571 
1572 			/*
1573 			 * Put the packet on the pending queue
1574 			 */
			/* skbs from alloc_skb() have no destructor yet: own
			 * them here and remember their truesize for the
			 * deferred sk_wmem_alloc update.
			 */
1575 			if (!skb->destructor) {
1576 				skb->destructor = sock_wfree;
1577 				skb->sk = sk;
1578 				wmem_alloc_delta += skb->truesize;
1579 			}
1580 			__skb_queue_tail(queue, skb);
1581 			continue;
1582 		}
1583 
1584 		if (copy > length)
1585 			copy = length;
1586 
		/* Three ways to add @copy bytes to the tail skb: into its
		 * linear tailroom (no SG and enough room), into a page frag
		 * filled through getfrag(), or by zerocopy.
		 */
1587 		if (!(rt->dst.dev->features&NETIF_F_SG) &&
1588 		    skb_tailroom(skb) >= copy) {
1589 			unsigned int off;
1590 
1591 			off = skb->len;
1592 			if (getfrag(from, skb_put(skb, copy),
1593 						offset, copy, off, skb) < 0) {
1594 				__skb_trim(skb, off);
1595 				err = -EFAULT;
1596 				goto error;
1597 			}
1598 		} else if (!uarg || !uarg->zerocopy) {
1599 			int i = skb_shinfo(skb)->nr_frags;
1600 
1601 			err = -ENOMEM;
1602 			if (!sk_page_frag_refill(sk, pfrag))
1603 				goto error;
1604 
1605 			if (!skb_can_coalesce(skb, i, pfrag->page,
1606 					      pfrag->offset)) {
1607 				err = -EMSGSIZE;
1608 				if (i == MAX_SKB_FRAGS)
1609 					goto error;
1610 
1611 				__skb_fill_page_desc(skb, i, pfrag->page,
1612 						     pfrag->offset, 0);
1613 				skb_shinfo(skb)->nr_frags = ++i;
1614 				get_page(pfrag->page);
1615 			}
1616 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
1617 			if (getfrag(from,
1618 				    page_address(pfrag->page) + pfrag->offset,
1619 				    offset, copy, skb->len, skb) < 0)
1620 				goto error_efault;
1621 
1622 			pfrag->offset += copy;
1623 			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1624 			skb->len += copy;
1625 			skb->data_len += copy;
1626 			skb->truesize += copy;
1627 			wmem_alloc_delta += copy;
1628 		} else {
1629 			err = skb_zerocopy_iter_dgram(skb, from, copy);
1630 			if (err < 0)
1631 				goto error;
1632 		}
1633 		offset += copy;
1634 		length -= copy;
1635 	}
1636 
1637 	if (wmem_alloc_delta)
1638 		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1639 	return 0;
1640 
1641 error_efault:
1642 	err = -EFAULT;
1643 error:
1644 	if (uarg)
1645 		sock_zerocopy_put_abort(uarg, extra_uref);
	/* Undo the up-front accounting for the bytes that were not queued. */
1646 	cork->length -= length;
1647 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	/* skbs already queued stay queued, so their memory is still charged. */
1648 	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1649 	return err;
1650 }
1651 
/*
 * ip6_append_data - append data to the socket's write queue
 * @sk: sending socket
 * @getfrag: callback copying data from @from into the skb
 * @from: opaque source handed to @getfrag
 * @length: number of bytes to append
 * @transhdrlen: transport header length; ignored unless the write queue
 *	is empty
 * @ipc6: per-call parameters (options, hop limit, tclass, ...)
 * @fl6: flow; only used when the write queue is empty, later calls use
 *	the flow saved in the cork
 * @rt: route, handed to ip6_setup_cork() on the first call
 * @flags: MSG_* flags of the send call
 *
 * With MSG_PROBE set nothing is done and 0 is returned.
 *
 * On the first call (empty write queue) the cork is set up and the
 * length of the fragmentable options (opt_flen) is added to both @length
 * and @transhdrlen.
 *
 * Returns 0 on success or a negative errno.  A failure of
 * ip6_setup_cork() is returned as is; no cork state is released here
 * (presumably callers flush pending frames on error - TODO confirm).
 */
1652 int ip6_append_data(struct sock *sk,
1653 		    int getfrag(void *from, char *to, int offset, int len,
1654 				int odd, struct sk_buff *skb),
1655 		    void *from, int length, int transhdrlen,
1656 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1657 		    struct rt6_info *rt, unsigned int flags)
1658 {
1659 	struct inet_sock *inet = inet_sk(sk);
1660 	struct ipv6_pinfo *np = inet6_sk(sk);
1661 	int exthdrlen;
1662 	int err;
1663 
1664 	if (flags&MSG_PROBE)
1665 		return 0;
1666 	if (skb_queue_empty(&sk->sk_write_queue)) {
1667 		/*
1668 		 * setup for corking
1669 		 */
1670 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1671 				     ipc6, rt, fl6);
1672 		if (err)
1673 			return err;
1674 
1675 		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1676 		length += exthdrlen;
1677 		transhdrlen += exthdrlen;
1678 	} else {
		/* Continuation of a corked send: stick to the flow saved by
		 * the first call; no transport header is reserved again.
		 */
1679 		fl6 = &inet->cork.fl.u.ip6;
1680 		transhdrlen = 0;
1681 	}
1682 
1683 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1684 				 &np->cork, sk_page_frag(sk), getfrag,
1685 				 from, length, transhdrlen, flags, ipc6);
1686 }
1687 EXPORT_SYMBOL_GPL(ip6_append_data);
1688 
/*
 * ip6_cork_release - drop everything ip6_setup_cork() attached to a cork
 * @cork: generic cork state
 * @v6_cork: IPv6 cork state
 *
 * Frees the private copies of the tx options, releases the dst reference
 * (clearing IPCORK_ALLFRAG along with it) and zeroes the saved flow.
 * Pointers are reset to NULL, so calling this on an already released or
 * only partially set up cork does nothing harmful.
 */
1689 static void ip6_cork_release(struct inet_cork_full *cork,
1690 			     struct inet6_cork *v6_cork)
1691 {
1692 	if (v6_cork->opt) {
1693 		kfree(v6_cork->opt->dst0opt);
1694 		kfree(v6_cork->opt->dst1opt);
1695 		kfree(v6_cork->opt->hopopt);
1696 		kfree(v6_cork->opt->srcrt);
1697 		kfree(v6_cork->opt);
1698 		v6_cork->opt = NULL;
1699 	}
1700 
1701 	if (cork->base.dst) {
1702 		dst_release(cork->base.dst);
1703 		cork->base.dst = NULL;
1704 		cork->base.flags &= ~IPCORK_ALLFRAG;
1705 	}
1706 	memset(&cork->fl, 0, sizeof(cork->fl));
1707 }
1708 
/*
 * __ip6_make_skb - turn a queue of pending skbs into one IPv6 packet
 * @sk: sending socket
 * @queue: queue filled by __ip6_append_data()
 * @cork: generic cork state (route, saved flow, transmit time)
 * @v6_cork: IPv6 cork state (options, hop limit, traffic class)
 *
 * The first queued skb becomes the head; all remaining skbs are chained
 * onto its frag_list.  Extension headers and the IPv6 header are then
 * pushed in front of the payload, the skb gets its priority, mark,
 * timestamp and dst, output statistics are updated and the cork is
 * released.
 *
 * Returns the assembled skb, or NULL if @queue was empty.  In the empty
 * case ip6_cork_release() is not called.
 */
1709 struct sk_buff *__ip6_make_skb(struct sock *sk,
1710 			       struct sk_buff_head *queue,
1711 			       struct inet_cork_full *cork,
1712 			       struct inet6_cork *v6_cork)
1713 {
1714 	struct sk_buff *skb, *tmp_skb;
1715 	struct sk_buff **tail_skb;
1716 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1717 	struct ipv6_pinfo *np = inet6_sk(sk);
1718 	struct net *net = sock_net(sk);
1719 	struct ipv6hdr *hdr;
1720 	struct ipv6_txoptions *opt = v6_cork->opt;
1721 	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1722 	struct flowi6 *fl6 = &cork->fl.u.ip6;
1723 	unsigned char proto = fl6->flowi6_proto;
1724 
1725 	skb = __skb_dequeue(queue);
1726 	if (!skb)
1727 		goto out;
1728 	tail_skb = &(skb_shinfo(skb)->frag_list);
1729 
1730 	/* move skb->data to ip header from ext header */
1731 	if (skb->data < skb_network_header(skb))
1732 		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining skbs onto the head's frag_list.  The head
	 * takes over their length and truesize accounting, so their own
	 * destructor and socket pointer are cleared.
	 */
1733 	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1734 		__skb_pull(tmp_skb, skb_network_header_len(skb));
1735 		*tail_skb = tmp_skb;
1736 		tail_skb = &(tmp_skb->next);
1737 		skb->len += tmp_skb->len;
1738 		skb->data_len += tmp_skb->len;
1739 		skb->truesize += tmp_skb->truesize;
1740 		tmp_skb->destructor = NULL;
1741 		tmp_skb->sk = NULL;
1742 	}
1743 
1744 	/* Allow local fragmentation. */
1745 	skb->ignore_df = ip6_sk_ignore_df(sk);
1746 
	/* final_dst starts as a copy of the flow's daddr; its address is
	 * passed to ipv6_push_nfrag_opts(), which may redirect it.
	 */
1747 	*final_dst = fl6->daddr;
1748 	__skb_pull(skb, skb_network_header_len(skb));
1749 	if (opt && opt->opt_flen)
1750 		ipv6_push_frag_opts(skb, opt, &proto);
1751 	if (opt && opt->opt_nflen)
1752 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1753 
1754 	skb_push(skb, sizeof(struct ipv6hdr));
1755 	skb_reset_network_header(skb);
1756 	hdr = ipv6_hdr(skb);
1757 
1758 	ip6_flow_hdr(hdr, v6_cork->tclass,
1759 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1760 					ip6_autoflowlabel(net, np), fl6));
1761 	hdr->hop_limit = v6_cork->hop_limit;
	/* proto was updated by the push helpers above to the outermost
	 * extension header, if any were pushed.
	 */
1762 	hdr->nexthdr = proto;
1763 	hdr->saddr = fl6->saddr;
1764 	hdr->daddr = *final_dst;
1765 
1766 	skb->priority = sk->sk_priority;
1767 	skb->mark = sk->sk_mark;
1768 
1769 	skb->tstamp = cork->base.transmit_time;
1770 
	/* The skb gets its own dst reference; the cork's reference is
	 * dropped by ip6_cork_release() below.
	 */
1771 	skb_dst_set(skb, dst_clone(&rt->dst));
1772 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1773 	if (proto == IPPROTO_ICMPV6) {
1774 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1775 
1776 		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1777 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1778 	}
1779 
1780 	ip6_cork_release(cork, v6_cork);
1781 out:
1782 	return skb;
1783 }
1784 
/*
 * ip6_send_skb - hand a fully built IPv6 packet to ip6_local_out()
 * @skb: packet to send; skb->sk and the skb's dst must be set
 *
 * Positive return codes of ip6_local_out() are translated with
 * net_xmit_errno(); if an error remains afterwards, an
 * IPSTATS_MIB_OUTDISCARDS is counted on the route's inet6_dev.
 *
 * ip6_local_out() consumes @skb and with it the reference that kept the
 * route alive, yet @rt is dereferenced afterwards for the statistics
 * update.  The whole sequence therefore runs inside an RCU read-side
 * critical section so that @rt cannot be freed in between (this relies
 * on dst entries being freed only after an RCU grace period).
 *
 * Returns 0 on success or an error code.
 */
1785 int ip6_send_skb(struct sk_buff *skb)
1786 {
1787 	struct net *net = sock_net(skb->sk);
1788 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1789 	int err;
1790 
	rcu_read_lock();
1791 	err = ip6_local_out(net, skb->sk, skb);
1792 	if (err) {
1793 		if (err > 0)
1794 			err = net_xmit_errno(err);
1795 		if (err)
1796 			IP6_INC_STATS(net, rt->rt6i_idev,
1797 				      IPSTATS_MIB_OUTDISCARDS);
1798 	}
	rcu_read_unlock();
1799 
1800 	return err;
1801 }
1802 
/*
 * ip6_push_pending_frames - build and send what is pending on a socket
 * @sk: socket to send from
 *
 * ip6_finish_skb() (declared outside this file section) supplies the
 * packet to send.  Returns 0 when it yields no skb, otherwise the result
 * of ip6_send_skb().
 */
1803 int ip6_push_pending_frames(struct sock *sk)
1804 {
1805 	struct sk_buff *skb;
1806 
1807 	skb = ip6_finish_skb(sk);
1808 	if (!skb)
1809 		return 0;
1810 
1811 	return ip6_send_skb(skb);
1812 }
1813 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1814 
/*
 * __ip6_flush_pending_frames - throw away all skbs pending on a queue
 * @sk: socket the queue belongs to (used for the statistics namespace)
 * @queue: queue to empty
 * @cork: generic cork state to release
 * @v6_cork: IPv6 cork state to release
 *
 * skbs are dequeued from the tail and freed; each one that carries a dst
 * is counted as IPSTATS_MIB_OUTDISCARDS.  The cork is released afterwards
 * even if the queue was empty.
 */
1815 static void __ip6_flush_pending_frames(struct sock *sk,
1816 				       struct sk_buff_head *queue,
1817 				       struct inet_cork_full *cork,
1818 				       struct inet6_cork *v6_cork)
1819 {
1820 	struct sk_buff *skb;
1821 
1822 	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1823 		if (skb_dst(skb))
1824 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1825 				      IPSTATS_MIB_OUTDISCARDS);
1826 		kfree_skb(skb);
1827 	}
1828 
1829 	ip6_cork_release(cork, v6_cork);
1830 }
1831 
/*
 * ip6_flush_pending_frames - discard the socket's pending output
 * @sk: socket whose write queue and cork state are dropped
 *
 * Applies __ip6_flush_pending_frames() to sk->sk_write_queue and the
 * cork state embedded in the socket.
 */
1832 void ip6_flush_pending_frames(struct sock *sk)
1833 {
1834 	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1835 				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1836 }
1837 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1838 
/*
 * ip6_make_skb - build a complete IPv6 packet in one go
 * @sk: sending socket
 * @getfrag: callback copying data from @from into the skb
 * @from: opaque source handed to @getfrag
 * @length: payload length, without the fragmentable options
 * @transhdrlen: transport header length
 * @ipc6: per-call parameters; a negative dontfrag is replaced by the
 *	socket's setting
 * @fl6: flow
 * @rt: route
 * @flags: MSG_* flags of the send call
 * @cork: caller provided cork; its base flags/addr/opt/dst are reset here
 *
 * Uses a private skb queue and an on-stack inet6_cork rather than the
 * socket's write queue and IPv6 cork.
 *
 * Returns the skb built by __ip6_make_skb(), NULL when MSG_PROBE is set,
 * or an ERR_PTR() on failure.  On failure everything set up or queued so
 * far is released before returning.
 */
1839 struct sk_buff *ip6_make_skb(struct sock *sk,
1840 			     int getfrag(void *from, char *to, int offset,
1841 					 int len, int odd, struct sk_buff *skb),
1842 			     void *from, int length, int transhdrlen,
1843 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1844 			     struct rt6_info *rt, unsigned int flags,
1845 			     struct inet_cork_full *cork)
1846 {
1847 	struct inet6_cork v6_cork;
1848 	struct sk_buff_head queue;
1849 	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1850 	int err;
1851 
1852 	if (flags & MSG_PROBE)
1853 		return NULL;
1854 
1855 	__skb_queue_head_init(&queue);
1856 
	/* Start from a clean cork so ip6_cork_release() is safe to call on
	 * any failure below.
	 */
1857 	cork->base.flags = 0;
1858 	cork->base.addr = 0;
1859 	cork->base.opt = NULL;
1860 	cork->base.dst = NULL;
1861 	v6_cork.opt = NULL;
1862 	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1863 	if (err) {
		/* ip6_setup_cork() may fail after allocating options or
		 * taking the dst reference.
		 */
1864 		ip6_cork_release(cork, &v6_cork);
1865 		return ERR_PTR(err);
1866 	}
1867 	if (ipc6->dontfrag < 0)
1868 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1869 
	/* As in ip6_append_data(), the fragmentable options count towards
	 * both the length and the transport header length.
	 */
1870 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1871 				&current->task_frag, getfrag, from,
1872 				length + exthdrlen, transhdrlen + exthdrlen,
1873 				flags, ipc6);
1874 	if (err) {
1875 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1876 		return ERR_PTR(err);
1877 	}
1878 
1879 	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1880 }
1881