xref: /openbmc/linux/net/ipv6/ip6_output.c (revision e620a1e0)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	IPv6 output functions
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on linux/net/ipv4/ip_output.c
10  *
11  *	Changes:
12  *	A.N.Kuznetsov	:	arithmetic in fragmentation.
13  *				extension headers are implemented.
14  *				route changes now work.
15  *				ip6_forward does not confuse sniffers.
16  *				etc.
17  *
18  *      H. von Brand    :       Added missing #include <linux/string.h>
19  *	Imran Patel	:	frag id should be in NBO
20  *      Kazunori MIYAZAWA @USAGI
21  *			:       add ip6_append_data and related functions
22  *				for datagram xmit
23  */
24 
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37 
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41 
42 #include <net/sock.h>
43 #include <net/snmp.h>
44 
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 
58 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
59 {
60 	struct dst_entry *dst = skb_dst(skb);
61 	struct net_device *dev = dst->dev;
62 	const struct in6_addr *nexthop;
63 	struct neighbour *neigh;
64 	int ret;
65 
66 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
67 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
68 
69 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
70 		    ((mroute6_is_socket(net, skb) &&
71 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
72 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
73 					 &ipv6_hdr(skb)->saddr))) {
74 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
75 
76 			/* Do not check for IFF_ALLMULTI; multicast routing
77 			   is not supported in any case.
78 			 */
79 			if (newskb)
80 				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
81 					net, sk, newskb, NULL, newskb->dev,
82 					dev_loopback_xmit);
83 
84 			if (ipv6_hdr(skb)->hop_limit == 0) {
85 				IP6_INC_STATS(net, idev,
86 					      IPSTATS_MIB_OUTDISCARDS);
87 				kfree_skb(skb);
88 				return 0;
89 			}
90 		}
91 
92 		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
93 
94 		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
95 		    IPV6_ADDR_SCOPE_NODELOCAL &&
96 		    !(dev->flags & IFF_LOOPBACK)) {
97 			kfree_skb(skb);
98 			return 0;
99 		}
100 	}
101 
102 	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
103 		int res = lwtunnel_xmit(skb);
104 
105 		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
106 			return res;
107 	}
108 
109 	rcu_read_lock_bh();
110 	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
111 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
112 	if (unlikely(!neigh))
113 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
114 	if (!IS_ERR(neigh)) {
115 		sock_confirm_neigh(skb, neigh);
116 		ret = neigh_output(neigh, skb, false);
117 		rcu_read_unlock_bh();
118 		return ret;
119 	}
120 	rcu_read_unlock_bh();
121 
122 	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
123 	kfree_skb(skb);
124 	return -EINVAL;
125 }
126 
127 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
128 {
129 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
130 	/* Policy lookup after SNAT yielded a new policy */
131 	if (skb_dst(skb)->xfrm) {
132 		IPCB(skb)->flags |= IPSKB_REROUTED;
133 		return dst_output(net, sk, skb);
134 	}
135 #endif
136 
137 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
138 	    dst_allfrag(skb_dst(skb)) ||
139 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
140 		return ip6_fragment(net, sk, skb, ip6_finish_output2);
141 	else
142 		return ip6_finish_output2(net, sk, skb);
143 }
144 
145 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
146 {
147 	int ret;
148 
149 	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
150 	switch (ret) {
151 	case NET_XMIT_SUCCESS:
152 		return __ip6_finish_output(net, sk, skb);
153 	case NET_XMIT_CN:
154 		return __ip6_finish_output(net, sk, skb) ? : ret;
155 	default:
156 		kfree_skb(skb);
157 		return ret;
158 	}
159 }
160 
161 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
162 {
163 	struct net_device *dev = skb_dst(skb)->dev;
164 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
165 
166 	skb->protocol = htons(ETH_P_IPV6);
167 	skb->dev = dev;
168 
169 	if (unlikely(idev->cnf.disable_ipv6)) {
170 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
171 		kfree_skb(skb);
172 		return 0;
173 	}
174 
175 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
176 			    net, sk, skb, NULL, dev,
177 			    ip6_finish_output,
178 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
179 }
180 
181 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
182 {
183 	if (!np->autoflowlabel_set)
184 		return ip6_default_np_autolabel(net);
185 	else
186 		return np->autoflowlabel;
187 }
188 
189 /*
190  * xmit an sk_buff (used by TCP, SCTP and DCCP)
191  * Note : socket lock is not held for SYNACK packets, but might be modified
192  * by calls to skb_set_owner_w() and ipv6_local_error(),
193  * which are using proper atomic operations or spinlocks.
194  */
195 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
196 	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
197 {
198 	struct net *net = sock_net(sk);
199 	const struct ipv6_pinfo *np = inet6_sk(sk);
200 	struct in6_addr *first_hop = &fl6->daddr;
201 	struct dst_entry *dst = skb_dst(skb);
202 	unsigned int head_room;
203 	struct ipv6hdr *hdr;
204 	u8  proto = fl6->flowi6_proto;
205 	int seg_len = skb->len;
206 	int hlimit = -1;
207 	u32 mtu;
208 
209 	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
210 	if (opt)
211 		head_room += opt->opt_nflen + opt->opt_flen;
212 
213 	if (unlikely(skb_headroom(skb) < head_room)) {
214 		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
215 		if (!skb2) {
216 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
217 				      IPSTATS_MIB_OUTDISCARDS);
218 			kfree_skb(skb);
219 			return -ENOBUFS;
220 		}
221 		if (skb->sk)
222 			skb_set_owner_w(skb2, skb->sk);
223 		consume_skb(skb);
224 		skb = skb2;
225 	}
226 
227 	if (opt) {
228 		seg_len += opt->opt_nflen + opt->opt_flen;
229 
230 		if (opt->opt_flen)
231 			ipv6_push_frag_opts(skb, opt, &proto);
232 
233 		if (opt->opt_nflen)
234 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
235 					     &fl6->saddr);
236 	}
237 
238 	skb_push(skb, sizeof(struct ipv6hdr));
239 	skb_reset_network_header(skb);
240 	hdr = ipv6_hdr(skb);
241 
242 	/*
243 	 *	Fill in the IPv6 header
244 	 */
245 	if (np)
246 		hlimit = np->hop_limit;
247 	if (hlimit < 0)
248 		hlimit = ip6_dst_hoplimit(dst);
249 
250 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
251 				ip6_autoflowlabel(net, np), fl6));
252 
253 	hdr->payload_len = htons(seg_len);
254 	hdr->nexthdr = proto;
255 	hdr->hop_limit = hlimit;
256 
257 	hdr->saddr = fl6->saddr;
258 	hdr->daddr = *first_hop;
259 
260 	skb->protocol = htons(ETH_P_IPV6);
261 	skb->priority = priority;
262 	skb->mark = mark;
263 
264 	mtu = dst_mtu(dst);
265 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
266 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
267 			      IPSTATS_MIB_OUT, skb->len);
268 
269 		/* if egress device is enslaved to an L3 master device pass the
270 		 * skb to its handler for processing
271 		 */
272 		skb = l3mdev_ip6_out((struct sock *)sk, skb);
273 		if (unlikely(!skb))
274 			return 0;
275 
276 		/* hooks should never assume socket lock is held.
277 		 * we promote our socket to non const
278 		 */
279 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
280 			       net, (struct sock *)sk, skb, NULL, dst->dev,
281 			       dst_output);
282 	}
283 
284 	skb->dev = dst->dev;
285 	/* ipv6_local_error() does not require socket lock,
286 	 * we promote our socket to non const
287 	 */
288 	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
289 
290 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
291 	kfree_skb(skb);
292 	return -EMSGSIZE;
293 }
294 EXPORT_SYMBOL(ip6_xmit);
295 
296 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
297 {
298 	struct ip6_ra_chain *ra;
299 	struct sock *last = NULL;
300 
301 	read_lock(&ip6_ra_lock);
302 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
303 		struct sock *sk = ra->sk;
304 		if (sk && ra->sel == sel &&
305 		    (!sk->sk_bound_dev_if ||
306 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
307 			struct ipv6_pinfo *np = inet6_sk(sk);
308 
309 			if (np && np->rtalert_isolate &&
310 			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
311 				continue;
312 			}
313 			if (last) {
314 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
315 				if (skb2)
316 					rawv6_rcv(last, skb2);
317 			}
318 			last = sk;
319 		}
320 	}
321 
322 	if (last) {
323 		rawv6_rcv(last, skb);
324 		read_unlock(&ip6_ra_lock);
325 		return 1;
326 	}
327 	read_unlock(&ip6_ra_lock);
328 	return 0;
329 }
330 
331 static int ip6_forward_proxy_check(struct sk_buff *skb)
332 {
333 	struct ipv6hdr *hdr = ipv6_hdr(skb);
334 	u8 nexthdr = hdr->nexthdr;
335 	__be16 frag_off;
336 	int offset;
337 
338 	if (ipv6_ext_hdr(nexthdr)) {
339 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
340 		if (offset < 0)
341 			return 0;
342 	} else
343 		offset = sizeof(struct ipv6hdr);
344 
345 	if (nexthdr == IPPROTO_ICMPV6) {
346 		struct icmp6hdr *icmp6;
347 
348 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
349 					 offset + 1 - skb->data)))
350 			return 0;
351 
352 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
353 
354 		switch (icmp6->icmp6_type) {
355 		case NDISC_ROUTER_SOLICITATION:
356 		case NDISC_ROUTER_ADVERTISEMENT:
357 		case NDISC_NEIGHBOUR_SOLICITATION:
358 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
359 		case NDISC_REDIRECT:
360 			/* For reaction involving unicast neighbor discovery
361 			 * message destined to the proxied address, pass it to
362 			 * input function.
363 			 */
364 			return 1;
365 		default:
366 			break;
367 		}
368 	}
369 
370 	/*
371 	 * The proxying router can't forward traffic sent to a link-local
372 	 * address, so signal the sender and discard the packet. This
373 	 * behavior is clarified by the MIPv6 specification.
374 	 */
375 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
376 		dst_link_failure(skb);
377 		return -1;
378 	}
379 
380 	return 0;
381 }
382 
383 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
384 				     struct sk_buff *skb)
385 {
386 	struct dst_entry *dst = skb_dst(skb);
387 
388 	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
389 	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
390 
391 #ifdef CONFIG_NET_SWITCHDEV
392 	if (skb->offload_l3_fwd_mark) {
393 		consume_skb(skb);
394 		return 0;
395 	}
396 #endif
397 
398 	skb->tstamp = 0;
399 	return dst_output(net, sk, skb);
400 }
401 
402 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
403 {
404 	if (skb->len <= mtu)
405 		return false;
406 
407 	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
408 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
409 		return true;
410 
411 	if (skb->ignore_df)
412 		return false;
413 
414 	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
415 		return false;
416 
417 	return true;
418 }
419 
/*
 * ip6_forward - forward a received IPv6 packet towards its next hop.
 * @skb: the packet; this function does not use it again after handing it to
 *       the RA chain, ip6_input() or the NF_INET_FORWARD hook, and frees it
 *       on every drop path.
 *
 * Returns 0 when ip6_call_ra_chain() took the packet, the ip6_input() result
 * for proxied neighbour-discovery packets, the NF_HOOK() result on the normal
 * forwarding path, -ETIMEDOUT when the hop limit is exhausted, -EMSGSIZE when
 * the packet is too big for the forwarding MTU, and -EINVAL for every other
 * drop.
 */
420 int ip6_forward(struct sk_buff *skb)
421 {
422 	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
423 	struct dst_entry *dst = skb_dst(skb);
424 	struct ipv6hdr *hdr = ipv6_hdr(skb);
425 	struct inet6_skb_parm *opt = IP6CB(skb);
426 	struct net *net = dev_net(dst->dev);
427 	u32 mtu;
428 
/* Forwarding disabled for the namespace: counted as an address error. */
429 	if (net->ipv6.devconf_all->forwarding == 0)
430 		goto error;
431 
432 	if (skb->pkt_type != PACKET_HOST)
433 		goto drop;
434 
435 	if (unlikely(skb->sk))
436 		goto drop;
437 
438 	if (skb_warn_if_lro(skb))
439 		goto drop;
440 
441 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
442 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
443 		goto drop;
444 	}
445 
446 	skb_forward_csum(skb);
447 
448 	/*
449 	 *	We DO NOT make any processing on
450 	 *	RA packets, pushing them to user level AS IS
451 	 *	without any WARRANTY that application will be able
452 	 *	to interpret them. The reason is that we
453 	 *	cannot make anything clever here.
454 	 *
455 	 *	We are not end-node, so that if packet contains
456 	 *	AH/ESP, we cannot make anything.
457 	 *	Defragmentation also would be mistake, RA packets
458 	 *	cannot be fragmented, because there is no warranty
459 	 *	that different fragments will go along one path. --ANK
460 	 */
461 	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
462 		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
463 			return 0;
464 	}
465 
466 	/*
467 	 *	check and decrement ttl
468 	 */
469 	if (hdr->hop_limit <= 1) {
470 		/* Force OUTPUT device used as source address */
471 		skb->dev = dst->dev;
472 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
473 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
474 
475 		kfree_skb(skb);
476 		return -ETIMEDOUT;
477 	}
478 
479 	/* XXX: idev->cnf.proxy_ndp? */
/* NOTE(review): ip6_forward_proxy_check() calls pskb_may_pull(), and hdr is
 * not reloaded afterwards although it is dereferenced further down - confirm
 * that it cannot go stale here.
 */
480 	if (net->ipv6.devconf_all->proxy_ndp &&
481 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
482 		int proxied = ip6_forward_proxy_check(skb);
483 		if (proxied > 0)
484 			return ip6_input(skb);
485 		else if (proxied < 0) {
486 			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
487 			goto drop;
488 		}
489 	}
490 
491 	if (!xfrm6_route_forward(skb)) {
492 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
493 		goto drop;
494 	}
/* Re-read the dst; presumably xfrm6_route_forward() may replace it - confirm. */
495 	dst = skb_dst(skb);
496 
497 	/* IPv6 specs say nothing about it, but it is clear that we cannot
498 	   send redirects to source routed frames.
499 	   We don't send redirects to frames decapsulated from IPsec.
500 	 */
501 	if (IP6CB(skb)->iif == dst->dev->ifindex &&
502 	    opt->srcrt == 0 && !skb_sec_path(skb)) {
503 		struct in6_addr *target = NULL;
504 		struct inet_peer *peer;
505 		struct rt6_info *rt;
506 
507 		/*
508 		 *	incoming and outgoing devices are the same
509 		 *	send a redirect.
510 		 */
511 
/* Redirect target: the route's gateway if it has one, else the destination. */
512 		rt = (struct rt6_info *) dst;
513 		if (rt->rt6i_flags & RTF_GATEWAY)
514 			target = &rt->rt6i_gateway;
515 		else
516 			target = &hdr->daddr;
517 
518 		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
519 
520 		/* Limit redirects both by destination (here)
521 		   and by source (inside ndisc_send_redirect)
522 		 */
523 		if (inet_peer_xrlim_allow(peer, 1*HZ))
524 			ndisc_send_redirect(skb, target);
525 		if (peer)
526 			inet_putpeer(peer);
527 	} else {
528 		int addrtype = ipv6_addr_type(&hdr->saddr);
529 
530 		/* This check is security critical. */
531 		if (addrtype == IPV6_ADDR_ANY ||
532 		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
533 			goto error;
534 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
535 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
536 				    ICMPV6_NOT_NEIGHBOUR, 0);
537 			goto error;
538 		}
539 	}
540 
/* The forwarding MTU is never taken to be below IPV6_MIN_MTU. */
541 	mtu = ip6_dst_mtu_forward(dst);
542 	if (mtu < IPV6_MIN_MTU)
543 		mtu = IPV6_MIN_MTU;
544 
545 	if (ip6_pkt_too_big(skb, mtu)) {
546 		/* Again, force OUTPUT device used as source address */
547 		skb->dev = dst->dev;
548 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
549 		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
550 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
551 				IPSTATS_MIB_FRAGFAILS);
552 		kfree_skb(skb);
553 		return -EMSGSIZE;
554 	}
555 
556 	if (skb_cow(skb, dst->dev->hard_header_len)) {
557 		__IP6_INC_STATS(net, ip6_dst_idev(dst),
558 				IPSTATS_MIB_OUTDISCARDS);
559 		goto drop;
560 	}
561 
/* Re-read the header pointer after skb_cow() before writing through it. */
562 	hdr = ipv6_hdr(skb);
563 
564 	/* Mangling hops number delayed to point after skb COW */
565 
566 	hdr->hop_limit--;
567 
568 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
569 		       net, NULL, skb, skb->dev, dst->dev,
570 		       ip6_forward_finish);
571 
/* error: additionally counts an input address error before dropping. */
572 error:
573 	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
574 drop:
575 	kfree_skb(skb);
576 	return -EINVAL;
577 }
578 
579 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
580 {
581 	to->pkt_type = from->pkt_type;
582 	to->priority = from->priority;
583 	to->protocol = from->protocol;
584 	skb_dst_drop(to);
585 	skb_dst_set(to, dst_clone(skb_dst(from)));
586 	to->dev = from->dev;
587 	to->mark = from->mark;
588 
589 	skb_copy_hash(to, from);
590 
591 #ifdef CONFIG_NET_SCHED
592 	to->tc_index = from->tc_index;
593 #endif
594 	nf_copy(to, from);
595 	skb_ext_copy(to, from);
596 	skb_copy_secmark(to, from);
597 }
598 
599 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
600 		      u8 nexthdr, __be32 frag_id,
601 		      struct ip6_fraglist_iter *iter)
602 {
603 	unsigned int first_len;
604 	struct frag_hdr *fh;
605 
606 	/* BUILD HEADER */
607 	*prevhdr = NEXTHDR_FRAGMENT;
608 	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
609 	if (!iter->tmp_hdr)
610 		return -ENOMEM;
611 
612 	iter->frag = skb_shinfo(skb)->frag_list;
613 	skb_frag_list_init(skb);
614 
615 	iter->offset = 0;
616 	iter->hlen = hlen;
617 	iter->frag_id = frag_id;
618 	iter->nexthdr = nexthdr;
619 
620 	__skb_pull(skb, hlen);
621 	fh = __skb_push(skb, sizeof(struct frag_hdr));
622 	__skb_push(skb, hlen);
623 	skb_reset_network_header(skb);
624 	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
625 
626 	fh->nexthdr = nexthdr;
627 	fh->reserved = 0;
628 	fh->frag_off = htons(IP6_MF);
629 	fh->identification = frag_id;
630 
631 	first_len = skb_pagelen(skb);
632 	skb->data_len = first_len - skb_headlen(skb);
633 	skb->len = first_len;
634 	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
635 
636 	return 0;
637 }
638 EXPORT_SYMBOL(ip6_fraglist_init);
639 
640 void ip6_fraglist_prepare(struct sk_buff *skb,
641 			  struct ip6_fraglist_iter *iter)
642 {
643 	struct sk_buff *frag = iter->frag;
644 	unsigned int hlen = iter->hlen;
645 	struct frag_hdr *fh;
646 
647 	frag->ip_summed = CHECKSUM_NONE;
648 	skb_reset_transport_header(frag);
649 	fh = __skb_push(frag, sizeof(struct frag_hdr));
650 	__skb_push(frag, hlen);
651 	skb_reset_network_header(frag);
652 	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
653 	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
654 	fh->nexthdr = iter->nexthdr;
655 	fh->reserved = 0;
656 	fh->frag_off = htons(iter->offset);
657 	if (frag->next)
658 		fh->frag_off |= htons(IP6_MF);
659 	fh->identification = iter->frag_id;
660 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
661 	ip6_copy_metadata(frag, skb);
662 }
663 EXPORT_SYMBOL(ip6_fraglist_prepare);
664 
665 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
666 		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
667 		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
668 {
669 	state->prevhdr = prevhdr;
670 	state->nexthdr = nexthdr;
671 	state->frag_id = frag_id;
672 
673 	state->hlen = hlen;
674 	state->mtu = mtu;
675 
676 	state->left = skb->len - hlen;	/* Space per frame */
677 	state->ptr = hlen;		/* Where to start from */
678 
679 	state->hroom = hdr_room;
680 	state->troom = needed_tailroom;
681 
682 	state->offset = 0;
683 }
684 EXPORT_SYMBOL(ip6_frag_init);
685 
686 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
687 {
688 	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
689 	struct sk_buff *frag;
690 	struct frag_hdr *fh;
691 	unsigned int len;
692 
693 	len = state->left;
694 	/* IF: it doesn't fit, use 'mtu' - the data space left */
695 	if (len > state->mtu)
696 		len = state->mtu;
697 	/* IF: we are not sending up to and including the packet end
698 	   then align the next start on an eight byte boundary */
699 	if (len < state->left)
700 		len &= ~7;
701 
702 	/* Allocate buffer */
703 	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
704 			 state->hroom + state->troom, GFP_ATOMIC);
705 	if (!frag)
706 		return ERR_PTR(-ENOMEM);
707 
708 	/*
709 	 *	Set up data on packet
710 	 */
711 
712 	ip6_copy_metadata(frag, skb);
713 	skb_reserve(frag, state->hroom);
714 	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
715 	skb_reset_network_header(frag);
716 	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
717 	frag->transport_header = (frag->network_header + state->hlen +
718 				  sizeof(struct frag_hdr));
719 
720 	/*
721 	 *	Charge the memory for the fragment to any owner
722 	 *	it might possess
723 	 */
724 	if (skb->sk)
725 		skb_set_owner_w(frag, skb->sk);
726 
727 	/*
728 	 *	Copy the packet header into the new buffer.
729 	 */
730 	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
731 
732 	fragnexthdr_offset = skb_network_header(frag);
733 	fragnexthdr_offset += prevhdr - skb_network_header(skb);
734 	*fragnexthdr_offset = NEXTHDR_FRAGMENT;
735 
736 	/*
737 	 *	Build fragment header.
738 	 */
739 	fh->nexthdr = state->nexthdr;
740 	fh->reserved = 0;
741 	fh->identification = state->frag_id;
742 
743 	/*
744 	 *	Copy a block of the IP datagram.
745 	 */
746 	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
747 			     len));
748 	state->left -= len;
749 
750 	fh->frag_off = htons(state->offset);
751 	if (state->left > 0)
752 		fh->frag_off |= htons(IP6_MF);
753 	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
754 
755 	state->ptr += len;
756 	state->offset += len;
757 
758 	return frag;
759 }
760 EXPORT_SYMBOL(ip6_frag_next);
761 
/*
 * ip6_fragment - split an IPv6 packet into fragments and emit each of them.
 * @net:    passed through to @output
 * @sk:     passed through to @output
 * @skb:    packet to fragment
 * @output: called once per fragment; a non-zero return stops the process
 *
 * Two strategies are used.  When the packet already carries a frag_list
 * whose members have suitable sizes, headroom and sharing state, the list
 * members are converted in place (ip6_fraglist_init()/ip6_fraglist_prepare()).
 * Otherwise new buffers are allocated and filled by ip6_frag_next().
 *
 * Returns 0 when every fragment was emitted, -EMSGSIZE when fragmenting is
 * not permitted or not possible for the effective MTU (an ICMPv6 "packet too
 * big" is generated), or the error from a helper or from @output.
 */
762 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
763 		 int (*output)(struct net *, struct sock *, struct sk_buff *))
764 {
765 	struct sk_buff *frag;
766 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
767 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
768 				inet6_sk(skb->sk) : NULL;
769 	struct ip6_frag_state state;
770 	unsigned int mtu, hlen, nexthdr_offset;
771 	int hroom, err = 0;
772 	__be32 frag_id;
773 	u8 *prevhdr, nexthdr = 0;
774 
/* hlen: number of leading header bytes that are replicated in every
 * fragment; prevhdr: the "next header" byte that will announce the
 * fragment header.
 */
775 	err = ip6_find_1stfragopt(skb, &prevhdr);
776 	if (err < 0)
777 		goto fail;
778 	hlen = err;
779 	nexthdr = *prevhdr;
/* Keep prevhdr as an offset too: the pointer is recomputed from it after
 * skb_checksum_help() below.
 */
780 	nexthdr_offset = prevhdr - skb_network_header(skb);
781 
782 	mtu = ip6_skb_dst_mtu(skb);
783 
784 	/* We must not fragment if the socket is set to force MTU discovery
785 	 * or if the skb it not generated by a local socket.
786 	 */
787 	if (unlikely(!skb->ignore_df && skb->len > mtu))
788 		goto fail_toobig;
789 
790 	if (IP6CB(skb)->frag_max_size) {
791 		if (IP6CB(skb)->frag_max_size > mtu)
792 			goto fail_toobig;
793 
794 		/* don't send fragments larger than what we received */
795 		mtu = IP6CB(skb)->frag_max_size;
796 		if (mtu < IPV6_MIN_MTU)
797 			mtu = IPV6_MIN_MTU;
798 	}
799 
/* A non-zero per-socket frag_size smaller than the MTU lowers the limit. */
800 	if (np && np->frag_size < mtu) {
801 		if (np->frag_size)
802 			mtu = np->frag_size;
803 	}
804 	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
805 		goto fail_toobig;
/* From here on mtu is the payload budget of a single fragment. */
806 	mtu -= hlen + sizeof(struct frag_hdr);
807 
808 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
809 				    &ipv6_hdr(skb)->saddr);
810 
811 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
812 	    (err = skb_checksum_help(skb)))
813 		goto fail;
814 
815 	prevhdr = skb_network_header(skb) + nexthdr_offset;
816 	hroom = LL_RESERVED_SPACE(rt->dst.dev);
817 	if (skb_has_frag_list(skb)) {
818 		unsigned int first_len = skb_pagelen(skb);
819 		struct ip6_fraglist_iter iter;
820 		struct sk_buff *frag2;
821 
/* Fast path preconditions for the head skb. */
822 		if (first_len - hlen > mtu ||
823 		    ((first_len - hlen) & 7) ||
824 		    skb_cloned(skb) ||
825 		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
826 			goto slow_path;
827 
828 		skb_walk_frags(skb, frag) {
829 			/* Correct geometry. */
830 			if (frag->len > mtu ||
831 			    ((frag->len & 7) && frag->next) ||
832 			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
833 				goto slow_path_clean;
834 
835 			/* Partially cloned skb? */
836 			if (skb_shared(frag))
837 				goto slow_path_clean;
838 
839 			BUG_ON(frag->sk);
/* Give each list member its own socket ownership and take its truesize
 * out of the head skb.
 */
840 			if (skb->sk) {
841 				frag->sk = skb->sk;
842 				frag->destructor = sock_wfree;
843 			}
844 			skb->truesize -= frag->truesize;
845 		}
846 
847 		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
848 					&iter);
849 		if (err < 0)
850 			goto fail;
851 
852 		for (;;) {
853 			/* Prepare header of the next frame,
854 			 * before previous one went down. */
855 			if (iter.frag)
856 				ip6_fraglist_prepare(skb, &iter);
857 
858 			err = output(net, sk, skb);
859 			if (!err)
860 				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
861 					      IPSTATS_MIB_FRAGCREATES);
862 
863 			if (err || !iter.frag)
864 				break;
865 
866 			skb = ip6_fraglist_next(&iter);
867 		}
868 
869 		kfree(iter.tmp_hdr);
870 
871 		if (err == 0) {
872 			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
873 				      IPSTATS_MIB_FRAGOKS);
874 			return 0;
875 		}
876 
/* Output failed: free the list members that were not handed to output(). */
877 		kfree_skb_list(iter.frag);
878 
879 		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
880 			      IPSTATS_MIB_FRAGFAILS);
881 		return err;
882 
/* Undo the sk/destructor/truesize changes made to the list members visited
 * before the one that failed the checks, then fall through to the slow path.
 */
883 slow_path_clean:
884 		skb_walk_frags(skb, frag2) {
885 			if (frag2 == frag)
886 				break;
887 			frag2->sk = NULL;
888 			frag2->destructor = NULL;
889 			skb->truesize += frag2->truesize;
890 		}
891 	}
892 
893 slow_path:
894 	/*
895 	 *	Fragment the datagram.
896 	 */
897 
898 	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
899 		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
900 		      &state);
901 
902 	/*
903 	 *	Keep copying data until we run out.
904 	 */
905 
906 	while (state.left > 0) {
907 		frag = ip6_frag_next(skb, &state);
908 		if (IS_ERR(frag)) {
909 			err = PTR_ERR(frag);
910 			goto fail;
911 		}
912 
913 		/*
914 		 *	Put this fragment into the sending queue.
915 		 */
916 		err = output(net, sk, frag);
917 		if (err)
918 			goto fail;
919 
920 		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
921 			      IPSTATS_MIB_FRAGCREATES);
922 	}
923 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
924 		      IPSTATS_MIB_FRAGOKS);
/* All data was copied into new fragments; the original is no longer needed. */
925 	consume_skb(skb);
926 	return err;
927 
928 fail_toobig:
929 	if (skb->sk && dst_allfrag(skb_dst(skb)))
930 		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
931 
932 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
933 	err = -EMSGSIZE;
934 
935 fail:
936 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
937 		      IPSTATS_MIB_FRAGFAILS);
938 	kfree_skb(skb);
939 	return err;
940 }
941 
942 static inline int ip6_rt_check(const struct rt6key *rt_key,
943 			       const struct in6_addr *fl_addr,
944 			       const struct in6_addr *addr_cache)
945 {
946 	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
947 		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
948 }
949 
950 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
951 					  struct dst_entry *dst,
952 					  const struct flowi6 *fl6)
953 {
954 	struct ipv6_pinfo *np = inet6_sk(sk);
955 	struct rt6_info *rt;
956 
957 	if (!dst)
958 		goto out;
959 
960 	if (dst->ops->family != AF_INET6) {
961 		dst_release(dst);
962 		return NULL;
963 	}
964 
965 	rt = (struct rt6_info *)dst;
966 	/* Yes, checking route validity in not connected
967 	 * case is not very simple. Take into account,
968 	 * that we do not support routing by source, TOS,
969 	 * and MSG_DONTROUTE		--ANK (980726)
970 	 *
971 	 * 1. ip6_rt_check(): If route was host route,
972 	 *    check that cached destination is current.
973 	 *    If it is network route, we still may
974 	 *    check its validity using saved pointer
975 	 *    to the last used address: daddr_cache.
976 	 *    We do not want to save whole address now,
977 	 *    (because main consumer of this service
978 	 *    is tcp, which has not this problem),
979 	 *    so that the last trick works only on connected
980 	 *    sockets.
981 	 * 2. oif also should be the same.
982 	 */
983 	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
984 #ifdef CONFIG_IPV6_SUBTREES
985 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
986 #endif
987 	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
988 	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
989 		dst_release(dst);
990 		dst = NULL;
991 	}
992 
993 out:
994 	return dst;
995 }
996 
/*
 * Common tail of the IPv6 route lookups in this file.
 *
 * @net: network namespace used for the lookups
 * @sk:  socket passed to the route lookups and, when non-NULL, the source of
 *       the address-selection preferences
 * @dst: in/out; may already point at a route on entry, receives the result
 * @fl6: flow to route; fl6->saddr is filled in when it is unspecified
 *
 * Returns 0 with *dst set on success.  On error the route in *dst is
 * released, *dst is set to NULL and a negative errno is returned;
 * -ENETUNREACH additionally increments the OUTNOROUTES counter.
 */
997 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
998 			       struct dst_entry **dst, struct flowi6 *fl6)
999 {
1000 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1001 	struct neighbour *n;
1002 	struct rt6_info *rt;
1003 #endif
1004 	int err;
1005 	int flags = 0;
1006 
1007 	/* The correct way to handle this would be to do
1008 	 * ip6_route_get_saddr, and then ip6_route_output; however,
1009 	 * the route-specific preferred source forces the
1010 	 * ip6_route_output call _before_ ip6_route_get_saddr.
1011 	 *
1012 	 * In source specific routing (no src=any default route),
1013 	 * ip6_route_output will fail given src=any saddr, though, so
1014 	 * that's why we try it again later.
1015 	 */
1016 	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1017 		struct fib6_info *from;
1018 		struct rt6_info *rt;
1019 		bool had_dst = *dst != NULL;
1020 
1021 		if (!had_dst)
1022 			*dst = ip6_route_output(net, sk, fl6);
/* Only a successful lookup result is used for source address selection. */
1023 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1024 
1025 		rcu_read_lock();
1026 		from = rt ? rcu_dereference(rt->from) : NULL;
1027 		err = ip6_route_get_saddr(net, from, &fl6->daddr,
1028 					  sk ? inet6_sk(sk)->srcprefs : 0,
1029 					  &fl6->saddr);
1030 		rcu_read_unlock();
1031 
1032 		if (err)
1033 			goto out_err_release;
1034 
1035 		/* If we had an erroneous initial result, pretend it
1036 		 * never existed and let the SA-enabled version take
1037 		 * over.
1038 		 */
1039 		if (!had_dst && (*dst)->error) {
1040 			dst_release(*dst);
1041 			*dst = NULL;
1042 		}
1043 
1044 		if (fl6->flowi6_oif)
1045 			flags |= RT6_LOOKUP_F_IFACE;
1046 	}
1047 
/* Look up (again) now that the source address is known, unless a usable
 * route is already present.
 */
1048 	if (!*dst)
1049 		*dst = ip6_route_output_flags(net, sk, fl6, flags);
1050 
1051 	err = (*dst)->error;
1052 	if (err)
1053 		goto out_err_release;
1054 
1055 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1056 	/*
1057 	 * Here if the dst entry we've looked up
1058 	 * has a neighbour entry that is in the INCOMPLETE
1059 	 * state and the src address from the flow is
1060 	 * marked as OPTIMISTIC, we release the found
1061 	 * dst entry and replace it instead with the
1062 	 * dst entry of the nexthop router
1063 	 */
1064 	rt = (struct rt6_info *) *dst;
1065 	rcu_read_lock_bh();
1066 	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1067 				      rt6_nexthop(rt, &fl6->daddr));
/* err is only a local marker here: non-zero when a neighbour entry exists
 * but is not in a NUD_VALID state.
 */
1068 	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1069 	rcu_read_unlock_bh();
1070 
1071 	if (err) {
1072 		struct inet6_ifaddr *ifp;
1073 		struct flowi6 fl_gw6;
1074 		int redirect;
1075 
1076 		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1077 				      (*dst)->dev, 1);
1078 
1079 		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1080 		if (ifp)
1081 			in6_ifa_put(ifp);
1082 
1083 		if (redirect) {
1084 			/*
1085 			 * We need to get the dst entry for the
1086 			 * default router instead
1087 			 */
1088 			dst_release(*dst);
/* Same flow, but with the destination address cleared. */
1089 			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1090 			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1091 			*dst = ip6_route_output(net, sk, &fl_gw6);
1092 			err = (*dst)->error;
1093 			if (err)
1094 				goto out_err_release;
1095 		}
1096 	}
1097 #endif
/* A v4-mapped source is only accepted with a v4-mapped or unspecified
 * destination.
 */
1098 	if (ipv6_addr_v4mapped(&fl6->saddr) &&
1099 	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1100 		err = -EAFNOSUPPORT;
1101 		goto out_err_release;
1102 	}
1103 
1104 	return 0;
1105 
1106 out_err_release:
1107 	dst_release(*dst);
1108 	*dst = NULL;
1109 
1110 	if (err == -ENETUNREACH)
1111 		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1112 	return err;
1113 }
1114 
1115 /**
1116  *	ip6_dst_lookup - perform route lookup on flow
1117  *	@sk: socket which provides route info
1118  *	@dst: pointer to dst_entry * for result
1119  *	@fl6: flow to lookup
1120  *
1121  *	This function performs a route lookup on the given flow.
1122  *
1123  *	It returns zero on success, or a standard errno code on error.
1124  */
1125 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1126 		   struct flowi6 *fl6)
1127 {
1128 	*dst = NULL;
1129 	return ip6_dst_lookup_tail(net, sk, dst, fl6);
1130 }
1131 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1132 
1133 /**
1134  *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1135  *	@sk: socket which provides route info
1136  *	@fl6: flow to lookup
1137  *	@final_dst: final destination address for ipsec lookup
1138  *
1139  *	This function performs a route lookup on the given flow.
1140  *
1141  *	It returns a valid dst pointer on success, or a pointer encoded
1142  *	error code.
1143  */
1144 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1145 				      const struct in6_addr *final_dst)
1146 {
1147 	struct dst_entry *dst = NULL;
1148 	int err;
1149 
1150 	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1151 	if (err)
1152 		return ERR_PTR(err);
1153 	if (final_dst)
1154 		fl6->daddr = *final_dst;
1155 
1156 	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1157 }
1158 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1159 
1160 /**
1161  *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1162  *	@sk: socket which provides the dst cache and route info
1163  *	@fl6: flow to lookup
1164  *	@final_dst: final destination address for ipsec lookup
1165  *	@connected: whether @sk is connected or not
1166  *
1167  *	This function performs a route lookup on the given flow with the
1168  *	possibility of using the cached route in the socket if it is valid.
1169  *	It will take the socket dst lock when operating on the dst cache.
1170  *	As a result, this function can only be used in process context.
1171  *
1172  *	In addition, for a connected socket, cache the dst in the socket
1173  *	if the current cache is not valid.
1174  *
1175  *	It returns a valid dst pointer on success, or a pointer encoded
1176  *	error code.
1177  */
1178 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1179 					 const struct in6_addr *final_dst,
1180 					 bool connected)
1181 {
1182 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1183 
1184 	dst = ip6_sk_dst_check(sk, dst, fl6);
1185 	if (dst)
1186 		return dst;
1187 
1188 	dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1189 	if (connected && !IS_ERR(dst))
1190 		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1191 
1192 	return dst;
1193 }
1194 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1195 
1196 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1197 					       gfp_t gfp)
1198 {
1199 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1200 }
1201 
1202 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1203 						gfp_t gfp)
1204 {
1205 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1206 }
1207 
1208 static void ip6_append_data_mtu(unsigned int *mtu,
1209 				int *maxfraglen,
1210 				unsigned int fragheaderlen,
1211 				struct sk_buff *skb,
1212 				struct rt6_info *rt,
1213 				unsigned int orig_mtu)
1214 {
1215 	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1216 		if (!skb) {
1217 			/* first fragment, reserve header_len */
1218 			*mtu = orig_mtu - rt->dst.header_len;
1219 
1220 		} else {
1221 			/*
1222 			 * this fragment is not first, the headers
1223 			 * space is regarded as data space.
1224 			 */
1225 			*mtu = orig_mtu;
1226 		}
1227 		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1228 			      + fragheaderlen - sizeof(struct frag_hdr);
1229 	}
1230 }
1231 
1232 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1233 			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1234 			  struct rt6_info *rt, struct flowi6 *fl6)
1235 {
1236 	struct ipv6_pinfo *np = inet6_sk(sk);
1237 	unsigned int mtu;
1238 	struct ipv6_txoptions *opt = ipc6->opt;
1239 
1240 	/*
1241 	 * setup for corking
1242 	 */
1243 	if (opt) {
1244 		if (WARN_ON(v6_cork->opt))
1245 			return -EINVAL;
1246 
1247 		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1248 		if (unlikely(!v6_cork->opt))
1249 			return -ENOBUFS;
1250 
1251 		v6_cork->opt->tot_len = sizeof(*opt);
1252 		v6_cork->opt->opt_flen = opt->opt_flen;
1253 		v6_cork->opt->opt_nflen = opt->opt_nflen;
1254 
1255 		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1256 						    sk->sk_allocation);
1257 		if (opt->dst0opt && !v6_cork->opt->dst0opt)
1258 			return -ENOBUFS;
1259 
1260 		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1261 						    sk->sk_allocation);
1262 		if (opt->dst1opt && !v6_cork->opt->dst1opt)
1263 			return -ENOBUFS;
1264 
1265 		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1266 						   sk->sk_allocation);
1267 		if (opt->hopopt && !v6_cork->opt->hopopt)
1268 			return -ENOBUFS;
1269 
1270 		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1271 						    sk->sk_allocation);
1272 		if (opt->srcrt && !v6_cork->opt->srcrt)
1273 			return -ENOBUFS;
1274 
1275 		/* need source address above miyazawa*/
1276 	}
1277 	dst_hold(&rt->dst);
1278 	cork->base.dst = &rt->dst;
1279 	cork->fl.u.ip6 = *fl6;
1280 	v6_cork->hop_limit = ipc6->hlimit;
1281 	v6_cork->tclass = ipc6->tclass;
1282 	if (rt->dst.flags & DST_XFRM_TUNNEL)
1283 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1284 		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1285 	else
1286 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1287 			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1288 	if (np->frag_size < mtu) {
1289 		if (np->frag_size)
1290 			mtu = np->frag_size;
1291 	}
1292 	if (mtu < IPV6_MIN_MTU)
1293 		return -EINVAL;
1294 	cork->base.fragsize = mtu;
1295 	cork->base.gso_size = ipc6->gso_size;
1296 	cork->base.tx_flags = 0;
1297 	cork->base.mark = ipc6->sockc.mark;
1298 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1299 
1300 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1301 		cork->base.flags |= IPCORK_ALLFRAG;
1302 	cork->base.length = 0;
1303 
1304 	cork->base.transmit_time = ipc6->sockc.transmit_time;
1305 
1306 	return 0;
1307 }
1308 
/*
 * __ip6_append_data - append data to a queue of pending IPv6 skbs
 * @sk: socket the data is sent on and whose write memory is charged
 * @fl6: flow, passed along when an EMSGSIZE / path MTU error is reported
 * @queue: pending skbs; data is added to the tail skb or to new skbs
 *	   queued behind it
 * @cork: generic cork state (dst, fragsize, gso_size, length, flags,
 *	  tx_flags)
 * @v6_cork: IPv6 cork state; only its tx options are read here
 * @pfrag: page fragment used for paged data when zerocopy is not in use
 * @getfrag: copies @len bytes found at @offset in @from to @to; a negative
 *	     return value is reported to the caller as -EFAULT
 * @from: opaque data source handed to @getfrag and, for zerocopy, to
 *	  skb_zerocopy_iter_dgram()
 * @length: number of bytes to append; includes @transhdrlen
 * @transhdrlen: header bytes at the start of the data for which room is
 *		 left in the first new skb without calling @getfrag
 * @flags: MSG_* flags (MSG_MORE, MSG_DONTWAIT, MSG_CONFIRM, MSG_ZEROCOPY)
 * @ipc6: per-call cookie; only ->dontfrag is consulted here
 *
 * Returns 0 on success or a negative errno.  On the "error" path the part
 * of @length that was not consumed is subtracted from cork->length again,
 * IPSTATS_MIB_OUTDISCARDS is bumped and the write memory accumulated so
 * far is still added to sk->sk_wmem_alloc; skbs that were already queued
 * are left on @queue.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	/*
	 * An empty queue means this call creates the first skb; only that
	 * skb gets the extension-header offset and the extra dst header
	 * room (both are reset to 0 once it has been built).
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	/*
	 * With a GSO size set, the size limit is IP6_MAX_MTU instead of the
	 * corked fragment size and new skbs keep most data in page frags.
	 */
	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	/* One timestamp key per call, consumed by the first new skb. */
	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/*
	 * fragheaderlen: IPv6 header plus the headers counted as
	 * non-fragmentable.  maxfraglen: skb length limit when fragmenting;
	 * the part after fragheaderlen is rounded down to a multiple of 8
	 * and sizeof(struct frag_hdr) is held back.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/*
	 * dontfrag on a UDP or RAW socket: data that does not fit one packet
	 * is refused, and the usable MTU is reported via ipv6_local_rxpmtu().
	 */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		/* Also reached by goto from the two checks above. */
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	/*
	 * Zerocopy request: get (or extend) the ubuf_info.  Zerocopy is kept
	 * only when the device does scatter-gather and the checksum mode is
	 * CHECKSUM_PARTIAL; otherwise uarg->zerocopy is cleared and the data
	 * is copied by the regular paths below.
	 */
	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* Nothing queued yet: jump straight into the allocation branch. */
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			/*
			 * fraggap: bytes by which the current skb exceeds
			 * maxfraglen; they are moved into the new skb below.
			 */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			/*
			 * Linear allocation size: a full mtu when more data
			 * is expected and the device cannot do SG, the exact
			 * fragment length for non-paged skbs, otherwise at
			 * most MAX_HEADER with the rest counted as paged.
			 */
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			/* Bytes @getfrag must place into the linear area. */
			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			/*
			 * The skb that carries the transport header is
			 * allocated with sock_alloc_send_skb().  Later skbs
			 * use alloc_skb(), but only while sk_wmem_alloc plus
			 * what this call has accumulated stays within twice
			 * sk_sndbuf.
			 */
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			/*
			 * Move the overhanging tail of the previous skb into
			 * this one, fix up both checksums and trim the
			 * previous skb back to maxfraglen.
			 */
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			/* Header room is only needed in the first new skb. */
			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			/*
			 * skbs without a destructor are charged to the socket
			 * here; the charge is added to sk_wmem_alloc in one
			 * go before returning.
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		/*
		 * Three ways to add data to the current skb: into its linear
		 * tailroom (no SG and enough room), into a page fragment
		 * (no active zerocopy), or by zerocopy.
		 */
		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				/* Undo the skb_put() on failure. */
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			/* Start a new frag slot unless the page can be merged. */
			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	/* length is what is still unconsumed; take it back out of the cork. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
1652 
1653 int ip6_append_data(struct sock *sk,
1654 		    int getfrag(void *from, char *to, int offset, int len,
1655 				int odd, struct sk_buff *skb),
1656 		    void *from, int length, int transhdrlen,
1657 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1658 		    struct rt6_info *rt, unsigned int flags)
1659 {
1660 	struct inet_sock *inet = inet_sk(sk);
1661 	struct ipv6_pinfo *np = inet6_sk(sk);
1662 	int exthdrlen;
1663 	int err;
1664 
1665 	if (flags&MSG_PROBE)
1666 		return 0;
1667 	if (skb_queue_empty(&sk->sk_write_queue)) {
1668 		/*
1669 		 * setup for corking
1670 		 */
1671 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1672 				     ipc6, rt, fl6);
1673 		if (err)
1674 			return err;
1675 
1676 		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1677 		length += exthdrlen;
1678 		transhdrlen += exthdrlen;
1679 	} else {
1680 		fl6 = &inet->cork.fl.u.ip6;
1681 		transhdrlen = 0;
1682 	}
1683 
1684 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1685 				 &np->cork, sk_page_frag(sk), getfrag,
1686 				 from, length, transhdrlen, flags, ipc6);
1687 }
1688 EXPORT_SYMBOL_GPL(ip6_append_data);
1689 
1690 static void ip6_cork_release(struct inet_cork_full *cork,
1691 			     struct inet6_cork *v6_cork)
1692 {
1693 	if (v6_cork->opt) {
1694 		kfree(v6_cork->opt->dst0opt);
1695 		kfree(v6_cork->opt->dst1opt);
1696 		kfree(v6_cork->opt->hopopt);
1697 		kfree(v6_cork->opt->srcrt);
1698 		kfree(v6_cork->opt);
1699 		v6_cork->opt = NULL;
1700 	}
1701 
1702 	if (cork->base.dst) {
1703 		dst_release(cork->base.dst);
1704 		cork->base.dst = NULL;
1705 		cork->base.flags &= ~IPCORK_ALLFRAG;
1706 	}
1707 	memset(&cork->fl, 0, sizeof(cork->fl));
1708 }
1709 
1710 struct sk_buff *__ip6_make_skb(struct sock *sk,
1711 			       struct sk_buff_head *queue,
1712 			       struct inet_cork_full *cork,
1713 			       struct inet6_cork *v6_cork)
1714 {
1715 	struct sk_buff *skb, *tmp_skb;
1716 	struct sk_buff **tail_skb;
1717 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1718 	struct ipv6_pinfo *np = inet6_sk(sk);
1719 	struct net *net = sock_net(sk);
1720 	struct ipv6hdr *hdr;
1721 	struct ipv6_txoptions *opt = v6_cork->opt;
1722 	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1723 	struct flowi6 *fl6 = &cork->fl.u.ip6;
1724 	unsigned char proto = fl6->flowi6_proto;
1725 
1726 	skb = __skb_dequeue(queue);
1727 	if (!skb)
1728 		goto out;
1729 	tail_skb = &(skb_shinfo(skb)->frag_list);
1730 
1731 	/* move skb->data to ip header from ext header */
1732 	if (skb->data < skb_network_header(skb))
1733 		__skb_pull(skb, skb_network_offset(skb));
1734 	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1735 		__skb_pull(tmp_skb, skb_network_header_len(skb));
1736 		*tail_skb = tmp_skb;
1737 		tail_skb = &(tmp_skb->next);
1738 		skb->len += tmp_skb->len;
1739 		skb->data_len += tmp_skb->len;
1740 		skb->truesize += tmp_skb->truesize;
1741 		tmp_skb->destructor = NULL;
1742 		tmp_skb->sk = NULL;
1743 	}
1744 
1745 	/* Allow local fragmentation. */
1746 	skb->ignore_df = ip6_sk_ignore_df(sk);
1747 
1748 	*final_dst = fl6->daddr;
1749 	__skb_pull(skb, skb_network_header_len(skb));
1750 	if (opt && opt->opt_flen)
1751 		ipv6_push_frag_opts(skb, opt, &proto);
1752 	if (opt && opt->opt_nflen)
1753 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1754 
1755 	skb_push(skb, sizeof(struct ipv6hdr));
1756 	skb_reset_network_header(skb);
1757 	hdr = ipv6_hdr(skb);
1758 
1759 	ip6_flow_hdr(hdr, v6_cork->tclass,
1760 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
1761 					ip6_autoflowlabel(net, np), fl6));
1762 	hdr->hop_limit = v6_cork->hop_limit;
1763 	hdr->nexthdr = proto;
1764 	hdr->saddr = fl6->saddr;
1765 	hdr->daddr = *final_dst;
1766 
1767 	skb->priority = sk->sk_priority;
1768 	skb->mark = cork->base.mark;
1769 
1770 	skb->tstamp = cork->base.transmit_time;
1771 
1772 	skb_dst_set(skb, dst_clone(&rt->dst));
1773 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1774 	if (proto == IPPROTO_ICMPV6) {
1775 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1776 
1777 		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1778 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1779 	}
1780 
1781 	ip6_cork_release(cork, v6_cork);
1782 out:
1783 	return skb;
1784 }
1785 
1786 int ip6_send_skb(struct sk_buff *skb)
1787 {
1788 	struct net *net = sock_net(skb->sk);
1789 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1790 	int err;
1791 
1792 	err = ip6_local_out(net, skb->sk, skb);
1793 	if (err) {
1794 		if (err > 0)
1795 			err = net_xmit_errno(err);
1796 		if (err)
1797 			IP6_INC_STATS(net, rt->rt6i_idev,
1798 				      IPSTATS_MIB_OUTDISCARDS);
1799 	}
1800 
1801 	return err;
1802 }
1803 
/*
 * Finish the socket's pending skb with ip6_finish_skb() and send it.
 * Returns 0 when there is nothing to send, otherwise the result of
 * ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1815 
1816 static void __ip6_flush_pending_frames(struct sock *sk,
1817 				       struct sk_buff_head *queue,
1818 				       struct inet_cork_full *cork,
1819 				       struct inet6_cork *v6_cork)
1820 {
1821 	struct sk_buff *skb;
1822 
1823 	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1824 		if (skb_dst(skb))
1825 			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1826 				      IPSTATS_MIB_OUTDISCARDS);
1827 		kfree_skb(skb);
1828 	}
1829 
1830 	ip6_cork_release(cork, v6_cork);
1831 }
1832 
1833 void ip6_flush_pending_frames(struct sock *sk)
1834 {
1835 	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1836 				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1837 }
1838 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1839 
1840 struct sk_buff *ip6_make_skb(struct sock *sk,
1841 			     int getfrag(void *from, char *to, int offset,
1842 					 int len, int odd, struct sk_buff *skb),
1843 			     void *from, int length, int transhdrlen,
1844 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1845 			     struct rt6_info *rt, unsigned int flags,
1846 			     struct inet_cork_full *cork)
1847 {
1848 	struct inet6_cork v6_cork;
1849 	struct sk_buff_head queue;
1850 	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1851 	int err;
1852 
1853 	if (flags & MSG_PROBE)
1854 		return NULL;
1855 
1856 	__skb_queue_head_init(&queue);
1857 
1858 	cork->base.flags = 0;
1859 	cork->base.addr = 0;
1860 	cork->base.opt = NULL;
1861 	cork->base.dst = NULL;
1862 	v6_cork.opt = NULL;
1863 	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1864 	if (err) {
1865 		ip6_cork_release(cork, &v6_cork);
1866 		return ERR_PTR(err);
1867 	}
1868 	if (ipc6->dontfrag < 0)
1869 		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1870 
1871 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1872 				&current->task_frag, getfrag, from,
1873 				length + exthdrlen, transhdrlen + exthdrlen,
1874 				flags, ipc6);
1875 	if (err) {
1876 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1877 		return ERR_PTR(err);
1878 	}
1879 
1880 	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1881 }
1882