xref: /openbmc/linux/net/ipv6/ip6_output.c (revision 643d1f7f)
1 /*
2  *	IPv6 output functions
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
9  *
10  *	Based on linux/net/ipv4/ip_output.c
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  *
17  *	Changes:
18  *	A.N.Kuznetsov	:	arithmetic in fragmentation.
19  *				extension headers are implemented.
20  *				route changes now work.
21  *				ip6_forward does not confuse sniffers.
22  *				etc.
23  *
24  *      H. von Brand    :       Added missing #include <linux/string.h>
25  *	Imran Patel	: 	frag id should be in NBO
26  *      Kazunori MIYAZAWA @USAGI
27  *			:       add ip6_append_data and related functions
28  *				for datagram xmit
29  */
30 
31 #include <linux/errno.h>
32 #include <linux/kernel.h>
33 #include <linux/string.h>
34 #include <linux/socket.h>
35 #include <linux/net.h>
36 #include <linux/netdevice.h>
37 #include <linux/if_arp.h>
38 #include <linux/in6.h>
39 #include <linux/tcp.h>
40 #include <linux/route.h>
41 #include <linux/module.h>
42 
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
45 
46 #include <net/sock.h>
47 #include <net/snmp.h>
48 
49 #include <net/ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
55 #include <net/icmp.h>
56 #include <net/xfrm.h>
57 #include <net/checksum.h>
58 
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60 
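/*
 * Pick a fragment identification value from a single global counter.
 * Zero is never handed out, so callers (see the slow path in
 * ip6_fragment()) can use a frag_id of 0 to mean "not yet assigned".
 */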
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62 {
63 	static u32 ipv6_fragmentation_id = 1;
64 	static DEFINE_SPINLOCK(ip6_id_lock);
65 
66 	spin_lock_bh(&ip6_id_lock);
67 	fhdr->identification = htonl(ipv6_fragmentation_id);
68 	if (++ipv6_fragmentation_id == 0)
69 		ipv6_fragmentation_id = 1;
70 	spin_unlock_bh(&ip6_id_lock);
71 }
72 
73 int __ip6_local_out(struct sk_buff *skb)
74 {
75 	int len;
76 
77 	len = skb->len - sizeof(struct ipv6hdr);
78 	if (len > IPV6_MAXPLEN)
79 		len = 0;
80 	ipv6_hdr(skb)->payload_len = htons(len);
81 
82 	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
83 		       dst_output);
84 }
85 
86 int ip6_local_out(struct sk_buff *skb)
87 {
88 	int err;
89 
90 	err = __ip6_local_out(skb);
91 	if (likely(err == 1))
92 		err = dst_output(skb);
93 
94 	return err;
95 }
96 EXPORT_SYMBOL_GPL(ip6_local_out);
97 
98 static int ip6_output_finish(struct sk_buff *skb)
99 {
100 	struct dst_entry *dst = skb->dst;
101 
102 	if (dst->hh)
103 		return neigh_hh_output(dst->hh, skb);
104 	else if (dst->neighbour)
105 		return dst->neighbour->output(skb);
106 
107 	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
108 	kfree_skb(skb);
109 	return -EINVAL;
110 
111 }
112 
113 /* dev_loopback_xmit for use with netfilter. */
114 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
115 {
116 	skb_reset_mac_header(newskb);
117 	__skb_pull(newskb, skb_network_offset(newskb));
118 	newskb->pkt_type = PACKET_LOOPBACK;
119 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
120 	BUG_TRAP(newskb->dst);
121 
122 	netif_rx(newskb);
123 	return 0;
124 }
125 
126 
127 static int ip6_output2(struct sk_buff *skb)
128 {
129 	struct dst_entry *dst = skb->dst;
130 	struct net_device *dev = dst->dev;
131 
132 	skb->protocol = htons(ETH_P_IPV6);
133 	skb->dev = dev;
134 
135 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
136 		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
137 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
138 
139 		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
140 		    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
141 					&ipv6_hdr(skb)->saddr)) {
142 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
143 
144 			/* Do not check for IFF_ALLMULTI; multicast routing
145 			   is not supported in any case.
146 			 */
147 			if (newskb)
148 				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
149 					NULL, newskb->dev,
150 					ip6_dev_loopback_xmit);
151 
152 			if (ipv6_hdr(skb)->hop_limit == 0) {
153 				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
154 				kfree_skb(skb);
155 				return 0;
156 			}
157 		}
158 
159 		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
160 	}
161 
162 	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
163 		       ip6_output_finish);
164 }
165 
166 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
167 {
168 	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
169 
170 	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
171 	       skb->dst->dev->mtu : dst_mtu(skb->dst);
172 }
173 
174 int ip6_output(struct sk_buff *skb)
175 {
176 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
177 				dst_allfrag(skb->dst))
178 		return ip6_fragment(skb, ip6_output2);
179 	else
180 		return ip6_output2(skb);
181 }
182 
183 /*
184  *	xmit an sk_buff (used by TCP)
185  */
186 
187 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 	     struct ipv6_txoptions *opt, int ipfragok)
189 {
190 	struct ipv6_pinfo *np = inet6_sk(sk);
191 	struct in6_addr *first_hop = &fl->fl6_dst;
192 	struct dst_entry *dst = skb->dst;
193 	struct ipv6hdr *hdr;
194 	u8  proto = fl->proto;
195 	int seg_len = skb->len;
196 	int hlimit, tclass;
197 	u32 mtu;
198 
199 	if (opt) {
200 		unsigned int head_room;
201 
202 		/* First: exthdrs may take lots of space (~8K for now);
203 		   MAX_HEADER is not enough.
204 		 */
205 		head_room = opt->opt_nflen + opt->opt_flen;
206 		seg_len += head_room;
207 		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
208 
209 		if (skb_headroom(skb) < head_room) {
210 			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
211 			if (skb2 == NULL) {
212 				IP6_INC_STATS(ip6_dst_idev(skb->dst),
213 					      IPSTATS_MIB_OUTDISCARDS);
214 				kfree_skb(skb);
215 				return -ENOBUFS;
216 			}
217 			kfree_skb(skb);
218 			skb = skb2;
219 			if (sk)
220 				skb_set_owner_w(skb, sk);
221 		}
222 		if (opt->opt_flen)
223 			ipv6_push_frag_opts(skb, opt, &proto);
224 		if (opt->opt_nflen)
225 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
226 	}
227 
228 	skb_push(skb, sizeof(struct ipv6hdr));
229 	skb_reset_network_header(skb);
230 	hdr = ipv6_hdr(skb);
231 
232 	/*
233 	 *	Fill in the IPv6 header
234 	 */
235 
236 	hlimit = -1;
237 	if (np)
238 		hlimit = np->hop_limit;
239 	if (hlimit < 0)
240 		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
241 	if (hlimit < 0)
242 		hlimit = ipv6_get_hoplimit(dst->dev);
243 
244 	tclass = -1;
245 	if (np)
246 		tclass = np->tclass;
247 	if (tclass < 0)
248 		tclass = 0;
249 
250 	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
251 
252 	hdr->payload_len = htons(seg_len);
253 	hdr->nexthdr = proto;
254 	hdr->hop_limit = hlimit;
255 
256 	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
257 	ipv6_addr_copy(&hdr->daddr, first_hop);
258 
259 	skb->priority = sk->sk_priority;
260 	skb->mark = sk->sk_mark;
261 
262 	mtu = dst_mtu(dst);
263 	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
264 		IP6_INC_STATS(ip6_dst_idev(skb->dst),
265 			      IPSTATS_MIB_OUTREQUESTS);
266 		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
267 				dst_output);
268 	}
269 
270 	if (net_ratelimit())
271 		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
272 	skb->dev = dst->dev;
273 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
274 	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
275 	kfree_skb(skb);
276 	return -EMSGSIZE;
277 }
278 
279 EXPORT_SYMBOL(ip6_xmit);
280 
281 /*
282  *	To avoid extra problems, ND packets are sent through this
283  *	routine. It's code duplication, but I really want to avoid
284  *	extra checks since ipv6_build_header is used by TCP (which
285  *	is performance critical for us).
286  */
287 
288 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
289 	       struct in6_addr *saddr, struct in6_addr *daddr,
290 	       int proto, int len)
291 {
292 	struct ipv6_pinfo *np = inet6_sk(sk);
293 	struct ipv6hdr *hdr;
294 	int totlen;
295 
296 	skb->protocol = htons(ETH_P_IPV6);
297 	skb->dev = dev;
298 
299 	totlen = len + sizeof(struct ipv6hdr);
300 
301 	skb_reset_network_header(skb);
302 	skb_put(skb, sizeof(struct ipv6hdr));
303 	hdr = ipv6_hdr(skb);
304 
305 	*(__be32*)hdr = htonl(0x60000000);
306 
307 	hdr->payload_len = htons(len);
308 	hdr->nexthdr = proto;
309 	hdr->hop_limit = np->hop_limit;
310 
311 	ipv6_addr_copy(&hdr->saddr, saddr);
312 	ipv6_addr_copy(&hdr->daddr, daddr);
313 
314 	return 0;
315 }
316 
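/*
 * Deliver a Router Alert packet to every raw socket registered on the
 * ip6_ra_chain whose RA selector matches.  To avoid one clone per
 * listener, every matching socket except the last one receives a clone
 * and the final matching socket receives the original skb; returning 1
 * tells the caller the packet has been consumed.
 */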
317 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
318 {
319 	struct ip6_ra_chain *ra;
320 	struct sock *last = NULL;
321 
322 	read_lock(&ip6_ra_lock);
323 	for (ra = ip6_ra_chain; ra; ra = ra->next) {
324 		struct sock *sk = ra->sk;
325 		if (sk && ra->sel == sel &&
326 		    (!sk->sk_bound_dev_if ||
327 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
328 			if (last) {
329 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
330 				if (skb2)
331 					rawv6_rcv(last, skb2);
332 			}
333 			last = sk;
334 		}
335 	}
336 
337 	if (last) {
338 		rawv6_rcv(last, skb);
339 		read_unlock(&ip6_ra_lock);
340 		return 1;
341 	}
342 	read_unlock(&ip6_ra_lock);
343 	return 0;
344 }
345 
346 static int ip6_forward_proxy_check(struct sk_buff *skb)
347 {
348 	struct ipv6hdr *hdr = ipv6_hdr(skb);
349 	u8 nexthdr = hdr->nexthdr;
350 	int offset;
351 
352 	if (ipv6_ext_hdr(nexthdr)) {
353 		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
354 		if (offset < 0)
355 			return 0;
356 	} else
357 		offset = sizeof(struct ipv6hdr);
358 
359 	if (nexthdr == IPPROTO_ICMPV6) {
360 		struct icmp6hdr *icmp6;
361 
362 		if (!pskb_may_pull(skb, (skb_network_header(skb) +
363 					 offset + 1 - skb->data)))
364 			return 0;
365 
366 		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
367 
368 		switch (icmp6->icmp6_type) {
369 		case NDISC_ROUTER_SOLICITATION:
370 		case NDISC_ROUTER_ADVERTISEMENT:
371 		case NDISC_NEIGHBOUR_SOLICITATION:
372 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
373 		case NDISC_REDIRECT:
374 			/* For reactions involving unicast neighbor discovery
375 			 * messages destined to the proxied address, pass them
376 			 * to the input function.
377 			 */
378 			return 1;
379 		default:
380 			break;
381 		}
382 	}
383 
384 	/*
385 	 * The proxying router can't forward traffic sent to a link-local
386 	 * address, so signal the sender and discard the packet. This
387 	 * behavior is clarified by the MIPv6 specification.
388 	 */
389 	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
390 		dst_link_failure(skb);
391 		return -1;
392 	}
393 
394 	return 0;
395 }
396 
397 static inline int ip6_forward_finish(struct sk_buff *skb)
398 {
399 	return dst_output(skb);
400 }
401 
402 int ip6_forward(struct sk_buff *skb)
403 {
404 	struct dst_entry *dst = skb->dst;
405 	struct ipv6hdr *hdr = ipv6_hdr(skb);
406 	struct inet6_skb_parm *opt = IP6CB(skb);
407 
408 	if (ipv6_devconf.forwarding == 0)
409 		goto error;
410 
411 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
412 		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
413 		goto drop;
414 	}
415 
416 	skb_forward_csum(skb);
417 
418 	/*
419 	 *	We do NOT do any processing on
420 	 *	RA packets; we push them to user level AS IS,
421 	 *	without any guarantee that the application will be able
422 	 *	to interpret them. The reason is that we
423 	 *	cannot do anything clever here.
424 	 *
425 	 *	We are not the end node, so if the packet contains
426 	 *	AH/ESP we cannot do anything with it.
427 	 *	Defragmentation would also be a mistake: RA packets
428 	 *	cannot be fragmented, because there is no guarantee
429 	 *	that different fragments will travel along the same path. --ANK
430 	 */
431 	if (opt->ra) {
432 		u8 *ptr = skb_network_header(skb) + opt->ra;
433 		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
434 			return 0;
435 	}
436 
437 	/*
438 	 *	check and decrement ttl
439 	 */
440 	if (hdr->hop_limit <= 1) {
441 		/* Force the OUTPUT device to be used for source address selection */
442 		skb->dev = dst->dev;
443 		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
444 			    0, skb->dev);
445 		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
446 
447 		kfree_skb(skb);
448 		return -ETIMEDOUT;
449 	}
450 
451 	/* XXX: idev->cnf.proxy_ndp? */
452 	if (ipv6_devconf.proxy_ndp &&
453 	    pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
454 		int proxied = ip6_forward_proxy_check(skb);
455 		if (proxied > 0)
456 			return ip6_input(skb);
457 		else if (proxied < 0) {
458 			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
459 			goto drop;
460 		}
461 	}
462 
463 	if (!xfrm6_route_forward(skb)) {
464 		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
465 		goto drop;
466 	}
467 	dst = skb->dst;
468 
469 	/* IPv6 specs say nothing about it, but it is clear that we cannot
470 	   send redirects to source routed frames.
471 	   We don't send redirects to frames decapsulated from IPsec.
472 	 */
473 	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
474 	    !skb->sp) {
475 		struct in6_addr *target = NULL;
476 		struct rt6_info *rt;
477 		struct neighbour *n = dst->neighbour;
478 
479 		/*
480 		 *	incoming and outgoing devices are the same;
481 		 *	send a redirect.
482 		 */
483 
484 		rt = (struct rt6_info *) dst;
485 		if ((rt->rt6i_flags & RTF_GATEWAY))
486 			target = (struct in6_addr*)&n->primary_key;
487 		else
488 			target = &hdr->daddr;
489 
490 		/* Limit redirects both by destination (here)
491 		   and by source (inside ndisc_send_redirect)
492 		 */
493 		if (xrlim_allow(dst, 1*HZ))
494 			ndisc_send_redirect(skb, n, target);
495 	} else {
496 		int addrtype = ipv6_addr_type(&hdr->saddr);
497 
498 		/* This check is security critical. */
499 		if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
500 			goto error;
501 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
502 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
503 				ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
504 			goto error;
505 		}
506 	}
507 
508 	if (skb->len > dst_mtu(dst)) {
509 		/* Again, force the OUTPUT device to be used for source address selection */
510 		skb->dev = dst->dev;
511 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
512 		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
513 		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
514 		kfree_skb(skb);
515 		return -EMSGSIZE;
516 	}
517 
518 	if (skb_cow(skb, dst->dev->hard_header_len)) {
519 		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
520 		goto drop;
521 	}
522 
523 	hdr = ipv6_hdr(skb);
524 
525 	/* Mangling hops number delayed to point after skb COW */
526 	/* Mangling the hop limit is delayed until after the skb COW */
527 	hdr->hop_limit--;
528 
529 	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
530 	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
531 		       ip6_forward_finish);
532 
533 error:
534 	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
535 drop:
536 	kfree_skb(skb);
537 	return -EINVAL;
538 }
539 
540 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
541 {
542 	to->pkt_type = from->pkt_type;
543 	to->priority = from->priority;
544 	to->protocol = from->protocol;
545 	dst_release(to->dst);
546 	to->dst = dst_clone(from->dst);
547 	to->dev = from->dev;
548 	to->mark = from->mark;
549 
550 #ifdef CONFIG_NET_SCHED
551 	to->tc_index = from->tc_index;
552 #endif
553 	nf_copy(to, from);
554 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
555     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
556 	to->nf_trace = from->nf_trace;
557 #endif
558 	skb_copy_secmark(to, from);
559 }
560 
561 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
562 {
563 	u16 offset = sizeof(struct ipv6hdr);
564 	struct ipv6_opt_hdr *exthdr =
565 				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
566 	unsigned int packet_len = skb->tail - skb->network_header;
567 	int found_rhdr = 0;
568 	*nexthdr = &ipv6_hdr(skb)->nexthdr;
569 
570 	while (offset + 1 <= packet_len) {
571 
572 		switch (**nexthdr) {
573 
574 		case NEXTHDR_HOP:
575 			break;
576 		case NEXTHDR_ROUTING:
577 			found_rhdr = 1;
578 			break;
579 		case NEXTHDR_DEST:
580 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
581 			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
582 				break;
583 #endif
584 			if (found_rhdr)
585 				return offset;
586 			break;
587 		default :
588 			return offset;
589 		}
590 
591 		offset += ipv6_optlen(exthdr);
592 		*nexthdr = &exthdr->nexthdr;
593 		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
594 						 offset);
595 	}
596 
597 	return offset;
598 }
599 EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
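
/*
 * Worked example (illustrative sketch, not from the original sources):
 * for a packet laid out as IPv6 | Hop-by-Hop (8 bytes) | TCP, the walk
 * above skips the hop-by-hop header and stops at the TCP header:
 *
 *	u8 *prevhdr;
 *	unsigned int hlen = ip6_find_1stfragopt(skb, &prevhdr);
 *
 * Here hlen == 40 + 8 == 48, and prevhdr points at the hop-by-hop
 * header's Next Header field (currently IPPROTO_TCP).  ip6_fragment()
 * below saves that value for fh->nexthdr and then overwrites it with
 * NEXTHDR_FRAGMENT, so the fragment header lands at offset hlen.
 */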
600 
601 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
602 {
603 	struct net_device *dev;
604 	struct sk_buff *frag;
605 	struct rt6_info *rt = (struct rt6_info*)skb->dst;
606 	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
607 	struct ipv6hdr *tmp_hdr;
608 	struct frag_hdr *fh;
609 	unsigned int mtu, hlen, left, len;
610 	__be32 frag_id = 0;
611 	int ptr, offset = 0, err=0;
612 	u8 *prevhdr, nexthdr = 0;
613 
614 	dev = rt->u.dst.dev;
615 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
616 	nexthdr = *prevhdr;
617 
618 	mtu = ip6_skb_dst_mtu(skb);
619 
620 	/* We must not fragment if the socket is set to force MTU discovery
621 	 * or if the skb is not generated by a local socket.  (This last
622 	 * check should be redundant, but it's free.)
623 	 */
624 	if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
625 		skb->dev = skb->dst->dev;
626 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
627 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
628 		kfree_skb(skb);
629 		return -EMSGSIZE;
630 	}
631 
632 	if (np && np->frag_size < mtu) {
633 		if (np->frag_size)
634 			mtu = np->frag_size;
635 	}
636 	mtu -= hlen + sizeof(struct frag_hdr);
637 
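	/*
	 * Fast path: the skb already carries a chain of ready-made
	 * fragments in frag_list, so just prepend a fragment header to
	 * each of them instead of copying.  Any skb that does not meet
	 * the geometry/sharing requirements falls back to slow_path.
	 */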
638 	if (skb_shinfo(skb)->frag_list) {
639 		int first_len = skb_pagelen(skb);
640 		int truesizes = 0;
641 
642 		if (first_len - hlen > mtu ||
643 		    ((first_len - hlen) & 7) ||
644 		    skb_cloned(skb))
645 			goto slow_path;
646 
647 		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
648 			/* Correct geometry. */
649 			if (frag->len > mtu ||
650 			    ((frag->len & 7) && frag->next) ||
651 			    skb_headroom(frag) < hlen)
652 			    goto slow_path;
653 
654 			/* Partially cloned skb? */
655 			if (skb_shared(frag))
656 				goto slow_path;
657 
658 			BUG_ON(frag->sk);
659 			if (skb->sk) {
660 				sock_hold(skb->sk);
661 				frag->sk = skb->sk;
662 				frag->destructor = sock_wfree;
663 				truesizes += frag->truesize;
664 			}
665 		}
666 
667 		err = 0;
668 		offset = 0;
669 		frag = skb_shinfo(skb)->frag_list;
670 		skb_shinfo(skb)->frag_list = NULL;
671 		/* BUILD HEADER */
672 
673 		*prevhdr = NEXTHDR_FRAGMENT;
674 		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
675 		if (!tmp_hdr) {
676 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
677 			return -ENOMEM;
678 		}
679 
680 		__skb_pull(skb, hlen);
681 		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
682 		__skb_push(skb, hlen);
683 		skb_reset_network_header(skb);
684 		memcpy(skb_network_header(skb), tmp_hdr, hlen);
685 
686 		ipv6_select_ident(skb, fh);
687 		fh->nexthdr = nexthdr;
688 		fh->reserved = 0;
689 		fh->frag_off = htons(IP6_MF);
690 		frag_id = fh->identification;
691 
692 		first_len = skb_pagelen(skb);
693 		skb->data_len = first_len - skb_headlen(skb);
694 		skb->truesize -= truesizes;
695 		skb->len = first_len;
696 		ipv6_hdr(skb)->payload_len = htons(first_len -
697 						   sizeof(struct ipv6hdr));
698 
699 		dst_hold(&rt->u.dst);
700 
701 		for (;;) {
702 			/* Prepare the header of the next frame
703 			 * before the previous one goes out. */
704 			if (frag) {
705 				frag->ip_summed = CHECKSUM_NONE;
706 				skb_reset_transport_header(frag);
707 				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
708 				__skb_push(frag, hlen);
709 				skb_reset_network_header(frag);
710 				memcpy(skb_network_header(frag), tmp_hdr,
711 				       hlen);
712 				offset += skb->len - hlen - sizeof(struct frag_hdr);
713 				fh->nexthdr = nexthdr;
714 				fh->reserved = 0;
715 				fh->frag_off = htons(offset);
716 				if (frag->next != NULL)
717 					fh->frag_off |= htons(IP6_MF);
718 				fh->identification = frag_id;
719 				ipv6_hdr(frag)->payload_len =
720 						htons(frag->len -
721 						      sizeof(struct ipv6hdr));
722 				ip6_copy_metadata(frag, skb);
723 			}
724 
725 			err = output(skb);
726 			if(!err)
727 				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
728 
729 			if (err || !frag)
730 				break;
731 
732 			skb = frag;
733 			frag = skb->next;
734 			skb->next = NULL;
735 		}
736 
737 		kfree(tmp_hdr);
738 
739 		if (err == 0) {
740 			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
741 			dst_release(&rt->u.dst);
742 			return 0;
743 		}
744 
745 		while (frag) {
746 			skb = frag->next;
747 			kfree_skb(frag);
748 			frag = skb;
749 		}
750 
751 		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
752 		dst_release(&rt->u.dst);
753 		return err;
754 	}
755 
756 slow_path:
757 	left = skb->len - hlen;		/* Space per frame */
758 	ptr = hlen;			/* Where to start from */
759 
760 	/*
761 	 *	Fragment the datagram.
762 	 */
763 
764 	*prevhdr = NEXTHDR_FRAGMENT;
765 
766 	/*
767 	 *	Keep copying data until we run out.
768 	 */
769 	while(left > 0)	{
770 		len = left;
771 		/* IF: it doesn't fit, use 'mtu' - the data space left */
772 		if (len > mtu)
773 			len = mtu;
774 		/* IF: we are not sending up to and including the packet end,
775 		   then align the next start on an eight-byte boundary */
776 		if (len < left)	{
777 			len &= ~7;
778 		}
779 		/*
780 		 *	Allocate buffer.
781 		 */
782 
783 		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
784 			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
785 			IP6_INC_STATS(ip6_dst_idev(skb->dst),
786 				      IPSTATS_MIB_FRAGFAILS);
787 			err = -ENOMEM;
788 			goto fail;
789 		}
790 
791 		/*
792 		 *	Set up data on packet
793 		 */
794 
795 		ip6_copy_metadata(frag, skb);
796 		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
797 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
798 		skb_reset_network_header(frag);
799 		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
800 		frag->transport_header = (frag->network_header + hlen +
801 					  sizeof(struct frag_hdr));
802 
803 		/*
804 		 *	Charge the memory for the fragment to any owner
805 		 *	it might possess
806 		 */
807 		if (skb->sk)
808 			skb_set_owner_w(frag, skb->sk);
809 
810 		/*
811 		 *	Copy the packet header into the new buffer.
812 		 */
813 		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
814 
815 		/*
816 		 *	Build fragment header.
817 		 */
818 		fh->nexthdr = nexthdr;
819 		fh->reserved = 0;
820 		if (!frag_id) {
821 			ipv6_select_ident(skb, fh);
822 			frag_id = fh->identification;
823 		} else
824 			fh->identification = frag_id;
825 
826 		/*
827 		 *	Copy a block of the IP datagram.
828 		 */
829 		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
830 			BUG();
831 		left -= len;
832 
833 		fh->frag_off = htons(offset);
834 		if (left > 0)
835 			fh->frag_off |= htons(IP6_MF);
836 		ipv6_hdr(frag)->payload_len = htons(frag->len -
837 						    sizeof(struct ipv6hdr));
838 
839 		ptr += len;
840 		offset += len;
841 
842 		/*
843 		 *	Put this fragment into the sending queue.
844 		 */
845 		err = output(frag);
846 		if (err)
847 			goto fail;
848 
849 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
850 	}
851 	IP6_INC_STATS(ip6_dst_idev(skb->dst),
852 		      IPSTATS_MIB_FRAGOKS);
853 	kfree_skb(skb);
854 	return err;
855 
856 fail:
857 	IP6_INC_STATS(ip6_dst_idev(skb->dst),
858 		      IPSTATS_MIB_FRAGFAILS);
859 	kfree_skb(skb);
860 	return err;
861 }
862 
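/*
 * Returns nonzero when the cached route can no longer be trusted for
 * this flow: the route key is not a matching /128 host entry and the
 * per-socket address cache does not match the flow address either.
 */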
863 static inline int ip6_rt_check(struct rt6key *rt_key,
864 			       struct in6_addr *fl_addr,
865 			       struct in6_addr *addr_cache)
866 {
867 	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
868 		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
869 }
870 
871 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
872 					  struct dst_entry *dst,
873 					  struct flowi *fl)
874 {
875 	struct ipv6_pinfo *np = inet6_sk(sk);
876 	struct rt6_info *rt = (struct rt6_info *)dst;
877 
878 	if (!dst)
879 		goto out;
880 
881 	/* Yes, checking route validity in the unconnected
882 	 * case is not very simple. Take into account
883 	 * that we do not support routing by source, TOS,
884 	 * or MSG_DONTROUTE 		--ANK (980726)
885 	 *
886 	 * 1. ip6_rt_check(): If the route was a host route,
887 	 *    check that the cached destination is current.
888 	 *    If it is a network route, we may still
889 	 *    check its validity using a saved pointer
890 	 *    to the last used address: daddr_cache.
891 	 *    We do not want to save the whole address now
892 	 *    (because the main consumer of this service
893 	 *    is TCP, which does not have this problem),
894 	 *    so this last trick works only on connected
895 	 *    sockets.
896 	 * 2. oif should also be the same.
897 	 */
898 	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
899 #ifdef CONFIG_IPV6_SUBTREES
900 	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
901 #endif
902 	    (fl->oif && fl->oif != dst->dev->ifindex)) {
903 		dst_release(dst);
904 		dst = NULL;
905 	}
906 
907 out:
908 	return dst;
909 }
910 
911 static int ip6_dst_lookup_tail(struct sock *sk,
912 			       struct dst_entry **dst, struct flowi *fl)
913 {
914 	int err;
915 
916 	if (*dst == NULL)
917 		*dst = ip6_route_output(sk, fl);
918 
919 	if ((err = (*dst)->error))
920 		goto out_err_release;
921 
922 	if (ipv6_addr_any(&fl->fl6_src)) {
923 		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
924 		if (err)
925 			goto out_err_release;
926 	}
927 
928 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
929 		/*
930 		 * Here, if the dst entry we've looked up
931 		 * has a neighbour entry that is in the INCOMPLETE
932 		 * state and the src address from the flow is
933 		 * marked as OPTIMISTIC, we release the found
934 		 * dst entry and replace it with the
935 		 * dst entry of the nexthop router.
936 		 */
937 		if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
938 			struct inet6_ifaddr *ifp;
939 			struct flowi fl_gw;
940 			int redirect;
941 
942 			ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src,
943 					      (*dst)->dev, 1);
944 
945 			redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
946 			if (ifp)
947 				in6_ifa_put(ifp);
948 
949 			if (redirect) {
950 				/*
951 				 * We need to get the dst entry for the
952 				 * default router instead
953 				 */
954 				dst_release(*dst);
955 				memcpy(&fl_gw, fl, sizeof(struct flowi));
956 				memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
957 				*dst = ip6_route_output(sk, &fl_gw);
958 				if ((err = (*dst)->error))
959 					goto out_err_release;
960 			}
961 		}
962 #endif
963 
964 	return 0;
965 
966 out_err_release:
967 	if (err == -ENETUNREACH)
968 		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
969 	dst_release(*dst);
970 	*dst = NULL;
971 	return err;
972 }
973 
974 /**
975  *	ip6_dst_lookup - perform route lookup on flow
976  *	@sk: socket which provides route info
977  *	@dst: pointer to dst_entry * for result
978  *	@fl: flow to lookup
979  *
980  *	This function performs a route lookup on the given flow.
981  *
982  *	It returns zero on success, or a standard errno code on error.
983  */
984 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
985 {
986 	*dst = NULL;
987 	return ip6_dst_lookup_tail(sk, dst, fl);
988 }
989 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
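
/*
 * Minimal caller sketch (illustrative only, not part of the original
 * sources): a datagram-style sender typically fills in a flow, resolves
 * the route, and releases it when done.  Everything below other than
 * ip6_dst_lookup(), ipv6_addr_copy() and dst_release() is hypothetical.
 *
 *	struct flowi fl;
 *	struct dst_entry *dst;
 *	int err;
 *
 *	memset(&fl, 0, sizeof(fl));
 *	fl.oif = sk->sk_bound_dev_if;
 *	fl.proto = IPPROTO_UDP;
 *	ipv6_addr_copy(&fl.fl6_dst, daddr);
 *
 *	err = ip6_dst_lookup(sk, &dst, &fl);
 *	if (err)
 *		return err;		// e.g. -ENETUNREACH
 *	// ... build and transmit the packet along dst ...
 *	dst_release(dst);
 */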
990 
991 /**
992  *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
993  *	@sk: socket which provides the dst cache and route info
994  *	@dst: pointer to dst_entry * for result
995  *	@fl: flow to lookup
996  *
997  *	This function performs a route lookup on the given flow with the
998  *	possibility of using the cached route in the socket if it is valid.
999  *	It will take the socket dst lock when operating on the dst cache.
1000  *	As a result, this function can only be used in process context.
1001  *
1002  *	It returns zero on success, or a standard errno code on error.
1003  */
1004 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1005 {
1006 	*dst = NULL;
1007 	if (sk) {
1008 		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1009 		*dst = ip6_sk_dst_check(sk, *dst, fl);
1010 	}
1011 
1012 	return ip6_dst_lookup_tail(sk, dst, fl);
1013 }
1014 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1015 
1016 static inline int ip6_ufo_append_data(struct sock *sk,
1017 			int getfrag(void *from, char *to, int offset, int len,
1018 			int odd, struct sk_buff *skb),
1019 			void *from, int length, int hh_len, int fragheaderlen,
1020 			int transhdrlen, int mtu,unsigned int flags)
1021 
1022 {
1023 	struct sk_buff *skb;
1024 	int err;
1025 
1026 	/* The network device supports UDP large send offload, so
1027 	 * create a single skb containing the complete
1028 	 * UDP datagram.
1029 	 */
1030 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1031 		skb = sock_alloc_send_skb(sk,
1032 			hh_len + fragheaderlen + transhdrlen + 20,
1033 			(flags & MSG_DONTWAIT), &err);
1034 		if (skb == NULL)
1035 			return -ENOMEM;
1036 
1037 		/* reserve space for Hardware header */
1038 		skb_reserve(skb, hh_len);
1039 
1040 		/* create space for UDP/IP header */
1041 		skb_put(skb,fragheaderlen + transhdrlen);
1042 
1043 		/* initialize network header pointer */
1044 		skb_reset_network_header(skb);
1045 
1046 		/* initialize protocol header pointer */
1047 		skb->transport_header = skb->network_header + fragheaderlen;
1048 
1049 		skb->ip_summed = CHECKSUM_PARTIAL;
1050 		skb->csum = 0;
1051 		sk->sk_sndmsg_off = 0;
1052 	}
1053 
1054 	err = skb_append_datato_frags(sk,skb, getfrag, from,
1055 				      (length - transhdrlen));
1056 	if (!err) {
1057 		struct frag_hdr fhdr;
1058 
1059 		/* specify the length of each IP datagram fragment */
1060 		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1061 					    sizeof(struct frag_hdr);
1062 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1063 		ipv6_select_ident(skb, &fhdr);
1064 		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1065 		__skb_queue_tail(&sk->sk_write_queue, skb);
1066 
1067 		return 0;
1068 	}
1069 	/* There is not enough support to do UDP LSO,
1070 	 * so follow the normal path.
1071 	 */
1072 	kfree_skb(skb);
1073 
1074 	return err;
1075 }
1076 
1077 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1078 	int offset, int len, int odd, struct sk_buff *skb),
1079 	void *from, int length, int transhdrlen,
1080 	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1081 	struct rt6_info *rt, unsigned int flags)
1082 {
1083 	struct inet_sock *inet = inet_sk(sk);
1084 	struct ipv6_pinfo *np = inet6_sk(sk);
1085 	struct sk_buff *skb;
1086 	unsigned int maxfraglen, fragheaderlen;
1087 	int exthdrlen;
1088 	int hh_len;
1089 	int mtu;
1090 	int copy;
1091 	int err;
1092 	int offset = 0;
1093 	int csummode = CHECKSUM_NONE;
1094 
1095 	if (flags&MSG_PROBE)
1096 		return 0;
1097 	if (skb_queue_empty(&sk->sk_write_queue)) {
1098 		/*
1099 		 * setup for corking
1100 		 */
1101 		if (opt) {
1102 			if (np->cork.opt == NULL) {
1103 				np->cork.opt = kmalloc(opt->tot_len,
1104 						       sk->sk_allocation);
1105 				if (unlikely(np->cork.opt == NULL))
1106 					return -ENOBUFS;
1107 			} else if (np->cork.opt->tot_len < opt->tot_len) {
1108 				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1109 				return -EINVAL;
1110 			}
1111 			memcpy(np->cork.opt, opt, opt->tot_len);
1112 			inet->cork.flags |= IPCORK_OPT;
1113 			/* need source address above miyazawa*/
1114 		}
1115 		dst_hold(&rt->u.dst);
1116 		np->cork.rt = rt;
1117 		inet->cork.fl = *fl;
1118 		np->cork.hop_limit = hlimit;
1119 		np->cork.tclass = tclass;
1120 		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1121 		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1122 		if (np->frag_size < mtu) {
1123 			if (np->frag_size)
1124 				mtu = np->frag_size;
1125 		}
1126 		inet->cork.fragsize = mtu;
1127 		if (dst_allfrag(rt->u.dst.path))
1128 			inet->cork.flags |= IPCORK_ALLFRAG;
1129 		inet->cork.length = 0;
1130 		sk->sk_sndmsg_page = NULL;
1131 		sk->sk_sndmsg_off = 0;
1132 		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1133 			    rt->rt6i_nfheader_len;
1134 		length += exthdrlen;
1135 		transhdrlen += exthdrlen;
1136 	} else {
1137 		rt = np->cork.rt;
1138 		fl = &inet->cork.fl;
1139 		if (inet->cork.flags & IPCORK_OPT)
1140 			opt = np->cork.opt;
1141 		transhdrlen = 0;
1142 		exthdrlen = 0;
1143 		mtu = inet->cork.fragsize;
1144 	}
1145 
1146 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1147 
1148 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1149 			(opt ? opt->opt_nflen : 0);
1150 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1151 
1152 	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1153 		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1154 			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1155 			return -EMSGSIZE;
1156 		}
1157 	}
1158 
1159 	/*
1160 	 * Let's try using as much space as possible.
1161 	 * Use MTU if total length of the message fits into the MTU.
1162 	 * Otherwise, we need to reserve fragment header and
1163 	 * fragment alignment (= 8-15 octets, in total).
1164 	 *
1165 	 * Note that we may need to "move" the data from the tail
1166 	 * of the buffer to the new fragment when we split
1167 	 * the message.
1168 	 *
1169 	 * FIXME: It may be fragmented into multiple chunks
1170 	 *        at once if non-fragmentable extension headers
1171 	 *        are too large.
1172 	 * --yoshfuji
1173 	 */
1174 
1175 	inet->cork.length += length;
1176 	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1177 	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
1178 
1179 		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1180 					  fragheaderlen, transhdrlen, mtu,
1181 					  flags);
1182 		if (err)
1183 			goto error;
1184 		return 0;
1185 	}
1186 
1187 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1188 		goto alloc_new_skb;
1189 
1190 	while (length > 0) {
1191 		/* Check if the remaining data fits into current packet. */
1192 		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1193 		if (copy < length)
1194 			copy = maxfraglen - skb->len;
1195 
1196 		if (copy <= 0) {
1197 			char *data;
1198 			unsigned int datalen;
1199 			unsigned int fraglen;
1200 			unsigned int fraggap;
1201 			unsigned int alloclen;
1202 			struct sk_buff *skb_prev;
1203 alloc_new_skb:
1204 			skb_prev = skb;
1205 
1206 			/* There's no room in the current skb */
1207 			if (skb_prev)
1208 				fraggap = skb_prev->len - maxfraglen;
1209 			else
1210 				fraggap = 0;
1211 
1212 			/*
1213 			 * If remaining data exceeds the mtu,
1214 			 * we know we need more fragment(s).
1215 			 */
1216 			datalen = length + fraggap;
1217 			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1218 				datalen = maxfraglen - fragheaderlen;
1219 
1220 			fraglen = datalen + fragheaderlen;
1221 			if ((flags & MSG_MORE) &&
1222 			    !(rt->u.dst.dev->features&NETIF_F_SG))
1223 				alloclen = mtu;
1224 			else
1225 				alloclen = datalen + fragheaderlen;
1226 
1227 			/*
1228 			 * The last fragment gets additional space at tail.
1229 			 * Note: we overallocate on fragments with MSG_MORE
1230 			 * because we have no idea if we're the last one.
1231 			 */
1232 			if (datalen == length + fraggap)
1233 				alloclen += rt->u.dst.trailer_len;
1234 
1235 			/*
1236 			 * We just reserve space for fragment header.
1237 			 * Note: this may be overallocation if the message
1238 			 * (without MSG_MORE) fits into the MTU.
1239 			 */
1240 			alloclen += sizeof(struct frag_hdr);
1241 
1242 			if (transhdrlen) {
1243 				skb = sock_alloc_send_skb(sk,
1244 						alloclen + hh_len,
1245 						(flags & MSG_DONTWAIT), &err);
1246 			} else {
1247 				skb = NULL;
1248 				if (atomic_read(&sk->sk_wmem_alloc) <=
1249 				    2 * sk->sk_sndbuf)
1250 					skb = sock_wmalloc(sk,
1251 							   alloclen + hh_len, 1,
1252 							   sk->sk_allocation);
1253 				if (unlikely(skb == NULL))
1254 					err = -ENOBUFS;
1255 			}
1256 			if (skb == NULL)
1257 				goto error;
1258 			/*
1259 			 *	Fill in the control structures
1260 			 */
1261 			skb->ip_summed = csummode;
1262 			skb->csum = 0;
1263 			/* reserve for fragmentation */
1264 			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1265 
1266 			/*
1267 			 *	Find where to start putting bytes
1268 			 */
1269 			data = skb_put(skb, fraglen);
1270 			skb_set_network_header(skb, exthdrlen);
1271 			data += fragheaderlen;
1272 			skb->transport_header = (skb->network_header +
1273 						 fragheaderlen);
1274 			if (fraggap) {
1275 				skb->csum = skb_copy_and_csum_bits(
1276 					skb_prev, maxfraglen,
1277 					data + transhdrlen, fraggap, 0);
1278 				skb_prev->csum = csum_sub(skb_prev->csum,
1279 							  skb->csum);
1280 				data += fraggap;
1281 				pskb_trim_unique(skb_prev, maxfraglen);
1282 			}
1283 			copy = datalen - transhdrlen - fraggap;
1284 			if (copy < 0) {
1285 				err = -EINVAL;
1286 				kfree_skb(skb);
1287 				goto error;
1288 			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1289 				err = -EFAULT;
1290 				kfree_skb(skb);
1291 				goto error;
1292 			}
1293 
1294 			offset += copy;
1295 			length -= datalen - fraggap;
1296 			transhdrlen = 0;
1297 			exthdrlen = 0;
1298 			csummode = CHECKSUM_NONE;
1299 
1300 			/*
1301 			 * Put the packet on the pending queue
1302 			 */
1303 			__skb_queue_tail(&sk->sk_write_queue, skb);
1304 			continue;
1305 		}
1306 
1307 		if (copy > length)
1308 			copy = length;
1309 
1310 		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1311 			unsigned int off;
1312 
1313 			off = skb->len;
1314 			if (getfrag(from, skb_put(skb, copy),
1315 						offset, copy, off, skb) < 0) {
1316 				__skb_trim(skb, off);
1317 				err = -EFAULT;
1318 				goto error;
1319 			}
1320 		} else {
1321 			int i = skb_shinfo(skb)->nr_frags;
1322 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1323 			struct page *page = sk->sk_sndmsg_page;
1324 			int off = sk->sk_sndmsg_off;
1325 			unsigned int left;
1326 
1327 			if (page && (left = PAGE_SIZE - off) > 0) {
1328 				if (copy >= left)
1329 					copy = left;
1330 				if (page != frag->page) {
1331 					if (i == MAX_SKB_FRAGS) {
1332 						err = -EMSGSIZE;
1333 						goto error;
1334 					}
1335 					get_page(page);
1336 					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1337 					frag = &skb_shinfo(skb)->frags[i];
1338 				}
1339 			} else if(i < MAX_SKB_FRAGS) {
1340 				if (copy > PAGE_SIZE)
1341 					copy = PAGE_SIZE;
1342 				page = alloc_pages(sk->sk_allocation, 0);
1343 				if (page == NULL) {
1344 					err = -ENOMEM;
1345 					goto error;
1346 				}
1347 				sk->sk_sndmsg_page = page;
1348 				sk->sk_sndmsg_off = 0;
1349 
1350 				skb_fill_page_desc(skb, i, page, 0, 0);
1351 				frag = &skb_shinfo(skb)->frags[i];
1352 			} else {
1353 				err = -EMSGSIZE;
1354 				goto error;
1355 			}
1356 			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1357 				err = -EFAULT;
1358 				goto error;
1359 			}
1360 			sk->sk_sndmsg_off += copy;
1361 			frag->size += copy;
1362 			skb->len += copy;
1363 			skb->data_len += copy;
1364 			skb->truesize += copy;
1365 			atomic_add(copy, &sk->sk_wmem_alloc);
1366 		}
1367 		offset += copy;
1368 		length -= copy;
1369 	}
1370 	return 0;
1371 error:
1372 	inet->cork.length -= length;
1373 	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1374 	return err;
1375 }
1376 
1377 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1378 {
1379 	inet->cork.flags &= ~IPCORK_OPT;
1380 	kfree(np->cork.opt);
1381 	np->cork.opt = NULL;
1382 	if (np->cork.rt) {
1383 		dst_release(&np->cork.rt->u.dst);
1384 		np->cork.rt = NULL;
1385 		inet->cork.flags &= ~IPCORK_ALLFRAG;
1386 	}
1387 	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1388 }
1389 
1390 int ip6_push_pending_frames(struct sock *sk)
1391 {
1392 	struct sk_buff *skb, *tmp_skb;
1393 	struct sk_buff **tail_skb;
1394 	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1395 	struct inet_sock *inet = inet_sk(sk);
1396 	struct ipv6_pinfo *np = inet6_sk(sk);
1397 	struct ipv6hdr *hdr;
1398 	struct ipv6_txoptions *opt = np->cork.opt;
1399 	struct rt6_info *rt = np->cork.rt;
1400 	struct flowi *fl = &inet->cork.fl;
1401 	unsigned char proto = fl->proto;
1402 	int err = 0;
1403 
1404 	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1405 		goto out;
1406 	tail_skb = &(skb_shinfo(skb)->frag_list);
1407 
1408 	/* move skb->data to ip header from ext header */
1409 	if (skb->data < skb_network_header(skb))
1410 		__skb_pull(skb, skb_network_offset(skb));
1411 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1412 		__skb_pull(tmp_skb, skb_network_header_len(skb));
1413 		*tail_skb = tmp_skb;
1414 		tail_skb = &(tmp_skb->next);
1415 		skb->len += tmp_skb->len;
1416 		skb->data_len += tmp_skb->len;
1417 		skb->truesize += tmp_skb->truesize;
1418 		__sock_put(tmp_skb->sk);
1419 		tmp_skb->destructor = NULL;
1420 		tmp_skb->sk = NULL;
1421 	}
1422 
1423 	ipv6_addr_copy(final_dst, &fl->fl6_dst);
1424 	__skb_pull(skb, skb_network_header_len(skb));
1425 	if (opt && opt->opt_flen)
1426 		ipv6_push_frag_opts(skb, opt, &proto);
1427 	if (opt && opt->opt_nflen)
1428 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1429 
1430 	skb_push(skb, sizeof(struct ipv6hdr));
1431 	skb_reset_network_header(skb);
1432 	hdr = ipv6_hdr(skb);
1433 
1434 	*(__be32*)hdr = fl->fl6_flowlabel |
1435 		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
1436 
1437 	hdr->hop_limit = np->cork.hop_limit;
1438 	hdr->nexthdr = proto;
1439 	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1440 	ipv6_addr_copy(&hdr->daddr, final_dst);
1441 
1442 	skb->priority = sk->sk_priority;
1443 	skb->mark = sk->sk_mark;
1444 
1445 	skb->dst = dst_clone(&rt->u.dst);
1446 	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1447 	if (proto == IPPROTO_ICMPV6) {
1448 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1449 
1450 		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1451 		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1452 	}
1453 
1454 	err = ip6_local_out(skb);
1455 	if (err) {
1456 		if (err > 0)
1457 			err = np->recverr ? net_xmit_errno(err) : 0;
1458 		if (err)
1459 			goto error;
1460 	}
1461 
1462 out:
1463 	ip6_cork_release(inet, np);
1464 	return err;
1465 error:
1466 	goto out;
1467 }
1468 
1469 void ip6_flush_pending_frames(struct sock *sk)
1470 {
1471 	struct sk_buff *skb;
1472 
1473 	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1474 		if (skb->dst)
1475 			IP6_INC_STATS(ip6_dst_idev(skb->dst),
1476 				      IPSTATS_MIB_OUTDISCARDS);
1477 		kfree_skb(skb);
1478 	}
1479 
1480 	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1481 }
1482
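/*
 * Usage sketch for the corking path above (illustrative only, not part
 * of the original sources): a datagram protocol queues data with
 * ip6_append_data() and then either pushes or discards the queue.
 * Apart from the three ip6_* calls, the names below are hypothetical.
 *
 *	err = ip6_append_data(sk, getfrag, msg->msg_iov, len, transhdrlen,
 *			      hlimit, tclass, opt, fl,
 *			      (struct rt6_info *)dst, msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 */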