xref: /openbmc/linux/net/core/datagram.c (revision 84ed8a99)
/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *	This is used because the UDP, RAW, PACKET, DDP, IPX, AX.25 and
 *	NetROM layers all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was
 *	shared before but buried in udp.c so I moved it.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
 *						     udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy()
 *					understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
 *					skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET.
 *					IPX can no longer use the SO_TYPE hack
 *					but AX.25 now works right, and SPX is
 *					feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol
 *					crash.
 *		Florian  La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff    :       Unconnected accept() fix.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
				  void *key)
{
	unsigned long bits = (unsigned long)key;

	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (bits && !(bits & (POLLIN | POLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}
/*
 * Wait for the last received packet to be different from skb
 */
static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				 const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (sk->sk_receive_queue.prev != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packet sockets may have become disconnected.
	 * If so, we report the problem.
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}

/**
 *	__skb_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@peeked: returns non-zero if this packet has been seen before
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *
 *	Get a datagram skbuff; this understands peeking, nonblocking wakeups
 *	and possible races. It replaces identical code in packet, raw and
 *	udp, as well as in IPX, AX.25 and AppleTalk. It also finally fixes
 *	the long-standing peek and read race for datagram sockets. If you
 *	alter this routine remember it must be re-entrant.
 *
 *	This function does not take the socket lock. The caller owns the
 *	returned skb and must free it when done (usually by calling
 *	skb_free_datagram).
 *
 *	* It does not lock socket since today. This function is
 *	* free of race conditions. This measure should/can improve
 *	* significantly datagram socket latencies at high loads,
 *	* when data copying to user space takes lots of time.
 *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
 *	*  8) Great win.)
 *	*			                    --ANK (980729)
 *
 *	The order of the tests when we find no data waiting is specified
 *	quite explicitly by POSIX 1003.1g; don't change it without having
 *	the standard around please.
 */
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    int *peeked, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		unsigned long cpu_flags;
		struct sk_buff_head *queue = &sk->sk_receive_queue;
		int _off = *off;

		last = (struct sk_buff *)queue;
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb_queue_walk(queue, skb) {
			last = skb;
			*peeked = skb->peeked;
			if (flags & MSG_PEEK) {
				if (_off >= skb->len && (skb->len || _off ||
							 skb->peeked)) {
					_off -= skb->len;
					continue;
				}
				skb->peeked = 1;
				atomic_inc(&skb->users);
			} else
				__skb_unlink(skb, queue);

			spin_unlock_irqrestore(&queue->lock, cpu_flags);
			*off = _off;
			return skb;
		}
		spin_unlock_irqrestore(&queue->lock, cpu_flags);

		if (sk_can_busy_loop(sk) &&
		    sk_busy_loop(sk, flags & MSG_DONTWAIT))
			continue;

		/* User doesn't want to wait */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

	} while (!wait_for_more_packets(sk, err, &timeo, last));

	return NULL;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);
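
/*
 * Illustrative sketch (not part of this file): how a caller might use the
 * MSG_PEEK offset that __skb_recv_datagram() maintains.  The names my_sk
 * and my_err are hypothetical.  On entry *off is the byte offset into the
 * queued data at which to start peeking; on return it holds the offset
 * inside the returned skb at which the data of interest starts.
 *
 *	int peeked, off = 0, my_err;
 *	struct sk_buff *skb;
 *
 *	skb = __skb_recv_datagram(my_sk, MSG_PEEK | MSG_DONTWAIT,
 *				  &peeked, &off, &my_err);
 *	if (skb)
 *		skb_free_datagram(my_sk, skb);
 *
 * skb_free_datagram() drops the reference that the peek took, leaving the
 * datagram on the receive queue for a later non-peeking read.
 */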

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int peeked, off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   &peeked, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
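
/*
 * Illustrative sketch (not part of this file): the receive pattern the
 * comments above describe, as it would appear in a protocol's recvmsg()
 * implementation.  'sk', 'msg', 'len', 'flags' and 'noblock' are the usual
 * recvmsg arguments; error handling is abbreviated.
 *
 *	int err, copied;
 *	struct sk_buff *skb;
 *
 *	skb = skb_recv_datagram(sk, flags, noblock, &err);
 *	if (!skb)
 *		return err;
 *
 *	copied = min_t(int, len, skb->len);
 *	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 *	skb_free_datagram(sk, skb);
 *	return err ? err : copied;
 */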

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
{
	bool slow;

	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;

	slow = lock_sock_fast(sk);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(skb_free_datagram_locked);
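
/*
 * Illustrative sketch (not part of this file): skb_free_datagram_locked() is
 * the variant for callers that do not already hold the socket lock but still
 * want the skb orphaned and receive memory reclaimed under it, as in a
 * UDP-style recvmsg() path (names abbreviated, error handling omitted):
 *
 *	skb = __skb_recv_datagram(sk, flags, &peeked, &off, &err);
 *	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 *	skb_free_datagram_locked(sk, skb);
 *
 * When another reference to the skb is still outstanding (for example a
 * concurrent MSG_PEEK), only this caller's reference is dropped and the
 * socket lock is not taken at all.
 */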

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram.  The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock.  Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */

int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (skb == skb_peek(&sk->sk_receive_queue)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			atomic_dec(&skb->users);
			err = 0;
		}
		spin_unlock_bh(&sk->sk_receive_queue.lock);
	}

	kfree_skb(skb);
	atomic_inc(&sk->sk_drops);
	sk_mem_reclaim_partial(sk);

	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
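
/*
 * Illustrative sketch (not part of this file): skb_kill_datagram() is what a
 * recvmsg() error path would call instead of skb_free_datagram() when the
 * datagram turns out to be unusable, so that a peeked copy is also removed
 * from the receive queue (checksum_is_bad() is a hypothetical check):
 *
 *	skb = skb_recv_datagram(sk, flags, noblock, &err);
 *	if (!skb)
 *		return err;
 *	if (checksum_is_bad(skb)) {
 *		skb_kill_datagram(sk, skb, flags);
 *		return -EAGAIN;
 *	}
 */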

/**
 *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: io vector to copy to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Note: the iovec is modified during the copy.
 */
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
			    struct iovec *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovec(to, skb->data + offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8  *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_toiovec(to, vaddr + frag->page_offset +
					     offset - start, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iovec(frag_iter,
						    offset - start,
						    to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_iovec);
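
/*
 * Illustrative note (an assumption drawn from the walk above, not an
 * authoritative description): the copy routines in this file treat the skb
 * as a single linear byte stream: bytes [0, skb_headlen) live in the head,
 * followed by each page frag, followed by each skb on the frag_list.  For
 * example, with a 100-byte head and two 50-byte frags, offset 160 falls
 * 10 bytes into the second frag: for that frag start = 150, end = 200, and
 * copy = end - offset = 40 bytes remain to be taken from it.
 */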

/**
 *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: io vector to copy to
 *	@to_offset: offset in the io vector to start copying to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Returns 0 or -EFAULT.
 *	Note: the iovec is not modified during the copy.
 */
int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
				  const struct iovec *to, int to_offset,
				  int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to_offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8  *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
						offset - start, to_offset, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
			to_offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_const_iovec(frag_iter,
							  offset - start,
							  to, to_offset,
							  copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to_offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_const_iovec);

/**
 *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: io vector to copy from
 *	@from_offset: offset in the io vector to start copying from
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 *	Note: the iovec is not modified during the copy.
 */
int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
				 const struct iovec *from, int from_offset,
				 int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
					copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		from_offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8  *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_fromiovecend(vaddr + frag->page_offset +
						  offset - start,
						  from, from_offset, copy);
			kunmap(page);
			if (err)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
			from_offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iovec(frag_iter,
							 offset - start,
							 from,
							 from_offset,
							 copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from_offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
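
/*
 * Illustrative sketch (not part of this file): a sendmsg()-style path that
 * fills a freshly allocated skb from the caller's iovec (names abbreviated,
 * header construction and error handling omitted):
 *
 *	skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT,
 *				  &err);
 *	skb_put(skb, len);
 *	err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
 *	if (err)
 *		kfree_skb(skb);
 */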

/**
 *	zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec
 *	@skb: buffer to copy
 *	@from: io vector to copy from
 *	@offset: offset in the io vector to start copying from
 *	@count: number of io vector entries to copy from
 *
 *	The function will first copy up to headlen, and then pin the userspace
 *	pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 *	Note: the iovec is not modified during the copy
 */
int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
				  int offset, size_t count)
{
	int len = iov_length(from, count) - offset;
	int copy = min_t(int, skb_headlen(skb), len);
	int size;
	int i = 0;

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iovec(skb, 0, from, offset, copy))
		return -EFAULT;

	if (len == copy)
		return 0;

	offset += copy;
	while (count--) {
		struct page *page[MAX_SKB_FRAGS];
		int num_pages;
		unsigned long base;
		unsigned long truesize;

		/* Skip over from offset and copied */
		if (offset >= from->iov_len) {
			offset -= from->iov_len;
			++from;
			continue;
		}
		len = from->iov_len - offset;
		base = (unsigned long)from->iov_base + offset;
		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
		if (i + size > MAX_SKB_FRAGS)
			return -EMSGSIZE;
		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
		if (num_pages != size) {
			release_pages(&page[i], num_pages, 0);
			return -EFAULT;
		}
		truesize = size * PAGE_SIZE;
		skb->data_len += len;
		skb->len += len;
		skb->truesize += truesize;
		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
		while (len) {
			int off = base & ~PAGE_MASK;
			int size = min_t(int, len, PAGE_SIZE - off);
			skb_fill_page_desc(skb, i, page[i], off, size);
			base += size;
			len -= size;
			i++;
		}
		offset = 0;
		++from;
	}
	return 0;
}
EXPORT_SYMBOL(zerocopy_sg_from_iovec);
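
/*
 * Illustrative note (a worked example of the arithmetic above, assuming
 * 4 KiB pages): the number of pages an iovec entry spans is
 *
 *	size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
 *
 * i.e. the in-page offset of the start plus the length, rounded up to whole
 * pages.  For base = 0x1000ff0 and len = 0x30: the in-page offset is 0xff0,
 * so 0xff0 + 0x30 + 0xfff = 0x201f, and 0x201f >> 12 = 2 pages, because the
 * buffer straddles a page boundary even though it is only 48 bytes long.
 */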

static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      u8 __user *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;

	/* Copy header. */
	if (copy > 0) {
		int err = 0;
		if (copy > len)
			copy = len;
		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
					       *csump, &err);
		if (err)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			int err = 0;
			u8  *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			csum2 = csum_and_copy_to_user(vaddr +
							frag->page_offset +
							offset - start,
						      to, copy, 0, &err);
			kunmap(page);
			if (err)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);

__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	return __skb_checksum_complete_head(skb, skb->len);
}
EXPORT_SYMBOL(__skb_checksum_complete);
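
/*
 * Illustrative note (an assumption about typical use, not a statement of
 * this file's API contract): a protocol's receive path calls
 * __skb_checksum_complete() when the checksum has not already been verified
 * by hardware, e.g.:
 *
 *	if (skb_csum_unnecessary(skb))
 *		goto copy;
 *	if (__skb_checksum_complete(skb))
 *		goto csum_error;
 *
 * A zero folded sum means the packet checksums correctly; ip_summed is then
 * set to CHECKSUM_UNNECESSARY so later callers can skip the work.
 */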

/**
 *	skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@iov: io vector
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy. Beware, in this case iovec
 *			   can be modified!
 */
int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
				     int hlen, struct iovec *iov)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	/* Skip filled elements.
	 * Pretty silly, look at memcpy_toiovec, though 8)
	 */
	while (!iov->iov_len)
		iov++;

	if (iov->iov_len < chunk) {
		if (__skb_checksum_complete(skb))
			goto csum_error;
		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
					       chunk, &csum))
			goto fault;
		if (csum_fold(csum))
			goto csum_error;
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		iov->iov_len -= chunk;
		iov->iov_base += chunk;
	}
	return 0;
csum_error:
	return -EINVAL;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
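
/*
 * Illustrative sketch (not part of this file): how a UDP-style recvmsg()
 * might choose between the plain copy and the copy-and-checksum helper
 * above (names abbreviated, error handling omitted):
 *
 *	if (skb_csum_unnecessary(skb))
 *		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
 *					      msg->msg_iov, copied);
 *	else
 *		err = skb_copy_and_csum_datagram_iovec(skb,
 *						       sizeof(struct udphdr),
 *						       msg->msg_iov);
 *	if (err == -EINVAL)
 *		goto csum_copy_err;
 */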

/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets provided the socket receive queue
 *	only ever holds data that is ready to receive.
 *
 *	Note: if you _don't_ use this routine for a protocol and you
 *	use a different write policy from sock_writeable(), then please
 *	supply your own write_space callback.
 */
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);
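
/*
 * Illustrative sketch (not part of this file): datagram_poll() is normally
 * wired straight into a protocol's proto_ops table, e.g. for a hypothetical
 * protocol "foo":
 *
 *	static const struct proto_ops foo_dgram_ops = {
 *		.family		= PF_FOO,
 *		.poll		= datagram_poll,
 *		.recvmsg	= foo_recvmsg,
 *	};
 *
 * Protocols whose writability test differs from sock_writeable() should
 * install their own ->poll (or write_space callback) instead, as the
 * comment above notes.
 */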