xref: /openbmc/linux/net/core/datagram.c (revision 861e10be)
1 /*
2  *	SUCS NET3:
3  *
4  *	Generic datagram handling routines. These are generic for all
5  *	protocols. Possibly a generic IP version on top of these would
6  *	make sense. Not tonight however 8-).
7  *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
8  *	NetROM layers all have identical poll code and mostly
9  *	identical recvmsg() code. So we share it here. The poll was
10  *	shared before but buried in udp.c so I moved it.
11  *
12  *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
13  *						     udp.c code)
14  *
15  *	Fixes:
16  *		Alan Cox	:	NULL return from skb_peek_copy()
17  *					understood
18  *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
19  *					skb_peek_copy stuff.
20  *		Alan Cox	:	Added support for SOCK_SEQPACKET.
21  *					IPX can no longer use the SO_TYPE hack
22  *					but AX.25 now works right, and SPX is
23  *					feasible.
24  *		Alan Cox	:	Fixed write poll of non IP protocol
25  *					crash.
26  *		Florian  La Roche:	Changed for my new skbuff handling.
27  *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
28  *		Linus Torvalds	:	BSD semantic fixes.
29  *		Alan Cox	:	Datagram iovec handling
30  *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
31  *		Alan Cox	:	POSIXisms
32  *		Pete Wyckoff    :       Unconnected accept() fix.
33  *
34  */
35 
36 #include <linux/module.h>
37 #include <linux/types.h>
38 #include <linux/kernel.h>
39 #include <asm/uaccess.h>
40 #include <linux/mm.h>
41 #include <linux/interrupt.h>
42 #include <linux/errno.h>
43 #include <linux/sched.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/rtnetlink.h>
47 #include <linux/poll.h>
48 #include <linux/highmem.h>
49 #include <linux/spinlock.h>
50 #include <linux/slab.h>
51 
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 
55 #include <net/checksum.h>
56 #include <net/sock.h>
57 #include <net/tcp_states.h>
58 #include <trace/events/skb.h>
59 
60 /*
61  *	Is a socket 'connection oriented'?
62  */
63 static inline int connection_based(struct sock *sk)
64 {
65 	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
66 }
67 
68 static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
69 				  void *key)
70 {
71 	unsigned long bits = (unsigned long)key;
72 
73 	/*
74 	 * Avoid a wakeup if the event is not interesting to us
75 	 */
76 	if (bits && !(bits & (POLLIN | POLLERR)))
77 		return 0;
78 	return autoremove_wake_function(wait, mode, sync, key);
79 }
80 /*
81  * Wait for a packet.
82  */
83 static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
84 {
85 	int error;
86 	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
87 
88 	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
89 
90 	/* Socket errors? */
91 	error = sock_error(sk);
92 	if (error)
93 		goto out_err;
94 
95 	if (!skb_queue_empty(&sk->sk_receive_queue))
96 		goto out;
97 
98 	/* Socket shut down? */
99 	if (sk->sk_shutdown & RCV_SHUTDOWN)
100 		goto out_noerr;
101 
102 	/* Connection-based sockets may have become disconnected.
103 	 * If so we report the problem.
104 	 */
105 	error = -ENOTCONN;
106 	if (connection_based(sk) &&
107 	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
108 		goto out_err;
109 
110 	/* handle signals */
111 	if (signal_pending(current))
112 		goto interrupted;
113 
114 	error = 0;
115 	*timeo_p = schedule_timeout(*timeo_p);
116 out:
117 	finish_wait(sk_sleep(sk), &wait);
118 	return error;
119 interrupted:
120 	error = sock_intr_errno(*timeo_p);
121 out_err:
122 	*err = error;
123 	goto out;
124 out_noerr:
125 	*err = 0;
126 	error = 1;
127 	goto out;
128 }
129 
130 /**
131  *	__skb_recv_datagram - Receive a datagram skbuff
132  *	@sk: socket
133  *	@flags: MSG_ flags
134  *	@off: on input, the number of already-peeked bytes to skip; on
135  *	      return, the offset within the returned skb where data starts
136  *	@peeked: returns non-zero if this packet has been seen before
137  *	@err: error code returned
138  *
139  *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
140  *	and possible races. This replaces identical code in packet, raw and
141  *	udp, as well as the IPX, AX.25 and Appletalk code. It also finally
142  *	fixes the long-standing peek and read race for datagram sockets. If
143  *	you alter this routine, remember it must be re-entrant.
144  *
145  *	The caller owns any skb that is returned and must eventually release
146  *	it, usually with skb_free_datagram(); no socket lock is taken or held
147  *	on return (see the note below).
148  *
149  *	* It does not lock the socket these days. This function is
150  *	* free of race conditions. This should significantly improve
151  *	* datagram socket latencies at high loads, when copying data
152  *	* to user space takes lots of time.
153  *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
154  *	*  8) Great win.)
155  *	*			                    --ANK (980729)
156  *
157  *	The order of the tests when we find no data waiting is specified
158  *	quite explicitly by POSIX 1003.1g; don't change it without having
159  *	the standard around, please.
160  */
161 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
162 				    int *peeked, int *off, int *err)
163 {
164 	struct sk_buff *skb;
165 	long timeo;
166 	/*
167 	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
168 	 */
169 	int error = sock_error(sk);
170 
171 	if (error)
172 		goto no_packet;
173 
174 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
175 
176 	do {
177 		/* Again, only user-level code calls this function, so nothing
178 		 * at interrupt level will suddenly eat the receive_queue.
179 		 *
180 		 * Look at the current NFS client, by the way...
181 		 * However, this function was correct in any case. 8)
182 		 */
183 		unsigned long cpu_flags;
184 		struct sk_buff_head *queue = &sk->sk_receive_queue;
185 
186 		spin_lock_irqsave(&queue->lock, cpu_flags);
187 		skb_queue_walk(queue, skb) {
188 			*peeked = skb->peeked;
189 			if (flags & MSG_PEEK) {
190 				if (*off >= skb->len && skb->len) {
191 					*off -= skb->len;
192 					continue;
193 				}
194 				skb->peeked = 1;
195 				atomic_inc(&skb->users);
196 			} else
197 				__skb_unlink(skb, queue);
198 
199 			spin_unlock_irqrestore(&queue->lock, cpu_flags);
200 			return skb;
201 		}
202 		spin_unlock_irqrestore(&queue->lock, cpu_flags);
203 
204 		/* User doesn't want to wait */
205 		error = -EAGAIN;
206 		if (!timeo)
207 			goto no_packet;
208 
209 	} while (!wait_for_packet(sk, err, &timeo));
210 
211 	return NULL;
212 
213 no_packet:
214 	*err = error;
215 	return NULL;
216 }
217 EXPORT_SYMBOL(__skb_recv_datagram);
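
/*
 * A minimal sketch (not from this file) of how a caller that supports
 * SO_PEEK_OFF might drive __skb_recv_datagram() so that successive MSG_PEEK
 * reads walk forward through the queue.  The helper name is hypothetical;
 * sk_peek_offset()/sk_peek_offset_fwd() are the net/sock.h accessors.
 */
static struct sk_buff *my_proto_recv_peeked(struct sock *sk,
					    unsigned int flags, int noblock,
					    int *peeked, int *err)
{
	int off = sk_peek_offset(sk, flags);
	struct sk_buff *skb;

	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				  peeked, &off, err);
	if (skb && (flags & MSG_PEEK))
		/* Remember how far this peek got; simplified to the whole
		 * skb here, real callers advance by what they consumed.
		 */
		sk_peek_offset_fwd(sk, skb->len);
	return skb;
}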
218 
219 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
220 				  int noblock, int *err)
221 {
222 	int peeked, off = 0;
223 
224 	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
225 				   &peeked, &off, err);
226 }
227 EXPORT_SYMBOL(skb_recv_datagram);
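
/*
 * A minimal sketch of the usual shape of a datagram protocol's ->recvmsg()
 * built on the helpers above.  The function name is hypothetical; real
 * users (e.g. udp.c) add address and cmsg handling around this core.
 */
static int my_proto_recvmsg(struct kiocb *iocb, struct sock *sk,
			    struct msghdr *msg, size_t len, int noblock,
			    int flags, int *addr_len)
{
	struct sk_buff *skb;
	int copied, err;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		return err;

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;	/* datagram did not fit */
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	skb_free_datagram(sk, skb);
	return err ? err : copied;
}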
228 
229 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
230 {
231 	consume_skb(skb);
232 	sk_mem_reclaim_partial(sk);
233 }
234 EXPORT_SYMBOL(skb_free_datagram);
235 
236 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
237 {
238 	bool slow;
239 
240 	if (likely(atomic_read(&skb->users) == 1))
241 		smp_rmb();
242 	else if (likely(!atomic_dec_and_test(&skb->users)))
243 		return;
244 
245 	slow = lock_sock_fast(sk);
246 	skb_orphan(skb);
247 	sk_mem_reclaim_partial(sk);
248 	unlock_sock_fast(sk, slow);
249 
250 	/* skb is now orphaned, can be freed outside of locked section */
251 	__kfree_skb(skb);
252 }
253 EXPORT_SYMBOL(skb_free_datagram_locked);
254 
255 /**
256  *	skb_kill_datagram - Free a datagram skbuff forcibly
257  *	@sk: socket
258  *	@skb: datagram skbuff
259  *	@flags: MSG_ flags
260  *
261  *	This function frees a datagram skbuff that was received by
262  *	skb_recv_datagram.  The flags argument must match the one
263  *	used for skb_recv_datagram.
264  *
265  *	If the MSG_PEEK flag is set, and the packet is still on the
266  *	receive queue of the socket, it will be taken off the queue
267  *	before it is freed.
268  *
269  *	This function currently only disables BH when acquiring the
270  *	sk_receive_queue lock.  Therefore it must not be used in a
271  *	context where that lock is acquired in an IRQ context.
272  *
273  *	It returns 0 if the packet was removed by us.
274  */
275 
276 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
277 {
278 	int err = 0;
279 
280 	if (flags & MSG_PEEK) {
281 		err = -ENOENT;
282 		spin_lock_bh(&sk->sk_receive_queue.lock);
283 		if (skb == skb_peek(&sk->sk_receive_queue)) {
284 			__skb_unlink(skb, &sk->sk_receive_queue);
285 			atomic_dec(&skb->users);
286 			err = 0;
287 		}
288 		spin_unlock_bh(&sk->sk_receive_queue.lock);
289 	}
290 
291 	kfree_skb(skb);
292 	atomic_inc(&sk->sk_drops);
293 	sk_mem_reclaim_partial(sk);
294 
295 	return err;
296 }
297 EXPORT_SYMBOL(skb_kill_datagram);
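
/*
 * A minimal sketch of the intended pairing: skb_free_datagram() when the
 * datagram was consumed normally, skb_kill_datagram() when it turned out to
 * be bad (e.g. a checksum error detected only while copying), so that a
 * MSG_PEEK reference does not keep the bad packet queued.  The helper name
 * is hypothetical.
 */
static void my_proto_finish_recv(struct sock *sk, struct sk_buff *skb,
				 unsigned int flags, bool bad)
{
	if (bad)
		skb_kill_datagram(sk, skb, flags);	/* unlink if peeked, count drop */
	else
		skb_free_datagram(sk, skb);
}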
298 
299 /**
300  *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
301  *	@skb: buffer to copy
302  *	@offset: offset in the buffer to start copying from
303  *	@to: io vector to copy to
304  *	@len: amount of data to copy from buffer to iovec
305  *
306  *	Note: the iovec is modified during the copy.
307  */
308 int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
309 			    struct iovec *to, int len)
310 {
311 	int start = skb_headlen(skb);
312 	int i, copy = start - offset;
313 	struct sk_buff *frag_iter;
314 
315 	trace_skb_copy_datagram_iovec(skb, len);
316 
317 	/* Copy header. */
318 	if (copy > 0) {
319 		if (copy > len)
320 			copy = len;
321 		if (memcpy_toiovec(to, skb->data + offset, copy))
322 			goto fault;
323 		if ((len -= copy) == 0)
324 			return 0;
325 		offset += copy;
326 	}
327 
328 	/* Copy paged appendix. Hmm... why does this look so complicated? */
329 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
330 		int end;
331 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
332 
333 		WARN_ON(start > offset + len);
334 
335 		end = start + skb_frag_size(frag);
336 		if ((copy = end - offset) > 0) {
337 			int err;
338 			u8  *vaddr;
339 			struct page *page = skb_frag_page(frag);
340 
341 			if (copy > len)
342 				copy = len;
343 			vaddr = kmap(page);
344 			err = memcpy_toiovec(to, vaddr + frag->page_offset +
345 					     offset - start, copy);
346 			kunmap(page);
347 			if (err)
348 				goto fault;
349 			if (!(len -= copy))
350 				return 0;
351 			offset += copy;
352 		}
353 		start = end;
354 	}
355 
356 	skb_walk_frags(skb, frag_iter) {
357 		int end;
358 
359 		WARN_ON(start > offset + len);
360 
361 		end = start + frag_iter->len;
362 		if ((copy = end - offset) > 0) {
363 			if (copy > len)
364 				copy = len;
365 			if (skb_copy_datagram_iovec(frag_iter,
366 						    offset - start,
367 						    to, copy))
368 				goto fault;
369 			if ((len -= copy) == 0)
370 				return 0;
371 			offset += copy;
372 		}
373 		start = end;
374 	}
375 	if (!len)
376 		return 0;
377 
378 fault:
379 	return -EFAULT;
380 }
381 EXPORT_SYMBOL(skb_copy_datagram_iovec);
382 
383 /**
384  *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
385  *	@skb: buffer to copy
386  *	@offset: offset in the buffer to start copying from
387  *	@to: io vector to copy to
388  *	@to_offset: offset in the io vector to start copying to
389  *	@len: amount of data to copy from buffer to iovec
390  *
391  *	Returns 0 or -EFAULT.
392  *	Note: the iovec is not modified during the copy.
393  */
394 int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
395 				  const struct iovec *to, int to_offset,
396 				  int len)
397 {
398 	int start = skb_headlen(skb);
399 	int i, copy = start - offset;
400 	struct sk_buff *frag_iter;
401 
402 	/* Copy header. */
403 	if (copy > 0) {
404 		if (copy > len)
405 			copy = len;
406 		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
407 			goto fault;
408 		if ((len -= copy) == 0)
409 			return 0;
410 		offset += copy;
411 		to_offset += copy;
412 	}
413 
414 	/* Copy paged appendix. Hmm... why does this look so complicated? */
415 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
416 		int end;
417 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
418 
419 		WARN_ON(start > offset + len);
420 
421 		end = start + skb_frag_size(frag);
422 		if ((copy = end - offset) > 0) {
423 			int err;
424 			u8  *vaddr;
425 			struct page *page = skb_frag_page(frag);
426 
427 			if (copy > len)
428 				copy = len;
429 			vaddr = kmap(page);
430 			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
431 						offset - start, to_offset, copy);
432 			kunmap(page);
433 			if (err)
434 				goto fault;
435 			if (!(len -= copy))
436 				return 0;
437 			offset += copy;
438 			to_offset += copy;
439 		}
440 		start = end;
441 	}
442 
443 	skb_walk_frags(skb, frag_iter) {
444 		int end;
445 
446 		WARN_ON(start > offset + len);
447 
448 		end = start + frag_iter->len;
449 		if ((copy = end - offset) > 0) {
450 			if (copy > len)
451 				copy = len;
452 			if (skb_copy_datagram_const_iovec(frag_iter,
453 							  offset - start,
454 							  to, to_offset,
455 							  copy))
456 				goto fault;
457 			if ((len -= copy) == 0)
458 				return 0;
459 			offset += copy;
460 			to_offset += copy;
461 		}
462 		start = end;
463 	}
464 	if (!len)
465 		return 0;
466 
467 fault:
468 	return -EFAULT;
469 }
470 EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
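
/*
 * A minimal sketch of why the const variant exists: because the iovec is
 * never advanced, a caller can place its own header at the front of the
 * user buffer and copy the packet right after it using explicit offsets.
 * Drivers such as tun use this pattern; the names below are hypothetical.
 */
static int my_copy_pkt_to_user(struct sk_buff *skb, const struct iovec *iov)
{
	struct my_hdr {
		__u32 len;
	} hdr = { .len = skb->len };

	/* Header first ... */
	if (memcpy_toiovecend(iov, (unsigned char *)&hdr, 0, sizeof(hdr)))
		return -EFAULT;
	/* ... then the whole datagram immediately after it. */
	return skb_copy_datagram_const_iovec(skb, 0, iov, sizeof(hdr),
					     skb->len);
}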
471 
472 /**
473  *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
474  *	@skb: buffer to copy
475  *	@offset: offset in the buffer to start copying to
476  *	@from: io vector to copy from
477  *	@from_offset: offset in the io vector to start copying from
478  *	@len: amount of data to copy to buffer from iovec
479  *
480  *	Returns 0 or -EFAULT.
481  *	Note: the iovec is not modified during the copy.
482  */
483 int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
484 				 const struct iovec *from, int from_offset,
485 				 int len)
486 {
487 	int start = skb_headlen(skb);
488 	int i, copy = start - offset;
489 	struct sk_buff *frag_iter;
490 
491 	/* Copy header. */
492 	if (copy > 0) {
493 		if (copy > len)
494 			copy = len;
495 		if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
496 					copy))
497 			goto fault;
498 		if ((len -= copy) == 0)
499 			return 0;
500 		offset += copy;
501 		from_offset += copy;
502 	}
503 
504 	/* Copy paged appendix. Hmm... why does this look so complicated? */
505 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
506 		int end;
507 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
508 
509 		WARN_ON(start > offset + len);
510 
511 		end = start + skb_frag_size(frag);
512 		if ((copy = end - offset) > 0) {
513 			int err;
514 			u8  *vaddr;
515 			struct page *page = skb_frag_page(frag);
516 
517 			if (copy > len)
518 				copy = len;
519 			vaddr = kmap(page);
520 			err = memcpy_fromiovecend(vaddr + frag->page_offset +
521 						  offset - start,
522 						  from, from_offset, copy);
523 			kunmap(page);
524 			if (err)
525 				goto fault;
526 
527 			if (!(len -= copy))
528 				return 0;
529 			offset += copy;
530 			from_offset += copy;
531 		}
532 		start = end;
533 	}
534 
535 	skb_walk_frags(skb, frag_iter) {
536 		int end;
537 
538 		WARN_ON(start > offset + len);
539 
540 		end = start + frag_iter->len;
541 		if ((copy = end - offset) > 0) {
542 			if (copy > len)
543 				copy = len;
544 			if (skb_copy_datagram_from_iovec(frag_iter,
545 							 offset - start,
546 							 from,
547 							 from_offset,
548 							 copy))
549 				goto fault;
550 			if ((len -= copy) == 0)
551 				return 0;
552 			offset += copy;
553 			from_offset += copy;
554 		}
555 		start = end;
556 	}
557 	if (!len)
558 		return 0;
559 
560 fault:
561 	return -EFAULT;
562 }
563 EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
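
/*
 * A minimal sketch of the transmit-side counterpart: building an skb from a
 * user iovec.  Headroom reservation and most error handling are trimmed;
 * the function name is hypothetical.
 */
static struct sk_buff *my_proto_alloc_from_user(struct sock *sk,
						const struct iovec *iov,
						size_t len, int *err)
{
	struct sk_buff *skb;

	skb = sock_alloc_send_skb(sk, len, 0, err);
	if (!skb)
		return NULL;

	skb_put(skb, len);	/* extend the data area to cover the payload */
	if (skb_copy_datagram_from_iovec(skb, 0, iov, 0, len)) {
		kfree_skb(skb);
		*err = -EFAULT;
		return NULL;
	}
	return skb;
}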
564 
565 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
566 				      u8 __user *to, int len,
567 				      __wsum *csump)
568 {
569 	int start = skb_headlen(skb);
570 	int i, copy = start - offset;
571 	struct sk_buff *frag_iter;
572 	int pos = 0;
573 
574 	/* Copy header. */
575 	if (copy > 0) {
576 		int err = 0;
577 		if (copy > len)
578 			copy = len;
579 		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
580 					       *csump, &err);
581 		if (err)
582 			goto fault;
583 		if ((len -= copy) == 0)
584 			return 0;
585 		offset += copy;
586 		to += copy;
587 		pos = copy;
588 	}
589 
590 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
591 		int end;
592 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
593 
594 		WARN_ON(start > offset + len);
595 
596 		end = start + skb_frag_size(frag);
597 		if ((copy = end - offset) > 0) {
598 			__wsum csum2;
599 			int err = 0;
600 			u8  *vaddr;
601 			struct page *page = skb_frag_page(frag);
602 
603 			if (copy > len)
604 				copy = len;
605 			vaddr = kmap(page);
606 			csum2 = csum_and_copy_to_user(vaddr +
607 							frag->page_offset +
608 							offset - start,
609 						      to, copy, 0, &err);
610 			kunmap(page);
611 			if (err)
612 				goto fault;
613 			*csump = csum_block_add(*csump, csum2, pos);
614 			if (!(len -= copy))
615 				return 0;
616 			offset += copy;
617 			to += copy;
618 			pos += copy;
619 		}
620 		start = end;
621 	}
622 
623 	skb_walk_frags(skb, frag_iter) {
624 		int end;
625 
626 		WARN_ON(start > offset + len);
627 
628 		end = start + frag_iter->len;
629 		if ((copy = end - offset) > 0) {
630 			__wsum csum2 = 0;
631 			if (copy > len)
632 				copy = len;
633 			if (skb_copy_and_csum_datagram(frag_iter,
634 						       offset - start,
635 						       to, copy,
636 						       &csum2))
637 				goto fault;
638 			*csump = csum_block_add(*csump, csum2, pos);
639 			if ((len -= copy) == 0)
640 				return 0;
641 			offset += copy;
642 			to += copy;
643 			pos += copy;
644 		}
645 		start = end;
646 	}
647 	if (!len)
648 		return 0;
649 
650 fault:
651 	return -EFAULT;
652 }
653 
654 __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
655 {
656 	__sum16 sum;
657 
658 	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
659 	if (likely(!sum)) {
660 		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
661 			netdev_rx_csum_fault(skb->dev);
662 		skb->ip_summed = CHECKSUM_UNNECESSARY;
663 	}
664 	return sum;
665 }
666 EXPORT_SYMBOL(__skb_checksum_complete_head);
667 
668 __sum16 __skb_checksum_complete(struct sk_buff *skb)
669 {
670 	return __skb_checksum_complete_head(skb, skb->len);
671 }
672 EXPORT_SYMBOL(__skb_checksum_complete);
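
/*
 * A minimal sketch of checking the checksum up front rather than folding it
 * into the copy (as skb_copy_and_csum_datagram_iovec() below does).  The
 * helper name is hypothetical; skb_csum_unnecessary() is the skbuff.h test
 * for packets the hardware has already verified.
 */
static bool my_proto_csum_ok(struct sk_buff *skb)
{
	if (skb_csum_unnecessary(skb))
		return true;

	/* Walk the whole packet and fold in skb->csum; a non-zero result
	 * means the checksum did not verify.
	 */
	return __skb_checksum_complete(skb) == 0;
}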
673 
674 /**
675  *	skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
676  *	@skb: skbuff
677  *	@hlen: header length (included in the checksum, but not copied)
678  *	@iov: io vector
679  *
680  *	Caller _must_ check that skb will fit into this iovec.
681  *
682  *	Returns: 0       - success.
683  *		 -EINVAL - checksum failure.
684  *		 -EFAULT - fault during copy. Beware, in this case iovec
685  *			   can be modified!
686  */
687 int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
688 				     int hlen, struct iovec *iov)
689 {
690 	__wsum csum;
691 	int chunk = skb->len - hlen;
692 
693 	if (!chunk)
694 		return 0;
695 
696 	/* Skip filled elements.
697 	 * Pretty silly, look at memcpy_toiovec, though 8)
698 	 */
699 	while (!iov->iov_len)
700 		iov++;
701 
702 	if (iov->iov_len < chunk) {
703 		if (__skb_checksum_complete(skb))
704 			goto csum_error;
705 		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
706 			goto fault;
707 	} else {
708 		csum = csum_partial(skb->data, hlen, skb->csum);
709 		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
710 					       chunk, &csum))
711 			goto fault;
712 		if (csum_fold(csum))
713 			goto csum_error;
714 		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
715 			netdev_rx_csum_fault(skb->dev);
716 		iov->iov_len -= chunk;
717 		iov->iov_base += chunk;
718 	}
719 	return 0;
720 csum_error:
721 	return -EINVAL;
722 fault:
723 	return -EFAULT;
724 }
725 EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
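
/*
 * A minimal sketch of how a UDP-style recvmsg() picks between the two copy
 * routines, assuming the user iovec can hold the whole datagram (the
 * requirement spelled out above).  The helper name and the hlen parameter
 * are hypothetical.
 */
static int my_proto_copy_to_user(struct sk_buff *skb, struct msghdr *msg,
				 int hlen)
{
	if (skb_csum_unnecessary(skb))
		/* Checksum already vouched for: a plain copy is enough. */
		return skb_copy_datagram_iovec(skb, hlen, msg->msg_iov,
					       skb->len - hlen);

	/* Verify the checksum while copying; -EINVAL means it failed and the
	 * caller should discard the skb (see skb_kill_datagram() above).
	 */
	return skb_copy_and_csum_datagram_iovec(skb, hlen, msg->msg_iov);
}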
726 
727 /**
728  * 	datagram_poll - generic datagram poll
729  *	@file: file struct
730  *	@sock: socket
731  *	@wait: poll table
732  *
733  *	Datagram poll: Again totally generic. This also handles
734  *	sequenced packet sockets, provided the socket receive queue is
735  *	only ever holding data that is ready to receive.
736  *
737  *	Note: if you _don't_ use this routine for this protocol and you
738  *	use a write policy different from sock_writeable(), then please
739  *	supply your own write_space callback.
740  */
741 unsigned int datagram_poll(struct file *file, struct socket *sock,
742 			   poll_table *wait)
743 {
744 	struct sock *sk = sock->sk;
745 	unsigned int mask;
746 
747 	sock_poll_wait(file, sk_sleep(sk), wait);
748 	mask = 0;
749 
750 	/* exceptional events? */
751 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
752 		mask |= POLLERR;
753 	if (sk->sk_shutdown & RCV_SHUTDOWN)
754 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
755 	if (sk->sk_shutdown == SHUTDOWN_MASK)
756 		mask |= POLLHUP;
757 
758 	/* readable? */
759 	if (!skb_queue_empty(&sk->sk_receive_queue))
760 		mask |= POLLIN | POLLRDNORM;
761 
762 	/* Connection-based sockets need to check for termination and startup */
763 	if (connection_based(sk)) {
764 		if (sk->sk_state == TCP_CLOSE)
765 			mask |= POLLHUP;
766 		/* connection hasn't started yet? */
767 		if (sk->sk_state == TCP_SYN_SENT)
768 			return mask;
769 	}
770 
771 	/* writable? */
772 	if (sock_writeable(sk))
773 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
774 	else
775 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
776 
777 	return mask;
778 }
779 EXPORT_SYMBOL(datagram_poll);
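
/*
 * A minimal sketch of how datagram_poll() is normally consumed: plugged
 * straight into a protocol's proto_ops (declared in <linux/net.h>).  The
 * structure below is hypothetical and shows only the poll hook.
 */
static const struct proto_ops my_proto_dgram_ops = {
	.family		= PF_UNSPEC,		/* placeholder family */
	.owner		= THIS_MODULE,
	.poll		= datagram_poll,	/* generic datagram poll */
	/* .bind, .sendmsg, .recvmsg, etc. supplied by the real protocol */
};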
780