/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
 *	NetROM layer all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was
 *	shared before but buried in udp.c so I moved it.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>.
 *			(datagram_poll() from old udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy()
 *					understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
 *					skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET.
 *					IPX can no longer use the SO_TYPE hack
 *					but AX.25 now works right, and SPX is
 *					feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol
 *					crash.
 *		Florian La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff	:	Unconnected accept() fix.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_t *wait, unsigned int mode,
				  int sync, void *key)
{
	unsigned long bits = (unsigned long)key;

	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (bits && !(bits & (POLLIN | POLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * Wait for the last received packet to be different from skb
 */
int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (sk->sk_receive_queue.prev != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);

static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}

/**
 *	__skb_try_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@peeked: returns non-zero if this packet has been seen before
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *	@last: set to last peeked message to inform the wait function
 *	       what to look for when peeking
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This replaces identical code in packet, raw and
 *	udp, as well as the IPX, AX.25 and AppleTalk layers. It also finally
 *	fixes the long standing peek and read race for datagram sockets. If
 *	you alter this routine remember it must be re-entrant.
 *
 *	This function does not lock the socket (see ANK's note below); it
 *	only takes the receive queue spinlock. The caller must drop its
 *	reference on the returned skb, usually via skb_free_datagram().
 *	Returns NULL with *err set to -EAGAIN if no data was available, or
 *	to some other value if an error was detected.
 *
 *	* It does not lock socket since today. This function is
 *	* free of race conditions. This measure should/can improve
 *	* significantly datagram socket latencies at high loads,
 *	* when data copying to user space takes lots of time.
 *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
 *	* 8) Great win.)
 *	*			--ANK (980729)
 *
 *	The order of the tests when we find no data waiting is specified
 *	quite explicitly by POSIX 1003.1g; don't change it without having
 *	the standard around please.
 */
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
					int *peeked, int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff_head *queue = &sk->sk_receive_queue;
	struct sk_buff *skb;
	unsigned long cpu_flags;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		int _off = *off;

		*last = (struct sk_buff *)queue;
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb_queue_walk(queue, skb) {
			*last = skb;
			*peeked = skb->peeked;
			if (flags & MSG_PEEK) {
				if (_off >= skb->len && (skb->len || _off ||
							 skb->peeked)) {
					_off -= skb->len;
					continue;
				}

				skb = skb_set_peeked(skb);
				error = PTR_ERR(skb);
				if (IS_ERR(skb)) {
					spin_unlock_irqrestore(&queue->lock,
							       cpu_flags);
					goto no_packet;
				}

				atomic_inc(&skb->users);
			} else
				__skb_unlink(skb, queue);

			spin_unlock_irqrestore(&queue->lock, cpu_flags);
			*off = _off;
			return skb;
		}

		spin_unlock_irqrestore(&queue->lock, cpu_flags);
	} while (sk_can_busy_loop(sk) &&
		 sk_busy_loop(sk, flags & MSG_DONTWAIT));

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);
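
/*
 * Example usage (an illustrative sketch, not a caller in this file): a
 * datagram protocol's recvmsg() that supports SO_PEEK_OFF would drive
 * the receive helpers roughly like this. The surrounding function and
 * its locals are hypothetical; the call pattern follows what af_unix.c
 * and udp.c do:
 *
 *	int peeked, err;
 *	int off = sk_peek_offset(sk, flags);
 *	struct sk_buff *skb;
 *
 *	skb = __skb_recv_datagram(sk, flags, &peeked, &off, &err);
 *	if (!skb)
 *		return err;
 *	(on return, 'off' is the first unread byte within this skb)
 *	err = skb_copy_datagram_msg(skb, off, msg, skb->len - off);
 */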

struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    int *peeked, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, flags, peeked, off, err,
					      &last);
		if (skb)
			return skb;

		if (*err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, err, &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int peeked, off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   &peeked, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users))) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram. The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock. Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (skb == skb_peek(&sk->sk_receive_queue)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			atomic_dec(&skb->users);
			err = 0;
		}
		spin_unlock_bh(&sk->sk_receive_queue.lock);
	}

	kfree_skb(skb);
	atomic_inc(&sk->sk_drops);
	sk_mem_reclaim_partial(sk);

	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
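
/*
 * Example (an abridged, hypothetical sketch, not from this file): the
 * canonical recvmsg() flow over the helpers above, as seen in several
 * datagram protocols. Error handling is trimmed:
 *
 *	skb = skb_recv_datagram(sk, flags, noblock, &err);
 *	if (!skb)
 *		return err;
 *	copied = skb->len;
 *	if (copied > len) {
 *		copied = len;
 *		msg->msg_flags |= MSG_TRUNC;
 *	}
 *	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 *	if (err)
 *		skb_kill_datagram(sk, skb, flags);
 *		(also removes a bad packet that was only MSG_PEEKed)
 *	else
 *		skb_free_datagram(sk, skb);
 */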

/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_to_iter(skb->data + offset, copy, to) != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (copy_page_to_iter(skb_frag_page(frag),
					      frag->page_offset + offset -
					      start, copy, to) != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iter(frag_iter, offset - start,
						   to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb. We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	return -EFAULT;

short_copy:
	if (iov_iter_count(to))
		goto fault;

	return 0;
}
EXPORT_SYMBOL(skb_copy_datagram_iter);

/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
						     frag->page_offset + offset - start,
						     copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
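
/*
 * Most callers reach skb_copy_datagram_iter() through the
 * skb_copy_datagram_msg() wrapper from <linux/skbuff.h>, which simply
 * passes &msg->msg_iter. An abridged, hypothetical receive path
 * ("struct my_proto_hdr" is made up for illustration) that skips a
 * protocol header might do:
 *
 *	err = skb_copy_datagram_msg(skb, sizeof(struct my_proto_hdr),
 *				    msg, copied);
 *	if (err)
 *		goto out_free;
 */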

/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the userspace
 *	pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int len = iov_iter_count(from);
	int copy = min_t(int, skb_headlen(skb), len);
	int frag = 0;

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	while (iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages(from, pages, ~0U,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
		while (copied) {
			int size = min_t(int, copied, PAGE_SIZE - start);

			skb_fill_page_desc(skb, frag++, pages[n], start, size);
			start = 0;
			copied -= size;
			n++;
		}
	}
	return 0;
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
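
/*
 * Illustrative sketch (not from this file): a tun/macvtap-style
 * transmit path chooses between the zerocopy and the copying variant
 * roughly like this. "zerocopy" is a hypothetical local; the fallback
 * shape mirrors drivers/net/tun.c:
 *
 *	if (zerocopy)
 *		err = zerocopy_sg_from_iter(skb, from);
 *	else
 *		err = skb_copy_datagram_from_iter(skb, 0, from,
 *						  iov_iter_count(from));
 *	if (err)
 *		goto drop;
 *
 * Since the user pages are pinned via iov_iter_get_pages(), the caller
 * is responsible for arranging their release when the skb is freed.
 */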

static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;
	int n;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
		if (n != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = csum_and_copy_to_iter(vaddr + frag->page_offset +
						  offset - start, copy,
						  &csum2, to);
			kunmap(page);
			if (n != copy)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;

			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}
	if (!skb_shared(skb))
		skb->csum_valid = !sum;
	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);

__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	__wsum csum;
	__sum16 sum;

	csum = skb_checksum(skb, 0, skb->len, 0);

	/* skb->csum holds pseudo checksum */
	sum = csum_fold(csum_add(skb->csum, csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}

	if (!skb_shared(skb)) {
		/* Save full packet checksum */
		skb->csum = csum;
		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum_complete_sw = 1;
		skb->csum_valid = !sum;
	}

	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);

/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			goto csum_error;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;
		if (csum_fold(csum))
			goto csum_error;
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
	}
	return 0;
csum_error:
	return -EINVAL;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
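
/*
 * Example (an abridged, hypothetical sketch modelled on udp.c): a
 * receive path picks the plain or the checksumming copy depending on
 * whether the checksum still needs verifying. Per the kernel-doc
 * above, -EINVAL signals a checksum failure:
 *
 *	if (skb_csum_unnecessary(skb))
 *		err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
 *					    msg, copied);
 *	else
 *		err = skb_copy_and_csum_datagram_msg(skb,
 *						     sizeof(struct udphdr),
 *						     msg);
 *	if (err == -EINVAL)
 *		goto csum_copy_err;	(drop the corrupted packet)
 */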

/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you _don't_ use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);
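
/*
 * Hooking it up: a datagram protocol exports datagram_poll through its
 * struct proto_ops. An abridged, hypothetical example ("PF_EXAMPLE"
 * and "my_recvmsg" are made up; inet_dgram_ops in af_inet.c is the
 * canonical real user):
 *
 *	static const struct proto_ops my_dgram_ops = {
 *		.family		= PF_EXAMPLE,
 *		.poll		= datagram_poll,
 *		.recvmsg	= my_recvmsg,
 *		...
 *	};
 */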