// SPDX-License-Identifier: GPL-2.0
/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
 *	NetROM layer all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was
 *	shared before but buried in udp.c so I moved it.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
 *			udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy()
 *					understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
 *					skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET.
 *					IPX can no longer use the SO_TYPE hack
 *					but AX.25 now works right, and SPX is
 *					feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol
 *					crash.
 *		Florian La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff	:	Unconnected accept() fix.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

#include "datagram.h"

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode,
				  int sync, void *key)
{
	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * Wait for the last received packet to be different from skb
 */
int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
				int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (READ_ONCE(queue->prev) != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);
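
/* How this helper is meant to be driven: callers first try to dequeue, and
 * only wait here when the tail of the queue is still the skb they last saw.
 * A minimal sketch of that loop (illustrative only; the real version is
 * __skb_recv_datagram() further down in this file):
 *
 *	do {
 *		skb = __skb_try_recv_datagram(sk, queue, flags, NULL,
 *					      &off, &err, &last);
 *		if (skb)
 *			return skb;
 *	} while (timeo &&
 *		 !__skb_wait_for_more_packets(sk, queue, &err, &timeo, last));
 */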

static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}

struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
					  struct sk_buff_head *queue,
					  unsigned int flags,
					  void (*destructor)(struct sock *sk,
							     struct sk_buff *skb),
					  int *off, int *err,
					  struct sk_buff **last)
{
	bool peek_at_off = false;
	struct sk_buff *skb;
	int _off = 0;

	if (unlikely(flags & MSG_PEEK && *off >= 0)) {
		peek_at_off = true;
		_off = *off;
	}

	*last = queue->prev;
	skb_queue_walk(queue, skb) {
		if (flags & MSG_PEEK) {
			if (peek_at_off && _off >= skb->len &&
			    (_off || skb->peeked)) {
				_off -= skb->len;
				continue;
			}
			if (!skb->len) {
				skb = skb_set_peeked(skb);
				if (IS_ERR(skb)) {
					*err = PTR_ERR(skb);
					return NULL;
				}
			}
			refcount_inc(&skb->users);
		} else {
			__skb_unlink(skb, queue);
			if (destructor)
				destructor(sk, skb);
		}
		*off = _off;
		return skb;
	}
	return NULL;
}
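
/* The peek offset consumed above is what userspace drives with SO_PEEK_OFF
 * on sockets that support it. An illustrative userspace sketch (fd, buf and
 * all error handling are assumed):
 *
 *	int off = 0;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, sizeof(buf), MSG_PEEK);	// peek; peek offset advances
 *	recv(fd, buf, sizeof(buf), MSG_PEEK);	// peeks the *next* data
 *	recv(fd, buf, sizeof(buf), 0);		// normal read; offset rewinds
 */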

/**
 *	__skb_try_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@queue: socket queue from which to receive
 *	@flags: MSG\_ flags
 *	@destructor: invoked under the receive lock on successful dequeue
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *	@last: set to last peeked message to inform the wait function
 *	       what to look for when peeking
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This replaces identical code in packet, raw and
 *	udp, as well as the IPX, AX.25 and Appletalk. It also finally fixes
 *	the long standing peek and read race for datagram sockets. If you
 *	alter this routine remember it must be re-entrant.
 *
 *	This function will lock the socket if a skb is returned, so
 *	the caller needs to unlock the socket in that case (usually by
 *	calling skb_free_datagram). Returns NULL with @err set to
 *	-EAGAIN if no data was available or to some other value if an
 *	error was detected.
 *
 *	* It does not lock socket since today. This function is
 *	* free of race conditions. This measure should/can improve
 *	* significantly datagram socket latencies at high loads,
 *	* when data copying to user space takes lots of time.
 *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
 *	* 8) Great win.)
 *	*						--ANK (980729)
 *
 *	The order of the tests when we find no data waiting is specified
 *	quite explicitly by POSIX 1003.1g, don't change them without having
 *	the standard around please.
 */
struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
					struct sk_buff_head *queue,
					unsigned int flags,
					void (*destructor)(struct sock *sk,
							   struct sk_buff *skb),
					int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff *skb;
	unsigned long cpu_flags;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
						off, &error, last);
		spin_unlock_irqrestore(&queue->lock, cpu_flags);
		if (error)
			goto no_packet;
		if (skb)
			return skb;

		if (!sk_can_busy_loop(sk))
			break;

		sk_busy_loop(sk, flags & MSG_DONTWAIT);
	} while (READ_ONCE(queue->prev) != *last);

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);

struct sk_buff *__skb_recv_datagram(struct sock *sk,
				    struct sk_buff_head *sk_queue,
				    unsigned int flags,
				    void (*destructor)(struct sock *sk,
						       struct sk_buff *skb),
				    int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, sk_queue, flags, destructor,
					      off, err, &last);
		if (skb)
			return skb;

		if (*err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, sk_queue, err,
					      &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int off = 0;

	return __skb_recv_datagram(sk, &sk->sk_receive_queue,
				   flags | (noblock ? MSG_DONTWAIT : 0),
				   NULL, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
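
/* The common calling pattern, as seen in protocols like UDP: dequeue with
 * skb_recv_datagram(), copy to the user iterator, then release. A trimmed,
 * illustrative sketch (my_recvmsg and the size handling are assumptions,
 * not code from this file):
 *
 *	int my_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 *		       int flags)
 *	{
 *		struct sk_buff *skb;
 *		int err, copied;
 *
 *		skb = skb_recv_datagram(sock->sk, flags,
 *					flags & MSG_DONTWAIT, &err);
 *		if (!skb)
 *			return err;
 *		copied = min_t(size_t, skb->len, len);
 *		err = skb_copy_datagram_msg(skb, 0, msg, copied);
 *		skb_free_datagram(sock->sk, skb);
 *		return err ? err : copied;
 *	}
 */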

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	if (!skb_unref(skb)) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);

int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
			struct sk_buff *skb, unsigned int flags,
			void (*destructor)(struct sock *sk,
					   struct sk_buff *skb))
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk_queue->lock);
		if (skb->next) {
			__skb_unlink(skb, sk_queue);
			refcount_dec(&skb->users);
			if (destructor)
				destructor(sk, skb);
			err = 0;
		}
		spin_unlock_bh(&sk_queue->lock);
	}

	atomic_inc(&sk->sk_drops);
	return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG\_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram. The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock. Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */

int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
				      NULL);

	kfree_skb(skb);
	sk_mem_reclaim_partial(sk);
	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
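
/* skb_kill_datagram() is what receive paths reach for when a datagram turns
 * out to be unusable after it was dequeued, e.g. a checksum failure found
 * while copying to userspace. An illustrative fragment of that pattern
 * (label placement follows UDP's recvmsg, heavily trimmed here):
 *
 *	csum_copy_err:
 *		if (!skb_kill_datagram(sk, skb, flags))
 *			goto try_again;	// skb is gone; dequeue the next one
 */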

static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
			       struct iov_iter *to, int len, bool fault_short,
			       size_t (*cb)(const void *, size_t, void *,
					    struct iov_iter *), void *data)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset, n;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = cb(skb->data + offset, copy, data, to);
		offset += n;
		if (n != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = cb(vaddr + skb_frag_off(frag) + offset - start,
			       copy, data, to);
			kunmap(page);
			offset += n;
			if (n != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (__skb_datagram_iter(frag_iter, offset - start,
						to, copy, fault_short, cb,
						data))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb. We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;

short_copy:
	if (fault_short || iov_iter_count(to))
		goto fault;

	return 0;
}

/**
 *	skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
 *	    and update a hash.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *	@hash: hash request to update
 */
int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
				    struct iov_iter *to, int len,
				    struct ahash_request *hash)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
				   hash_and_copy_to_iter, hash);
}
EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);

static size_t simple_copy_to_iter(const void *addr, size_t bytes,
				  void *data __always_unused,
				  struct iov_iter *i)
{
	return copy_to_iter(addr, bytes, i);
}

/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	trace_skb_copy_datagram_iovec(skb, len);
	return __skb_datagram_iter(skb, offset, to, len, false,
				   simple_copy_to_iter, NULL);
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
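
/* Callers rarely invoke skb_copy_datagram_iter() directly; the usual entry
 * point is skb_copy_datagram_msg(), a thin inline wrapper in
 * <linux/skbuff.h> that simply unwraps the msghdr's iterator, roughly:
 *
 *	static inline int skb_copy_datagram_msg(const struct sk_buff *from,
 *						int offset, struct msghdr *msg,
 *						int size)
 *	{
 *		return skb_copy_datagram_iter(from, offset, &msg->msg_iter,
 *					      size);
 *	}
 */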

/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
						     skb_frag_off(frag) +
						     offset - start,
						     copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);

int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			    struct iov_iter *from, size_t length)
{
	int frag = skb_shinfo(skb)->nr_frags;

	while (length && iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages(from, pages, length,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);
		length -= copied;

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		if (sk && sk->sk_type == SOCK_STREAM) {
			sk_wmem_queued_add(sk, truesize);
			sk_mem_charge(sk, truesize);
		} else {
			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
		}
		while (copied) {
			int size = min_t(int, copied, PAGE_SIZE - start);

			skb_fill_page_desc(skb, frag++, pages[n], start, size);
			start = 0;
			copied -= size;
			n++;
		}
	}
	return 0;
}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);

/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the userspace
 *	pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
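
/* The zerocopy helpers above are used by callers such as tun and the
 * MSG_ZEROCOPY send paths: allocate an skb with linear room for just the
 * header, then let pinned user pages back the rest as frags. An
 * illustrative sketch (hdr_len and the surrounding send path are
 * assumptions, not code from this file):
 *
 *	skb = sock_alloc_send_skb(sk, hdr_len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		return err;
 *	skb_put(skb, hdr_len);
 *	err = zerocopy_sg_from_iter(skb, &msg->msg_iter);
 */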

/**
 *	skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
 *	    and update a checksum.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *	@csump: checksum pointer
 */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
				   csum_and_copy_to_iter, csump);
}

/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			return -EINVAL;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;

		if (csum_fold(csum)) {
			iov_iter_revert(&msg->msg_iter, chunk);
			return -EINVAL;
		}

		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(NULL, skb);
	}
	return 0;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);

/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you *don't* use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
__poll_t datagram_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);
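
/* datagram_poll() is designed to be dropped straight into a protocol's
 * proto_ops. A trimmed, illustrative example modelled on inet_dgram_ops
 * (my_dgram_ops and my_recvmsg are assumed names, and most members are
 * omitted):
 *
 *	static const struct proto_ops my_dgram_ops = {
 *		.family		= PF_INET,
 *		.owner		= THIS_MODULE,
 *		.poll		= datagram_poll,
 *		.recvmsg	= my_recvmsg,
 *		...
 *	};
 */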