// SPDX-License-Identifier: GPL-2.0
/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
 *	NetROM layers all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was
 *	shared before but buried in udp.c so I moved it.
 *
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>.
 *			(datagram_poll() from old udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy()
 *					understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
 *					skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET.
 *					IPX can no longer use the SO_TYPE hack
 *					but AX.25 now works right, and SPX is
 *					feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol
 *					crash.
 *		Florian La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff	:	Unconnected accept() fix.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode,
				  int sync, void *key)
{
	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * Wait for the last received packet to be different from skb
 */
int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (sk->sk_receive_queue.prev != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);
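/*
 * Editorial example (not part of the original file): the canonical way
 * callers pair a lockless try-receive with __skb_wait_for_more_packets(),
 * which is exactly the loop __skb_recv_datagram() later in this file
 * implements. Identifiers inside the #if 0 block are illustrative.
 */
#if 0
	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	struct sk_buff *skb, *last;
	int peeked, off = 0, err;

	do {
		/* Try to dequeue without sleeping first ... */
		skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked,
					      &off, &err, &last);
		if (skb)
			break;
		/* ... then sleep until a packet newer than "last" arrives,
		 * the timeout expires, or a signal/shutdown intervenes.
		 */
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
#endif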
static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	/* Replace skb with its clone in the receive queue. */
	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}

struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
					  struct sk_buff_head *queue,
					  unsigned int flags,
					  void (*destructor)(struct sock *sk,
							     struct sk_buff *skb),
					  int *peeked, int *off, int *err,
					  struct sk_buff **last)
{
	bool peek_at_off = false;
	struct sk_buff *skb;
	int _off = 0;

	if (unlikely(flags & MSG_PEEK && *off >= 0)) {
		peek_at_off = true;
		_off = *off;
	}

	*last = queue->prev;
	skb_queue_walk(queue, skb) {
		if (flags & MSG_PEEK) {
			if (peek_at_off && _off >= skb->len &&
			    (_off || skb->peeked)) {
				_off -= skb->len;
				continue;
			}
			if (skb->len) {
				skb = skb_set_peeked(skb);
				if (IS_ERR(skb)) {
					*err = PTR_ERR(skb);
					return NULL;
				}
			}
			*peeked = 1;
			refcount_inc(&skb->users);
		} else {
			__skb_unlink(skb, queue);
			if (destructor)
				destructor(sk, skb);
		}
		*off = _off;
		return skb;
	}
	return NULL;
}
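/*
 * Editorial example (not in the original file): how a SO_PEEK_OFF-aware
 * caller, e.g. udp, drives the peek-at-offset path above. Identifiers
 * are illustrative; the queue lock must be held as shown.
 */
#if 0
	struct sk_buff_head *queue = &sk->sk_receive_queue;
	int peeked = 0, err = 0;
	int off = sk_peek_offset(sk, MSG_PEEK); /* -1 if SO_PEEK_OFF unset */
	struct sk_buff *skb, *last;
	unsigned long cpu_flags;

	spin_lock_irqsave(&queue->lock, cpu_flags);
	skb = __skb_try_recv_from_queue(sk, queue, MSG_PEEK, NULL,
					&peeked, &off, &err, &last);
	spin_unlock_irqrestore(&queue->lock, cpu_flags);
	/* On success, "off" now holds the offset inside *skb where the
	 * unread data starts.
	 */
#endif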
/**
 *	__skb_try_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG\_ flags
 *	@destructor: invoked under the receive lock on successful dequeue
 *	@peeked: returns non-zero if this packet has been seen before
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *	@last: set to last peeked message to inform the wait function
 *	       what to look for when peeking
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This replaces identical code in packet, raw and
 *	udp, as well as the IPX, AX.25 and AppleTalk. It also finally fixes
 *	the long standing peek and read race for datagram sockets. If you
 *	alter this routine remember it must be re-entrant.
 *
 *	This function will lock the socket if a skb is returned, so
 *	the caller needs to unlock the socket in that case (usually by
 *	calling skb_free_datagram). Returns NULL with @err set to
 *	-EAGAIN if no data was available or to some other value if an
 *	error was detected.
 *
 *	* It does not lock socket since today. This function is
 *	* free of race conditions. This measure should/can improve
 *	* significantly datagram socket latencies at high loads,
 *	* when data copying to user space takes lots of time.
 *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
 *	* 8) Great win.)
 *	*					--ANK (980729)
 *
 *	The order of the tests when we find no data waiting is specified
 *	quite explicitly by POSIX 1003.1g; don't change them without having
 *	the standard around, please.
 */
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
					void (*destructor)(struct sock *sk,
							   struct sk_buff *skb),
					int *peeked, int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff_head *queue = &sk->sk_receive_queue;
	struct sk_buff *skb;
	unsigned long cpu_flags;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	*peeked = 0;
	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
						peeked, off, &error, last);
		spin_unlock_irqrestore(&queue->lock, cpu_flags);
		if (error)
			goto no_packet;
		if (skb)
			return skb;

		if (!sk_can_busy_loop(sk))
			break;

		sk_busy_loop(sk, flags & MSG_DONTWAIT);
	} while (!skb_queue_empty(&sk->sk_receive_queue));

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);

struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    void (*destructor)(struct sock *sk,
						       struct sk_buff *skb),
				    int *peeked, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
					      off, err, &last);
		if (skb)
			return skb;

		if (*err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, err, &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);
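/*
 * Editorial example (not in the original file): the minimal recvmsg()
 * shape most datagram protocols build from these helpers, using
 * skb_recv_datagram() defined just below. "example_recvmsg" and its
 * error handling are an illustrative sketch, not a copy of any one
 * protocol.
 */
#if 0
static int example_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			   int noblock, int flags)
{
	struct sk_buff *skb;
	int copied, err;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		return err;

	/* Truncate to the caller's buffer and flag it if we did. */
	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	skb_free_datagram(sk, skb);
	return err ? err : copied;
}
#endif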
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int peeked, off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   NULL, &peeked, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	if (!skb_unref(skb)) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);

int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
			struct sk_buff *skb, unsigned int flags,
			void (*destructor)(struct sock *sk,
					   struct sk_buff *skb))
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk_queue->lock);
		if (skb->next) {
			__skb_unlink(skb, sk_queue);
			refcount_dec(&skb->users);
			if (destructor)
				destructor(sk, skb);
			err = 0;
		}
		spin_unlock_bh(&sk_queue->lock);
	}

	atomic_inc(&sk->sk_drops);
	return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG\_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram. The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock. Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
				      NULL);

	kfree_skb(skb);
	sk_mem_reclaim_partial(sk);
	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
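/*
 * Editorial example (not in the original file): the udp-style recovery
 * when a checksum fails while copying a peeked datagram - drop the bad
 * packet with skb_kill_datagram() and let the caller retry. The label
 * and variable names are illustrative.
 */
#if 0
	err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg);
	if (err == -EINVAL) {
		/* Bad checksum: forcibly unlink and free the datagram,
		 * then go back and wait for the next one.
		 */
		skb_kill_datagram(sk, skb, flags);
		goto try_again;
	}
#endif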
/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset, n;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = copy_to_iter(skb->data + offset, copy, to);
		offset += n;
		if (n != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			n = copy_page_to_iter(skb_frag_page(frag),
					      frag->page_offset + offset -
					      start, copy, to);
			offset += n;
			if (n != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iter(frag_iter, offset - start,
						   to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb. We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;

short_copy:
	if (iov_iter_count(to))
		goto fault;

	return 0;
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
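/*
 * Editorial note (not in the original file): most callers reach
 * skb_copy_datagram_iter() through the skb_copy_datagram_msg() wrapper,
 * which (in linux/skbuff.h) is essentially:
 */
#if 0
static inline int skb_copy_datagram_msg(const struct sk_buff *skb, int from,
					struct msghdr *msg, int size)
{
	return skb_copy_datagram_iter(skb, from, &msg->msg_iter, size);
}
#endif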
/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
						     frag->page_offset + offset - start,
						     copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);

int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			    struct iov_iter *from, size_t length)
{
	int frag = skb_shinfo(skb)->nr_frags;

	while (length && iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages(from, pages, length,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);
		length -= copied;

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		if (sk && sk->sk_type == SOCK_STREAM) {
			sk->sk_wmem_queued += truesize;
			sk_mem_charge(sk, truesize);
		} else {
			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
		}
		while (copied) {
			int size = min_t(int, copied, PAGE_SIZE - start);

			skb_fill_page_desc(skb, frag++, pages[n], start, size);
			start = 0;
			copied -= size;
			n++;
		}
	}
	return 0;
}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);

/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the userspace
 *	pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
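/*
 * Editorial example (not in the original file): a tun/tap-style
 * zerocopy transmit path consuming the helper above. A hedged sketch -
 * the allocation parameters are illustrative and assume the linear
 * area was sized to hold only the headers ("hdr_len").
 */
#if 0
	struct sk_buff *skb;
	int err;

	/* Linear part sized for headers; payload pages get pinned. */
	skb = sock_alloc_send_pskb(sk, hdr_len, 0, noblock, &err, 0);
	if (!skb)
		return err;

	if (zerocopy_sg_from_iter(skb, from)) {
		kfree_skb(skb);
		return -EFAULT;
	}
#endif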
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset;
	struct sk_buff *frag_iter;
	int pos = 0;
	int n;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
		offset += n;
		if (n != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = csum_and_copy_to_iter(vaddr + frag->page_offset +
						  offset - start, copy,
						  &csum2, to);
			kunmap(page);
			offset += n;
			if (n != copy)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;

			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;
}

__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}
	if (!skb_shared(skb))
		skb->csum_valid = !sum;
	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);

__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	__wsum csum;
	__sum16 sum;

	csum = skb_checksum(skb, 0, skb->len, 0);

	/* skb->csum holds pseudo checksum */
	sum = csum_fold(csum_add(skb->csum, csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}

	if (!skb_shared(skb)) {
		/* Save full packet checksum */
		skb->csum = csum;
		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum_complete_sw = 1;
		skb->csum_valid = !sum;
	}

	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);
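/*
 * Editorial note (not in the original file): callers rarely invoke
 * __skb_checksum_complete() directly; the usual gate is the
 * skb_checksum_complete() inline from linux/skbuff.h, which skips the
 * full fold when hardware already vouched for the packet - roughly:
 */
#if 0
static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
{
	return skb_csum_unnecessary(skb) ?
	       0 : __skb_checksum_complete(skb);
}
#endif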
/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			return -EINVAL;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;

		if (csum_fold(csum)) {
			iov_iter_revert(&msg->msg_iter, chunk);
			return -EINVAL;
		}

		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
	}
	return 0;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);

/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you *don't* use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
__poll_t datagram_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);
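/*
 * Editorial example (not in the original file): protocols adopt this
 * poll implementation by pointing their proto_ops at it, e.g. the way
 * raw inet sockets do. The struct below is an illustrative fragment
 * with a hypothetical family, not a complete ops table.
 */
#if 0
static const struct proto_ops example_dgram_ops = {
	.family	= PF_EXAMPLE,		/* hypothetical family */
	.poll	= datagram_poll,	/* generic datagram poll above */
	/* ... recvmsg, sendmsg, etc. ... */
};
#endif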