// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains the procedures for the handling of select and poll
 *
 * Created for Linux based loosely upon Mathius Lattner's minix
 * patches by Peter MacDonald. Heavily edited by Linus.
 *
 *  4 February 1994
 *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
 *     flag set in its personality we do *not* modify the given timeout
 *     parameter to reflect time remaining.
 *
 *  24 January 2000
 *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation
 *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
 */

#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/personality.h> /* for STICKY_TIMEOUTS */
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
#include <linux/freezer.h>
#include <net/busy_poll.h>
#include <linux/vmalloc.h>

#include <linux/uaccess.h>

__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt)
{
	if (file->f_op->poll) {
		return file->f_op->poll(file, pt);
	} else if (file_has_poll_mask(file)) {
		unsigned int events = poll_requested_events(pt);
		struct wait_queue_head *head;

		if (pt && pt->_qproc) {
			head = file->f_op->get_poll_head(file, events);
			if (!head)
				return DEFAULT_POLLMASK;
			if (IS_ERR(head))
				return EPOLLERR;
			pt->_qproc(file, head, pt);
		}

		return file->f_op->poll_mask(file, events);
	} else {
		return DEFAULT_POLLMASK;
	}
}
EXPORT_SYMBOL_GPL(vfs_poll);

/*
 * Estimate expected accuracy in ns from a timespec64.
 *
 * After quite a bit of churning around, we've settled on
 * a simple thing of taking 0.1% of the timeout as the
 * slack, with a cap of 100 msec.
 * "nice" tasks get a 0.5% slack instead.
 *
 * Consider this comment an open invitation to come up with even
 * better solutions..
 */

#define MAX_SLACK	(100 * NSEC_PER_MSEC)

static long __estimate_accuracy(struct timespec64 *tv)
{
	long slack;
	int divfactor = 1000;

	if (tv->tv_sec < 0)
		return 0;

	if (task_nice(current) > 0)
		divfactor = divfactor / 5;

	if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
		return MAX_SLACK;

	slack = tv->tv_nsec / divfactor;
	slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);

	if (slack > MAX_SLACK)
		return MAX_SLACK;

	return slack;
}

u64 select_estimate_accuracy(struct timespec64 *tv)
{
	u64 ret;
	struct timespec64 now;

	/*
	 * Realtime tasks get a slack of 0 for obvious reasons.
	 */

	if (rt_task(current))
		return 0;

	ktime_get_ts64(&now);
	now = timespec64_sub(*tv, now);
	ret = __estimate_accuracy(&now);
	if (ret < current->timer_slack_ns)
		return current->timer_slack_ns;
	return ret;
}
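
/*
 * Worked example of the slack math above (illustrative numbers, not
 * from the original source): with divfactor = 1000, a 10 ms timeout
 * yields 10000000 ns / 1000 = 10 us of slack, while a 5 minute
 * timeout would yield 300 ms and is therefore capped at MAX_SLACK
 * (100 ms). A task with positive nice uses divfactor = 200, i.e.
 * 0.5% slack.
 */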

struct poll_table_page {
	struct poll_table_page * next;
	struct poll_table_entry * entry;
	struct poll_table_entry entries[0];
};

#define POLL_TABLE_FULL(table) \
	((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))

/*
 * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 * I have rewritten this, taking some shortcuts: This code may not be easy to
 * follow, but it should be free of race-conditions, and it's practical. If you
 * understand what I'm doing here, then you understand how the linux
 * sleep/wakeup mechanism works.
 *
 * Two very simple procedures, poll_wait() and poll_freewait() do all the
 * work. poll_wait() is an inline function defined in <linux/poll.h>,
 * as all select/poll functions have to call it to add an entry to the
 * poll table.
 */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
		       poll_table *p);

void poll_initwait(struct poll_wqueues *pwq)
{
	init_poll_funcptr(&pwq->pt, __pollwait);
	pwq->polling_task = current;
	pwq->triggered = 0;
	pwq->error = 0;
	pwq->table = NULL;
	pwq->inline_index = 0;
}
EXPORT_SYMBOL(poll_initwait);

static void free_poll_entry(struct poll_table_entry *entry)
{
	remove_wait_queue(entry->wait_address, &entry->wait);
	fput(entry->filp);
}

void poll_freewait(struct poll_wqueues *pwq)
{
	struct poll_table_page * p = pwq->table;
	int i;
	for (i = 0; i < pwq->inline_index; i++)
		free_poll_entry(pwq->inline_entries + i);
	while (p) {
		struct poll_table_entry * entry;
		struct poll_table_page *old;

		entry = p->entry;
		do {
			entry--;
			free_poll_entry(entry);
		} while (entry > p->entries);
		old = p;
		p = p->next;
		free_page((unsigned long) old);
	}
}
EXPORT_SYMBOL(poll_freewait);

static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
{
	struct poll_table_page *table = p->table;

	if (p->inline_index < N_INLINE_POLL_ENTRIES)
		return p->inline_entries + p->inline_index++;

	if (!table || POLL_TABLE_FULL(table)) {
		struct poll_table_page *new_table;

		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
		if (!new_table) {
			p->error = -ENOMEM;
			return NULL;
		}
		new_table->entry = new_table->entries;
		new_table->next = table;
		p->table = new_table;
		table = new_table;
	}

	return table->entry++;
}
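
/*
 * For illustration, a typical driver ->poll method calls poll_wait()
 * before checking its state (hypothetical driver, sketch only):
 *
 *	static __poll_t mydrv_poll(struct file *file, poll_table *wait)
 *	{
 *		struct mydrv *drv = file->private_data;
 *
 *		poll_wait(file, &drv->read_wq, wait);
 *		if (mydrv_data_ready(drv))
 *			return EPOLLIN | EPOLLRDNORM;
 *		return 0;
 *	}
 *
 * When called from select/poll, poll_wait() ends up in __pollwait()
 * below, which queues the polling task on drv->read_wq.
 */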

static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct poll_wqueues *pwq = wait->private;
	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);

	/*
	 * Although this function is called under waitqueue lock, LOCK
	 * doesn't imply write barrier and the users expect write
	 * barrier semantics on wakeup functions.  The following
	 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
	 * and is paired with smp_store_mb() in poll_schedule_timeout.
	 */
	smp_wmb();
	pwq->triggered = 1;

	/*
	 * Perform the default wake up operation using a dummy
	 * waitqueue.
	 *
	 * TODO: This is hacky but there currently is no interface to
	 * pass in @sync.  @sync is scheduled to be removed and once
	 * that happens, wake_up_process() can be used directly.
	 */
	return default_wake_function(&dummy_wait, mode, sync, key);
}

static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct poll_table_entry *entry;

	entry = container_of(wait, struct poll_table_entry, wait);
	if (key && !(key_to_poll(key) & entry->key))
		return 0;
	return __pollwake(wait, mode, sync, key);
}

/* Add a new entry */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
		       poll_table *p)
{
	struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
	struct poll_table_entry *entry = poll_get_entry(pwq);
	if (!entry)
		return;
	entry->filp = get_file(filp);
	entry->wait_address = wait_address;
	entry->key = p->_key;
	init_waitqueue_func_entry(&entry->wait, pollwake);
	entry->wait.private = pwq;
	add_wait_queue(wait_address, &entry->wait);
}

static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
				 ktime_t *expires, unsigned long slack)
{
	int rc = -EINTR;

	set_current_state(state);
	if (!pwq->triggered)
		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
	__set_current_state(TASK_RUNNING);

	/*
	 * Prepare for the next iteration.
	 *
	 * The following smp_store_mb() serves two purposes. First, it's
	 * the counterpart rmb of the wmb in pollwake() such that data
	 * written before wake up is always visible after wake up.
	 * Second, the full barrier guarantees that triggered clearing
	 * doesn't pass event check of the next iteration. Note that
	 * this problem doesn't exist for the first iteration as
	 * add_wait_queue() has full barrier semantics.
	 */
	smp_store_mb(pwq->triggered, 0);

	return rc;
}

/**
 * poll_select_set_timeout - helper function to setup the timeout value
 * @to:		pointer to timespec64 variable for the final timeout
 * @sec:	seconds (from user space)
 * @nsec:	nanoseconds (from user space)
 *
 * Note, we do not use a timespec for the user space value here. That
 * way we can use the function for timeval and compat interfaces as well.
 *
 * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
 */
int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
{
	struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec};

	if (!timespec64_valid(&ts))
		return -EINVAL;

	/* Optimize for the zero timeout value here */
	if (!sec && !nsec) {
		to->tv_sec = to->tv_nsec = 0;
	} else {
		ktime_get_ts64(to);
		*to = timespec64_add_safe(*to, ts);
	}
	return 0;
}

static int poll_select_copy_remaining(struct timespec64 *end_time,
				      void __user *p,
				      int timeval, int ret)
{
	struct timespec64 rts;
	struct timeval rtv;

	if (!p)
		return ret;

	if (current->personality & STICKY_TIMEOUTS)
		goto sticky;

	/* No update for zero timeout */
	if (!end_time->tv_sec && !end_time->tv_nsec)
		return ret;

	ktime_get_ts64(&rts);
	rts = timespec64_sub(*end_time, rts);
	if (rts.tv_sec < 0)
		rts.tv_sec = rts.tv_nsec = 0;

	if (timeval) {
		if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
			memset(&rtv, 0, sizeof(rtv));
		rtv.tv_sec = rts.tv_sec;
		rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;

		if (!copy_to_user(p, &rtv, sizeof(rtv)))
			return ret;

	} else if (!put_timespec64(&rts, p))
		return ret;

	/*
	 * If an application puts its timeval in read-only memory, we
	 * don't want the Linux-specific update to the timeval to
	 * cause a fault after the select has completed
	 * successfully. However, because we're not updating the
	 * timeval, we can't restart the system call.
	 */

sticky:
	if (ret == -ERESTARTNOHAND)
		ret = -EINTR;
	return ret;
}

/*
 * Scalable version of the fd_set.
 */

typedef struct {
	unsigned long *in, *out, *ex;
	unsigned long *res_in, *res_out, *res_ex;
} fd_set_bits;

/*
 * How many longwords for "nr" bits?
 */
#define FDS_BITPERLONG	(8*sizeof(long))
#define FDS_LONGS(nr)	(((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)
#define FDS_BYTES(nr)	(FDS_LONGS(nr)*sizeof(long))
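
/*
 * Worked example (illustrative): on a 64-bit kernel FDS_BITPERLONG
 * is 64, so watching fds 0..99 (nr = 100) needs FDS_LONGS(100) = 2
 * longwords, i.e. FDS_BYTES(100) = 16 bytes per bitmap.
 */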

/*
 * We do a VERIFY_WRITE here even though we are only reading this time:
 * we'll write to it eventually..
 *
 * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
 */
static inline
int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
{
	nr = FDS_BYTES(nr);
	if (ufdset)
		return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0;

	memset(fdset, 0, nr);
	return 0;
}

static inline unsigned long __must_check
set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
{
	if (ufdset)
		return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
	return 0;
}

static inline
void zero_fd_set(unsigned long nr, unsigned long *fdset)
{
	memset(fdset, 0, FDS_BYTES(nr));
}

#define FDS_IN(fds, n)		(fds->in + n)
#define FDS_OUT(fds, n)		(fds->out + n)
#define FDS_EX(fds, n)		(fds->ex + n)

#define BITS(fds, n)	(*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))

static int max_select_fd(unsigned long n, fd_set_bits *fds)
{
	unsigned long *open_fds;
	unsigned long set;
	int max;
	struct fdtable *fdt;

	/* handle last incomplete long-word first */
	set = ~(~0UL << (n & (BITS_PER_LONG-1)));
	n /= BITS_PER_LONG;
	fdt = files_fdtable(current->files);
	open_fds = fdt->open_fds + n;
	max = 0;
	if (set) {
		set &= BITS(fds, n);
		if (set) {
			if (!(set & ~*open_fds))
				goto get_max;
			return -EBADF;
		}
	}
	while (n) {
		open_fds--;
		n--;
		set = BITS(fds, n);
		if (!set)
			continue;
		if (set & ~*open_fds)
			return -EBADF;
		if (max)
			continue;
get_max:
		do {
			max++;
			set >>= 1;
		} while (set);
		max += n * BITS_PER_LONG;
	}

	return max;
}

#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)
#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)
#define POLLEX_SET (EPOLLPRI)

static inline void wait_key_set(poll_table *wait, unsigned long in,
				unsigned long out, unsigned long bit,
				__poll_t ll_flag)
{
	wait->_key = POLLEX_SET | ll_flag;
	if (in & bit)
		wait->_key |= POLLIN_SET;
	if (out & bit)
		wait->_key |= POLLOUT_SET;
}
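
/*
 * Example (illustrative): a select() caller watching an fd only for
 * readability ends up with wait->_key = POLLEX_SET | ll_flag |
 * POLLIN_SET. Error and hangup conditions (EPOLLERR, EPOLLHUP) are
 * part of POLLIN_SET and POLLOUT_SET because they are always of
 * interest, whether or not the caller asked for them.
 */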

static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
{
	ktime_t expire, *to = NULL;
	struct poll_wqueues table;
	poll_table *wait;
	int retval, i, timed_out = 0;
	u64 slack = 0;
	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
	unsigned long busy_start = 0;

	rcu_read_lock();
	retval = max_select_fd(n, fds);
	rcu_read_unlock();

	if (retval < 0)
		return retval;
	n = retval;

	poll_initwait(&table);
	wait = &table.pt;
	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
		wait->_qproc = NULL;
		timed_out = 1;
	}

	if (end_time && !timed_out)
		slack = select_estimate_accuracy(end_time);

	retval = 0;
	for (;;) {
		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
		bool can_busy_loop = false;

		inp = fds->in; outp = fds->out; exp = fds->ex;
		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;

		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
			unsigned long in, out, ex, all_bits, bit = 1, j;
			unsigned long res_in = 0, res_out = 0, res_ex = 0;
			__poll_t mask;

			in = *inp++; out = *outp++; ex = *exp++;
			all_bits = in | out | ex;
			if (all_bits == 0) {
				i += BITS_PER_LONG;
				continue;
			}

			for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
				struct fd f;
				if (i >= n)
					break;
				if (!(bit & all_bits))
					continue;
				f = fdget(i);
				if (f.file) {
					wait_key_set(wait, in, out, bit,
						     busy_flag);
					mask = vfs_poll(f.file, wait);

					fdput(f);
					if ((mask & POLLIN_SET) && (in & bit)) {
						res_in |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					if ((mask & POLLOUT_SET) && (out & bit)) {
						res_out |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					if ((mask & POLLEX_SET) && (ex & bit)) {
						res_ex |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					/* got something, stop busy polling */
					if (retval) {
						can_busy_loop = false;
						busy_flag = 0;

					/*
					 * only remember a returned
					 * POLL_BUSY_LOOP if we asked for it
					 */
					} else if (busy_flag & mask)
						can_busy_loop = true;

				}
			}
			if (res_in)
				*rinp = res_in;
			if (res_out)
				*routp = res_out;
			if (res_ex)
				*rexp = res_ex;
			cond_resched();
		}
		wait->_qproc = NULL;
		if (retval || timed_out || signal_pending(current))
			break;
		if (table.error) {
			retval = table.error;
			break;
		}

		/* only if found POLL_BUSY_LOOP sockets && not out of time */
		if (can_busy_loop && !need_resched()) {
			if (!busy_start) {
				busy_start = busy_loop_current_time();
				continue;
			}
			if (!busy_loop_timeout(busy_start))
				continue;
		}
		busy_flag = 0;

		/*
		 * If this is the first loop and we have a timeout
		 * given, then we convert to ktime_t and set the to
		 * pointer to the expiry value.
		 */
		if (end_time && !to) {
			expire = timespec64_to_ktime(*end_time);
			to = &expire;
		}

		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
					   to, slack))
			timed_out = 1;
	}

	poll_freewait(&table);

	return retval;
}

/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restarts only when you want it to.
 */
int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
		    fd_set __user *exp, struct timespec64 *end_time)
{
	fd_set_bits fds;
	void *bits;
	int ret, max_fds;
	size_t size, alloc_size;
	struct fdtable *fdt;
	/* Allocate small arguments on the stack to save memory and be faster */
	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];

	ret = -EINVAL;
	if (n < 0)
		goto out_nofds;

	/* max_fds can increase, so grab it once to avoid race */
	rcu_read_lock();
	fdt = files_fdtable(current->files);
	max_fds = fdt->max_fds;
	rcu_read_unlock();
	if (n > max_fds)
		n = max_fds;

	/*
	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing);
	 * since we used fd_set we need to allocate memory in units of
	 * long-words.
	 */
	size = FDS_BYTES(n);
	bits = stack_fds;
	if (size > sizeof(stack_fds) / 6) {
		/* Not enough space in on-stack array; must use kmalloc */
		ret = -ENOMEM;
		if (size > (SIZE_MAX / 6))
			goto out_nofds;

		alloc_size = 6 * size;
		bits = kvmalloc(alloc_size, GFP_KERNEL);
		if (!bits)
			goto out_nofds;
	}
	fds.in      = bits;
	fds.out     = bits +   size;
	fds.ex      = bits + 2*size;
	fds.res_in  = bits + 3*size;
	fds.res_out = bits + 4*size;
	fds.res_ex  = bits + 5*size;

	if ((ret = get_fd_set(n, inp, fds.in)) ||
	    (ret = get_fd_set(n, outp, fds.out)) ||
	    (ret = get_fd_set(n, exp, fds.ex)))
		goto out;
	zero_fd_set(n, fds.res_in);
	zero_fd_set(n, fds.res_out);
	zero_fd_set(n, fds.res_ex);

	ret = do_select(n, &fds, end_time);

	if (ret < 0)
		goto out;
	if (!ret) {
		ret = -ERESTARTNOHAND;
		if (signal_pending(current))
			goto out;
		ret = 0;
	}

	if (set_fd_set(n, inp, fds.res_in) ||
	    set_fd_set(n, outp, fds.res_out) ||
	    set_fd_set(n, exp, fds.res_ex))
		ret = -EFAULT;

out:
	if (bits != stack_fds)
		kvfree(bits);
out_nofds:
	return ret;
}

static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
		       fd_set __user *exp, struct timeval __user *tvp)
{
	struct timespec64 end_time, *to = NULL;
	struct timeval tv;
	int ret;

	if (tvp) {
		if (copy_from_user(&tv, tvp, sizeof(tv)))
			return -EFAULT;

		to = &end_time;
		if (poll_select_set_timeout(to,
				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
			return -EINVAL;
	}

	ret = core_sys_select(n, inp, outp, exp, to);
	ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);

	return ret;
}

SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
		fd_set __user *, exp, struct timeval __user *, tvp)
{
	return kern_select(n, inp, outp, exp, tvp);
}
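
/*
 * Illustrative userspace usage of the syscall above (sketch, not part
 * of the kernel build; sock_fd is a placeholder descriptor):
 *
 *	fd_set rfds;
 *	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *
 *	FD_ZERO(&rfds);
 *	FD_SET(sock_fd, &rfds);
 *	if (select(sock_fd + 1, &rfds, NULL, NULL, &tv) > 0 &&
 *	    FD_ISSET(sock_fd, &rfds))
 *		... sock_fd is readable ...
 *
 * Unless the task has STICKY_TIMEOUTS set in its personality, tv is
 * updated to the time remaining by poll_select_copy_remaining().
 */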

static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
		       fd_set __user *exp, struct timespec __user *tsp,
		       const sigset_t __user *sigmask, size_t sigsetsize)
{
	sigset_t ksigmask, sigsaved;
	struct timespec64 ts, end_time, *to = NULL;
	int ret;

	if (tsp) {
		if (get_timespec64(&ts, tsp))
			return -EFAULT;

		to = &end_time;
		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
			return -EINVAL;
	}

	if (sigmask) {
		/* XXX: Don't preclude handling different sized sigset_t's. */
		if (sigsetsize != sizeof(sigset_t))
			return -EINVAL;
		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
			return -EFAULT;

		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
	}

	ret = core_sys_select(n, inp, outp, exp, to);
	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);

	if (ret == -ERESTARTNOHAND) {
		/*
		 * Don't restore the signal mask yet. Let do_signal() deliver
		 * the signal on the way back to userspace, before the signal
		 * mask is restored.
		 */
		if (sigmask) {
			memcpy(&current->saved_sigmask, &sigsaved,
			       sizeof(sigsaved));
			set_restore_sigmask();
		}
	} else if (sigmask)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	return ret;
}

/*
 * Most architectures can't handle 7-argument syscalls. So we provide a
 * 6-argument version where the sixth argument is a pointer to a structure
 * which has a pointer to the sigset_t itself followed by a size_t containing
 * the sigset size.
 */
SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
		fd_set __user *, exp, struct timespec __user *, tsp,
		void __user *, sig)
{
	size_t sigsetsize = 0;
	sigset_t __user *up = NULL;

	if (sig) {
		if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
		    || __get_user(up, (sigset_t __user * __user *)sig)
		    || __get_user(sigsetsize,
				  (size_t __user *)(sig+sizeof(void *))))
			return -EFAULT;
	}

	return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);
}

#ifdef __ARCH_WANT_SYS_OLD_SELECT
struct sel_arg_struct {
	unsigned long n;
	fd_set __user *inp, *outp, *exp;
	struct timeval __user *tvp;
};

SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
{
	struct sel_arg_struct a;

	if (copy_from_user(&a, arg, sizeof(a)))
		return -EFAULT;
	return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp);
}
#endif

struct poll_list {
	struct poll_list *next;
	int len;
	struct pollfd entries[0];
};

#define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
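
/*
 * Worked example (illustrative, 64-bit kernel with 4 KiB pages):
 * sizeof(struct poll_list) is 16 bytes (pointer + int + padding) and
 * sizeof(struct pollfd) is 8 bytes, so POLLFD_PER_PAGE comes out to
 * (4096 - 16) / 8 = 510 pollfds per chained page.
 */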

/*
 * Fish for pollable events on the pollfd->fd file descriptor. We're only
 * interested in events matching the pollfd->events mask, and the result
 * matching that mask is both recorded in pollfd->revents and returned. The
 * pwait poll_table will be used by the fd-provided poll handler for waiting,
 * if pwait->_qproc is non-NULL.
 */
static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
				 bool *can_busy_poll,
				 __poll_t busy_flag)
{
	int fd = pollfd->fd;
	__poll_t mask = 0, filter;
	struct fd f;

	if (fd < 0)
		goto out;
	mask = EPOLLNVAL;
	f = fdget(fd);
	if (!f.file)
		goto out;

	/* userland u16 ->events contains POLL... bitmap */
	filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
	pwait->_key = filter | busy_flag;
	mask = vfs_poll(f.file, pwait);
	if (mask & busy_flag)
		*can_busy_poll = true;
	mask &= filter;		/* Mask out unneeded events. */
	fdput(f);

out:
	/* ... and so does ->revents */
	pollfd->revents = mangle_poll(mask);
	return mask;
}

static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
		   struct timespec64 *end_time)
{
	poll_table* pt = &wait->pt;
	ktime_t expire, *to = NULL;
	int timed_out = 0, count = 0;
	u64 slack = 0;
	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
	unsigned long busy_start = 0;

	/* Optimise the no-wait case */
	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
		pt->_qproc = NULL;
		timed_out = 1;
	}

	if (end_time && !timed_out)
		slack = select_estimate_accuracy(end_time);

	for (;;) {
		struct poll_list *walk;
		bool can_busy_loop = false;

		for (walk = list; walk != NULL; walk = walk->next) {
			struct pollfd * pfd, * pfd_end;

			pfd = walk->entries;
			pfd_end = pfd + walk->len;
			for (; pfd != pfd_end; pfd++) {
				/*
				 * Fish for events. If we found one, record it
				 * and kill poll_table->_qproc, so we don't
				 * needlessly register any other waiters after
				 * this. They'll get immediately deregistered
				 * when we break out and return.
				 */
				if (do_pollfd(pfd, pt, &can_busy_loop,
					      busy_flag)) {
					count++;
					pt->_qproc = NULL;
					/* found something, stop busy polling */
					busy_flag = 0;
					can_busy_loop = false;
				}
			}
		}
		/*
		 * All waiters have already been registered, so don't provide
		 * a poll_table->_qproc to them on the next loop iteration.
		 */
		pt->_qproc = NULL;
		if (!count) {
			count = wait->error;
			if (signal_pending(current))
				count = -EINTR;
		}
		if (count || timed_out)
			break;

		/* only if found POLL_BUSY_LOOP sockets && not out of time */
		if (can_busy_loop && !need_resched()) {
			if (!busy_start) {
				busy_start = busy_loop_current_time();
				continue;
			}
			if (!busy_loop_timeout(busy_start))
				continue;
		}
		busy_flag = 0;

		/*
		 * If this is the first loop and we have a timeout
		 * given, then we convert to ktime_t and set the to
		 * pointer to the expiry value.
		 */
		if (end_time && !to) {
			expire = timespec64_to_ktime(*end_time);
			to = &expire;
		}

		if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
			timed_out = 1;
	}
	return count;
}

#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
			sizeof(struct pollfd))

static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
		struct timespec64 *end_time)
{
	struct poll_wqueues table;
	int err = -EFAULT, fdcount, len, size;
	/* Allocate small arguments on the stack to save memory and be
	   faster - use long to make sure the buffer is aligned properly
	   on 64 bit archs to avoid unaligned access */
	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
	struct poll_list *const head = (struct poll_list *)stack_pps;
	struct poll_list *walk = head;
	unsigned long todo = nfds;

	if (nfds > rlimit(RLIMIT_NOFILE))
		return -EINVAL;

	len = min_t(unsigned int, nfds, N_STACK_PPS);
	for (;;) {
		walk->next = NULL;
		walk->len = len;
		if (!len)
			break;

		if (copy_from_user(walk->entries, ufds + nfds-todo,
					sizeof(struct pollfd) * walk->len))
			goto out_fds;

		todo -= walk->len;
		if (!todo)
			break;

		len = min(todo, POLLFD_PER_PAGE);
		size = sizeof(struct poll_list) + sizeof(struct pollfd) * len;
		walk = walk->next = kmalloc(size, GFP_KERNEL);
		if (!walk) {
			err = -ENOMEM;
			goto out_fds;
		}
	}

	poll_initwait(&table);
	fdcount = do_poll(head, &table, end_time);
	poll_freewait(&table);

	for (walk = head; walk; walk = walk->next) {
		struct pollfd *fds = walk->entries;
		int j;

		for (j = 0; j < walk->len; j++, ufds++)
			if (__put_user(fds[j].revents, &ufds->revents))
				goto out_fds;
	}

	err = fdcount;
out_fds:
	walk = head->next;
	while (walk) {
		struct poll_list *pos = walk;
		walk = walk->next;
		kfree(pos);
	}

	return err;
}

static long do_restart_poll(struct restart_block *restart_block)
{
	struct pollfd __user *ufds = restart_block->poll.ufds;
	int nfds = restart_block->poll.nfds;
	struct timespec64 *to = NULL, end_time;
	int ret;

	if (restart_block->poll.has_timeout) {
		end_time.tv_sec = restart_block->poll.tv_sec;
		end_time.tv_nsec = restart_block->poll.tv_nsec;
		to = &end_time;
	}

	ret = do_sys_poll(ufds, nfds, to);

	if (ret == -EINTR) {
		restart_block->fn = do_restart_poll;
		ret = -ERESTART_RESTARTBLOCK;
	}
	return ret;
}

SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
		int, timeout_msecs)
{
	struct timespec64 end_time, *to = NULL;
	int ret;

	if (timeout_msecs >= 0) {
		to = &end_time;
		poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
			NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
	}

	ret = do_sys_poll(ufds, nfds, to);

	if (ret == -EINTR) {
		struct restart_block *restart_block;

		restart_block = &current->restart_block;
		restart_block->fn = do_restart_poll;
		restart_block->poll.ufds = ufds;
		restart_block->poll.nfds = nfds;

		if (timeout_msecs >= 0) {
			restart_block->poll.tv_sec = end_time.tv_sec;
			restart_block->poll.tv_nsec = end_time.tv_nsec;
			restart_block->poll.has_timeout = 1;
		} else
			restart_block->poll.has_timeout = 0;

		ret = -ERESTART_RESTARTBLOCK;
	}
	return ret;
}
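
/*
 * Illustrative userspace usage of the syscall above (sketch, not part
 * of the kernel build; sock_fd is a placeholder descriptor):
 *
 *	struct pollfd pfd = { .fd = sock_fd, .events = POLLIN };
 *
 *	if (poll(&pfd, 1, 5000) > 0 && (pfd.revents & POLLIN))
 *		... sock_fd is readable ...
 *
 * The 5000 ms timeout becomes the end_time passed to do_sys_poll()
 * via poll_select_set_timeout() above; a negative timeout means wait
 * indefinitely.
 */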

SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
		struct timespec __user *, tsp, const sigset_t __user *, sigmask,
		size_t, sigsetsize)
{
	sigset_t ksigmask, sigsaved;
	struct timespec64 ts, end_time, *to = NULL;
	int ret;

	if (tsp) {
		if (get_timespec64(&ts, tsp))
			return -EFAULT;

		to = &end_time;
		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
			return -EINVAL;
	}

	if (sigmask) {
		/* XXX: Don't preclude handling different sized sigset_t's. */
		if (sigsetsize != sizeof(sigset_t))
			return -EINVAL;
		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
			return -EFAULT;

		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
	}

	ret = do_sys_poll(ufds, nfds, to);

	/* We can restart this syscall, usually */
	if (ret == -EINTR) {
		/*
		 * Don't restore the signal mask yet. Let do_signal() deliver
		 * the signal on the way back to userspace, before the signal
		 * mask is restored.
		 */
		if (sigmask) {
			memcpy(&current->saved_sigmask, &sigsaved,
			       sizeof(sigsaved));
			set_restore_sigmask();
		}
		ret = -ERESTARTNOHAND;
	} else if (sigmask)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);

	return ret;
}
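
/*
 * Illustrative userspace usage of ppoll() (sketch; fds/nfds are
 * placeholders): the atomic signal-mask switch closes the race
 * between unblocking a signal and entering the wait:
 *
 *	sigset_t unblocked;
 *
 *	sigemptyset(&unblocked);	// allow delivery during the wait
 *	ppoll(fds, nfds, NULL, &unblocked);
 *
 * The kernel installs the mask before sleeping and restores the old
 * one either directly or, if a signal is pending, only after that
 * signal has been delivered (via saved_sigmask and
 * set_restore_sigmask() above).
 */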

#ifdef CONFIG_COMPAT
#define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t))

static
int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *p,
				      int timeval, int ret)
{
	struct timespec64 ts;

	if (!p)
		return ret;

	if (current->personality & STICKY_TIMEOUTS)
		goto sticky;

	/* No update for zero timeout */
	if (!end_time->tv_sec && !end_time->tv_nsec)
		return ret;

	ktime_get_ts64(&ts);
	ts = timespec64_sub(*end_time, ts);
	if (ts.tv_sec < 0)
		ts.tv_sec = ts.tv_nsec = 0;

	if (timeval) {
		struct compat_timeval rtv;

		rtv.tv_sec = ts.tv_sec;
		rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;

		if (!copy_to_user(p, &rtv, sizeof(rtv)))
			return ret;
	} else {
		if (!compat_put_timespec64(&ts, p))
			return ret;
	}
	/*
	 * If an application puts its timeval in read-only memory, we
	 * don't want the Linux-specific update to the timeval to
	 * cause a fault after the select has completed
	 * successfully. However, because we're not updating the
	 * timeval, we can't restart the system call.
	 */

sticky:
	if (ret == -ERESTARTNOHAND)
		ret = -EINTR;
	return ret;
}

/*
 * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
 * 64-bit unsigned longs.
 */
static
int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
			unsigned long *fdset)
{
	if (ufdset) {
		return compat_get_bitmap(fdset, ufdset, nr);
	} else {
		zero_fd_set(nr, fdset);
		return 0;
	}
}

static
int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
		      unsigned long *fdset)
{
	if (!ufdset)
		return 0;
	return compat_put_bitmap(ufdset, fdset, nr);
}
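
/*
 * Worked example of the frobbing (illustrative): fd 40 lives in bit 8
 * of the second 32-bit word of a compat fd_set, but in bit 40 of the
 * first 64-bit longword of the native bitmap. compat_get_bitmap() and
 * compat_put_bitmap() perform exactly this repacking, in each
 * direction.
 */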

/*
 * This is a virtual copy of sys_select from fs/select.c and probably
 * should be compared to it from time to time
 */

/*
 * We can actually return ERESTARTSYS instead of EINTR, but I'd
 * like to be certain this leads to no problems. So I return
 * EINTR just for safety.
 *
 * Update: ERESTARTSYS breaks at least the xview clock binary, so
 * I'm trying ERESTARTNOHAND which restarts only when you want it to.
 */
static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
	struct timespec64 *end_time)
{
	fd_set_bits fds;
	void *bits;
	int size, max_fds, ret = -EINVAL;
	struct fdtable *fdt;
	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];

	if (n < 0)
		goto out_nofds;

	/* max_fds can increase, so grab it once to avoid race */
	rcu_read_lock();
	fdt = files_fdtable(current->files);
	max_fds = fdt->max_fds;
	rcu_read_unlock();
	if (n > max_fds)
		n = max_fds;

	/*
	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing);
	 * since we used fd_set we need to allocate memory in units of
	 * long-words.
	 */
	size = FDS_BYTES(n);
	bits = stack_fds;
	if (size > sizeof(stack_fds) / 6) {
		bits = kmalloc_array(6, size, GFP_KERNEL);
		ret = -ENOMEM;
		if (!bits)
			goto out_nofds;
	}
	fds.in      = (unsigned long *)  bits;
	fds.out     = (unsigned long *) (bits +   size);
	fds.ex      = (unsigned long *) (bits + 2*size);
	fds.res_in  = (unsigned long *) (bits + 3*size);
	fds.res_out = (unsigned long *) (bits + 4*size);
	fds.res_ex  = (unsigned long *) (bits + 5*size);

	if ((ret = compat_get_fd_set(n, inp, fds.in)) ||
	    (ret = compat_get_fd_set(n, outp, fds.out)) ||
	    (ret = compat_get_fd_set(n, exp, fds.ex)))
		goto out;
	zero_fd_set(n, fds.res_in);
	zero_fd_set(n, fds.res_out);
	zero_fd_set(n, fds.res_ex);

	ret = do_select(n, &fds, end_time);

	if (ret < 0)
		goto out;
	if (!ret) {
		ret = -ERESTARTNOHAND;
		if (signal_pending(current))
			goto out;
		ret = 0;
	}

	if (compat_set_fd_set(n, inp, fds.res_in) ||
	    compat_set_fd_set(n, outp, fds.res_out) ||
	    compat_set_fd_set(n, exp, fds.res_ex))
		ret = -EFAULT;
out:
	if (bits != stack_fds)
		kfree(bits);
out_nofds:
	return ret;
}

static int do_compat_select(int n, compat_ulong_t __user *inp,
	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
	struct compat_timeval __user *tvp)
{
	struct timespec64 end_time, *to = NULL;
	struct compat_timeval tv;
	int ret;

	if (tvp) {
		if (copy_from_user(&tv, tvp, sizeof(tv)))
			return -EFAULT;

		to = &end_time;
		if (poll_select_set_timeout(to,
				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
			return -EINVAL;
	}

	ret = compat_core_sys_select(n, inp, outp, exp, to);
	ret = compat_poll_select_copy_remaining(&end_time, tvp, 1, ret);

	return ret;
}

COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
	struct compat_timeval __user *, tvp)
{
	return do_compat_select(n, inp, outp, exp, tvp);
}

struct compat_sel_arg_struct {
	compat_ulong_t n;
	compat_uptr_t inp;
	compat_uptr_t outp;
	compat_uptr_t exp;
	compat_uptr_t tvp;
};

COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
{
	struct compat_sel_arg_struct a;

	if (copy_from_user(&a, arg, sizeof(a)))
		return -EFAULT;
	return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
				compat_ptr(a.exp), compat_ptr(a.tvp));
}

static long do_compat_pselect(int n, compat_ulong_t __user *inp,
	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
	struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
	compat_size_t sigsetsize)
{
	sigset_t ksigmask, sigsaved;
	struct timespec64 ts, end_time, *to = NULL;
	int ret;

	if (tsp) {
		if (compat_get_timespec64(&ts, tsp))
			return -EFAULT;

		to = &end_time;
		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
			return -EINVAL;
	}

	if (sigmask) {
		if (sigsetsize != sizeof(compat_sigset_t))
			return -EINVAL;
		if (get_compat_sigset(&ksigmask, sigmask))
			return -EFAULT;

		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
	}

	ret = compat_core_sys_select(n, inp, outp, exp, to);
	ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);

	if (ret == -ERESTARTNOHAND) {
		/*
		 * Don't restore the signal mask yet. Let do_signal() deliver
		 * the signal on the way back to userspace, before the signal
		 * mask is restored.
		 */
		if (sigmask) {
			memcpy(&current->saved_sigmask, &sigsaved,
			       sizeof(sigsaved));
			set_restore_sigmask();
		}
	} else if (sigmask)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	return ret;
}

COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
	struct compat_timespec __user *, tsp, void __user *, sig)
{
	compat_size_t sigsetsize = 0;
	compat_uptr_t up = 0;

	if (sig) {
		if (!access_ok(VERIFY_READ, sig,
				sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
			__get_user(up, (compat_uptr_t __user *)sig) ||
			__get_user(sigsetsize,
				(compat_size_t __user *)(sig+sizeof(up))))
			return -EFAULT;
	}
	return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up),
				 sigsetsize);
}

COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
	unsigned int,  nfds, struct compat_timespec __user *, tsp,
	const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
{
	sigset_t ksigmask, sigsaved;
	struct timespec64 ts, end_time, *to = NULL;
	int ret;

	if (tsp) {
		if (compat_get_timespec64(&ts, tsp))
			return -EFAULT;

		to = &end_time;
		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
			return -EINVAL;
	}

	if (sigmask) {
		if (sigsetsize != sizeof(compat_sigset_t))
			return -EINVAL;
		if (get_compat_sigset(&ksigmask, sigmask))
			return -EFAULT;

		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
	}

	ret = do_sys_poll(ufds, nfds, to);

	/* We can restart this syscall, usually */
	if (ret == -EINTR) {
		/*
		 * Don't restore the signal mask yet. Let do_signal() deliver
		 * the signal on the way back to userspace, before the signal
		 * mask is restored.
		 */
		if (sigmask) {
			memcpy(&current->saved_sigmask, &sigsaved,
			       sizeof(sigsaved));
			set_restore_sigmask();
		}
		ret = -ERESTARTNOHAND;
	} else if (sigmask)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);

	return ret;
}
#endif