xref: /openbmc/linux/fs/select.c (revision a86854d0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * This file contains the procedures for the handling of select and poll
4  *
5  * Created for Linux based loosely upon Mathius Lattner's minix
6  * patches by Peter MacDonald. Heavily edited by Linus.
7  *
8  *  4 February 1994
9  *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
10  *     flag set in its personality we do *not* modify the given timeout
11  *     parameter to reflect time remaining.
12  *
13  *  24 January 2000
14  *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation
15  *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
16  */
17 
18 #include <linux/kernel.h>
19 #include <linux/sched/signal.h>
20 #include <linux/sched/rt.h>
21 #include <linux/syscalls.h>
22 #include <linux/export.h>
23 #include <linux/slab.h>
24 #include <linux/poll.h>
25 #include <linux/personality.h> /* for STICKY_TIMEOUTS */
26 #include <linux/file.h>
27 #include <linux/fdtable.h>
28 #include <linux/fs.h>
29 #include <linux/rcupdate.h>
30 #include <linux/hrtimer.h>
31 #include <linux/freezer.h>
32 #include <net/busy_poll.h>
33 #include <linux/vmalloc.h>
34 
35 #include <linux/uaccess.h>
36 
37 __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt)
38 {
39 	if (file->f_op->poll) {
40 		return file->f_op->poll(file, pt);
41 	} else if (file_has_poll_mask(file)) {
42 		unsigned int events = poll_requested_events(pt);
43 		struct wait_queue_head *head;
44 
45 		if (pt && pt->_qproc) {
46 			head = file->f_op->get_poll_head(file, events);
47 			if (!head)
48 				return DEFAULT_POLLMASK;
49 			if (IS_ERR(head))
50 				return EPOLLERR;
51 			pt->_qproc(file, head, pt);
52 		}
53 
54 		return file->f_op->poll_mask(file, events);
55 	} else {
56 		return DEFAULT_POLLMASK;
57 	}
58 }
59 EXPORT_SYMBOL_GPL(vfs_poll);
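/*
 * A rough summary of the dispatch above: drivers that implement the
 * classic ->poll() method are called directly; drivers that provide the
 * newer ->get_poll_head()/->poll_mask() pair have their wait queue
 * registered here (when a queueing callback was supplied via @pt) and
 * then report readiness via ->poll_mask(); files with neither method
 * are treated as always readable and writable (DEFAULT_POLLMASK).
 */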
60 
61 /*
62  * Estimate expected accuracy in ns from a timeval.
63  *
64  * After quite a bit of churning around, we've settled on
65  * a simple thing of taking 0.1% of the timeout as the
66  * slack, with a cap of 100 msec.
67  * "nice" tasks get a 0.5% slack instead.
68  *
69  * Consider this comment an open invitation to come up with even
70  * better solutions..
71  */
72 
73 #define MAX_SLACK	(100 * NSEC_PER_MSEC)
74 
75 static long __estimate_accuracy(struct timespec64 *tv)
76 {
77 	long slack;
78 	int divfactor = 1000;
79 
80 	if (tv->tv_sec < 0)
81 		return 0;
82 
83 	if (task_nice(current) > 0)
84 		divfactor = divfactor / 5;
85 
86 	if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
87 		return MAX_SLACK;
88 
89 	slack = tv->tv_nsec / divfactor;
90 	slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
91 
92 	if (slack > MAX_SLACK)
93 		return MAX_SLACK;
94 
95 	return slack;
96 }
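/*
 * Worked example (illustrative only): a remaining timeout of 10s gives
 * a normal task 10s / 1000 = 10ms of slack, while a task with a
 * positive nice value uses divfactor = 200 and gets 50ms.  Anything
 * that would exceed 100ms is clamped to MAX_SLACK.
 */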
97 
98 u64 select_estimate_accuracy(struct timespec64 *tv)
99 {
100 	u64 ret;
101 	struct timespec64 now;
102 
103 	/*
104 	 * Realtime tasks get a slack of 0 for obvious reasons.
105 	 */
106 
107 	if (rt_task(current))
108 		return 0;
109 
110 	ktime_get_ts64(&now);
111 	now = timespec64_sub(*tv, now);
112 	ret = __estimate_accuracy(&now);
113 	if (ret < current->timer_slack_ns)
114 		return current->timer_slack_ns;
115 	return ret;
116 }
117 
118 
119 
120 struct poll_table_page {
121 	struct poll_table_page * next;
122 	struct poll_table_entry * entry;
123 	struct poll_table_entry entries[0];
124 };
125 
126 #define POLL_TABLE_FULL(table) \
127 	((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
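/*
 * A poll_table_page is a single page of poll_table_entry slots, with
 * ->entry acting as a bump-allocator cursor.  POLL_TABLE_FULL() checks
 * whether bumping the cursor once more would run past the end of the
 * page, in which case poll_get_entry() chains a fresh page onto the
 * list.
 */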
128 
129 /*
130  * Ok, Peter made a complicated, but straightforward multiple_wait() function.
131  * I have rewritten this, taking some shortcuts: This code may not be easy to
132  * follow, but it should be free of race-conditions, and it's practical. If you
133  * understand what I'm doing here, then you understand how the Linux
134  * sleep/wakeup mechanism works.
135  *
136  * Two very simple procedures, poll_wait() and poll_freewait(), do all the
137  * work.  poll_wait() is an inline function defined in <linux/poll.h>,
138  * as all select/poll functions have to call it to add an entry to the
139  * poll table.
140  */
141 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
142 		       poll_table *p);
143 
144 void poll_initwait(struct poll_wqueues *pwq)
145 {
146 	init_poll_funcptr(&pwq->pt, __pollwait);
147 	pwq->polling_task = current;
148 	pwq->triggered = 0;
149 	pwq->error = 0;
150 	pwq->table = NULL;
151 	pwq->inline_index = 0;
152 }
153 EXPORT_SYMBOL(poll_initwait);
154 
155 static void free_poll_entry(struct poll_table_entry *entry)
156 {
157 	remove_wait_queue(entry->wait_address, &entry->wait);
158 	fput(entry->filp);
159 }
160 
161 void poll_freewait(struct poll_wqueues *pwq)
162 {
163 	struct poll_table_page * p = pwq->table;
164 	int i;
165 	for (i = 0; i < pwq->inline_index; i++)
166 		free_poll_entry(pwq->inline_entries + i);
167 	while (p) {
168 		struct poll_table_entry * entry;
169 		struct poll_table_page *old;
170 
171 		entry = p->entry;
172 		do {
173 			entry--;
174 			free_poll_entry(entry);
175 		} while (entry > p->entries);
176 		old = p;
177 		p = p->next;
178 		free_page((unsigned long) old);
179 	}
180 }
181 EXPORT_SYMBOL(poll_freewait);
182 
183 static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
184 {
185 	struct poll_table_page *table = p->table;
186 
187 	if (p->inline_index < N_INLINE_POLL_ENTRIES)
188 		return p->inline_entries + p->inline_index++;
189 
190 	if (!table || POLL_TABLE_FULL(table)) {
191 		struct poll_table_page *new_table;
192 
193 		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
194 		if (!new_table) {
195 			p->error = -ENOMEM;
196 			return NULL;
197 		}
198 		new_table->entry = new_table->entries;
199 		new_table->next = table;
200 		p->table = new_table;
201 		table = new_table;
202 	}
203 
204 	return table->entry++;
205 }
206 
207 static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
208 {
209 	struct poll_wqueues *pwq = wait->private;
210 	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
211 
212 	/*
213 	 * Although this function is called under waitqueue lock, LOCK
214 	 * doesn't imply write barrier and the users expect write
215 	 * barrier semantics on wakeup functions.  The following
216 	 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
217 	 * and is paired with smp_store_mb() in poll_schedule_timeout.
218 	 */
219 	smp_wmb();
220 	pwq->triggered = 1;
221 
222 	/*
223 	 * Perform the default wake up operation using a dummy
224 	 * waitqueue.
225 	 *
226 	 * TODO: This is hacky but there currently is no interface to
227 	 * pass in @sync.  @sync is scheduled to be removed and once
228 	 * that happens, wake_up_process() can be used directly.
229 	 */
230 	return default_wake_function(&dummy_wait, mode, sync, key);
231 }
232 
233 static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
234 {
235 	struct poll_table_entry *entry;
236 
237 	entry = container_of(wait, struct poll_table_entry, wait);
238 	if (key && !(key_to_poll(key) & entry->key))
239 		return 0;
240 	return __pollwake(wait, mode, sync, key);
241 }
242 
243 /* Add a new entry */
244 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
245 				poll_table *p)
246 {
247 	struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
248 	struct poll_table_entry *entry = poll_get_entry(pwq);
249 	if (!entry)
250 		return;
251 	entry->filp = get_file(filp);
252 	entry->wait_address = wait_address;
253 	entry->key = p->_key;
254 	init_waitqueue_func_entry(&entry->wait, pollwake);
255 	entry->wait.private = pwq;
256 	add_wait_queue(wait_address, &entry->wait);
257 }
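/*
 * Each entry added above pins the file with get_file(), remembers the
 * wait queue head it was hung on, and installs pollwake() as the wake
 * function.  pollwake() ignores wakeups whose key does not intersect
 * the events we asked for; otherwise it marks the table as triggered
 * and wakes the polling task.  Everything is torn down again in
 * poll_freewait().
 */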
258 
259 static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
260 			  ktime_t *expires, unsigned long slack)
261 {
262 	int rc = -EINTR;
263 
264 	set_current_state(state);
265 	if (!pwq->triggered)
266 		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
267 	__set_current_state(TASK_RUNNING);
268 
269 	/*
270 	 * Prepare for the next iteration.
271 	 *
272 	 * The following smp_store_mb() serves two purposes.  First, it's
273 	 * the counterpart rmb of the wmb in pollwake() such that data
274 	 * written before wake up is always visible after wake up.
275 	 * Second, the full barrier guarantees that triggered clearing
276 	 * doesn't pass event check of the next iteration.  Note that
277 	 * this problem doesn't exist for the first iteration as
278 	 * add_wait_queue() has full barrier semantics.
279 	 */
280 	smp_store_mb(pwq->triggered, 0);
281 
282 	return rc;
283 }
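/*
 * Note for callers: the return value is 0 only when the hrtimer
 * actually expired; a wakeup from pollwake() or a pending signal shows
 * up as -EINTR.  do_select() and do_poll() use !poll_schedule_timeout()
 * to decide that the next loop iteration should be the last one
 * (timed_out = 1).
 */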
284 
285 /**
286  * poll_select_set_timeout - helper function to setup the timeout value
287  * @to:		pointer to timespec64 variable for the final timeout
288  * @sec:	seconds (from user space)
289  * @nsec:	nanoseconds (from user space)
290  *
291  * Note, we do not use a timespec for the user space value here. That
292  * way we can use the function for timeval and compat interfaces as well.
293  *
294  * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
295  */
296 int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
297 {
298 	struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec};
299 
300 	if (!timespec64_valid(&ts))
301 		return -EINVAL;
302 
303 	/* Optimize for the zero timeout value here */
304 	if (!sec && !nsec) {
305 		to->tv_sec = to->tv_nsec = 0;
306 	} else {
307 		ktime_get_ts64(to);
308 		*to = timespec64_add_safe(*to, ts);
309 	}
310 	return 0;
311 }
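/*
 * Example (illustrative): a relative timeout of { .tv_sec = 1,
 * .tv_nsec = 500000000 } becomes an absolute expiry of "now + 1.5s" in
 * *to, while a zero timeout leaves *to at 0/0, which the callers treat
 * as "poll once and return immediately".
 */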
312 
313 static int poll_select_copy_remaining(struct timespec64 *end_time,
314 				      void __user *p,
315 				      int timeval, int ret)
316 {
317 	struct timespec64 rts;
318 	struct timeval rtv;
319 
320 	if (!p)
321 		return ret;
322 
323 	if (current->personality & STICKY_TIMEOUTS)
324 		goto sticky;
325 
326 	/* No update for zero timeout */
327 	if (!end_time->tv_sec && !end_time->tv_nsec)
328 		return ret;
329 
330 	ktime_get_ts64(&rts);
331 	rts = timespec64_sub(*end_time, rts);
332 	if (rts.tv_sec < 0)
333 		rts.tv_sec = rts.tv_nsec = 0;
334 
335 
336 	if (timeval) {
337 		if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
338 			memset(&rtv, 0, sizeof(rtv));
339 		rtv.tv_sec = rts.tv_sec;
340 		rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
341 
342 		if (!copy_to_user(p, &rtv, sizeof(rtv)))
343 			return ret;
344 
345 	} else if (!put_timespec64(&rts, p))
346 		return ret;
347 
348 	/*
349 	 * If an application puts its timeval in read-only memory, we
350 	 * don't want the Linux-specific update to the timeval to
351 	 * cause a fault after the select has completed
352 	 * successfully. However, because we're not updating the
353 	 * timeval, we can't restart the system call.
354 	 */
355 
356 sticky:
357 	if (ret == -ERESTARTNOHAND)
358 		ret = -EINTR;
359 	return ret;
360 }
361 
362 /*
363  * Scalable version of the fd_set.
364  */
365 
366 typedef struct {
367 	unsigned long *in, *out, *ex;
368 	unsigned long *res_in, *res_out, *res_ex;
369 } fd_set_bits;
370 
371 /*
372  * How many longwords for "nr" bits?
373  */
374 #define FDS_BITPERLONG	(8*sizeof(long))
375 #define FDS_LONGS(nr)	(((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)
376 #define FDS_BYTES(nr)	(FDS_LONGS(nr)*sizeof(long))
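/*
 * For example, with 64-bit longs a 1024-fd set needs FDS_LONGS(1024) =
 * 16 longwords, i.e. FDS_BYTES(1024) = 128 bytes per bitmap.
 */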
377 
378 /*
379  * We do a VERIFY_WRITE here even though we are only reading this time:
380  * we'll write to it eventually..
381  *
382  * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
383  */
384 static inline
385 int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
386 {
387 	nr = FDS_BYTES(nr);
388 	if (ufdset)
389 		return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0;
390 
391 	memset(fdset, 0, nr);
392 	return 0;
393 }
394 
395 static inline unsigned long __must_check
396 set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
397 {
398 	if (ufdset)
399 		return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
400 	return 0;
401 }
402 
403 static inline
404 void zero_fd_set(unsigned long nr, unsigned long *fdset)
405 {
406 	memset(fdset, 0, FDS_BYTES(nr));
407 }
408 
409 #define FDS_IN(fds, n)		(fds->in + n)
410 #define FDS_OUT(fds, n)		(fds->out + n)
411 #define FDS_EX(fds, n)		(fds->ex + n)
412 
413 #define BITS(fds, n)	(*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))
414 
415 static int max_select_fd(unsigned long n, fd_set_bits *fds)
416 {
417 	unsigned long *open_fds;
418 	unsigned long set;
419 	int max;
420 	struct fdtable *fdt;
421 
422 	/* handle the last incomplete long-word first */
423 	set = ~(~0UL << (n & (BITS_PER_LONG-1)));
424 	n /= BITS_PER_LONG;
425 	fdt = files_fdtable(current->files);
426 	open_fds = fdt->open_fds + n;
427 	max = 0;
428 	if (set) {
429 		set &= BITS(fds, n);
430 		if (set) {
431 			if (!(set & ~*open_fds))
432 				goto get_max;
433 			return -EBADF;
434 		}
435 	}
436 	while (n) {
437 		open_fds--;
438 		n--;
439 		set = BITS(fds, n);
440 		if (!set)
441 			continue;
442 		if (set & ~*open_fds)
443 			return -EBADF;
444 		if (max)
445 			continue;
446 get_max:
447 		do {
448 			max++;
449 			set >>= 1;
450 		} while (set);
451 		max += n * BITS_PER_LONG;
452 	}
453 
454 	return max;
455 }
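/*
 * max_select_fd() walks the three request bitmaps from the highest
 * (possibly partial) longword downwards.  Any requested bit that does
 * not correspond to an open descriptor yields -EBADF; otherwise the
 * function returns one more than the highest requested fd, which lets
 * do_select() shrink n to the part of the bitmaps actually in use.
 */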
456 
457 #define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)
458 #define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)
459 #define POLLEX_SET (EPOLLPRI)
460 
461 static inline void wait_key_set(poll_table *wait, unsigned long in,
462 				unsigned long out, unsigned long bit,
463 				__poll_t ll_flag)
464 {
465 	wait->_key = POLLEX_SET | ll_flag;
466 	if (in & bit)
467 		wait->_key |= POLLIN_SET;
468 	if (out & bit)
469 		wait->_key |= POLLOUT_SET;
470 }
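/*
 * wait_key_set() translates "this fd is in the read and/or write set"
 * into the poll event mask we are interested in.  EPOLLPRI (the
 * "exceptional" set) is always requested, and POLLIN_SET/POLLOUT_SET
 * deliberately include EPOLLHUP/EPOLLERR so that hangups and errors
 * make a descriptor show up as readable/writable, matching select()
 * semantics.
 */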
471 
472 static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
473 {
474 	ktime_t expire, *to = NULL;
475 	struct poll_wqueues table;
476 	poll_table *wait;
477 	int retval, i, timed_out = 0;
478 	u64 slack = 0;
479 	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
480 	unsigned long busy_start = 0;
481 
482 	rcu_read_lock();
483 	retval = max_select_fd(n, fds);
484 	rcu_read_unlock();
485 
486 	if (retval < 0)
487 		return retval;
488 	n = retval;
489 
490 	poll_initwait(&table);
491 	wait = &table.pt;
492 	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
493 		wait->_qproc = NULL;
494 		timed_out = 1;
495 	}
496 
497 	if (end_time && !timed_out)
498 		slack = select_estimate_accuracy(end_time);
499 
500 	retval = 0;
501 	for (;;) {
502 		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
503 		bool can_busy_loop = false;
504 
505 		inp = fds->in; outp = fds->out; exp = fds->ex;
506 		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
507 
508 		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
509 			unsigned long in, out, ex, all_bits, bit = 1, j;
510 			unsigned long res_in = 0, res_out = 0, res_ex = 0;
511 			__poll_t mask;
512 
513 			in = *inp++; out = *outp++; ex = *exp++;
514 			all_bits = in | out | ex;
515 			if (all_bits == 0) {
516 				i += BITS_PER_LONG;
517 				continue;
518 			}
519 
520 			for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
521 				struct fd f;
522 				if (i >= n)
523 					break;
524 				if (!(bit & all_bits))
525 					continue;
526 				f = fdget(i);
527 				if (f.file) {
528 					wait_key_set(wait, in, out, bit,
529 						     busy_flag);
530 					mask = vfs_poll(f.file, wait);
531 
532 					fdput(f);
533 					if ((mask & POLLIN_SET) && (in & bit)) {
534 						res_in |= bit;
535 						retval++;
536 						wait->_qproc = NULL;
537 					}
538 					if ((mask & POLLOUT_SET) && (out & bit)) {
539 						res_out |= bit;
540 						retval++;
541 						wait->_qproc = NULL;
542 					}
543 					if ((mask & POLLEX_SET) && (ex & bit)) {
544 						res_ex |= bit;
545 						retval++;
546 						wait->_qproc = NULL;
547 					}
548 					/* got something, stop busy polling */
549 					if (retval) {
550 						can_busy_loop = false;
551 						busy_flag = 0;
552 
553 					/*
554 					 * only remember a returned
555 					 * POLL_BUSY_LOOP if we asked for it
556 					 */
557 					} else if (busy_flag & mask)
558 						can_busy_loop = true;
559 
560 				}
561 			}
562 			if (res_in)
563 				*rinp = res_in;
564 			if (res_out)
565 				*routp = res_out;
566 			if (res_ex)
567 				*rexp = res_ex;
568 			cond_resched();
569 		}
570 		wait->_qproc = NULL;
571 		if (retval || timed_out || signal_pending(current))
572 			break;
573 		if (table.error) {
574 			retval = table.error;
575 			break;
576 		}
577 
578 		/* only if found POLL_BUSY_LOOP sockets && not out of time */
579 		if (can_busy_loop && !need_resched()) {
580 			if (!busy_start) {
581 				busy_start = busy_loop_current_time();
582 				continue;
583 			}
584 			if (!busy_loop_timeout(busy_start))
585 				continue;
586 		}
587 		busy_flag = 0;
588 
589 		/*
590 		 * If this is the first loop and we have a timeout
591 		 * given, then we convert to ktime_t and set the to
592 		 * pointer to the expiry value.
593 		 */
594 		if (end_time && !to) {
595 			expire = timespec64_to_ktime(*end_time);
596 			to = &expire;
597 		}
598 
599 		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
600 					   to, slack))
601 			timed_out = 1;
602 	}
603 
604 	poll_freewait(&table);
605 
606 	return retval;
607 }
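/*
 * do_select() returns the number of bits set across the three result
 * bitmaps (a descriptor ready in two sets counts twice), 0 on timeout,
 * or a negative errno.
 */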
608 
609 /*
610  * We can actually return ERESTARTSYS instead of EINTR, but I'd
611  * like to be certain this leads to no problems. So I return
612  * EINTR just for safety.
613  *
614  * Update: ERESTARTSYS breaks at least the xview clock binary, so
615  * I'm trying ERESTARTNOHAND, which restarts only when you want to.
616  */
617 int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
618 			   fd_set __user *exp, struct timespec64 *end_time)
619 {
620 	fd_set_bits fds;
621 	void *bits;
622 	int ret, max_fds;
623 	size_t size, alloc_size;
624 	struct fdtable *fdt;
625 	/* Allocate small arguments on the stack to save memory and be faster */
626 	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
627 
628 	ret = -EINVAL;
629 	if (n < 0)
630 		goto out_nofds;
631 
632 	/* max_fds can increase, so grab it once to avoid race */
633 	rcu_read_lock();
634 	fdt = files_fdtable(current->files);
635 	max_fds = fdt->max_fds;
636 	rcu_read_unlock();
637 	if (n > max_fds)
638 		n = max_fds;
639 
640 	/*
641 	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing);
642 	 * since we use an fd_set, we need to allocate memory in units of
643 	 * long-words.
644 	 */
645 	size = FDS_BYTES(n);
646 	bits = stack_fds;
647 	if (size > sizeof(stack_fds) / 6) {
648 		/* Not enough space in on-stack array; must use kvmalloc */
649 		ret = -ENOMEM;
650 		if (size > (SIZE_MAX / 6))
651 			goto out_nofds;
652 
653 		alloc_size = 6 * size;
654 		bits = kvmalloc(alloc_size, GFP_KERNEL);
655 		if (!bits)
656 			goto out_nofds;
657 	}
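	/*
	 * Illustrative layout: "bits" holds six consecutive bitmaps of
	 * "size" bytes each - in/out/ex as copied from userspace,
	 * followed by res_in/res_out/res_ex for the results.  Assuming
	 * the usual 256-byte SELECT_STACK_ALLOC and 64-bit longs, the
	 * on-stack buffer covers roughly n <= 320 descriptors; anything
	 * bigger falls back to kvmalloc().
	 */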
658 	fds.in      = bits;
659 	fds.out     = bits +   size;
660 	fds.ex      = bits + 2*size;
661 	fds.res_in  = bits + 3*size;
662 	fds.res_out = bits + 4*size;
663 	fds.res_ex  = bits + 5*size;
664 
665 	if ((ret = get_fd_set(n, inp, fds.in)) ||
666 	    (ret = get_fd_set(n, outp, fds.out)) ||
667 	    (ret = get_fd_set(n, exp, fds.ex)))
668 		goto out;
669 	zero_fd_set(n, fds.res_in);
670 	zero_fd_set(n, fds.res_out);
671 	zero_fd_set(n, fds.res_ex);
672 
673 	ret = do_select(n, &fds, end_time);
674 
675 	if (ret < 0)
676 		goto out;
677 	if (!ret) {
678 		ret = -ERESTARTNOHAND;
679 		if (signal_pending(current))
680 			goto out;
681 		ret = 0;
682 	}
683 
684 	if (set_fd_set(n, inp, fds.res_in) ||
685 	    set_fd_set(n, outp, fds.res_out) ||
686 	    set_fd_set(n, exp, fds.res_ex))
687 		ret = -EFAULT;
688 
689 out:
690 	if (bits != stack_fds)
691 		kvfree(bits);
692 out_nofds:
693 	return ret;
694 }
695 
696 static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
697 		       fd_set __user *exp, struct timeval __user *tvp)
698 {
699 	struct timespec64 end_time, *to = NULL;
700 	struct timeval tv;
701 	int ret;
702 
703 	if (tvp) {
704 		if (copy_from_user(&tv, tvp, sizeof(tv)))
705 			return -EFAULT;
706 
707 		to = &end_time;
708 		if (poll_select_set_timeout(to,
709 				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
710 				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
711 			return -EINVAL;
712 	}
713 
714 	ret = core_sys_select(n, inp, outp, exp, to);
715 	ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
716 
717 	return ret;
718 }
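/*
 * The tv_sec/tv_usec split above normalizes a denormalized timeval
 * (tv_usec >= USEC_PER_SEC) by folding whole seconds into the seconds
 * part before poll_select_set_timeout() validates the result, so e.g.
 * { 0, 1500000 } is treated as 1.5 seconds rather than rejected.
 */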
719 
720 SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
721 		fd_set __user *, exp, struct timeval __user *, tvp)
722 {
723 	return kern_select(n, inp, outp, exp, tvp);
724 }
725 
726 static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
727 		       fd_set __user *exp, struct timespec __user *tsp,
728 		       const sigset_t __user *sigmask, size_t sigsetsize)
729 {
730 	sigset_t ksigmask, sigsaved;
731 	struct timespec64 ts, end_time, *to = NULL;
732 	int ret;
733 
734 	if (tsp) {
735 		if (get_timespec64(&ts, tsp))
736 			return -EFAULT;
737 
738 		to = &end_time;
739 		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
740 			return -EINVAL;
741 	}
742 
743 	if (sigmask) {
744 		/* XXX: Don't preclude handling different sized sigset_t's.  */
745 		if (sigsetsize != sizeof(sigset_t))
746 			return -EINVAL;
747 		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
748 			return -EFAULT;
749 
750 		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
751 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
752 	}
753 
754 	ret = core_sys_select(n, inp, outp, exp, to);
755 	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
756 
757 	if (ret == -ERESTARTNOHAND) {
758 		/*
759 		 * Don't restore the signal mask yet. Let do_signal() deliver
760 		 * the signal on the way back to userspace, before the signal
761 		 * mask is restored.
762 		 */
763 		if (sigmask) {
764 			memcpy(&current->saved_sigmask, &sigsaved,
765 					sizeof(sigsaved));
766 			set_restore_sigmask();
767 		}
768 	} else if (sigmask)
769 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
770 
771 	return ret;
772 }
773 
774 /*
775  * Most architectures can't handle 7-argument syscalls. So we provide a
776  * 6-argument version where the sixth argument is a pointer to a structure
777  * which has a pointer to the sigset_t itself followed by a size_t containing
778  * the sigset size.
779  */
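/*
 * For illustration, the userspace wrapper (glibc, for example) packs
 * the last two logical arguments into a small structure, roughly:
 *
 *	struct {
 *		const sigset_t *ss;	(pointer to the signal mask)
 *		size_t ss_len;		(sizeof(sigset_t))
 *	} sig = { &mask, sizeof(mask) };
 *
 * and passes &sig as the sixth argument; the handler below unpacks the
 * pointer and the size with __get_user().
 */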
780 SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
781 		fd_set __user *, exp, struct timespec __user *, tsp,
782 		void __user *, sig)
783 {
784 	size_t sigsetsize = 0;
785 	sigset_t __user *up = NULL;
786 
787 	if (sig) {
788 		if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
789 		    || __get_user(up, (sigset_t __user * __user *)sig)
790 		    || __get_user(sigsetsize,
791 				(size_t __user *)(sig+sizeof(void *))))
792 			return -EFAULT;
793 	}
794 
795 	return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);
796 }
797 
798 #ifdef __ARCH_WANT_SYS_OLD_SELECT
799 struct sel_arg_struct {
800 	unsigned long n;
801 	fd_set __user *inp, *outp, *exp;
802 	struct timeval __user *tvp;
803 };
804 
805 SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
806 {
807 	struct sel_arg_struct a;
808 
809 	if (copy_from_user(&a, arg, sizeof(a)))
810 		return -EFAULT;
811 	return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp);
812 }
813 #endif
814 
815 struct poll_list {
816 	struct poll_list *next;
817 	int len;
818 	struct pollfd entries[0];
819 };
820 
821 #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
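/*
 * Rough numbers, assuming 4 KiB pages and a 64-bit build: struct
 * poll_list is 16 bytes and struct pollfd is 8 bytes, so each chained
 * allocation after the on-stack chunk holds about 510 pollfds.
 */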
822 
823 /*
824  * Fish for pollable events on the pollfd->fd file descriptor. We're only
825  * interested in events matching the pollfd->events mask, and the result
826  * matching that mask is both recorded in pollfd->revents and returned. The
827  * pwait poll_table will be used by the fd-provided poll handler for waiting,
828  * if pwait->_qproc is non-NULL.
829  */
830 static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
831 				     bool *can_busy_poll,
832 				     __poll_t busy_flag)
833 {
834 	int fd = pollfd->fd;
835 	__poll_t mask = 0, filter;
836 	struct fd f;
837 
838 	if (fd < 0)
839 		goto out;
840 	mask = EPOLLNVAL;
841 	f = fdget(fd);
842 	if (!f.file)
843 		goto out;
844 
845 	/* userland u16 ->events contains POLL... bitmap */
846 	filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
847 	pwait->_key = filter | busy_flag;
848 	mask = vfs_poll(f.file, pwait);
849 	if (mask & busy_flag)
850 		*can_busy_poll = true;
851 	mask &= filter;		/* Mask out unneeded events. */
852 	fdput(f);
853 
854 out:
855 	/* ... and so does ->revents */
856 	pollfd->revents = mangle_poll(mask);
857 	return mask;
858 }
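/*
 * Summary of do_pollfd(): a negative fd is silently ignored (revents =
 * 0), an fd that does not resolve to a file reports EPOLLNVAL, and for
 * valid files EPOLLERR/EPOLLHUP are always added to the filter so that
 * errors and hangups are reported even if the caller did not ask for
 * them, as POSIX requires for poll().
 */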
859 
860 static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
861 		   struct timespec64 *end_time)
862 {
863 	poll_table* pt = &wait->pt;
864 	ktime_t expire, *to = NULL;
865 	int timed_out = 0, count = 0;
866 	u64 slack = 0;
867 	__poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
868 	unsigned long busy_start = 0;
869 
870 	/* Optimise the no-wait case */
871 	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
872 		pt->_qproc = NULL;
873 		timed_out = 1;
874 	}
875 
876 	if (end_time && !timed_out)
877 		slack = select_estimate_accuracy(end_time);
878 
879 	for (;;) {
880 		struct poll_list *walk;
881 		bool can_busy_loop = false;
882 
883 		for (walk = list; walk != NULL; walk = walk->next) {
884 			struct pollfd * pfd, * pfd_end;
885 
886 			pfd = walk->entries;
887 			pfd_end = pfd + walk->len;
888 			for (; pfd != pfd_end; pfd++) {
889 				/*
890 				 * Fish for events. If we found one, record it
891 				 * and kill poll_table->_qproc, so we don't
892 				 * needlessly register any other waiters after
893 				 * this. They'll get immediately deregistered
894 				 * when we break out and return.
895 				 */
896 				if (do_pollfd(pfd, pt, &can_busy_loop,
897 					      busy_flag)) {
898 					count++;
899 					pt->_qproc = NULL;
900 					/* found something, stop busy polling */
901 					busy_flag = 0;
902 					can_busy_loop = false;
903 				}
904 			}
905 		}
906 		/*
907 		 * All waiters have already been registered, so don't provide
908 		 * a poll_table->_qproc to them on the next loop iteration.
909 		 */
910 		pt->_qproc = NULL;
911 		if (!count) {
912 			count = wait->error;
913 			if (signal_pending(current))
914 				count = -EINTR;
915 		}
916 		if (count || timed_out)
917 			break;
918 
919 		/* only if found POLL_BUSY_LOOP sockets && not out of time */
920 		if (can_busy_loop && !need_resched()) {
921 			if (!busy_start) {
922 				busy_start = busy_loop_current_time();
923 				continue;
924 			}
925 			if (!busy_loop_timeout(busy_start))
926 				continue;
927 		}
928 		busy_flag = 0;
929 
930 		/*
931 		 * If this is the first loop and we have a timeout
932 		 * given, then we convert to ktime_t and set the to
933 		 * pointer to the expiry value.
934 		 */
935 		if (end_time && !to) {
936 			expire = timespec64_to_ktime(*end_time);
937 			to = &expire;
938 		}
939 
940 		if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
941 			timed_out = 1;
942 	}
943 	return count;
944 }
945 
946 #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
947 			sizeof(struct pollfd))
948 
949 static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
950 		struct timespec64 *end_time)
951 {
952 	struct poll_wqueues table;
953  	int err = -EFAULT, fdcount, len, size;
954 	/* Allocate small arguments on the stack to save memory and be
955 	   faster - use long to make sure the buffer is aligned properly
956 	   on 64 bit archs to avoid unaligned access */
957 	long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
958 	struct poll_list *const head = (struct poll_list *)stack_pps;
959  	struct poll_list *walk = head;
960  	unsigned long todo = nfds;
961 
962 	if (nfds > rlimit(RLIMIT_NOFILE))
963 		return -EINVAL;
964 
965 	len = min_t(unsigned int, nfds, N_STACK_PPS);
966 	for (;;) {
967 		walk->next = NULL;
968 		walk->len = len;
969 		if (!len)
970 			break;
971 
972 		if (copy_from_user(walk->entries, ufds + nfds-todo,
973 					sizeof(struct pollfd) * walk->len))
974 			goto out_fds;
975 
976 		todo -= walk->len;
977 		if (!todo)
978 			break;
979 
980 		len = min(todo, POLLFD_PER_PAGE);
981 		size = sizeof(struct poll_list) + sizeof(struct pollfd) * len;
982 		walk = walk->next = kmalloc(size, GFP_KERNEL);
983 		if (!walk) {
984 			err = -ENOMEM;
985 			goto out_fds;
986 		}
987 	}
988 
989 	poll_initwait(&table);
990 	fdcount = do_poll(head, &table, end_time);
991 	poll_freewait(&table);
992 
993 	for (walk = head; walk; walk = walk->next) {
994 		struct pollfd *fds = walk->entries;
995 		int j;
996 
997 		for (j = 0; j < walk->len; j++, ufds++)
998 			if (__put_user(fds[j].revents, &ufds->revents))
999 				goto out_fds;
1000   	}
1001 
1002 	err = fdcount;
1003 out_fds:
1004 	walk = head->next;
1005 	while (walk) {
1006 		struct poll_list *pos = walk;
1007 		walk = walk->next;
1008 		kfree(pos);
1009 	}
1010 
1011 	return err;
1012 }
1013 
1014 static long do_restart_poll(struct restart_block *restart_block)
1015 {
1016 	struct pollfd __user *ufds = restart_block->poll.ufds;
1017 	int nfds = restart_block->poll.nfds;
1018 	struct timespec64 *to = NULL, end_time;
1019 	int ret;
1020 
1021 	if (restart_block->poll.has_timeout) {
1022 		end_time.tv_sec = restart_block->poll.tv_sec;
1023 		end_time.tv_nsec = restart_block->poll.tv_nsec;
1024 		to = &end_time;
1025 	}
1026 
1027 	ret = do_sys_poll(ufds, nfds, to);
1028 
1029 	if (ret == -EINTR) {
1030 		restart_block->fn = do_restart_poll;
1031 		ret = -ERESTART_RESTARTBLOCK;
1032 	}
1033 	return ret;
1034 }
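/*
 * The restart machinery above keeps the original *absolute* expiry in
 * the restart block, so a poll() that is transparently restarted after
 * an ignored signal does not have its timeout extended - it keeps
 * counting down toward the end_time computed on first entry.
 */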
1035 
1036 SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
1037 		int, timeout_msecs)
1038 {
1039 	struct timespec64 end_time, *to = NULL;
1040 	int ret;
1041 
1042 	if (timeout_msecs >= 0) {
1043 		to = &end_time;
1044 		poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
1045 			NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
1046 	}
1047 
1048 	ret = do_sys_poll(ufds, nfds, to);
1049 
1050 	if (ret == -EINTR) {
1051 		struct restart_block *restart_block;
1052 
1053 		restart_block = &current->restart_block;
1054 		restart_block->fn = do_restart_poll;
1055 		restart_block->poll.ufds = ufds;
1056 		restart_block->poll.nfds = nfds;
1057 
1058 		if (timeout_msecs >= 0) {
1059 			restart_block->poll.tv_sec = end_time.tv_sec;
1060 			restart_block->poll.tv_nsec = end_time.tv_nsec;
1061 			restart_block->poll.has_timeout = 1;
1062 		} else
1063 			restart_block->poll.has_timeout = 0;
1064 
1065 		ret = -ERESTART_RESTARTBLOCK;
1066 	}
1067 	return ret;
1068 }
1069 
1070 SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
1071 		struct timespec __user *, tsp, const sigset_t __user *, sigmask,
1072 		size_t, sigsetsize)
1073 {
1074 	sigset_t ksigmask, sigsaved;
1075 	struct timespec64 ts, end_time, *to = NULL;
1076 	int ret;
1077 
1078 	if (tsp) {
1079 		if (get_timespec64(&ts, tsp))
1080 			return -EFAULT;
1081 
1082 		to = &end_time;
1083 		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1084 			return -EINVAL;
1085 	}
1086 
1087 	if (sigmask) {
1088 		/* XXX: Don't preclude handling different sized sigset_t's.  */
1089 		if (sigsetsize != sizeof(sigset_t))
1090 			return -EINVAL;
1091 		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
1092 			return -EFAULT;
1093 
1094 		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1095 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1096 	}
1097 
1098 	ret = do_sys_poll(ufds, nfds, to);
1099 
1100 	/* We can restart this syscall, usually */
1101 	if (ret == -EINTR) {
1102 		/*
1103 		 * Don't restore the signal mask yet. Let do_signal() deliver
1104 		 * the signal on the way back to userspace, before the signal
1105 		 * mask is restored.
1106 		 */
1107 		if (sigmask) {
1108 			memcpy(&current->saved_sigmask, &sigsaved,
1109 					sizeof(sigsaved));
1110 			set_restore_sigmask();
1111 		}
1112 		ret = -ERESTARTNOHAND;
1113 	} else if (sigmask)
1114 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1115 
1116 	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1117 
1118 	return ret;
1119 }
1120 
1121 #ifdef CONFIG_COMPAT
1122 #define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t))
1123 
1124 static
1125 int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *p,
1126 				      int timeval, int ret)
1127 {
1128 	struct timespec64 ts;
1129 
1130 	if (!p)
1131 		return ret;
1132 
1133 	if (current->personality & STICKY_TIMEOUTS)
1134 		goto sticky;
1135 
1136 	/* No update for zero timeout */
1137 	if (!end_time->tv_sec && !end_time->tv_nsec)
1138 		return ret;
1139 
1140 	ktime_get_ts64(&ts);
1141 	ts = timespec64_sub(*end_time, ts);
1142 	if (ts.tv_sec < 0)
1143 		ts.tv_sec = ts.tv_nsec = 0;
1144 
1145 	if (timeval) {
1146 		struct compat_timeval rtv;
1147 
1148 		rtv.tv_sec = ts.tv_sec;
1149 		rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
1150 
1151 		if (!copy_to_user(p, &rtv, sizeof(rtv)))
1152 			return ret;
1153 	} else {
1154 		if (!compat_put_timespec64(&ts, p))
1155 			return ret;
1156 	}
1157 	/*
1158 	 * If an application puts its timeval in read-only memory, we
1159 	 * don't want the Linux-specific update to the timeval to
1160 	 * cause a fault after the select has completed
1161 	 * successfully. However, because we're not updating the
1162 	 * timeval, we can't restart the system call.
1163 	 */
1164 
1165 sticky:
1166 	if (ret == -ERESTARTNOHAND)
1167 		ret = -EINTR;
1168 	return ret;
1169 }
1170 
1171 /*
1172  * Ooo, nasty.  We need to frob 32-bit unsigned longs into
1173  * 64-bit unsigned longs here.
1174  */
1175 static
1176 int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1177 			unsigned long *fdset)
1178 {
1179 	if (ufdset) {
1180 		return compat_get_bitmap(fdset, ufdset, nr);
1181 	} else {
1182 		zero_fd_set(nr, fdset);
1183 		return 0;
1184 	}
1185 }
1186 
1187 static
1188 int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1189 		      unsigned long *fdset)
1190 {
1191 	if (!ufdset)
1192 		return 0;
1193 	return compat_put_bitmap(ufdset, fdset, nr);
1194 }
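/*
 * compat_get_bitmap()/compat_put_bitmap() do the actual repacking
 * between the 32-bit longword layout used by compat userspace and the
 * native unsigned long bitmaps that do_select() operates on, so the
 * core select logic never needs to know it is serving a compat task.
 */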
1195 
1196 
1197 /*
1198  * This is a virtual copy of sys_select from fs/select.c and probably
1199  * should be compared to it from time to time
1200  */
1201 
1202 /*
1203  * We can actually return ERESTARTSYS instead of EINTR, but I'd
1204  * like to be certain this leads to no problems. So I return
1205  * EINTR just for safety.
1206  *
1207  * Update: ERESTARTSYS breaks at least the xview clock binary, so
1208  * I'm trying ERESTARTNOHAND, which restarts only when you want to.
1209  */
1210 static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1211 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1212 	struct timespec64 *end_time)
1213 {
1214 	fd_set_bits fds;
1215 	void *bits;
1216 	int size, max_fds, ret = -EINVAL;
1217 	struct fdtable *fdt;
1218 	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
1219 
1220 	if (n < 0)
1221 		goto out_nofds;
1222 
1223 	/* max_fds can increase, so grab it once to avoid race */
1224 	rcu_read_lock();
1225 	fdt = files_fdtable(current->files);
1226 	max_fds = fdt->max_fds;
1227 	rcu_read_unlock();
1228 	if (n > max_fds)
1229 		n = max_fds;
1230 
1231 	/*
1232 	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing);
1233 	 * since we use an fd_set, we need to allocate memory in units of
1234 	 * long-words.
1235 	 */
1236 	size = FDS_BYTES(n);
1237 	bits = stack_fds;
1238 	if (size > sizeof(stack_fds) / 6) {
1239 		bits = kmalloc_array(6, size, GFP_KERNEL);
1240 		ret = -ENOMEM;
1241 		if (!bits)
1242 			goto out_nofds;
1243 	}
1244 	fds.in      = (unsigned long *)  bits;
1245 	fds.out     = (unsigned long *) (bits +   size);
1246 	fds.ex      = (unsigned long *) (bits + 2*size);
1247 	fds.res_in  = (unsigned long *) (bits + 3*size);
1248 	fds.res_out = (unsigned long *) (bits + 4*size);
1249 	fds.res_ex  = (unsigned long *) (bits + 5*size);
1250 
1251 	if ((ret = compat_get_fd_set(n, inp, fds.in)) ||
1252 	    (ret = compat_get_fd_set(n, outp, fds.out)) ||
1253 	    (ret = compat_get_fd_set(n, exp, fds.ex)))
1254 		goto out;
1255 	zero_fd_set(n, fds.res_in);
1256 	zero_fd_set(n, fds.res_out);
1257 	zero_fd_set(n, fds.res_ex);
1258 
1259 	ret = do_select(n, &fds, end_time);
1260 
1261 	if (ret < 0)
1262 		goto out;
1263 	if (!ret) {
1264 		ret = -ERESTARTNOHAND;
1265 		if (signal_pending(current))
1266 			goto out;
1267 		ret = 0;
1268 	}
1269 
1270 	if (compat_set_fd_set(n, inp, fds.res_in) ||
1271 	    compat_set_fd_set(n, outp, fds.res_out) ||
1272 	    compat_set_fd_set(n, exp, fds.res_ex))
1273 		ret = -EFAULT;
1274 out:
1275 	if (bits != stack_fds)
1276 		kfree(bits);
1277 out_nofds:
1278 	return ret;
1279 }
1280 
1281 static int do_compat_select(int n, compat_ulong_t __user *inp,
1282 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1283 	struct compat_timeval __user *tvp)
1284 {
1285 	struct timespec64 end_time, *to = NULL;
1286 	struct compat_timeval tv;
1287 	int ret;
1288 
1289 	if (tvp) {
1290 		if (copy_from_user(&tv, tvp, sizeof(tv)))
1291 			return -EFAULT;
1292 
1293 		to = &end_time;
1294 		if (poll_select_set_timeout(to,
1295 				tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
1296 				(tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
1297 			return -EINVAL;
1298 	}
1299 
1300 	ret = compat_core_sys_select(n, inp, outp, exp, to);
1301 	ret = compat_poll_select_copy_remaining(&end_time, tvp, 1, ret);
1302 
1303 	return ret;
1304 }
1305 
1306 COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
1307 	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
1308 	struct compat_timeval __user *, tvp)
1309 {
1310 	return do_compat_select(n, inp, outp, exp, tvp);
1311 }
1312 
1313 struct compat_sel_arg_struct {
1314 	compat_ulong_t n;
1315 	compat_uptr_t inp;
1316 	compat_uptr_t outp;
1317 	compat_uptr_t exp;
1318 	compat_uptr_t tvp;
1319 };
1320 
1321 COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
1322 {
1323 	struct compat_sel_arg_struct a;
1324 
1325 	if (copy_from_user(&a, arg, sizeof(a)))
1326 		return -EFAULT;
1327 	return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
1328 				compat_ptr(a.exp), compat_ptr(a.tvp));
1329 }
1330 
1331 static long do_compat_pselect(int n, compat_ulong_t __user *inp,
1332 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1333 	struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
1334 	compat_size_t sigsetsize)
1335 {
1336 	sigset_t ksigmask, sigsaved;
1337 	struct timespec64 ts, end_time, *to = NULL;
1338 	int ret;
1339 
1340 	if (tsp) {
1341 		if (compat_get_timespec64(&ts, tsp))
1342 			return -EFAULT;
1343 
1344 		to = &end_time;
1345 		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1346 			return -EINVAL;
1347 	}
1348 
1349 	if (sigmask) {
1350 		if (sigsetsize != sizeof(compat_sigset_t))
1351 			return -EINVAL;
1352 		if (get_compat_sigset(&ksigmask, sigmask))
1353 			return -EFAULT;
1354 
1355 		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1356 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1357 	}
1358 
1359 	ret = compat_core_sys_select(n, inp, outp, exp, to);
1360 	ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);
1361 
1362 	if (ret == -ERESTARTNOHAND) {
1363 		/*
1364 		 * Don't restore the signal mask yet. Let do_signal() deliver
1365 		 * the signal on the way back to userspace, before the signal
1366 		 * mask is restored.
1367 		 */
1368 		if (sigmask) {
1369 			memcpy(&current->saved_sigmask, &sigsaved,
1370 					sizeof(sigsaved));
1371 			set_restore_sigmask();
1372 		}
1373 	} else if (sigmask)
1374 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1375 
1376 	return ret;
1377 }
1378 
1379 COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
1380 	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
1381 	struct compat_timespec __user *, tsp, void __user *, sig)
1382 {
1383 	compat_size_t sigsetsize = 0;
1384 	compat_uptr_t up = 0;
1385 
1386 	if (sig) {
1387 		if (!access_ok(VERIFY_READ, sig,
1388 				sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
1389 		    	__get_user(up, (compat_uptr_t __user *)sig) ||
1390 		    	__get_user(sigsetsize,
1391 				(compat_size_t __user *)(sig+sizeof(up))))
1392 			return -EFAULT;
1393 	}
1394 	return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up),
1395 				 sigsetsize);
1396 }
1397 
1398 COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
1399 	unsigned int,  nfds, struct compat_timespec __user *, tsp,
1400 	const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
1401 {
1402 	sigset_t ksigmask, sigsaved;
1403 	struct timespec64 ts, end_time, *to = NULL;
1404 	int ret;
1405 
1406 	if (tsp) {
1407 		if (compat_get_timespec64(&ts, tsp))
1408 			return -EFAULT;
1409 
1410 		to = &end_time;
1411 		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1412 			return -EINVAL;
1413 	}
1414 
1415 	if (sigmask) {
1416 		if (sigsetsize != sizeof(compat_sigset_t))
1417 			return -EINVAL;
1418 		if (get_compat_sigset(&ksigmask, sigmask))
1419 			return -EFAULT;
1420 
1421 		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1422 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1423 	}
1424 
1425 	ret = do_sys_poll(ufds, nfds, to);
1426 
1427 	/* We can restart this syscall, usually */
1428 	if (ret == -EINTR) {
1429 		/*
1430 		 * Don't restore the signal mask yet. Let do_signal() deliver
1431 		 * the signal on the way back to userspace, before the signal
1432 		 * mask is restored.
1433 		 */
1434 		if (sigmask) {
1435 			memcpy(&current->saved_sigmask, &sigsaved,
1436 				sizeof(sigsaved));
1437 			set_restore_sigmask();
1438 		}
1439 		ret = -ERESTARTNOHAND;
1440 	} else if (sigmask)
1441 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1442 
1443 	ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret);
1444 
1445 	return ret;
1446 }
1447 #endif
1448