xref: /openbmc/linux/kernel/time/posix-timers.c (revision 476ec641)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * 2002-10-15  Posix Clocks & timers
4  *                           by George Anzinger george@mvista.com
5  *			     Copyright (C) 2002 2003 by MontaVista Software.
6  *
7  * 2004-06-01  Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
8  *			     Copyright (C) 2004 Boris Hu
9  *
10  * These are all the functions necessary to implement POSIX clocks & timers
11  */
12 #include <linux/mm.h>
13 #include <linux/interrupt.h>
14 #include <linux/slab.h>
15 #include <linux/time.h>
16 #include <linux/mutex.h>
17 #include <linux/sched/task.h>
18 
19 #include <linux/uaccess.h>
20 #include <linux/list.h>
21 #include <linux/init.h>
22 #include <linux/compiler.h>
23 #include <linux/hash.h>
24 #include <linux/posix-clock.h>
25 #include <linux/posix-timers.h>
26 #include <linux/syscalls.h>
27 #include <linux/wait.h>
28 #include <linux/workqueue.h>
29 #include <linux/export.h>
30 #include <linux/hashtable.h>
31 #include <linux/compat.h>
32 #include <linux/nospec.h>
33 #include <linux/time_namespace.h>
34 
35 #include "timekeeping.h"
36 #include "posix-timers.h"
37 
38 static struct kmem_cache *posix_timers_cache;
39 
40 /*
41  * Timers are managed in a hash table for lockless lookup. The hash key is
42  * constructed from current::signal and the timer ID and the timer is
43  * matched against current::signal and the timer ID when walking the hash
44  * bucket list.
45  *
46  * This allows checkpoint/restore to reconstruct the exact timer IDs for
47  * a process.
48  */
49 static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
50 static DEFINE_SPINLOCK(hash_lock);
51 
52 static const struct k_clock * const posix_clocks[];
53 static const struct k_clock *clockid_to_kclock(const clockid_t id);
54 static const struct k_clock clock_realtime, clock_monotonic;
55 
/*
 * SIGEV_THREAD_ID is tested as a flag bit, so it must not overlap any bit
 * used by the other SIGEV values.
 */
#if (SIGEV_THREAD_ID & (SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) != 0
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif
61 
62 static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
63 
64 #define lock_timer(tid, flags)						   \
65 ({	struct k_itimer *__timr;					   \
66 	__cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags));  \
67 	__timr;								   \
68 })
69 
70 static int hash(struct signal_struct *sig, unsigned int nr)
71 {
72 	return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable));
73 }
74 
75 static struct k_itimer *__posix_timers_find(struct hlist_head *head,
76 					    struct signal_struct *sig,
77 					    timer_t id)
78 {
79 	struct k_itimer *timer;
80 
81 	hlist_for_each_entry_rcu(timer, head, t_hash, lockdep_is_held(&hash_lock)) {
82 		/* timer->it_signal can be set concurrently */
83 		if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id))
84 			return timer;
85 	}
86 	return NULL;
87 }
88 
89 static struct k_itimer *posix_timer_by_id(timer_t id)
90 {
91 	struct signal_struct *sig = current->signal;
92 	struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)];
93 
94 	return __posix_timers_find(head, sig, id);
95 }
96 
97 static int posix_timer_add(struct k_itimer *timer)
98 {
99 	struct signal_struct *sig = current->signal;
100 	struct hlist_head *head;
101 	unsigned int cnt, id;
102 
103 	/*
104 	 * FIXME: Replace this by a per signal struct xarray once there is
105 	 * a plan to handle the resulting CRIU regression gracefully.
106 	 */
107 	for (cnt = 0; cnt <= INT_MAX; cnt++) {
108 		spin_lock(&hash_lock);
109 		id = sig->next_posix_timer_id;
110 
111 		/* Write the next ID back. Clamp it to the positive space */
112 		sig->next_posix_timer_id = (id + 1) & INT_MAX;
113 
114 		head = &posix_timers_hashtable[hash(sig, id)];
115 		if (!__posix_timers_find(head, sig, id)) {
116 			hlist_add_head_rcu(&timer->t_hash, head);
117 			spin_unlock(&hash_lock);
118 			return id;
119 		}
120 		spin_unlock(&hash_lock);
121 	}
122 	/* POSIX return code when no timer ID could be allocated */
123 	return -EAGAIN;
124 }
125 
126 static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
127 {
128 	spin_unlock_irqrestore(&timr->it_lock, flags);
129 }
130 
131 static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp)
132 {
133 	ktime_get_real_ts64(tp);
134 	return 0;
135 }
136 
137 static ktime_t posix_get_realtime_ktime(clockid_t which_clock)
138 {
139 	return ktime_get_real();
140 }
141 
142 static int posix_clock_realtime_set(const clockid_t which_clock,
143 				    const struct timespec64 *tp)
144 {
145 	return do_sys_settimeofday64(tp, NULL);
146 }
147 
148 static int posix_clock_realtime_adj(const clockid_t which_clock,
149 				    struct __kernel_timex *t)
150 {
151 	return do_adjtimex(t);
152 }
153 
154 static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp)
155 {
156 	ktime_get_ts64(tp);
157 	timens_add_monotonic(tp);
158 	return 0;
159 }
160 
161 static ktime_t posix_get_monotonic_ktime(clockid_t which_clock)
162 {
163 	return ktime_get();
164 }
165 
166 static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp)
167 {
168 	ktime_get_raw_ts64(tp);
169 	timens_add_monotonic(tp);
170 	return 0;
171 }
172 
173 static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp)
174 {
175 	ktime_get_coarse_real_ts64(tp);
176 	return 0;
177 }
178 
179 static int posix_get_monotonic_coarse(clockid_t which_clock,
180 						struct timespec64 *tp)
181 {
182 	ktime_get_coarse_ts64(tp);
183 	timens_add_monotonic(tp);
184 	return 0;
185 }
186 
187 static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp)
188 {
189 	*tp = ktime_to_timespec64(KTIME_LOW_RES);
190 	return 0;
191 }
192 
193 static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp)
194 {
195 	ktime_get_boottime_ts64(tp);
196 	timens_add_boottime(tp);
197 	return 0;
198 }
199 
200 static ktime_t posix_get_boottime_ktime(const clockid_t which_clock)
201 {
202 	return ktime_get_boottime();
203 }
204 
205 static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp)
206 {
207 	ktime_get_clocktai_ts64(tp);
208 	return 0;
209 }
210 
211 static ktime_t posix_get_tai_ktime(clockid_t which_clock)
212 {
213 	return ktime_get_clocktai();
214 }
215 
216 static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp)
217 {
218 	tp->tv_sec = 0;
219 	tp->tv_nsec = hrtimer_resolution;
220 	return 0;
221 }
222 
223 static __init int init_posix_timers(void)
224 {
225 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
226 					sizeof(struct k_itimer), 0,
227 					SLAB_PANIC | SLAB_ACCOUNT, NULL);
228 	return 0;
229 }
230 __initcall(init_posix_timers);
231 
232 /*
233  * The siginfo si_overrun field and the return value of timer_getoverrun(2)
234  * are of type int. Clamp the overrun value to INT_MAX
235  */
236 static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval)
237 {
238 	s64 sum = timr->it_overrun_last + (s64)baseval;
239 
240 	return sum > (s64)INT_MAX ? INT_MAX : (int)sum;
241 }
242 
243 static void common_hrtimer_rearm(struct k_itimer *timr)
244 {
245 	struct hrtimer *timer = &timr->it.real.timer;
246 
247 	timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
248 					    timr->it_interval);
249 	hrtimer_restart(timer);
250 }
251 
252 /*
253  * This function is called from the signal delivery code if
254  * info->si_sys_private is not zero, which indicates that the timer has to
255  * be rearmed. Restart the timer and update info::si_overrun.
256  */
257 void posixtimer_rearm(struct kernel_siginfo *info)
258 {
259 	struct k_itimer *timr;
260 	unsigned long flags;
261 
262 	timr = lock_timer(info->si_tid, &flags);
263 	if (!timr)
264 		return;
265 
266 	if (timr->it_interval && timr->it_requeue_pending == info->si_sys_private) {
267 		timr->kclock->timer_rearm(timr);
268 
269 		timr->it_active = 1;
270 		timr->it_overrun_last = timr->it_overrun;
271 		timr->it_overrun = -1LL;
272 		++timr->it_requeue_pending;
273 
274 		info->si_overrun = timer_overrun_to_int(timr, info->si_overrun);
275 	}
276 
277 	unlock_timer(timr, flags);
278 }
279 
280 int posix_timer_event(struct k_itimer *timr, int si_private)
281 {
282 	enum pid_type type;
283 	int ret;
284 	/*
285 	 * FIXME: if ->sigq is queued we can race with
286 	 * dequeue_signal()->posixtimer_rearm().
287 	 *
288 	 * If dequeue_signal() sees the "right" value of
289 	 * si_sys_private it calls posixtimer_rearm().
290 	 * We re-queue ->sigq and drop ->it_lock().
291 	 * posixtimer_rearm() locks the timer
292 	 * and re-schedules it while ->sigq is pending.
293 	 * Not really bad, but not that we want.
294 	 */
295 	timr->sigq->info.si_sys_private = si_private;
296 
297 	type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID;
298 	ret = send_sigqueue(timr->sigq, timr->it_pid, type);
299 	/* If we failed to send the signal the timer stops. */
300 	return ret > 0;
301 }
302 
303 /*
304  * This function gets called when a POSIX.1b interval timer expires from
305  * the HRTIMER interrupt (soft interrupt on RT kernels).
306  *
307  * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI
308  * based timers.
309  */
310 static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
311 {
312 	enum hrtimer_restart ret = HRTIMER_NORESTART;
313 	struct k_itimer *timr;
314 	unsigned long flags;
315 	int si_private = 0;
316 
317 	timr = container_of(timer, struct k_itimer, it.real.timer);
318 	spin_lock_irqsave(&timr->it_lock, flags);
319 
320 	timr->it_active = 0;
321 	if (timr->it_interval != 0)
322 		si_private = ++timr->it_requeue_pending;
323 
324 	if (posix_timer_event(timr, si_private)) {
325 		/*
326 		 * The signal was not queued due to SIG_IGN. As a
327 		 * consequence the timer is not going to be rearmed from
328 		 * the signal delivery path. But as a real signal handler
329 		 * can be installed later the timer must be rearmed here.
330 		 */
331 		if (timr->it_interval != 0) {
332 			ktime_t now = hrtimer_cb_get_time(timer);
333 
334 			/*
335 			 * FIXME: What we really want, is to stop this
336 			 * timer completely and restart it in case the
337 			 * SIG_IGN is removed. This is a non trivial
338 			 * change to the signal handling code.
339 			 *
340 			 * For now let timers with an interval less than a
341 			 * jiffie expire every jiffie and recheck for a
342 			 * valid signal handler.
343 			 *
344 			 * This avoids interrupt starvation in case of a
345 			 * very small interval, which would expire the
346 			 * timer immediately again.
347 			 *
348 			 * Moving now ahead of time by one jiffie tricks
349 			 * hrtimer_forward() to expire the timer later,
350 			 * while it still maintains the overrun accuracy
351 			 * for the price of a slight inconsistency in the
352 			 * timer_gettime() case. This is at least better
353 			 * than a timer storm.
354 			 *
355 			 * Only required when high resolution timers are
356 			 * enabled as the periodic tick based timers are
357 			 * automatically aligned to the next tick.
358 			 */
359 			if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS)) {
360 				ktime_t kj = TICK_NSEC;
361 
362 				if (timr->it_interval < kj)
363 					now = ktime_add(now, kj);
364 			}
365 
366 			timr->it_overrun += hrtimer_forward(timer, now, timr->it_interval);
367 			ret = HRTIMER_RESTART;
368 			++timr->it_requeue_pending;
369 			timr->it_active = 1;
370 		}
371 	}
372 
373 	unlock_timer(timr, flags);
374 	return ret;
375 }
376 
377 static struct pid *good_sigevent(sigevent_t * event)
378 {
379 	struct pid *pid = task_tgid(current);
380 	struct task_struct *rtn;
381 
382 	switch (event->sigev_notify) {
383 	case SIGEV_SIGNAL | SIGEV_THREAD_ID:
384 		pid = find_vpid(event->sigev_notify_thread_id);
385 		rtn = pid_task(pid, PIDTYPE_PID);
386 		if (!rtn || !same_thread_group(rtn, current))
387 			return NULL;
388 		fallthrough;
389 	case SIGEV_SIGNAL:
390 	case SIGEV_THREAD:
391 		if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
392 			return NULL;
393 		fallthrough;
394 	case SIGEV_NONE:
395 		return pid;
396 	default:
397 		return NULL;
398 	}
399 }
400 
401 static struct k_itimer * alloc_posix_timer(void)
402 {
403 	struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
404 
405 	if (!tmr)
406 		return tmr;
407 	if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
408 		kmem_cache_free(posix_timers_cache, tmr);
409 		return NULL;
410 	}
411 	clear_siginfo(&tmr->sigq->info);
412 	return tmr;
413 }
414 
415 static void k_itimer_rcu_free(struct rcu_head *head)
416 {
417 	struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);
418 
419 	kmem_cache_free(posix_timers_cache, tmr);
420 }
421 
422 static void posix_timer_free(struct k_itimer *tmr)
423 {
424 	put_pid(tmr->it_pid);
425 	sigqueue_free(tmr->sigq);
426 	call_rcu(&tmr->rcu, k_itimer_rcu_free);
427 }
428 
429 static void posix_timer_unhash_and_free(struct k_itimer *tmr)
430 {
431 	spin_lock(&hash_lock);
432 	hlist_del_rcu(&tmr->t_hash);
433 	spin_unlock(&hash_lock);
434 	posix_timer_free(tmr);
435 }
436 
437 static int common_timer_create(struct k_itimer *new_timer)
438 {
439 	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
440 	return 0;
441 }
442 
443 /* Create a POSIX.1b interval timer. */
444 static int do_timer_create(clockid_t which_clock, struct sigevent *event,
445 			   timer_t __user *created_timer_id)
446 {
447 	const struct k_clock *kc = clockid_to_kclock(which_clock);
448 	struct k_itimer *new_timer;
449 	int error, new_timer_id;
450 
451 	if (!kc)
452 		return -EINVAL;
453 	if (!kc->timer_create)
454 		return -EOPNOTSUPP;
455 
456 	new_timer = alloc_posix_timer();
457 	if (unlikely(!new_timer))
458 		return -EAGAIN;
459 
460 	spin_lock_init(&new_timer->it_lock);
461 
462 	/*
463 	 * Add the timer to the hash table. The timer is not yet valid
464 	 * because new_timer::it_signal is still NULL. The timer id is also
465 	 * not yet visible to user space.
466 	 */
467 	new_timer_id = posix_timer_add(new_timer);
468 	if (new_timer_id < 0) {
469 		posix_timer_free(new_timer);
470 		return new_timer_id;
471 	}
472 
473 	new_timer->it_id = (timer_t) new_timer_id;
474 	new_timer->it_clock = which_clock;
475 	new_timer->kclock = kc;
476 	new_timer->it_overrun = -1LL;
477 
478 	if (event) {
479 		rcu_read_lock();
480 		new_timer->it_pid = get_pid(good_sigevent(event));
481 		rcu_read_unlock();
482 		if (!new_timer->it_pid) {
483 			error = -EINVAL;
484 			goto out;
485 		}
486 		new_timer->it_sigev_notify     = event->sigev_notify;
487 		new_timer->sigq->info.si_signo = event->sigev_signo;
488 		new_timer->sigq->info.si_value = event->sigev_value;
489 	} else {
490 		new_timer->it_sigev_notify     = SIGEV_SIGNAL;
491 		new_timer->sigq->info.si_signo = SIGALRM;
492 		memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t));
493 		new_timer->sigq->info.si_value.sival_int = new_timer->it_id;
494 		new_timer->it_pid = get_pid(task_tgid(current));
495 	}
496 
497 	new_timer->sigq->info.si_tid   = new_timer->it_id;
498 	new_timer->sigq->info.si_code  = SI_TIMER;
499 
500 	if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) {
501 		error = -EFAULT;
502 		goto out;
503 	}
504 	/*
505 	 * After succesful copy out, the timer ID is visible to user space
506 	 * now but not yet valid because new_timer::signal is still NULL.
507 	 *
508 	 * Complete the initialization with the clock specific create
509 	 * callback.
510 	 */
511 	error = kc->timer_create(new_timer);
512 	if (error)
513 		goto out;
514 
515 	spin_lock_irq(&current->sighand->siglock);
516 	/* This makes the timer valid in the hash table */
517 	WRITE_ONCE(new_timer->it_signal, current->signal);
518 	list_add(&new_timer->list, &current->signal->posix_timers);
519 	spin_unlock_irq(&current->sighand->siglock);
520 	/*
521 	 * After unlocking sighand::siglock @new_timer is subject to
522 	 * concurrent removal and cannot be touched anymore
523 	 */
524 	return 0;
525 out:
526 	posix_timer_unhash_and_free(new_timer);
527 	return error;
528 }
529 
530 SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
531 		struct sigevent __user *, timer_event_spec,
532 		timer_t __user *, created_timer_id)
533 {
534 	if (timer_event_spec) {
535 		sigevent_t event;
536 
537 		if (copy_from_user(&event, timer_event_spec, sizeof (event)))
538 			return -EFAULT;
539 		return do_timer_create(which_clock, &event, created_timer_id);
540 	}
541 	return do_timer_create(which_clock, NULL, created_timer_id);
542 }
543 
#ifdef CONFIG_COMPAT
/*
 * Compat variant of timer_create(2). An optional compat sigevent is
 * converted with get_compat_sigevent(); the actual work is done by
 * do_timer_create().
 *
 * Returns -EFAULT if the conversion fails, otherwise the result of
 * do_timer_create().
 */
COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock,
		       struct compat_sigevent __user *, timer_event_spec,
		       timer_t __user *, created_timer_id)
{
	sigevent_t event;
	sigevent_t *evp = NULL;

	if (timer_event_spec) {
		if (get_compat_sigevent(&event, timer_event_spec))
			return -EFAULT;
		evp = &event;
	}

	return do_timer_create(which_clock, evp, created_timer_id);
}
#endif
559 
560 static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
561 {
562 	struct k_itimer *timr;
563 
564 	/*
565 	 * timer_t could be any type >= int and we want to make sure any
566 	 * @timer_id outside positive int range fails lookup.
567 	 */
568 	if ((unsigned long long)timer_id > INT_MAX)
569 		return NULL;
570 
571 	/*
572 	 * The hash lookup and the timers are RCU protected.
573 	 *
574 	 * Timers are added to the hash in invalid state where
575 	 * timr::it_signal == NULL. timer::it_signal is only set after the
576 	 * rest of the initialization succeeded.
577 	 *
578 	 * Timer destruction happens in steps:
579 	 *  1) Set timr::it_signal to NULL with timr::it_lock held
580 	 *  2) Release timr::it_lock
581 	 *  3) Remove from the hash under hash_lock
582 	 *  4) Call RCU for removal after the grace period
583 	 *
584 	 * Holding rcu_read_lock() accross the lookup ensures that
585 	 * the timer cannot be freed.
586 	 *
587 	 * The lookup validates locklessly that timr::it_signal ==
588 	 * current::it_signal and timr::it_id == @timer_id. timr::it_id
589 	 * can't change, but timr::it_signal becomes NULL during
590 	 * destruction.
591 	 */
592 	rcu_read_lock();
593 	timr = posix_timer_by_id(timer_id);
594 	if (timr) {
595 		spin_lock_irqsave(&timr->it_lock, *flags);
596 		/*
597 		 * Validate under timr::it_lock that timr::it_signal is
598 		 * still valid. Pairs with #1 above.
599 		 */
600 		if (timr->it_signal == current->signal) {
601 			rcu_read_unlock();
602 			return timr;
603 		}
604 		spin_unlock_irqrestore(&timr->it_lock, *flags);
605 	}
606 	rcu_read_unlock();
607 
608 	return NULL;
609 }
610 
611 static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now)
612 {
613 	struct hrtimer *timer = &timr->it.real.timer;
614 
615 	return __hrtimer_expires_remaining_adjusted(timer, now);
616 }
617 
618 static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now)
619 {
620 	struct hrtimer *timer = &timr->it.real.timer;
621 
622 	return hrtimer_forward(timer, now, timr->it_interval);
623 }
624 
625 /*
626  * Get the time remaining on a POSIX.1b interval timer.
627  *
628  * Two issues to handle here:
629  *
630  *  1) The timer has a requeue pending. The return value must appear as
631  *     if the timer has been requeued right now.
632  *
633  *  2) The timer is a SIGEV_NONE timer. These timers are never enqueued
634  *     into the hrtimer queue and therefore never expired. Emulate expiry
635  *     here taking #1 into account.
636  */
637 void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
638 {
639 	const struct k_clock *kc = timr->kclock;
640 	ktime_t now, remaining, iv;
641 	bool sig_none;
642 
643 	sig_none = timr->it_sigev_notify == SIGEV_NONE;
644 	iv = timr->it_interval;
645 
646 	/* interval timer ? */
647 	if (iv) {
648 		cur_setting->it_interval = ktime_to_timespec64(iv);
649 	} else if (!timr->it_active) {
650 		/*
651 		 * SIGEV_NONE oneshot timers are never queued and therefore
652 		 * timr->it_active is always false. The check below
653 		 * vs. remaining time will handle this case.
654 		 *
655 		 * For all other timers there is nothing to update here, so
656 		 * return.
657 		 */
658 		if (!sig_none)
659 			return;
660 	}
661 
662 	now = kc->clock_get_ktime(timr->it_clock);
663 
664 	/*
665 	 * If this is an interval timer and either has requeue pending or
666 	 * is a SIGEV_NONE timer move the expiry time forward by intervals,
667 	 * so expiry is > now.
668 	 */
669 	if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none))
670 		timr->it_overrun += kc->timer_forward(timr, now);
671 
672 	remaining = kc->timer_remaining(timr, now);
673 	/*
674 	 * As @now is retrieved before a possible timer_forward() and
675 	 * cannot be reevaluated by the compiler @remaining is based on the
676 	 * same @now value. Therefore @remaining is consistent vs. @now.
677 	 *
678 	 * Consequently all interval timers, i.e. @iv > 0, cannot have a
679 	 * remaining time <= 0 because timer_forward() guarantees to move
680 	 * them forward so that the next timer expiry is > @now.
681 	 */
682 	if (remaining <= 0) {
683 		/*
684 		 * A single shot SIGEV_NONE timer must return 0, when it is
685 		 * expired! Timers which have a real signal delivery mode
686 		 * must return a remaining time greater than 0 because the
687 		 * signal has not yet been delivered.
688 		 */
689 		if (!sig_none)
690 			cur_setting->it_value.tv_nsec = 1;
691 	} else {
692 		cur_setting->it_value = ktime_to_timespec64(remaining);
693 	}
694 }
695 
696 static int do_timer_gettime(timer_t timer_id,  struct itimerspec64 *setting)
697 {
698 	const struct k_clock *kc;
699 	struct k_itimer *timr;
700 	unsigned long flags;
701 	int ret = 0;
702 
703 	timr = lock_timer(timer_id, &flags);
704 	if (!timr)
705 		return -EINVAL;
706 
707 	memset(setting, 0, sizeof(*setting));
708 	kc = timr->kclock;
709 	if (WARN_ON_ONCE(!kc || !kc->timer_get))
710 		ret = -EINVAL;
711 	else
712 		kc->timer_get(timr, setting);
713 
714 	unlock_timer(timr, flags);
715 	return ret;
716 }
717 
718 /* Get the time remaining on a POSIX.1b interval timer. */
719 SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
720 		struct __kernel_itimerspec __user *, setting)
721 {
722 	struct itimerspec64 cur_setting;
723 
724 	int ret = do_timer_gettime(timer_id, &cur_setting);
725 	if (!ret) {
726 		if (put_itimerspec64(&cur_setting, setting))
727 			ret = -EFAULT;
728 	}
729 	return ret;
730 }
731 
#ifdef CONFIG_COMPAT_32BIT_TIME

/*
 * Variant of timer_gettime(2) which reports the result in the
 * old_itimerspec32 layout.
 *
 * Returns the error of do_timer_gettime(), -EFAULT if the result cannot
 * be copied to @setting, and 0 otherwise.
 */
SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id,
		struct old_itimerspec32 __user *, setting)
{
	struct itimerspec64 cur_setting;
	int ret;

	ret = do_timer_gettime(timer_id, &cur_setting);
	if (ret)
		return ret;

	return put_old_itimerspec32(&cur_setting, setting) ? -EFAULT : 0;
}

#endif
748 
749 /**
750  * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer
751  * @timer_id:	The timer ID which identifies the timer
752  *
753  * The "overrun count" of a timer is one plus the number of expiration
754  * intervals which have elapsed between the first expiry, which queues the
755  * signal and the actual signal delivery. On signal delivery the "overrun
756  * count" is calculated and cached, so it can be returned directly here.
757  *
758  * As this is relative to the last queued signal the returned overrun count
759  * is meaningless outside of the signal delivery path and even there it
760  * does not accurately reflect the current state when user space evaluates
761  * it.
762  *
763  * Returns:
764  *	-EINVAL		@timer_id is invalid
765  *	1..INT_MAX	The number of overruns related to the last delivered signal
766  */
767 SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
768 {
769 	struct k_itimer *timr;
770 	unsigned long flags;
771 	int overrun;
772 
773 	timr = lock_timer(timer_id, &flags);
774 	if (!timr)
775 		return -EINVAL;
776 
777 	overrun = timer_overrun_to_int(timr, 0);
778 	unlock_timer(timr, flags);
779 
780 	return overrun;
781 }
782 
783 static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires,
784 			       bool absolute, bool sigev_none)
785 {
786 	struct hrtimer *timer = &timr->it.real.timer;
787 	enum hrtimer_mode mode;
788 
789 	mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
790 	/*
791 	 * Posix magic: Relative CLOCK_REALTIME timers are not affected by
792 	 * clock modifications, so they become CLOCK_MONOTONIC based under the
793 	 * hood. See hrtimer_init(). Update timr->kclock, so the generic
794 	 * functions which use timr->kclock->clock_get_*() work.
795 	 *
796 	 * Note: it_clock stays unmodified, because the next timer_set() might
797 	 * use ABSTIME, so it needs to switch back.
798 	 */
799 	if (timr->it_clock == CLOCK_REALTIME)
800 		timr->kclock = absolute ? &clock_realtime : &clock_monotonic;
801 
802 	hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
803 	timr->it.real.timer.function = posix_timer_fn;
804 
805 	if (!absolute)
806 		expires = ktime_add_safe(expires, timer->base->get_time());
807 	hrtimer_set_expires(timer, expires);
808 
809 	if (!sigev_none)
810 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
811 }
812 
813 static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
814 {
815 	return hrtimer_try_to_cancel(&timr->it.real.timer);
816 }
817 
818 static void common_timer_wait_running(struct k_itimer *timer)
819 {
820 	hrtimer_cancel_wait_running(&timer->it.real.timer);
821 }
822 
823 /*
824  * On PREEMPT_RT this prevents priority inversion and a potential livelock
825  * against the ksoftirqd thread in case that ksoftirqd gets preempted while
826  * executing a hrtimer callback.
827  *
828  * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this
829  * just results in a cpu_relax().
830  *
831  * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is
832  * just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this
833  * prevents spinning on an eventually scheduled out task and a livelock
834  * when the task which tries to delete or disarm the timer has preempted
835  * the task which runs the expiry in task work context.
836  */
837 static struct k_itimer *timer_wait_running(struct k_itimer *timer,
838 					   unsigned long *flags)
839 {
840 	const struct k_clock *kc = READ_ONCE(timer->kclock);
841 	timer_t timer_id = READ_ONCE(timer->it_id);
842 
843 	/* Prevent kfree(timer) after dropping the lock */
844 	rcu_read_lock();
845 	unlock_timer(timer, *flags);
846 
847 	/*
848 	 * kc->timer_wait_running() might drop RCU lock. So @timer
849 	 * cannot be touched anymore after the function returns!
850 	 */
851 	if (!WARN_ON_ONCE(!kc->timer_wait_running))
852 		kc->timer_wait_running(timer);
853 
854 	rcu_read_unlock();
855 	/* Relock the timer. It might be not longer hashed. */
856 	return lock_timer(timer_id, flags);
857 }
858 
859 /* Set a POSIX.1b interval timer. */
860 int common_timer_set(struct k_itimer *timr, int flags,
861 		     struct itimerspec64 *new_setting,
862 		     struct itimerspec64 *old_setting)
863 {
864 	const struct k_clock *kc = timr->kclock;
865 	bool sigev_none;
866 	ktime_t expires;
867 
868 	if (old_setting)
869 		common_timer_get(timr, old_setting);
870 
871 	/* Prevent rearming by clearing the interval */
872 	timr->it_interval = 0;
873 	/*
874 	 * Careful here. On SMP systems the timer expiry function could be
875 	 * active and spinning on timr->it_lock.
876 	 */
877 	if (kc->timer_try_to_cancel(timr) < 0)
878 		return TIMER_RETRY;
879 
880 	timr->it_active = 0;
881 	timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
882 		~REQUEUE_PENDING;
883 	timr->it_overrun_last = 0;
884 
885 	/* Switch off the timer when it_value is zero */
886 	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
887 		return 0;
888 
889 	timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
890 	expires = timespec64_to_ktime(new_setting->it_value);
891 	if (flags & TIMER_ABSTIME)
892 		expires = timens_ktime_to_host(timr->it_clock, expires);
893 	sigev_none = timr->it_sigev_notify == SIGEV_NONE;
894 
895 	kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
896 	timr->it_active = !sigev_none;
897 	return 0;
898 }
899 
900 static int do_timer_settime(timer_t timer_id, int tmr_flags,
901 			    struct itimerspec64 *new_spec64,
902 			    struct itimerspec64 *old_spec64)
903 {
904 	const struct k_clock *kc;
905 	struct k_itimer *timr;
906 	unsigned long flags;
907 	int error = 0;
908 
909 	if (!timespec64_valid(&new_spec64->it_interval) ||
910 	    !timespec64_valid(&new_spec64->it_value))
911 		return -EINVAL;
912 
913 	if (old_spec64)
914 		memset(old_spec64, 0, sizeof(*old_spec64));
915 
916 	timr = lock_timer(timer_id, &flags);
917 retry:
918 	if (!timr)
919 		return -EINVAL;
920 
921 	kc = timr->kclock;
922 	if (WARN_ON_ONCE(!kc || !kc->timer_set))
923 		error = -EINVAL;
924 	else
925 		error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64);
926 
927 	if (error == TIMER_RETRY) {
928 		// We already got the old time...
929 		old_spec64 = NULL;
930 		/* Unlocks and relocks the timer if it still exists */
931 		timr = timer_wait_running(timr, &flags);
932 		goto retry;
933 	}
934 	unlock_timer(timr, flags);
935 
936 	return error;
937 }
938 
939 /* Set a POSIX.1b interval timer */
940 SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
941 		const struct __kernel_itimerspec __user *, new_setting,
942 		struct __kernel_itimerspec __user *, old_setting)
943 {
944 	struct itimerspec64 new_spec, old_spec, *rtn;
945 	int error = 0;
946 
947 	if (!new_setting)
948 		return -EINVAL;
949 
950 	if (get_itimerspec64(&new_spec, new_setting))
951 		return -EFAULT;
952 
953 	rtn = old_setting ? &old_spec : NULL;
954 	error = do_timer_settime(timer_id, flags, &new_spec, rtn);
955 	if (!error && old_setting) {
956 		if (put_itimerspec64(&old_spec, old_setting))
957 			error = -EFAULT;
958 	}
959 	return error;
960 }
961 
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * Variant of timer_settime(2) which uses the old_itimerspec32 layout for
 * both the new and the old setting.
 *
 * Returns -EINVAL if @new is NULL, -EFAULT if @new cannot be read or the
 * previous setting cannot be written to @old, otherwise the result of
 * do_timer_settime().
 */
SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags,
		struct old_itimerspec32 __user *, new,
		struct old_itimerspec32 __user *, old)
{
	struct itimerspec64 new_spec, old_spec;
	struct itimerspec64 *rtn = NULL;
	int error;

	if (!new)
		return -EINVAL;
	if (get_old_itimerspec32(&new_spec, new))
		return -EFAULT;

	/* Only retrieve the old setting when user space asked for it */
	if (old)
		rtn = &old_spec;

	error = do_timer_settime(timer_id, flags, &new_spec, rtn);
	if (!error && old && put_old_itimerspec32(&old_spec, old))
		error = -EFAULT;

	return error;
}
#endif
984 
985 int common_timer_del(struct k_itimer *timer)
986 {
987 	const struct k_clock *kc = timer->kclock;
988 
989 	timer->it_interval = 0;
990 	if (kc->timer_try_to_cancel(timer) < 0)
991 		return TIMER_RETRY;
992 	timer->it_active = 0;
993 	return 0;
994 }
995 
996 static inline int timer_delete_hook(struct k_itimer *timer)
997 {
998 	const struct k_clock *kc = timer->kclock;
999 
1000 	if (WARN_ON_ONCE(!kc || !kc->timer_del))
1001 		return -EINVAL;
1002 	return kc->timer_del(timer);
1003 }
1004 
1005 /* Delete a POSIX.1b interval timer. */
1006 SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
1007 {
1008 	struct k_itimer *timer;
1009 	unsigned long flags;
1010 
1011 	timer = lock_timer(timer_id, &flags);
1012 
1013 retry_delete:
1014 	if (!timer)
1015 		return -EINVAL;
1016 
1017 	if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) {
1018 		/* Unlocks and relocks the timer if it still exists */
1019 		timer = timer_wait_running(timer, &flags);
1020 		goto retry_delete;
1021 	}
1022 
1023 	spin_lock(&current->sighand->siglock);
1024 	list_del(&timer->list);
1025 	spin_unlock(&current->sighand->siglock);
1026 	/*
1027 	 * A concurrent lookup could check timer::it_signal lockless. It
1028 	 * will reevaluate with timer::it_lock held and observe the NULL.
1029 	 */
1030 	WRITE_ONCE(timer->it_signal, NULL);
1031 
1032 	unlock_timer(timer, flags);
1033 	posix_timer_unhash_and_free(timer);
1034 	return 0;
1035 }
1036 
1037 /*
1038  * Delete a timer if it is armed, remove it from the hash and schedule it
1039  * for RCU freeing.
1040  */
1041 static void itimer_delete(struct k_itimer *timer)
1042 {
1043 	unsigned long flags;
1044 
1045 	/*
1046 	 * irqsave is required to make timer_wait_running() work.
1047 	 */
1048 	spin_lock_irqsave(&timer->it_lock, flags);
1049 
1050 retry_delete:
1051 	/*
1052 	 * Even if the timer is not longer accessible from other tasks
1053 	 * it still might be armed and queued in the underlying timer
1054 	 * mechanism. Worse, that timer mechanism might run the expiry
1055 	 * function concurrently.
1056 	 */
1057 	if (timer_delete_hook(timer) == TIMER_RETRY) {
1058 		/*
1059 		 * Timer is expired concurrently, prevent livelocks
1060 		 * and pointless spinning on RT.
1061 		 *
1062 		 * timer_wait_running() drops timer::it_lock, which opens
1063 		 * the possibility for another task to delete the timer.
1064 		 *
1065 		 * That's not possible here because this is invoked from
1066 		 * do_exit() only for the last thread of the thread group.
1067 		 * So no other task can access and delete that timer.
1068 		 */
1069 		if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer))
1070 			return;
1071 
1072 		goto retry_delete;
1073 	}
1074 	list_del(&timer->list);
1075 
1076 	/*
1077 	 * Setting timer::it_signal to NULL is technically not required
1078 	 * here as nothing can access the timer anymore legitimately via
1079 	 * the hash table. Set it to NULL nevertheless so that all deletion
1080 	 * paths are consistent.
1081 	 */
1082 	WRITE_ONCE(timer->it_signal, NULL);
1083 
1084 	spin_unlock_irqrestore(&timer->it_lock, flags);
1085 	posix_timer_unhash_and_free(timer);
1086 }
1087 
1088 /*
1089  * Invoked from do_exit() when the last thread of a thread group exits.
1090  * At that point no other task can access the timers of the dying
1091  * task anymore.
1092  */
1093 void exit_itimers(struct task_struct *tsk)
1094 {
1095 	struct list_head timers;
1096 	struct k_itimer *tmr;
1097 
1098 	if (list_empty(&tsk->signal->posix_timers))
1099 		return;
1100 
1101 	/* Protect against concurrent read via /proc/$PID/timers */
1102 	spin_lock_irq(&tsk->sighand->siglock);
1103 	list_replace_init(&tsk->signal->posix_timers, &timers);
1104 	spin_unlock_irq(&tsk->sighand->siglock);
1105 
1106 	/* The timers are not longer accessible via tsk::signal */
1107 	while (!list_empty(&timers)) {
1108 		tmr = list_first_entry(&timers, struct k_itimer, list);
1109 		itimer_delete(tmr);
1110 	}
1111 }
1112 
1113 SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
1114 		const struct __kernel_timespec __user *, tp)
1115 {
1116 	const struct k_clock *kc = clockid_to_kclock(which_clock);
1117 	struct timespec64 new_tp;
1118 
1119 	if (!kc || !kc->clock_set)
1120 		return -EINVAL;
1121 
1122 	if (get_timespec64(&new_tp, tp))
1123 		return -EFAULT;
1124 
1125 	/*
1126 	 * Permission checks have to be done inside the clock specific
1127 	 * setter callback.
1128 	 */
1129 	return kc->clock_set(which_clock, &new_tp);
1130 }
1131 
1132 SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
1133 		struct __kernel_timespec __user *, tp)
1134 {
1135 	const struct k_clock *kc = clockid_to_kclock(which_clock);
1136 	struct timespec64 kernel_tp;
1137 	int error;
1138 
1139 	if (!kc)
1140 		return -EINVAL;
1141 
1142 	error = kc->clock_get_timespec(which_clock, &kernel_tp);
1143 
1144 	if (!error && put_timespec64(&kernel_tp, tp))
1145 		error = -EFAULT;
1146 
1147 	return error;
1148 }
1149 
1150 int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx)
1151 {
1152 	const struct k_clock *kc = clockid_to_kclock(which_clock);
1153 
1154 	if (!kc)
1155 		return -EINVAL;
1156 	if (!kc->clock_adj)
1157 		return -EOPNOTSUPP;
1158 
1159 	return kc->clock_adj(which_clock, ktx);
1160 }
1161 
1162 SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
1163 		struct __kernel_timex __user *, utx)
1164 {
1165 	struct __kernel_timex ktx;
1166 	int err;
1167 
1168 	if (copy_from_user(&ktx, utx, sizeof(ktx)))
1169 		return -EFAULT;
1170 
1171 	err = do_clock_adjtime(which_clock, &ktx);
1172 
1173 	if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx)))
1174 		return -EFAULT;
1175 
1176 	return err;
1177 }
1178 
1179 /**
1180  * sys_clock_getres - Get the resolution of a clock
1181  * @which_clock:	The clock to get the resolution for
1182  * @tp:			Pointer to a a user space timespec64 for storage
1183  *
1184  * POSIX defines:
1185  *
1186  * "The clock_getres() function shall return the resolution of any
1187  * clock. Clock resolutions are implementation-defined and cannot be set by
1188  * a process. If the argument res is not NULL, the resolution of the
1189  * specified clock shall be stored in the location pointed to by res. If
1190  * res is NULL, the clock resolution is not returned. If the time argument
1191  * of clock_settime() is not a multiple of res, then the value is truncated
1192  * to a multiple of res."
1193  *
1194  * Due to the various hardware constraints the real resolution can vary
1195  * wildly and even change during runtime when the underlying devices are
1196  * replaced. The kernel also can use hardware devices with different
1197  * resolutions for reading the time and for arming timers.
1198  *
1199  * The kernel therefore deviates from the POSIX spec in various aspects:
1200  *
1201  * 1) The resolution returned to user space
1202  *
1203  *    For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI,
1204  *    CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALAREM and CLOCK_MONOTONIC_RAW
1205  *    the kernel differentiates only two cases:
1206  *
1207  *    I)  Low resolution mode:
1208  *
1209  *	  When high resolution timers are disabled at compile or runtime
1210  *	  the resolution returned is nanoseconds per tick, which represents
1211  *	  the precision at which timers expire.
1212  *
1213  *    II) High resolution mode:
1214  *
1215  *	  When high resolution timers are enabled the resolution returned
1216  *	  is always one nanosecond independent of the actual resolution of
1217  *	  the underlying hardware devices.
1218  *
1219  *	  For CLOCK_*_ALARM the actual resolution depends on system
1220  *	  state. When system is running the resolution is the same as the
1221  *	  resolution of the other clocks. During suspend the actual
1222  *	  resolution is the resolution of the underlying RTC device which
1223  *	  might be way less precise than the clockevent device used during
1224  *	  running state.
1225  *
1226  *   For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution
1227  *   returned is always nanoseconds per tick.
1228  *
1229  *   For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution
1230  *   returned is always one nanosecond under the assumption that the
1231  *   underlying scheduler clock has a better resolution than nanoseconds
1232  *   per tick.
1233  *
1234  *   For dynamic POSIX clocks (PTP devices) the resolution returned is
1235  *   always one nanosecond.
1236  *
1237  * 2) Affect on sys_clock_settime()
1238  *
1239  *    The kernel does not truncate the time which is handed in to
1240  *    sys_clock_settime(). The kernel internal timekeeping is always using
1241  *    nanoseconds precision independent of the clocksource device which is
1242  *    used to read the time from. The resolution of that device only
1243  *    affects the presicion of the time returned by sys_clock_gettime().
1244  *
1245  * Returns:
1246  *	0		Success. @tp contains the resolution
1247  *	-EINVAL		@which_clock is not a valid clock ID
1248  *	-EFAULT		Copying the resolution to @tp faulted
1249  *	-ENODEV		Dynamic POSIX clock is not backed by a device
1250  *	-EOPNOTSUPP	Dynamic POSIX clock does not support getres()
1251  */
1252 SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
1253 		struct __kernel_timespec __user *, tp)
1254 {
1255 	const struct k_clock *kc = clockid_to_kclock(which_clock);
1256 	struct timespec64 rtn_tp;
1257 	int error;
1258 
1259 	if (!kc)
1260 		return -EINVAL;
1261 
1262 	error = kc->clock_getres(which_clock, &rtn_tp);
1263 
1264 	if (!error && tp && put_timespec64(&rtn_tp, tp))
1265 		error = -EFAULT;
1266 
1267 	return error;
1268 }
1269 
1270 #ifdef CONFIG_COMPAT_32BIT_TIME
1271 
1272 SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock,
1273 		struct old_timespec32 __user *, tp)
1274 {
1275 	const struct k_clock *kc = clockid_to_kclock(which_clock);
1276 	struct timespec64 ts;
1277 
1278 	if (!kc || !kc->clock_set)
1279 		return -EINVAL;
1280 
1281 	if (get_old_timespec32(&ts, tp))
1282 		return -EFAULT;
1283 
1284 	return kc->clock_set(which_clock, &ts);
1285 }
1286 
1287 SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock,
1288 		struct old_timespec32 __user *, tp)
1289 {
1290 	const struct k_clock *kc = clockid_to_kclock(which_clock);
1291 	struct timespec64 ts;
1292 	int err;
1293 
1294 	if (!kc)
1295 		return -EINVAL;
1296 
1297 	err = kc->clock_get_timespec(which_clock, &ts);
1298 
1299 	if (!err && put_old_timespec32(&ts, tp))
1300 		err = -EFAULT;
1301 
1302 	return err;
1303 }
1304 
1305 SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock,
1306 		struct old_timex32 __user *, utp)
1307 {
1308 	struct __kernel_timex ktx;
1309 	int err;
1310 
1311 	err = get_old_timex32(&ktx, utp);
1312 	if (err)
1313 		return err;
1314 
1315 	err = do_clock_adjtime(which_clock, &ktx);
1316 
1317 	if (err >= 0 && put_old_timex32(utp, &ktx))
1318 		return -EFAULT;
1319 
1320 	return err;
1321 }
1322 
1323 SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
1324 		struct old_timespec32 __user *, tp)
1325 {
1326 	const struct k_clock *kc = clockid_to_kclock(which_clock);
1327 	struct timespec64 ts;
1328 	int err;
1329 
1330 	if (!kc)
1331 		return -EINVAL;
1332 
1333 	err = kc->clock_getres(which_clock, &ts);
1334 	if (!err && tp && put_old_timespec32(&ts, tp))
1335 		return -EFAULT;
1336 
1337 	return err;
1338 }
1339 
1340 #endif
1341 
1342 /*
1343  * sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI
1344  */
1345 static int common_nsleep(const clockid_t which_clock, int flags,
1346 			 const struct timespec64 *rqtp)
1347 {
1348 	ktime_t texp = timespec64_to_ktime(*rqtp);
1349 
1350 	return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
1351 				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
1352 				 which_clock);
1353 }
1354 
1355 /*
1356  * sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME
1357  *
1358  * Absolute nanosleeps for these clocks are time-namespace adjusted.
1359  */
1360 static int common_nsleep_timens(const clockid_t which_clock, int flags,
1361 				const struct timespec64 *rqtp)
1362 {
1363 	ktime_t texp = timespec64_to_ktime(*rqtp);
1364 
1365 	if (flags & TIMER_ABSTIME)
1366 		texp = timens_ktime_to_host(which_clock, texp);
1367 
1368 	return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
1369 				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
1370 				 which_clock);
1371 }
1372 
1373 SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
1374 		const struct __kernel_timespec __user *, rqtp,
1375 		struct __kernel_timespec __user *, rmtp)
1376 {
1377 	const struct k_clock *kc = clockid_to_kclock(which_clock);
1378 	struct timespec64 t;
1379 
1380 	if (!kc)
1381 		return -EINVAL;
1382 	if (!kc->nsleep)
1383 		return -EOPNOTSUPP;
1384 
1385 	if (get_timespec64(&t, rqtp))
1386 		return -EFAULT;
1387 
1388 	if (!timespec64_valid(&t))
1389 		return -EINVAL;
1390 	if (flags & TIMER_ABSTIME)
1391 		rmtp = NULL;
1392 	current->restart_block.fn = do_no_restart_syscall;
1393 	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
1394 	current->restart_block.nanosleep.rmtp = rmtp;
1395 
1396 	return kc->nsleep(which_clock, flags, &t);
1397 }
1398 
1399 #ifdef CONFIG_COMPAT_32BIT_TIME
1400 
1401 SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
1402 		struct old_timespec32 __user *, rqtp,
1403 		struct old_timespec32 __user *, rmtp)
1404 {
1405 	const struct k_clock *kc = clockid_to_kclock(which_clock);
1406 	struct timespec64 t;
1407 
1408 	if (!kc)
1409 		return -EINVAL;
1410 	if (!kc->nsleep)
1411 		return -EOPNOTSUPP;
1412 
1413 	if (get_old_timespec32(&t, rqtp))
1414 		return -EFAULT;
1415 
1416 	if (!timespec64_valid(&t))
1417 		return -EINVAL;
1418 	if (flags & TIMER_ABSTIME)
1419 		rmtp = NULL;
1420 	current->restart_block.fn = do_no_restart_syscall;
1421 	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
1422 	current->restart_block.nanosleep.compat_rmtp = rmtp;
1423 
1424 	return kc->nsleep(which_clock, flags, &t);
1425 }
1426 
1427 #endif
1428 
1429 static const struct k_clock clock_realtime = {
1430 	.clock_getres		= posix_get_hrtimer_res,
1431 	.clock_get_timespec	= posix_get_realtime_timespec,
1432 	.clock_get_ktime	= posix_get_realtime_ktime,
1433 	.clock_set		= posix_clock_realtime_set,
1434 	.clock_adj		= posix_clock_realtime_adj,
1435 	.nsleep			= common_nsleep,
1436 	.timer_create		= common_timer_create,
1437 	.timer_set		= common_timer_set,
1438 	.timer_get		= common_timer_get,
1439 	.timer_del		= common_timer_del,
1440 	.timer_rearm		= common_hrtimer_rearm,
1441 	.timer_forward		= common_hrtimer_forward,
1442 	.timer_remaining	= common_hrtimer_remaining,
1443 	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
1444 	.timer_wait_running	= common_timer_wait_running,
1445 	.timer_arm		= common_hrtimer_arm,
1446 };
1447 
1448 static const struct k_clock clock_monotonic = {
1449 	.clock_getres		= posix_get_hrtimer_res,
1450 	.clock_get_timespec	= posix_get_monotonic_timespec,
1451 	.clock_get_ktime	= posix_get_monotonic_ktime,
1452 	.nsleep			= common_nsleep_timens,
1453 	.timer_create		= common_timer_create,
1454 	.timer_set		= common_timer_set,
1455 	.timer_get		= common_timer_get,
1456 	.timer_del		= common_timer_del,
1457 	.timer_rearm		= common_hrtimer_rearm,
1458 	.timer_forward		= common_hrtimer_forward,
1459 	.timer_remaining	= common_hrtimer_remaining,
1460 	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
1461 	.timer_wait_running	= common_timer_wait_running,
1462 	.timer_arm		= common_hrtimer_arm,
1463 };
1464 
1465 static const struct k_clock clock_monotonic_raw = {
1466 	.clock_getres		= posix_get_hrtimer_res,
1467 	.clock_get_timespec	= posix_get_monotonic_raw,
1468 };
1469 
1470 static const struct k_clock clock_realtime_coarse = {
1471 	.clock_getres		= posix_get_coarse_res,
1472 	.clock_get_timespec	= posix_get_realtime_coarse,
1473 };
1474 
1475 static const struct k_clock clock_monotonic_coarse = {
1476 	.clock_getres		= posix_get_coarse_res,
1477 	.clock_get_timespec	= posix_get_monotonic_coarse,
1478 };
1479 
1480 static const struct k_clock clock_tai = {
1481 	.clock_getres		= posix_get_hrtimer_res,
1482 	.clock_get_ktime	= posix_get_tai_ktime,
1483 	.clock_get_timespec	= posix_get_tai_timespec,
1484 	.nsleep			= common_nsleep,
1485 	.timer_create		= common_timer_create,
1486 	.timer_set		= common_timer_set,
1487 	.timer_get		= common_timer_get,
1488 	.timer_del		= common_timer_del,
1489 	.timer_rearm		= common_hrtimer_rearm,
1490 	.timer_forward		= common_hrtimer_forward,
1491 	.timer_remaining	= common_hrtimer_remaining,
1492 	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
1493 	.timer_wait_running	= common_timer_wait_running,
1494 	.timer_arm		= common_hrtimer_arm,
1495 };
1496 
1497 static const struct k_clock clock_boottime = {
1498 	.clock_getres		= posix_get_hrtimer_res,
1499 	.clock_get_ktime	= posix_get_boottime_ktime,
1500 	.clock_get_timespec	= posix_get_boottime_timespec,
1501 	.nsleep			= common_nsleep_timens,
1502 	.timer_create		= common_timer_create,
1503 	.timer_set		= common_timer_set,
1504 	.timer_get		= common_timer_get,
1505 	.timer_del		= common_timer_del,
1506 	.timer_rearm		= common_hrtimer_rearm,
1507 	.timer_forward		= common_hrtimer_forward,
1508 	.timer_remaining	= common_hrtimer_remaining,
1509 	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
1510 	.timer_wait_running	= common_timer_wait_running,
1511 	.timer_arm		= common_hrtimer_arm,
1512 };
1513 
1514 static const struct k_clock * const posix_clocks[] = {
1515 	[CLOCK_REALTIME]		= &clock_realtime,
1516 	[CLOCK_MONOTONIC]		= &clock_monotonic,
1517 	[CLOCK_PROCESS_CPUTIME_ID]	= &clock_process,
1518 	[CLOCK_THREAD_CPUTIME_ID]	= &clock_thread,
1519 	[CLOCK_MONOTONIC_RAW]		= &clock_monotonic_raw,
1520 	[CLOCK_REALTIME_COARSE]		= &clock_realtime_coarse,
1521 	[CLOCK_MONOTONIC_COARSE]	= &clock_monotonic_coarse,
1522 	[CLOCK_BOOTTIME]		= &clock_boottime,
1523 	[CLOCK_REALTIME_ALARM]		= &alarm_clock,
1524 	[CLOCK_BOOTTIME_ALARM]		= &alarm_clock,
1525 	[CLOCK_TAI]			= &clock_tai,
1526 };
1527 
1528 static const struct k_clock *clockid_to_kclock(const clockid_t id)
1529 {
1530 	clockid_t idx = id;
1531 
1532 	if (id < 0) {
1533 		return (id & CLOCKFD_MASK) == CLOCKFD ?
1534 			&clock_posix_dynamic : &clock_posix_cpu;
1535 	}
1536 
1537 	if (id >= ARRAY_SIZE(posix_clocks))
1538 		return NULL;
1539 
1540 	return posix_clocks[array_index_nospec(idx, ARRAY_SIZE(posix_clocks))];
1541 }
1542