1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Implement CPU time clocks for the POSIX clock interface. 4 */ 5 6 #include <linux/sched/signal.h> 7 #include <linux/sched/cputime.h> 8 #include <linux/posix-timers.h> 9 #include <linux/errno.h> 10 #include <linux/math64.h> 11 #include <linux/uaccess.h> 12 #include <linux/kernel_stat.h> 13 #include <trace/events/timer.h> 14 #include <linux/tick.h> 15 #include <linux/workqueue.h> 16 #include <linux/compat.h> 17 #include <linux/sched/deadline.h> 18 19 #include "posix-timers.h" 20 21 static void posix_cpu_timer_rearm(struct k_itimer *timer); 22 23 void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) 24 { 25 posix_cputimers_init(pct); 26 if (cpu_limit != RLIM_INFINITY) { 27 pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC; 28 pct->timers_active = true; 29 } 30 } 31 32 /* 33 * Called after updating RLIMIT_CPU to run cpu timer and update 34 * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if 35 * necessary. Needs siglock protection since other code may update the 36 * expiration cache as well. 37 */ 38 void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) 39 { 40 u64 nsecs = rlim_new * NSEC_PER_SEC; 41 42 spin_lock_irq(&task->sighand->siglock); 43 set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL); 44 spin_unlock_irq(&task->sighand->siglock); 45 } 46 47 /* 48 * Functions for validating access to tasks. 49 */ 50 static struct pid *pid_for_clock(const clockid_t clock, bool gettime) 51 { 52 const bool thread = !!CPUCLOCK_PERTHREAD(clock); 53 const pid_t upid = CPUCLOCK_PID(clock); 54 struct pid *pid; 55 56 if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX) 57 return NULL; 58 59 /* 60 * If the encoded PID is 0, then the timer is targeted at current 61 * or the process to which current belongs. 62 */ 63 if (upid == 0) 64 return thread ? 
task_pid(current) : task_tgid(current); 65 66 pid = find_vpid(upid); 67 if (!pid) 68 return NULL; 69 70 if (thread) { 71 struct task_struct *tsk = pid_task(pid, PIDTYPE_PID); 72 return (tsk && same_thread_group(tsk, current)) ? pid : NULL; 73 } 74 75 /* 76 * For clock_gettime(PROCESS) allow finding the process by 77 * with the pid of the current task. The code needs the tgid 78 * of the process so that pid_task(pid, PIDTYPE_TGID) can be 79 * used to find the process. 80 */ 81 if (gettime && (pid == task_pid(current))) 82 return task_tgid(current); 83 84 /* 85 * For processes require that pid identifies a process. 86 */ 87 return pid_has_task(pid, PIDTYPE_TGID) ? pid : NULL; 88 } 89 90 static inline int validate_clock_permissions(const clockid_t clock) 91 { 92 int ret; 93 94 rcu_read_lock(); 95 ret = pid_for_clock(clock, false) ? 0 : -EINVAL; 96 rcu_read_unlock(); 97 98 return ret; 99 } 100 101 static inline enum pid_type clock_pid_type(const clockid_t clock) 102 { 103 return CPUCLOCK_PERTHREAD(clock) ? PIDTYPE_PID : PIDTYPE_TGID; 104 } 105 106 static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer) 107 { 108 return pid_task(timer->it.cpu.pid, clock_pid_type(timer->it_clock)); 109 } 110 111 /* 112 * Update expiry time from increment, and increase overrun count, 113 * given the current clock sample. 114 */ 115 static u64 bump_cpu_timer(struct k_itimer *timer, u64 now) 116 { 117 u64 delta, incr, expires = timer->it.cpu.node.expires; 118 int i; 119 120 if (!timer->it_interval) 121 return expires; 122 123 if (now < expires) 124 return expires; 125 126 incr = timer->it_interval; 127 delta = now + incr - expires; 128 129 /* Don't use (incr*2 < delta), incr*2 might overflow. 
*/ 130 for (i = 0; incr < delta - incr; i++) 131 incr = incr << 1; 132 133 for (; i >= 0; incr >>= 1, i--) { 134 if (delta < incr) 135 continue; 136 137 timer->it.cpu.node.expires += incr; 138 timer->it_overrun += 1LL << i; 139 delta -= incr; 140 } 141 return timer->it.cpu.node.expires; 142 } 143 144 /* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */ 145 static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct) 146 { 147 return !(~pct->bases[CPUCLOCK_PROF].nextevt | 148 ~pct->bases[CPUCLOCK_VIRT].nextevt | 149 ~pct->bases[CPUCLOCK_SCHED].nextevt); 150 } 151 152 static int 153 posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) 154 { 155 int error = validate_clock_permissions(which_clock); 156 157 if (!error) { 158 tp->tv_sec = 0; 159 tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ); 160 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { 161 /* 162 * If sched_clock is using a cycle counter, we 163 * don't have any idea of its true resolution 164 * exported, but it is much more than 1s/HZ. 165 */ 166 tp->tv_nsec = 1; 167 } 168 } 169 return error; 170 } 171 172 static int 173 posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp) 174 { 175 int error = validate_clock_permissions(clock); 176 177 /* 178 * You can never reset a CPU clock, but we check for other errors 179 * in the call before failing with EPERM. 180 */ 181 return error ? : -EPERM; 182 } 183 184 /* 185 * Sample a per-thread clock for the given task. clkid is validated. 
186 */ 187 static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p) 188 { 189 u64 utime, stime; 190 191 if (clkid == CPUCLOCK_SCHED) 192 return task_sched_runtime(p); 193 194 task_cputime(p, &utime, &stime); 195 196 switch (clkid) { 197 case CPUCLOCK_PROF: 198 return utime + stime; 199 case CPUCLOCK_VIRT: 200 return utime; 201 default: 202 WARN_ON_ONCE(1); 203 } 204 return 0; 205 } 206 207 static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime) 208 { 209 samples[CPUCLOCK_PROF] = stime + utime; 210 samples[CPUCLOCK_VIRT] = utime; 211 samples[CPUCLOCK_SCHED] = rtime; 212 } 213 214 static void task_sample_cputime(struct task_struct *p, u64 *samples) 215 { 216 u64 stime, utime; 217 218 task_cputime(p, &utime, &stime); 219 store_samples(samples, stime, utime, p->se.sum_exec_runtime); 220 } 221 222 static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, 223 u64 *samples) 224 { 225 u64 stime, utime, rtime; 226 227 utime = atomic64_read(&at->utime); 228 stime = atomic64_read(&at->stime); 229 rtime = atomic64_read(&at->sum_exec_runtime); 230 store_samples(samples, stime, utime, rtime); 231 } 232 233 /* 234 * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg 235 * to avoid race conditions with concurrent updates to cputime. 
236 */ 237 static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime) 238 { 239 u64 curr_cputime; 240 retry: 241 curr_cputime = atomic64_read(cputime); 242 if (sum_cputime > curr_cputime) { 243 if (atomic64_cmpxchg(cputime, curr_cputime, sum_cputime) != curr_cputime) 244 goto retry; 245 } 246 } 247 248 static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, 249 struct task_cputime *sum) 250 { 251 __update_gt_cputime(&cputime_atomic->utime, sum->utime); 252 __update_gt_cputime(&cputime_atomic->stime, sum->stime); 253 __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); 254 } 255 256 /** 257 * thread_group_sample_cputime - Sample cputime for a given task 258 * @tsk: Task for which cputime needs to be started 259 * @samples: Storage for time samples 260 * 261 * Called from sys_getitimer() to calculate the expiry time of an active 262 * timer. That means group cputime accounting is already active. Called 263 * with task sighand lock held. 264 * 265 * Updates @times with an uptodate sample of the thread group cputimes. 266 */ 267 void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples) 268 { 269 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; 270 struct posix_cputimers *pct = &tsk->signal->posix_cputimers; 271 272 WARN_ON_ONCE(!pct->timers_active); 273 274 proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); 275 } 276 277 /** 278 * thread_group_start_cputime - Start cputime and return a sample 279 * @tsk: Task for which cputime needs to be started 280 * @samples: Storage for time samples 281 * 282 * The thread group cputime accounting is avoided when there are no posix 283 * CPU timers armed. Before starting a timer it's required to check whether 284 * the time accounting is active. If not, a full update of the atomic 285 * accounting store needs to be done and the accounting enabled. 286 * 287 * Updates @times with an uptodate sample of the thread group cputimes. 
288 */ 289 static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples) 290 { 291 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; 292 struct posix_cputimers *pct = &tsk->signal->posix_cputimers; 293 294 lockdep_assert_task_sighand_held(tsk); 295 296 /* Check if cputimer isn't running. This is accessed without locking. */ 297 if (!READ_ONCE(pct->timers_active)) { 298 struct task_cputime sum; 299 300 /* 301 * The POSIX timer interface allows for absolute time expiry 302 * values through the TIMER_ABSTIME flag, therefore we have 303 * to synchronize the timer to the clock every time we start it. 304 */ 305 thread_group_cputime(tsk, &sum); 306 update_gt_cputime(&cputimer->cputime_atomic, &sum); 307 308 /* 309 * We're setting timers_active without a lock. Ensure this 310 * only gets written to in one operation. We set it after 311 * update_gt_cputime() as a small optimization, but 312 * barriers are not required because update_gt_cputime() 313 * can handle concurrent updates. 314 */ 315 WRITE_ONCE(pct->timers_active, true); 316 } 317 proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); 318 } 319 320 static void __thread_group_cputime(struct task_struct *tsk, u64 *samples) 321 { 322 struct task_cputime ct; 323 324 thread_group_cputime(tsk, &ct); 325 store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime); 326 } 327 328 /* 329 * Sample a process (thread group) clock for the given task clkid. If the 330 * group's cputime accounting is already enabled, read the atomic 331 * store. Otherwise a full update is required. clkid is already validated. 
332 */ 333 static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p, 334 bool start) 335 { 336 struct thread_group_cputimer *cputimer = &p->signal->cputimer; 337 struct posix_cputimers *pct = &p->signal->posix_cputimers; 338 u64 samples[CPUCLOCK_MAX]; 339 340 if (!READ_ONCE(pct->timers_active)) { 341 if (start) 342 thread_group_start_cputime(p, samples); 343 else 344 __thread_group_cputime(p, samples); 345 } else { 346 proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); 347 } 348 349 return samples[clkid]; 350 } 351 352 static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp) 353 { 354 const clockid_t clkid = CPUCLOCK_WHICH(clock); 355 struct task_struct *tsk; 356 u64 t; 357 358 rcu_read_lock(); 359 tsk = pid_task(pid_for_clock(clock, true), clock_pid_type(clock)); 360 if (!tsk) { 361 rcu_read_unlock(); 362 return -EINVAL; 363 } 364 365 if (CPUCLOCK_PERTHREAD(clock)) 366 t = cpu_clock_sample(clkid, tsk); 367 else 368 t = cpu_clock_sample_group(clkid, tsk, false); 369 rcu_read_unlock(); 370 371 *tp = ns_to_timespec64(t); 372 return 0; 373 } 374 375 /* 376 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. 377 * This is called from sys_timer_create() and do_cpu_nanosleep() with the 378 * new timer already all-zeros initialized. 379 */ 380 static int posix_cpu_timer_create(struct k_itimer *new_timer) 381 { 382 static struct lock_class_key posix_cpu_timers_key; 383 struct pid *pid; 384 385 rcu_read_lock(); 386 pid = pid_for_clock(new_timer->it_clock, false); 387 if (!pid) { 388 rcu_read_unlock(); 389 return -EINVAL; 390 } 391 392 /* 393 * If posix timer expiry is handled in task work context then 394 * timer::it_lock can be taken without disabling interrupts as all 395 * other locking happens in task context. 
This requires a separate 396 * lock class key otherwise regular posix timer expiry would record 397 * the lock class being taken in interrupt context and generate a 398 * false positive warning. 399 */ 400 if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK)) 401 lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key); 402 403 new_timer->kclock = &clock_posix_cpu; 404 timerqueue_init(&new_timer->it.cpu.node); 405 new_timer->it.cpu.pid = get_pid(pid); 406 rcu_read_unlock(); 407 return 0; 408 } 409 410 static struct posix_cputimer_base *timer_base(struct k_itimer *timer, 411 struct task_struct *tsk) 412 { 413 int clkidx = CPUCLOCK_WHICH(timer->it_clock); 414 415 if (CPUCLOCK_PERTHREAD(timer->it_clock)) 416 return tsk->posix_cputimers.bases + clkidx; 417 else 418 return tsk->signal->posix_cputimers.bases + clkidx; 419 } 420 421 /* 422 * Force recalculating the base earliest expiration on the next tick. 423 * This will also re-evaluate the need to keep around the process wide 424 * cputime counter and tick dependency and eventually shut these down 425 * if necessary. 426 */ 427 static void trigger_base_recalc_expires(struct k_itimer *timer, 428 struct task_struct *tsk) 429 { 430 struct posix_cputimer_base *base = timer_base(timer, tsk); 431 432 base->nextevt = 0; 433 } 434 435 /* 436 * Dequeue the timer and reset the base if it was its earliest expiration. 437 * It makes sure the next tick recalculates the base next expiration so we 438 * don't keep the costly process wide cputime counter around for a random 439 * amount of time, along with the tick dependency. 440 * 441 * If another timer gets queued between this and the next tick, its 442 * expiration will update the base next event if necessary on the next 443 * tick. 
444 */ 445 static void disarm_timer(struct k_itimer *timer, struct task_struct *p) 446 { 447 struct cpu_timer *ctmr = &timer->it.cpu; 448 struct posix_cputimer_base *base; 449 450 if (!cpu_timer_dequeue(ctmr)) 451 return; 452 453 base = timer_base(timer, p); 454 if (cpu_timer_getexpires(ctmr) == base->nextevt) 455 trigger_base_recalc_expires(timer, p); 456 } 457 458 459 /* 460 * Clean up a CPU-clock timer that is about to be destroyed. 461 * This is called from timer deletion with the timer already locked. 462 * If we return TIMER_RETRY, it's necessary to release the timer's lock 463 * and try again. (This happens when the timer is in the middle of firing.) 464 */ 465 static int posix_cpu_timer_del(struct k_itimer *timer) 466 { 467 struct cpu_timer *ctmr = &timer->it.cpu; 468 struct sighand_struct *sighand; 469 struct task_struct *p; 470 unsigned long flags; 471 int ret = 0; 472 473 rcu_read_lock(); 474 p = cpu_timer_task_rcu(timer); 475 if (!p) 476 goto out; 477 478 /* 479 * Protect against sighand release/switch in exit/exec and process/ 480 * thread timer list entry concurrent read/writes. 481 */ 482 sighand = lock_task_sighand(p, &flags); 483 if (unlikely(sighand == NULL)) { 484 /* 485 * This raced with the reaping of the task. The exit cleanup 486 * should have removed this timer from the timer queue. 
487 */ 488 WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node)); 489 } else { 490 if (timer->it.cpu.firing) 491 ret = TIMER_RETRY; 492 else 493 disarm_timer(timer, p); 494 495 unlock_task_sighand(p, &flags); 496 } 497 498 out: 499 rcu_read_unlock(); 500 if (!ret) 501 put_pid(ctmr->pid); 502 503 return ret; 504 } 505 506 static void cleanup_timerqueue(struct timerqueue_head *head) 507 { 508 struct timerqueue_node *node; 509 struct cpu_timer *ctmr; 510 511 while ((node = timerqueue_getnext(head))) { 512 timerqueue_del(head, node); 513 ctmr = container_of(node, struct cpu_timer, node); 514 ctmr->head = NULL; 515 } 516 } 517 518 /* 519 * Clean out CPU timers which are still armed when a thread exits. The 520 * timers are only removed from the list. No other updates are done. The 521 * corresponding posix timers are still accessible, but cannot be rearmed. 522 * 523 * This must be called with the siglock held. 524 */ 525 static void cleanup_timers(struct posix_cputimers *pct) 526 { 527 cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead); 528 cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead); 529 cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead); 530 } 531 532 /* 533 * These are both called with the siglock held, when the current thread 534 * is being reaped. When the final (leader) thread in the group is reaped, 535 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit. 536 */ 537 void posix_cpu_timers_exit(struct task_struct *tsk) 538 { 539 cleanup_timers(&tsk->posix_cputimers); 540 } 541 void posix_cpu_timers_exit_group(struct task_struct *tsk) 542 { 543 cleanup_timers(&tsk->signal->posix_cputimers); 544 } 545 546 /* 547 * Insert the timer on the appropriate list before any timers that 548 * expire later. This must be called with the sighand lock held. 
549 */ 550 static void arm_timer(struct k_itimer *timer, struct task_struct *p) 551 { 552 struct posix_cputimer_base *base = timer_base(timer, p); 553 struct cpu_timer *ctmr = &timer->it.cpu; 554 u64 newexp = cpu_timer_getexpires(ctmr); 555 556 if (!cpu_timer_enqueue(&base->tqhead, ctmr)) 557 return; 558 559 /* 560 * We are the new earliest-expiring POSIX 1.b timer, hence 561 * need to update expiration cache. Take into account that 562 * for process timers we share expiration cache with itimers 563 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. 564 */ 565 if (newexp < base->nextevt) 566 base->nextevt = newexp; 567 568 if (CPUCLOCK_PERTHREAD(timer->it_clock)) 569 tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); 570 else 571 tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER); 572 } 573 574 /* 575 * The timer is locked, fire it and arrange for its reload. 576 */ 577 static void cpu_timer_fire(struct k_itimer *timer) 578 { 579 struct cpu_timer *ctmr = &timer->it.cpu; 580 581 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { 582 /* 583 * User don't want any signal. 584 */ 585 cpu_timer_setexpires(ctmr, 0); 586 } else if (unlikely(timer->sigq == NULL)) { 587 /* 588 * This a special case for clock_nanosleep, 589 * not a normal timer from sys_timer_create. 590 */ 591 wake_up_process(timer->it_process); 592 cpu_timer_setexpires(ctmr, 0); 593 } else if (!timer->it_interval) { 594 /* 595 * One-shot timer. Clear it as soon as it's fired. 596 */ 597 posix_timer_event(timer, 0); 598 cpu_timer_setexpires(ctmr, 0); 599 } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { 600 /* 601 * The signal did not get queued because the signal 602 * was ignored, so we won't get any callback to 603 * reload the timer. But we need to keep it 604 * ticking in case the signal is deliverable next time. 605 */ 606 posix_cpu_timer_rearm(timer); 607 ++timer->it_requeue_pending; 608 } 609 } 610 611 /* 612 * Guts of sys_timer_settime for CPU timers. 
* This is called with the timer locked and interrupts disabled.
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again.  (This happens when the timer is in the middle of firing.)
 *
 * Returns 0 on success, TIMER_RETRY as described above, or -ESRCH when
 * the target task or its sighand is gone. If @old is non-NULL it receives
 * the previous setting on the 0 and TIMER_RETRY paths.
 */
static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
			       struct itimerspec64 *new, struct itimerspec64 *old)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	u64 old_expires, new_expires, old_incr, val;
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct sighand_struct *sighand;
	struct task_struct *p;
	unsigned long flags;
	int ret = 0;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p) {
		/*
		 * If p has just been reaped, we can no
		 * longer get any information about it at all.
		 */
		rcu_read_unlock();
		return -ESRCH;
	}

	/*
	 * Use the to_ktime conversion because that clamps the maximum
	 * value to KTIME_MAX and avoids multiplication overflows.
	 */
	new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value));

	/*
	 * Protect against sighand release/switch in exit/exec and p->cpu_timers
	 * and p->signal->cpu_timers read/write in arm_timer()
	 */
	sighand = lock_task_sighand(p, &flags);
	/*
	 * If p has just been reaped, we can no
	 * longer get any information about it at all.
	 */
	if (unlikely(sighand == NULL)) {
		rcu_read_unlock();
		return -ESRCH;
	}

	/*
	 * Disarm any old timer after extracting its expiry time.
	 */
	old_incr = timer->it_interval;
	old_expires = cpu_timer_getexpires(ctmr);

	if (unlikely(timer->it.cpu.firing)) {
		/* Mark the in-flight firing as cancelled and ask for a retry */
		timer->it.cpu.firing = -1;
		ret = TIMER_RETRY;
	} else {
		cpu_timer_dequeue(ctmr);
	}

	/*
	 * We need to sample the current value to convert the new
	 * value from relative to absolute, and to convert the
	 * old value from absolute to relative.  To set a process
	 * timer, we need a sample to balance the thread expiry
	 * times (in arm_timer).  With an absolute time, we must
	 * check if it's already passed.  In short, we need a sample.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		val = cpu_clock_sample(clkid, p);
	else
		val = cpu_clock_sample_group(clkid, p, true);

	if (old) {
		if (old_expires == 0) {
			old->it_value.tv_sec = 0;
			old->it_value.tv_nsec = 0;
		} else {
			/*
			 * Update the timer in case it has overrun already.
			 * If it has, we'll report it as having overrun and
			 * with the next reloaded timer already ticking,
			 * though we are swallowing that pending
			 * notification here to install the new setting.
			 */
			u64 exp = bump_cpu_timer(timer, val);

			if (val < exp) {
				old_expires = exp - val;
				old->it_value = ns_to_timespec64(old_expires);
			} else {
				/* Expired but not yet fired: report 1ns left */
				old->it_value.tv_nsec = 1;
				old->it_value.tv_sec = 0;
			}
		}
	}

	if (unlikely(ret)) {
		/*
		 * We are colliding with the timer actually firing.
		 * Punt after filling in the timer's old value, and
		 * disable this firing since we are already reporting
		 * it as an overrun (thanks to bump_cpu_timer above).
		 */
		unlock_task_sighand(p, &flags);
		goto out;
	}

	/* A relative, non-zero expiry is converted to an absolute one */
	if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
		new_expires += val;
	}

	/*
	 * Install the new expiry time (or zero).
	 * For a timer with no notification action, we don't actually
	 * arm the timer (we'll just fake it for timer_gettime).
	 */
	cpu_timer_setexpires(ctmr, new_expires);
	if (new_expires != 0 && val < new_expires) {
		arm_timer(timer, p);
	}

	unlock_task_sighand(p, &flags);
	/*
	 * Install the new reload setting, and
	 * set up the signal and overrun bookkeeping.
	 */
	timer->it_interval = timespec64_to_ktime(new->it_interval);

	/*
	 * This acts as a modification timestamp for the timer,
	 * so any automatic reload attempt will punt on seeing
	 * that we have reset the timer manually.
	 */
	timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
		~REQUEUE_PENDING;
	timer->it_overrun_last = 0;
	timer->it_overrun = -1;

	if (val >= new_expires) {
		if (new_expires != 0) {
			/*
			 * The designated time already passed, so we notify
			 * immediately, even if the thread never runs to
			 * accumulate more time on this clock.
			 */
			cpu_timer_fire(timer);
		}

		/*
		 * Make sure we don't keep around the process wide cputime
		 * counter or the tick dependency if they are not necessary.
		 */
		sighand = lock_task_sighand(p, &flags);
		if (!sighand)
			goto out;

		if (!cpu_timer_queued(ctmr))
			trigger_base_recalc_expires(timer, p);

		unlock_task_sighand(p, &flags);
	}
 out:
	rcu_read_unlock();
	if (old)
		old->it_interval = ns_to_timespec64(old_incr);

	return ret;
}

/*
 * Report the reload interval and the time remaining until expiry of
 * @timer in @itp. @itp is left untouched when the target task is gone;
 * it_value is left untouched when the timer has no expiry time set.
 */
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 now, expires = cpu_timer_getexpires(ctmr);
	struct task_struct *p;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/*
	 * Easy part: convert the reload time.
	 */
	itp->it_interval = ktime_to_timespec64(timer->it_interval);

	if (!expires)
		goto out;

	/*
	 * Sample the clock to take the difference with the expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		now = cpu_clock_sample(clkid, p);
	else
		now = cpu_clock_sample_group(clkid, p, false);

	if (now < expires) {
		itp->it_value = ns_to_timespec64(expires - now);
	} else {
		/*
		 * The timer should have expired already, but the firing
		 * hasn't taken place yet.  Say it's just about to expire.
		 */
		itp->it_value.tv_nsec = 1;
		itp->it_value.tv_sec = 0;
	}
out:
	rcu_read_unlock();
}

#define MAX_COLLECTED	20

/*
 * Move expired timers from @head onto the @firing list. Collection stops
 * at the first timer which has not expired relative to @now, or when the
 * MAX_COLLECTED limit is reached. Returns the expiry time of the first
 * timer left in the queue, or U64_MAX when the queue was emptied.
 */
static u64 collect_timerqueue(struct timerqueue_head *head,
			      struct list_head *firing, u64 now)
{
	struct timerqueue_node *next;
	int i = 0;

	while ((next = timerqueue_getnext(head))) {
		struct cpu_timer *ctmr;
		u64 expires;

		ctmr = container_of(next, struct cpu_timer, node);
		expires = cpu_timer_getexpires(ctmr);
		/* Limit the number of timers to expire at once */
		if (++i == MAX_COLLECTED || now < expires)
			return expires;

		ctmr->firing = 1;
		cpu_timer_dequeue(ctmr);
		list_add_tail(&ctmr->elist, firing);
	}

	return U64_MAX;
}

/*
 * Collect the expired timers of all clock bases in @pct onto @firing and
 * refresh each base's nextevt from the collection result. @samples holds
 * the current sample for each clock, indexed by clock id.
 */
static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,
				    struct list_head *firing)
{
	struct posix_cputimer_base *base = pct->bases;
	int i;

	for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
		base->nextevt = collect_timerqueue(&base->tqhead, firing,
						    samples[i]);
	}
}

/* Send SIGXCPU and clear the flag if a deadline overrun is pending on @tsk. */
static inline void check_dl_overrun(struct task_struct *tsk)
{
	if (tsk->dl.dl_overrun) {
		tsk->dl.dl_overrun = 0;
		__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
	}
}

/*
 * Returns false while @time is below @limit. Otherwise sends @signo for
 * current via __group_send_sig_info() and returns true. @rt and @hard only
 * select the wording of the message printed when print_fatal_signals is
 * set.
 */
static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
{
	if (time < limit)
		return false;

	if (print_fatal_signals) {
		pr_info("%s Watchdog Timeout (%s): %s[%d]\n",
			rt ? "RT" : "CPU", hard ? "hard" : "soft",
			current->comm, task_pid_nr(current));
	}
	__group_send_sig_info(signo, SEND_SIG_PRIV, current);
	return true;
}

/*
 * Check for any per-thread CPU timers that have fired and move them off
 * the tsk->cpu_timers[N] list onto the firing list.  Here we update the
 * tsk->it_*_expires values to reflect the remaining thread CPU timers.
 */
static void check_thread_timers(struct task_struct *tsk,
				struct list_head *firing)
{
	struct posix_cputimers *pct = &tsk->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	if (dl_task(tsk))
		check_dl_overrun(tsk);

	if (expiry_cache_is_inactive(pct))
		return;

	task_sample_cputime(tsk, samples);
	collect_posix_cputimers(pct, samples, firing);

	/*
	 * Check for the special case thread timers.
	 */
	soft = task_rlimit(tsk, RLIMIT_RTTIME);
	if (soft != RLIM_INFINITY) {
		/* Task RT timeout is accounted in jiffies. RTTIME is usec */
		unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ);
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);

		/* At the hard limit, send SIGKILL. No further action. */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(rttime, hard, SIGKILL, true, true))
			return;

		/* At the soft limit, send a SIGXCPU every second */
		if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {
			soft += USEC_PER_SEC;
			tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;
		}
	}

	/* Nothing left to expire: drop the tick dependency of this task */
	if (expiry_cache_is_inactive(pct))
		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}

/* Stop the process wide timer accounting and drop its tick dependency. */
static inline void stop_process_timers(struct signal_struct *sig)
{
	struct posix_cputimers *pct = &sig->posix_cputimers;

	/* Turn off the active flag. This is done without locking. */
	WRITE_ONCE(pct->timers_active, false);
	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
}

/*
 * Handle one CPU itimer @it: if it expired relative to @cur_time, reload
 * it from its increment (or disable it when there is none) and send
 * @signo. Afterwards lower *@expires to the itimer's expiry time if the
 * itimer is still armed and expires earlier.
 */
static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
			     u64 *expires, u64 cur_time, int signo)
{
	if (!it->expires)
		return;

	if (cur_time >= it->expires) {
		if (it->incr)
			it->expires += it->incr;
		else
			it->expires = 0;

		trace_itimer_expire(signo == SIGPROF ?
				    ITIMER_PROF : ITIMER_VIRTUAL,
				    task_tgid(tsk), cur_time);
		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
	}

	if (it->expires && it->expires < *expires)
		*expires = it->expires;
}

/*
 * Check for any process wide CPU timers that have fired and move them
 * off the tsk->*_timers list onto the firing list.  Per-thread timers
 * have already been taken off.
 */
static void check_process_timers(struct task_struct *tsk,
				 struct list_head *firing)
{
	struct signal_struct *const sig = tsk->signal;
	struct posix_cputimers *pct = &sig->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	/*
	 * If there are no active process wide timers (POSIX 1.b, itimers,
	 * RLIMIT_CPU) nothing to check. Also skip the process wide timer
	 * processing when there is already another task handling them.
	 */
	if (!READ_ONCE(pct->timers_active) || pct->expiry_active)
		return;

	/*
	 * Signify that a thread is checking for process timers.
	 * Write access to this field is protected by the sighand lock.
	 */
	pct->expiry_active = true;

	/*
	 * Collect the current process totals. Group accounting is active
	 * so the sample can be taken directly.
	 */
	proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples);
	collect_posix_cputimers(pct, samples, firing);

	/*
	 * Check for the special case process timers.
	 */
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF],
			 &pct->bases[CPUCLOCK_PROF].nextevt,
			 samples[CPUCLOCK_PROF], SIGPROF);
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT],
			 &pct->bases[CPUCLOCK_VIRT].nextevt,
			 samples[CPUCLOCK_VIRT], SIGVTALRM);

	soft = task_rlimit(tsk, RLIMIT_CPU);
	if (soft != RLIM_INFINITY) {
		/* RLIMIT_CPU is in seconds. Samples are nanoseconds */
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
		u64 ptime = samples[CPUCLOCK_PROF];
		u64 softns = (u64)soft * NSEC_PER_SEC;
		u64 hardns = (u64)hard * NSEC_PER_SEC;

		/*
		 * At the hard limit, send SIGKILL. No further action.
		 * NOTE(review): this early return leaves pct->expiry_active
		 * set; presumably harmless as SIGKILL was just sent - confirm.
		 */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(ptime, hardns, SIGKILL, false, true))
			return;

		/* At the soft limit, send a SIGXCPU every second */
		if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {
			sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;
			softns += NSEC_PER_SEC;
		}

		/* Update the expiry cache */
		if (softns < pct->bases[CPUCLOCK_PROF].nextevt)
			pct->bases[CPUCLOCK_PROF].nextevt = softns;
	}

	if (expiry_cache_is_inactive(pct))
		stop_process_timers(sig);

	pct->expiry_active = false;
}

/*
 * This is called from the signal code (via posixtimer_rearm)
 * when the last timer signal was delivered and we have to reload the timer.
 * Nothing is done when the target task or its sighand is gone.
 */
static void posix_cpu_timer_rearm(struct k_itimer *timer)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct task_struct *p;
	struct sighand_struct *sighand;
	unsigned long flags;
	u64 now;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/* Protect timer list r/w in arm_timer() */
	sighand = lock_task_sighand(p, &flags);
	if (unlikely(sighand == NULL))
		goto out;

	/*
	 * Fetch the current sample and update the timer's expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		now = cpu_clock_sample(clkid, p);
	else
		now = cpu_clock_sample_group(clkid, p, true);

	bump_cpu_timer(timer, now);

	/*
	 * Now re-arm for the new expiry time.
	 */
	arm_timer(timer, p);
	unlock_task_sighand(p, &flags);
out:
	rcu_read_unlock();
}

/**
 * task_cputimers_expired - Check whether posix CPU timers are expired
 *
 * @samples:	Array of current samples for the CPUCLOCK clocks
 * @pct:	Pointer to a posix_cputimers container
 *
 * Returns true if any member of @samples is greater than or equal to the
 * corresponding member of @pct->bases[CLK].nextevt. False otherwise
 */
static inline bool
task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct)
{
	int i;

	for (i = 0; i < CPUCLOCK_MAX; i++) {
		if (samples[i] >= pct->bases[i].nextevt)
			return true;
	}
	return false;
}

/**
 * fastpath_timer_check - POSIX CPU timers fast path.
 *
 * @tsk:	The task (thread) being checked.
 *
 * Check the task and thread group timers.  If both are zero (there are no
 * timers set) return false.  Otherwise snapshot the task and thread group
 * timers and compare them with the corresponding expiration times.  Return
 * true if a timer has expired, else return false.
1106 */ 1107 static inline bool fastpath_timer_check(struct task_struct *tsk) 1108 { 1109 struct posix_cputimers *pct = &tsk->posix_cputimers; 1110 struct signal_struct *sig; 1111 1112 if (!expiry_cache_is_inactive(pct)) { 1113 u64 samples[CPUCLOCK_MAX]; 1114 1115 task_sample_cputime(tsk, samples); 1116 if (task_cputimers_expired(samples, pct)) 1117 return true; 1118 } 1119 1120 sig = tsk->signal; 1121 pct = &sig->posix_cputimers; 1122 /* 1123 * Check if thread group timers expired when timers are active and 1124 * no other thread in the group is already handling expiry for 1125 * thread group cputimers. These fields are read without the 1126 * sighand lock. However, this is fine because this is meant to be 1127 * a fastpath heuristic to determine whether we should try to 1128 * acquire the sighand lock to handle timer expiry. 1129 * 1130 * In the worst case scenario, if concurrently timers_active is set 1131 * or expiry_active is cleared, but the current thread doesn't see 1132 * the change yet, the timer checks are delayed until the next 1133 * thread in the group gets a scheduler interrupt to handle the 1134 * timer. This isn't an issue in practice because these types of 1135 * delays with signals actually getting sent are expected. 1136 */ 1137 if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) { 1138 u64 samples[CPUCLOCK_MAX]; 1139 1140 proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, 1141 samples); 1142 1143 if (task_cputimers_expired(samples, pct)) 1144 return true; 1145 } 1146 1147 if (dl_task(tsk) && tsk->dl.dl_overrun) 1148 return true; 1149 1150 return false; 1151 } 1152 1153 static void handle_posix_cpu_timers(struct task_struct *tsk); 1154 1155 #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK 1156 static void posix_cpu_timers_work(struct callback_head *work) 1157 { 1158 handle_posix_cpu_timers(current); 1159 } 1160 1161 /* 1162 * Clear existing posix CPU timers task work. 
1163 */ 1164 void clear_posix_cputimers_work(struct task_struct *p) 1165 { 1166 /* 1167 * A copied work entry from the old task is not meaningful, clear it. 1168 * N.B. init_task_work will not do this. 1169 */ 1170 memset(&p->posix_cputimers_work.work, 0, 1171 sizeof(p->posix_cputimers_work.work)); 1172 init_task_work(&p->posix_cputimers_work.work, 1173 posix_cpu_timers_work); 1174 p->posix_cputimers_work.scheduled = false; 1175 } 1176 1177 /* 1178 * Initialize posix CPU timers task work in init task. Out of line to 1179 * keep the callback static and to avoid header recursion hell. 1180 */ 1181 void __init posix_cputimers_init_work(void) 1182 { 1183 clear_posix_cputimers_work(current); 1184 } 1185 1186 /* 1187 * Note: All operations on tsk->posix_cputimer_work.scheduled happen either 1188 * in hard interrupt context or in task context with interrupts 1189 * disabled. Aside of that the writer/reader interaction is always in the 1190 * context of the current task, which means they are strict per CPU. 1191 */ 1192 static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) 1193 { 1194 return tsk->posix_cputimers_work.scheduled; 1195 } 1196 1197 static inline void __run_posix_cpu_timers(struct task_struct *tsk) 1198 { 1199 if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled)) 1200 return; 1201 1202 /* Schedule task work to actually expire the timers */ 1203 tsk->posix_cputimers_work.scheduled = true; 1204 task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME); 1205 } 1206 1207 static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk, 1208 unsigned long start) 1209 { 1210 bool ret = true; 1211 1212 /* 1213 * On !RT kernels interrupts are disabled while collecting expired 1214 * timers, so no tick can happen and the fast path check can be 1215 * reenabled without further checks. 
1216 */ 1217 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { 1218 tsk->posix_cputimers_work.scheduled = false; 1219 return true; 1220 } 1221 1222 /* 1223 * On RT enabled kernels ticks can happen while the expired timers 1224 * are collected under sighand lock. But any tick which observes 1225 * the CPUTIMERS_WORK_SCHEDULED bit set, does not run the fastpath 1226 * checks. So reenabling the tick work has do be done carefully: 1227 * 1228 * Disable interrupts and run the fast path check if jiffies have 1229 * advanced since the collecting of expired timers started. If 1230 * jiffies have not advanced or the fast path check did not find 1231 * newly expired timers, reenable the fast path check in the timer 1232 * interrupt. If there are newly expired timers, return false and 1233 * let the collection loop repeat. 1234 */ 1235 local_irq_disable(); 1236 if (start != jiffies && fastpath_timer_check(tsk)) 1237 ret = false; 1238 else 1239 tsk->posix_cputimers_work.scheduled = false; 1240 local_irq_enable(); 1241 1242 return ret; 1243 } 1244 #else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */ 1245 static inline void __run_posix_cpu_timers(struct task_struct *tsk) 1246 { 1247 lockdep_posixtimer_enter(); 1248 handle_posix_cpu_timers(tsk); 1249 lockdep_posixtimer_exit(); 1250 } 1251 1252 static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) 1253 { 1254 return false; 1255 } 1256 1257 static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk, 1258 unsigned long start) 1259 { 1260 return true; 1261 } 1262 #endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */ 1263 1264 static void handle_posix_cpu_timers(struct task_struct *tsk) 1265 { 1266 struct k_itimer *timer, *next; 1267 unsigned long flags, start; 1268 LIST_HEAD(firing); 1269 1270 if (!lock_task_sighand(tsk, &flags)) 1271 return; 1272 1273 do { 1274 /* 1275 * On RT locking sighand lock does not disable interrupts, 1276 * so this needs to be careful vs. ticks. Store the current 1277 * jiffies value. 
1278 */ 1279 start = READ_ONCE(jiffies); 1280 barrier(); 1281 1282 /* 1283 * Here we take off tsk->signal->cpu_timers[N] and 1284 * tsk->cpu_timers[N] all the timers that are firing, and 1285 * put them on the firing list. 1286 */ 1287 check_thread_timers(tsk, &firing); 1288 1289 check_process_timers(tsk, &firing); 1290 1291 /* 1292 * The above timer checks have updated the expiry cache and 1293 * because nothing can have queued or modified timers after 1294 * sighand lock was taken above it is guaranteed to be 1295 * consistent. So the next timer interrupt fastpath check 1296 * will find valid data. 1297 * 1298 * If timer expiry runs in the timer interrupt context then 1299 * the loop is not relevant as timers will be directly 1300 * expired in interrupt context. The stub function below 1301 * returns always true which allows the compiler to 1302 * optimize the loop out. 1303 * 1304 * If timer expiry is deferred to task work context then 1305 * the following rules apply: 1306 * 1307 * - On !RT kernels no tick can have happened on this CPU 1308 * after sighand lock was acquired because interrupts are 1309 * disabled. So reenabling task work before dropping 1310 * sighand lock and reenabling interrupts is race free. 1311 * 1312 * - On RT kernels ticks might have happened but the tick 1313 * work ignored posix CPU timer handling because the 1314 * CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work 1315 * must be done very carefully including a check whether 1316 * ticks have happened since the start of the timer 1317 * expiry checks. posix_cpu_timers_enable_work() takes 1318 * care of that and eventually lets the expiry checks 1319 * run again. 1320 */ 1321 } while (!posix_cpu_timers_enable_work(tsk, start)); 1322 1323 /* 1324 * We must release sighand lock before taking any timer's lock. 1325 * There is a potential race with timer deletion here, as the 1326 * siglock now protects our private firing list. 
We have set 1327 * the firing flag in each timer, so that a deletion attempt 1328 * that gets the timer lock before we do will give it up and 1329 * spin until we've taken care of that timer below. 1330 */ 1331 unlock_task_sighand(tsk, &flags); 1332 1333 /* 1334 * Now that all the timers on our list have the firing flag, 1335 * no one will touch their list entries but us. We'll take 1336 * each timer's lock before clearing its firing flag, so no 1337 * timer call will interfere. 1338 */ 1339 list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) { 1340 int cpu_firing; 1341 1342 /* 1343 * spin_lock() is sufficient here even independent of the 1344 * expiry context. If expiry happens in hard interrupt 1345 * context it's obvious. For task work context it's safe 1346 * because all other operations on timer::it_lock happen in 1347 * task context (syscall or exit). 1348 */ 1349 spin_lock(&timer->it_lock); 1350 list_del_init(&timer->it.cpu.elist); 1351 cpu_firing = timer->it.cpu.firing; 1352 timer->it.cpu.firing = 0; 1353 /* 1354 * The firing flag is -1 if we collided with a reset 1355 * of the timer, which already reported this 1356 * almost-firing as an overrun. So don't generate an event. 1357 */ 1358 if (likely(cpu_firing >= 0)) 1359 cpu_timer_fire(timer); 1360 spin_unlock(&timer->it_lock); 1361 } 1362 } 1363 1364 /* 1365 * This is called from the timer interrupt handler. The irq handler has 1366 * already updated our counts. We need to check if any timers fire now. 1367 * Interrupts are disabled. 1368 */ 1369 void run_posix_cpu_timers(void) 1370 { 1371 struct task_struct *tsk = current; 1372 1373 lockdep_assert_irqs_disabled(); 1374 1375 /* 1376 * If the actual expiry is deferred to task work context and the 1377 * work is already scheduled there is no point to do anything here. 1378 */ 1379 if (posix_cpu_timers_work_scheduled(tsk)) 1380 return; 1381 1382 /* 1383 * The fast path checks that there are no expired thread or thread 1384 * group timers. 
If that's so, just return. 1385 */ 1386 if (!fastpath_timer_check(tsk)) 1387 return; 1388 1389 __run_posix_cpu_timers(tsk); 1390 } 1391 1392 /* 1393 * Set one of the process-wide special case CPU timers or RLIMIT_CPU. 1394 * The tsk->sighand->siglock must be held by the caller. 1395 */ 1396 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid, 1397 u64 *newval, u64 *oldval) 1398 { 1399 u64 now, *nextevt; 1400 1401 if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED)) 1402 return; 1403 1404 nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt; 1405 now = cpu_clock_sample_group(clkid, tsk, true); 1406 1407 if (oldval) { 1408 /* 1409 * We are setting itimer. The *oldval is absolute and we update 1410 * it to be relative, *newval argument is relative and we update 1411 * it to be absolute. 1412 */ 1413 if (*oldval) { 1414 if (*oldval <= now) { 1415 /* Just about to fire. */ 1416 *oldval = TICK_NSEC; 1417 } else { 1418 *oldval -= now; 1419 } 1420 } 1421 1422 if (*newval) 1423 *newval += now; 1424 } 1425 1426 /* 1427 * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF 1428 * expiry cache is also used by RLIMIT_CPU!. 1429 */ 1430 if (*newval < *nextevt) 1431 *nextevt = *newval; 1432 1433 tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER); 1434 } 1435 1436 static int do_cpu_nanosleep(const clockid_t which_clock, int flags, 1437 const struct timespec64 *rqtp) 1438 { 1439 struct itimerspec64 it; 1440 struct k_itimer timer; 1441 u64 expires; 1442 int error; 1443 1444 /* 1445 * Set up a temporary timer and then wait for it to go off. 
1446 */ 1447 memset(&timer, 0, sizeof timer); 1448 spin_lock_init(&timer.it_lock); 1449 timer.it_clock = which_clock; 1450 timer.it_overrun = -1; 1451 error = posix_cpu_timer_create(&timer); 1452 timer.it_process = current; 1453 1454 if (!error) { 1455 static struct itimerspec64 zero_it; 1456 struct restart_block *restart; 1457 1458 memset(&it, 0, sizeof(it)); 1459 it.it_value = *rqtp; 1460 1461 spin_lock_irq(&timer.it_lock); 1462 error = posix_cpu_timer_set(&timer, flags, &it, NULL); 1463 if (error) { 1464 spin_unlock_irq(&timer.it_lock); 1465 return error; 1466 } 1467 1468 while (!signal_pending(current)) { 1469 if (!cpu_timer_getexpires(&timer.it.cpu)) { 1470 /* 1471 * Our timer fired and was reset, below 1472 * deletion can not fail. 1473 */ 1474 posix_cpu_timer_del(&timer); 1475 spin_unlock_irq(&timer.it_lock); 1476 return 0; 1477 } 1478 1479 /* 1480 * Block until cpu_timer_fire (or a signal) wakes us. 1481 */ 1482 __set_current_state(TASK_INTERRUPTIBLE); 1483 spin_unlock_irq(&timer.it_lock); 1484 schedule(); 1485 spin_lock_irq(&timer.it_lock); 1486 } 1487 1488 /* 1489 * We were interrupted by a signal. 1490 */ 1491 expires = cpu_timer_getexpires(&timer.it.cpu); 1492 error = posix_cpu_timer_set(&timer, 0, &zero_it, &it); 1493 if (!error) { 1494 /* 1495 * Timer is now unarmed, deletion can not fail. 1496 */ 1497 posix_cpu_timer_del(&timer); 1498 } 1499 spin_unlock_irq(&timer.it_lock); 1500 1501 while (error == TIMER_RETRY) { 1502 /* 1503 * We need to handle case when timer was or is in the 1504 * middle of firing. In other cases we already freed 1505 * resources. 1506 */ 1507 spin_lock_irq(&timer.it_lock); 1508 error = posix_cpu_timer_del(&timer); 1509 spin_unlock_irq(&timer.it_lock); 1510 } 1511 1512 if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) { 1513 /* 1514 * It actually did fire already. 1515 */ 1516 return 0; 1517 } 1518 1519 error = -ERESTART_RESTARTBLOCK; 1520 /* 1521 * Report back to the user the time still remaining. 
1522 */ 1523 restart = ¤t->restart_block; 1524 restart->nanosleep.expires = expires; 1525 if (restart->nanosleep.type != TT_NONE) 1526 error = nanosleep_copyout(restart, &it.it_value); 1527 } 1528 1529 return error; 1530 } 1531 1532 static long posix_cpu_nsleep_restart(struct restart_block *restart_block); 1533 1534 static int posix_cpu_nsleep(const clockid_t which_clock, int flags, 1535 const struct timespec64 *rqtp) 1536 { 1537 struct restart_block *restart_block = ¤t->restart_block; 1538 int error; 1539 1540 /* 1541 * Diagnose required errors first. 1542 */ 1543 if (CPUCLOCK_PERTHREAD(which_clock) && 1544 (CPUCLOCK_PID(which_clock) == 0 || 1545 CPUCLOCK_PID(which_clock) == task_pid_vnr(current))) 1546 return -EINVAL; 1547 1548 error = do_cpu_nanosleep(which_clock, flags, rqtp); 1549 1550 if (error == -ERESTART_RESTARTBLOCK) { 1551 1552 if (flags & TIMER_ABSTIME) 1553 return -ERESTARTNOHAND; 1554 1555 restart_block->nanosleep.clockid = which_clock; 1556 set_restart_fn(restart_block, posix_cpu_nsleep_restart); 1557 } 1558 return error; 1559 } 1560 1561 static long posix_cpu_nsleep_restart(struct restart_block *restart_block) 1562 { 1563 clockid_t which_clock = restart_block->nanosleep.clockid; 1564 struct timespec64 t; 1565 1566 t = ns_to_timespec64(restart_block->nanosleep.expires); 1567 1568 return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t); 1569 } 1570 1571 #define PROCESS_CLOCK make_process_cpuclock(0, CPUCLOCK_SCHED) 1572 #define THREAD_CLOCK make_thread_cpuclock(0, CPUCLOCK_SCHED) 1573 1574 static int process_cpu_clock_getres(const clockid_t which_clock, 1575 struct timespec64 *tp) 1576 { 1577 return posix_cpu_clock_getres(PROCESS_CLOCK, tp); 1578 } 1579 static int process_cpu_clock_get(const clockid_t which_clock, 1580 struct timespec64 *tp) 1581 { 1582 return posix_cpu_clock_get(PROCESS_CLOCK, tp); 1583 } 1584 static int process_cpu_timer_create(struct k_itimer *timer) 1585 { 1586 timer->it_clock = PROCESS_CLOCK; 1587 return 
posix_cpu_timer_create(timer); 1588 } 1589 static int process_cpu_nsleep(const clockid_t which_clock, int flags, 1590 const struct timespec64 *rqtp) 1591 { 1592 return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp); 1593 } 1594 static int thread_cpu_clock_getres(const clockid_t which_clock, 1595 struct timespec64 *tp) 1596 { 1597 return posix_cpu_clock_getres(THREAD_CLOCK, tp); 1598 } 1599 static int thread_cpu_clock_get(const clockid_t which_clock, 1600 struct timespec64 *tp) 1601 { 1602 return posix_cpu_clock_get(THREAD_CLOCK, tp); 1603 } 1604 static int thread_cpu_timer_create(struct k_itimer *timer) 1605 { 1606 timer->it_clock = THREAD_CLOCK; 1607 return posix_cpu_timer_create(timer); 1608 } 1609 1610 const struct k_clock clock_posix_cpu = { 1611 .clock_getres = posix_cpu_clock_getres, 1612 .clock_set = posix_cpu_clock_set, 1613 .clock_get_timespec = posix_cpu_clock_get, 1614 .timer_create = posix_cpu_timer_create, 1615 .nsleep = posix_cpu_nsleep, 1616 .timer_set = posix_cpu_timer_set, 1617 .timer_del = posix_cpu_timer_del, 1618 .timer_get = posix_cpu_timer_get, 1619 .timer_rearm = posix_cpu_timer_rearm, 1620 }; 1621 1622 const struct k_clock clock_process = { 1623 .clock_getres = process_cpu_clock_getres, 1624 .clock_get_timespec = process_cpu_clock_get, 1625 .timer_create = process_cpu_timer_create, 1626 .nsleep = process_cpu_nsleep, 1627 }; 1628 1629 const struct k_clock clock_thread = { 1630 .clock_getres = thread_cpu_clock_getres, 1631 .clock_get_timespec = thread_cpu_clock_get, 1632 .timer_create = thread_cpu_timer_create, 1633 }; 1634