1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Implement CPU time clocks for the POSIX clock interface. 4 */ 5 6 #include <linux/sched/signal.h> 7 #include <linux/sched/cputime.h> 8 #include <linux/posix-timers.h> 9 #include <linux/errno.h> 10 #include <linux/math64.h> 11 #include <linux/uaccess.h> 12 #include <linux/kernel_stat.h> 13 #include <trace/events/timer.h> 14 #include <linux/tick.h> 15 #include <linux/workqueue.h> 16 #include <linux/compat.h> 17 #include <linux/sched/deadline.h> 18 19 #include "posix-timers.h" 20 21 static void posix_cpu_timer_rearm(struct k_itimer *timer); 22 23 void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) 24 { 25 posix_cputimers_init(pct); 26 if (cpu_limit != RLIM_INFINITY) { 27 pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC; 28 pct->timers_active = true; 29 } 30 } 31 32 /* 33 * Called after updating RLIMIT_CPU to run cpu timer and update 34 * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if 35 * necessary. Needs siglock protection since other code may update the 36 * expiration cache as well. 37 * 38 * Returns 0 on success, -ESRCH on failure. Can fail if the task is exiting and 39 * we cannot lock_task_sighand. Cannot fail if task is current. 40 */ 41 int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) 42 { 43 u64 nsecs = rlim_new * NSEC_PER_SEC; 44 unsigned long irq_fl; 45 46 if (!lock_task_sighand(task, &irq_fl)) 47 return -ESRCH; 48 set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL); 49 unlock_task_sighand(task, &irq_fl); 50 return 0; 51 } 52 53 /* 54 * Functions for validating access to tasks. 
55 */ 56 static struct pid *pid_for_clock(const clockid_t clock, bool gettime) 57 { 58 const bool thread = !!CPUCLOCK_PERTHREAD(clock); 59 const pid_t upid = CPUCLOCK_PID(clock); 60 struct pid *pid; 61 62 if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX) 63 return NULL; 64 65 /* 66 * If the encoded PID is 0, then the timer is targeted at current 67 * or the process to which current belongs. 68 */ 69 if (upid == 0) 70 return thread ? task_pid(current) : task_tgid(current); 71 72 pid = find_vpid(upid); 73 if (!pid) 74 return NULL; 75 76 if (thread) { 77 struct task_struct *tsk = pid_task(pid, PIDTYPE_PID); 78 return (tsk && same_thread_group(tsk, current)) ? pid : NULL; 79 } 80 81 /* 82 * For clock_gettime(PROCESS) allow finding the process by 83 * with the pid of the current task. The code needs the tgid 84 * of the process so that pid_task(pid, PIDTYPE_TGID) can be 85 * used to find the process. 86 */ 87 if (gettime && (pid == task_pid(current))) 88 return task_tgid(current); 89 90 /* 91 * For processes require that pid identifies a process. 92 */ 93 return pid_has_task(pid, PIDTYPE_TGID) ? pid : NULL; 94 } 95 96 static inline int validate_clock_permissions(const clockid_t clock) 97 { 98 int ret; 99 100 rcu_read_lock(); 101 ret = pid_for_clock(clock, false) ? 0 : -EINVAL; 102 rcu_read_unlock(); 103 104 return ret; 105 } 106 107 static inline enum pid_type clock_pid_type(const clockid_t clock) 108 { 109 return CPUCLOCK_PERTHREAD(clock) ? PIDTYPE_PID : PIDTYPE_TGID; 110 } 111 112 static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer) 113 { 114 return pid_task(timer->it.cpu.pid, clock_pid_type(timer->it_clock)); 115 } 116 117 /* 118 * Update expiry time from increment, and increase overrun count, 119 * given the current clock sample. 
 *
 * Returns the timer's expiry time, which is unchanged when the timer has
 * no interval or @now is still before the expiry.
 */
static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)
{
	u64 delta, incr, expires = timer->it.cpu.node.expires;
	int i;

	/* Nothing to bump for a timer without a reload interval. */
	if (!timer->it_interval)
		return expires;

	/* Not expired yet. */
	if (now < expires)
		return expires;

	incr = timer->it_interval;
	delta = now + incr - expires;

	/*
	 * Double incr until doubling it again would exceed delta; i counts
	 * the doublings.
	 * Don't use (incr*2 < delta), incr*2 might overflow.
	 */
	for (i = 0; incr < delta - incr; i++)
		incr = incr << 1;

	/*
	 * Walk back down: every power-of-two multiple of the interval that
	 * still fits into delta is added to the expiry time and accounted
	 * as 2^i overruns.
	 */
	for (; i >= 0; incr >>= 1, i--) {
		if (delta < incr)
			continue;

		timer->it.cpu.node.expires += incr;
		timer->it_overrun += 1LL << i;
		delta -= incr;
	}
	return timer->it.cpu.node.expires;
}

/* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */
static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct)
{
	/* Any cleared bit in any nextevt makes the OR of the inverses non-zero. */
	return !(~pct->bases[CPUCLOCK_PROF].nextevt |
		 ~pct->bases[CPUCLOCK_VIRT].nextevt |
		 ~pct->bases[CPUCLOCK_SCHED].nextevt);
}

/*
 * Report the resolution of a CPU clock in @tp. Returns 0 on success or the
 * error from validate_clock_permissions(), in which case @tp is not
 * written.
 */
static int
posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
{
	int error = validate_clock_permissions(which_clock);

	if (!error) {
		tp->tv_sec = 0;
		/* One tick in nanoseconds, rounded up. */
		tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
		if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
			/*
			 * If sched_clock is using a cycle counter, we
			 * don't have any idea of its true resolution
			 * exported, but it is much finer than 1s/HZ.
			 */
			tp->tv_nsec = 1;
		}
	}
	return error;
}

/*
 * Setting a CPU clock is never permitted. Returns the validation error if
 * @clock is invalid, -EPERM otherwise. @tp is not used.
 */
static int
posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp)
{
	int error = validate_clock_permissions(clock);

	/*
	 * You can never reset a CPU clock, but we check for other errors
	 * in the call before failing with EPERM.
	 */
	return error ? : -EPERM;
}

/*
 * Sample a per-thread clock for the given task. clkid is validated.
192 */ 193 static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p) 194 { 195 u64 utime, stime; 196 197 if (clkid == CPUCLOCK_SCHED) 198 return task_sched_runtime(p); 199 200 task_cputime(p, &utime, &stime); 201 202 switch (clkid) { 203 case CPUCLOCK_PROF: 204 return utime + stime; 205 case CPUCLOCK_VIRT: 206 return utime; 207 default: 208 WARN_ON_ONCE(1); 209 } 210 return 0; 211 } 212 213 static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime) 214 { 215 samples[CPUCLOCK_PROF] = stime + utime; 216 samples[CPUCLOCK_VIRT] = utime; 217 samples[CPUCLOCK_SCHED] = rtime; 218 } 219 220 static void task_sample_cputime(struct task_struct *p, u64 *samples) 221 { 222 u64 stime, utime; 223 224 task_cputime(p, &utime, &stime); 225 store_samples(samples, stime, utime, p->se.sum_exec_runtime); 226 } 227 228 static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, 229 u64 *samples) 230 { 231 u64 stime, utime, rtime; 232 233 utime = atomic64_read(&at->utime); 234 stime = atomic64_read(&at->stime); 235 rtime = atomic64_read(&at->sum_exec_runtime); 236 store_samples(samples, stime, utime, rtime); 237 } 238 239 /* 240 * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg 241 * to avoid race conditions with concurrent updates to cputime. 
242 */ 243 static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime) 244 { 245 u64 curr_cputime; 246 retry: 247 curr_cputime = atomic64_read(cputime); 248 if (sum_cputime > curr_cputime) { 249 if (atomic64_cmpxchg(cputime, curr_cputime, sum_cputime) != curr_cputime) 250 goto retry; 251 } 252 } 253 254 static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, 255 struct task_cputime *sum) 256 { 257 __update_gt_cputime(&cputime_atomic->utime, sum->utime); 258 __update_gt_cputime(&cputime_atomic->stime, sum->stime); 259 __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); 260 } 261 262 /** 263 * thread_group_sample_cputime - Sample cputime for a given task 264 * @tsk: Task for which cputime needs to be started 265 * @samples: Storage for time samples 266 * 267 * Called from sys_getitimer() to calculate the expiry time of an active 268 * timer. That means group cputime accounting is already active. Called 269 * with task sighand lock held. 270 * 271 * Updates @times with an uptodate sample of the thread group cputimes. 272 */ 273 void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples) 274 { 275 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; 276 struct posix_cputimers *pct = &tsk->signal->posix_cputimers; 277 278 WARN_ON_ONCE(!pct->timers_active); 279 280 proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); 281 } 282 283 /** 284 * thread_group_start_cputime - Start cputime and return a sample 285 * @tsk: Task for which cputime needs to be started 286 * @samples: Storage for time samples 287 * 288 * The thread group cputime accounting is avoided when there are no posix 289 * CPU timers armed. Before starting a timer it's required to check whether 290 * the time accounting is active. If not, a full update of the atomic 291 * accounting store needs to be done and the accounting enabled. 292 * 293 * Updates @times with an uptodate sample of the thread group cputimes. 
 */
static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
{
	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;

	lockdep_assert_task_sighand_held(tsk);

	/* Check if cputimer isn't running. This is accessed without locking. */
	if (!READ_ONCE(pct->timers_active)) {
		struct task_cputime sum;

		/*
		 * The POSIX timer interface allows for absolute time expiry
		 * values through the TIMER_ABSTIME flag, therefore we have
		 * to synchronize the timer to the clock every time we start it.
		 */
		thread_group_cputime(tsk, &sum);
		update_gt_cputime(&cputimer->cputime_atomic, &sum);

		/*
		 * We're setting timers_active without a lock. Ensure this
		 * only gets written to in one operation. We set it after
		 * update_gt_cputime() as a small optimization, but
		 * barriers are not required because update_gt_cputime()
		 * can handle concurrent updates.
		 */
		WRITE_ONCE(pct->timers_active, true);
	}
	/* Hand back a sample taken from the atomic store. */
	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
}

/*
 * Sample the thread group cputimes via thread_group_cputime() and store
 * them in the CPUCLOCK indexed @samples array. Does not touch the atomic
 * accounting store.
 */
static void __thread_group_cputime(struct task_struct *tsk, u64 *samples)
{
	struct task_cputime ct;

	thread_group_cputime(tsk, &ct);
	store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime);
}

/*
 * Sample a process (thread group) clock for the given task clkid. If the
 * group's cputime accounting is already enabled, read the atomic
 * store. Otherwise a full update is required.  clkid is already validated.
338 */ 339 static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p, 340 bool start) 341 { 342 struct thread_group_cputimer *cputimer = &p->signal->cputimer; 343 struct posix_cputimers *pct = &p->signal->posix_cputimers; 344 u64 samples[CPUCLOCK_MAX]; 345 346 if (!READ_ONCE(pct->timers_active)) { 347 if (start) 348 thread_group_start_cputime(p, samples); 349 else 350 __thread_group_cputime(p, samples); 351 } else { 352 proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); 353 } 354 355 return samples[clkid]; 356 } 357 358 static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp) 359 { 360 const clockid_t clkid = CPUCLOCK_WHICH(clock); 361 struct task_struct *tsk; 362 u64 t; 363 364 rcu_read_lock(); 365 tsk = pid_task(pid_for_clock(clock, true), clock_pid_type(clock)); 366 if (!tsk) { 367 rcu_read_unlock(); 368 return -EINVAL; 369 } 370 371 if (CPUCLOCK_PERTHREAD(clock)) 372 t = cpu_clock_sample(clkid, tsk); 373 else 374 t = cpu_clock_sample_group(clkid, tsk, false); 375 rcu_read_unlock(); 376 377 *tp = ns_to_timespec64(t); 378 return 0; 379 } 380 381 /* 382 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. 383 * This is called from sys_timer_create() and do_cpu_nanosleep() with the 384 * new timer already all-zeros initialized. 385 */ 386 static int posix_cpu_timer_create(struct k_itimer *new_timer) 387 { 388 static struct lock_class_key posix_cpu_timers_key; 389 struct pid *pid; 390 391 rcu_read_lock(); 392 pid = pid_for_clock(new_timer->it_clock, false); 393 if (!pid) { 394 rcu_read_unlock(); 395 return -EINVAL; 396 } 397 398 /* 399 * If posix timer expiry is handled in task work context then 400 * timer::it_lock can be taken without disabling interrupts as all 401 * other locking happens in task context. 
This requires a separate 402 * lock class key otherwise regular posix timer expiry would record 403 * the lock class being taken in interrupt context and generate a 404 * false positive warning. 405 */ 406 if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK)) 407 lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key); 408 409 new_timer->kclock = &clock_posix_cpu; 410 timerqueue_init(&new_timer->it.cpu.node); 411 new_timer->it.cpu.pid = get_pid(pid); 412 rcu_read_unlock(); 413 return 0; 414 } 415 416 static struct posix_cputimer_base *timer_base(struct k_itimer *timer, 417 struct task_struct *tsk) 418 { 419 int clkidx = CPUCLOCK_WHICH(timer->it_clock); 420 421 if (CPUCLOCK_PERTHREAD(timer->it_clock)) 422 return tsk->posix_cputimers.bases + clkidx; 423 else 424 return tsk->signal->posix_cputimers.bases + clkidx; 425 } 426 427 /* 428 * Force recalculating the base earliest expiration on the next tick. 429 * This will also re-evaluate the need to keep around the process wide 430 * cputime counter and tick dependency and eventually shut these down 431 * if necessary. 432 */ 433 static void trigger_base_recalc_expires(struct k_itimer *timer, 434 struct task_struct *tsk) 435 { 436 struct posix_cputimer_base *base = timer_base(timer, tsk); 437 438 base->nextevt = 0; 439 } 440 441 /* 442 * Dequeue the timer and reset the base if it was its earliest expiration. 443 * It makes sure the next tick recalculates the base next expiration so we 444 * don't keep the costly process wide cputime counter around for a random 445 * amount of time, along with the tick dependency. 446 * 447 * If another timer gets queued between this and the next tick, its 448 * expiration will update the base next event if necessary on the next 449 * tick. 
 */
static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
{
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct posix_cputimer_base *base;

	/* Nothing more to do when cpu_timer_dequeue() reports failure. */
	if (!cpu_timer_dequeue(ctmr))
		return;

	base = timer_base(timer, p);
	/* Only the timer whose expiry is cached in the base forces a recalc. */
	if (cpu_timer_getexpires(ctmr) == base->nextevt)
		trigger_base_recalc_expires(timer, p);
}


/*
 * Clean up a CPU-clock timer that is about to be destroyed.
 * This is called from timer deletion with the timer already locked.
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again.  (This happens when the timer is in the middle of firing.)
 */
static int posix_cpu_timer_del(struct k_itimer *timer)
{
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct sighand_struct *sighand;
	struct task_struct *p;
	unsigned long flags;
	int ret = 0;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/*
	 * Protect against sighand release/switch in exit/exec and process/
	 * thread timer list entry concurrent read/writes.
	 */
	sighand = lock_task_sighand(p, &flags);
	if (unlikely(sighand == NULL)) {
		/*
		 * This raced with the reaping of the task. The exit cleanup
		 * should have removed this timer from the timer queue.
		 */
		WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));
	} else {
		if (timer->it.cpu.firing)
			ret = TIMER_RETRY;
		else
			disarm_timer(timer, p);

		unlock_task_sighand(p, &flags);
	}

out:
	rcu_read_unlock();
	/* Drop the timer's pid reference unless the caller has to retry. */
	if (!ret)
		put_pid(ctmr->pid);

	return ret;
}

/* Remove every timer from @head and clear the timers' back pointers. */
static void cleanup_timerqueue(struct timerqueue_head *head)
{
	struct timerqueue_node *node;
	struct cpu_timer *ctmr;

	while ((node = timerqueue_getnext(head))) {
		timerqueue_del(head, node);
		ctmr = container_of(node, struct cpu_timer, node);
		ctmr->head = NULL;
	}
}

/*
 * Clean out CPU timers which are still armed when a thread exits. The
 * timers are only removed from the list. No other updates are done. The
 * corresponding posix timers are still accessible, but cannot be rearmed.
 *
 * This must be called with the siglock held.
 */
static void cleanup_timers(struct posix_cputimers *pct)
{
	cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead);
	cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead);
	cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead);
}

/*
 * These are both called with the siglock held, when the current thread
 * is being reaped.  When the final (leader) thread in the group is reaped,
 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
 */
void posix_cpu_timers_exit(struct task_struct *tsk)
{
	/* Per-thread timer queues. */
	cleanup_timers(&tsk->posix_cputimers);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
	/* Process wide timer queues. */
	cleanup_timers(&tsk->signal->posix_cputimers);
}

/*
 * Insert the timer on the appropriate list before any timers that
 * expire later.  This must be called with the sighand lock held.
 */
static void arm_timer(struct k_itimer *timer, struct task_struct *p)
{
	struct posix_cputimer_base *base = timer_base(timer, p);
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 newexp = cpu_timer_getexpires(ctmr);

	/* No cache or tick dependency update when the enqueue returns false. */
	if (!cpu_timer_enqueue(&base->tqhead, ctmr))
		return;

	/*
	 * We are the new earliest-expiring POSIX 1.b timer, hence
	 * need to update expiration cache. Take into account that
	 * for process timers we share expiration cache with itimers
	 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
	 */
	if (newexp < base->nextevt)
		base->nextevt = newexp;

	/* Set the tick dependency on the thread or on the whole process. */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
	else
		tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER);
}

/*
 * The timer is locked, fire it and arrange for its reload.
 */
static void cpu_timer_fire(struct k_itimer *timer)
{
	struct cpu_timer *ctmr = &timer->it.cpu;

	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
		/*
		 * The user doesn't want any signal.
		 */
		cpu_timer_setexpires(ctmr, 0);
	} else if (unlikely(timer->sigq == NULL)) {
		/*
		 * This is a special case for clock_nanosleep,
		 * not a normal timer from sys_timer_create.
		 */
		wake_up_process(timer->it_process);
		cpu_timer_setexpires(ctmr, 0);
	} else if (!timer->it_interval) {
		/*
		 * One-shot timer.  Clear it as soon as it's fired.
		 */
		posix_timer_event(timer, 0);
		cpu_timer_setexpires(ctmr, 0);
	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
		/*
		 * The signal did not get queued because the signal
		 * was ignored, so we won't get any callback to
		 * reload the timer.  But we need to keep it
		 * ticking in case the signal is deliverable next time.
		 */
		posix_cpu_timer_rearm(timer);
		++timer->it_requeue_pending;
	}
}

/*
 * Guts of sys_timer_settime for CPU timers.
 * This is called with the timer locked and interrupts disabled.
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again.  (This happens when the timer is in the middle of firing.)
 *
 * Returns 0 on success, -ESRCH when the target task is gone, or
 * TIMER_RETRY as described above. When @old is non-NULL it receives the
 * previous setting on every path except the -ESRCH returns.
 */
static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
			       struct itimerspec64 *new, struct itimerspec64 *old)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	u64 old_expires, new_expires, old_incr, val;
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct sighand_struct *sighand;
	struct task_struct *p;
	unsigned long flags;
	int ret = 0;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p) {
		/*
		 * If p has just been reaped, we can no
		 * longer get any information about it at all.
		 */
		rcu_read_unlock();
		return -ESRCH;
	}

	/*
	 * Use the to_ktime conversion because that clamps the maximum
	 * value to KTIME_MAX and avoid multiplication overflows.
	 */
	new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value));

	/*
	 * Protect against sighand release/switch in exit/exec and p->cpu_timers
	 * and p->signal->cpu_timers read/write in arm_timer()
	 */
	sighand = lock_task_sighand(p, &flags);
	/*
	 * If p has just been reaped, we can no
	 * longer get any information about it at all.
	 */
	if (unlikely(sighand == NULL)) {
		rcu_read_unlock();
		return -ESRCH;
	}

	/*
	 * Disarm any old timer after extracting its expiry time.
	 */
	old_incr = timer->it_interval;
	old_expires = cpu_timer_getexpires(ctmr);

	if (unlikely(timer->it.cpu.firing)) {
		timer->it.cpu.firing = -1;
		ret = TIMER_RETRY;
	} else {
		cpu_timer_dequeue(ctmr);
	}

	/*
	 * We need to sample the current value to convert the new
	 * value from relative to absolute, and to convert the
	 * old value from absolute to relative.  To set a process
	 * timer, we need a sample to balance the thread expiry
	 * times (in arm_timer).  With an absolute time, we must
	 * check if it's already passed.  In short, we need a sample.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		val = cpu_clock_sample(clkid, p);
	else
		val = cpu_clock_sample_group(clkid, p, true);

	if (old) {
		if (old_expires == 0) {
			old->it_value.tv_sec = 0;
			old->it_value.tv_nsec = 0;
		} else {
			/*
			 * Update the timer in case it has overrun already.
			 * If it has, we'll report it as having overrun and
			 * with the next reloaded timer already ticking,
			 * though we are swallowing that pending
			 * notification here to install the new setting.
			 */
			u64 exp = bump_cpu_timer(timer, val);

			if (val < exp) {
				old_expires = exp - val;
				old->it_value = ns_to_timespec64(old_expires);
			} else {
				/* Already due: report a minimal non-zero remaining time. */
				old->it_value.tv_nsec = 1;
				old->it_value.tv_sec = 0;
			}
		}
	}

	if (unlikely(ret)) {
		/*
		 * We are colliding with the timer actually firing.
		 * Punt after filling in the timer's old value, and
		 * disable this firing since we are already reporting
		 * it as an overrun (thanks to bump_cpu_timer above).
		 */
		unlock_task_sighand(p, &flags);
		goto out;
	}

	/* A relative, non-zero value is converted to an absolute expiry. */
	if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
		new_expires += val;
	}

	/*
	 * Install the new expiry time (or zero).
	 * For a timer with no notification action, we don't actually
	 * arm the timer (we'll just fake it for timer_gettime).
	 */
	cpu_timer_setexpires(ctmr, new_expires);
	if (new_expires != 0 && val < new_expires) {
		arm_timer(timer, p);
	}

	unlock_task_sighand(p, &flags);
	/*
	 * Install the new reload setting, and
	 * set up the signal and overrun bookkeeping.
	 */
	timer->it_interval = timespec64_to_ktime(new->it_interval);

	/*
	 * This acts as a modification timestamp for the timer,
	 * so any automatic reload attempt will punt on seeing
	 * that we have reset the timer manually.
	 */
	timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
		~REQUEUE_PENDING;
	timer->it_overrun_last = 0;
	timer->it_overrun = -1;

	if (val >= new_expires) {
		if (new_expires != 0) {
			/*
			 * The designated time already passed, so we notify
			 * immediately, even if the thread never runs to
			 * accumulate more time on this clock.
			 */
			cpu_timer_fire(timer);
		}

		/*
		 * Make sure we don't keep around the process wide cputime
		 * counter or the tick dependency if they are not necessary.
		 */
		sighand = lock_task_sighand(p, &flags);
		if (!sighand)
			goto out;

		if (!cpu_timer_queued(ctmr))
			trigger_base_recalc_expires(timer, p);

		unlock_task_sighand(p, &flags);
	}
 out:
	rcu_read_unlock();
	if (old)
		old->it_interval = ns_to_timespec64(old_incr);

	return ret;
}

/*
 * Report the timer's reload interval and the time remaining until expiry
 * in @itp. Nothing is written when the target task cannot be found, and
 * it_value is left untouched when the timer has no expiry time set.
 */
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 now, expires = cpu_timer_getexpires(ctmr);
	struct task_struct *p;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/*
	 * Easy part: convert the reload time.
	 */
	itp->it_interval = ktime_to_timespec64(timer->it_interval);

	if (!expires)
		goto out;

	/*
	 * Sample the clock to take the difference with the expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		now = cpu_clock_sample(clkid, p);
	else
		now = cpu_clock_sample_group(clkid, p, false);

	if (now < expires) {
		itp->it_value = ns_to_timespec64(expires - now);
	} else {
		/*
		 * The timer should have expired already, but the firing
		 * hasn't taken place yet.  Say it's just about to expire.
		 */
		itp->it_value.tv_nsec = 1;
		itp->it_value.tv_sec = 0;
	}
out:
	rcu_read_unlock();
}

/* Bound on the number of queue entries looked at per collect_timerqueue() call */
#define MAX_COLLECTED	20

/*
 * Move expired timers from @head to the tail of the @firing list, marking
 * each one as firing. Returns the expiry time of the first timer left on
 * the queue, or U64_MAX when the queue was emptied.
 */
static u64 collect_timerqueue(struct timerqueue_head *head,
			      struct list_head *firing, u64 now)
{
	struct timerqueue_node *next;
	int i = 0;

	while ((next = timerqueue_getnext(head))) {
		struct cpu_timer *ctmr;
		u64 expires;

		ctmr = container_of(next, struct cpu_timer, node);
		expires = cpu_timer_getexpires(ctmr);
		/* Limit the number of timers to expire at once */
		if (++i == MAX_COLLECTED || now < expires)
			return expires;

		ctmr->firing = 1;
		cpu_timer_dequeue(ctmr);
		list_add_tail(&ctmr->elist, firing);
	}

	return U64_MAX;
}

/*
 * Collect the expired timers of all CPUCLOCK_MAX bases of @pct and store
 * each base's next expiry in its nextevt cache. @samples is indexed by
 * clock type.
 */
static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,
				    struct list_head *firing)
{
	struct posix_cputimer_base *base = pct->bases;
	int i;

	for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
		base->nextevt = collect_timerqueue(&base->tqhead, firing,
						    samples[i]);
	}
}

/* Clear a pending deadline overrun flag and send SIGXCPU for it. */
static inline void check_dl_overrun(struct task_struct *tsk)
{
	if (tsk->dl.dl_overrun) {
		tsk->dl.dl_overrun = 0;
		__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
	}
}

/*
 * Send @signo to current when @time has reached @limit. @rt and @hard only
 * select the wording of the optional log message. Returns true when the
 * signal was sent.
 */
static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
{
	if (time < limit)
		return false;

	if (print_fatal_signals) {
		pr_info("%s Watchdog Timeout (%s): %s[%d]\n",
			rt ? "RT" : "CPU", hard ? "hard" : "soft",
			current->comm, task_pid_nr(current));
	}
	__group_send_sig_info(signo, SEND_SIG_PRIV, current);
	return true;
}

/*
 * Check for any per-thread CPU timers that have fired and move them off
 * the tsk->posix_cputimers timer queues onto the firing list. The nextevt
 * caches of the bases are updated to reflect the remaining thread CPU
 * timers.
 */
static void check_thread_timers(struct task_struct *tsk,
				struct list_head *firing)
{
	struct posix_cputimers *pct = &tsk->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	if (dl_task(tsk))
		check_dl_overrun(tsk);

	if (expiry_cache_is_inactive(pct))
		return;

	task_sample_cputime(tsk, samples);
	collect_posix_cputimers(pct, samples, firing);

	/*
	 * Check for the special case thread timers.
	 */
	soft = task_rlimit(tsk, RLIMIT_RTTIME);
	if (soft != RLIM_INFINITY) {
		/* Task RT timeout is accounted in jiffies. RTTIME is usec */
		unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ);
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);

		/* At the hard limit, send SIGKILL. No further action. */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(rttime, hard, SIGKILL, true, true))
			return;

		/* At the soft limit, send a SIGXCPU every second */
		if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {
			soft += USEC_PER_SEC;
			tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;
		}
	}

	/* Drop the tick dependency once nothing is left to expire. */
	if (expiry_cache_is_inactive(pct))
		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}

/* Disable process wide timer accounting and drop the tick dependency. */
static inline void stop_process_timers(struct signal_struct *sig)
{
	struct posix_cputimers *pct = &sig->posix_cputimers;

	/* Turn off the active flag. This is done without locking. */
	WRITE_ONCE(pct->timers_active, false);
	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
}

/*
 * Handle one CPU itimer: when @cur_time has reached its expiry, advance or
 * clear the expiry, emit the tracepoint and send @signo. A remaining
 * expiry which is earlier than *@expires is written back to *@expires.
 */
static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
			     u64 *expires, u64 cur_time, int signo)
{
	if (!it->expires)
		return;

	if (cur_time >= it->expires) {
		if (it->incr)
			it->expires += it->incr;
		else
			it->expires = 0;

		trace_itimer_expire(signo == SIGPROF ?
				    ITIMER_PROF : ITIMER_VIRTUAL,
				    task_tgid(tsk), cur_time);
		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
	}

	if (it->expires && it->expires < *expires)
		*expires = it->expires;
}

/*
 * Check for any process wide CPU timers that have fired and move them
 * off the thread group's timer queues onto the firing list.  Per-thread
 * timers have already been taken off.
 */
static void check_process_timers(struct task_struct *tsk,
				 struct list_head *firing)
{
	struct signal_struct *const sig = tsk->signal;
	struct posix_cputimers *pct = &sig->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	/*
	 * If there are no active process wide timers (POSIX 1.b, itimers,
	 * RLIMIT_CPU) nothing to check. Also skip the process wide timer
	 * processing when there is already another task handling them.
	 */
	if (!READ_ONCE(pct->timers_active) || pct->expiry_active)
		return;

	/*
	 * Signify that a thread is checking for process timers.
	 * Write access to this field is protected by the sighand lock.
	 */
	pct->expiry_active = true;

	/*
	 * Collect the current process totals. Group accounting is active
	 * so the sample can be taken directly.
	 */
	proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples);
	collect_posix_cputimers(pct, samples, firing);

	/*
	 * Check for the special case process timers.
	 */
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF],
			 &pct->bases[CPUCLOCK_PROF].nextevt,
			 samples[CPUCLOCK_PROF], SIGPROF);
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT],
			 &pct->bases[CPUCLOCK_VIRT].nextevt,
			 samples[CPUCLOCK_VIRT], SIGVTALRM);

	soft = task_rlimit(tsk, RLIMIT_CPU);
	if (soft != RLIM_INFINITY) {
		/* RLIMIT_CPU is in seconds. Samples are nanoseconds */
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
		u64 ptime = samples[CPUCLOCK_PROF];
		u64 softns = (u64)soft * NSEC_PER_SEC;
		u64 hardns = (u64)hard * NSEC_PER_SEC;

		/*
		 * At the hard limit, send SIGKILL. No further action.
		 *
		 * NOTE(review): this return leaves pct->expiry_active set;
		 * presumably harmless because SIGKILL was just sent - confirm.
		 */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(ptime, hardns, SIGKILL, false, true))
			return;

		/* At the soft limit, send a SIGXCPU every second */
		if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {
			sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;
			softns += NSEC_PER_SEC;
		}

		/* Update the expiry cache */
		if (softns < pct->bases[CPUCLOCK_PROF].nextevt)
			pct->bases[CPUCLOCK_PROF].nextevt = softns;
	}

	if (expiry_cache_is_inactive(pct))
		stop_process_timers(sig);

	pct->expiry_active = false;
}

/*
 * This is called from the signal code (via posixtimer_rearm)
 * when the last timer signal was delivered and we have to reload the timer.
 */
static void posix_cpu_timer_rearm(struct k_itimer *timer)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct task_struct *p;
	struct sighand_struct *sighand;
	unsigned long flags;
	u64 now;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/* Protect timer list r/w in arm_timer() */
	sighand = lock_task_sighand(p, &flags);
	if (unlikely(sighand == NULL))
		goto out;

	/*
	 * Fetch the current sample and update the timer's expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		now = cpu_clock_sample(clkid, p);
	else
		now = cpu_clock_sample_group(clkid, p, true);

	bump_cpu_timer(timer, now);

	/*
	 * Now re-arm for the new expiry time.
	 */
	arm_timer(timer, p);
	unlock_task_sighand(p, &flags);
out:
	rcu_read_unlock();
}

/**
 * task_cputimers_expired - Check whether posix CPU timers are expired
 *
 * @samples:	Array of current samples for the CPUCLOCK clocks
 * @pct:	Pointer to a posix_cputimers container
 *
 * Returns true if any member of @samples is greater than or equal to the
 * corresponding member of @pct->bases[CLK].nextevt. False otherwise
 */
static inline bool
task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct)
{
	int i;

	for (i = 0; i < CPUCLOCK_MAX; i++) {
		if (samples[i] >= pct->bases[i].nextevt)
			return true;
	}
	return false;
}

/**
 * fastpath_timer_check - POSIX CPU timers fast path.
 *
 * @tsk:	The task (thread) being checked.
 *
 * Check the task and thread group timers.  If both are zero (there are no
 * timers set) return false.  Otherwise snapshot the task and thread group
 * timers and compare them with the corresponding expiration times.  Return
 * true if a timer has expired, else return false.
1112 */ 1113 static inline bool fastpath_timer_check(struct task_struct *tsk) 1114 { 1115 struct posix_cputimers *pct = &tsk->posix_cputimers; 1116 struct signal_struct *sig; 1117 1118 if (!expiry_cache_is_inactive(pct)) { 1119 u64 samples[CPUCLOCK_MAX]; 1120 1121 task_sample_cputime(tsk, samples); 1122 if (task_cputimers_expired(samples, pct)) 1123 return true; 1124 } 1125 1126 sig = tsk->signal; 1127 pct = &sig->posix_cputimers; 1128 /* 1129 * Check if thread group timers expired when timers are active and 1130 * no other thread in the group is already handling expiry for 1131 * thread group cputimers. These fields are read without the 1132 * sighand lock. However, this is fine because this is meant to be 1133 * a fastpath heuristic to determine whether we should try to 1134 * acquire the sighand lock to handle timer expiry. 1135 * 1136 * In the worst case scenario, if concurrently timers_active is set 1137 * or expiry_active is cleared, but the current thread doesn't see 1138 * the change yet, the timer checks are delayed until the next 1139 * thread in the group gets a scheduler interrupt to handle the 1140 * timer. This isn't an issue in practice because these types of 1141 * delays with signals actually getting sent are expected. 1142 */ 1143 if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) { 1144 u64 samples[CPUCLOCK_MAX]; 1145 1146 proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, 1147 samples); 1148 1149 if (task_cputimers_expired(samples, pct)) 1150 return true; 1151 } 1152 1153 if (dl_task(tsk) && tsk->dl.dl_overrun) 1154 return true; 1155 1156 return false; 1157 } 1158 1159 static void handle_posix_cpu_timers(struct task_struct *tsk); 1160 1161 #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK 1162 static void posix_cpu_timers_work(struct callback_head *work) 1163 { 1164 handle_posix_cpu_timers(current); 1165 } 1166 1167 /* 1168 * Clear existing posix CPU timers task work. 
1169 */ 1170 void clear_posix_cputimers_work(struct task_struct *p) 1171 { 1172 /* 1173 * A copied work entry from the old task is not meaningful, clear it. 1174 * N.B. init_task_work will not do this. 1175 */ 1176 memset(&p->posix_cputimers_work.work, 0, 1177 sizeof(p->posix_cputimers_work.work)); 1178 init_task_work(&p->posix_cputimers_work.work, 1179 posix_cpu_timers_work); 1180 p->posix_cputimers_work.scheduled = false; 1181 } 1182 1183 /* 1184 * Initialize posix CPU timers task work in init task. Out of line to 1185 * keep the callback static and to avoid header recursion hell. 1186 */ 1187 void __init posix_cputimers_init_work(void) 1188 { 1189 clear_posix_cputimers_work(current); 1190 } 1191 1192 /* 1193 * Note: All operations on tsk->posix_cputimer_work.scheduled happen either 1194 * in hard interrupt context or in task context with interrupts 1195 * disabled. Aside of that the writer/reader interaction is always in the 1196 * context of the current task, which means they are strict per CPU. 1197 */ 1198 static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) 1199 { 1200 return tsk->posix_cputimers_work.scheduled; 1201 } 1202 1203 static inline void __run_posix_cpu_timers(struct task_struct *tsk) 1204 { 1205 if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled)) 1206 return; 1207 1208 /* Schedule task work to actually expire the timers */ 1209 tsk->posix_cputimers_work.scheduled = true; 1210 task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME); 1211 } 1212 1213 static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk, 1214 unsigned long start) 1215 { 1216 bool ret = true; 1217 1218 /* 1219 * On !RT kernels interrupts are disabled while collecting expired 1220 * timers, so no tick can happen and the fast path check can be 1221 * reenabled without further checks. 
1222 */ 1223 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { 1224 tsk->posix_cputimers_work.scheduled = false; 1225 return true; 1226 } 1227 1228 /* 1229 * On RT enabled kernels ticks can happen while the expired timers 1230 * are collected under sighand lock. But any tick which observes 1231 * the CPUTIMERS_WORK_SCHEDULED bit set, does not run the fastpath 1232 * checks. So reenabling the tick work has do be done carefully: 1233 * 1234 * Disable interrupts and run the fast path check if jiffies have 1235 * advanced since the collecting of expired timers started. If 1236 * jiffies have not advanced or the fast path check did not find 1237 * newly expired timers, reenable the fast path check in the timer 1238 * interrupt. If there are newly expired timers, return false and 1239 * let the collection loop repeat. 1240 */ 1241 local_irq_disable(); 1242 if (start != jiffies && fastpath_timer_check(tsk)) 1243 ret = false; 1244 else 1245 tsk->posix_cputimers_work.scheduled = false; 1246 local_irq_enable(); 1247 1248 return ret; 1249 } 1250 #else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */ 1251 static inline void __run_posix_cpu_timers(struct task_struct *tsk) 1252 { 1253 lockdep_posixtimer_enter(); 1254 handle_posix_cpu_timers(tsk); 1255 lockdep_posixtimer_exit(); 1256 } 1257 1258 static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) 1259 { 1260 return false; 1261 } 1262 1263 static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk, 1264 unsigned long start) 1265 { 1266 return true; 1267 } 1268 #endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */ 1269 1270 static void handle_posix_cpu_timers(struct task_struct *tsk) 1271 { 1272 struct k_itimer *timer, *next; 1273 unsigned long flags, start; 1274 LIST_HEAD(firing); 1275 1276 if (!lock_task_sighand(tsk, &flags)) 1277 return; 1278 1279 do { 1280 /* 1281 * On RT locking sighand lock does not disable interrupts, 1282 * so this needs to be careful vs. ticks. Store the current 1283 * jiffies value. 
1284 */ 1285 start = READ_ONCE(jiffies); 1286 barrier(); 1287 1288 /* 1289 * Here we take off tsk->signal->cpu_timers[N] and 1290 * tsk->cpu_timers[N] all the timers that are firing, and 1291 * put them on the firing list. 1292 */ 1293 check_thread_timers(tsk, &firing); 1294 1295 check_process_timers(tsk, &firing); 1296 1297 /* 1298 * The above timer checks have updated the expiry cache and 1299 * because nothing can have queued or modified timers after 1300 * sighand lock was taken above it is guaranteed to be 1301 * consistent. So the next timer interrupt fastpath check 1302 * will find valid data. 1303 * 1304 * If timer expiry runs in the timer interrupt context then 1305 * the loop is not relevant as timers will be directly 1306 * expired in interrupt context. The stub function below 1307 * returns always true which allows the compiler to 1308 * optimize the loop out. 1309 * 1310 * If timer expiry is deferred to task work context then 1311 * the following rules apply: 1312 * 1313 * - On !RT kernels no tick can have happened on this CPU 1314 * after sighand lock was acquired because interrupts are 1315 * disabled. So reenabling task work before dropping 1316 * sighand lock and reenabling interrupts is race free. 1317 * 1318 * - On RT kernels ticks might have happened but the tick 1319 * work ignored posix CPU timer handling because the 1320 * CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work 1321 * must be done very carefully including a check whether 1322 * ticks have happened since the start of the timer 1323 * expiry checks. posix_cpu_timers_enable_work() takes 1324 * care of that and eventually lets the expiry checks 1325 * run again. 1326 */ 1327 } while (!posix_cpu_timers_enable_work(tsk, start)); 1328 1329 /* 1330 * We must release sighand lock before taking any timer's lock. 1331 * There is a potential race with timer deletion here, as the 1332 * siglock now protects our private firing list. 
We have set 1333 * the firing flag in each timer, so that a deletion attempt 1334 * that gets the timer lock before we do will give it up and 1335 * spin until we've taken care of that timer below. 1336 */ 1337 unlock_task_sighand(tsk, &flags); 1338 1339 /* 1340 * Now that all the timers on our list have the firing flag, 1341 * no one will touch their list entries but us. We'll take 1342 * each timer's lock before clearing its firing flag, so no 1343 * timer call will interfere. 1344 */ 1345 list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) { 1346 int cpu_firing; 1347 1348 /* 1349 * spin_lock() is sufficient here even independent of the 1350 * expiry context. If expiry happens in hard interrupt 1351 * context it's obvious. For task work context it's safe 1352 * because all other operations on timer::it_lock happen in 1353 * task context (syscall or exit). 1354 */ 1355 spin_lock(&timer->it_lock); 1356 list_del_init(&timer->it.cpu.elist); 1357 cpu_firing = timer->it.cpu.firing; 1358 timer->it.cpu.firing = 0; 1359 /* 1360 * The firing flag is -1 if we collided with a reset 1361 * of the timer, which already reported this 1362 * almost-firing as an overrun. So don't generate an event. 1363 */ 1364 if (likely(cpu_firing >= 0)) 1365 cpu_timer_fire(timer); 1366 spin_unlock(&timer->it_lock); 1367 } 1368 } 1369 1370 /* 1371 * This is called from the timer interrupt handler. The irq handler has 1372 * already updated our counts. We need to check if any timers fire now. 1373 * Interrupts are disabled. 1374 */ 1375 void run_posix_cpu_timers(void) 1376 { 1377 struct task_struct *tsk = current; 1378 1379 lockdep_assert_irqs_disabled(); 1380 1381 /* 1382 * If the actual expiry is deferred to task work context and the 1383 * work is already scheduled there is no point to do anything here. 1384 */ 1385 if (posix_cpu_timers_work_scheduled(tsk)) 1386 return; 1387 1388 /* 1389 * The fast path checks that there are no expired thread or thread 1390 * group timers. 
If that's so, just return. 1391 */ 1392 if (!fastpath_timer_check(tsk)) 1393 return; 1394 1395 __run_posix_cpu_timers(tsk); 1396 } 1397 1398 /* 1399 * Set one of the process-wide special case CPU timers or RLIMIT_CPU. 1400 * The tsk->sighand->siglock must be held by the caller. 1401 */ 1402 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid, 1403 u64 *newval, u64 *oldval) 1404 { 1405 u64 now, *nextevt; 1406 1407 if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED)) 1408 return; 1409 1410 nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt; 1411 now = cpu_clock_sample_group(clkid, tsk, true); 1412 1413 if (oldval) { 1414 /* 1415 * We are setting itimer. The *oldval is absolute and we update 1416 * it to be relative, *newval argument is relative and we update 1417 * it to be absolute. 1418 */ 1419 if (*oldval) { 1420 if (*oldval <= now) { 1421 /* Just about to fire. */ 1422 *oldval = TICK_NSEC; 1423 } else { 1424 *oldval -= now; 1425 } 1426 } 1427 1428 if (*newval) 1429 *newval += now; 1430 } 1431 1432 /* 1433 * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF 1434 * expiry cache is also used by RLIMIT_CPU!. 1435 */ 1436 if (*newval < *nextevt) 1437 *nextevt = *newval; 1438 1439 tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER); 1440 } 1441 1442 static int do_cpu_nanosleep(const clockid_t which_clock, int flags, 1443 const struct timespec64 *rqtp) 1444 { 1445 struct itimerspec64 it; 1446 struct k_itimer timer; 1447 u64 expires; 1448 int error; 1449 1450 /* 1451 * Set up a temporary timer and then wait for it to go off. 
1452 */ 1453 memset(&timer, 0, sizeof timer); 1454 spin_lock_init(&timer.it_lock); 1455 timer.it_clock = which_clock; 1456 timer.it_overrun = -1; 1457 error = posix_cpu_timer_create(&timer); 1458 timer.it_process = current; 1459 1460 if (!error) { 1461 static struct itimerspec64 zero_it; 1462 struct restart_block *restart; 1463 1464 memset(&it, 0, sizeof(it)); 1465 it.it_value = *rqtp; 1466 1467 spin_lock_irq(&timer.it_lock); 1468 error = posix_cpu_timer_set(&timer, flags, &it, NULL); 1469 if (error) { 1470 spin_unlock_irq(&timer.it_lock); 1471 return error; 1472 } 1473 1474 while (!signal_pending(current)) { 1475 if (!cpu_timer_getexpires(&timer.it.cpu)) { 1476 /* 1477 * Our timer fired and was reset, below 1478 * deletion can not fail. 1479 */ 1480 posix_cpu_timer_del(&timer); 1481 spin_unlock_irq(&timer.it_lock); 1482 return 0; 1483 } 1484 1485 /* 1486 * Block until cpu_timer_fire (or a signal) wakes us. 1487 */ 1488 __set_current_state(TASK_INTERRUPTIBLE); 1489 spin_unlock_irq(&timer.it_lock); 1490 schedule(); 1491 spin_lock_irq(&timer.it_lock); 1492 } 1493 1494 /* 1495 * We were interrupted by a signal. 1496 */ 1497 expires = cpu_timer_getexpires(&timer.it.cpu); 1498 error = posix_cpu_timer_set(&timer, 0, &zero_it, &it); 1499 if (!error) { 1500 /* 1501 * Timer is now unarmed, deletion can not fail. 1502 */ 1503 posix_cpu_timer_del(&timer); 1504 } 1505 spin_unlock_irq(&timer.it_lock); 1506 1507 while (error == TIMER_RETRY) { 1508 /* 1509 * We need to handle case when timer was or is in the 1510 * middle of firing. In other cases we already freed 1511 * resources. 1512 */ 1513 spin_lock_irq(&timer.it_lock); 1514 error = posix_cpu_timer_del(&timer); 1515 spin_unlock_irq(&timer.it_lock); 1516 } 1517 1518 if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) { 1519 /* 1520 * It actually did fire already. 1521 */ 1522 return 0; 1523 } 1524 1525 error = -ERESTART_RESTARTBLOCK; 1526 /* 1527 * Report back to the user the time still remaining. 
1528 */ 1529 restart = ¤t->restart_block; 1530 restart->nanosleep.expires = expires; 1531 if (restart->nanosleep.type != TT_NONE) 1532 error = nanosleep_copyout(restart, &it.it_value); 1533 } 1534 1535 return error; 1536 } 1537 1538 static long posix_cpu_nsleep_restart(struct restart_block *restart_block); 1539 1540 static int posix_cpu_nsleep(const clockid_t which_clock, int flags, 1541 const struct timespec64 *rqtp) 1542 { 1543 struct restart_block *restart_block = ¤t->restart_block; 1544 int error; 1545 1546 /* 1547 * Diagnose required errors first. 1548 */ 1549 if (CPUCLOCK_PERTHREAD(which_clock) && 1550 (CPUCLOCK_PID(which_clock) == 0 || 1551 CPUCLOCK_PID(which_clock) == task_pid_vnr(current))) 1552 return -EINVAL; 1553 1554 error = do_cpu_nanosleep(which_clock, flags, rqtp); 1555 1556 if (error == -ERESTART_RESTARTBLOCK) { 1557 1558 if (flags & TIMER_ABSTIME) 1559 return -ERESTARTNOHAND; 1560 1561 restart_block->nanosleep.clockid = which_clock; 1562 set_restart_fn(restart_block, posix_cpu_nsleep_restart); 1563 } 1564 return error; 1565 } 1566 1567 static long posix_cpu_nsleep_restart(struct restart_block *restart_block) 1568 { 1569 clockid_t which_clock = restart_block->nanosleep.clockid; 1570 struct timespec64 t; 1571 1572 t = ns_to_timespec64(restart_block->nanosleep.expires); 1573 1574 return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t); 1575 } 1576 1577 #define PROCESS_CLOCK make_process_cpuclock(0, CPUCLOCK_SCHED) 1578 #define THREAD_CLOCK make_thread_cpuclock(0, CPUCLOCK_SCHED) 1579 1580 static int process_cpu_clock_getres(const clockid_t which_clock, 1581 struct timespec64 *tp) 1582 { 1583 return posix_cpu_clock_getres(PROCESS_CLOCK, tp); 1584 } 1585 static int process_cpu_clock_get(const clockid_t which_clock, 1586 struct timespec64 *tp) 1587 { 1588 return posix_cpu_clock_get(PROCESS_CLOCK, tp); 1589 } 1590 static int process_cpu_timer_create(struct k_itimer *timer) 1591 { 1592 timer->it_clock = PROCESS_CLOCK; 1593 return 
posix_cpu_timer_create(timer); 1594 } 1595 static int process_cpu_nsleep(const clockid_t which_clock, int flags, 1596 const struct timespec64 *rqtp) 1597 { 1598 return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp); 1599 } 1600 static int thread_cpu_clock_getres(const clockid_t which_clock, 1601 struct timespec64 *tp) 1602 { 1603 return posix_cpu_clock_getres(THREAD_CLOCK, tp); 1604 } 1605 static int thread_cpu_clock_get(const clockid_t which_clock, 1606 struct timespec64 *tp) 1607 { 1608 return posix_cpu_clock_get(THREAD_CLOCK, tp); 1609 } 1610 static int thread_cpu_timer_create(struct k_itimer *timer) 1611 { 1612 timer->it_clock = THREAD_CLOCK; 1613 return posix_cpu_timer_create(timer); 1614 } 1615 1616 const struct k_clock clock_posix_cpu = { 1617 .clock_getres = posix_cpu_clock_getres, 1618 .clock_set = posix_cpu_clock_set, 1619 .clock_get_timespec = posix_cpu_clock_get, 1620 .timer_create = posix_cpu_timer_create, 1621 .nsleep = posix_cpu_nsleep, 1622 .timer_set = posix_cpu_timer_set, 1623 .timer_del = posix_cpu_timer_del, 1624 .timer_get = posix_cpu_timer_get, 1625 .timer_rearm = posix_cpu_timer_rearm, 1626 }; 1627 1628 const struct k_clock clock_process = { 1629 .clock_getres = process_cpu_clock_getres, 1630 .clock_get_timespec = process_cpu_clock_get, 1631 .timer_create = process_cpu_timer_create, 1632 .nsleep = process_cpu_nsleep, 1633 }; 1634 1635 const struct k_clock clock_thread = { 1636 .clock_getres = thread_cpu_clock_getres, 1637 .clock_get_timespec = thread_cpu_clock_get, 1638 .timer_create = thread_cpu_timer_create, 1639 }; 1640