#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include <linux/sched/cputime.h>
#include "sched.h"

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * As a result, another CPU may read this CPU's irq time and race with
 * irq/vtime_account on this CPU. We would either get the old or the new
 * value, with the side effect of accounting a slice of irq time to the
 * wrong task when an irq is in progress while we read rq->clock. That is
 * a worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);

static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 0;
}

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	s64 delta;
	int cpu;

	if (!sched_clock_irqtime)
		return;

	cpu = smp_processor_id();
	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
	irqtime->irq_start_time += delta;

	u64_stats_update_begin(&irqtime->sync);
	/*
	 * We do not account for softirq time from ksoftirqd here.
	 * We want to continue accounting softirq time to the ksoftirqd thread
	 * in that case, so as not to confuse the scheduler with a special task
	 * that does not consume any time, but still wants to run.
	 */
	if (hardirq_count()) {
		cpustat[CPUTIME_IRQ] += delta;
		irqtime->tick_delta += delta;
	} else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) {
		cpustat[CPUTIME_SOFTIRQ] += delta;
		irqtime->tick_delta += delta;
	}

	u64_stats_update_end(&irqtime->sync);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);

static u64 irqtime_tick_accounted(u64 maxtime)
{
	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
	u64 delta;

	delta = min(irqtime->tick_delta, maxtime);
	irqtime->tick_delta -= delta;

	return delta;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime	(0)

static u64 irqtime_tick_accounted(u64 dummy)
{
	return 0;
}

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
	/*
	 * Since all updates are sure to touch the root cgroup, we
	 * go ahead and touch it first. If the root cgroup is the
	 * only cgroup, then nothing else should be necessary.
	 */
	__this_cpu_add(kernel_cpustat.cpustat[index], tmp);

	cpuacct_account_field(p, index, tmp);
}

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 */
void account_user_time(struct task_struct *p, u64 cputime)
{
	int index;

	/* Add user time to process. */
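	/*
	 * account_group_user_time() below additionally folds the time into
	 * the thread-group totals used by the POSIX CPU timer code.
	 */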
	p->utime += cputime;
	account_group_user_time(p, cputime);

	index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

	/* Add user time to cpustat. */
	task_group_account_field(p, index, cputime);

	/* Account for user time used */
	acct_account_cputime(p);
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 */
void account_guest_time(struct task_struct *p, u64 cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Add guest time to process. */
	p->utime += cputime;
	account_group_user_time(p, cputime);
	p->gtime += cputime;

	/* Add guest time to cpustat. */
	if (task_nice(p) > 0) {
		cpustat[CPUTIME_NICE] += cputime;
		cpustat[CPUTIME_GUEST_NICE] += cputime;
	} else {
		cpustat[CPUTIME_USER] += cputime;
		cpustat[CPUTIME_GUEST] += cputime;
	}
}

/*
 * Account system cpu time to a process and the desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @index: index of the cpustat field that has to be updated
 */
void account_system_index_time(struct task_struct *p,
			       u64 cputime, enum cpu_usage_stat index)
{
	/* Add system time to process. */
	p->stime += cputime;
	account_group_system_time(p, cputime);

	/* Add system time to cpustat. */
	task_group_account_field(p, index, cputime);

	/* Account for system time used */
	acct_account_cputime(p);
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 */
void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
{
	int index;

	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
		account_guest_time(p, cputime);
		return;
	}

	if (hardirq_count() - hardirq_offset)
		index = CPUTIME_IRQ;
	else if (in_serving_softirq())
		index = CPUTIME_SOFTIRQ;
	else
		index = CPUTIME_SYSTEM;

	account_system_index_time(p, cputime, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(u64 cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	cpustat[CPUTIME_STEAL] += cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(u64 cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	struct rq *rq = this_rq();

	if (atomic_read(&rq->nr_iowait) > 0)
		cpustat[CPUTIME_IOWAIT] += cputime;
	else
		cpustat[CPUTIME_IDLE] += cputime;
}

/*
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may on
 * occasion account more time than the calling functions think elapsed.
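 *
 * The amount accounted per call is clamped to @maxtime; any excess stays
 * in the paravirt steal clock and is picked up by subsequent calls.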
 */
static __always_inline u64 steal_account_process_time(u64 maxtime)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		u64 steal;

		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;
		steal = min(steal, maxtime);
		account_steal_time(steal);
		this_rq()->prev_steal_time += steal;

		return steal;
	}
#endif
	return 0;
}

/*
 * Account how much elapsed time was spent in steal, irq, or softirq time.
 */
static inline u64 account_other_time(u64 max)
{
	u64 accounted;

	/* Shall be converted to a lockdep-enabled lightweight check */
	WARN_ON_ONCE(!irqs_disabled());

	accounted = steal_account_process_time(max);

	if (accounted < max)
		accounted += irqtime_tick_accounted(max - accounted);

	return accounted;
}

#ifdef CONFIG_64BIT
static inline u64 read_sum_exec_runtime(struct task_struct *t)
{
	return t->se.sum_exec_runtime;
}
#else
static u64 read_sum_exec_runtime(struct task_struct *t)
{
	u64 ns;
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(t, &rf);
	ns = t->se.sum_exec_runtime;
	task_rq_unlock(rq, t, &rf);

	return ns;
}
#endif

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	u64 utime, stime;
	struct task_struct *t;
	unsigned int seq, nextseq;
	unsigned long flags;

	/*
	 * Update current task runtime to account pending time since the last
	 * scheduler action or thread_group_cputime() call. This thread group
	 * might have other running tasks on different CPUs, but updating
	 * their runtime can affect syscall performance, so we skip accounting
	 * those pending times and rely only on values updated on tick or
	 * other scheduler action.
	 */
	if (same_thread_group(current, tsk))
		(void) task_sched_runtime(current);

	rcu_read_lock();
	/* Attempt a lockless read on the first round. */
	nextseq = 0;
	do {
		seq = nextseq;
		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
		times->utime = sig->utime;
		times->stime = sig->stime;
		times->sum_exec_runtime = sig->sum_sched_runtime;

		for_each_thread(tsk, t) {
			task_cputime(t, &utime, &stime);
			times->utime += utime;
			times->stime += stime;
			times->sum_exec_runtime += read_sum_exec_runtime(t);
		}
		/* If lockless access failed, take the lock. */
		nextseq = 1;
	} while (need_seqretry(&sig->stats_lock, seq));
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
	rcu_read_unlock();
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as no timer
 * goes off while we are in a hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on irq or
 * softirq time, as those do not count in task exec_runtime any more.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
					 struct rq *rq, int ticks)
{
	u64 other, cputime = TICK_NSEC * ticks;

	/*
	 * When returning from idle, many ticks can get accounted at
	 * once, including some ticks of steal, irq, and softirq time.
	 * Subtract those ticks from the amount of time accounted to
	 * idle, or potentially user or system time. Due to rounding,
	 * other time can exceed ticks occasionally.
	 */
	other = account_other_time(ULONG_MAX);
	if (other >= cputime)
		return;

	cputime -= other;

	if (this_cpu_ksoftirqd() == p) {
		/*
		 * ksoftirqd time does not get accounted in cpu_softirq_time,
		 * so we have to handle it separately here.
		 * Also, p->stime needs to be updated for ksoftirqd.
		 */
		account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
	} else if (user_tick) {
		account_user_time(p, cputime);
	} else if (p == rq->idle) {
		account_idle_time(cputime);
	} else if (p->flags & PF_VCPU) { /* System time or guest time */
		account_guest_time(p, cputime);
	} else {
		account_system_index_time(p, cputime, CPUTIME_SYSTEM);
	}
}

static void irqtime_account_idle_ticks(int ticks)
{
	struct rq *rq = this_rq();

	irqtime_account_process_tick(current, 0, rq, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq, int nr_ticks) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING

#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_common_task_switch(struct task_struct *prev)
{
	if (is_idle_task(prev))
		vtime_account_idle(prev);
	else
		vtime_account_system(prev);

	vtime_flush(prev);
	arch_vtime_task_switch(prev);
}
#endif

#endif /* CONFIG_VIRT_CPU_ACCOUNTING */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * have a different meaning of idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account_irq_enter(struct task_struct *tsk)
{
	if (!in_interrupt() && is_idle_task(tsk))
		vtime_account_idle(tsk);
	else
		vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	*ut = p->utime;
	*st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);

	*ut = cputime.utime;
	*st = cputime.stime;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	u64 cputime, steal;
	struct rq *rq = this_rq();

	if (vtime_accounting_cpu_enabled())
		return;

	if (sched_clock_irqtime) {
		irqtime_account_process_tick(p, user_tick, rq, 1);
		return;
	}

	cputime = TICK_NSEC;
	steal = steal_account_process_time(ULONG_MAX);

	if (steal >= cputime)
		return;

	cputime -= steal;

	if (user_tick)
		account_user_time(p, cputime);
	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
		account_system_time(p, HARDIRQ_OFFSET, cputime);
	else
		account_idle_time(cputime);
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of ticks spent idle
 */
void account_idle_ticks(unsigned long ticks)
{
	u64 cputime, steal;

	if (sched_clock_irqtime) {
		irqtime_account_idle_ticks(ticks);
		return;
	}

	cputime = ticks * TICK_NSEC;
	steal = steal_account_process_time(ULONG_MAX);

	if (steal >= cputime)
		return;

	cputime -= steal;
	account_idle_time(cputime);
}

/*
 * Perform (stime * rtime) / total, but avoid multiplication overflow by
 * losing precision when the numbers are big.
 */
static u64 scale_stime(u64 stime, u64 rtime, u64 total)
{
	u64 scaled;

	for (;;) {
		/* Make sure "rtime" is the bigger of stime/rtime */
		if (stime > rtime)
			swap(rtime, stime);

		/* Make sure 'total' fits in 32 bits */
		if (total >> 32)
			goto drop_precision;

		/* Does rtime (and thus stime) fit in 32 bits? */
		if (!(rtime >> 32))
			break;

		/* Can we just balance rtime/stime rather than dropping bits? */
		if (stime >> 31)
			goto drop_precision;

		/* We can grow stime and shrink rtime and try to make them both fit */
		stime <<= 1;
		rtime >>= 1;
		continue;

drop_precision:
		/* We drop from rtime, it has more bits than stime */
		rtime >>= 1;
		total >>= 1;
	}

	/*
	 * Make sure gcc understands that this is a 32x32->64 multiply,
	 * followed by a 64/32->64 divide.
	 */
	scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
	return scaled;
}

/*
 * Adjust the imprecise tick based cputime against the scheduler's runtime
 * accounting.
 *
 * Tick based cputime accounting depends on whether a task's scheduling
 * timeslices happen to be interrupted by the timer or not. Depending on
 * these circumstances, the number of observed interrupts may over- or
 * under-estimate the real user and system cputime, matching it only with
 * variable precision.
 *
 * Fix this by scaling these tick based values against the total runtime
 * accounted by the CFS scheduler.
 *
 * This code provides the following guarantees:
 *
 *   stime + utime == rtime
 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 *
 * Assuming that rtime_i+1 >= rtime_i.
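 *
 * As a made-up illustration: with prev->stime = 6, prev->utime = 2 and a
 * new rtime of 10 whose tick samples would split as 2/8, the clamping
 * below keeps stime at 6 and yields utime = 10 - 6 = 4, so both values
 * stay monotonic while still summing to rtime.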
 */
static void cputime_adjust(struct task_cputime *curr,
			   struct prev_cputime *prev,
			   u64 *ut, u64 *st)
{
	u64 rtime, stime, utime;
	unsigned long flags;

	/* Serialize concurrent callers such that we can honour our guarantees */
	raw_spin_lock_irqsave(&prev->lock, flags);
	rtime = curr->sum_exec_runtime;

	/*
	 * This is possible under two circumstances:
	 *  - rtime isn't monotonic after all (a bug);
	 *  - we got reordered by the lock.
	 *
	 * In both cases this acts as a filter such that the rest of the code
	 * can assume it is monotonic regardless of anything else.
	 */
	if (prev->stime + prev->utime >= rtime)
		goto out;

	stime = curr->stime;
	utime = curr->utime;

	/*
	 * If stime is 0, assume all of the runtime is userspace; likewise,
	 * if utime is 0, assume it is all system time. Once a task gets some
	 * ticks, the monotonicity code at 'update' will ensure things
	 * converge to the observed ratio.
	 */
	if (stime == 0) {
		utime = rtime;
		goto update;
	}

	if (utime == 0) {
		stime = rtime;
		goto update;
	}

	stime = scale_stime(stime, rtime, stime + utime);

update:
	/*
	 * Make sure stime doesn't go backwards; this preserves monotonicity
	 * for utime because rtime is monotonic.
	 *
	 *  utime_i+1 = rtime_i+1 - stime_i
	 *            = rtime_i+1 - (rtime_i - utime_i)
	 *            = (rtime_i+1 - rtime_i) + utime_i
	 *            >= utime_i
	 */
	if (stime < prev->stime)
		stime = prev->stime;
	utime = rtime - stime;

	/*
	 * Make sure utime doesn't go backwards; this still preserves
	 * monotonicity for stime, analogous argument to above.
	 */
	if (utime < prev->utime) {
		utime = prev->utime;
		stime = rtime - utime;
	}

	prev->stime = stime;
	prev->utime = utime;
out:
	*ut = prev->utime;
	*st = prev->stime;
	raw_spin_unlock_irqrestore(&prev->lock, flags);
}

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	struct task_cputime cputime = {
		.sum_exec_runtime = p->se.sum_exec_runtime,
	};

	task_cputime(p, &cputime.utime, &cputime.stime);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);
	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static u64 vtime_delta(struct task_struct *tsk)
{
	unsigned long now = READ_ONCE(jiffies);

	if (time_before(now, (unsigned long)tsk->vtime_snap))
		return 0;

	return jiffies_to_nsecs(now - tsk->vtime_snap);
}

static u64 get_vtime_delta(struct task_struct *tsk)
{
	unsigned long now = READ_ONCE(jiffies);
	u64 delta, other;

	/*
	 * Unlike tick based timing, vtime based timing never has lost
	 * ticks, and there is no need for steal time accounting to make
	 * up for them. Vtime accounts a rounded version of the actual
	 * elapsed time. Limit account_other_time to prevent rounding
	 * errors from causing elapsed vtime to go negative.
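	 * Because account_other_time() never returns more than the limit
	 * passed to it, the "delta - other" computed below cannot go
	 * negative.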
	 */
	delta = jiffies_to_nsecs(now - tsk->vtime_snap);
	other = account_other_time(delta);
	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
	tsk->vtime_snap = now;

	return delta - other;
}

static void __vtime_account_system(struct task_struct *tsk)
{
	account_system_time(tsk, irq_count(), get_vtime_delta(tsk));
}

void vtime_account_system(struct task_struct *tsk)
{
	if (!vtime_delta(tsk))
		return;

	write_seqcount_begin(&tsk->vtime_seqcount);
	__vtime_account_system(tsk);
	write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_account_user(struct task_struct *tsk)
{
	write_seqcount_begin(&tsk->vtime_seqcount);
	tsk->vtime_snap_whence = VTIME_SYS;
	if (vtime_delta(tsk))
		account_user_time(tsk, get_vtime_delta(tsk));
	write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_user_enter(struct task_struct *tsk)
{
	write_seqcount_begin(&tsk->vtime_seqcount);
	if (vtime_delta(tsk))
		__vtime_account_system(tsk);
	tsk->vtime_snap_whence = VTIME_USER;
	write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_guest_enter(struct task_struct *tsk)
{
	/*
	 * The flags must be updated under the lock with
	 * the vtime_snap flush and update.
	 * That enforces the right ordering and update-sequence
	 * synchronization against the reader (task_gtime()),
	 * which can thus safely catch up with a tickless delta.
	 */
	write_seqcount_begin(&tsk->vtime_seqcount);
	if (vtime_delta(tsk))
		__vtime_account_system(tsk);
	current->flags |= PF_VCPU;
	write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

void vtime_guest_exit(struct task_struct *tsk)
{
	write_seqcount_begin(&tsk->vtime_seqcount);
	__vtime_account_system(tsk);
	current->flags &= ~PF_VCPU;
	write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
	account_idle_time(get_vtime_delta(tsk));
}

void arch_vtime_task_switch(struct task_struct *prev)
{
	write_seqcount_begin(&prev->vtime_seqcount);
	prev->vtime_snap_whence = VTIME_INACTIVE;
	write_seqcount_end(&prev->vtime_seqcount);

	write_seqcount_begin(&current->vtime_seqcount);
	current->vtime_snap_whence = VTIME_SYS;
	current->vtime_snap = jiffies;
	write_seqcount_end(&current->vtime_seqcount);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
	unsigned long flags;

	local_irq_save(flags);
	write_seqcount_begin(&t->vtime_seqcount);
	t->vtime_snap_whence = VTIME_SYS;
	t->vtime_snap = jiffies;
	write_seqcount_end(&t->vtime_seqcount);
	local_irq_restore(flags);
}

u64 task_gtime(struct task_struct *t)
{
	unsigned int seq;
	u64 gtime;

	if (!vtime_accounting_enabled())
		return t->gtime;

	do {
		seq = read_seqcount_begin(&t->vtime_seqcount);

		gtime = t->gtime;
		if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
			gtime += vtime_delta(t);

	} while (read_seqcount_retry(&t->vtime_seqcount, seq));

	return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
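 *
 * The values are read under t->vtime_seqcount and retried on a concurrent
 * update, so utime/stime and the pending delta form a consistent snapshot.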
 */
void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
{
	u64 delta;
	unsigned int seq;

	if (!vtime_accounting_enabled()) {
		*utime = t->utime;
		*stime = t->stime;
		return;
	}

	do {
		seq = read_seqcount_begin(&t->vtime_seqcount);

		*utime = t->utime;
		*stime = t->stime;

		/* Task is sleeping, nothing to add */
		if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
			continue;

		delta = vtime_delta(t);

		/*
		 * Task runs either in user or kernel space, add pending nohz time to
		 * the right place.
		 */
		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU)
			*utime += delta;
		else if (t->vtime_snap_whence == VTIME_SYS)
			*stime += delta;
	} while (read_seqcount_retry(&t->vtime_seqcount, seq));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */