/*
 * Simple CPU accounting cgroup controller
 */
#include "sched.h"

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and racing
 * with irq/vtime_account on this CPU. We would then read either the old
 * or the new value, with the side effect of accounting a slice of irq time
 * to the wrong task when an irq is in progress while we read rq->clock.
 * That is a worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);

static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 0;
}

static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
                                  enum cpu_usage_stat idx)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        u64_stats_update_begin(&irqtime->sync);
        cpustat[idx] += delta;
        irqtime->total += delta;
        irqtime->tick_delta += delta;
        u64_stats_update_end(&irqtime->sync);
}

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
        struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
        s64 delta;
        int cpu;

        if (!sched_clock_irqtime)
                return;

        cpu = smp_processor_id();
        delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
        irqtime->irq_start_time += delta;

        /*
         * We do not account for softirq time from ksoftirqd here.
         * We want to continue accounting softirq time to the ksoftirqd
         * thread in that case, so as not to confuse the scheduler with a
         * special task that does not consume any time but still wants to run.
         */
        if (hardirq_count())
                irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
        else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
                irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);

static u64 irqtime_tick_accounted(u64 maxtime)
{
        struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
        u64 delta;

        delta = min(irqtime->tick_delta, maxtime);
        irqtime->tick_delta -= delta;

        return delta;
}
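/*
 * Illustrative sketch (hypothetical helper, not used by anything below):
 * a remote reader can take a consistent snapshot of another CPU's irq time
 * by retrying under irqtime->sync, much like the reader the scheduler
 * itself uses in sched.h. The helper name is made up for illustration only.
 */
static inline u64 irqtime_total_snapshot(int cpu)
{
        struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
        unsigned int seq;
        u64 total;

        do {
                seq = u64_stats_fetch_begin(&irqtime->sync);
                total = irqtime->total;
        } while (u64_stats_fetch_retry(&irqtime->sync, seq));

        return total;
}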

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime     (0)

static u64 irqtime_tick_accounted(u64 dummy)
{
        return 0;
}

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
{
        /*
         * Since all updates are sure to touch the root cgroup, we
         * go ahead and touch it first. If the root cgroup
         * is the only cgroup, then nothing else should be necessary.
         */
        __this_cpu_add(kernel_cpustat.cpustat[index], tmp);

        cgroup_account_cputime_field(p, index, tmp);
}

/*
 * Account user CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in user space since the last update
 */
void account_user_time(struct task_struct *p, u64 cputime)
{
        int index;

        /* Add user time to process. */
        p->utime += cputime;
        account_group_user_time(p, cputime);

        index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

        /* Add user time to cpustat. */
        task_group_account_field(p, index, cputime);

        /* Account for user time used */
        acct_account_cputime(p);
}

/*
 * Account guest CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in the virtual machine since the last update
 */
void account_guest_time(struct task_struct *p, u64 cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        /* Add guest time to process. */
        p->utime += cputime;
        account_group_user_time(p, cputime);
        p->gtime += cputime;

        /* Add guest time to cpustat. */
        if (task_nice(p) > 0) {
                cpustat[CPUTIME_NICE] += cputime;
                cpustat[CPUTIME_GUEST_NICE] += cputime;
        } else {
                cpustat[CPUTIME_USER] += cputime;
                cpustat[CPUTIME_GUEST] += cputime;
        }
}

/*
 * Account system CPU time to a process and desired cpustat field
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in kernel space since the last update
 * @index: the cpustat field that has to be updated
 */
void account_system_index_time(struct task_struct *p,
                               u64 cputime, enum cpu_usage_stat index)
{
        /* Add system time to process. */
        p->stime += cputime;
        account_group_system_time(p, cputime);

        /* Add system time to cpustat. */
        task_group_account_field(p, index, cputime);

        /* Account for system time used */
        acct_account_cputime(p);
}

/*
 * Account system CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the CPU time spent in kernel space since the last update
 */
void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
{
        int index;

        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                account_guest_time(p, cputime);
                return;
        }

        if (hardirq_count() - hardirq_offset)
                index = CPUTIME_IRQ;
        else if (in_serving_softirq())
                index = CPUTIME_SOFTIRQ;
        else
                index = CPUTIME_SYSTEM;

        account_system_index_time(p, cputime, index);
}
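/*
 * Worked example (illustrative): account_process_tick() below calls
 * account_system_time(p, HARDIRQ_OFFSET, cputime) from the timer interrupt.
 * With only the tick's own hardirq on the preempt count,
 * hardirq_count() - HARDIRQ_OFFSET == 0 and in_serving_softirq() is false,
 * so the time lands in CPUTIME_SYSTEM. Had the tick interrupted a softirq
 * handler, it would land in CPUTIME_SOFTIRQ; had it arrived while another
 * hardirq was being served, in CPUTIME_IRQ.
 */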

/*
 * Account for involuntary wait time.
 * @cputime: the CPU time spent in involuntary wait
 */
void account_steal_time(u64 cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        cpustat[CPUTIME_STEAL] += cputime;
}

/*
 * Account for idle time.
 * @cputime: the CPU time spent in idle wait
 */
void account_idle_time(u64 cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        struct rq *rq = this_rq();

        if (atomic_read(&rq->nr_iowait) > 0)
                cpustat[CPUTIME_IOWAIT] += cputime;
        else
                cpustat[CPUTIME_IDLE] += cputime;
}

/*
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may
 * occasionally account more time than the calling functions think has
 * elapsed.
 */
static __always_inline u64 steal_account_process_time(u64 maxtime)
{
#ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
                u64 steal;

                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;
                steal = min(steal, maxtime);
                account_steal_time(steal);
                this_rq()->prev_steal_time += steal;

                return steal;
        }
#endif
        return 0;
}

/*
 * Account how much elapsed time was spent in steal, irq, or softirq time.
 */
static inline u64 account_other_time(u64 max)
{
        u64 accounted;

        lockdep_assert_irqs_disabled();

        accounted = steal_account_process_time(max);

        if (accounted < max)
                accounted += irqtime_tick_accounted(max - accounted);

        return accounted;
}

#ifdef CONFIG_64BIT
static inline u64 read_sum_exec_runtime(struct task_struct *t)
{
        return t->se.sum_exec_runtime;
}
#else
static u64 read_sum_exec_runtime(struct task_struct *t)
{
        u64 ns;
        struct rq_flags rf;
        struct rq *rq;

        rq = task_rq_lock(t, &rf);
        ns = t->se.sum_exec_runtime;
        task_rq_unlock(rq, t, &rf);

        return ns;
}
#endif

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
        struct signal_struct *sig = tsk->signal;
        u64 utime, stime;
        struct task_struct *t;
        unsigned int seq, nextseq;
        unsigned long flags;

        /*
         * Update current task runtime to account pending time since last
         * scheduler action or thread_group_cputime() call. This thread group
         * might have other running tasks on different CPUs, but updating
         * their runtime can affect syscall performance, so we skip
         * accounting those pending times and rely only on values updated on
         * tick or other scheduler action.
         */
        if (same_thread_group(current, tsk))
                (void) task_sched_runtime(current);

        rcu_read_lock();
        /* Attempt a lockless read on the first round. */
        nextseq = 0;
        do {
                seq = nextseq;
                flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
                times->utime = sig->utime;
                times->stime = sig->stime;
                times->sum_exec_runtime = sig->sum_sched_runtime;

                for_each_thread(tsk, t) {
                        task_cputime(t, &utime, &stime);
                        times->utime += utime;
                        times->stime += stime;
                        times->sum_exec_runtime += read_sum_exec_runtime(t);
                }
                /* If lockless access failed, take the lock. */
                nextseq = 1;
        } while (need_seqretry(&sig->stats_lock, seq));
        done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
        rcu_read_unlock();
}
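/*
 * Usage sketch (hypothetical helper, purely illustrative): summing a whole
 * thread group's tick-based CPU time, e.g. for a diagnostic printout. The
 * adjusted variants further down are what real consumers should use.
 */
static inline u64 thread_group_total_cputime_sketch(struct task_struct *tsk)
{
        struct task_cputime sum;

        thread_group_cputime(tsk, &sum);

        return sum.utime + sum.stime;
}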

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the CPU time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as there is
 * no timer going off while we are on hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are updated only on system time and not on irq or
 * softirq time, as irq/softirq time no longer counts towards the task's
 * exec_runtime.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         struct rq *rq, int ticks)
{
        u64 other, cputime = TICK_NSEC * ticks;

        /*
         * When returning from idle, many ticks can get accounted at
         * once, including some ticks of steal, irq, and softirq time.
         * Subtract those ticks from the amount of time accounted to
         * idle, or potentially user or system time. Due to rounding,
         * other time can exceed ticks occasionally.
         */
        other = account_other_time(ULONG_MAX);
        if (other >= cputime)
                return;

        cputime -= other;

        if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time does not get accounted in
                 * cpu_softirq_time, so we have to handle it separately
                 * here. p->stime also needs to be updated for ksoftirqd.
                 */
                account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
        } else if (user_tick) {
                account_user_time(p, cputime);
        } else if (p == rq->idle) {
                account_idle_time(cputime);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
                account_guest_time(p, cputime);
        } else {
                account_system_index_time(p, cputime, CPUTIME_SYSTEM);
        }
}

static void irqtime_account_idle_ticks(int ticks)
{
        struct rq *rq = this_rq();

        irqtime_account_process_tick(current, 0, rq, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) { }
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                                struct rq *rq, int nr_ticks) { }
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
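/*
 * Worked example (illustrative): with HZ=1000 a tick is roughly
 * TICK_NSEC = 1,000,000 ns. If, when returning from idle, three ticks
 * (3,000,000 ns) are accounted at once and account_other_time() reports
 * 1,200,000 ns of pending steal/irq/softirq time, only the remaining
 * 1,800,000 ns is charged to user, idle, guest, or system time according
 * to the rules in irqtime_account_process_tick() above.
 */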

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
# ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_common_task_switch(struct task_struct *prev)
{
        if (is_idle_task(prev))
                vtime_account_idle(prev);
        else
                vtime_account_system(prev);

        vtime_flush(prev);
        arch_vtime_task_switch(prev);
}
# endif
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */


#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * assign a different meaning to idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account_irq_enter(struct task_struct *tsk)
{
        if (!in_interrupt() && is_idle_task(tsk))
                vtime_account_idle(tsk);
        else
                vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */

void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
                    u64 *ut, u64 *st)
{
        *ut = curr->utime;
        *st = curr->stime;
}

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        *ut = p->utime;
        *st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);

        *ut = cputime.utime;
        *st = cputime.stime;
}

#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */

/*
 * Account a single tick of CPU time.
 * @p: the process that the CPU time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
        u64 cputime, steal;
        struct rq *rq = this_rq();

        if (vtime_accounting_cpu_enabled())
                return;

        if (sched_clock_irqtime) {
                irqtime_account_process_tick(p, user_tick, rq, 1);
                return;
        }

        cputime = TICK_NSEC;
        steal = steal_account_process_time(ULONG_MAX);

        if (steal >= cputime)
                return;

        cputime -= steal;

        if (user_tick)
                account_user_time(p, cputime);
        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, cputime);
        else
                account_idle_time(cputime);
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of ticks the CPU spent idle
 */
void account_idle_ticks(unsigned long ticks)
{
        u64 cputime, steal;

        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }

        cputime = ticks * TICK_NSEC;
        steal = steal_account_process_time(ULONG_MAX);

        if (steal >= cputime)
                return;

        cputime -= steal;
        account_idle_time(cputime);
}

/*
 * Perform (stime * rtime) / total, but avoid multiplication overflow by
 * losing precision when the numbers are big.
 */
static u64 scale_stime(u64 stime, u64 rtime, u64 total)
{
        u64 scaled;

        for (;;) {
                /* Make sure "rtime" is the bigger of stime/rtime */
                if (stime > rtime)
                        swap(rtime, stime);

                /* Make sure 'total' fits in 32 bits */
                if (total >> 32)
                        goto drop_precision;

                /* Does rtime (and thus stime) fit in 32 bits? */
                if (!(rtime >> 32))
                        break;

                /* Can we just balance rtime/stime rather than dropping bits? */
                if (stime >> 31)
                        goto drop_precision;

                /* We can grow stime and shrink rtime and try to make them both fit */
                stime <<= 1;
                rtime >>= 1;
                continue;

drop_precision:
                /* We drop from rtime, it has more bits than stime */
                rtime >>= 1;
                total >>= 1;
        }

        /*
         * Make sure gcc understands that this is a 32x32->64 multiply,
         * followed by a 64/32->64 divide.
         */
        scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
        return scaled;
}
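/*
 * Worked example (illustrative): with stime = 2 s, utime = 6 s and
 * rtime = 12 s (all in nanoseconds), total = 8 s does not fit in 32 bits,
 * so the loop above halves rtime and total (and doubles stime when that is
 * safe) until a 32x32->64 multiply is possible. The result is ~3 s, i.e.
 * rtime scaled by stime / (stime + utime) = 12 s * 2 / 8.
 */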

/*
 * Adjust the tick-based cputime's random precision against the scheduler's
 * runtime accounting.
 *
 * Tick-based cputime accounting depends on whether a task's random
 * scheduling timeslices happen to be interrupted by the timer or not.
 * Depending on these circumstances, the number of such interrupts may
 * overestimate or underestimate the real user and system cputime, matching
 * it only with a variable precision.
 *
 * Fix this by scaling these tick-based values against the total runtime
 * accounted by the CFS scheduler.
 *
 * This code provides the following guarantees:
 *
 *   stime + utime == rtime
 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 *
 * Assuming that rtime_i+1 >= rtime_i.
 */
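/*
 * Worked example (illustrative): suppose prev holds stime = 2 s,
 * utime = 4 s and the new sample has rtime = 7 s with tick-based
 * stime = 1 s, utime = 9 s. scale_stime() yields 7 * 1 / 10 = 0.7 s,
 * which is below prev->stime, so stime is clamped to 2 s and utime
 * becomes 7 - 2 = 5 s: both fields stay monotonic and still sum to rtime.
 */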
void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
                    u64 *ut, u64 *st)
{
        u64 rtime, stime, utime;
        unsigned long flags;

        /* Serialize concurrent callers such that we can honour our guarantees */
        raw_spin_lock_irqsave(&prev->lock, flags);
        rtime = curr->sum_exec_runtime;

        /*
         * This is possible under two circumstances:
         *  - rtime isn't monotonic after all (a bug);
         *  - we got reordered by the lock.
         *
         * In both cases this acts as a filter such that the rest of the code
         * can assume it is monotonic regardless of anything else.
         */
        if (prev->stime + prev->utime >= rtime)
                goto out;

        stime = curr->stime;
        utime = curr->utime;

        /*
         * If either stime or utime are 0, assume all runtime is userspace.
         * Once a task gets some ticks, the monotonicity code at 'update:'
         * will ensure things converge to the observed ratio.
         */
        if (stime == 0) {
                utime = rtime;
                goto update;
        }

        if (utime == 0) {
                stime = rtime;
                goto update;
        }

        stime = scale_stime(stime, rtime, stime + utime);

update:
        /*
         * Make sure stime doesn't go backwards; this preserves monotonicity
         * for utime because rtime is monotonic.
         *
         *  utime_i+1 = rtime_i+1 - stime_i
         *            = rtime_i+1 - (rtime_i - utime_i)
         *            = (rtime_i+1 - rtime_i) + utime_i
         *            >= utime_i
         */
        if (stime < prev->stime)
                stime = prev->stime;
        utime = rtime - stime;

        /*
         * Make sure utime doesn't go backwards; this still preserves
         * monotonicity for stime, analogous argument to above.
         */
        if (utime < prev->utime) {
                utime = prev->utime;
                stime = rtime - utime;
        }

        prev->stime = stime;
        prev->utime = utime;
out:
        *ut = prev->utime;
        *st = prev->stime;
        raw_spin_unlock_irqrestore(&prev->lock, flags);
}

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        struct task_cputime cputime = {
                .sum_exec_runtime = p->se.sum_exec_runtime,
        };

        task_cputime(p, &cputime.utime, &cputime.stime);
        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);
        cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static u64 vtime_delta(struct vtime *vtime)
{
        unsigned long long clock;

        clock = sched_clock();
        if (clock < vtime->starttime)
                return 0;

        return clock - vtime->starttime;
}

static u64 get_vtime_delta(struct vtime *vtime)
{
        u64 delta = vtime_delta(vtime);
        u64 other;

        /*
         * Unlike tick-based timing, vtime-based timing never has lost
         * ticks, so there is no need for steal time accounting to make up
         * for them. Vtime accounts a rounded version of the actual elapsed
         * time. Limit account_other_time to prevent rounding errors from
         * causing elapsed vtime to go negative.
         */
        other = account_other_time(delta);
        WARN_ON_ONCE(vtime->state == VTIME_INACTIVE);
        vtime->starttime += delta;

        return delta - other;
}

static void __vtime_account_system(struct task_struct *tsk,
                                   struct vtime *vtime)
{
        vtime->stime += get_vtime_delta(vtime);
        if (vtime->stime >= TICK_NSEC) {
                account_system_time(tsk, irq_count(), vtime->stime);
                vtime->stime = 0;
        }
}

static void vtime_account_guest(struct task_struct *tsk,
                                struct vtime *vtime)
{
        vtime->gtime += get_vtime_delta(vtime);
        if (vtime->gtime >= TICK_NSEC) {
                account_guest_time(tsk, vtime->gtime);
                vtime->gtime = 0;
        }
}

void vtime_account_system(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        if (!vtime_delta(vtime))
                return;

        write_seqcount_begin(&vtime->seqcount);
        /* We might have scheduled out from the guest path */
        if (current->flags & PF_VCPU)
                vtime_account_guest(tsk, vtime);
        else
                __vtime_account_system(tsk, vtime);
        write_seqcount_end(&vtime->seqcount);
}

void vtime_user_enter(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        write_seqcount_begin(&vtime->seqcount);
        __vtime_account_system(tsk, vtime);
        vtime->state = VTIME_USER;
        write_seqcount_end(&vtime->seqcount);
}

void vtime_user_exit(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        write_seqcount_begin(&vtime->seqcount);
        vtime->utime += get_vtime_delta(vtime);
        if (vtime->utime >= TICK_NSEC) {
                account_user_time(tsk, vtime->utime);
                vtime->utime = 0;
        }
        vtime->state = VTIME_SYS;
        write_seqcount_end(&vtime->seqcount);
}
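/*
 * Worked example (illustrative): with HZ=1000, TICK_NSEC is roughly
 * 1,000,000 ns. If successive user exits add 300 us, 400 us and 500 us to
 * vtime->utime, the first two only accumulate; the third pushes the total
 * to 1.2 ms >= TICK_NSEC, so account_user_time() is called once with the
 * full 1.2 ms and the accumulator is reset to zero. __vtime_account_system()
 * and vtime_account_guest() above batch stime and gtime the same way.
 */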

void vtime_guest_enter(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;
        /*
         * The flags must be updated under the seqcount write section,
         * together with the vtime_starttime flush and update.
         * That enforces the right ordering and update-sequence
         * synchronization against the reader (task_gtime()),
         * which can thus safely catch up with a tickless delta.
         */
        write_seqcount_begin(&vtime->seqcount);
        __vtime_account_system(tsk, vtime);
        current->flags |= PF_VCPU;
        write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

void vtime_guest_exit(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        write_seqcount_begin(&vtime->seqcount);
        vtime_account_guest(tsk, vtime);
        current->flags &= ~PF_VCPU;
        write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
        account_idle_time(get_vtime_delta(&tsk->vtime));
}

void arch_vtime_task_switch(struct task_struct *prev)
{
        struct vtime *vtime = &prev->vtime;

        write_seqcount_begin(&vtime->seqcount);
        vtime->state = VTIME_INACTIVE;
        write_seqcount_end(&vtime->seqcount);

        vtime = &current->vtime;

        write_seqcount_begin(&vtime->seqcount);
        vtime->state = VTIME_SYS;
        vtime->starttime = sched_clock();
        write_seqcount_end(&vtime->seqcount);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
        struct vtime *vtime = &t->vtime;
        unsigned long flags;

        local_irq_save(flags);
        write_seqcount_begin(&vtime->seqcount);
        vtime->state = VTIME_SYS;
        vtime->starttime = sched_clock();
        write_seqcount_end(&vtime->seqcount);
        local_irq_restore(flags);
}

u64 task_gtime(struct task_struct *t)
{
        struct vtime *vtime = &t->vtime;
        unsigned int seq;
        u64 gtime;

        if (!vtime_accounting_enabled())
                return t->gtime;

        do {
                seq = read_seqcount_begin(&vtime->seqcount);

                gtime = t->gtime;
                if (vtime->state == VTIME_SYS && t->flags & PF_VCPU)
                        gtime += vtime->gtime + vtime_delta(vtime);

        } while (read_seqcount_retry(&vtime->seqcount, seq));

        return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
{
        struct vtime *vtime = &t->vtime;
        unsigned int seq;
        u64 delta;

        if (!vtime_accounting_enabled()) {
                *utime = t->utime;
                *stime = t->stime;
                return;
        }

        do {
                seq = read_seqcount_begin(&vtime->seqcount);

                *utime = t->utime;
                *stime = t->stime;

                /* The task is sleeping, nothing to add */
                if (vtime->state == VTIME_INACTIVE || is_idle_task(t))
                        continue;

                delta = vtime_delta(vtime);

                /*
                 * The task runs either in user or kernel space; add the
                 * pending nohz time to the right place.
                 */
                if (vtime->state == VTIME_USER || t->flags & PF_VCPU)
                        *utime += vtime->utime + delta;
                else if (vtime->state == VTIME_SYS)
                        *stime += vtime->stime + delta;
        } while (read_seqcount_retry(&vtime->seqcount, seq));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
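/*
 * Usage sketch (hypothetical helper, purely illustrative): typical
 * consumers such as the /proc status code do not read t->utime/t->stime or
 * call task_cputime() directly; they go through task_cputime_adjusted(),
 * which folds the pending nohz time and the scheduler runtime adjustment
 * together.
 */
static inline void task_times_debug_print(struct task_struct *t)
{
        u64 utime, stime;

        task_cputime_adjusted(t, &utime, &stime);
        pr_info("%s: utime=%llu ns stime=%llu ns\n", t->comm,
                (unsigned long long)utime, (unsigned long long)stime);
}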