#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif


#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and
 * racing with irq/vtime_account on this CPU. We would either get the old
 * or the new value, with a side effect of accounting a slice of irq time
 * to the wrong task when an irq is in progress while we read rq->clock.
 * That is a worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);

static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 0;
}

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
	s64 delta;
	int cpu;

	if (!sched_clock_irqtime)
		return;

	cpu = smp_processor_id();
	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
	irqtime->irq_start_time += delta;

	u64_stats_update_begin(&irqtime->sync);
	/*
	 * We do not account for softirq time from ksoftirqd here.
	 * We want to continue accounting softirq time to ksoftirqd thread
	 * in that case, so as not to confuse the scheduler with a special
	 * task that does not consume any time, but still wants to run.
	 */
	if (hardirq_count())
		irqtime->hardirq_time += delta;
	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
		irqtime->softirq_time += delta;

	u64_stats_update_end(&irqtime->sync);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);

static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	cputime_t irq_cputime;

	irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
	irq_cputime = min(irq_cputime, maxtime);
	cpustat[idx] += irq_cputime;

	return irq_cputime;
}

static cputime_t irqtime_account_hi_update(cputime_t maxtime)
{
	return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time),
				      CPUTIME_IRQ, maxtime);
}

static cputime_t irqtime_account_si_update(cputime_t maxtime)
{
	return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time),
				      CPUTIME_SOFTIRQ, maxtime);
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime	(0)

static cputime_t irqtime_account_hi_update(cputime_t dummy)
{
	return 0;
}

static cputime_t irqtime_account_si_update(cputime_t dummy)
{
	return 0;
}

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

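/*
 * Charge @tmp to cpustat field @index of both the root group statistics
 * (this CPU's kernel_cpustat) and @p's cpuacct cgroup hierarchy.
 */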
static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
	/*
	 * Since all updates are sure to touch the root cgroup, we
	 * get ourselves ahead and touch it first. If the root cgroup
	 * is the only cgroup, then nothing else should be necessary.
	 */
	__this_cpu_add(kernel_cpustat.cpustat[index], tmp);

	cpuacct_account_field(p, index, tmp);
}

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 */
void account_user_time(struct task_struct *p, cputime_t cputime)
{
	int index;

	/* Add user time to process. */
	p->utime += cputime;
	account_group_user_time(p, cputime);

	index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

	/* Add user time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for user time used */
	acct_account_cputime(p);
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Add guest time to process. */
	p->utime += cputime;
	account_group_user_time(p, cputime);
	p->gtime += cputime;

	/* Add guest time to cpustat. */
	if (task_nice(p) > 0) {
		cpustat[CPUTIME_NICE] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
	} else {
		cpustat[CPUTIME_USER] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST] += (__force u64) cputime;
	}
}

/*
 * Account system cpu time to a process and desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @index: index of the cpustat field that has to be updated
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
{
	/* Add system time to process. */
	p->stime += cputime;
	account_group_system_time(p, cputime);

	/* Add system time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for system time used */
	acct_account_cputime(p);
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
			 cputime_t cputime)
{
	int index;

	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
		account_guest_time(p, cputime);
		return;
	}

	if (hardirq_count() - hardirq_offset)
		index = CPUTIME_IRQ;
	else if (in_serving_softirq())
		index = CPUTIME_SOFTIRQ;
	else
		index = CPUTIME_SYSTEM;

	__account_system_time(p, cputime, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	struct rq *rq = this_rq();

	if (atomic_read(&rq->nr_iowait) > 0)
		cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
	else
		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

/*
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Because of that, this function may on
 * occasion account more time than the calling functions think elapsed.
 */
static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		cputime_t steal_cputime;
		u64 steal;

		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;

		steal_cputime = min(nsecs_to_cputime(steal), maxtime);
		account_steal_time(steal_cputime);
		this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);

		return steal_cputime;
	}
#endif
	return 0;
}

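/*
 * For example, if paravirt_steal_clock() reports 7ms of new steal time
 * since prev_steal_time but @maxtime corresponds to only 4ms, then only
 * 4ms are accounted by steal_account_process_time() now; prev_steal_time
 * advances by the accounted amount, so the remaining 3ms are picked up
 * by a later call.
 */
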
/*
 * Account how much elapsed time was spent in steal, irq, or softirq time.
 */
static inline cputime_t account_other_time(cputime_t max)
{
	cputime_t accounted;

	/* Shall be converted to a lockdep-enabled lightweight check */
	WARN_ON_ONCE(!irqs_disabled());

	accounted = steal_account_process_time(max);

	if (accounted < max)
		accounted += irqtime_account_hi_update(max - accounted);

	if (accounted < max)
		accounted += irqtime_account_si_update(max - accounted);

	return accounted;
}

#ifdef CONFIG_64BIT
static inline u64 read_sum_exec_runtime(struct task_struct *t)
{
	return t->se.sum_exec_runtime;
}
#else
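/*
 * On 32-bit, the 64-bit sum_exec_runtime cannot be loaded atomically, so
 * take the rq lock to avoid racing with an update and reading a torn value.
 */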
static u64 read_sum_exec_runtime(struct task_struct *t)
{
	u64 ns;
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(t, &rf);
	ns = t->se.sum_exec_runtime;
	task_rq_unlock(rq, t, &rf);

	return ns;
}
#endif

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	cputime_t utime, stime;
	struct task_struct *t;
	unsigned int seq, nextseq;
	unsigned long flags;

	/*
	 * Update current task runtime to account pending time since last
	 * scheduler action or thread_group_cputime() call. This thread group
	 * might have other running tasks on different CPUs, but updating
	 * their runtime can affect syscall performance, so we skip
	 * accounting those pending times and rely only on values updated on
	 * tick or other scheduler action.
	 */
	if (same_thread_group(current, tsk))
		(void) task_sched_runtime(current);

	rcu_read_lock();
	/* Attempt a lockless read on the first round. */
	nextseq = 0;
	do {
		seq = nextseq;
		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
		times->utime = sig->utime;
		times->stime = sig->stime;
		times->sum_exec_runtime = sig->sum_sched_runtime;

		for_each_thread(tsk, t) {
			task_cputime(t, &utime, &stime);
			times->utime += utime;
			times->stime += stime;
			times->sum_exec_runtime += read_sum_exec_runtime(t);
		}
		/* If lockless access failed, take the lock. */
		nextseq = 1;
	} while (need_seqretry(&sig->stats_lock, seq));
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
	rcu_read_unlock();
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * Check for hardirq is done both for system and user time as there is
 * no timer going off while we are on hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on
 * irq/softirq time, as those do not count in task exec_runtime any more.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
					 struct rq *rq, int ticks)
{
	u64 cputime = (__force u64) cputime_one_jiffy * ticks;
	cputime_t other;

	/*
	 * When returning from idle, many ticks can get accounted at
	 * once, including some ticks of steal, irq, and softirq time.
	 * Subtract those ticks from the amount of time accounted to
	 * idle, or potentially user or system time. Due to rounding,
	 * other time can exceed ticks occasionally.
	 */
	other = account_other_time(ULONG_MAX);
	if (other >= cputime)
		return;
	cputime -= other;

	if (this_cpu_ksoftirqd() == p) {
		/*
		 * ksoftirqd time does not get accounted in cpu_softirq_time.
		 * So, we have to handle it separately here.
		 * Also, p->stime needs to be updated for ksoftirqd.
		 */
		__account_system_time(p, cputime, CPUTIME_SOFTIRQ);
	} else if (user_tick) {
		account_user_time(p, cputime);
	} else if (p == rq->idle) {
		account_idle_time(cputime);
	} else if (p->flags & PF_VCPU) { /* System time or guest time */
		account_guest_time(p, cputime);
	} else {
		__account_system_time(p, cputime, CPUTIME_SYSTEM);
	}
}

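/*
 * Worked example for irqtime_account_process_tick(): with ticks = 5 and
 * two ticks worth of pending steal/irq/softirq time, account_other_time()
 * charges those two ticks to the corresponding cpustat fields and only the
 * remaining three ticks are accounted to the task (or to idle).
 */
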
static void irqtime_account_idle_ticks(int ticks)
{
	struct rq *rq = this_rq();

	irqtime_account_process_tick(current, 0, rq, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq, int nr_ticks) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING

#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_common_task_switch(struct task_struct *prev)
{
	if (is_idle_task(prev))
		vtime_account_idle(prev);
	else
		vtime_account_system(prev);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	vtime_account_user(prev);
#endif
	arch_vtime_task_switch(prev);
}
#endif

#endif /* CONFIG_VIRT_CPU_ACCOUNTING */


#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * define idle time differently (s390 only includes the time spent
 * by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account_irq_enter(struct task_struct *tsk)
{
	if (!in_interrupt() && is_idle_task(tsk))
		vtime_account_idle(tsk);
	else
		vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	*ut = p->utime;
	*st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);

	*ut = cputime.utime;
	*st = cputime.stime;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	cputime_t cputime, steal;
	struct rq *rq = this_rq();

	if (vtime_accounting_cpu_enabled())
		return;

	if (sched_clock_irqtime) {
		irqtime_account_process_tick(p, user_tick, rq, 1);
		return;
	}

	cputime = cputime_one_jiffy;
	steal = steal_account_process_time(ULONG_MAX);

	if (steal >= cputime)
		return;

	cputime -= steal;

	if (user_tick)
		account_user_time(p, cputime);
	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
		account_system_time(p, HARDIRQ_OFFSET, cputime);
	else
		account_idle_time(cputime);
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of stolen ticks
 */
void account_idle_ticks(unsigned long ticks)
{
	cputime_t cputime, steal;

	if (sched_clock_irqtime) {
		irqtime_account_idle_ticks(ticks);
		return;
	}

	cputime = jiffies_to_cputime(ticks);
	steal = steal_account_process_time(ULONG_MAX);

	if (steal >= cputime)
		return;

	cputime -= steal;
	account_idle_time(cputime);
}

/*
 * Perform (stime * rtime) / total, but avoid multiplication overflow by
 * losing precision when the numbers are big.
 */
static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
{
	u64 scaled;

	for (;;) {
		/* Make sure "rtime" is the bigger of stime/rtime */
		if (stime > rtime)
			swap(rtime, stime);

		/* Make sure 'total' fits in 32 bits */
		if (total >> 32)
			goto drop_precision;

		/* Does rtime (and thus stime) fit in 32 bits? */
		if (!(rtime >> 32))
			break;

		/* Can we just balance rtime/stime rather than dropping bits? */
		if (stime >> 31)
			goto drop_precision;

		/* We can grow stime and shrink rtime and try to make them both fit */
		stime <<= 1;
		rtime >>= 1;
		continue;

drop_precision:
		/* We drop from rtime, it has more bits than stime */
		rtime >>= 1;
		total >>= 1;
	}

	/*
	 * Make sure gcc understands that this is a 32x32->64 multiply,
	 * followed by a 64/32->64 divide.
	 */
	scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
	return (__force cputime_t) scaled;
}

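/*
 * Worked example (hypothetical numbers): with stime = 2, utime = 6 and
 * rtime = 12, cputime_adjust() below calls scale_stime(2, 12, 8), which
 * returns 2 * 12 / 8 = 3, and then derives utime = 12 - 3 = 9, preserving
 * the observed 1:3 system/user ratio across the scheduler runtime
 * (ignoring the monotonicity clamping against the previous values).
 */
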
/*
 * Adjust tick based cputime random precision against scheduler runtime
 * accounting.
 *
 * Tick based cputime accounting depends on random scheduling timeslices of a
 * task being interrupted or not by the timer. Depending on these
 * circumstances, the number of these interrupts may over- or under-estimate
 * the real user and system cputime, matching it only with a variable
 * precision.
 *
 * Fix this by scaling these tick based values against the total runtime
 * accounted by the CFS scheduler.
 *
 * This code provides the following guarantees:
 *
 *   stime + utime == rtime
 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 *
 * Assuming that rtime_i+1 >= rtime_i.
 */
static void cputime_adjust(struct task_cputime *curr,
			   struct prev_cputime *prev,
			   cputime_t *ut, cputime_t *st)
{
	cputime_t rtime, stime, utime;
	unsigned long flags;

	/* Serialize concurrent callers such that we can honour our guarantees */
	raw_spin_lock_irqsave(&prev->lock, flags);
	rtime = nsecs_to_cputime(curr->sum_exec_runtime);

	/*
	 * This is possible under two circumstances:
	 *  - rtime isn't monotonic after all (a bug);
	 *  - we got reordered by the lock.
	 *
	 * In both cases this acts as a filter such that the rest of the code
	 * can assume it is monotonic regardless of anything else.
	 */
	if (prev->stime + prev->utime >= rtime)
		goto out;

	stime = curr->stime;
	utime = curr->utime;

	/*
	 * If either stime or both stime and utime are 0, assume all runtime is
	 * userspace. Once a task gets some ticks, the monotonicity code at
	 * 'update' will ensure things converge to the observed ratio.
	 */
	if (stime == 0) {
		utime = rtime;
		goto update;
	}

	if (utime == 0) {
		stime = rtime;
		goto update;
	}

	stime = scale_stime((__force u64)stime, (__force u64)rtime,
			    (__force u64)(stime + utime));

update:
	/*
	 * Make sure stime doesn't go backwards; this preserves monotonicity
	 * for utime because rtime is monotonic.
	 *
	 *  utime_i+1 = rtime_i+1 - stime_i
	 *            = rtime_i+1 - (rtime_i - utime_i)
	 *            = (rtime_i+1 - rtime_i) + utime_i
	 *            >= utime_i
	 */
	if (stime < prev->stime)
		stime = prev->stime;
	utime = rtime - stime;

	/*
	 * Make sure utime doesn't go backwards; this still preserves
	 * monotonicity for stime, analogous argument to above.
	 */
	if (utime < prev->utime) {
		utime = prev->utime;
		stime = rtime - utime;
	}

	prev->stime = stime;
	prev->utime = utime;
out:
	*ut = prev->utime;
	*st = prev->stime;
	raw_spin_unlock_irqrestore(&prev->lock, flags);
}

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime = {
		.sum_exec_runtime = p->se.sum_exec_runtime,
	};

	task_cputime(p, &cputime.utime, &cputime.stime);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);
	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
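/*
 * Time, in cputime_t, elapsed since @tsk's last vtime snapshot
 * (tsk->vtime_snap), at jiffies granularity; 0 if jiffies has not moved on.
 */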
static cputime_t vtime_delta(struct task_struct *tsk)
{
	unsigned long now = READ_ONCE(jiffies);

	if (time_before(now, (unsigned long)tsk->vtime_snap))
		return 0;

	return jiffies_to_cputime(now - tsk->vtime_snap);
}

static cputime_t get_vtime_delta(struct task_struct *tsk)
{
	unsigned long now = READ_ONCE(jiffies);
	cputime_t delta, other;

	/*
	 * Unlike tick based timing, vtime based timing never has lost
	 * ticks, and needs no steal time accounting to make up for
	 * lost ticks. Vtime accounts a rounded version of actual
	 * elapsed time. Limit account_other_time to prevent rounding
	 * errors from causing elapsed vtime to go negative.
	 */
	delta = jiffies_to_cputime(now - tsk->vtime_snap);
	other = account_other_time(delta);
	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
	tsk->vtime_snap = now;

	return delta - other;
}

static void __vtime_account_system(struct task_struct *tsk)
{
	cputime_t delta_cpu = get_vtime_delta(tsk);

	account_system_time(tsk, irq_count(), delta_cpu);
}

void vtime_account_system(struct task_struct *tsk)
{
	if (!vtime_delta(tsk))
		return;

	write_seqcount_begin(&tsk->vtime_seqcount);
	__vtime_account_system(tsk);
	write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_account_user(struct task_struct *tsk)
{
	cputime_t delta_cpu;

	write_seqcount_begin(&tsk->vtime_seqcount);
	tsk->vtime_snap_whence = VTIME_SYS;
	if (vtime_delta(tsk)) {
		delta_cpu = get_vtime_delta(tsk);
		account_user_time(tsk, delta_cpu);
	}
	write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_user_enter(struct task_struct *tsk)
{
	write_seqcount_begin(&tsk->vtime_seqcount);
	if (vtime_delta(tsk))
		__vtime_account_system(tsk);
	tsk->vtime_snap_whence = VTIME_USER;
	write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_guest_enter(struct task_struct *tsk)
{
	/*
	 * The flags must be updated under the lock with
	 * the vtime_snap flush and update.
	 * That enforces the right ordering and update sequence
	 * synchronization against the reader (task_gtime())
	 * that can thus safely catch up with a tickless delta.
	 */
	write_seqcount_begin(&tsk->vtime_seqcount);
	if (vtime_delta(tsk))
		__vtime_account_system(tsk);
	current->flags |= PF_VCPU;
	write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

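/*
 * Flush the pending delta while PF_VCPU is still set, so that
 * account_system_time() routes it to guest time, then leave guest context.
 */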
void vtime_guest_exit(struct task_struct *tsk)
{
	write_seqcount_begin(&tsk->vtime_seqcount);
	__vtime_account_system(tsk);
	current->flags &= ~PF_VCPU;
	write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
	cputime_t delta_cpu = get_vtime_delta(tsk);

	account_idle_time(delta_cpu);
}

void arch_vtime_task_switch(struct task_struct *prev)
{
	write_seqcount_begin(&prev->vtime_seqcount);
	prev->vtime_snap_whence = VTIME_INACTIVE;
	write_seqcount_end(&prev->vtime_seqcount);

	write_seqcount_begin(&current->vtime_seqcount);
	current->vtime_snap_whence = VTIME_SYS;
	current->vtime_snap = jiffies;
	write_seqcount_end(&current->vtime_seqcount);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
	unsigned long flags;

	local_irq_save(flags);
	write_seqcount_begin(&t->vtime_seqcount);
	t->vtime_snap_whence = VTIME_SYS;
	t->vtime_snap = jiffies;
	write_seqcount_end(&t->vtime_seqcount);
	local_irq_restore(flags);
}

cputime_t task_gtime(struct task_struct *t)
{
	unsigned int seq;
	cputime_t gtime;

	if (!vtime_accounting_enabled())
		return t->gtime;

	do {
		seq = read_seqcount_begin(&t->vtime_seqcount);

		gtime = t->gtime;
		if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
			gtime += vtime_delta(t);

	} while (read_seqcount_retry(&t->vtime_seqcount, seq));

	return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
	cputime_t delta;
	unsigned int seq;

	if (!vtime_accounting_enabled()) {
		*utime = t->utime;
		*stime = t->stime;
		return;
	}

	do {
		seq = read_seqcount_begin(&t->vtime_seqcount);

		*utime = t->utime;
		*stime = t->stime;

		/* Task is sleeping, nothing to add */
		if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
			continue;

		delta = vtime_delta(t);

		/*
		 * Task runs either in user or kernel space, add pending nohz
		 * time to the right place.
		 */
		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU)
			*utime += delta;
		else if (t->vtime_snap_whence == VTIME_SYS)
			*stime += delta;
	} while (read_seqcount_retry(&t->vtime_seqcount, seq));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */