// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup-internal.h"

#include <linux/sched/cputime.h>

#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>

static DEFINE_SPINLOCK(cgroup_rstat_lock);
static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);

static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
{
	return per_cpu_ptr(cgrp->rstat_cpu, cpu);
}

/**
 * cgroup_rstat_updated - keep track of updated rstat_cpu
 * @cgrp: target cgroup
 * @cpu: cpu on which rstat_cpu was updated
 *
 * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching
 * rstat_cpu->updated_children list. See the comment on top of
 * cgroup_rstat_cpu definition for details.
 */
__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
{
	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
	unsigned long flags;

	/*
	 * Speculative already-on-list test. This may race leading to
	 * temporary inaccuracies, which is fine.
	 *
	 * Because @parent's updated_children is terminated with @parent
	 * instead of NULL, we can tell whether @cgrp is on the list by
	 * testing the next pointer for NULL.
	 */
	if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
		return;

	raw_spin_lock_irqsave(cpu_lock, flags);

	/* put @cgrp and all ancestors on the corresponding updated lists */
	while (true) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
		struct cgroup *parent = cgroup_parent(cgrp);
		struct cgroup_rstat_cpu *prstatc;

		/*
		 * Both additions and removals are bottom-up. If a cgroup
		 * is already in the tree, all ancestors are.
		 */
		if (rstatc->updated_next)
			break;

		/* Root has no parent to link it to, but mark it busy */
		if (!parent) {
			rstatc->updated_next = cgrp;
			break;
		}

		prstatc = cgroup_rstat_cpu(parent, cpu);
		rstatc->updated_next = prstatc->updated_children;
		prstatc->updated_children = cgrp;

		cgrp = parent;
	}

	raw_spin_unlock_irqrestore(cpu_lock, flags);
}
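
/*
 * Example (illustrative sketch, not part of this file): a stat producer
 * bumps its own per-cgroup per-cpu counter and then marks the cgroup
 * updated so a later flush will pick the delta up.  demo_charge() and
 * demo_pcpu_stats() are invented names for illustration only; real users
 * keep their per-cpu state attached to the cgroup or css.
 *
 *	static void demo_charge(struct cgroup *cgrp, u64 bytes)
 *	{
 *		int cpu = get_cpu();	// pin the cpu, disable preemption
 *
 *		demo_pcpu_stats(cgrp, cpu)->bytes += bytes;
 *		cgroup_rstat_updated(cgrp, cpu);
 *		put_cpu();
 *	}
 *
 * The call is cheap when @cgrp is already on the per-cpu updated list:
 * the speculative data_race() test above returns early without taking
 * the per-cpu lock.
 */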

/**
 * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
 * @pos: current position
 * @root: root of the tree to traverse
 * @cpu: target cpu
 *
 * Walks the updated rstat_cpu tree on @cpu from @root. A %NULL @pos starts
 * the traversal and a %NULL return indicates the end. During traversal,
 * each returned cgroup is unlinked from the tree. Must be called with the
 * matching cgroup_rstat_cpu_lock held.
 *
 * The only ordering guarantee is that, for a parent and a child pair
 * covered by a given traversal, if a child is visited, its parent is
 * guaranteed to be visited afterwards.
 */
static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
						   struct cgroup *root, int cpu)
{
	struct cgroup_rstat_cpu *rstatc;
	struct cgroup *parent;

	if (pos == root)
		return NULL;

	/*
	 * We're going to walk down to the first leaf and visit/remove it.
	 * We can pick any unvisited node as the starting point.
	 */
	if (!pos) {
		pos = root;
		/* return NULL if this subtree is not on-list */
		if (!cgroup_rstat_cpu(pos, cpu)->updated_next)
			return NULL;
	} else {
		pos = cgroup_parent(pos);
	}

	/* walk down to the first leaf */
	while (true) {
		rstatc = cgroup_rstat_cpu(pos, cpu);
		if (rstatc->updated_children == pos)
			break;
		pos = rstatc->updated_children;
	}

	/*
	 * Unlink @pos from the tree. As the updated_children list is
	 * singly linked, we have to walk it to find the removal point.
	 * However, due to the way we traverse, @pos will be the first
	 * child in most cases. The only exception is @root.
	 */
	parent = cgroup_parent(pos);
	if (parent) {
		struct cgroup_rstat_cpu *prstatc;
		struct cgroup **nextp;

		prstatc = cgroup_rstat_cpu(parent, cpu);
		nextp = &prstatc->updated_children;
		while (*nextp != pos) {
			struct cgroup_rstat_cpu *nrstatc;

			nrstatc = cgroup_rstat_cpu(*nextp, cpu);
			WARN_ON_ONCE(*nextp == parent);
			nextp = &nrstatc->updated_next;
		}
		*nextp = rstatc->updated_next;
	}

	rstatc->updated_next = NULL;
	return pos;
}
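
/*
 * Worked example (hypothetical hierarchy): with an updated tree on some
 * cpu of
 *
 *	root
 *	 `- A
 *	     `- B
 *
 * successive calls pop B first, then A, then root:
 *
 *	pos = NULL;
 *	pos = cgroup_rstat_cpu_pop_updated(pos, root, cpu);	// B
 *	pos = cgroup_rstat_cpu_pop_updated(pos, root, cpu);	// A
 *	pos = cgroup_rstat_cpu_pop_updated(pos, root, cpu);	// root
 *	pos = cgroup_rstat_cpu_pop_updated(pos, root, cpu);	// NULL, done
 *
 * This child-before-parent order is what lets a flusher push each
 * cgroup's delta up to its parent before the parent itself is flushed.
 */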

/*
 * A hook for bpf stat collectors to attach to and flush their stats.
 * Together with providing bpf kfuncs for cgroup_rstat_updated() and
 * cgroup_rstat_flush(), this enables a complete workflow where bpf progs that
 * collect cgroup stats can integrate with rstat for efficient flushing.
 *
 * A static noinline declaration here could cause the compiler to optimize away
 * the function. A global noinline declaration will keep the definition, but may
 * optimize away the callsite. Therefore, __weak is needed to ensure that the
 * call is still emitted, by telling the compiler that we don't know what the
 * function might eventually be.
 *
 * __diag_* below are needed to dismiss the missing prototype warning.
 */
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
		  "kfuncs which will be used in BPF programs");

__weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
				     struct cgroup *parent, int cpu)
{
}

__diag_pop();
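
/*
 * Example (illustrative sketch of the bpf side, not part of this file):
 * a tracing program attaches to the empty hook above with fentry and
 * folds its own per-cgroup stats in at flush time, along these lines:
 *
 *	SEC("fentry/bpf_rstat_flush")
 *	int BPF_PROG(rstat_flusher, struct cgroup *cgrp,
 *		     struct cgroup *parent, int cpu)
 *	{
 *		// look up the program's pending per-cpu stats for cgrp,
 *		// fold them into cgrp's totals and propagate the delta
 *		// to parent
 *		return 0;
 *	}
 *
 * See tools/testing/selftests/bpf for a complete program built on this
 * pattern.
 */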

/* see cgroup_rstat_flush() */
static void cgroup_rstat_flush_locked(struct cgroup *cgrp)
	__releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
{
	int cpu;

	lockdep_assert_held(&cgroup_rstat_lock);

	for_each_possible_cpu(cpu) {
		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
						       cpu);
		struct cgroup *pos = NULL;
		unsigned long flags;

		/*
		 * The _irqsave() is needed because cgroup_rstat_lock is
		 * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
		 * this lock with the _irq() suffix only disables interrupts on
		 * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
		 * interrupts on both configurations. The _irqsave() ensures
		 * that interrupts are always disabled and later restored.
		 */
		raw_spin_lock_irqsave(cpu_lock, flags);
		while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
			struct cgroup_subsys_state *css;

			cgroup_base_stat_flush(pos, cpu);
			bpf_rstat_flush(pos, cgroup_parent(pos), cpu);

			rcu_read_lock();
			list_for_each_entry_rcu(css, &pos->rstat_css_list,
						rstat_css_node)
				css->ss->css_rstat_flush(css, cpu);
			rcu_read_unlock();
		}
		raw_spin_unlock_irqrestore(cpu_lock, flags);

		/* play nice and yield if necessary */
		if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) {
			spin_unlock_irq(&cgroup_rstat_lock);
			if (!cond_resched())
				cpu_relax();
			spin_lock_irq(&cgroup_rstat_lock);
		}
	}
}

/**
 * cgroup_rstat_flush - flush stats in @cgrp's subtree
 * @cgrp: target cgroup
 *
 * Collect all per-cpu stats in @cgrp's subtree into the global counters
 * and propagate them upwards. After this function returns, all cgroups in
 * the subtree have up-to-date ->stat.
 *
 * This also gets all cgroups in the subtree including @cgrp off the
 * ->updated_children lists.
 *
 * This function may block.
 */
__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
{
	might_sleep();

	spin_lock_irq(&cgroup_rstat_lock);
	cgroup_rstat_flush_locked(cgrp);
	spin_unlock_irq(&cgroup_rstat_lock);
}

/**
 * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
 * @cgrp: target cgroup
 *
 * Flush stats in @cgrp's subtree and prevent further flushes. Must be
 * paired with cgroup_rstat_flush_release().
 *
 * This function may block.
 */
void cgroup_rstat_flush_hold(struct cgroup *cgrp)
	__acquires(&cgroup_rstat_lock)
{
	might_sleep();
	spin_lock_irq(&cgroup_rstat_lock);
	cgroup_rstat_flush_locked(cgrp);
}

/**
 * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
 */
void cgroup_rstat_flush_release(void)
	__releases(&cgroup_rstat_lock)
{
	spin_unlock_irq(&cgroup_rstat_lock);
}
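
/*
 * Example (illustrative): the hold/release pair brackets a read of the
 * freshly flushed counters so no other flusher runs in between.
 * cgroup_base_stat_cputime_show() below follows exactly this pattern:
 *
 *	cgroup_rstat_flush_hold(cgrp);
 *	usage = cgrp->bstat.cputime.sum_exec_runtime;
 *	cgroup_rstat_flush_release();
 */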

int cgroup_rstat_init(struct cgroup *cgrp)
{
	int cpu;

	/* the root cgrp has rstat_cpu preallocated */
	if (!cgrp->rstat_cpu) {
		cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
		if (!cgrp->rstat_cpu)
			return -ENOMEM;
	}

	/* ->updated_children list is self terminated */
	for_each_possible_cpu(cpu) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

		rstatc->updated_children = cgrp;
		u64_stats_init(&rstatc->bsync);
	}

	return 0;
}

void cgroup_rstat_exit(struct cgroup *cgrp)
{
	int cpu;

	cgroup_rstat_flush(cgrp);

	/* sanity check */
	for_each_possible_cpu(cpu) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

		if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
		    WARN_ON_ONCE(rstatc->updated_next))
			return;
	}

	free_percpu(cgrp->rstat_cpu);
	cgrp->rstat_cpu = NULL;
}

void __init cgroup_rstat_boot(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
}

/*
 * Functions for cgroup basic resource statistics implemented on top of
 * rstat.
 */
static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
				 struct cgroup_base_stat *src_bstat)
{
	dst_bstat->cputime.utime += src_bstat->cputime.utime;
	dst_bstat->cputime.stime += src_bstat->cputime.stime;
	dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
#ifdef CONFIG_SCHED_CORE
	dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
#endif
}

static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
				 struct cgroup_base_stat *src_bstat)
{
	dst_bstat->cputime.utime -= src_bstat->cputime.utime;
	dst_bstat->cputime.stime -= src_bstat->cputime.stime;
	dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime;
#ifdef CONFIG_SCHED_CORE
	dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
#endif
}

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
{
	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup_rstat_cpu *prstatc;
	struct cgroup_base_stat delta;
	unsigned seq;

	/* Root-level stats are sourced from system-wide CPU stats */
	if (!parent)
		return;

	/* fetch the current per-cpu values */
	do {
		seq = __u64_stats_fetch_begin(&rstatc->bsync);
		delta = rstatc->bstat;
	} while (__u64_stats_fetch_retry(&rstatc->bsync, seq));

	/* propagate per-cpu delta to cgroup and per-cpu global statistics */
	cgroup_base_stat_sub(&delta, &rstatc->last_bstat);
	cgroup_base_stat_add(&cgrp->bstat, &delta);
	cgroup_base_stat_add(&rstatc->last_bstat, &delta);
	cgroup_base_stat_add(&rstatc->subtree_bstat, &delta);

	/* propagate cgroup and per-cpu global delta to parent (unless that's root) */
	if (cgroup_parent(parent)) {
		delta = cgrp->bstat;
		cgroup_base_stat_sub(&delta, &cgrp->last_bstat);
		cgroup_base_stat_add(&parent->bstat, &delta);
		cgroup_base_stat_add(&cgrp->last_bstat, &delta);

		delta = rstatc->subtree_bstat;
		prstatc = cgroup_rstat_cpu(parent, cpu);
		cgroup_base_stat_sub(&delta, &rstatc->last_subtree_bstat);
		cgroup_base_stat_add(&prstatc->subtree_bstat, &delta);
		cgroup_base_stat_add(&rstatc->last_subtree_bstat, &delta);
	}
}
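
/*
 * Worked example of the snapshot/delta scheme above (hypothetical
 * numbers): suppose this cpu's rstatc->bstat.cputime.utime reads 700
 * while rstatc->last_bstat.cputime.utime is 500 from the previous
 * flush.  The delta of 200 is added to cgrp->bstat and last_bstat is
 * advanced to 700, so the next flush only propagates whatever accrues
 * beyond that.  Keeping last_* snapshots instead of zeroing the per-cpu
 * counters lets the hot accounting path keep updating bstat under its
 * u64_stats sync without coordinating with flushers.
 */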

static struct cgroup_rstat_cpu *
cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags)
{
	struct cgroup_rstat_cpu *rstatc;

	rstatc = get_cpu_ptr(cgrp->rstat_cpu);
	*flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
	return rstatc;
}

static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
						 struct cgroup_rstat_cpu *rstatc,
						 unsigned long flags)
{
	u64_stats_update_end_irqrestore(&rstatc->bsync, flags);
	cgroup_rstat_updated(cgrp, smp_processor_id());
	put_cpu_ptr(rstatc);
}

void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
	struct cgroup_rstat_cpu *rstatc;
	unsigned long flags;

	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
	rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
	cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}

void __cgroup_account_cputime_field(struct cgroup *cgrp,
				    enum cpu_usage_stat index, u64 delta_exec)
{
	struct cgroup_rstat_cpu *rstatc;
	unsigned long flags;

	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);

	switch (index) {
	case CPUTIME_USER:
	case CPUTIME_NICE:
		rstatc->bstat.cputime.utime += delta_exec;
		break;
	case CPUTIME_SYSTEM:
	case CPUTIME_IRQ:
	case CPUTIME_SOFTIRQ:
		rstatc->bstat.cputime.stime += delta_exec;
		break;
#ifdef CONFIG_SCHED_CORE
	case CPUTIME_FORCEIDLE:
		rstatc->bstat.forceidle_sum += delta_exec;
		break;
#endif
	default:
		break;
	}

	cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags);
}

/*
 * compute the cputime for the root cgroup by getting the per cpu data
 * at a global level, then categorizing the fields in a manner consistent
 * with how it is done by __cgroup_account_cputime_field for each bit of
 * cpu time attributed to a cgroup.
 */
static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
{
	struct task_cputime *cputime = &bstat->cputime;
	int i;

	memset(bstat, 0, sizeof(*bstat));
	for_each_possible_cpu(i) {
		struct kernel_cpustat kcpustat;
		u64 *cpustat = kcpustat.cpustat;
		u64 user = 0;
		u64 sys = 0;

		kcpustat_cpu_fetch(&kcpustat, i);

		user += cpustat[CPUTIME_USER];
		user += cpustat[CPUTIME_NICE];
		cputime->utime += user;

		sys += cpustat[CPUTIME_SYSTEM];
		sys += cpustat[CPUTIME_IRQ];
		sys += cpustat[CPUTIME_SOFTIRQ];
		cputime->stime += sys;

		cputime->sum_exec_runtime += user;
		cputime->sum_exec_runtime += sys;

#ifdef CONFIG_SCHED_CORE
		bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
#endif
	}
}

void cgroup_base_stat_cputime_show(struct seq_file *seq)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	u64 usage, utime, stime;
	struct cgroup_base_stat bstat;
#ifdef CONFIG_SCHED_CORE
	u64 forceidle_time;
#endif

	if (cgroup_parent(cgrp)) {
		cgroup_rstat_flush_hold(cgrp);
		usage = cgrp->bstat.cputime.sum_exec_runtime;
		cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
			       &utime, &stime);
#ifdef CONFIG_SCHED_CORE
		forceidle_time = cgrp->bstat.forceidle_sum;
#endif
		cgroup_rstat_flush_release();
	} else {
		root_cgroup_cputime(&bstat);
		usage = bstat.cputime.sum_exec_runtime;
		utime = bstat.cputime.utime;
		stime = bstat.cputime.stime;
#ifdef CONFIG_SCHED_CORE
		forceidle_time = bstat.forceidle_sum;
#endif
	}

	do_div(usage, NSEC_PER_USEC);
	do_div(utime, NSEC_PER_USEC);
	do_div(stime, NSEC_PER_USEC);
#ifdef CONFIG_SCHED_CORE
	do_div(forceidle_time, NSEC_PER_USEC);
#endif

	seq_printf(seq, "usage_usec %llu\n"
		   "user_usec %llu\n"
		   "system_usec %llu\n",
		   usage, utime, stime);

#ifdef CONFIG_SCHED_CORE
	seq_printf(seq, "core_sched.force_idle_usec %llu\n", forceidle_time);
#endif
}
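
/*
 * Sample cpu.stat output produced above (values illustrative only; on
 * CONFIG_SCHED_CORE kernels the force_idle line is appended):
 *
 *	usage_usec 10000
 *	user_usec 6000
 *	system_usec 4000
 *	core_sched.force_idle_usec 0
 */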

/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
BTF_SET8_START(bpf_rstat_kfunc_ids)
BTF_ID_FLAGS(func, cgroup_rstat_updated)
BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
BTF_SET8_END(bpf_rstat_kfunc_ids)

static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &bpf_rstat_kfunc_ids,
};

static int __init bpf_rstat_kfunc_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					 &bpf_rstat_kfunc_set);
}
late_initcall(bpf_rstat_kfunc_init);
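
/*
 * Example (illustrative sketch of the bpf side): a bpf program declares
 * the kfuncs registered above as ksyms and calls them directly.  Since
 * cgroup_rstat_flush() is registered KF_SLEEPABLE, it may only be called
 * from a sleepable program; the section and program context below are
 * assumptions for illustration:
 *
 *	extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
 *
 *	SEC("iter.s/cgroup")
 *	int BPF_PROG(dump_stats, struct bpf_iter_meta *meta,
 *		     struct cgroup *cgrp)
 *	{
 *		if (cgrp)
 *			cgroup_rstat_flush(cgrp);
 *		// read and emit the now up-to-date stats
 *		return 0;
 *	}
 */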