xref: /openbmc/linux/kernel/sched/cpuacct.c (revision ca79522c)
#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <linux/kernel_stat.h>
#include <linux/err.h>

#include "sched.h"

/*
 * CPU accounting code for task groups.
 *
 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
 * (balbir@in.ibm.com).
 */

/* Time spent by the tasks of the cpu accounting group executing in ... */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	CPUACCT_STAT_NSTATS,
};
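
/*
 * These indices name the two rows reported by the cpuacct.stat file; the
 * matching strings live in cpuacct_stat_desc[] below.
 */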

/* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
	struct cgroup_subsys_state css;
	/* cpuusage holds pointer to a u64-type object on every cpu */
	u64 __percpu *cpuusage;
	struct kernel_cpustat __percpu *cpustat;
};

/* return cpu accounting group corresponding to this cgroup */
static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
{
	return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
			    struct cpuacct, css);
}

/* return cpu accounting group to which this task belongs */
static inline struct cpuacct *task_ca(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
			    struct cpuacct, css);
}
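
/*
 * Both helpers chase RCU-protected pointers; callers are expected to be
 * inside rcu_read_lock() (or to otherwise hold the cgroup in place), as
 * cpuacct_charge() and cpuacct_account_field() below do.
 */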

static inline struct cpuacct *__parent_ca(struct cpuacct *ca)
{
	return cgroup_ca(ca->css.cgroup->parent);
}

static inline struct cpuacct *parent_ca(struct cpuacct *ca)
{
	if (!ca->css.cgroup->parent)
		return NULL;
	return cgroup_ca(ca->css.cgroup->parent);
}
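
/*
 * __parent_ca() assumes the group has a parent and must not be called on
 * the root group; parent_ca() is the checked variant that returns NULL
 * there instead.
 */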

static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
static struct cpuacct root_cpuacct = {
	.cpustat	= &kernel_cpustat,
	.cpuusage	= &root_cpuacct_cpuusage,
};
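
/*
 * The root group is statically allocated: early_init below means it must
 * be usable before the slab allocator is up.  Its ->cpustat aliases the
 * global kernel_cpustat, so the root group's numbers are the system-wide
 * ones.
 */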

/* create a new cpu accounting group */
static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
{
	struct cpuacct *ca;

	if (!cgrp->parent)
		return &root_cpuacct.css;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca)
		goto out;

	ca->cpuusage = alloc_percpu(u64);
	if (!ca->cpuusage)
		goto out_free_ca;

	ca->cpustat = alloc_percpu(struct kernel_cpustat);
	if (!ca->cpustat)
		goto out_free_cpuusage;

	return &ca->css;

out_free_cpuusage:
	free_percpu(ca->cpuusage);
out_free_ca:
	kfree(ca);
out:
	return ERR_PTR(-ENOMEM);
}

/* destroy an existing cpu accounting group */
static void cpuacct_css_free(struct cgroup *cgrp)
{
	struct cpuacct *ca = cgroup_ca(cgrp);

	free_percpu(ca->cpustat);
	free_percpu(ca->cpuusage);
	kfree(ca);
}

static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
{
	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	u64 data;

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
	data = *cpuusage;
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#else
	data = *cpuusage;
#endif

	return data;
}

static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

#ifndef CONFIG_64BIT
	/*
	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
	 */
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
	*cpuusage = val;
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#else
	*cpuusage = val;
#endif
}

/* return total cpu usage (in nanoseconds) of a group */
static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
{
	struct cpuacct *ca = cgroup_ca(cgrp);
	u64 totalcpuusage = 0;
	int i;

	for_each_present_cpu(i)
		totalcpuusage += cpuacct_cpuusage_read(ca, i);

	return totalcpuusage;
}

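/* reset a group's usage counters; only writing "0" is accepted */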
static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
			  u64 reset)
{
	struct cpuacct *ca = cgroup_ca(cgrp);
	int err = 0;
	int i;

	if (reset) {
		err = -EINVAL;
		goto out;
	}

	for_each_present_cpu(i)
		cpuacct_cpuusage_write(ca, i, 0);

out:
	return err;
}

static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
				   struct seq_file *m)
{
	struct cpuacct *ca = cgroup_ca(cgroup);
	u64 percpu;
	int i;

	for_each_present_cpu(i) {
		percpu = cpuacct_cpuusage_read(ca, i);
		seq_printf(m, "%llu ", (unsigned long long) percpu);
	}
	seq_putc(m, '\n');
	return 0;
}
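
/*
 * Reading cpuacct.usage_percpu therefore yields one space-separated
 * nanosecond counter per present CPU, e.g. "5261034 4892710 0 0"
 * (values illustrative).
 */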

static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};

static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
			      struct cgroup_map_cb *cb)
{
	struct cpuacct *ca = cgroup_ca(cgrp);
	int cpu;
	s64 val = 0;

	for_each_online_cpu(cpu) {
		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
		val += kcpustat->cpustat[CPUTIME_USER];
		val += kcpustat->cpustat[CPUTIME_NICE];
	}
	val = cputime64_to_clock_t(val);
	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);

	val = 0;
	for_each_online_cpu(cpu) {
		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
		val += kcpustat->cpustat[CPUTIME_SYSTEM];
		val += kcpustat->cpustat[CPUTIME_IRQ];
		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
	}

	val = cputime64_to_clock_t(val);
	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);

	return 0;
}
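
/*
 * cputime64_to_clock_t() converts the summed cputimes to USER_HZ ticks,
 * so cpuacct.stat reads as two "name value" pairs in the same unit as
 * /proc/stat, e.g. (values illustrative):
 *
 *	user 102410
 *	system 5120
 */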

static struct cftype files[] = {
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	{
		.name = "usage_percpu",
		.read_seq_string = cpuacct_percpu_seq_read,
	},
	{
		.name = "stat",
		.read_map = cpuacct_stats_show,
	},
	{ }	/* terminate */
};
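
/*
 * These show up as cpuacct.usage, cpuacct.usage_percpu and cpuacct.stat in
 * each cgroup directory.  Illustrative shell session; the mount point is
 * an assumption and depends on how the controller was mounted:
 *
 *	# mount -t cgroup -o cpuacct none /sys/fs/cgroup/cpuacct
 *	# cat /sys/fs/cgroup/cpuacct/cpuacct.usage
 *	123456789012			<- total ns across all CPUs
 *	# echo 0 > /sys/fs/cgroup/cpuacct/cpuacct.usage
 *					<- resets the counters; any value
 *					   other than 0 returns -EINVAL
 */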

/*
 * charge this task's execution time to its accounting group.
 *
 * called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int cpu;

	cpu = task_cpu(tsk);

	rcu_read_lock();

	ca = task_ca(tsk);

	while (true) {
		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
		*cpuusage += cputime;

		ca = parent_ca(ca);
		if (!ca)
			break;
	}

	rcu_read_unlock();
}
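
/*
 * Note the loop above charges every ancestor up to and including
 * root_cpuacct (parent_ca() only returns NULL once the root has been
 * charged), so the root group's cpuacct.usage covers all tasks.
 */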

/*
 * Add user/system time to cpuacct.
 *
 * Note: it's the caller that updates the account of the root cgroup.
 */
void cpuacct_account_field(struct task_struct *p, int index, u64 val)
{
	struct kernel_cpustat *kcpustat;
	struct cpuacct *ca;

	rcu_read_lock();
	ca = task_ca(p);
	while (ca != &root_cpuacct) {
		kcpustat = this_cpu_ptr(ca->cpustat);
		kcpustat->cpustat[index] += val;
		ca = __parent_ca(ca);
	}
	rcu_read_unlock();
}
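
/*
 * Unlike cpuacct_charge(), this walk stops short of the root: the cputime
 * accounting code already adds these ticks to the global kernel_cpustat,
 * which is exactly what root_cpuacct.cpustat points at.
 */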

struct cgroup_subsys cpuacct_subsys = {
	.name		= "cpuacct",
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
	.subsys_id	= cpuacct_subsys_id,
	.base_cftypes	= files,
	.early_init	= 1,
};