xref: /openbmc/linux/kernel/sched/cpuacct.c (revision 174cd4b1)
1 #include <linux/cgroup.h>
2 #include <linux/slab.h>
3 #include <linux/percpu.h>
4 #include <linux/spinlock.h>
5 #include <linux/cpumask.h>
6 #include <linux/seq_file.h>
7 #include <linux/rcupdate.h>
8 #include <linux/kernel_stat.h>
9 #include <linux/err.h>
10 
11 #include "sched.h"
12 
13 /*
14  * CPU accounting code for task groups.
15  *
16  * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
17  * (balbir@in.ibm.com).
18  */
19 
/*
 * Buckets into which the execution time of a cpu accounting group's tasks
 * is split, according to the mode the time was spent in.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* time spent in user mode */
	CPUACCT_STAT_SYSTEM,	/* time spent in kernel mode */

	/* number of buckets above; keep this entry last */
	CPUACCT_STAT_NSTATS,
};
27 
28 static const char * const cpuacct_stat_desc[] = {
29 	[CPUACCT_STAT_USER] = "user",
30 	[CPUACCT_STAT_SYSTEM] = "system",
31 };
32 
33 struct cpuacct_usage {
34 	u64	usages[CPUACCT_STAT_NSTATS];
35 };
36 
37 /* track cpu usage of a group of tasks and its child groups */
38 struct cpuacct {
39 	struct cgroup_subsys_state css;
40 	/* cpuusage holds pointer to a u64-type object on every cpu */
41 	struct cpuacct_usage __percpu *cpuusage;
42 	struct kernel_cpustat __percpu *cpustat;
43 };
44 
45 static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
46 {
47 	return css ? container_of(css, struct cpuacct, css) : NULL;
48 }
49 
50 /* return cpu accounting group to which this task belongs */
51 static inline struct cpuacct *task_ca(struct task_struct *tsk)
52 {
53 	return css_ca(task_css(tsk, cpuacct_cgrp_id));
54 }
55 
56 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
57 {
58 	return css_ca(ca->css.parent);
59 }
60 
61 static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
62 static struct cpuacct root_cpuacct = {
63 	.cpustat	= &kernel_cpustat,
64 	.cpuusage	= &root_cpuacct_cpuusage,
65 };
66 
67 /* create a new cpu accounting group */
68 static struct cgroup_subsys_state *
69 cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
70 {
71 	struct cpuacct *ca;
72 
73 	if (!parent_css)
74 		return &root_cpuacct.css;
75 
76 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
77 	if (!ca)
78 		goto out;
79 
80 	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
81 	if (!ca->cpuusage)
82 		goto out_free_ca;
83 
84 	ca->cpustat = alloc_percpu(struct kernel_cpustat);
85 	if (!ca->cpustat)
86 		goto out_free_cpuusage;
87 
88 	return &ca->css;
89 
90 out_free_cpuusage:
91 	free_percpu(ca->cpuusage);
92 out_free_ca:
93 	kfree(ca);
94 out:
95 	return ERR_PTR(-ENOMEM);
96 }
97 
98 /* destroy an existing cpu accounting group */
99 static void cpuacct_css_free(struct cgroup_subsys_state *css)
100 {
101 	struct cpuacct *ca = css_ca(css);
102 
103 	free_percpu(ca->cpustat);
104 	free_percpu(ca->cpuusage);
105 	kfree(ca);
106 }
107 
108 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
109 				 enum cpuacct_stat_index index)
110 {
111 	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
112 	u64 data;
113 
114 	/*
115 	 * We allow index == CPUACCT_STAT_NSTATS here to read
116 	 * the sum of suages.
117 	 */
118 	BUG_ON(index > CPUACCT_STAT_NSTATS);
119 
120 #ifndef CONFIG_64BIT
121 	/*
122 	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
123 	 */
124 	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
125 #endif
126 
127 	if (index == CPUACCT_STAT_NSTATS) {
128 		int i = 0;
129 
130 		data = 0;
131 		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
132 			data += cpuusage->usages[i];
133 	} else {
134 		data = cpuusage->usages[index];
135 	}
136 
137 #ifndef CONFIG_64BIT
138 	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
139 #endif
140 
141 	return data;
142 }
143 
144 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
145 {
146 	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
147 	int i;
148 
149 #ifndef CONFIG_64BIT
150 	/*
151 	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
152 	 */
153 	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
154 #endif
155 
156 	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
157 		cpuusage->usages[i] = val;
158 
159 #ifndef CONFIG_64BIT
160 	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
161 #endif
162 }
163 
164 /* return total cpu usage (in nanoseconds) of a group */
165 static u64 __cpuusage_read(struct cgroup_subsys_state *css,
166 			   enum cpuacct_stat_index index)
167 {
168 	struct cpuacct *ca = css_ca(css);
169 	u64 totalcpuusage = 0;
170 	int i;
171 
172 	for_each_possible_cpu(i)
173 		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
174 
175 	return totalcpuusage;
176 }
177 
178 static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
179 			      struct cftype *cft)
180 {
181 	return __cpuusage_read(css, CPUACCT_STAT_USER);
182 }
183 
184 static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
185 			     struct cftype *cft)
186 {
187 	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
188 }
189 
190 static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
191 {
192 	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
193 }
194 
195 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
196 			  u64 val)
197 {
198 	struct cpuacct *ca = css_ca(css);
199 	int cpu;
200 
201 	/*
202 	 * Only allow '0' here to do a reset.
203 	 */
204 	if (val)
205 		return -EINVAL;
206 
207 	for_each_possible_cpu(cpu)
208 		cpuacct_cpuusage_write(ca, cpu, 0);
209 
210 	return 0;
211 }
212 
213 static int __cpuacct_percpu_seq_show(struct seq_file *m,
214 				     enum cpuacct_stat_index index)
215 {
216 	struct cpuacct *ca = css_ca(seq_css(m));
217 	u64 percpu;
218 	int i;
219 
220 	for_each_possible_cpu(i) {
221 		percpu = cpuacct_cpuusage_read(ca, i, index);
222 		seq_printf(m, "%llu ", (unsigned long long) percpu);
223 	}
224 	seq_printf(m, "\n");
225 	return 0;
226 }
227 
228 static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
229 {
230 	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
231 }
232 
233 static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
234 {
235 	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
236 }
237 
238 static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
239 {
240 	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
241 }
242 
243 static int cpuacct_all_seq_show(struct seq_file *m, void *V)
244 {
245 	struct cpuacct *ca = css_ca(seq_css(m));
246 	int index;
247 	int cpu;
248 
249 	seq_puts(m, "cpu");
250 	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
251 		seq_printf(m, " %s", cpuacct_stat_desc[index]);
252 	seq_puts(m, "\n");
253 
254 	for_each_possible_cpu(cpu) {
255 		struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
256 
257 		seq_printf(m, "%d", cpu);
258 
259 		for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
260 #ifndef CONFIG_64BIT
261 			/*
262 			 * Take rq->lock to make 64-bit read safe on 32-bit
263 			 * platforms.
264 			 */
265 			raw_spin_lock_irq(&cpu_rq(cpu)->lock);
266 #endif
267 
268 			seq_printf(m, " %llu", cpuusage->usages[index]);
269 
270 #ifndef CONFIG_64BIT
271 			raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
272 #endif
273 		}
274 		seq_puts(m, "\n");
275 	}
276 	return 0;
277 }
278 
279 static int cpuacct_stats_show(struct seq_file *sf, void *v)
280 {
281 	struct cpuacct *ca = css_ca(seq_css(sf));
282 	s64 val[CPUACCT_STAT_NSTATS];
283 	int cpu;
284 	int stat;
285 
286 	memset(val, 0, sizeof(val));
287 	for_each_possible_cpu(cpu) {
288 		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
289 
290 		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
291 		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
292 		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
293 		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
294 		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
295 	}
296 
297 	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
298 		seq_printf(sf, "%s %lld\n",
299 			   cpuacct_stat_desc[stat],
300 			   (long long)nsec_to_clock_t(val[stat]));
301 	}
302 
303 	return 0;
304 }
305 
306 static struct cftype files[] = {
307 	{
308 		.name = "usage",
309 		.read_u64 = cpuusage_read,
310 		.write_u64 = cpuusage_write,
311 	},
312 	{
313 		.name = "usage_user",
314 		.read_u64 = cpuusage_user_read,
315 	},
316 	{
317 		.name = "usage_sys",
318 		.read_u64 = cpuusage_sys_read,
319 	},
320 	{
321 		.name = "usage_percpu",
322 		.seq_show = cpuacct_percpu_seq_show,
323 	},
324 	{
325 		.name = "usage_percpu_user",
326 		.seq_show = cpuacct_percpu_user_seq_show,
327 	},
328 	{
329 		.name = "usage_percpu_sys",
330 		.seq_show = cpuacct_percpu_sys_seq_show,
331 	},
332 	{
333 		.name = "usage_all",
334 		.seq_show = cpuacct_all_seq_show,
335 	},
336 	{
337 		.name = "stat",
338 		.seq_show = cpuacct_stats_show,
339 	},
340 	{ }	/* terminate */
341 };
342 
343 /*
344  * charge this task's execution time to its accounting group.
345  *
346  * called with rq->lock held.
347  */
348 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
349 {
350 	struct cpuacct *ca;
351 	int index = CPUACCT_STAT_SYSTEM;
352 	struct pt_regs *regs = task_pt_regs(tsk);
353 
354 	if (regs && user_mode(regs))
355 		index = CPUACCT_STAT_USER;
356 
357 	rcu_read_lock();
358 
359 	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
360 		this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;
361 
362 	rcu_read_unlock();
363 }
364 
365 /*
366  * Add user/system time to cpuacct.
367  *
368  * Note: it's the caller that updates the account of the root cgroup.
369  */
370 void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
371 {
372 	struct cpuacct *ca;
373 
374 	rcu_read_lock();
375 	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
376 		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
377 	rcu_read_unlock();
378 }
379 
380 struct cgroup_subsys cpuacct_cgrp_subsys = {
381 	.css_alloc	= cpuacct_css_alloc,
382 	.css_free	= cpuacct_css_free,
383 	.legacy_cftypes	= files,
384 	.early_init	= true,
385 };
386