xref: /openbmc/linux/kernel/sched/cpuacct.c (revision 1fe3a33b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * CPU accounting code for task groups.
4  *
5  * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
6  * (balbir@in.ibm.com).
7  */
8 #include <asm/irq_regs.h>
9 #include "sched.h"
10 
/*
 * Categories of execution time reported for the tasks of a CPU
 * accounting group.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* time executing in user mode */
	CPUACCT_STAT_SYSTEM,	/* time executing in kernel mode */

	/* Number of categories above; must stay the last enumerator. */
	CPUACCT_STAT_NSTATS,
};
18 
19 static const char * const cpuacct_stat_desc[] = {
20 	[CPUACCT_STAT_USER] = "user",
21 	[CPUACCT_STAT_SYSTEM] = "system",
22 };
23 
24 /* track CPU usage of a group of tasks and its child groups */
25 struct cpuacct {
26 	struct cgroup_subsys_state	css;
27 	/* cpuusage holds pointer to a u64-type object on every CPU */
28 	u64 __percpu	*cpuusage;
29 	struct kernel_cpustat __percpu	*cpustat;
30 };
31 
32 static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
33 {
34 	return css ? container_of(css, struct cpuacct, css) : NULL;
35 }
36 
37 /* Return CPU accounting group to which this task belongs */
38 static inline struct cpuacct *task_ca(struct task_struct *tsk)
39 {
40 	return css_ca(task_css(tsk, cpuacct_cgrp_id));
41 }
42 
43 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
44 {
45 	return css_ca(ca->css.parent);
46 }
47 
48 static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
49 static struct cpuacct root_cpuacct = {
50 	.cpustat	= &kernel_cpustat,
51 	.cpuusage	= &root_cpuacct_cpuusage,
52 };
53 
54 /* Create a new CPU accounting group */
55 static struct cgroup_subsys_state *
56 cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
57 {
58 	struct cpuacct *ca;
59 
60 	if (!parent_css)
61 		return &root_cpuacct.css;
62 
63 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
64 	if (!ca)
65 		goto out;
66 
67 	ca->cpuusage = alloc_percpu(u64);
68 	if (!ca->cpuusage)
69 		goto out_free_ca;
70 
71 	ca->cpustat = alloc_percpu(struct kernel_cpustat);
72 	if (!ca->cpustat)
73 		goto out_free_cpuusage;
74 
75 	return &ca->css;
76 
77 out_free_cpuusage:
78 	free_percpu(ca->cpuusage);
79 out_free_ca:
80 	kfree(ca);
81 out:
82 	return ERR_PTR(-ENOMEM);
83 }
84 
85 /* Destroy an existing CPU accounting group */
86 static void cpuacct_css_free(struct cgroup_subsys_state *css)
87 {
88 	struct cpuacct *ca = css_ca(css);
89 
90 	free_percpu(ca->cpustat);
91 	free_percpu(ca->cpuusage);
92 	kfree(ca);
93 }
94 
95 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
96 				 enum cpuacct_stat_index index)
97 {
98 	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
99 	u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
100 	u64 data;
101 
102 	/*
103 	 * We allow index == CPUACCT_STAT_NSTATS here to read
104 	 * the sum of usages.
105 	 */
106 	if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS))
107 		return 0;
108 
109 #ifndef CONFIG_64BIT
110 	/*
111 	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
112 	 */
113 	raw_spin_rq_lock_irq(cpu_rq(cpu));
114 #endif
115 
116 	switch (index) {
117 	case CPUACCT_STAT_USER:
118 		data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
119 		break;
120 	case CPUACCT_STAT_SYSTEM:
121 		data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] +
122 			cpustat[CPUTIME_SOFTIRQ];
123 		break;
124 	case CPUACCT_STAT_NSTATS:
125 		data = *cpuusage;
126 		break;
127 	}
128 
129 #ifndef CONFIG_64BIT
130 	raw_spin_rq_unlock_irq(cpu_rq(cpu));
131 #endif
132 
133 	return data;
134 }
135 
136 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu)
137 {
138 	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
139 	u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
140 
141 	/* Don't allow to reset global kernel_cpustat */
142 	if (ca == &root_cpuacct)
143 		return;
144 
145 #ifndef CONFIG_64BIT
146 	/*
147 	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
148 	 */
149 	raw_spin_rq_lock_irq(cpu_rq(cpu));
150 #endif
151 	*cpuusage = 0;
152 	cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0;
153 	cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0;
154 	cpustat[CPUTIME_SOFTIRQ] = 0;
155 
156 #ifndef CONFIG_64BIT
157 	raw_spin_rq_unlock_irq(cpu_rq(cpu));
158 #endif
159 }
160 
161 /* Return total CPU usage (in nanoseconds) of a group */
162 static u64 __cpuusage_read(struct cgroup_subsys_state *css,
163 			   enum cpuacct_stat_index index)
164 {
165 	struct cpuacct *ca = css_ca(css);
166 	u64 totalcpuusage = 0;
167 	int i;
168 
169 	for_each_possible_cpu(i)
170 		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
171 
172 	return totalcpuusage;
173 }
174 
175 static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
176 			      struct cftype *cft)
177 {
178 	return __cpuusage_read(css, CPUACCT_STAT_USER);
179 }
180 
181 static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
182 			     struct cftype *cft)
183 {
184 	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
185 }
186 
187 static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
188 {
189 	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
190 }
191 
192 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
193 			  u64 val)
194 {
195 	struct cpuacct *ca = css_ca(css);
196 	int cpu;
197 
198 	/*
199 	 * Only allow '0' here to do a reset.
200 	 */
201 	if (val)
202 		return -EINVAL;
203 
204 	for_each_possible_cpu(cpu)
205 		cpuacct_cpuusage_write(ca, cpu);
206 
207 	return 0;
208 }
209 
210 static int __cpuacct_percpu_seq_show(struct seq_file *m,
211 				     enum cpuacct_stat_index index)
212 {
213 	struct cpuacct *ca = css_ca(seq_css(m));
214 	u64 percpu;
215 	int i;
216 
217 	for_each_possible_cpu(i) {
218 		percpu = cpuacct_cpuusage_read(ca, i, index);
219 		seq_printf(m, "%llu ", (unsigned long long) percpu);
220 	}
221 	seq_printf(m, "\n");
222 	return 0;
223 }
224 
225 static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
226 {
227 	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
228 }
229 
230 static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
231 {
232 	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
233 }
234 
235 static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
236 {
237 	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
238 }
239 
240 static int cpuacct_all_seq_show(struct seq_file *m, void *V)
241 {
242 	struct cpuacct *ca = css_ca(seq_css(m));
243 	int index;
244 	int cpu;
245 
246 	seq_puts(m, "cpu");
247 	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
248 		seq_printf(m, " %s", cpuacct_stat_desc[index]);
249 	seq_puts(m, "\n");
250 
251 	for_each_possible_cpu(cpu) {
252 		seq_printf(m, "%d", cpu);
253 		for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
254 			seq_printf(m, " %llu",
255 				   cpuacct_cpuusage_read(ca, cpu, index));
256 		seq_puts(m, "\n");
257 	}
258 	return 0;
259 }
260 
261 static int cpuacct_stats_show(struct seq_file *sf, void *v)
262 {
263 	struct cpuacct *ca = css_ca(seq_css(sf));
264 	struct task_cputime cputime;
265 	u64 val[CPUACCT_STAT_NSTATS];
266 	int cpu;
267 	int stat;
268 
269 	memset(&cputime, 0, sizeof(cputime));
270 	for_each_possible_cpu(cpu) {
271 		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
272 
273 		cputime.utime += cpustat[CPUTIME_USER];
274 		cputime.utime += cpustat[CPUTIME_NICE];
275 		cputime.stime += cpustat[CPUTIME_SYSTEM];
276 		cputime.stime += cpustat[CPUTIME_IRQ];
277 		cputime.stime += cpustat[CPUTIME_SOFTIRQ];
278 
279 		cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu);
280 	}
281 
282 	cputime_adjust(&cputime, &seq_css(sf)->cgroup->prev_cputime,
283 		&val[CPUACCT_STAT_USER], &val[CPUACCT_STAT_SYSTEM]);
284 
285 	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
286 		seq_printf(sf, "%s %llu\n", cpuacct_stat_desc[stat],
287 			nsec_to_clock_t(val[stat]));
288 	}
289 
290 	return 0;
291 }
292 
293 static struct cftype files[] = {
294 	{
295 		.name = "usage",
296 		.read_u64 = cpuusage_read,
297 		.write_u64 = cpuusage_write,
298 	},
299 	{
300 		.name = "usage_user",
301 		.read_u64 = cpuusage_user_read,
302 	},
303 	{
304 		.name = "usage_sys",
305 		.read_u64 = cpuusage_sys_read,
306 	},
307 	{
308 		.name = "usage_percpu",
309 		.seq_show = cpuacct_percpu_seq_show,
310 	},
311 	{
312 		.name = "usage_percpu_user",
313 		.seq_show = cpuacct_percpu_user_seq_show,
314 	},
315 	{
316 		.name = "usage_percpu_sys",
317 		.seq_show = cpuacct_percpu_sys_seq_show,
318 	},
319 	{
320 		.name = "usage_all",
321 		.seq_show = cpuacct_all_seq_show,
322 	},
323 	{
324 		.name = "stat",
325 		.seq_show = cpuacct_stats_show,
326 	},
327 	{ }	/* terminate */
328 };
329 
330 /*
331  * charge this task's execution time to its accounting group.
332  *
333  * called with rq->lock held.
334  */
335 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
336 {
337 	struct cpuacct *ca;
338 
339 	rcu_read_lock();
340 
341 	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
342 		__this_cpu_add(*ca->cpuusage, cputime);
343 
344 	rcu_read_unlock();
345 }
346 
347 /*
348  * Add user/system time to cpuacct.
349  *
350  * Note: it's the caller that updates the account of the root cgroup.
351  */
352 void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
353 {
354 	struct cpuacct *ca;
355 
356 	rcu_read_lock();
357 	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
358 		__this_cpu_add(ca->cpustat->cpustat[index], val);
359 	rcu_read_unlock();
360 }
361 
362 struct cgroup_subsys cpuacct_cgrp_subsys = {
363 	.css_alloc	= cpuacct_css_alloc,
364 	.css_free	= cpuacct_css_free,
365 	.legacy_cftypes	= files,
366 	.early_init	= true,
367 };
368