xref: /openbmc/linux/kernel/sched/cpuacct.c (revision 4f205687)
1 #include <linux/cgroup.h>
2 #include <linux/slab.h>
3 #include <linux/percpu.h>
4 #include <linux/spinlock.h>
5 #include <linux/cpumask.h>
6 #include <linux/seq_file.h>
7 #include <linux/rcupdate.h>
8 #include <linux/kernel_stat.h>
9 #include <linux/err.h>
10 
11 #include "sched.h"
12 
13 /*
14  * CPU accounting code for task groups.
15  *
16  * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
17  * (balbir@in.ibm.com).
18  */
19 
/*
 * Indices for the time spent by the tasks of a cpu accounting group,
 * split by the mode they were executing in.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* time in user mode */
	CPUACCT_STAT_SYSTEM,	/* time in kernel mode */

	CPUACCT_STAT_NSTATS,	/* count of the entries above */
};
27 
/*
 * Indices into cpuacct_usage.usages[].  CPUACCT_USAGE_NRUSAGE doubles as
 * the array size and, in cpuacct_cpuusage_read(), as the request for the
 * sum over all entries.
 */
enum cpuacct_usage_index {
	CPUACCT_USAGE_USER = 0,	/* usage accumulated in user mode */
	CPUACCT_USAGE_SYSTEM,	/* usage accumulated in kernel mode */

	CPUACCT_USAGE_NRUSAGE,	/* count of the entries above */
};
34 
35 struct cpuacct_usage {
36 	u64	usages[CPUACCT_USAGE_NRUSAGE];
37 };
38 
39 /* track cpu usage of a group of tasks and its child groups */
40 struct cpuacct {
41 	struct cgroup_subsys_state css;
42 	/* cpuusage holds pointer to a u64-type object on every cpu */
43 	struct cpuacct_usage __percpu *cpuusage;
44 	struct kernel_cpustat __percpu *cpustat;
45 };
46 
47 static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
48 {
49 	return css ? container_of(css, struct cpuacct, css) : NULL;
50 }
51 
52 /* return cpu accounting group to which this task belongs */
53 static inline struct cpuacct *task_ca(struct task_struct *tsk)
54 {
55 	return css_ca(task_css(tsk, cpuacct_cgrp_id));
56 }
57 
58 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
59 {
60 	return css_ca(ca->css.parent);
61 }
62 
63 static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
64 static struct cpuacct root_cpuacct = {
65 	.cpustat	= &kernel_cpustat,
66 	.cpuusage	= &root_cpuacct_cpuusage,
67 };
68 
69 /* create a new cpu accounting group */
70 static struct cgroup_subsys_state *
71 cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
72 {
73 	struct cpuacct *ca;
74 
75 	if (!parent_css)
76 		return &root_cpuacct.css;
77 
78 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
79 	if (!ca)
80 		goto out;
81 
82 	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
83 	if (!ca->cpuusage)
84 		goto out_free_ca;
85 
86 	ca->cpustat = alloc_percpu(struct kernel_cpustat);
87 	if (!ca->cpustat)
88 		goto out_free_cpuusage;
89 
90 	return &ca->css;
91 
92 out_free_cpuusage:
93 	free_percpu(ca->cpuusage);
94 out_free_ca:
95 	kfree(ca);
96 out:
97 	return ERR_PTR(-ENOMEM);
98 }
99 
100 /* destroy an existing cpu accounting group */
101 static void cpuacct_css_free(struct cgroup_subsys_state *css)
102 {
103 	struct cpuacct *ca = css_ca(css);
104 
105 	free_percpu(ca->cpustat);
106 	free_percpu(ca->cpuusage);
107 	kfree(ca);
108 }
109 
110 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
111 				 enum cpuacct_usage_index index)
112 {
113 	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
114 	u64 data;
115 
116 	/*
117 	 * We allow index == CPUACCT_USAGE_NRUSAGE here to read
118 	 * the sum of suages.
119 	 */
120 	BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
121 
122 #ifndef CONFIG_64BIT
123 	/*
124 	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
125 	 */
126 	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
127 #endif
128 
129 	if (index == CPUACCT_USAGE_NRUSAGE) {
130 		int i = 0;
131 
132 		data = 0;
133 		for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
134 			data += cpuusage->usages[i];
135 	} else {
136 		data = cpuusage->usages[index];
137 	}
138 
139 #ifndef CONFIG_64BIT
140 	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
141 #endif
142 
143 	return data;
144 }
145 
146 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
147 {
148 	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
149 	int i;
150 
151 #ifndef CONFIG_64BIT
152 	/*
153 	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
154 	 */
155 	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
156 #endif
157 
158 	for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
159 		cpuusage->usages[i] = val;
160 
161 #ifndef CONFIG_64BIT
162 	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
163 #endif
164 }
165 
166 /* return total cpu usage (in nanoseconds) of a group */
167 static u64 __cpuusage_read(struct cgroup_subsys_state *css,
168 			   enum cpuacct_usage_index index)
169 {
170 	struct cpuacct *ca = css_ca(css);
171 	u64 totalcpuusage = 0;
172 	int i;
173 
174 	for_each_possible_cpu(i)
175 		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
176 
177 	return totalcpuusage;
178 }
179 
180 static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
181 			      struct cftype *cft)
182 {
183 	return __cpuusage_read(css, CPUACCT_USAGE_USER);
184 }
185 
186 static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
187 			     struct cftype *cft)
188 {
189 	return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM);
190 }
191 
192 static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
193 {
194 	return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE);
195 }
196 
197 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
198 			  u64 val)
199 {
200 	struct cpuacct *ca = css_ca(css);
201 	int cpu;
202 
203 	/*
204 	 * Only allow '0' here to do a reset.
205 	 */
206 	if (val)
207 		return -EINVAL;
208 
209 	for_each_possible_cpu(cpu)
210 		cpuacct_cpuusage_write(ca, cpu, 0);
211 
212 	return 0;
213 }
214 
215 static int __cpuacct_percpu_seq_show(struct seq_file *m,
216 				     enum cpuacct_usage_index index)
217 {
218 	struct cpuacct *ca = css_ca(seq_css(m));
219 	u64 percpu;
220 	int i;
221 
222 	for_each_possible_cpu(i) {
223 		percpu = cpuacct_cpuusage_read(ca, i, index);
224 		seq_printf(m, "%llu ", (unsigned long long) percpu);
225 	}
226 	seq_printf(m, "\n");
227 	return 0;
228 }
229 
230 static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
231 {
232 	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER);
233 }
234 
235 static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
236 {
237 	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM);
238 }
239 
240 static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
241 {
242 	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
243 }
244 
245 static const char * const cpuacct_stat_desc[] = {
246 	[CPUACCT_STAT_USER] = "user",
247 	[CPUACCT_STAT_SYSTEM] = "system",
248 };
249 
250 static int cpuacct_stats_show(struct seq_file *sf, void *v)
251 {
252 	struct cpuacct *ca = css_ca(seq_css(sf));
253 	int cpu;
254 	s64 val = 0;
255 
256 	for_each_possible_cpu(cpu) {
257 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
258 		val += kcpustat->cpustat[CPUTIME_USER];
259 		val += kcpustat->cpustat[CPUTIME_NICE];
260 	}
261 	val = cputime64_to_clock_t(val);
262 	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
263 
264 	val = 0;
265 	for_each_possible_cpu(cpu) {
266 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
267 		val += kcpustat->cpustat[CPUTIME_SYSTEM];
268 		val += kcpustat->cpustat[CPUTIME_IRQ];
269 		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
270 	}
271 
272 	val = cputime64_to_clock_t(val);
273 	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
274 
275 	return 0;
276 }
277 
278 static struct cftype files[] = {
279 	{
280 		.name = "usage",
281 		.read_u64 = cpuusage_read,
282 		.write_u64 = cpuusage_write,
283 	},
284 	{
285 		.name = "usage_user",
286 		.read_u64 = cpuusage_user_read,
287 	},
288 	{
289 		.name = "usage_sys",
290 		.read_u64 = cpuusage_sys_read,
291 	},
292 	{
293 		.name = "usage_percpu",
294 		.seq_show = cpuacct_percpu_seq_show,
295 	},
296 	{
297 		.name = "usage_percpu_user",
298 		.seq_show = cpuacct_percpu_user_seq_show,
299 	},
300 	{
301 		.name = "usage_percpu_sys",
302 		.seq_show = cpuacct_percpu_sys_seq_show,
303 	},
304 	{
305 		.name = "stat",
306 		.seq_show = cpuacct_stats_show,
307 	},
308 	{ }	/* terminate */
309 };
310 
311 /*
312  * charge this task's execution time to its accounting group.
313  *
314  * called with rq->lock held.
315  */
316 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
317 {
318 	struct cpuacct *ca;
319 	int index = CPUACCT_USAGE_SYSTEM;
320 	struct pt_regs *regs = task_pt_regs(tsk);
321 
322 	if (regs && user_mode(regs))
323 		index = CPUACCT_USAGE_USER;
324 
325 	rcu_read_lock();
326 
327 	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
328 		this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;
329 
330 	rcu_read_unlock();
331 }
332 
333 /*
334  * Add user/system time to cpuacct.
335  *
336  * Note: it's the caller that updates the account of the root cgroup.
337  */
338 void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
339 {
340 	struct cpuacct *ca;
341 
342 	rcu_read_lock();
343 	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
344 		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
345 	rcu_read_unlock();
346 }
347 
348 struct cgroup_subsys cpuacct_cgrp_subsys = {
349 	.css_alloc	= cpuacct_css_alloc,
350 	.css_free	= cpuacct_css_free,
351 	.legacy_cftypes	= files,
352 	.early_init	= true,
353 };
354