xref: /openbmc/linux/kernel/taskstats.c (revision 053c095a82cf773075e83d7233b5cc19a1f73ece)
/*
 * taskstats.c - Export per-task statistics to userland
 *
 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
 *           (C) Balbir Singh,   IBM Corp. 2006
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
18c757249aSShailabh Nagar 
19c757249aSShailabh Nagar #include <linux/kernel.h>
20c757249aSShailabh Nagar #include <linux/taskstats_kern.h>
21f3cef7a9SJay Lan #include <linux/tsacct_kern.h>
226f44993fSShailabh Nagar #include <linux/delayacct.h>
23f9fd8914SShailabh Nagar #include <linux/cpumask.h>
24f9fd8914SShailabh Nagar #include <linux/percpu.h>
255a0e3ad6STejun Heo #include <linux/slab.h>
26846c7bb0SBalbir Singh #include <linux/cgroupstats.h>
27846c7bb0SBalbir Singh #include <linux/cgroup.h>
28846c7bb0SBalbir Singh #include <linux/fs.h>
29846c7bb0SBalbir Singh #include <linux/file.h>
304bd6e32aSEric W. Biederman #include <linux/pid_namespace.h>
31c757249aSShailabh Nagar #include <net/genetlink.h>
3260063497SArun Sharma #include <linux/atomic.h>
33c757249aSShailabh Nagar 
34f9fd8914SShailabh Nagar /*
35f9fd8914SShailabh Nagar  * Maximum length of a cpumask that can be specified in
36f9fd8914SShailabh Nagar  * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute
37f9fd8914SShailabh Nagar  */
38f9fd8914SShailabh Nagar #define TASKSTATS_CPUMASK_MAXLEN	(100+6*NR_CPUS)
39f9fd8914SShailabh Nagar 
40b81f3ea9SVegard Nossum static DEFINE_PER_CPU(__u32, taskstats_seqnum);
41c757249aSShailabh Nagar static int family_registered;
42e18b890bSChristoph Lameter struct kmem_cache *taskstats_cache;
43c757249aSShailabh Nagar 
44c757249aSShailabh Nagar static struct genl_family family = {
45c757249aSShailabh Nagar 	.id		= GENL_ID_GENERATE,
46c757249aSShailabh Nagar 	.name		= TASKSTATS_GENL_NAME,
47c757249aSShailabh Nagar 	.version	= TASKSTATS_GENL_VERSION,
48c757249aSShailabh Nagar 	.maxattr	= TASKSTATS_CMD_ATTR_MAX,
49c757249aSShailabh Nagar };
50c757249aSShailabh Nagar 
51b54452b0SAlexey Dobriyan static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = {
52c757249aSShailabh Nagar 	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
53c757249aSShailabh Nagar 	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
54f9fd8914SShailabh Nagar 	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
55f9fd8914SShailabh Nagar 	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
56f9fd8914SShailabh Nagar 
57b54452b0SAlexey Dobriyan static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = {
58846c7bb0SBalbir Singh 	[CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
59846c7bb0SBalbir Singh };
60846c7bb0SBalbir Singh 
61f9fd8914SShailabh Nagar struct listener {
62f9fd8914SShailabh Nagar 	struct list_head list;
63f9fd8914SShailabh Nagar 	pid_t pid;
64bb129994SShailabh Nagar 	char valid;
65c757249aSShailabh Nagar };
66c757249aSShailabh Nagar 
67f9fd8914SShailabh Nagar struct listener_list {
68f9fd8914SShailabh Nagar 	struct rw_semaphore sem;
69f9fd8914SShailabh Nagar 	struct list_head list;
70f9fd8914SShailabh Nagar };
71f9fd8914SShailabh Nagar static DEFINE_PER_CPU(struct listener_list, listener_array);
72f9fd8914SShailabh Nagar 
73f9fd8914SShailabh Nagar enum actions {
74f9fd8914SShailabh Nagar 	REGISTER,
75f9fd8914SShailabh Nagar 	DEREGISTER,
76f9fd8914SShailabh Nagar 	CPU_DONT_CARE
77f9fd8914SShailabh Nagar };
78c757249aSShailabh Nagar 
79c757249aSShailabh Nagar static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
8037167485SOleg Nesterov 				size_t size)
81c757249aSShailabh Nagar {
82c757249aSShailabh Nagar 	struct sk_buff *skb;
83c757249aSShailabh Nagar 	void *reply;
84c757249aSShailabh Nagar 
85c757249aSShailabh Nagar 	/*
86c757249aSShailabh Nagar 	 * If new attributes are added, please revisit this allocation
87c757249aSShailabh Nagar 	 */
883dabc715SThomas Graf 	skb = genlmsg_new(size, GFP_KERNEL);
89c757249aSShailabh Nagar 	if (!skb)
90c757249aSShailabh Nagar 		return -ENOMEM;
91c757249aSShailabh Nagar 
92c757249aSShailabh Nagar 	if (!info) {
93cd85fc58SChristoph Lameter 		int seq = this_cpu_inc_return(taskstats_seqnum) - 1;
94c757249aSShailabh Nagar 
9517c157c8SThomas Graf 		reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
96c757249aSShailabh Nagar 	} else
9717c157c8SThomas Graf 		reply = genlmsg_put_reply(skb, info, &family, 0, cmd);
98c757249aSShailabh Nagar 	if (reply == NULL) {
99c757249aSShailabh Nagar 		nlmsg_free(skb);
100c757249aSShailabh Nagar 		return -EINVAL;
101c757249aSShailabh Nagar 	}
102c757249aSShailabh Nagar 
103c757249aSShailabh Nagar 	*skbp = skb;
104c757249aSShailabh Nagar 	return 0;
105c757249aSShailabh Nagar }
106c757249aSShailabh Nagar 
/*
 * Finalize the message in @skb and send it back to the requester
 * described by @info. Returns the result of genlmsg_reply().
 */
static int send_reply(struct sk_buff *skb, struct genl_info *info)
{
	struct genlmsghdr *hdr = nlmsg_data(nlmsg_hdr(skb));

	genlmsg_end(skb, genlmsg_data(hdr));

	return genlmsg_reply(skb, info);
}
119c757249aSShailabh Nagar 
120f9fd8914SShailabh Nagar /*
121f9fd8914SShailabh Nagar  * Send taskstats data in @skb to listeners registered for @cpu's exit data
122f9fd8914SShailabh Nagar  */
123115085eaSOleg Nesterov static void send_cpu_listeners(struct sk_buff *skb,
124115085eaSOleg Nesterov 					struct listener_list *listeners)
125f9fd8914SShailabh Nagar {
126b529ccf2SArnaldo Carvalho de Melo 	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
127f9fd8914SShailabh Nagar 	struct listener *s, *tmp;
128f9fd8914SShailabh Nagar 	struct sk_buff *skb_next, *skb_cur = skb;
129f9fd8914SShailabh Nagar 	void *reply = genlmsg_data(genlhdr);
130d94a0415SShailabh Nagar 	int rc, delcount = 0;
131f9fd8914SShailabh Nagar 
132*053c095aSJohannes Berg 	genlmsg_end(skb, reply);
133f9fd8914SShailabh Nagar 
134f9fd8914SShailabh Nagar 	rc = 0;
135bb129994SShailabh Nagar 	down_read(&listeners->sem);
136d94a0415SShailabh Nagar 	list_for_each_entry(s, &listeners->list, list) {
137f9fd8914SShailabh Nagar 		skb_next = NULL;
138f9fd8914SShailabh Nagar 		if (!list_is_last(&s->list, &listeners->list)) {
139f9fd8914SShailabh Nagar 			skb_next = skb_clone(skb_cur, GFP_KERNEL);
140d94a0415SShailabh Nagar 			if (!skb_next)
141f9fd8914SShailabh Nagar 				break;
142f9fd8914SShailabh Nagar 		}
143134e6375SJohannes Berg 		rc = genlmsg_unicast(&init_net, skb_cur, s->pid);
144d94a0415SShailabh Nagar 		if (rc == -ECONNREFUSED) {
145bb129994SShailabh Nagar 			s->valid = 0;
146bb129994SShailabh Nagar 			delcount++;
147f9fd8914SShailabh Nagar 		}
148f9fd8914SShailabh Nagar 		skb_cur = skb_next;
149f9fd8914SShailabh Nagar 	}
150bb129994SShailabh Nagar 	up_read(&listeners->sem);
151f9fd8914SShailabh Nagar 
152d94a0415SShailabh Nagar 	if (skb_cur)
153d94a0415SShailabh Nagar 		nlmsg_free(skb_cur);
154d94a0415SShailabh Nagar 
155bb129994SShailabh Nagar 	if (!delcount)
156d94a0415SShailabh Nagar 		return;
157bb129994SShailabh Nagar 
158bb129994SShailabh Nagar 	/* Delete invalidated entries */
159bb129994SShailabh Nagar 	down_write(&listeners->sem);
160bb129994SShailabh Nagar 	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
161bb129994SShailabh Nagar 		if (!s->valid) {
162bb129994SShailabh Nagar 			list_del(&s->list);
163bb129994SShailabh Nagar 			kfree(s);
164bb129994SShailabh Nagar 		}
165bb129994SShailabh Nagar 	}
166bb129994SShailabh Nagar 	up_write(&listeners->sem);
167f9fd8914SShailabh Nagar }
168f9fd8914SShailabh Nagar 
1694bd6e32aSEric W. Biederman static void fill_stats(struct user_namespace *user_ns,
1704bd6e32aSEric W. Biederman 		       struct pid_namespace *pid_ns,
1714bd6e32aSEric W. Biederman 		       struct task_struct *tsk, struct taskstats *stats)
172c757249aSShailabh Nagar {
17351de4d90SOleg Nesterov 	memset(stats, 0, sizeof(*stats));
174c757249aSShailabh Nagar 	/*
175c757249aSShailabh Nagar 	 * Each accounting subsystem adds calls to its functions to
176c757249aSShailabh Nagar 	 * fill in relevant parts of struct taskstsats as follows
177c757249aSShailabh Nagar 	 *
1787d94ddddSShailabh Nagar 	 *	per-task-foo(stats, tsk);
179c757249aSShailabh Nagar 	 */
180c757249aSShailabh Nagar 
1817d94ddddSShailabh Nagar 	delayacct_add_tsk(stats, tsk);
182f3cef7a9SJay Lan 
183f3cef7a9SJay Lan 	/* fill in basic acct fields */
1846f44993fSShailabh Nagar 	stats->version = TASKSTATS_VERSION;
185b663a79cSMaxim Uvarov 	stats->nvcsw = tsk->nvcsw;
186b663a79cSMaxim Uvarov 	stats->nivcsw = tsk->nivcsw;
1874bd6e32aSEric W. Biederman 	bacct_add_tsk(user_ns, pid_ns, stats, tsk);
1886f44993fSShailabh Nagar 
1899acc1853SJay Lan 	/* fill in extended acct fields */
1909acc1853SJay Lan 	xacct_add_tsk(stats, tsk);
191c757249aSShailabh Nagar }
192c757249aSShailabh Nagar 
1933d9e0cf1SMichael Holzheu static int fill_stats_for_pid(pid_t pid, struct taskstats *stats)
194c757249aSShailabh Nagar {
195a98b6094SOleg Nesterov 	struct task_struct *tsk;
1963d9e0cf1SMichael Holzheu 
1973d9e0cf1SMichael Holzheu 	rcu_read_lock();
1983d9e0cf1SMichael Holzheu 	tsk = find_task_by_vpid(pid);
1993d9e0cf1SMichael Holzheu 	if (tsk)
2003d9e0cf1SMichael Holzheu 		get_task_struct(tsk);
2013d9e0cf1SMichael Holzheu 	rcu_read_unlock();
2023d9e0cf1SMichael Holzheu 	if (!tsk)
2033d9e0cf1SMichael Holzheu 		return -ESRCH;
2044bd6e32aSEric W. Biederman 	fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats);
2053d9e0cf1SMichael Holzheu 	put_task_struct(tsk);
2063d9e0cf1SMichael Holzheu 	return 0;
2073d9e0cf1SMichael Holzheu }
2083d9e0cf1SMichael Holzheu 
2093d9e0cf1SMichael Holzheu static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats)
2103d9e0cf1SMichael Holzheu {
2113d9e0cf1SMichael Holzheu 	struct task_struct *tsk, *first;
212ad4ecbcbSShailabh Nagar 	unsigned long flags;
213a98b6094SOleg Nesterov 	int rc = -ESRCH;
214c757249aSShailabh Nagar 
215ad4ecbcbSShailabh Nagar 	/*
216ad4ecbcbSShailabh Nagar 	 * Add additional stats from live tasks except zombie thread group
217ad4ecbcbSShailabh Nagar 	 * leaders who are already counted with the dead tasks
218ad4ecbcbSShailabh Nagar 	 */
219a98b6094SOleg Nesterov 	rcu_read_lock();
220cb41d6d0SPavel Emelyanov 	first = find_task_by_vpid(tgid);
221ad4ecbcbSShailabh Nagar 
222a98b6094SOleg Nesterov 	if (!first || !lock_task_sighand(first, &flags))
223a98b6094SOleg Nesterov 		goto out;
224fca178c0SOleg Nesterov 
225ad4ecbcbSShailabh Nagar 	if (first->signal->stats)
226ad4ecbcbSShailabh Nagar 		memcpy(stats, first->signal->stats, sizeof(*stats));
22751de4d90SOleg Nesterov 	else
22851de4d90SOleg Nesterov 		memset(stats, 0, sizeof(*stats));
229ad4ecbcbSShailabh Nagar 
230a98b6094SOleg Nesterov 	tsk = first;
231c757249aSShailabh Nagar 	do {
232d7c3f5f2SOleg Nesterov 		if (tsk->exit_state)
233ad4ecbcbSShailabh Nagar 			continue;
234c757249aSShailabh Nagar 		/*
235ad4ecbcbSShailabh Nagar 		 * Accounting subsystem can call its functions here to
236c757249aSShailabh Nagar 		 * fill in relevant parts of struct taskstsats as follows
237c757249aSShailabh Nagar 		 *
238ad4ecbcbSShailabh Nagar 		 *	per-task-foo(stats, tsk);
239c757249aSShailabh Nagar 		 */
240ad4ecbcbSShailabh Nagar 		delayacct_add_tsk(stats, tsk);
2416f44993fSShailabh Nagar 
242b663a79cSMaxim Uvarov 		stats->nvcsw += tsk->nvcsw;
243b663a79cSMaxim Uvarov 		stats->nivcsw += tsk->nivcsw;
244c757249aSShailabh Nagar 	} while_each_thread(first, tsk);
2456f44993fSShailabh Nagar 
246a98b6094SOleg Nesterov 	unlock_task_sighand(first, &flags);
247a98b6094SOleg Nesterov 	rc = 0;
248a98b6094SOleg Nesterov out:
249a98b6094SOleg Nesterov 	rcu_read_unlock();
250a98b6094SOleg Nesterov 
251a98b6094SOleg Nesterov 	stats->version = TASKSTATS_VERSION;
252c757249aSShailabh Nagar 	/*
2533a4fa0a2SRobert P. J. Day 	 * Accounting subsystems can also add calls here to modify
254ad4ecbcbSShailabh Nagar 	 * fields of taskstats.
255c757249aSShailabh Nagar 	 */
256a98b6094SOleg Nesterov 	return rc;
257c757249aSShailabh Nagar }
258c757249aSShailabh Nagar 
259ad4ecbcbSShailabh Nagar static void fill_tgid_exit(struct task_struct *tsk)
260ad4ecbcbSShailabh Nagar {
261ad4ecbcbSShailabh Nagar 	unsigned long flags;
262ad4ecbcbSShailabh Nagar 
263b8534d7bSOleg Nesterov 	spin_lock_irqsave(&tsk->sighand->siglock, flags);
264ad4ecbcbSShailabh Nagar 	if (!tsk->signal->stats)
265ad4ecbcbSShailabh Nagar 		goto ret;
266ad4ecbcbSShailabh Nagar 
267ad4ecbcbSShailabh Nagar 	/*
268ad4ecbcbSShailabh Nagar 	 * Each accounting subsystem calls its functions here to
269ad4ecbcbSShailabh Nagar 	 * accumalate its per-task stats for tsk, into the per-tgid structure
270ad4ecbcbSShailabh Nagar 	 *
271ad4ecbcbSShailabh Nagar 	 *	per-task-foo(tsk->signal->stats, tsk);
272ad4ecbcbSShailabh Nagar 	 */
273ad4ecbcbSShailabh Nagar 	delayacct_add_tsk(tsk->signal->stats, tsk);
274ad4ecbcbSShailabh Nagar ret:
275b8534d7bSOleg Nesterov 	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
276ad4ecbcbSShailabh Nagar 	return;
277ad4ecbcbSShailabh Nagar }
278ad4ecbcbSShailabh Nagar 
27941c7bb95SRusty Russell static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
280f9fd8914SShailabh Nagar {
281f9fd8914SShailabh Nagar 	struct listener_list *listeners;
28226c4caeaSVasiliy Kulikov 	struct listener *s, *tmp, *s2;
283f9fd8914SShailabh Nagar 	unsigned int cpu;
2840d20633bSChen Gang 	int ret = 0;
285ad4ecbcbSShailabh Nagar 
28641c7bb95SRusty Russell 	if (!cpumask_subset(mask, cpu_possible_mask))
287f9fd8914SShailabh Nagar 		return -EINVAL;
288f9fd8914SShailabh Nagar 
2894bd6e32aSEric W. Biederman 	if (current_user_ns() != &init_user_ns)
2904bd6e32aSEric W. Biederman 		return -EINVAL;
2914bd6e32aSEric W. Biederman 
2924bd6e32aSEric W. Biederman 	if (task_active_pid_ns(current) != &init_pid_ns)
2934bd6e32aSEric W. Biederman 		return -EINVAL;
2944bd6e32aSEric W. Biederman 
295f9fd8914SShailabh Nagar 	if (isadd == REGISTER) {
29641c7bb95SRusty Russell 		for_each_cpu(cpu, mask) {
29726c4caeaSVasiliy Kulikov 			s = kmalloc_node(sizeof(struct listener),
29826c4caeaSVasiliy Kulikov 					GFP_KERNEL, cpu_to_node(cpu));
2990d20633bSChen Gang 			if (!s) {
3000d20633bSChen Gang 				ret = -ENOMEM;
301f9fd8914SShailabh Nagar 				goto cleanup;
3020d20633bSChen Gang 			}
303f9fd8914SShailabh Nagar 			s->pid = pid;
304bb129994SShailabh Nagar 			s->valid = 1;
305f9fd8914SShailabh Nagar 
306f9fd8914SShailabh Nagar 			listeners = &per_cpu(listener_array, cpu);
307f9fd8914SShailabh Nagar 			down_write(&listeners->sem);
308dfc428b6SOleg Nesterov 			list_for_each_entry(s2, &listeners->list, list) {
309a7295898SOleg Nesterov 				if (s2->pid == pid && s2->valid)
310dfc428b6SOleg Nesterov 					goto exists;
31126c4caeaSVasiliy Kulikov 			}
312f9fd8914SShailabh Nagar 			list_add(&s->list, &listeners->list);
31326c4caeaSVasiliy Kulikov 			s = NULL;
314dfc428b6SOleg Nesterov exists:
315f9fd8914SShailabh Nagar 			up_write(&listeners->sem);
316dfc428b6SOleg Nesterov 			kfree(s); /* nop if NULL */
317f9fd8914SShailabh Nagar 		}
318f9fd8914SShailabh Nagar 		return 0;
319f9fd8914SShailabh Nagar 	}
320f9fd8914SShailabh Nagar 
321f9fd8914SShailabh Nagar 	/* Deregister or cleanup */
322f9fd8914SShailabh Nagar cleanup:
32341c7bb95SRusty Russell 	for_each_cpu(cpu, mask) {
324f9fd8914SShailabh Nagar 		listeners = &per_cpu(listener_array, cpu);
325f9fd8914SShailabh Nagar 		down_write(&listeners->sem);
326f9fd8914SShailabh Nagar 		list_for_each_entry_safe(s, tmp, &listeners->list, list) {
327f9fd8914SShailabh Nagar 			if (s->pid == pid) {
328f9fd8914SShailabh Nagar 				list_del(&s->list);
329f9fd8914SShailabh Nagar 				kfree(s);
330f9fd8914SShailabh Nagar 				break;
331f9fd8914SShailabh Nagar 			}
332f9fd8914SShailabh Nagar 		}
333f9fd8914SShailabh Nagar 		up_write(&listeners->sem);
334f9fd8914SShailabh Nagar 	}
3350d20633bSChen Gang 	return ret;
336f9fd8914SShailabh Nagar }
337f9fd8914SShailabh Nagar 
33841c7bb95SRusty Russell static int parse(struct nlattr *na, struct cpumask *mask)
339f9fd8914SShailabh Nagar {
340f9fd8914SShailabh Nagar 	char *data;
341f9fd8914SShailabh Nagar 	int len;
342f9fd8914SShailabh Nagar 	int ret;
343f9fd8914SShailabh Nagar 
344f9fd8914SShailabh Nagar 	if (na == NULL)
345f9fd8914SShailabh Nagar 		return 1;
346f9fd8914SShailabh Nagar 	len = nla_len(na);
347f9fd8914SShailabh Nagar 	if (len > TASKSTATS_CPUMASK_MAXLEN)
348f9fd8914SShailabh Nagar 		return -E2BIG;
349f9fd8914SShailabh Nagar 	if (len < 1)
350f9fd8914SShailabh Nagar 		return -EINVAL;
351f9fd8914SShailabh Nagar 	data = kmalloc(len, GFP_KERNEL);
352f9fd8914SShailabh Nagar 	if (!data)
353f9fd8914SShailabh Nagar 		return -ENOMEM;
354f9fd8914SShailabh Nagar 	nla_strlcpy(data, na, len);
35529c0177eSRusty Russell 	ret = cpulist_parse(data, mask);
356f9fd8914SShailabh Nagar 	kfree(data);
357f9fd8914SShailabh Nagar 	return ret;
358f9fd8914SShailabh Nagar }
359f9fd8914SShailabh Nagar 
3609ab020cfSJeff Mahoney #if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
3614be2c95dSJeff Mahoney #define TASKSTATS_NEEDS_PADDING 1
3624be2c95dSJeff Mahoney #endif
3634be2c95dSJeff Mahoney 
36451de4d90SOleg Nesterov static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
36568062b86SOleg Nesterov {
36651de4d90SOleg Nesterov 	struct nlattr *na, *ret;
36768062b86SOleg Nesterov 	int aggr;
36868062b86SOleg Nesterov 
36937167485SOleg Nesterov 	aggr = (type == TASKSTATS_TYPE_PID)
37037167485SOleg Nesterov 			? TASKSTATS_TYPE_AGGR_PID
37137167485SOleg Nesterov 			: TASKSTATS_TYPE_AGGR_TGID;
37268062b86SOleg Nesterov 
3734be2c95dSJeff Mahoney 	/*
3744be2c95dSJeff Mahoney 	 * The taskstats structure is internally aligned on 8 byte
3754be2c95dSJeff Mahoney 	 * boundaries but the layout of the aggregrate reply, with
3764be2c95dSJeff Mahoney 	 * two NLA headers and the pid (each 4 bytes), actually
3774be2c95dSJeff Mahoney 	 * force the entire structure to be unaligned. This causes
3784be2c95dSJeff Mahoney 	 * the kernel to issue unaligned access warnings on some
3794be2c95dSJeff Mahoney 	 * architectures like ia64. Unfortunately, some software out there
3804be2c95dSJeff Mahoney 	 * doesn't properly unroll the NLA packet and assumes that the start
3814be2c95dSJeff Mahoney 	 * of the taskstats structure will always be 20 bytes from the start
3824be2c95dSJeff Mahoney 	 * of the netlink payload. Aligning the start of the taskstats
3834be2c95dSJeff Mahoney 	 * structure breaks this software, which we don't want. So, for now
3844be2c95dSJeff Mahoney 	 * the alignment only happens on architectures that require it
3854be2c95dSJeff Mahoney 	 * and those users will have to update to fixed versions of those
3864be2c95dSJeff Mahoney 	 * packages. Space is reserved in the packet only when needed.
3874be2c95dSJeff Mahoney 	 * This ifdef should be removed in several years e.g. 2012 once
3884be2c95dSJeff Mahoney 	 * we can be confident that fixed versions are installed on most
3894be2c95dSJeff Mahoney 	 * systems. We add the padding before the aggregate since the
3904be2c95dSJeff Mahoney 	 * aggregate is already a defined type.
3914be2c95dSJeff Mahoney 	 */
3924be2c95dSJeff Mahoney #ifdef TASKSTATS_NEEDS_PADDING
3934be2c95dSJeff Mahoney 	if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0)
3944be2c95dSJeff Mahoney 		goto err;
3954be2c95dSJeff Mahoney #endif
39668062b86SOleg Nesterov 	na = nla_nest_start(skb, aggr);
39737167485SOleg Nesterov 	if (!na)
39837167485SOleg Nesterov 		goto err;
3994be2c95dSJeff Mahoney 
4003fa58266SChen Gang 	if (nla_put(skb, type, sizeof(pid), &pid) < 0) {
4013fa58266SChen Gang 		nla_nest_cancel(skb, na);
40251de4d90SOleg Nesterov 		goto err;
4033fa58266SChen Gang 	}
40451de4d90SOleg Nesterov 	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
4053fa58266SChen Gang 	if (!ret) {
4063fa58266SChen Gang 		nla_nest_cancel(skb, na);
40751de4d90SOleg Nesterov 		goto err;
4083fa58266SChen Gang 	}
40968062b86SOleg Nesterov 	nla_nest_end(skb, na);
41068062b86SOleg Nesterov 
41151de4d90SOleg Nesterov 	return nla_data(ret);
41251de4d90SOleg Nesterov err:
41351de4d90SOleg Nesterov 	return NULL;
41468062b86SOleg Nesterov }
41568062b86SOleg Nesterov 
416846c7bb0SBalbir Singh static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
417846c7bb0SBalbir Singh {
418846c7bb0SBalbir Singh 	int rc = 0;
419846c7bb0SBalbir Singh 	struct sk_buff *rep_skb;
420846c7bb0SBalbir Singh 	struct cgroupstats *stats;
421846c7bb0SBalbir Singh 	struct nlattr *na;
422846c7bb0SBalbir Singh 	size_t size;
423846c7bb0SBalbir Singh 	u32 fd;
4242903ff01SAl Viro 	struct fd f;
425846c7bb0SBalbir Singh 
426846c7bb0SBalbir Singh 	na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
427846c7bb0SBalbir Singh 	if (!na)
428846c7bb0SBalbir Singh 		return -EINVAL;
429846c7bb0SBalbir Singh 
430846c7bb0SBalbir Singh 	fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
4312903ff01SAl Viro 	f = fdget(fd);
4322903ff01SAl Viro 	if (!f.file)
433f9615984SAdrian Bunk 		return 0;
434f9615984SAdrian Bunk 
435846c7bb0SBalbir Singh 	size = nla_total_size(sizeof(struct cgroupstats));
436846c7bb0SBalbir Singh 
437846c7bb0SBalbir Singh 	rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
438846c7bb0SBalbir Singh 				size);
439846c7bb0SBalbir Singh 	if (rc < 0)
440846c7bb0SBalbir Singh 		goto err;
441846c7bb0SBalbir Singh 
442846c7bb0SBalbir Singh 	na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
443846c7bb0SBalbir Singh 				sizeof(struct cgroupstats));
44425353b33SAlan Cox 	if (na == NULL) {
4450324b5a4SJesper Juhl 		nlmsg_free(rep_skb);
44625353b33SAlan Cox 		rc = -EMSGSIZE;
44725353b33SAlan Cox 		goto err;
44825353b33SAlan Cox 	}
44925353b33SAlan Cox 
450846c7bb0SBalbir Singh 	stats = nla_data(na);
451846c7bb0SBalbir Singh 	memset(stats, 0, sizeof(*stats));
452846c7bb0SBalbir Singh 
453b583043eSAl Viro 	rc = cgroupstats_build(stats, f.file->f_path.dentry);
454f9615984SAdrian Bunk 	if (rc < 0) {
455f9615984SAdrian Bunk 		nlmsg_free(rep_skb);
456846c7bb0SBalbir Singh 		goto err;
457846c7bb0SBalbir Singh 	}
458846c7bb0SBalbir Singh 
459134e6375SJohannes Berg 	rc = send_reply(rep_skb, info);
460f9615984SAdrian Bunk 
461846c7bb0SBalbir Singh err:
4622903ff01SAl Viro 	fdput(f);
463846c7bb0SBalbir Singh 	return rc;
464846c7bb0SBalbir Singh }
465846c7bb0SBalbir Singh 
46693233125SMichael Holzheu static int cmd_attr_register_cpumask(struct genl_info *info)
467c757249aSShailabh Nagar {
46841c7bb95SRusty Russell 	cpumask_var_t mask;
46993233125SMichael Holzheu 	int rc;
470f9fd8914SShailabh Nagar 
47141c7bb95SRusty Russell 	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
47241c7bb95SRusty Russell 		return -ENOMEM;
47341c7bb95SRusty Russell 	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
474f9fd8914SShailabh Nagar 	if (rc < 0)
47593233125SMichael Holzheu 		goto out;
47615e47304SEric W. Biederman 	rc = add_del_listener(info->snd_portid, mask, REGISTER);
47793233125SMichael Holzheu out:
47841c7bb95SRusty Russell 	free_cpumask_var(mask);
479f9fd8914SShailabh Nagar 	return rc;
48041c7bb95SRusty Russell }
481c757249aSShailabh Nagar 
48293233125SMichael Holzheu static int cmd_attr_deregister_cpumask(struct genl_info *info)
48393233125SMichael Holzheu {
48493233125SMichael Holzheu 	cpumask_var_t mask;
48593233125SMichael Holzheu 	int rc;
48693233125SMichael Holzheu 
48793233125SMichael Holzheu 	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
48893233125SMichael Holzheu 		return -ENOMEM;
48993233125SMichael Holzheu 	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
49093233125SMichael Holzheu 	if (rc < 0)
49193233125SMichael Holzheu 		goto out;
49215e47304SEric W. Biederman 	rc = add_del_listener(info->snd_portid, mask, DEREGISTER);
49393233125SMichael Holzheu out:
49493233125SMichael Holzheu 	free_cpumask_var(mask);
49593233125SMichael Holzheu 	return rc;
49693233125SMichael Holzheu }
49793233125SMichael Holzheu 
4984be2c95dSJeff Mahoney static size_t taskstats_packet_size(void)
4994be2c95dSJeff Mahoney {
5004be2c95dSJeff Mahoney 	size_t size;
5014be2c95dSJeff Mahoney 
5024be2c95dSJeff Mahoney 	size = nla_total_size(sizeof(u32)) +
5034be2c95dSJeff Mahoney 		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
5044be2c95dSJeff Mahoney #ifdef TASKSTATS_NEEDS_PADDING
5054be2c95dSJeff Mahoney 	size += nla_total_size(0); /* Padding for alignment */
5064be2c95dSJeff Mahoney #endif
5074be2c95dSJeff Mahoney 	return size;
5084be2c95dSJeff Mahoney }
5094be2c95dSJeff Mahoney 
51093233125SMichael Holzheu static int cmd_attr_pid(struct genl_info *info)
51193233125SMichael Holzheu {
51293233125SMichael Holzheu 	struct taskstats *stats;
51393233125SMichael Holzheu 	struct sk_buff *rep_skb;
51493233125SMichael Holzheu 	size_t size;
51593233125SMichael Holzheu 	u32 pid;
51693233125SMichael Holzheu 	int rc;
51793233125SMichael Holzheu 
5184be2c95dSJeff Mahoney 	size = taskstats_packet_size();
519c757249aSShailabh Nagar 
52037167485SOleg Nesterov 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
521c757249aSShailabh Nagar 	if (rc < 0)
522c757249aSShailabh Nagar 		return rc;
523c757249aSShailabh Nagar 
52451de4d90SOleg Nesterov 	rc = -EINVAL;
52593233125SMichael Holzheu 	pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
52651de4d90SOleg Nesterov 	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid);
52751de4d90SOleg Nesterov 	if (!stats)
52837167485SOleg Nesterov 		goto err;
529c757249aSShailabh Nagar 
5303d9e0cf1SMichael Holzheu 	rc = fill_stats_for_pid(pid, stats);
53151de4d90SOleg Nesterov 	if (rc < 0)
53237167485SOleg Nesterov 		goto err;
53393233125SMichael Holzheu 	return send_reply(rep_skb, info);
53493233125SMichael Holzheu err:
53593233125SMichael Holzheu 	nlmsg_free(rep_skb);
53693233125SMichael Holzheu 	return rc;
53793233125SMichael Holzheu }
53893233125SMichael Holzheu 
53993233125SMichael Holzheu static int cmd_attr_tgid(struct genl_info *info)
54093233125SMichael Holzheu {
54193233125SMichael Holzheu 	struct taskstats *stats;
54293233125SMichael Holzheu 	struct sk_buff *rep_skb;
54393233125SMichael Holzheu 	size_t size;
54493233125SMichael Holzheu 	u32 tgid;
54593233125SMichael Holzheu 	int rc;
54693233125SMichael Holzheu 
5474be2c95dSJeff Mahoney 	size = taskstats_packet_size();
54893233125SMichael Holzheu 
54993233125SMichael Holzheu 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
55093233125SMichael Holzheu 	if (rc < 0)
55193233125SMichael Holzheu 		return rc;
55293233125SMichael Holzheu 
55393233125SMichael Holzheu 	rc = -EINVAL;
55493233125SMichael Holzheu 	tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
55551de4d90SOleg Nesterov 	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid);
55651de4d90SOleg Nesterov 	if (!stats)
55737167485SOleg Nesterov 		goto err;
558c757249aSShailabh Nagar 
5593d9e0cf1SMichael Holzheu 	rc = fill_stats_for_tgid(tgid, stats);
56051de4d90SOleg Nesterov 	if (rc < 0)
56137167485SOleg Nesterov 		goto err;
562134e6375SJohannes Berg 	return send_reply(rep_skb, info);
563c757249aSShailabh Nagar err:
564c757249aSShailabh Nagar 	nlmsg_free(rep_skb);
565c757249aSShailabh Nagar 	return rc;
566c757249aSShailabh Nagar }
567c757249aSShailabh Nagar 
56893233125SMichael Holzheu static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
56993233125SMichael Holzheu {
57093233125SMichael Holzheu 	if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK])
57193233125SMichael Holzheu 		return cmd_attr_register_cpumask(info);
57293233125SMichael Holzheu 	else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK])
57393233125SMichael Holzheu 		return cmd_attr_deregister_cpumask(info);
57493233125SMichael Holzheu 	else if (info->attrs[TASKSTATS_CMD_ATTR_PID])
57593233125SMichael Holzheu 		return cmd_attr_pid(info);
57693233125SMichael Holzheu 	else if (info->attrs[TASKSTATS_CMD_ATTR_TGID])
57793233125SMichael Holzheu 		return cmd_attr_tgid(info);
57893233125SMichael Holzheu 	else
57993233125SMichael Holzheu 		return -EINVAL;
58093233125SMichael Holzheu }
58193233125SMichael Holzheu 
58234ec1234SOleg Nesterov static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
58334ec1234SOleg Nesterov {
58434ec1234SOleg Nesterov 	struct signal_struct *sig = tsk->signal;
58534ec1234SOleg Nesterov 	struct taskstats *stats;
58634ec1234SOleg Nesterov 
58734ec1234SOleg Nesterov 	if (sig->stats || thread_group_empty(tsk))
58834ec1234SOleg Nesterov 		goto ret;
58934ec1234SOleg Nesterov 
59034ec1234SOleg Nesterov 	/* No problem if kmem_cache_zalloc() fails */
59134ec1234SOleg Nesterov 	stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
59234ec1234SOleg Nesterov 
59334ec1234SOleg Nesterov 	spin_lock_irq(&tsk->sighand->siglock);
59434ec1234SOleg Nesterov 	if (!sig->stats) {
59534ec1234SOleg Nesterov 		sig->stats = stats;
59634ec1234SOleg Nesterov 		stats = NULL;
59734ec1234SOleg Nesterov 	}
59834ec1234SOleg Nesterov 	spin_unlock_irq(&tsk->sighand->siglock);
59934ec1234SOleg Nesterov 
60034ec1234SOleg Nesterov 	if (stats)
60134ec1234SOleg Nesterov 		kmem_cache_free(taskstats_cache, stats);
60234ec1234SOleg Nesterov ret:
60334ec1234SOleg Nesterov 	return sig->stats;
60434ec1234SOleg Nesterov }
60534ec1234SOleg Nesterov 
606c757249aSShailabh Nagar /* Send pid data out on exit */
607115085eaSOleg Nesterov void taskstats_exit(struct task_struct *tsk, int group_dead)
608c757249aSShailabh Nagar {
609c757249aSShailabh Nagar 	int rc;
610115085eaSOleg Nesterov 	struct listener_list *listeners;
61151de4d90SOleg Nesterov 	struct taskstats *stats;
612c757249aSShailabh Nagar 	struct sk_buff *rep_skb;
613c757249aSShailabh Nagar 	size_t size;
614c757249aSShailabh Nagar 	int is_thread_group;
615c757249aSShailabh Nagar 
6164a279ff1SOleg Nesterov 	if (!family_registered)
617c757249aSShailabh Nagar 		return;
618c757249aSShailabh Nagar 
619c757249aSShailabh Nagar 	/*
620c757249aSShailabh Nagar 	 * Size includes space for nested attributes
621c757249aSShailabh Nagar 	 */
6224be2c95dSJeff Mahoney 	size = taskstats_packet_size();
623c757249aSShailabh Nagar 
62434ec1234SOleg Nesterov 	is_thread_group = !!taskstats_tgid_alloc(tsk);
6254a279ff1SOleg Nesterov 	if (is_thread_group) {
6264a279ff1SOleg Nesterov 		/* PID + STATS + TGID + STATS */
6274a279ff1SOleg Nesterov 		size = 2 * size;
6284a279ff1SOleg Nesterov 		/* fill the tsk->signal->stats structure */
6294a279ff1SOleg Nesterov 		fill_tgid_exit(tsk);
6304a279ff1SOleg Nesterov 	}
6314a279ff1SOleg Nesterov 
6324a32fea9SChristoph Lameter 	listeners = raw_cpu_ptr(&listener_array);
633115085eaSOleg Nesterov 	if (list_empty(&listeners->list))
634115085eaSOleg Nesterov 		return;
635115085eaSOleg Nesterov 
63637167485SOleg Nesterov 	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size);
637c757249aSShailabh Nagar 	if (rc < 0)
63851de4d90SOleg Nesterov 		return;
639c757249aSShailabh Nagar 
6404bd6e32aSEric W. Biederman 	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID,
6414bd6e32aSEric W. Biederman 			 task_pid_nr_ns(tsk, &init_pid_ns));
64251de4d90SOleg Nesterov 	if (!stats)
64337167485SOleg Nesterov 		goto err;
64451de4d90SOleg Nesterov 
6454bd6e32aSEric W. Biederman 	fill_stats(&init_user_ns, &init_pid_ns, tsk, stats);
646c757249aSShailabh Nagar 
647c757249aSShailabh Nagar 	/*
648ad4ecbcbSShailabh Nagar 	 * Doesn't matter if tsk is the leader or the last group member leaving
649c757249aSShailabh Nagar 	 */
65068062b86SOleg Nesterov 	if (!is_thread_group || !group_dead)
651ad4ecbcbSShailabh Nagar 		goto send;
652c757249aSShailabh Nagar 
6534bd6e32aSEric W. Biederman 	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID,
6544bd6e32aSEric W. Biederman 			 task_tgid_nr_ns(tsk, &init_pid_ns));
65551de4d90SOleg Nesterov 	if (!stats)
65637167485SOleg Nesterov 		goto err;
65751de4d90SOleg Nesterov 
65851de4d90SOleg Nesterov 	memcpy(stats, tsk->signal->stats, sizeof(*stats));
659c757249aSShailabh Nagar 
660ad4ecbcbSShailabh Nagar send:
661115085eaSOleg Nesterov 	send_cpu_listeners(rep_skb, listeners);
662ad4ecbcbSShailabh Nagar 	return;
66337167485SOleg Nesterov err:
664c757249aSShailabh Nagar 	nlmsg_free(rep_skb);
665c757249aSShailabh Nagar }
666c757249aSShailabh Nagar 
6674534de83SJohannes Berg static const struct genl_ops taskstats_ops[] = {
66888d36a99SJohannes Berg 	{
669c757249aSShailabh Nagar 		.cmd		= TASKSTATS_CMD_GET,
670f9fd8914SShailabh Nagar 		.doit		= taskstats_user_cmd,
671c757249aSShailabh Nagar 		.policy		= taskstats_cmd_get_policy,
6721a51410aSLinus Torvalds 		.flags		= GENL_ADMIN_PERM,
67388d36a99SJohannes Berg 	},
67488d36a99SJohannes Berg 	{
675846c7bb0SBalbir Singh 		.cmd		= CGROUPSTATS_CMD_GET,
676846c7bb0SBalbir Singh 		.doit		= cgroupstats_user_cmd,
677846c7bb0SBalbir Singh 		.policy		= cgroupstats_cmd_get_policy,
67888d36a99SJohannes Berg 	},
679846c7bb0SBalbir Singh };
680846c7bb0SBalbir Singh 
681c757249aSShailabh Nagar /* Needed early in initialization */
682c757249aSShailabh Nagar void __init taskstats_init_early(void)
683c757249aSShailabh Nagar {
684f9fd8914SShailabh Nagar 	unsigned int i;
685f9fd8914SShailabh Nagar 
6860a31bd5fSChristoph Lameter 	taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
687f9fd8914SShailabh Nagar 	for_each_possible_cpu(i) {
688f9fd8914SShailabh Nagar 		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
689f9fd8914SShailabh Nagar 		init_rwsem(&(per_cpu(listener_array, i).sem));
690f9fd8914SShailabh Nagar 	}
691c757249aSShailabh Nagar }
692c757249aSShailabh Nagar 
693c757249aSShailabh Nagar static int __init taskstats_init(void)
694c757249aSShailabh Nagar {
695c757249aSShailabh Nagar 	int rc;
696c757249aSShailabh Nagar 
697c53ed742SJohannes Berg 	rc = genl_register_family_with_ops(&family, taskstats_ops);
698c757249aSShailabh Nagar 	if (rc)
699c757249aSShailabh Nagar 		return rc;
700c757249aSShailabh Nagar 
701c757249aSShailabh Nagar 	family_registered = 1;
702f9b182e2SMandeep Singh Baines 	pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
703c757249aSShailabh Nagar 	return 0;
704c757249aSShailabh Nagar }
705c757249aSShailabh Nagar 
706c757249aSShailabh Nagar /*
707c757249aSShailabh Nagar  * late initcall ensures initialization of statistics collection
708c757249aSShailabh Nagar  * mechanisms precedes initialization of the taskstats interface
709c757249aSShailabh Nagar  */
710c757249aSShailabh Nagar late_initcall(taskstats_init);
711