/*
 * taskstats.c - Export per-task statistics to userland
 *
 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
 *           (C) Balbir Singh,   IBM Corp. 2006
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include <linux/kernel.h>
#include <linux/taskstats_kern.h>
#include <linux/tsacct_kern.h>
#include <linux/delayacct.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cgroupstats.h>
#include <linux/cgroup.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/pid_namespace.h>
#include <net/genetlink.h>
#include <linux/atomic.h>

/*
 * Maximum length of a cpumask that can be specified in
 * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute
 */
#define TASKSTATS_CPUMASK_MAXLEN	(100+6*NR_CPUS)

/* Per-cpu sequence number for autonomous (non-reply) genetlink messages */
static DEFINE_PER_CPU(__u32, taskstats_seqnum);
/* Set once genl_register_family_with_ops() succeeds in taskstats_init() */
static int family_registered;
/* Cache for per-thread-group taskstats, created in taskstats_init_early() */
struct kmem_cache *taskstats_cache;

/* The taskstats generic netlink family definition */
static struct genl_family family = {
	.id		= GENL_ID_GENERATE,
	.name		= TASKSTATS_GENL_NAME,
	.version	= TASKSTATS_GENL_VERSION,
	.maxattr	= TASKSTATS_CMD_ATTR_MAX,
};

/* Attribute policy for TASKSTATS_CMD_GET requests */
static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = {
	[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
	[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]  = { .type = NLA_STRING },
	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};

/* Attribute policy for CGROUPSTATS_CMD_GET requests */
static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = {
	[CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
};

/*
 * One registered recipient of per-cpu exit data, linked into the
 * owning cpu's listener_list.  @valid is cleared (under the read side
 * of the list lock) when a unicast to @pid gets -ECONNREFUSED, so the
 * entry can be reaped later under the write lock.
 */
struct listener {
	struct list_head list;
	pid_t pid;	/* netlink port id of the listener */
	char valid;	/* 0 => stale, to be deleted */
};
/*
 * Per-cpu list of listeners interested in that cpu's exiting tasks.
 * Readers (the exit path) take @sem for read; registration,
 * deregistration and stale-entry reaping take it for write.
 */
struct listener_list {
	struct rw_semaphore sem;
	struct list_head list;
};
static DEFINE_PER_CPU(struct listener_list, listener_array);

enum actions {
	REGISTER,
	DEREGISTER,
	CPU_DONT_CARE
};

/*
 * Allocate a new genetlink message and write its header.
 * With @info == NULL the message is autonomous (exit-time multicast):
 * it gets a per-cpu sequence number instead of echoing a request.
 * On success *@skbp holds the new skb and 0 is returned.
 */
static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
				size_t size)
{
	struct sk_buff *skb;
	void *reply;

	/*
	 * If new attributes are added, please revisit this allocation
	 */
	skb = genlmsg_new(size, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	if (!info) {
		int seq = this_cpu_inc_return(taskstats_seqnum) - 1;

		reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
	} else
		reply = genlmsg_put_reply(skb, info, &family, 0, cmd);
	if (reply == NULL) {
		nlmsg_free(skb);
		return -EINVAL;
	}

	*skbp = skb;
	return 0;
}

/*
 * Send taskstats data in @skb to listener with nl_pid @pid
 */
static int send_reply(struct sk_buff *skb, struct genl_info *info)
{
	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
	void *reply = genlmsg_data(genlhdr);

	genlmsg_end(skb, reply);

	return genlmsg_reply(skb, info);
}

/*
 * Send taskstats data in @skb to listeners registered for @cpu's exit data.
 * The skb is cloned once per additional listener; the last copy (or the
 * original, if cloning fails) is freed here.  Listeners whose sockets
 * refuse delivery are marked invalid and reaped under the write lock.
 */
static void send_cpu_listeners(struct sk_buff *skb,
					struct listener_list *listeners)
{
	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
	struct listener *s, *tmp;
	struct sk_buff *skb_next, *skb_cur = skb;
	void *reply = genlmsg_data(genlhdr);
	int rc, delcount = 0;

	genlmsg_end(skb, reply);

	rc = 0;
	down_read(&listeners->sem);
	list_for_each_entry(s, &listeners->list, list) {
		skb_next = NULL;
		if (!list_is_last(&s->list, &listeners->list)) {
			skb_next = skb_clone(skb_cur, GFP_KERNEL);
			if (!skb_next)
				break;
		}
		rc = genlmsg_unicast(&init_net, skb_cur, s->pid);
		if (rc == -ECONNREFUSED) {
			/* listener went away; flag for deletion below */
			s->valid = 0;
			delcount++;
		}
		skb_cur = skb_next;
	}
	up_read(&listeners->sem);

	if (skb_cur)
		nlmsg_free(skb_cur);

	if (!delcount)
		return;

	/* Delete invalidated entries */
	down_write(&listeners->sem);
	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
		if (!s->valid) {
			list_del(&s->list);
			kfree(s);
		}
	}
	up_write(&listeners->sem);
}

/*
 * Fill @stats with the accounting data of a single task @tsk,
 * translating ids into @user_ns/@pid_ns for the receiver.
 */
static void fill_stats(struct user_namespace *user_ns,
		       struct pid_namespace *pid_ns,
		       struct task_struct *tsk, struct taskstats *stats)
{
	memset(stats, 0, sizeof(*stats));
	/*
	 * Each accounting subsystem adds calls to its functions to
	 * fill in relevant parts of struct taskstats as follows
	 *
	 *	per-task-foo(stats, tsk);
	 */

	delayacct_add_tsk(stats, tsk);

	/* fill in basic acct fields */
	stats->version = TASKSTATS_VERSION;
	stats->nvcsw = tsk->nvcsw;
	stats->nivcsw = tsk->nivcsw;
	bacct_add_tsk(user_ns, pid_ns, stats, tsk);

	/* fill in extended acct fields */
	xacct_add_tsk(stats, tsk);
}

/*
 * Look up @pid in the caller's pid namespace and fill @stats for it.
 * Returns 0 on success, -ESRCH if no such task.  A task reference is
 * taken under RCU so the task cannot go away while being read.
 */
static int fill_stats_for_pid(pid_t pid, struct taskstats *stats)
{
	struct task_struct *tsk;

	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk)
		get_task_struct(tsk);
	rcu_read_unlock();
	if (!tsk)
		return -ESRCH;
	fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats);
	put_task_struct(tsk);
	return 0;
}

/*
 * Aggregate stats for the whole thread group @tgid: start from the
 * accumulated per-group stats of already-dead members (if any) and add
 * the live threads on top.  Returns 0 on success, -ESRCH if the group
 * leader cannot be found or its sighand is gone.
 */
static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats)
{
	struct task_struct *tsk, *first;
	unsigned long flags;
	int rc = -ESRCH;

	/*
	 * Add additional stats from live tasks except zombie thread group
	 * leaders who are already counted with the dead tasks
	 */
	rcu_read_lock();
	first = find_task_by_vpid(tgid);

	if (!first || !lock_task_sighand(first, &flags))
		goto out;

	if (first->signal->stats)
		memcpy(stats, first->signal->stats, sizeof(*stats));
	else
		memset(stats, 0, sizeof(*stats));

	tsk = first;
	do {
		if (tsk->exit_state)
			continue;
		/*
		 * Accounting subsystem can call its functions here to
		 * fill in relevant parts of struct taskstats as follows
		 *
		 *	per-task-foo(stats, tsk);
		 */
		delayacct_add_tsk(stats, tsk);

		stats->nvcsw += tsk->nvcsw;
		stats->nivcsw += tsk->nivcsw;
	} while_each_thread(first, tsk);

	unlock_task_sighand(first, &flags);
	rc = 0;
out:
	rcu_read_unlock();

	stats->version = TASKSTATS_VERSION;
	/*
	 * Accounting subsystems can also add calls here to modify
	 * fields of taskstats.
	 */
	return rc;
}

/*
 * Fold the exiting task @tsk's stats into its thread group's
 * accumulated tsk->signal->stats (if that structure was allocated).
 */
static void fill_tgid_exit(struct task_struct *tsk)
{
	unsigned long flags;

	spin_lock_irqsave(&tsk->sighand->siglock, flags);
	if (!tsk->signal->stats)
		goto ret;

	/*
	 * Each accounting subsystem calls its functions here to
	 * accumulate its per-task stats for tsk, into the per-tgid structure
	 *
	 *	per-task-foo(tsk->signal->stats, tsk);
	 */
	delayacct_add_tsk(tsk->signal->stats, tsk);
ret:
	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
	return;
}

/*
 * Register (@isadd == REGISTER) or deregister listener @pid on every cpu
 * in @mask.  Registration is refused outside the initial user and pid
 * namespaces.  On a mid-way allocation failure, cpus registered so far
 * are cleaned up via the DEREGISTER path and -ENOMEM is returned.
 */
static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
{
	struct listener_list *listeners;
	struct listener *s, *tmp, *s2;
	unsigned int cpu;
	int ret = 0;

	if (!cpumask_subset(mask, cpu_possible_mask))
		return -EINVAL;

	if (current_user_ns() != &init_user_ns)
		return -EINVAL;

	if (task_active_pid_ns(current) != &init_pid_ns)
		return -EINVAL;

	if (isadd == REGISTER) {
		for_each_cpu(cpu, mask) {
			s = kmalloc_node(sizeof(struct listener),
					GFP_KERNEL, cpu_to_node(cpu));
			if (!s) {
				ret = -ENOMEM;
				goto cleanup;
			}
			s->pid = pid;
			s->valid = 1;

			listeners = &per_cpu(listener_array, cpu);
			down_write(&listeners->sem);
			/* don't register the same pid twice on one cpu */
			list_for_each_entry(s2, &listeners->list, list) {
				if (s2->pid == pid && s2->valid)
					goto exists;
			}
			list_add(&s->list, &listeners->list);
			s = NULL;
exists:
			up_write(&listeners->sem);
			kfree(s); /* nop if NULL */
		}
		return 0;
	}

	/* Deregister or cleanup */
cleanup:
	for_each_cpu(cpu, mask) {
		listeners = &per_cpu(listener_array, cpu);
		down_write(&listeners->sem);
		list_for_each_entry_safe(s, tmp, &listeners->list, list) {
			if (s->pid == pid) {
				list_del(&s->list);
				kfree(s);
				break;
			}
		}
		up_write(&listeners->sem);
	}
	return ret;
}

/*
 * Parse the cpu-list string in netlink attribute @na into @mask.
 * Returns 1 if @na is absent, 0 on success, or a negative errno
 * (-E2BIG / -EINVAL / -ENOMEM, or cpulist_parse()'s error).
 */
static int parse(struct nlattr *na, struct cpumask *mask)
{
	char *data;
	int len;
	int ret;

	if (na == NULL)
		return 1;
	len = nla_len(na);
	if (len > TASKSTATS_CPUMASK_MAXLEN)
		return -E2BIG;
	if (len < 1)
		return -EINVAL;
	data = kmalloc(len, GFP_KERNEL);
	if (!data)
		return -ENOMEM;
	nla_strlcpy(data, na, len);
	ret = cpulist_parse(data, mask);
	kfree(data);
	return ret;
}

#if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
#define TASKSTATS_NEEDS_PADDING 1
#endif

/*
 * Append an aggregate (PID or TGID) nest containing the id and a
 * reserved struct taskstats to @skb.  Returns a pointer to the
 * reserved stats area for the caller to fill, or NULL on failure
 * (partial nest is cancelled).
 */
static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
{
	struct nlattr *na, *ret;
	int aggr;

	aggr = (type == TASKSTATS_TYPE_PID)
			? TASKSTATS_TYPE_AGGR_PID
			: TASKSTATS_TYPE_AGGR_TGID;

	/*
	 * The taskstats structure is internally aligned on 8 byte
	 * boundaries but the layout of the aggregate reply, with
	 * two NLA headers and the pid (each 4 bytes), actually
	 * force the entire structure to be unaligned. This causes
	 * the kernel to issue unaligned access warnings on some
	 * architectures like ia64. Unfortunately, some software out there
	 * doesn't properly unroll the NLA packet and assumes that the start
	 * of the taskstats structure will always be 20 bytes from the start
	 * of the netlink payload. Aligning the start of the taskstats
	 * structure breaks this software, which we don't want. So, for now
	 * the alignment only happens on architectures that require it
	 * and those users will have to update to fixed versions of those
	 * packages. Space is reserved in the packet only when needed.
	 * This ifdef should be removed in several years e.g. 2012 once
	 * we can be confident that fixed versions are installed on most
	 * systems. We add the padding before the aggregate since the
	 * aggregate is already a defined type.
	 */
#ifdef TASKSTATS_NEEDS_PADDING
	if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0)
		goto err;
#endif
	na = nla_nest_start(skb, aggr);
	if (!na)
		goto err;

	if (nla_put(skb, type, sizeof(pid), &pid) < 0) {
		nla_nest_cancel(skb, na);
		goto err;
	}
	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
	if (!ret) {
		nla_nest_cancel(skb, na);
		goto err;
	}
	nla_nest_end(skb, na);

	return nla_data(ret);
err:
	return NULL;
}

/*
 * Handle CGROUPSTATS_CMD_GET: build cgroup statistics for the cgroup
 * identified by the file descriptor in CGROUPSTATS_CMD_ATTR_FD and
 * send them back to the requester.
 */
static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
	int rc = 0;
	struct sk_buff *rep_skb;
	struct cgroupstats *stats;
	struct nlattr *na;
	size_t size;
	u32 fd;
	struct fd f;

	na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
	if (!na)
		return -EINVAL;

	fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
	f = fdget(fd);
	if (!f.file)
		return 0;

	size = nla_total_size(sizeof(struct cgroupstats));

	rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
				size);
	if (rc < 0)
		goto err;

	na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
				sizeof(struct cgroupstats));
	if (na == NULL) {
		nlmsg_free(rep_skb);
		rc = -EMSGSIZE;
		goto err;
	}

	stats = nla_data(na);
	memset(stats, 0, sizeof(*stats));

	rc = cgroupstats_build(stats, f.file->f_path.dentry);
	if (rc < 0) {
		nlmsg_free(rep_skb);
		goto err;
	}

	rc = send_reply(rep_skb, info);

err:
	fdput(f);
	return rc;
}

/* Register the sender as an exit-data listener for the given cpumask */
static int cmd_attr_register_cpumask(struct genl_info *info)
{
	cpumask_var_t mask;
	int rc;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
	if (rc < 0)
		goto out;
	rc = add_del_listener(info->snd_portid, mask, REGISTER);
out:
	free_cpumask_var(mask);
	return rc;
}

/* Deregister the sender as an exit-data listener for the given cpumask */
static int cmd_attr_deregister_cpumask(struct genl_info *info)
{
	cpumask_var_t mask;
	int rc;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
	if (rc < 0)
		goto out;
	rc = add_del_listener(info->snd_portid, mask, DEREGISTER);
out:
	free_cpumask_var(mask);
	return rc;
}

/*
 * Space needed for one aggregate reply: id attribute + stats attribute
 * + the nest header, plus optional padding (see mk_reply()).
 */
static size_t taskstats_packet_size(void)
{
	size_t size;

	size = nla_total_size(sizeof(u32)) +
		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
#ifdef TASKSTATS_NEEDS_PADDING
	size += nla_total_size(0); /* Padding for alignment */
#endif
	return size;
}

/* Handle TASKSTATS_CMD_GET with a PID attribute: reply with that task's stats */
static int cmd_attr_pid(struct genl_info *info)
{
	struct taskstats *stats;
	struct sk_buff *rep_skb;
	size_t size;
	u32 pid;
	int rc;

	size = taskstats_packet_size();

	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
	if (rc < 0)
		return rc;

	rc = -EINVAL;
	pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid);
	if (!stats)
		goto err;

	rc = fill_stats_for_pid(pid, stats);
	if (rc < 0)
		goto err;
	return send_reply(rep_skb, info);
err:
	nlmsg_free(rep_skb);
	return rc;
}

/* Handle TASKSTATS_CMD_GET with a TGID attribute: reply with aggregated group stats */
static int cmd_attr_tgid(struct genl_info *info)
{
	struct taskstats *stats;
	struct sk_buff *rep_skb;
	size_t size;
	u32 tgid;
	int rc;

	size = taskstats_packet_size();

	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
	if (rc < 0)
		return rc;

	rc = -EINVAL;
	tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid);
	if (!stats)
		goto err;

	rc = fill_stats_for_tgid(tgid, stats);
	if (rc < 0)
		goto err;
	return send_reply(rep_skb, info);
err:
	nlmsg_free(rep_skb);
	return rc;
}

/* Dispatch TASKSTATS_CMD_GET by whichever attribute the request carries */
static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
	if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK])
		return cmd_attr_register_cpumask(info);
	else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK])
		return cmd_attr_deregister_cpumask(info);
	else if (info->attrs[TASKSTATS_CMD_ATTR_PID])
		return cmd_attr_pid(info);
	else if (info->attrs[TASKSTATS_CMD_ATTR_TGID])
		return cmd_attr_tgid(info);
	else
		return -EINVAL;
}

/*
 * Lazily allocate the per-thread-group stats structure.  The allocation
 * is done outside the lock; the siglock-protected re-check resolves the
 * race where two exiting threads allocate concurrently (the loser frees
 * its copy).  Returns sig->stats, which may still be NULL on OOM.
 */
static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
{
	struct signal_struct *sig = tsk->signal;
	struct taskstats *stats;

	if (sig->stats || thread_group_empty(tsk))
		goto ret;

	/* No problem if kmem_cache_zalloc() fails */
	stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);

	spin_lock_irq(&tsk->sighand->siglock);
	if (!sig->stats) {
		sig->stats = stats;
		stats = NULL;
	}
	spin_unlock_irq(&tsk->sighand->siglock);

	if (stats)
		kmem_cache_free(taskstats_cache, stats);
ret:
	return sig->stats;
}

/* Send pid data out on exit */
void taskstats_exit(struct task_struct *tsk, int group_dead)
{
	int rc;
	struct listener_list *listeners;
	struct taskstats *stats;
	struct sk_buff *rep_skb;
	size_t size;
	int is_thread_group;

	if (!family_registered)
		return;

	/*
	 * Size includes space for nested attributes
	 */
	size = taskstats_packet_size();

	is_thread_group = !!taskstats_tgid_alloc(tsk);
	if (is_thread_group) {
		/* PID + STATS + TGID + STATS */
		size = 2 * size;
		/* fill the tsk->signal->stats structure */
		fill_tgid_exit(tsk);
	}

	listeners = raw_cpu_ptr(&listener_array);
	if (list_empty(&listeners->list))
		return;

	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size);
	if (rc < 0)
		return;

	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID,
			 task_pid_nr_ns(tsk, &init_pid_ns));
	if (!stats)
		goto err;

	fill_stats(&init_user_ns, &init_pid_ns, tsk, stats);

	/*
	 * Doesn't matter if tsk is the leader or the last group member leaving
	 */
	if (!is_thread_group || !group_dead)
		goto send;

	/* last member of the group: also send the accumulated group stats */
	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID,
			 task_tgid_nr_ns(tsk, &init_pid_ns));
	if (!stats)
		goto err;

	memcpy(stats, tsk->signal->stats, sizeof(*stats));

send:
	send_cpu_listeners(rep_skb, listeners);
	return;
err:
	nlmsg_free(rep_skb);
}

static const struct genl_ops taskstats_ops[] = {
	{
		.cmd		= TASKSTATS_CMD_GET,
		.doit		= taskstats_user_cmd,
		.policy		= taskstats_cmd_get_policy,
		.flags		= GENL_ADMIN_PERM,
	},
	{
		.cmd		= CGROUPSTATS_CMD_GET,
		.doit		= cgroupstats_user_cmd,
		.policy		= cgroupstats_cmd_get_policy,
	},
};

/* Needed early in initialization */
void __init taskstats_init_early(void)
{
	unsigned int i;

	taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
	for_each_possible_cpu(i) {
		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
		init_rwsem(&(per_cpu(listener_array, i).sem));
	}
}

/* Register the genetlink family; exit-time reporting stays off until this runs */
static int __init taskstats_init(void)
{
	int rc;

	rc = genl_register_family_with_ops(&family, taskstats_ops);
	if (rc)
		return rc;

	family_registered = 1;
	pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
	return 0;
}

/*
 * late initcall ensures initialization of statistics collection
 * mechanisms precedes initialization of the taskstats interface
 */
late_initcall(taskstats_init);