1*c757249aSShailabh Nagar /* 2*c757249aSShailabh Nagar * taskstats.c - Export per-task statistics to userland 3*c757249aSShailabh Nagar * 4*c757249aSShailabh Nagar * Copyright (C) Shailabh Nagar, IBM Corp. 2006 5*c757249aSShailabh Nagar * (C) Balbir Singh, IBM Corp. 2006 6*c757249aSShailabh Nagar * 7*c757249aSShailabh Nagar * This program is free software; you can redistribute it and/or modify 8*c757249aSShailabh Nagar * it under the terms of the GNU General Public License as published by 9*c757249aSShailabh Nagar * the Free Software Foundation; either version 2 of the License, or 10*c757249aSShailabh Nagar * (at your option) any later version. 11*c757249aSShailabh Nagar * 12*c757249aSShailabh Nagar * This program is distributed in the hope that it will be useful, 13*c757249aSShailabh Nagar * but WITHOUT ANY WARRANTY; without even the implied warranty of 14*c757249aSShailabh Nagar * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15*c757249aSShailabh Nagar * GNU General Public License for more details. 16*c757249aSShailabh Nagar * 17*c757249aSShailabh Nagar */ 18*c757249aSShailabh Nagar 19*c757249aSShailabh Nagar #include <linux/kernel.h> 20*c757249aSShailabh Nagar #include <linux/taskstats_kern.h> 21*c757249aSShailabh Nagar #include <net/genetlink.h> 22*c757249aSShailabh Nagar #include <asm/atomic.h> 23*c757249aSShailabh Nagar 24*c757249aSShailabh Nagar static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; 25*c757249aSShailabh Nagar static int family_registered; 26*c757249aSShailabh Nagar kmem_cache_t *taskstats_cache; 27*c757249aSShailabh Nagar static DEFINE_MUTEX(taskstats_exit_mutex); 28*c757249aSShailabh Nagar 29*c757249aSShailabh Nagar static struct genl_family family = { 30*c757249aSShailabh Nagar .id = GENL_ID_GENERATE, 31*c757249aSShailabh Nagar .name = TASKSTATS_GENL_NAME, 32*c757249aSShailabh Nagar .version = TASKSTATS_GENL_VERSION, 33*c757249aSShailabh Nagar .maxattr = TASKSTATS_CMD_ATTR_MAX, 34*c757249aSShailabh Nagar }; 35*c757249aSShailabh Nagar 36*c757249aSShailabh Nagar static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] 37*c757249aSShailabh Nagar __read_mostly = { 38*c757249aSShailabh Nagar [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, 39*c757249aSShailabh Nagar [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, 40*c757249aSShailabh Nagar }; 41*c757249aSShailabh Nagar 42*c757249aSShailabh Nagar 43*c757249aSShailabh Nagar static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, 44*c757249aSShailabh Nagar void **replyp, size_t size) 45*c757249aSShailabh Nagar { 46*c757249aSShailabh Nagar struct sk_buff *skb; 47*c757249aSShailabh Nagar void *reply; 48*c757249aSShailabh Nagar 49*c757249aSShailabh Nagar /* 50*c757249aSShailabh Nagar * If new attributes are added, please revisit this allocation 51*c757249aSShailabh Nagar */ 52*c757249aSShailabh Nagar skb = nlmsg_new(size); 53*c757249aSShailabh Nagar if (!skb) 54*c757249aSShailabh Nagar return -ENOMEM; 55*c757249aSShailabh Nagar 56*c757249aSShailabh Nagar if (!info) { 57*c757249aSShailabh Nagar int seq = get_cpu_var(taskstats_seqnum)++; 58*c757249aSShailabh Nagar put_cpu_var(taskstats_seqnum); 59*c757249aSShailabh Nagar 60*c757249aSShailabh Nagar reply = genlmsg_put(skb, 0, seq, 61*c757249aSShailabh Nagar family.id, 0, 0, 62*c757249aSShailabh Nagar cmd, family.version); 63*c757249aSShailabh Nagar } else 64*c757249aSShailabh Nagar reply = genlmsg_put(skb, info->snd_pid, info->snd_seq, 65*c757249aSShailabh Nagar family.id, 0, 0, 66*c757249aSShailabh Nagar cmd, family.version); 67*c757249aSShailabh Nagar if (reply == NULL) { 68*c757249aSShailabh Nagar nlmsg_free(skb); 69*c757249aSShailabh Nagar return -EINVAL; 70*c757249aSShailabh Nagar } 71*c757249aSShailabh Nagar 72*c757249aSShailabh Nagar *skbp = skb; 73*c757249aSShailabh Nagar *replyp = reply; 74*c757249aSShailabh Nagar return 0; 75*c757249aSShailabh Nagar } 76*c757249aSShailabh Nagar 77*c757249aSShailabh Nagar static int send_reply(struct sk_buff *skb, pid_t pid, int event) 78*c757249aSShailabh Nagar { 79*c757249aSShailabh Nagar struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); 80*c757249aSShailabh Nagar void *reply; 81*c757249aSShailabh Nagar int rc; 82*c757249aSShailabh Nagar 83*c757249aSShailabh Nagar reply = genlmsg_data(genlhdr); 84*c757249aSShailabh Nagar 85*c757249aSShailabh Nagar rc = genlmsg_end(skb, reply); 86*c757249aSShailabh Nagar if (rc < 0) { 87*c757249aSShailabh Nagar nlmsg_free(skb); 88*c757249aSShailabh Nagar return rc; 89*c757249aSShailabh Nagar } 90*c757249aSShailabh Nagar 91*c757249aSShailabh Nagar if (event == TASKSTATS_MSG_MULTICAST) 92*c757249aSShailabh Nagar return genlmsg_multicast(skb, pid, TASKSTATS_LISTEN_GROUP); 93*c757249aSShailabh Nagar return genlmsg_unicast(skb, pid); 94*c757249aSShailabh Nagar } 95*c757249aSShailabh Nagar 96*c757249aSShailabh Nagar static int fill_pid(pid_t pid, struct task_struct *pidtsk, 97*c757249aSShailabh Nagar struct taskstats *stats) 98*c757249aSShailabh Nagar { 99*c757249aSShailabh Nagar int rc; 100*c757249aSShailabh Nagar struct task_struct *tsk = pidtsk; 101*c757249aSShailabh Nagar 102*c757249aSShailabh Nagar if (!pidtsk) { 103*c757249aSShailabh Nagar read_lock(&tasklist_lock); 104*c757249aSShailabh Nagar tsk = find_task_by_pid(pid); 105*c757249aSShailabh Nagar if (!tsk) { 106*c757249aSShailabh Nagar read_unlock(&tasklist_lock); 107*c757249aSShailabh Nagar return -ESRCH; 108*c757249aSShailabh Nagar } 109*c757249aSShailabh Nagar get_task_struct(tsk); 110*c757249aSShailabh Nagar read_unlock(&tasklist_lock); 111*c757249aSShailabh Nagar } else 112*c757249aSShailabh Nagar get_task_struct(tsk); 113*c757249aSShailabh Nagar 114*c757249aSShailabh Nagar /* 115*c757249aSShailabh Nagar * Each accounting subsystem adds calls to its functions to 116*c757249aSShailabh Nagar * fill in relevant parts of struct taskstsats as follows 117*c757249aSShailabh Nagar * 118*c757249aSShailabh Nagar * rc = per-task-foo(stats, tsk); 119*c757249aSShailabh Nagar * if (rc) 120*c757249aSShailabh Nagar * goto err; 121*c757249aSShailabh Nagar */ 122*c757249aSShailabh Nagar 123*c757249aSShailabh Nagar err: 124*c757249aSShailabh Nagar put_task_struct(tsk); 125*c757249aSShailabh Nagar return rc; 126*c757249aSShailabh Nagar 127*c757249aSShailabh Nagar } 128*c757249aSShailabh Nagar 129*c757249aSShailabh Nagar static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, 130*c757249aSShailabh Nagar struct taskstats *stats) 131*c757249aSShailabh Nagar { 132*c757249aSShailabh Nagar int rc; 133*c757249aSShailabh Nagar struct task_struct *tsk, *first; 134*c757249aSShailabh Nagar 135*c757249aSShailabh Nagar first = tgidtsk; 136*c757249aSShailabh Nagar read_lock(&tasklist_lock); 137*c757249aSShailabh Nagar if (!first) { 138*c757249aSShailabh Nagar first = find_task_by_pid(tgid); 139*c757249aSShailabh Nagar if (!first) { 140*c757249aSShailabh Nagar read_unlock(&tasklist_lock); 141*c757249aSShailabh Nagar return -ESRCH; 142*c757249aSShailabh Nagar } 143*c757249aSShailabh Nagar } 144*c757249aSShailabh Nagar tsk = first; 145*c757249aSShailabh Nagar do { 146*c757249aSShailabh Nagar /* 147*c757249aSShailabh Nagar * Each accounting subsystem adds calls its functions to 148*c757249aSShailabh Nagar * fill in relevant parts of struct taskstsats as follows 149*c757249aSShailabh Nagar * 150*c757249aSShailabh Nagar * rc = per-task-foo(stats, tsk); 151*c757249aSShailabh Nagar * if (rc) 152*c757249aSShailabh Nagar * break; 153*c757249aSShailabh Nagar */ 154*c757249aSShailabh Nagar 155*c757249aSShailabh Nagar } while_each_thread(first, tsk); 156*c757249aSShailabh Nagar read_unlock(&tasklist_lock); 157*c757249aSShailabh Nagar 158*c757249aSShailabh Nagar /* 159*c757249aSShailabh Nagar * Accounting subsytems can also add calls here if they don't 160*c757249aSShailabh Nagar * wish to aggregate statistics for per-tgid stats 161*c757249aSShailabh Nagar */ 162*c757249aSShailabh Nagar 163*c757249aSShailabh Nagar return rc; 164*c757249aSShailabh Nagar } 165*c757249aSShailabh Nagar 166*c757249aSShailabh Nagar static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info) 167*c757249aSShailabh Nagar { 168*c757249aSShailabh Nagar int rc = 0; 169*c757249aSShailabh Nagar struct sk_buff *rep_skb; 170*c757249aSShailabh Nagar struct taskstats stats; 171*c757249aSShailabh Nagar void *reply; 172*c757249aSShailabh Nagar size_t size; 173*c757249aSShailabh Nagar struct nlattr *na; 174*c757249aSShailabh Nagar 175*c757249aSShailabh Nagar /* 176*c757249aSShailabh Nagar * Size includes space for nested attributes 177*c757249aSShailabh Nagar */ 178*c757249aSShailabh Nagar size = nla_total_size(sizeof(u32)) + 179*c757249aSShailabh Nagar nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 180*c757249aSShailabh Nagar 181*c757249aSShailabh Nagar memset(&stats, 0, sizeof(stats)); 182*c757249aSShailabh Nagar rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); 183*c757249aSShailabh Nagar if (rc < 0) 184*c757249aSShailabh Nagar return rc; 185*c757249aSShailabh Nagar 186*c757249aSShailabh Nagar if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { 187*c757249aSShailabh Nagar u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); 188*c757249aSShailabh Nagar rc = fill_pid(pid, NULL, &stats); 189*c757249aSShailabh Nagar if (rc < 0) 190*c757249aSShailabh Nagar goto err; 191*c757249aSShailabh Nagar 192*c757249aSShailabh Nagar na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); 193*c757249aSShailabh Nagar NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, pid); 194*c757249aSShailabh Nagar NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, 195*c757249aSShailabh Nagar stats); 196*c757249aSShailabh Nagar } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { 197*c757249aSShailabh Nagar u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); 198*c757249aSShailabh Nagar rc = fill_tgid(tgid, NULL, &stats); 199*c757249aSShailabh Nagar if (rc < 0) 200*c757249aSShailabh Nagar goto err; 201*c757249aSShailabh Nagar 202*c757249aSShailabh Nagar na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); 203*c757249aSShailabh Nagar NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, tgid); 204*c757249aSShailabh Nagar NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, 205*c757249aSShailabh Nagar stats); 206*c757249aSShailabh Nagar } else { 207*c757249aSShailabh Nagar rc = -EINVAL; 208*c757249aSShailabh Nagar goto err; 209*c757249aSShailabh Nagar } 210*c757249aSShailabh Nagar 211*c757249aSShailabh Nagar nla_nest_end(rep_skb, na); 212*c757249aSShailabh Nagar 213*c757249aSShailabh Nagar return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST); 214*c757249aSShailabh Nagar 215*c757249aSShailabh Nagar nla_put_failure: 216*c757249aSShailabh Nagar return genlmsg_cancel(rep_skb, reply); 217*c757249aSShailabh Nagar err: 218*c757249aSShailabh Nagar nlmsg_free(rep_skb); 219*c757249aSShailabh Nagar return rc; 220*c757249aSShailabh Nagar } 221*c757249aSShailabh Nagar 222*c757249aSShailabh Nagar /* Send pid data out on exit */ 223*c757249aSShailabh Nagar void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, 224*c757249aSShailabh Nagar struct taskstats *tgidstats) 225*c757249aSShailabh Nagar { 226*c757249aSShailabh Nagar int rc; 227*c757249aSShailabh Nagar struct sk_buff *rep_skb; 228*c757249aSShailabh Nagar void *reply; 229*c757249aSShailabh Nagar size_t size; 230*c757249aSShailabh Nagar int is_thread_group; 231*c757249aSShailabh Nagar struct nlattr *na; 232*c757249aSShailabh Nagar 233*c757249aSShailabh Nagar if (!family_registered || !tidstats) 234*c757249aSShailabh Nagar return; 235*c757249aSShailabh Nagar 236*c757249aSShailabh Nagar mutex_lock(&taskstats_exit_mutex); 237*c757249aSShailabh Nagar 238*c757249aSShailabh Nagar is_thread_group = !thread_group_empty(tsk); 239*c757249aSShailabh Nagar rc = 0; 240*c757249aSShailabh Nagar 241*c757249aSShailabh Nagar /* 242*c757249aSShailabh Nagar * Size includes space for nested attributes 243*c757249aSShailabh Nagar */ 244*c757249aSShailabh Nagar size = nla_total_size(sizeof(u32)) + 245*c757249aSShailabh Nagar nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 246*c757249aSShailabh Nagar 247*c757249aSShailabh Nagar if (is_thread_group) 248*c757249aSShailabh Nagar size = 2 * size; /* PID + STATS + TGID + STATS */ 249*c757249aSShailabh Nagar 250*c757249aSShailabh Nagar rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); 251*c757249aSShailabh Nagar if (rc < 0) 252*c757249aSShailabh Nagar goto ret; 253*c757249aSShailabh Nagar 254*c757249aSShailabh Nagar rc = fill_pid(tsk->pid, tsk, tidstats); 255*c757249aSShailabh Nagar if (rc < 0) 256*c757249aSShailabh Nagar goto err_skb; 257*c757249aSShailabh Nagar 258*c757249aSShailabh Nagar na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); 259*c757249aSShailabh Nagar NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid); 260*c757249aSShailabh Nagar NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, 261*c757249aSShailabh Nagar *tidstats); 262*c757249aSShailabh Nagar nla_nest_end(rep_skb, na); 263*c757249aSShailabh Nagar 264*c757249aSShailabh Nagar if (!is_thread_group || !tgidstats) { 265*c757249aSShailabh Nagar send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); 266*c757249aSShailabh Nagar goto ret; 267*c757249aSShailabh Nagar } 268*c757249aSShailabh Nagar 269*c757249aSShailabh Nagar rc = fill_tgid(tsk->pid, tsk, tgidstats); 270*c757249aSShailabh Nagar /* 271*c757249aSShailabh Nagar * If fill_tgid() failed then one probable reason could be that the 272*c757249aSShailabh Nagar * thread group leader has exited. fill_tgid() will fail, send out 273*c757249aSShailabh Nagar * the pid statistics collected earlier. 274*c757249aSShailabh Nagar */ 275*c757249aSShailabh Nagar if (rc < 0) { 276*c757249aSShailabh Nagar send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); 277*c757249aSShailabh Nagar goto ret; 278*c757249aSShailabh Nagar } 279*c757249aSShailabh Nagar 280*c757249aSShailabh Nagar na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); 281*c757249aSShailabh Nagar NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); 282*c757249aSShailabh Nagar NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, 283*c757249aSShailabh Nagar *tgidstats); 284*c757249aSShailabh Nagar nla_nest_end(rep_skb, na); 285*c757249aSShailabh Nagar 286*c757249aSShailabh Nagar send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); 287*c757249aSShailabh Nagar goto ret; 288*c757249aSShailabh Nagar 289*c757249aSShailabh Nagar nla_put_failure: 290*c757249aSShailabh Nagar genlmsg_cancel(rep_skb, reply); 291*c757249aSShailabh Nagar goto ret; 292*c757249aSShailabh Nagar err_skb: 293*c757249aSShailabh Nagar nlmsg_free(rep_skb); 294*c757249aSShailabh Nagar ret: 295*c757249aSShailabh Nagar mutex_unlock(&taskstats_exit_mutex); 296*c757249aSShailabh Nagar return; 297*c757249aSShailabh Nagar } 298*c757249aSShailabh Nagar 299*c757249aSShailabh Nagar static struct genl_ops taskstats_ops = { 300*c757249aSShailabh Nagar .cmd = TASKSTATS_CMD_GET, 301*c757249aSShailabh Nagar .doit = taskstats_send_stats, 302*c757249aSShailabh Nagar .policy = taskstats_cmd_get_policy, 303*c757249aSShailabh Nagar }; 304*c757249aSShailabh Nagar 305*c757249aSShailabh Nagar /* Needed early in initialization */ 306*c757249aSShailabh Nagar void __init taskstats_init_early(void) 307*c757249aSShailabh Nagar { 308*c757249aSShailabh Nagar taskstats_cache = kmem_cache_create("taskstats_cache", 309*c757249aSShailabh Nagar sizeof(struct taskstats), 310*c757249aSShailabh Nagar 0, SLAB_PANIC, NULL, NULL); 311*c757249aSShailabh Nagar } 312*c757249aSShailabh Nagar 313*c757249aSShailabh Nagar static int __init taskstats_init(void) 314*c757249aSShailabh Nagar { 315*c757249aSShailabh Nagar int rc; 316*c757249aSShailabh Nagar 317*c757249aSShailabh Nagar rc = genl_register_family(&family); 318*c757249aSShailabh Nagar if (rc) 319*c757249aSShailabh Nagar return rc; 320*c757249aSShailabh Nagar 321*c757249aSShailabh Nagar rc = genl_register_ops(&family, &taskstats_ops); 322*c757249aSShailabh Nagar if (rc < 0) 323*c757249aSShailabh Nagar goto err; 324*c757249aSShailabh Nagar 325*c757249aSShailabh Nagar family_registered = 1; 326*c757249aSShailabh Nagar return 0; 327*c757249aSShailabh Nagar err: 328*c757249aSShailabh Nagar genl_unregister_family(&family); 329*c757249aSShailabh Nagar return rc; 330*c757249aSShailabh Nagar } 331*c757249aSShailabh Nagar 332*c757249aSShailabh Nagar /* 333*c757249aSShailabh Nagar * late initcall ensures initialization of statistics collection 334*c757249aSShailabh Nagar * mechanisms precedes initialization of the taskstats interface 335*c757249aSShailabh Nagar */ 336*c757249aSShailabh Nagar late_initcall(taskstats_init); 337