1 /* 2 * taskstats.c - Export per-task statistics to userland 3 * 4 * Copyright (C) Shailabh Nagar, IBM Corp. 2006 5 * (C) Balbir Singh, IBM Corp. 2006 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 */ 18 19 #include <linux/kernel.h> 20 #include <linux/taskstats_kern.h> 21 #include <linux/tsacct_kern.h> 22 #include <linux/delayacct.h> 23 #include <linux/tsacct_kern.h> 24 #include <linux/cpumask.h> 25 #include <linux/percpu.h> 26 #include <net/genetlink.h> 27 #include <asm/atomic.h> 28 29 /* 30 * Maximum length of a cpumask that can be specified in 31 * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute 32 */ 33 #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) 34 35 static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; 36 static int family_registered; 37 struct kmem_cache *taskstats_cache; 38 39 static struct genl_family family = { 40 .id = GENL_ID_GENERATE, 41 .name = TASKSTATS_GENL_NAME, 42 .version = TASKSTATS_GENL_VERSION, 43 .maxattr = TASKSTATS_CMD_ATTR_MAX, 44 }; 45 46 static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] 47 __read_mostly = { 48 [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, 49 [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, 50 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, 51 [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; 52 53 struct listener { 54 struct list_head list; 55 pid_t pid; 56 char valid; 57 }; 58 59 struct listener_list { 60 struct rw_semaphore sem; 61 struct list_head list; 62 }; 63 
static DEFINE_PER_CPU(struct listener_list, listener_array);

/* What add_del_listener() should do with a pid/cpumask pair */
enum actions {
	REGISTER,
	DEREGISTER,
	CPU_DONT_CARE
};

/*
 * Allocate a new genetlink message of @size bytes and start a TASKSTATS
 * reply of type @cmd in it.  With @info, the message is a reply to a user
 * request; without it (exit path), a per-cpu sequence number is used.
 * On success *skbp holds the new skb and 0 is returned; on failure the skb
 * is freed and a negative errno is returned.
 */
static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
				size_t size)
{
	struct sk_buff *skb;
	void *reply;

	/*
	 * If new attributes are added, please revisit this allocation
	 */
	skb = genlmsg_new(size, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	if (!info) {
		/* Autonomous message: take a cpu-local sequence number */
		int seq = get_cpu_var(taskstats_seqnum)++;
		put_cpu_var(taskstats_seqnum);

		reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);
	} else
		reply = genlmsg_put_reply(skb, info, &family, 0, cmd);
	if (reply == NULL) {
		nlmsg_free(skb);
		return -EINVAL;
	}

	*skbp = skb;
	return 0;
}

/*
 * Send taskstats data in @skb to listener with nl_pid @pid
 */
static int send_reply(struct sk_buff *skb, pid_t pid)
{
	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
	void *reply = genlmsg_data(genlhdr);
	int rc;

	rc = genlmsg_end(skb, reply);
	if (rc < 0) {
		nlmsg_free(skb);
		return rc;
	}

	/* genlmsg_unicast() consumes the skb regardless of outcome */
	return genlmsg_unicast(skb, pid);
}

/*
 * Send taskstats data in @skb to listeners registered for @cpu's exit data.
 * The skb is cloned once per additional listener; a listener whose socket
 * refuses the message is marked invalid and reaped afterwards under the
 * write lock.  Consumes @skb.
 */
static void send_cpu_listeners(struct sk_buff *skb,
				struct listener_list *listeners)
{
	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
	struct listener *s, *tmp;
	struct sk_buff *skb_next, *skb_cur = skb;
	void *reply = genlmsg_data(genlhdr);
	int rc, delcount = 0;

	rc = genlmsg_end(skb, reply);
	if (rc < 0) {
		nlmsg_free(skb);
		return;
	}

	rc = 0;
	down_read(&listeners->sem);
	list_for_each_entry(s, &listeners->list, list) {
		skb_next = NULL;
		/* Clone ahead only if another listener follows */
		if (!list_is_last(&s->list, &listeners->list)) {
			skb_next = skb_clone(skb_cur, GFP_KERNEL);
			if (!skb_next)
				break;
		}
		rc = genlmsg_unicast(skb_cur, s->pid);
		if (rc == -ECONNREFUSED) {
			/* Listener is gone; mark for deletion below */
			s->valid = 0;
			delcount++;
		}
		skb_cur = skb_next;
	}
	up_read(&listeners->sem);

	/* skb_cur is non-NULL only if the clone above failed mid-list */
	if (skb_cur)
		nlmsg_free(skb_cur);

	if (!delcount)
		return;

	/* Delete invalidated entries */
	down_write(&listeners->sem);
	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
		if (!s->valid) {
			list_del(&s->list);
			kfree(s);
		}
	}
	up_write(&listeners->sem);
}

/*
 * Fill @stats with the per-task statistics of the task with pid @pid,
 * or of @tsk directly when the caller already holds a task pointer.
 * A task reference is taken for the duration of the fill.
 * Returns 0 on success, -ESRCH if no such task exists.
 */
static int fill_pid(pid_t pid, struct task_struct *tsk,
		struct taskstats *stats)
{
	int rc = 0;

	if (!tsk) {
		rcu_read_lock();
		tsk = find_task_by_pid(pid);
		if (tsk)
			get_task_struct(tsk);
		rcu_read_unlock();
		if (!tsk)
			return -ESRCH;
	} else
		get_task_struct(tsk);

	memset(stats, 0, sizeof(*stats));
	/*
	 * Each accounting subsystem adds calls to its functions to
	 * fill in relevant parts of struct taskstats as follows
	 *
	 *	per-task-foo(stats, tsk);
	 */

	delayacct_add_tsk(stats, tsk);

	/* fill in basic acct fields */
	stats->version = TASKSTATS_VERSION;
	bacct_add_tsk(stats, tsk);

	/* fill in extended acct fields */
	xacct_add_tsk(stats, tsk);

	/* Define err: label here if needed */
	put_task_struct(tsk);
	return rc;

}

/*
 * Fill @stats with aggregated statistics for the thread group of @tgid
 * (or of @first when provided).  Starts from the accumulated stats of
 * already-dead members in signal->stats, then adds each live thread's
 * delay accounting under sighand lock.  Returns 0, or -ESRCH if the
 * group cannot be found or locked.
 */
static int fill_tgid(pid_t tgid, struct task_struct *first,
		struct taskstats *stats)
{
	struct task_struct *tsk;
	unsigned long flags;
	int rc = -ESRCH;

	/*
	 * Add additional stats from live tasks except zombie thread group
	 * leaders who are already counted with the dead tasks
	 */
	rcu_read_lock();
	if (!first)
		first = find_task_by_pid(tgid);

	if (!first || !lock_task_sighand(first, &flags))
		goto out;

	/* Start from stats accumulated for already-exited group members */
	if (first->signal->stats)
		memcpy(stats, first->signal->stats, sizeof(*stats));
	else
		memset(stats, 0, sizeof(*stats));

	tsk = first;
	do {
		/* Skip exiting/zombie threads; they are in signal->stats */
		if (tsk->exit_state)
			continue;
		/*
		 * Accounting subsystem can call its functions here to
		 * fill in relevant parts of struct taskstats as follows
		 *
		 *	per-task-foo(stats, tsk);
		 */
		delayacct_add_tsk(stats, tsk);

	} while_each_thread(first, tsk);

	unlock_task_sighand(first, &flags);
	rc = 0;
out:
	rcu_read_unlock();

	stats->version = TASKSTATS_VERSION;
	/*
	 * Accounting subsystems can also add calls here to modify
	 * fields of taskstats.
	 */
	return rc;
}

/*
 * At task exit, fold @tsk's per-task stats into its thread group's
 * signal->stats (if one was allocated), under sighand->siglock.
 */
static void fill_tgid_exit(struct task_struct *tsk)
{
	unsigned long flags;

	spin_lock_irqsave(&tsk->sighand->siglock, flags);
	if (!tsk->signal->stats)
		goto ret;

	/*
	 * Each accounting subsystem calls its functions here to
	 * accumulate its per-task stats for tsk, into the per-tgid structure
	 *
	 *	per-task-foo(tsk->signal->stats, tsk);
	 */
	delayacct_add_tsk(tsk->signal->stats, tsk);
ret:
	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
	return;
}

/*
 * Register (@isadd == REGISTER) or deregister listener @pid on the per-cpu
 * listener lists for every cpu in @maskp.  On a mid-registration allocation
 * failure, falls through to the deregistration loop to undo the partial
 * registration.  Returns 0, or -EINVAL for a mask outside cpu_possible_map.
 */
static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
{
	struct listener_list *listeners;
	struct listener *s, *tmp;
	unsigned int cpu;
	cpumask_t mask = *maskp;

	if (!cpus_subset(mask, cpu_possible_map))
		return -EINVAL;

	if (isadd == REGISTER) {
		for_each_cpu_mask(cpu, mask) {
			/* Allocate on the cpu's own node for locality */
			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
					 cpu_to_node(cpu));
			if (!s)
				goto cleanup;
			s->pid = pid;
			INIT_LIST_HEAD(&s->list);
			s->valid = 1;

			listeners = &per_cpu(listener_array, cpu);
			down_write(&listeners->sem);
			list_add(&s->list, &listeners->list);
			up_write(&listeners->sem);
		}
		return 0;
	}

	/* Deregister or cleanup */
cleanup:
	for_each_cpu_mask(cpu, mask) {
		listeners = &per_cpu(listener_array, cpu);
		down_write(&listeners->sem);
		list_for_each_entry_safe(s, tmp, &listeners->list, list) {
			if (s->pid == pid) {
				list_del(&s->list);
				kfree(s);
				break;
			}
		}
		up_write(&listeners->sem);
	}
	return 0;
}

/*
 * Parse the cpulist string in attribute @na into @mask.
 * Returns 1 if @na is absent, a negative errno on a bad attribute,
 * otherwise the result of cpulist_parse() (0 on success).
 */
static int parse(struct nlattr *na, cpumask_t *mask)
{
	char *data;
	int len;
	int ret;

	if (na == NULL)
		return 1;
	len = nla_len(na);
	if (len > TASKSTATS_CPUMASK_MAXLEN)
		return -E2BIG;
	if (len < 1)
		return -EINVAL;
	data = kmalloc(len, GFP_KERNEL);
	if (!data)
		return -ENOMEM;
	nla_strlcpy(data, na, len);
	ret = cpulist_parse(data, *mask);
	kfree(data);
	return ret;
}

/*
 * Append a nested AGGR_PID/AGGR_TGID attribute to @skb containing the
 * pid/tgid value and a reserved struct taskstats slot.  Returns a pointer
 * to the reserved stats area for the caller to fill in, or NULL on failure.
 * NOTE(review): on failure a partially-started nest is left in @skb;
 * callers recover by freeing the whole skb.
 */
static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
{
	struct nlattr *na, *ret;
	int aggr;

	aggr = (type == TASKSTATS_TYPE_PID)
			? TASKSTATS_TYPE_AGGR_PID
			: TASKSTATS_TYPE_AGGR_TGID;

	na = nla_nest_start(skb, aggr);
	if (!na)
		goto err;
	if (nla_put(skb, type, sizeof(pid), &pid) < 0)
		goto err;
	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
	if (!ret)
		goto err;
	nla_nest_end(skb, na);

	return nla_data(ret);
err:
	return NULL;
}

/*
 * Handler for TASKSTATS_CMD_GET: either (de)registers the sender as a
 * cpumask listener, or builds and unicasts a stats reply for the requested
 * PID or TGID.  Returns 0 on success or a negative errno.
 */
static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
	int rc = 0;
	struct sk_buff *rep_skb;
	struct taskstats *stats;
	size_t size;
	cpumask_t mask;

	/* parse() returns 1 when the attribute is absent, 0 on success */
	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
	if (rc < 0)
		return rc;
	if (rc == 0)
		return add_del_listener(info->snd_pid, &mask, REGISTER);

	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
	if (rc < 0)
		return rc;
	if (rc == 0)
		return add_del_listener(info->snd_pid, &mask, DEREGISTER);

	/*
	 * Size includes space for nested attributes
	 */
	size = nla_total_size(sizeof(u32)) +
		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);

	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
	if (rc < 0)
		return rc;

	rc = -EINVAL;
	if (info->attrs[TASKSTATS_CMD_ATTR_PID]) {
		u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
		stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid);
		if (!stats)
			goto err;

		rc = fill_pid(pid, NULL, stats);
		if (rc < 0)
			goto err;
	} else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) {
		u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
		stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid);
		if (!stats)
			goto err;

		rc = fill_tgid(tgid, NULL, stats);
		if (rc < 0)
			goto err;
	} else
		goto err;

	return send_reply(rep_skb, info->snd_pid);
err:
	nlmsg_free(rep_skb);
	return rc;
}

/*
 * Lazily allocate the per-thread-group taskstats accumulator for @tsk.
 * Allocation happens outside the lock; if another thread raced and
 * installed one first, the extra allocation is freed.  Returns
 * sig->stats, which is NULL only for single-threaded tasks or when
 * allocation failed (both are tolerated by callers).
 */
static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
{
	struct signal_struct *sig = tsk->signal;
	struct taskstats *stats;

	if (sig->stats || thread_group_empty(tsk))
		goto ret;

	/* No problem if kmem_cache_zalloc() fails */
	stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);

	spin_lock_irq(&tsk->sighand->siglock);
	if (!sig->stats) {
		sig->stats = stats;
		stats = NULL;
	}
	spin_unlock_irq(&tsk->sighand->siglock);

	/* Lost the race: another thread installed sig->stats first */
	if (stats)
		kmem_cache_free(taskstats_cache, stats);
ret:
	return sig->stats;
}

/* Send pid data out on exit */
void taskstats_exit(struct task_struct *tsk, int group_dead)
{
	int rc;
	struct listener_list *listeners;
	struct taskstats *stats;
	struct sk_buff *rep_skb;
	size_t size;
	int is_thread_group;

	if (!family_registered)
		return;

	/*
	 * Size includes space for nested attributes
	 */
	size = nla_total_size(sizeof(u32)) +
		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);

	is_thread_group = !!taskstats_tgid_alloc(tsk);
	if (is_thread_group) {
		/* PID + STATS + TGID + STATS */
		size = 2 * size;
		/* fill the tsk->signal->stats structure */
		fill_tgid_exit(tsk);
	}

	/* Exit path runs with stable cpu; raw per-cpu access is intended */
	listeners = &__raw_get_cpu_var(listener_array);
	if (list_empty(&listeners->list))
		return;

	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size);
	if (rc < 0)
		return;

	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid);
	if (!stats)
		goto err;

	rc = fill_pid(tsk->pid, tsk, stats);
	if (rc < 0)
		goto err;

	/*
	 * Doesn't matter if tsk is the leader or the last group member leaving
	 */
	if (!is_thread_group || !group_dead)
		goto send;

	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid);
	if (!stats)
		goto err;

	memcpy(stats, tsk->signal->stats, sizeof(*stats));

send:
	send_cpu_listeners(rep_skb, listeners);
	return;
err:
	nlmsg_free(rep_skb);
}

/* Operations table: one GET command validated by the policy above */
static struct genl_ops taskstats_ops = {
	.cmd		= TASKSTATS_CMD_GET,
	.doit		= taskstats_user_cmd,
	.policy		= taskstats_cmd_get_policy,
};

/* Needed early in initialization */
void __init taskstats_init_early(void)
{
	unsigned int i;

	taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
	for_each_possible_cpu(i) {
		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
		init_rwsem(&(per_cpu(listener_array, i).sem));
	}
}

/*
 * Register the genetlink family and its GET operation; only after both
 * succeed is family_registered set, enabling the exit-time path.
 */
static int __init taskstats_init(void)
{
	int rc;

	rc = genl_register_family(&family);
	if (rc)
		return rc;

	rc = genl_register_ops(&family, &taskstats_ops);
	if (rc < 0)
		goto err;

	family_registered = 1;
	return 0;
err:
	genl_unregister_family(&family);
	return rc;
}

/*
 * late initcall ensures initialization of statistics collection
 * mechanisms precedes initialization of the taskstats interface
 */
late_initcall(taskstats_init);