1 /* 2 * Copyright (C) 2008 Mathieu Desnoyers 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 */ 18 #include <linux/module.h> 19 #include <linux/mutex.h> 20 #include <linux/types.h> 21 #include <linux/jhash.h> 22 #include <linux/list.h> 23 #include <linux/rcupdate.h> 24 #include <linux/tracepoint.h> 25 #include <linux/err.h> 26 #include <linux/slab.h> 27 #include <linux/sched.h> 28 #include <linux/jump_label.h> 29 30 extern struct tracepoint * const __start___tracepoints_ptrs[]; 31 extern struct tracepoint * const __stop___tracepoints_ptrs[]; 32 33 /* Set to 1 to enable tracepoint debug output */ 34 static const int tracepoint_debug; 35 36 /* 37 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the 38 * builtin and module tracepoints and the hash table. 39 */ 40 static DEFINE_MUTEX(tracepoints_mutex); 41 42 /* 43 * Tracepoint hash table, containing the active tracepoints. 44 * Protected by tracepoints_mutex. 45 */ 46 #define TRACEPOINT_HASH_BITS 6 47 #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) 48 static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; 49 50 /* 51 * Note about RCU : 52 * It is used to delay the free of multiple probes array until a quiescent 53 * state is reached. 54 * Tracepoint entries modifications are protected by the tracepoints_mutex. 55 */ 56 struct tracepoint_entry { 57 struct hlist_node hlist; 58 struct tracepoint_func *funcs; 59 int refcount; /* Number of times armed. 0 if disarmed. */ 60 char name[0]; 61 }; 62 63 struct tp_probes { 64 union { 65 struct rcu_head rcu; 66 struct list_head list; 67 } u; 68 struct tracepoint_func probes[0]; 69 }; 70 71 static inline void *allocate_probes(int count) 72 { 73 struct tp_probes *p = kmalloc(count * sizeof(struct tracepoint_func) 74 + sizeof(struct tp_probes), GFP_KERNEL); 75 return p == NULL ? NULL : p->probes; 76 } 77 78 static void rcu_free_old_probes(struct rcu_head *head) 79 { 80 kfree(container_of(head, struct tp_probes, u.rcu)); 81 } 82 83 static inline void release_probes(struct tracepoint_func *old) 84 { 85 if (old) { 86 struct tp_probes *tp_probes = container_of(old, 87 struct tp_probes, probes[0]); 88 call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes); 89 } 90 } 91 92 static void debug_print_probes(struct tracepoint_entry *entry) 93 { 94 int i; 95 96 if (!tracepoint_debug || !entry->funcs) 97 return; 98 99 for (i = 0; entry->funcs[i].func; i++) 100 printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i].func); 101 } 102 103 static struct tracepoint_func * 104 tracepoint_entry_add_probe(struct tracepoint_entry *entry, 105 void *probe, void *data) 106 { 107 int nr_probes = 0; 108 struct tracepoint_func *old, *new; 109 110 WARN_ON(!probe); 111 112 debug_print_probes(entry); 113 old = entry->funcs; 114 if (old) { 115 /* (N -> N+1), (N != 0, 1) probes */ 116 for (nr_probes = 0; old[nr_probes].func; nr_probes++) 117 if (old[nr_probes].func == probe && 118 old[nr_probes].data == data) 119 return ERR_PTR(-EEXIST); 120 } 121 /* + 2 : one for new probe, one for NULL func */ 122 new = allocate_probes(nr_probes + 2); 123 if (new == NULL) 124 return ERR_PTR(-ENOMEM); 125 if (old) 126 memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); 127 new[nr_probes].func = probe; 128 new[nr_probes].data = data; 129 new[nr_probes + 1].func = NULL; 130 entry->refcount = nr_probes + 1; 131 entry->funcs = new; 132 debug_print_probes(entry); 133 return old; 134 } 135 136 static void * 137 tracepoint_entry_remove_probe(struct tracepoint_entry *entry, 138 void *probe, void *data) 139 { 140 int nr_probes = 0, nr_del = 0, i; 141 struct tracepoint_func *old, *new; 142 143 old = entry->funcs; 144 145 if (!old) 146 return ERR_PTR(-ENOENT); 147 148 debug_print_probes(entry); 149 /* (N -> M), (N > 1, M >= 0) probes */ 150 for (nr_probes = 0; old[nr_probes].func; nr_probes++) { 151 if (!probe || 152 (old[nr_probes].func == probe && 153 old[nr_probes].data == data)) 154 nr_del++; 155 } 156 157 if (nr_probes - nr_del == 0) { 158 /* N -> 0, (N > 1) */ 159 entry->funcs = NULL; 160 entry->refcount = 0; 161 debug_print_probes(entry); 162 return old; 163 } else { 164 int j = 0; 165 /* N -> M, (N > 1, M > 0) */ 166 /* + 1 for NULL */ 167 new = allocate_probes(nr_probes - nr_del + 1); 168 if (new == NULL) 169 return ERR_PTR(-ENOMEM); 170 for (i = 0; old[i].func; i++) 171 if (probe && 172 (old[i].func != probe || old[i].data != data)) 173 new[j++] = old[i]; 174 new[nr_probes - nr_del].func = NULL; 175 entry->refcount = nr_probes - nr_del; 176 entry->funcs = new; 177 } 178 debug_print_probes(entry); 179 return old; 180 } 181 182 /* 183 * Get tracepoint if the tracepoint is present in the tracepoint hash table. 184 * Must be called with tracepoints_mutex held. 185 * Returns NULL if not present. 186 */ 187 static struct tracepoint_entry *get_tracepoint(const char *name) 188 { 189 struct hlist_head *head; 190 struct hlist_node *node; 191 struct tracepoint_entry *e; 192 u32 hash = jhash(name, strlen(name), 0); 193 194 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; 195 hlist_for_each_entry(e, node, head, hlist) { 196 if (!strcmp(name, e->name)) 197 return e; 198 } 199 return NULL; 200 } 201 202 /* 203 * Add the tracepoint to the tracepoint hash table. Must be called with 204 * tracepoints_mutex held. 205 */ 206 static struct tracepoint_entry *add_tracepoint(const char *name) 207 { 208 struct hlist_head *head; 209 struct hlist_node *node; 210 struct tracepoint_entry *e; 211 size_t name_len = strlen(name) + 1; 212 u32 hash = jhash(name, name_len-1, 0); 213 214 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; 215 hlist_for_each_entry(e, node, head, hlist) { 216 if (!strcmp(name, e->name)) { 217 printk(KERN_NOTICE 218 "tracepoint %s busy\n", name); 219 return ERR_PTR(-EEXIST); /* Already there */ 220 } 221 } 222 /* 223 * Using kmalloc here to allocate a variable length element. Could 224 * cause some memory fragmentation if overused. 225 */ 226 e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL); 227 if (!e) 228 return ERR_PTR(-ENOMEM); 229 memcpy(&e->name[0], name, name_len); 230 e->funcs = NULL; 231 e->refcount = 0; 232 hlist_add_head(&e->hlist, head); 233 return e; 234 } 235 236 /* 237 * Remove the tracepoint from the tracepoint hash table. Must be called with 238 * mutex_lock held. 239 */ 240 static inline void remove_tracepoint(struct tracepoint_entry *e) 241 { 242 hlist_del(&e->hlist); 243 kfree(e); 244 } 245 246 /* 247 * Sets the probe callback corresponding to one tracepoint. 248 */ 249 static void set_tracepoint(struct tracepoint_entry **entry, 250 struct tracepoint *elem, int active) 251 { 252 WARN_ON(strcmp((*entry)->name, elem->name) != 0); 253 254 if (elem->regfunc && !elem->state && active) 255 elem->regfunc(); 256 else if (elem->unregfunc && elem->state && !active) 257 elem->unregfunc(); 258 259 /* 260 * rcu_assign_pointer has a smp_wmb() which makes sure that the new 261 * probe callbacks array is consistent before setting a pointer to it. 262 * This array is referenced by __DO_TRACE from 263 * include/linux/tracepoints.h. A matching smp_read_barrier_depends() 264 * is used. 265 */ 266 rcu_assign_pointer(elem->funcs, (*entry)->funcs); 267 if (!elem->state && active) { 268 jump_label_enable(&elem->state); 269 elem->state = active; 270 } else if (elem->state && !active) { 271 jump_label_disable(&elem->state); 272 elem->state = active; 273 } 274 } 275 276 /* 277 * Disable a tracepoint and its probe callback. 278 * Note: only waiting an RCU period after setting elem->call to the empty 279 * function insures that the original callback is not used anymore. This insured 280 * by preempt_disable around the call site. 281 */ 282 static void disable_tracepoint(struct tracepoint *elem) 283 { 284 if (elem->unregfunc && elem->state) 285 elem->unregfunc(); 286 287 if (elem->state) { 288 jump_label_disable(&elem->state); 289 elem->state = 0; 290 } 291 rcu_assign_pointer(elem->funcs, NULL); 292 } 293 294 /** 295 * tracepoint_update_probe_range - Update a probe range 296 * @begin: beginning of the range 297 * @end: end of the range 298 * 299 * Updates the probe callback corresponding to a range of tracepoints. 300 */ 301 void tracepoint_update_probe_range(struct tracepoint * const *begin, 302 struct tracepoint * const *end) 303 { 304 struct tracepoint * const *iter; 305 struct tracepoint_entry *mark_entry; 306 307 if (!begin) 308 return; 309 310 mutex_lock(&tracepoints_mutex); 311 for (iter = begin; iter < end; iter++) { 312 mark_entry = get_tracepoint((*iter)->name); 313 if (mark_entry) { 314 set_tracepoint(&mark_entry, *iter, 315 !!mark_entry->refcount); 316 } else { 317 disable_tracepoint(*iter); 318 } 319 } 320 mutex_unlock(&tracepoints_mutex); 321 } 322 323 /* 324 * Update probes, removing the faulty probes. 325 */ 326 static void tracepoint_update_probes(void) 327 { 328 /* Core kernel tracepoints */ 329 tracepoint_update_probe_range(__start___tracepoints_ptrs, 330 __stop___tracepoints_ptrs); 331 /* tracepoints in modules. */ 332 module_update_tracepoints(); 333 } 334 335 static struct tracepoint_func * 336 tracepoint_add_probe(const char *name, void *probe, void *data) 337 { 338 struct tracepoint_entry *entry; 339 struct tracepoint_func *old; 340 341 entry = get_tracepoint(name); 342 if (!entry) { 343 entry = add_tracepoint(name); 344 if (IS_ERR(entry)) 345 return (struct tracepoint_func *)entry; 346 } 347 old = tracepoint_entry_add_probe(entry, probe, data); 348 if (IS_ERR(old) && !entry->refcount) 349 remove_tracepoint(entry); 350 return old; 351 } 352 353 /** 354 * tracepoint_probe_register - Connect a probe to a tracepoint 355 * @name: tracepoint name 356 * @probe: probe handler 357 * 358 * Returns 0 if ok, error value on error. 359 * The probe address must at least be aligned on the architecture pointer size. 360 */ 361 int tracepoint_probe_register(const char *name, void *probe, void *data) 362 { 363 struct tracepoint_func *old; 364 365 mutex_lock(&tracepoints_mutex); 366 old = tracepoint_add_probe(name, probe, data); 367 mutex_unlock(&tracepoints_mutex); 368 if (IS_ERR(old)) 369 return PTR_ERR(old); 370 371 tracepoint_update_probes(); /* may update entry */ 372 release_probes(old); 373 return 0; 374 } 375 EXPORT_SYMBOL_GPL(tracepoint_probe_register); 376 377 static struct tracepoint_func * 378 tracepoint_remove_probe(const char *name, void *probe, void *data) 379 { 380 struct tracepoint_entry *entry; 381 struct tracepoint_func *old; 382 383 entry = get_tracepoint(name); 384 if (!entry) 385 return ERR_PTR(-ENOENT); 386 old = tracepoint_entry_remove_probe(entry, probe, data); 387 if (IS_ERR(old)) 388 return old; 389 if (!entry->refcount) 390 remove_tracepoint(entry); 391 return old; 392 } 393 394 /** 395 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint 396 * @name: tracepoint name 397 * @probe: probe function pointer 398 * 399 * We do not need to call a synchronize_sched to make sure the probes have 400 * finished running before doing a module unload, because the module unload 401 * itself uses stop_machine(), which insures that every preempt disabled section 402 * have finished. 403 */ 404 int tracepoint_probe_unregister(const char *name, void *probe, void *data) 405 { 406 struct tracepoint_func *old; 407 408 mutex_lock(&tracepoints_mutex); 409 old = tracepoint_remove_probe(name, probe, data); 410 mutex_unlock(&tracepoints_mutex); 411 if (IS_ERR(old)) 412 return PTR_ERR(old); 413 414 tracepoint_update_probes(); /* may update entry */ 415 release_probes(old); 416 return 0; 417 } 418 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); 419 420 static LIST_HEAD(old_probes); 421 static int need_update; 422 423 static void tracepoint_add_old_probes(void *old) 424 { 425 need_update = 1; 426 if (old) { 427 struct tp_probes *tp_probes = container_of(old, 428 struct tp_probes, probes[0]); 429 list_add(&tp_probes->u.list, &old_probes); 430 } 431 } 432 433 /** 434 * tracepoint_probe_register_noupdate - register a probe but not connect 435 * @name: tracepoint name 436 * @probe: probe handler 437 * 438 * caller must call tracepoint_probe_update_all() 439 */ 440 int tracepoint_probe_register_noupdate(const char *name, void *probe, 441 void *data) 442 { 443 struct tracepoint_func *old; 444 445 mutex_lock(&tracepoints_mutex); 446 old = tracepoint_add_probe(name, probe, data); 447 if (IS_ERR(old)) { 448 mutex_unlock(&tracepoints_mutex); 449 return PTR_ERR(old); 450 } 451 tracepoint_add_old_probes(old); 452 mutex_unlock(&tracepoints_mutex); 453 return 0; 454 } 455 EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate); 456 457 /** 458 * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect 459 * @name: tracepoint name 460 * @probe: probe function pointer 461 * 462 * caller must call tracepoint_probe_update_all() 463 */ 464 int tracepoint_probe_unregister_noupdate(const char *name, void *probe, 465 void *data) 466 { 467 struct tracepoint_func *old; 468 469 mutex_lock(&tracepoints_mutex); 470 old = tracepoint_remove_probe(name, probe, data); 471 if (IS_ERR(old)) { 472 mutex_unlock(&tracepoints_mutex); 473 return PTR_ERR(old); 474 } 475 tracepoint_add_old_probes(old); 476 mutex_unlock(&tracepoints_mutex); 477 return 0; 478 } 479 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate); 480 481 /** 482 * tracepoint_probe_update_all - update tracepoints 483 */ 484 void tracepoint_probe_update_all(void) 485 { 486 LIST_HEAD(release_probes); 487 struct tp_probes *pos, *next; 488 489 mutex_lock(&tracepoints_mutex); 490 if (!need_update) { 491 mutex_unlock(&tracepoints_mutex); 492 return; 493 } 494 if (!list_empty(&old_probes)) 495 list_replace_init(&old_probes, &release_probes); 496 need_update = 0; 497 mutex_unlock(&tracepoints_mutex); 498 499 tracepoint_update_probes(); 500 list_for_each_entry_safe(pos, next, &release_probes, u.list) { 501 list_del(&pos->u.list); 502 call_rcu_sched(&pos->u.rcu, rcu_free_old_probes); 503 } 504 } 505 EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); 506 507 /** 508 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. 509 * @tracepoint: current tracepoints (in), next tracepoint (out) 510 * @begin: beginning of the range 511 * @end: end of the range 512 * 513 * Returns whether a next tracepoint has been found (1) or not (0). 514 * Will return the first tracepoint in the range if the input tracepoint is 515 * NULL. 516 */ 517 int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, 518 struct tracepoint * const *begin, struct tracepoint * const *end) 519 { 520 if (!*tracepoint && begin != end) { 521 *tracepoint = begin; 522 return 1; 523 } 524 if (*tracepoint >= begin && *tracepoint < end) 525 return 1; 526 return 0; 527 } 528 EXPORT_SYMBOL_GPL(tracepoint_get_iter_range); 529 530 static void tracepoint_get_iter(struct tracepoint_iter *iter) 531 { 532 int found = 0; 533 534 /* Core kernel tracepoints */ 535 if (!iter->module) { 536 found = tracepoint_get_iter_range(&iter->tracepoint, 537 __start___tracepoints_ptrs, 538 __stop___tracepoints_ptrs); 539 if (found) 540 goto end; 541 } 542 /* tracepoints in modules. */ 543 found = module_get_iter_tracepoints(iter); 544 end: 545 if (!found) 546 tracepoint_iter_reset(iter); 547 } 548 549 void tracepoint_iter_start(struct tracepoint_iter *iter) 550 { 551 tracepoint_get_iter(iter); 552 } 553 EXPORT_SYMBOL_GPL(tracepoint_iter_start); 554 555 void tracepoint_iter_next(struct tracepoint_iter *iter) 556 { 557 iter->tracepoint++; 558 /* 559 * iter->tracepoint may be invalid because we blindly incremented it. 560 * Make sure it is valid by marshalling on the tracepoints, getting the 561 * tracepoints from following modules if necessary. 562 */ 563 tracepoint_get_iter(iter); 564 } 565 EXPORT_SYMBOL_GPL(tracepoint_iter_next); 566 567 void tracepoint_iter_stop(struct tracepoint_iter *iter) 568 { 569 } 570 EXPORT_SYMBOL_GPL(tracepoint_iter_stop); 571 572 void tracepoint_iter_reset(struct tracepoint_iter *iter) 573 { 574 iter->module = NULL; 575 iter->tracepoint = NULL; 576 } 577 EXPORT_SYMBOL_GPL(tracepoint_iter_reset); 578 579 #ifdef CONFIG_MODULES 580 581 int tracepoint_module_notify(struct notifier_block *self, 582 unsigned long val, void *data) 583 { 584 struct module *mod = data; 585 586 switch (val) { 587 case MODULE_STATE_COMING: 588 case MODULE_STATE_GOING: 589 tracepoint_update_probe_range(mod->tracepoints_ptrs, 590 mod->tracepoints_ptrs + mod->num_tracepoints); 591 break; 592 } 593 return 0; 594 } 595 596 struct notifier_block tracepoint_module_nb = { 597 .notifier_call = tracepoint_module_notify, 598 .priority = 0, 599 }; 600 601 static int init_tracepoints(void) 602 { 603 return register_module_notifier(&tracepoint_module_nb); 604 } 605 __initcall(init_tracepoints); 606 607 #endif /* CONFIG_MODULES */ 608 609 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS 610 611 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ 612 static int sys_tracepoint_refcount; 613 614 void syscall_regfunc(void) 615 { 616 unsigned long flags; 617 struct task_struct *g, *t; 618 619 if (!sys_tracepoint_refcount) { 620 read_lock_irqsave(&tasklist_lock, flags); 621 do_each_thread(g, t) { 622 /* Skip kernel threads. */ 623 if (t->mm) 624 set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); 625 } while_each_thread(g, t); 626 read_unlock_irqrestore(&tasklist_lock, flags); 627 } 628 sys_tracepoint_refcount++; 629 } 630 631 void syscall_unregfunc(void) 632 { 633 unsigned long flags; 634 struct task_struct *g, *t; 635 636 sys_tracepoint_refcount--; 637 if (!sys_tracepoint_refcount) { 638 read_lock_irqsave(&tasklist_lock, flags); 639 do_each_thread(g, t) { 640 clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); 641 } while_each_thread(g, t); 642 read_unlock_irqrestore(&tasklist_lock, flags); 643 } 644 } 645 #endif 646