1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * event tracer 4 * 5 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> 6 * 7 * - Added format output of fields of the trace point. 8 * This was based off of work by Tom Zanussi <tzanussi@gmail.com>. 9 * 10 */ 11 12 #define pr_fmt(fmt) fmt 13 14 #include <linux/workqueue.h> 15 #include <linux/security.h> 16 #include <linux/spinlock.h> 17 #include <linux/kthread.h> 18 #include <linux/tracefs.h> 19 #include <linux/uaccess.h> 20 #include <linux/module.h> 21 #include <linux/ctype.h> 22 #include <linux/sort.h> 23 #include <linux/slab.h> 24 #include <linux/delay.h> 25 26 #include <trace/events/sched.h> 27 #include <trace/syscall.h> 28 29 #include <asm/setup.h> 30 31 #include "trace_output.h" 32 33 #undef TRACE_SYSTEM 34 #define TRACE_SYSTEM "TRACE_SYSTEM" 35 36 DEFINE_MUTEX(event_mutex); 37 38 LIST_HEAD(ftrace_events); 39 static LIST_HEAD(ftrace_generic_fields); 40 static LIST_HEAD(ftrace_common_fields); 41 static bool eventdir_initialized; 42 43 static LIST_HEAD(module_strings); 44 45 struct module_string { 46 struct list_head next; 47 struct module *module; 48 char *str; 49 }; 50 51 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO) 52 53 static struct kmem_cache *field_cachep; 54 static struct kmem_cache *file_cachep; 55 56 static inline int system_refcount(struct event_subsystem *system) 57 { 58 return system->ref_count; 59 } 60 61 static int system_refcount_inc(struct event_subsystem *system) 62 { 63 return system->ref_count++; 64 } 65 66 static int system_refcount_dec(struct event_subsystem *system) 67 { 68 return --system->ref_count; 69 } 70 71 /* Double loops, do not use break, only goto's work */ 72 #define do_for_each_event_file(tr, file) \ 73 list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ 74 list_for_each_entry(file, &tr->events, list) 75 76 #define do_for_each_event_file_safe(tr, file) \ 77 list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ 78 struct trace_event_file *___n; \ 79 list_for_each_entry_safe(file, ___n, &tr->events, list) 80 81 #define while_for_each_event_file() \ 82 } 83 84 static struct ftrace_event_field * 85 __find_event_field(struct list_head *head, char *name) 86 { 87 struct ftrace_event_field *field; 88 89 list_for_each_entry(field, head, link) { 90 if (!strcmp(field->name, name)) 91 return field; 92 } 93 94 return NULL; 95 } 96 97 struct ftrace_event_field * 98 trace_find_event_field(struct trace_event_call *call, char *name) 99 { 100 struct ftrace_event_field *field; 101 struct list_head *head; 102 103 head = trace_get_fields(call); 104 field = __find_event_field(head, name); 105 if (field) 106 return field; 107 108 field = __find_event_field(&ftrace_generic_fields, name); 109 if (field) 110 return field; 111 112 return __find_event_field(&ftrace_common_fields, name); 113 } 114 115 static int __trace_define_field(struct list_head *head, const char *type, 116 const char *name, int offset, int size, 117 int is_signed, int filter_type) 118 { 119 struct ftrace_event_field *field; 120 121 field = kmem_cache_alloc(field_cachep, GFP_TRACE); 122 if (!field) 123 return -ENOMEM; 124 125 field->name = name; 126 field->type = type; 127 128 if (filter_type == FILTER_OTHER) 129 field->filter_type = filter_assign_type(type); 130 else 131 field->filter_type = filter_type; 132 133 field->offset = offset; 134 field->size = size; 135 field->is_signed = is_signed; 136 137 list_add(&field->link, head); 138 139 return 0; 140 } 141 142 int trace_define_field(struct trace_event_call *call, const char *type, 143 
const char *name, int offset, int size, int is_signed, 144 int filter_type) 145 { 146 struct list_head *head; 147 148 if (WARN_ON(!call->class)) 149 return 0; 150 151 head = trace_get_fields(call); 152 return __trace_define_field(head, type, name, offset, size, 153 is_signed, filter_type); 154 } 155 EXPORT_SYMBOL_GPL(trace_define_field); 156 157 #define __generic_field(type, item, filter_type) \ 158 ret = __trace_define_field(&ftrace_generic_fields, #type, \ 159 #item, 0, 0, is_signed_type(type), \ 160 filter_type); \ 161 if (ret) \ 162 return ret; 163 164 #define __common_field(type, item) \ 165 ret = __trace_define_field(&ftrace_common_fields, #type, \ 166 "common_" #item, \ 167 offsetof(typeof(ent), item), \ 168 sizeof(ent.item), \ 169 is_signed_type(type), FILTER_OTHER); \ 170 if (ret) \ 171 return ret; 172 173 static int trace_define_generic_fields(void) 174 { 175 int ret; 176 177 __generic_field(int, CPU, FILTER_CPU); 178 __generic_field(int, cpu, FILTER_CPU); 179 __generic_field(int, common_cpu, FILTER_CPU); 180 __generic_field(char *, COMM, FILTER_COMM); 181 __generic_field(char *, comm, FILTER_COMM); 182 183 return ret; 184 } 185 186 static int trace_define_common_fields(void) 187 { 188 int ret; 189 struct trace_entry ent; 190 191 __common_field(unsigned short, type); 192 __common_field(unsigned char, flags); 193 /* Holds both preempt_count and migrate_disable */ 194 __common_field(unsigned char, preempt_count); 195 __common_field(int, pid); 196 197 return ret; 198 } 199 200 static void trace_destroy_fields(struct trace_event_call *call) 201 { 202 struct ftrace_event_field *field, *next; 203 struct list_head *head; 204 205 head = trace_get_fields(call); 206 list_for_each_entry_safe(field, next, head, link) { 207 list_del(&field->link); 208 kmem_cache_free(field_cachep, field); 209 } 210 } 211 212 /* 213 * run-time version of trace_event_get_offsets_<call>() that returns the last 214 * accessible offset of trace fields excluding __dynamic_array bytes 215 */ 216 int trace_event_get_offsets(struct trace_event_call *call) 217 { 218 struct ftrace_event_field *tail; 219 struct list_head *head; 220 221 head = trace_get_fields(call); 222 /* 223 * head->next points to the last field with the largest offset, 224 * since it was added last by trace_define_field() 225 */ 226 tail = list_first_entry(head, struct ftrace_event_field, link); 227 return tail->offset + tail->size; 228 } 229 230 /* 231 * Check if the referenced field is an array and return true, 232 * as arrays are OK to dereference. 233 */ 234 static bool test_field(const char *fmt, struct trace_event_call *call) 235 { 236 struct trace_event_fields *field = call->class->fields_array; 237 const char *array_descriptor; 238 const char *p = fmt; 239 int len; 240 241 if (!(len = str_has_prefix(fmt, "REC->"))) 242 return false; 243 fmt += len; 244 for (p = fmt; *p; p++) { 245 if (!isalnum(*p) && *p != '_') 246 break; 247 } 248 len = p - fmt; 249 250 for (; field->type; field++) { 251 if (strncmp(field->name, fmt, len) || 252 field->name[len]) 253 continue; 254 array_descriptor = strchr(field->type, '['); 255 /* This is an array and is OK to dereference. */ 256 return array_descriptor != NULL; 257 } 258 return false; 259 } 260 261 /* 262 * Examine the print fmt of the event looking for unsafe dereference 263 * pointers using %p* that could be recorded in the trace event and 264 * much later referenced after the pointer was freed. Dereferencing 265 * pointers are OK, if it is dereferenced into the event itself. 
266 */ 267 static void test_event_printk(struct trace_event_call *call) 268 { 269 u64 dereference_flags = 0; 270 bool first = true; 271 const char *fmt, *c, *r, *a; 272 int parens = 0; 273 char in_quote = 0; 274 int start_arg = 0; 275 int arg = 0; 276 int i; 277 278 fmt = call->print_fmt; 279 280 if (!fmt) 281 return; 282 283 for (i = 0; fmt[i]; i++) { 284 switch (fmt[i]) { 285 case '\\': 286 i++; 287 if (!fmt[i]) 288 return; 289 continue; 290 case '"': 291 case '\'': 292 /* 293 * The print fmt starts with a string that 294 * is processed first to find %p* usage, 295 * then after the first string, the print fmt 296 * contains arguments that are used to check 297 * if the dereferenced %p* usage is safe. 298 */ 299 if (first) { 300 if (fmt[i] == '\'') 301 continue; 302 if (in_quote) { 303 arg = 0; 304 first = false; 305 /* 306 * If there was no %p* uses 307 * the fmt is OK. 308 */ 309 if (!dereference_flags) 310 return; 311 } 312 } 313 if (in_quote) { 314 if (in_quote == fmt[i]) 315 in_quote = 0; 316 } else { 317 in_quote = fmt[i]; 318 } 319 continue; 320 case '%': 321 if (!first || !in_quote) 322 continue; 323 i++; 324 if (!fmt[i]) 325 return; 326 switch (fmt[i]) { 327 case '%': 328 continue; 329 case 'p': 330 /* Find dereferencing fields */ 331 switch (fmt[i + 1]) { 332 case 'B': case 'R': case 'r': 333 case 'b': case 'M': case 'm': 334 case 'I': case 'i': case 'E': 335 case 'U': case 'V': case 'N': 336 case 'a': case 'd': case 'D': 337 case 'g': case 't': case 'C': 338 case 'O': case 'f': 339 if (WARN_ONCE(arg == 63, 340 "Too many args for event: %s", 341 trace_event_name(call))) 342 return; 343 dereference_flags |= 1ULL << arg; 344 } 345 break; 346 default: 347 { 348 bool star = false; 349 int j; 350 351 /* Increment arg if %*s exists. */ 352 for (j = 0; fmt[i + j]; j++) { 353 if (isdigit(fmt[i + j]) || 354 fmt[i + j] == '.') 355 continue; 356 if (fmt[i + j] == '*') { 357 star = true; 358 continue; 359 } 360 if ((fmt[i + j] == 's') && star) 361 arg++; 362 break; 363 } 364 break; 365 } /* default */ 366 367 } /* switch */ 368 arg++; 369 continue; 370 case '(': 371 if (in_quote) 372 continue; 373 parens++; 374 continue; 375 case ')': 376 if (in_quote) 377 continue; 378 parens--; 379 if (WARN_ONCE(parens < 0, 380 "Paren mismatch for event: %s\narg='%s'\n%*s", 381 trace_event_name(call), 382 fmt + start_arg, 383 (i - start_arg) + 5, "^")) 384 return; 385 continue; 386 case ',': 387 if (in_quote || parens) 388 continue; 389 i++; 390 while (isspace(fmt[i])) 391 i++; 392 start_arg = i; 393 if (!(dereference_flags & (1ULL << arg))) 394 goto next_arg; 395 396 /* Find the REC-> in the argument */ 397 c = strchr(fmt + i, ','); 398 r = strstr(fmt + i, "REC->"); 399 if (r && (!c || r < c)) { 400 /* 401 * Addresses of events on the buffer, 402 * or an array on the buffer is 403 * OK to dereference. 404 * There's ways to fool this, but 405 * this is to catch common mistakes, 406 * not malicious code. 407 */ 408 a = strchr(fmt + i, '&'); 409 if ((a && (a < r)) || test_field(r, call)) 410 dereference_flags &= ~(1ULL << arg); 411 } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) && 412 (!c || r < c)) { 413 dereference_flags &= ~(1ULL << arg); 414 } else if ((r = strstr(fmt + i, "__get_sockaddr(")) && 415 (!c || r < c)) { 416 dereference_flags &= ~(1ULL << arg); 417 } 418 419 next_arg: 420 i--; 421 arg++; 422 } 423 } 424 425 /* 426 * If you triggered the below warning, the trace event reported 427 * uses an unsafe dereference pointer %p*. 
As the data stored 428 * at the trace event time may no longer exist when the trace 429 * event is printed, dereferencing to the original source is 430 * unsafe. The source of the dereference must be copied into the 431 * event itself, and the dereference must access the copy instead. 432 */ 433 if (WARN_ON_ONCE(dereference_flags)) { 434 arg = 1; 435 while (!(dereference_flags & 1)) { 436 dereference_flags >>= 1; 437 arg++; 438 } 439 pr_warn("event %s has unsafe dereference of argument %d\n", 440 trace_event_name(call), arg); 441 pr_warn("print_fmt: %s\n", fmt); 442 } 443 } 444 445 int trace_event_raw_init(struct trace_event_call *call) 446 { 447 int id; 448 449 id = register_trace_event(&call->event); 450 if (!id) 451 return -ENODEV; 452 453 test_event_printk(call); 454 455 return 0; 456 } 457 EXPORT_SYMBOL_GPL(trace_event_raw_init); 458 459 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) 460 { 461 struct trace_array *tr = trace_file->tr; 462 struct trace_array_cpu *data; 463 struct trace_pid_list *no_pid_list; 464 struct trace_pid_list *pid_list; 465 466 pid_list = rcu_dereference_raw(tr->filtered_pids); 467 no_pid_list = rcu_dereference_raw(tr->filtered_no_pids); 468 469 if (!pid_list && !no_pid_list) 470 return false; 471 472 data = this_cpu_ptr(tr->array_buffer.data); 473 474 return data->ignore_pid; 475 } 476 EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid); 477 478 void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, 479 struct trace_event_file *trace_file, 480 unsigned long len) 481 { 482 struct trace_event_call *event_call = trace_file->event_call; 483 484 if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) && 485 trace_event_ignore_this_pid(trace_file)) 486 return NULL; 487 488 /* 489 * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables 490 * preemption (adding one to the preempt_count). Since we are 491 * interested in the preempt_count at the time the tracepoint was 492 * hit, we need to subtract one to offset the increment. 
493 */ 494 fbuffer->trace_ctx = tracing_gen_ctx_dec(); 495 fbuffer->trace_file = trace_file; 496 497 fbuffer->event = 498 trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, 499 event_call->event.type, len, 500 fbuffer->trace_ctx); 501 if (!fbuffer->event) 502 return NULL; 503 504 fbuffer->regs = NULL; 505 fbuffer->entry = ring_buffer_event_data(fbuffer->event); 506 return fbuffer->entry; 507 } 508 EXPORT_SYMBOL_GPL(trace_event_buffer_reserve); 509 510 int trace_event_reg(struct trace_event_call *call, 511 enum trace_reg type, void *data) 512 { 513 struct trace_event_file *file = data; 514 515 WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT)); 516 switch (type) { 517 case TRACE_REG_REGISTER: 518 return tracepoint_probe_register(call->tp, 519 call->class->probe, 520 file); 521 case TRACE_REG_UNREGISTER: 522 tracepoint_probe_unregister(call->tp, 523 call->class->probe, 524 file); 525 return 0; 526 527 #ifdef CONFIG_PERF_EVENTS 528 case TRACE_REG_PERF_REGISTER: 529 return tracepoint_probe_register(call->tp, 530 call->class->perf_probe, 531 call); 532 case TRACE_REG_PERF_UNREGISTER: 533 tracepoint_probe_unregister(call->tp, 534 call->class->perf_probe, 535 call); 536 return 0; 537 case TRACE_REG_PERF_OPEN: 538 case TRACE_REG_PERF_CLOSE: 539 case TRACE_REG_PERF_ADD: 540 case TRACE_REG_PERF_DEL: 541 return 0; 542 #endif 543 } 544 return 0; 545 } 546 EXPORT_SYMBOL_GPL(trace_event_reg); 547 548 void trace_event_enable_cmd_record(bool enable) 549 { 550 struct trace_event_file *file; 551 struct trace_array *tr; 552 553 lockdep_assert_held(&event_mutex); 554 555 do_for_each_event_file(tr, file) { 556 557 if (!(file->flags & EVENT_FILE_FL_ENABLED)) 558 continue; 559 560 if (enable) { 561 tracing_start_cmdline_record(); 562 set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); 563 } else { 564 tracing_stop_cmdline_record(); 565 clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); 566 } 567 } while_for_each_event_file(); 568 } 569 570 void trace_event_enable_tgid_record(bool enable) 571 { 572 struct trace_event_file *file; 573 struct trace_array *tr; 574 575 lockdep_assert_held(&event_mutex); 576 577 do_for_each_event_file(tr, file) { 578 if (!(file->flags & EVENT_FILE_FL_ENABLED)) 579 continue; 580 581 if (enable) { 582 tracing_start_tgid_record(); 583 set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); 584 } else { 585 tracing_stop_tgid_record(); 586 clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, 587 &file->flags); 588 } 589 } while_for_each_event_file(); 590 } 591 592 static int __ftrace_event_enable_disable(struct trace_event_file *file, 593 int enable, int soft_disable) 594 { 595 struct trace_event_call *call = file->event_call; 596 struct trace_array *tr = file->tr; 597 unsigned long file_flags = file->flags; 598 int ret = 0; 599 int disable; 600 601 switch (enable) { 602 case 0: 603 /* 604 * When soft_disable is set and enable is cleared, the sm_ref 605 * reference counter is decremented. If it reaches 0, we want 606 * to clear the SOFT_DISABLED flag but leave the event in the 607 * state that it was. That is, if the event was enabled and 608 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED 609 * is set we do not want the event to be enabled before we 610 * clear the bit. 611 * 612 * When soft_disable is not set but the SOFT_MODE flag is, 613 * we do nothing. Do not disable the tracepoint, otherwise 614 * "soft enable"s (clearing the SOFT_DISABLED bit) wont work. 
615 */ 616 if (soft_disable) { 617 if (atomic_dec_return(&file->sm_ref) > 0) 618 break; 619 disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED; 620 clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); 621 } else 622 disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE); 623 624 if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) { 625 clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); 626 if (file->flags & EVENT_FILE_FL_RECORDED_CMD) { 627 tracing_stop_cmdline_record(); 628 clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); 629 } 630 631 if (file->flags & EVENT_FILE_FL_RECORDED_TGID) { 632 tracing_stop_tgid_record(); 633 clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); 634 } 635 636 call->class->reg(call, TRACE_REG_UNREGISTER, file); 637 } 638 /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ 639 if (file->flags & EVENT_FILE_FL_SOFT_MODE) 640 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); 641 else 642 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); 643 break; 644 case 1: 645 /* 646 * When soft_disable is set and enable is set, we want to 647 * register the tracepoint for the event, but leave the event 648 * as is. That means, if the event was already enabled, we do 649 * nothing (but set SOFT_MODE). If the event is disabled, we 650 * set SOFT_DISABLED before enabling the event tracepoint, so 651 * it still seems to be disabled. 652 */ 653 if (!soft_disable) 654 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); 655 else { 656 if (atomic_inc_return(&file->sm_ref) > 1) 657 break; 658 set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); 659 } 660 661 if (!(file->flags & EVENT_FILE_FL_ENABLED)) { 662 bool cmd = false, tgid = false; 663 664 /* Keep the event disabled, when going to SOFT_MODE. */ 665 if (soft_disable) 666 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); 667 668 if (tr->trace_flags & TRACE_ITER_RECORD_CMD) { 669 cmd = true; 670 tracing_start_cmdline_record(); 671 set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); 672 } 673 674 if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { 675 tgid = true; 676 tracing_start_tgid_record(); 677 set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); 678 } 679 680 ret = call->class->reg(call, TRACE_REG_REGISTER, file); 681 if (ret) { 682 if (cmd) 683 tracing_stop_cmdline_record(); 684 if (tgid) 685 tracing_stop_tgid_record(); 686 pr_info("event trace: Could not enable event " 687 "%s\n", trace_event_name(call)); 688 break; 689 } 690 set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); 691 692 /* WAS_ENABLED gets set but never cleared. 
*/ 693 set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags); 694 } 695 break; 696 } 697 698 /* Enable or disable use of trace_buffered_event */ 699 if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) != 700 (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) { 701 if (file->flags & EVENT_FILE_FL_SOFT_DISABLED) 702 trace_buffered_event_enable(); 703 else 704 trace_buffered_event_disable(); 705 } 706 707 return ret; 708 } 709 710 int trace_event_enable_disable(struct trace_event_file *file, 711 int enable, int soft_disable) 712 { 713 return __ftrace_event_enable_disable(file, enable, soft_disable); 714 } 715 716 static int ftrace_event_enable_disable(struct trace_event_file *file, 717 int enable) 718 { 719 return __ftrace_event_enable_disable(file, enable, 0); 720 } 721 722 static void ftrace_clear_events(struct trace_array *tr) 723 { 724 struct trace_event_file *file; 725 726 mutex_lock(&event_mutex); 727 list_for_each_entry(file, &tr->events, list) { 728 ftrace_event_enable_disable(file, 0); 729 } 730 mutex_unlock(&event_mutex); 731 } 732 733 static void 734 event_filter_pid_sched_process_exit(void *data, struct task_struct *task) 735 { 736 struct trace_pid_list *pid_list; 737 struct trace_array *tr = data; 738 739 pid_list = rcu_dereference_raw(tr->filtered_pids); 740 trace_filter_add_remove_task(pid_list, NULL, task); 741 742 pid_list = rcu_dereference_raw(tr->filtered_no_pids); 743 trace_filter_add_remove_task(pid_list, NULL, task); 744 } 745 746 static void 747 event_filter_pid_sched_process_fork(void *data, 748 struct task_struct *self, 749 struct task_struct *task) 750 { 751 struct trace_pid_list *pid_list; 752 struct trace_array *tr = data; 753 754 pid_list = rcu_dereference_sched(tr->filtered_pids); 755 trace_filter_add_remove_task(pid_list, self, task); 756 757 pid_list = rcu_dereference_sched(tr->filtered_no_pids); 758 trace_filter_add_remove_task(pid_list, self, task); 759 } 760 761 void trace_event_follow_fork(struct trace_array *tr, bool enable) 762 { 763 if (enable) { 764 register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork, 765 tr, INT_MIN); 766 register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit, 767 tr, INT_MAX); 768 } else { 769 unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork, 770 tr); 771 unregister_trace_sched_process_free(event_filter_pid_sched_process_exit, 772 tr); 773 } 774 } 775 776 static void 777 event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, 778 struct task_struct *prev, 779 struct task_struct *next, 780 unsigned int prev_state) 781 { 782 struct trace_array *tr = data; 783 struct trace_pid_list *no_pid_list; 784 struct trace_pid_list *pid_list; 785 bool ret; 786 787 pid_list = rcu_dereference_sched(tr->filtered_pids); 788 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); 789 790 /* 791 * Sched switch is funny, as we only want to ignore it 792 * in the notrace case if both prev and next should be ignored. 
793 */ 794 ret = trace_ignore_this_task(NULL, no_pid_list, prev) && 795 trace_ignore_this_task(NULL, no_pid_list, next); 796 797 this_cpu_write(tr->array_buffer.data->ignore_pid, ret || 798 (trace_ignore_this_task(pid_list, NULL, prev) && 799 trace_ignore_this_task(pid_list, NULL, next))); 800 } 801 802 static void 803 event_filter_pid_sched_switch_probe_post(void *data, bool preempt, 804 struct task_struct *prev, 805 struct task_struct *next, 806 unsigned int prev_state) 807 { 808 struct trace_array *tr = data; 809 struct trace_pid_list *no_pid_list; 810 struct trace_pid_list *pid_list; 811 812 pid_list = rcu_dereference_sched(tr->filtered_pids); 813 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); 814 815 this_cpu_write(tr->array_buffer.data->ignore_pid, 816 trace_ignore_this_task(pid_list, no_pid_list, next)); 817 } 818 819 static void 820 event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task) 821 { 822 struct trace_array *tr = data; 823 struct trace_pid_list *no_pid_list; 824 struct trace_pid_list *pid_list; 825 826 /* Nothing to do if we are already tracing */ 827 if (!this_cpu_read(tr->array_buffer.data->ignore_pid)) 828 return; 829 830 pid_list = rcu_dereference_sched(tr->filtered_pids); 831 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); 832 833 this_cpu_write(tr->array_buffer.data->ignore_pid, 834 trace_ignore_this_task(pid_list, no_pid_list, task)); 835 } 836 837 static void 838 event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task) 839 { 840 struct trace_array *tr = data; 841 struct trace_pid_list *no_pid_list; 842 struct trace_pid_list *pid_list; 843 844 /* Nothing to do if we are not tracing */ 845 if (this_cpu_read(tr->array_buffer.data->ignore_pid)) 846 return; 847 848 pid_list = rcu_dereference_sched(tr->filtered_pids); 849 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); 850 851 /* Set tracing if current is enabled */ 852 this_cpu_write(tr->array_buffer.data->ignore_pid, 853 trace_ignore_this_task(pid_list, no_pid_list, current)); 854 } 855 856 static void unregister_pid_events(struct trace_array *tr) 857 { 858 unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr); 859 unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr); 860 861 unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr); 862 unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr); 863 864 unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr); 865 unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr); 866 867 unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr); 868 unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr); 869 } 870 871 static void __ftrace_clear_event_pids(struct trace_array *tr, int type) 872 { 873 struct trace_pid_list *pid_list; 874 struct trace_pid_list *no_pid_list; 875 struct trace_event_file *file; 876 int cpu; 877 878 pid_list = rcu_dereference_protected(tr->filtered_pids, 879 lockdep_is_held(&event_mutex)); 880 no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, 881 lockdep_is_held(&event_mutex)); 882 883 /* Make sure there's something to do */ 884 if (!pid_type_enabled(type, pid_list, no_pid_list)) 885 return; 886 887 if (!still_need_pid_events(type, pid_list, no_pid_list)) { 888 unregister_pid_events(tr); 889 890 list_for_each_entry(file, &tr->events, list) { 891 clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); 892 } 
893 894 for_each_possible_cpu(cpu) 895 per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false; 896 } 897 898 if (type & TRACE_PIDS) 899 rcu_assign_pointer(tr->filtered_pids, NULL); 900 901 if (type & TRACE_NO_PIDS) 902 rcu_assign_pointer(tr->filtered_no_pids, NULL); 903 904 /* Wait till all users are no longer using pid filtering */ 905 tracepoint_synchronize_unregister(); 906 907 if ((type & TRACE_PIDS) && pid_list) 908 trace_pid_list_free(pid_list); 909 910 if ((type & TRACE_NO_PIDS) && no_pid_list) 911 trace_pid_list_free(no_pid_list); 912 } 913 914 static void ftrace_clear_event_pids(struct trace_array *tr, int type) 915 { 916 mutex_lock(&event_mutex); 917 __ftrace_clear_event_pids(tr, type); 918 mutex_unlock(&event_mutex); 919 } 920 921 static void __put_system(struct event_subsystem *system) 922 { 923 struct event_filter *filter = system->filter; 924 925 WARN_ON_ONCE(system_refcount(system) == 0); 926 if (system_refcount_dec(system)) 927 return; 928 929 list_del(&system->list); 930 931 if (filter) { 932 kfree(filter->filter_string); 933 kfree(filter); 934 } 935 kfree_const(system->name); 936 kfree(system); 937 } 938 939 static void __get_system(struct event_subsystem *system) 940 { 941 WARN_ON_ONCE(system_refcount(system) == 0); 942 system_refcount_inc(system); 943 } 944 945 static void __get_system_dir(struct trace_subsystem_dir *dir) 946 { 947 WARN_ON_ONCE(dir->ref_count == 0); 948 dir->ref_count++; 949 __get_system(dir->subsystem); 950 } 951 952 static void __put_system_dir(struct trace_subsystem_dir *dir) 953 { 954 WARN_ON_ONCE(dir->ref_count == 0); 955 /* If the subsystem is about to be freed, the dir must be too */ 956 WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); 957 958 __put_system(dir->subsystem); 959 if (!--dir->ref_count) 960 kfree(dir); 961 } 962 963 static void put_system(struct trace_subsystem_dir *dir) 964 { 965 mutex_lock(&event_mutex); 966 __put_system_dir(dir); 967 mutex_unlock(&event_mutex); 968 } 969 970 static void remove_subsystem(struct trace_subsystem_dir *dir) 971 { 972 if (!dir) 973 return; 974 975 if (!--dir->nr_events) { 976 tracefs_remove(dir->entry); 977 list_del(&dir->list); 978 __put_system_dir(dir); 979 } 980 } 981 982 static void remove_event_file_dir(struct trace_event_file *file) 983 { 984 struct dentry *dir = file->dir; 985 struct dentry *child; 986 987 if (dir) { 988 spin_lock(&dir->d_lock); /* probably unneeded */ 989 list_for_each_entry(child, &dir->d_subdirs, d_child) { 990 if (d_really_is_positive(child)) /* probably unneeded */ 991 d_inode(child)->i_private = NULL; 992 } 993 spin_unlock(&dir->d_lock); 994 995 tracefs_remove(dir); 996 } 997 998 list_del(&file->list); 999 remove_subsystem(file->system); 1000 free_event_filter(file->filter); 1001 kmem_cache_free(file_cachep, file); 1002 } 1003 1004 /* 1005 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. 
1006 */ 1007 static int 1008 __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, 1009 const char *sub, const char *event, int set) 1010 { 1011 struct trace_event_file *file; 1012 struct trace_event_call *call; 1013 const char *name; 1014 int ret = -EINVAL; 1015 int eret = 0; 1016 1017 list_for_each_entry(file, &tr->events, list) { 1018 1019 call = file->event_call; 1020 name = trace_event_name(call); 1021 1022 if (!name || !call->class || !call->class->reg) 1023 continue; 1024 1025 if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) 1026 continue; 1027 1028 if (match && 1029 strcmp(match, name) != 0 && 1030 strcmp(match, call->class->system) != 0) 1031 continue; 1032 1033 if (sub && strcmp(sub, call->class->system) != 0) 1034 continue; 1035 1036 if (event && strcmp(event, name) != 0) 1037 continue; 1038 1039 ret = ftrace_event_enable_disable(file, set); 1040 1041 /* 1042 * Save the first error and return that. Some events 1043 * may still have been enabled, but let the user 1044 * know that something went wrong. 1045 */ 1046 if (ret && !eret) 1047 eret = ret; 1048 1049 ret = eret; 1050 } 1051 1052 return ret; 1053 } 1054 1055 static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, 1056 const char *sub, const char *event, int set) 1057 { 1058 int ret; 1059 1060 mutex_lock(&event_mutex); 1061 ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set); 1062 mutex_unlock(&event_mutex); 1063 1064 return ret; 1065 } 1066 1067 int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) 1068 { 1069 char *event = NULL, *sub = NULL, *match; 1070 int ret; 1071 1072 if (!tr) 1073 return -ENOENT; 1074 /* 1075 * The buf format can be <subsystem>:<event-name> 1076 * *:<event-name> means any event by that name. 1077 * :<event-name> is the same. 1078 * 1079 * <subsystem>:* means all events in that subsystem 1080 * <subsystem>: means the same. 1081 * 1082 * <name> (no ':') means all events in a subsystem with 1083 * the name <name> or any event that matches <name> 1084 */ 1085 1086 match = strsep(&buf, ":"); 1087 if (buf) { 1088 sub = match; 1089 event = buf; 1090 match = NULL; 1091 1092 if (!strlen(sub) || strcmp(sub, "*") == 0) 1093 sub = NULL; 1094 if (!strlen(event) || strcmp(event, "*") == 0) 1095 event = NULL; 1096 } 1097 1098 ret = __ftrace_set_clr_event(tr, match, sub, event, set); 1099 1100 /* Put back the colon to allow this to be called again */ 1101 if (buf) 1102 *(buf - 1) = ':'; 1103 1104 return ret; 1105 } 1106 1107 /** 1108 * trace_set_clr_event - enable or disable an event 1109 * @system: system name to match (NULL for any system) 1110 * @event: event name to match (NULL for all events, within system) 1111 * @set: 1 to enable, 0 to disable 1112 * 1113 * This is a way for other parts of the kernel to enable or disable 1114 * event recording. 1115 * 1116 * Returns 0 on success, -EINVAL if the parameters do not match any 1117 * registered events. 1118 */ 1119 int trace_set_clr_event(const char *system, const char *event, int set) 1120 { 1121 struct trace_array *tr = top_trace_array(); 1122 1123 if (!tr) 1124 return -ENODEV; 1125 1126 return __ftrace_set_clr_event(tr, NULL, system, event, set); 1127 } 1128 EXPORT_SYMBOL_GPL(trace_set_clr_event); 1129 1130 /** 1131 * trace_array_set_clr_event - enable or disable an event for a trace array. 1132 * @tr: concerned trace array. 
1133 * @system: system name to match (NULL for any system) 1134 * @event: event name to match (NULL for all events, within system) 1135 * @enable: true to enable, false to disable 1136 * 1137 * This is a way for other parts of the kernel to enable or disable 1138 * event recording. 1139 * 1140 * Returns 0 on success, -EINVAL if the parameters do not match any 1141 * registered events. 1142 */ 1143 int trace_array_set_clr_event(struct trace_array *tr, const char *system, 1144 const char *event, bool enable) 1145 { 1146 int set; 1147 1148 if (!tr) 1149 return -ENOENT; 1150 1151 set = (enable == true) ? 1 : 0; 1152 return __ftrace_set_clr_event(tr, NULL, system, event, set); 1153 } 1154 EXPORT_SYMBOL_GPL(trace_array_set_clr_event); 1155 1156 /* 128 should be much more than enough */ 1157 #define EVENT_BUF_SIZE 127 1158 1159 static ssize_t 1160 ftrace_event_write(struct file *file, const char __user *ubuf, 1161 size_t cnt, loff_t *ppos) 1162 { 1163 struct trace_parser parser; 1164 struct seq_file *m = file->private_data; 1165 struct trace_array *tr = m->private; 1166 ssize_t read, ret; 1167 1168 if (!cnt) 1169 return 0; 1170 1171 ret = tracing_update_buffers(); 1172 if (ret < 0) 1173 return ret; 1174 1175 if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1)) 1176 return -ENOMEM; 1177 1178 read = trace_get_user(&parser, ubuf, cnt, ppos); 1179 1180 if (read >= 0 && trace_parser_loaded((&parser))) { 1181 int set = 1; 1182 1183 if (*parser.buffer == '!') 1184 set = 0; 1185 1186 ret = ftrace_set_clr_event(tr, parser.buffer + !set, set); 1187 if (ret) 1188 goto out_put; 1189 } 1190 1191 ret = read; 1192 1193 out_put: 1194 trace_parser_put(&parser); 1195 1196 return ret; 1197 } 1198 1199 static void * 1200 t_next(struct seq_file *m, void *v, loff_t *pos) 1201 { 1202 struct trace_event_file *file = v; 1203 struct trace_event_call *call; 1204 struct trace_array *tr = m->private; 1205 1206 (*pos)++; 1207 1208 list_for_each_entry_continue(file, &tr->events, list) { 1209 call = file->event_call; 1210 /* 1211 * The ftrace subsystem is for showing formats only. 1212 * They can not be enabled or disabled via the event files. 
1213 */ 1214 if (call->class && call->class->reg && 1215 !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) 1216 return file; 1217 } 1218 1219 return NULL; 1220 } 1221 1222 static void *t_start(struct seq_file *m, loff_t *pos) 1223 { 1224 struct trace_event_file *file; 1225 struct trace_array *tr = m->private; 1226 loff_t l; 1227 1228 mutex_lock(&event_mutex); 1229 1230 file = list_entry(&tr->events, struct trace_event_file, list); 1231 for (l = 0; l <= *pos; ) { 1232 file = t_next(m, file, &l); 1233 if (!file) 1234 break; 1235 } 1236 return file; 1237 } 1238 1239 static void * 1240 s_next(struct seq_file *m, void *v, loff_t *pos) 1241 { 1242 struct trace_event_file *file = v; 1243 struct trace_array *tr = m->private; 1244 1245 (*pos)++; 1246 1247 list_for_each_entry_continue(file, &tr->events, list) { 1248 if (file->flags & EVENT_FILE_FL_ENABLED) 1249 return file; 1250 } 1251 1252 return NULL; 1253 } 1254 1255 static void *s_start(struct seq_file *m, loff_t *pos) 1256 { 1257 struct trace_event_file *file; 1258 struct trace_array *tr = m->private; 1259 loff_t l; 1260 1261 mutex_lock(&event_mutex); 1262 1263 file = list_entry(&tr->events, struct trace_event_file, list); 1264 for (l = 0; l <= *pos; ) { 1265 file = s_next(m, file, &l); 1266 if (!file) 1267 break; 1268 } 1269 return file; 1270 } 1271 1272 static int t_show(struct seq_file *m, void *v) 1273 { 1274 struct trace_event_file *file = v; 1275 struct trace_event_call *call = file->event_call; 1276 1277 if (strcmp(call->class->system, TRACE_SYSTEM) != 0) 1278 seq_printf(m, "%s:", call->class->system); 1279 seq_printf(m, "%s\n", trace_event_name(call)); 1280 1281 return 0; 1282 } 1283 1284 static void t_stop(struct seq_file *m, void *p) 1285 { 1286 mutex_unlock(&event_mutex); 1287 } 1288 1289 static void * 1290 __next(struct seq_file *m, void *v, loff_t *pos, int type) 1291 { 1292 struct trace_array *tr = m->private; 1293 struct trace_pid_list *pid_list; 1294 1295 if (type == TRACE_PIDS) 1296 pid_list = rcu_dereference_sched(tr->filtered_pids); 1297 else 1298 pid_list = rcu_dereference_sched(tr->filtered_no_pids); 1299 1300 return trace_pid_next(pid_list, v, pos); 1301 } 1302 1303 static void * 1304 p_next(struct seq_file *m, void *v, loff_t *pos) 1305 { 1306 return __next(m, v, pos, TRACE_PIDS); 1307 } 1308 1309 static void * 1310 np_next(struct seq_file *m, void *v, loff_t *pos) 1311 { 1312 return __next(m, v, pos, TRACE_NO_PIDS); 1313 } 1314 1315 static void *__start(struct seq_file *m, loff_t *pos, int type) 1316 __acquires(RCU) 1317 { 1318 struct trace_pid_list *pid_list; 1319 struct trace_array *tr = m->private; 1320 1321 /* 1322 * Grab the mutex, to keep calls to p_next() having the same 1323 * tr->filtered_pids as p_start() has. 1324 * If we just passed the tr->filtered_pids around, then RCU would 1325 * have been enough, but doing that makes things more complex. 
1326 */ 1327 mutex_lock(&event_mutex); 1328 rcu_read_lock_sched(); 1329 1330 if (type == TRACE_PIDS) 1331 pid_list = rcu_dereference_sched(tr->filtered_pids); 1332 else 1333 pid_list = rcu_dereference_sched(tr->filtered_no_pids); 1334 1335 if (!pid_list) 1336 return NULL; 1337 1338 return trace_pid_start(pid_list, pos); 1339 } 1340 1341 static void *p_start(struct seq_file *m, loff_t *pos) 1342 __acquires(RCU) 1343 { 1344 return __start(m, pos, TRACE_PIDS); 1345 } 1346 1347 static void *np_start(struct seq_file *m, loff_t *pos) 1348 __acquires(RCU) 1349 { 1350 return __start(m, pos, TRACE_NO_PIDS); 1351 } 1352 1353 static void p_stop(struct seq_file *m, void *p) 1354 __releases(RCU) 1355 { 1356 rcu_read_unlock_sched(); 1357 mutex_unlock(&event_mutex); 1358 } 1359 1360 static ssize_t 1361 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, 1362 loff_t *ppos) 1363 { 1364 struct trace_event_file *file; 1365 unsigned long flags; 1366 char buf[4] = "0"; 1367 1368 mutex_lock(&event_mutex); 1369 file = event_file_data(filp); 1370 if (likely(file)) 1371 flags = file->flags; 1372 mutex_unlock(&event_mutex); 1373 1374 if (!file) 1375 return -ENODEV; 1376 1377 if (flags & EVENT_FILE_FL_ENABLED && 1378 !(flags & EVENT_FILE_FL_SOFT_DISABLED)) 1379 strcpy(buf, "1"); 1380 1381 if (flags & EVENT_FILE_FL_SOFT_DISABLED || 1382 flags & EVENT_FILE_FL_SOFT_MODE) 1383 strcat(buf, "*"); 1384 1385 strcat(buf, "\n"); 1386 1387 return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); 1388 } 1389 1390 static ssize_t 1391 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, 1392 loff_t *ppos) 1393 { 1394 struct trace_event_file *file; 1395 unsigned long val; 1396 int ret; 1397 1398 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 1399 if (ret) 1400 return ret; 1401 1402 ret = tracing_update_buffers(); 1403 if (ret < 0) 1404 return ret; 1405 1406 switch (val) { 1407 case 0: 1408 case 1: 1409 ret = -ENODEV; 1410 mutex_lock(&event_mutex); 1411 file = event_file_data(filp); 1412 if (likely(file)) 1413 ret = ftrace_event_enable_disable(file, val); 1414 mutex_unlock(&event_mutex); 1415 break; 1416 1417 default: 1418 return -EINVAL; 1419 } 1420 1421 *ppos += cnt; 1422 1423 return ret ? ret : cnt; 1424 } 1425 1426 static ssize_t 1427 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, 1428 loff_t *ppos) 1429 { 1430 const char set_to_char[4] = { '?', '0', '1', 'X' }; 1431 struct trace_subsystem_dir *dir = filp->private_data; 1432 struct event_subsystem *system = dir->subsystem; 1433 struct trace_event_call *call; 1434 struct trace_event_file *file; 1435 struct trace_array *tr = dir->tr; 1436 char buf[2]; 1437 int set = 0; 1438 int ret; 1439 1440 mutex_lock(&event_mutex); 1441 list_for_each_entry(file, &tr->events, list) { 1442 call = file->event_call; 1443 if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || 1444 !trace_event_name(call) || !call->class || !call->class->reg) 1445 continue; 1446 1447 if (system && strcmp(call->class->system, system->name) != 0) 1448 continue; 1449 1450 /* 1451 * We need to find out if all the events are set 1452 * or if all events or cleared, or if we have 1453 * a mixture. 1454 */ 1455 set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED)); 1456 1457 /* 1458 * If we have a mixture, no need to look further. 
1459 */ 1460 if (set == 3) 1461 break; 1462 } 1463 mutex_unlock(&event_mutex); 1464 1465 buf[0] = set_to_char[set]; 1466 buf[1] = '\n'; 1467 1468 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 1469 1470 return ret; 1471 } 1472 1473 static ssize_t 1474 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, 1475 loff_t *ppos) 1476 { 1477 struct trace_subsystem_dir *dir = filp->private_data; 1478 struct event_subsystem *system = dir->subsystem; 1479 const char *name = NULL; 1480 unsigned long val; 1481 ssize_t ret; 1482 1483 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 1484 if (ret) 1485 return ret; 1486 1487 ret = tracing_update_buffers(); 1488 if (ret < 0) 1489 return ret; 1490 1491 if (val != 0 && val != 1) 1492 return -EINVAL; 1493 1494 /* 1495 * Opening of "enable" adds a ref count to system, 1496 * so the name is safe to use. 1497 */ 1498 if (system) 1499 name = system->name; 1500 1501 ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val); 1502 if (ret) 1503 goto out; 1504 1505 ret = cnt; 1506 1507 out: 1508 *ppos += cnt; 1509 1510 return ret; 1511 } 1512 1513 enum { 1514 FORMAT_HEADER = 1, 1515 FORMAT_FIELD_SEPERATOR = 2, 1516 FORMAT_PRINTFMT = 3, 1517 }; 1518 1519 static void *f_next(struct seq_file *m, void *v, loff_t *pos) 1520 { 1521 struct trace_event_call *call = event_file_data(m->private); 1522 struct list_head *common_head = &ftrace_common_fields; 1523 struct list_head *head = trace_get_fields(call); 1524 struct list_head *node = v; 1525 1526 (*pos)++; 1527 1528 switch ((unsigned long)v) { 1529 case FORMAT_HEADER: 1530 node = common_head; 1531 break; 1532 1533 case FORMAT_FIELD_SEPERATOR: 1534 node = head; 1535 break; 1536 1537 case FORMAT_PRINTFMT: 1538 /* all done */ 1539 return NULL; 1540 } 1541 1542 node = node->prev; 1543 if (node == common_head) 1544 return (void *)FORMAT_FIELD_SEPERATOR; 1545 else if (node == head) 1546 return (void *)FORMAT_PRINTFMT; 1547 else 1548 return node; 1549 } 1550 1551 static int f_show(struct seq_file *m, void *v) 1552 { 1553 struct trace_event_call *call = event_file_data(m->private); 1554 struct ftrace_event_field *field; 1555 const char *array_descriptor; 1556 1557 switch ((unsigned long)v) { 1558 case FORMAT_HEADER: 1559 seq_printf(m, "name: %s\n", trace_event_name(call)); 1560 seq_printf(m, "ID: %d\n", call->event.type); 1561 seq_puts(m, "format:\n"); 1562 return 0; 1563 1564 case FORMAT_FIELD_SEPERATOR: 1565 seq_putc(m, '\n'); 1566 return 0; 1567 1568 case FORMAT_PRINTFMT: 1569 seq_printf(m, "\nprint fmt: %s\n", 1570 call->print_fmt); 1571 return 0; 1572 } 1573 1574 field = list_entry(v, struct ftrace_event_field, link); 1575 /* 1576 * Smartly shows the array type(except dynamic array). 
1577 * Normal: 1578 * field:TYPE VAR 1579 * If TYPE := TYPE[LEN], it is shown: 1580 * field:TYPE VAR[LEN] 1581 */ 1582 array_descriptor = strchr(field->type, '['); 1583 1584 if (str_has_prefix(field->type, "__data_loc")) 1585 array_descriptor = NULL; 1586 1587 if (!array_descriptor) 1588 seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", 1589 field->type, field->name, field->offset, 1590 field->size, !!field->is_signed); 1591 else 1592 seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", 1593 (int)(array_descriptor - field->type), 1594 field->type, field->name, 1595 array_descriptor, field->offset, 1596 field->size, !!field->is_signed); 1597 1598 return 0; 1599 } 1600 1601 static void *f_start(struct seq_file *m, loff_t *pos) 1602 { 1603 void *p = (void *)FORMAT_HEADER; 1604 loff_t l = 0; 1605 1606 /* ->stop() is called even if ->start() fails */ 1607 mutex_lock(&event_mutex); 1608 if (!event_file_data(m->private)) 1609 return ERR_PTR(-ENODEV); 1610 1611 while (l < *pos && p) 1612 p = f_next(m, p, &l); 1613 1614 return p; 1615 } 1616 1617 static void f_stop(struct seq_file *m, void *p) 1618 { 1619 mutex_unlock(&event_mutex); 1620 } 1621 1622 static const struct seq_operations trace_format_seq_ops = { 1623 .start = f_start, 1624 .next = f_next, 1625 .stop = f_stop, 1626 .show = f_show, 1627 }; 1628 1629 static int trace_format_open(struct inode *inode, struct file *file) 1630 { 1631 struct seq_file *m; 1632 int ret; 1633 1634 /* Do we want to hide event format files on tracefs lockdown? */ 1635 1636 ret = seq_open(file, &trace_format_seq_ops); 1637 if (ret < 0) 1638 return ret; 1639 1640 m = file->private_data; 1641 m->private = file; 1642 1643 return 0; 1644 } 1645 1646 static ssize_t 1647 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 1648 { 1649 int id = (long)event_file_data(filp); 1650 char buf[32]; 1651 int len; 1652 1653 if (unlikely(!id)) 1654 return -ENODEV; 1655 1656 len = sprintf(buf, "%d\n", id); 1657 1658 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 1659 } 1660 1661 static ssize_t 1662 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, 1663 loff_t *ppos) 1664 { 1665 struct trace_event_file *file; 1666 struct trace_seq *s; 1667 int r = -ENODEV; 1668 1669 if (*ppos) 1670 return 0; 1671 1672 s = kmalloc(sizeof(*s), GFP_KERNEL); 1673 1674 if (!s) 1675 return -ENOMEM; 1676 1677 trace_seq_init(s); 1678 1679 mutex_lock(&event_mutex); 1680 file = event_file_data(filp); 1681 if (file) 1682 print_event_filter(file, s); 1683 mutex_unlock(&event_mutex); 1684 1685 if (file) 1686 r = simple_read_from_buffer(ubuf, cnt, ppos, 1687 s->buffer, trace_seq_used(s)); 1688 1689 kfree(s); 1690 1691 return r; 1692 } 1693 1694 static ssize_t 1695 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, 1696 loff_t *ppos) 1697 { 1698 struct trace_event_file *file; 1699 char *buf; 1700 int err = -ENODEV; 1701 1702 if (cnt >= PAGE_SIZE) 1703 return -EINVAL; 1704 1705 buf = memdup_user_nul(ubuf, cnt); 1706 if (IS_ERR(buf)) 1707 return PTR_ERR(buf); 1708 1709 mutex_lock(&event_mutex); 1710 file = event_file_data(filp); 1711 if (file) 1712 err = apply_event_filter(file, buf); 1713 mutex_unlock(&event_mutex); 1714 1715 kfree(buf); 1716 if (err < 0) 1717 return err; 1718 1719 *ppos += cnt; 1720 1721 return cnt; 1722 } 1723 1724 static LIST_HEAD(event_subsystems); 1725 1726 static int subsystem_open(struct inode *inode, struct file *filp) 1727 { 1728 struct trace_subsystem_dir *dir = NULL, 
*iter_dir; 1729 struct trace_array *tr = NULL, *iter_tr; 1730 struct event_subsystem *system = NULL; 1731 int ret; 1732 1733 if (tracing_is_disabled()) 1734 return -ENODEV; 1735 1736 /* Make sure the system still exists */ 1737 mutex_lock(&event_mutex); 1738 mutex_lock(&trace_types_lock); 1739 list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) { 1740 list_for_each_entry(iter_dir, &iter_tr->systems, list) { 1741 if (iter_dir == inode->i_private) { 1742 /* Don't open systems with no events */ 1743 tr = iter_tr; 1744 dir = iter_dir; 1745 if (dir->nr_events) { 1746 __get_system_dir(dir); 1747 system = dir->subsystem; 1748 } 1749 goto exit_loop; 1750 } 1751 } 1752 } 1753 exit_loop: 1754 mutex_unlock(&trace_types_lock); 1755 mutex_unlock(&event_mutex); 1756 1757 if (!system) 1758 return -ENODEV; 1759 1760 /* Still need to increment the ref count of the system */ 1761 if (trace_array_get(tr) < 0) { 1762 put_system(dir); 1763 return -ENODEV; 1764 } 1765 1766 ret = tracing_open_generic(inode, filp); 1767 if (ret < 0) { 1768 trace_array_put(tr); 1769 put_system(dir); 1770 } 1771 1772 return ret; 1773 } 1774 1775 static int system_tr_open(struct inode *inode, struct file *filp) 1776 { 1777 struct trace_subsystem_dir *dir; 1778 struct trace_array *tr = inode->i_private; 1779 int ret; 1780 1781 /* Make a temporary dir that has no system but points to tr */ 1782 dir = kzalloc(sizeof(*dir), GFP_KERNEL); 1783 if (!dir) 1784 return -ENOMEM; 1785 1786 ret = tracing_open_generic_tr(inode, filp); 1787 if (ret < 0) { 1788 kfree(dir); 1789 return ret; 1790 } 1791 dir->tr = tr; 1792 filp->private_data = dir; 1793 1794 return 0; 1795 } 1796 1797 static int subsystem_release(struct inode *inode, struct file *file) 1798 { 1799 struct trace_subsystem_dir *dir = file->private_data; 1800 1801 trace_array_put(dir->tr); 1802 1803 /* 1804 * If dir->subsystem is NULL, then this is a temporary 1805 * descriptor that was made for a trace_array to enable 1806 * all subsystems. 
1807 */ 1808 if (dir->subsystem) 1809 put_system(dir); 1810 else 1811 kfree(dir); 1812 1813 return 0; 1814 } 1815 1816 static ssize_t 1817 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, 1818 loff_t *ppos) 1819 { 1820 struct trace_subsystem_dir *dir = filp->private_data; 1821 struct event_subsystem *system = dir->subsystem; 1822 struct trace_seq *s; 1823 int r; 1824 1825 if (*ppos) 1826 return 0; 1827 1828 s = kmalloc(sizeof(*s), GFP_KERNEL); 1829 if (!s) 1830 return -ENOMEM; 1831 1832 trace_seq_init(s); 1833 1834 print_subsystem_event_filter(system, s); 1835 r = simple_read_from_buffer(ubuf, cnt, ppos, 1836 s->buffer, trace_seq_used(s)); 1837 1838 kfree(s); 1839 1840 return r; 1841 } 1842 1843 static ssize_t 1844 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, 1845 loff_t *ppos) 1846 { 1847 struct trace_subsystem_dir *dir = filp->private_data; 1848 char *buf; 1849 int err; 1850 1851 if (cnt >= PAGE_SIZE) 1852 return -EINVAL; 1853 1854 buf = memdup_user_nul(ubuf, cnt); 1855 if (IS_ERR(buf)) 1856 return PTR_ERR(buf); 1857 1858 err = apply_subsystem_event_filter(dir, buf); 1859 kfree(buf); 1860 if (err < 0) 1861 return err; 1862 1863 *ppos += cnt; 1864 1865 return cnt; 1866 } 1867 1868 static ssize_t 1869 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 1870 { 1871 int (*func)(struct trace_seq *s) = filp->private_data; 1872 struct trace_seq *s; 1873 int r; 1874 1875 if (*ppos) 1876 return 0; 1877 1878 s = kmalloc(sizeof(*s), GFP_KERNEL); 1879 if (!s) 1880 return -ENOMEM; 1881 1882 trace_seq_init(s); 1883 1884 func(s); 1885 r = simple_read_from_buffer(ubuf, cnt, ppos, 1886 s->buffer, trace_seq_used(s)); 1887 1888 kfree(s); 1889 1890 return r; 1891 } 1892 1893 static void ignore_task_cpu(void *data) 1894 { 1895 struct trace_array *tr = data; 1896 struct trace_pid_list *pid_list; 1897 struct trace_pid_list *no_pid_list; 1898 1899 /* 1900 * This function is called by on_each_cpu() while the 1901 * event_mutex is held. 1902 */ 1903 pid_list = rcu_dereference_protected(tr->filtered_pids, 1904 mutex_is_locked(&event_mutex)); 1905 no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, 1906 mutex_is_locked(&event_mutex)); 1907 1908 this_cpu_write(tr->array_buffer.data->ignore_pid, 1909 trace_ignore_this_task(pid_list, no_pid_list, current)); 1910 } 1911 1912 static void register_pid_events(struct trace_array *tr) 1913 { 1914 /* 1915 * Register a probe that is called before all other probes 1916 * to set ignore_pid if next or prev do not match. 1917 * Register a probe this is called after all other probes 1918 * to only keep ignore_pid set if next pid matches. 
1919 */ 1920 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre, 1921 tr, INT_MAX); 1922 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post, 1923 tr, 0); 1924 1925 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, 1926 tr, INT_MAX); 1927 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, 1928 tr, 0); 1929 1930 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, 1931 tr, INT_MAX); 1932 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, 1933 tr, 0); 1934 1935 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre, 1936 tr, INT_MAX); 1937 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post, 1938 tr, 0); 1939 } 1940 1941 static ssize_t 1942 event_pid_write(struct file *filp, const char __user *ubuf, 1943 size_t cnt, loff_t *ppos, int type) 1944 { 1945 struct seq_file *m = filp->private_data; 1946 struct trace_array *tr = m->private; 1947 struct trace_pid_list *filtered_pids = NULL; 1948 struct trace_pid_list *other_pids = NULL; 1949 struct trace_pid_list *pid_list; 1950 struct trace_event_file *file; 1951 ssize_t ret; 1952 1953 if (!cnt) 1954 return 0; 1955 1956 ret = tracing_update_buffers(); 1957 if (ret < 0) 1958 return ret; 1959 1960 mutex_lock(&event_mutex); 1961 1962 if (type == TRACE_PIDS) { 1963 filtered_pids = rcu_dereference_protected(tr->filtered_pids, 1964 lockdep_is_held(&event_mutex)); 1965 other_pids = rcu_dereference_protected(tr->filtered_no_pids, 1966 lockdep_is_held(&event_mutex)); 1967 } else { 1968 filtered_pids = rcu_dereference_protected(tr->filtered_no_pids, 1969 lockdep_is_held(&event_mutex)); 1970 other_pids = rcu_dereference_protected(tr->filtered_pids, 1971 lockdep_is_held(&event_mutex)); 1972 } 1973 1974 ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); 1975 if (ret < 0) 1976 goto out; 1977 1978 if (type == TRACE_PIDS) 1979 rcu_assign_pointer(tr->filtered_pids, pid_list); 1980 else 1981 rcu_assign_pointer(tr->filtered_no_pids, pid_list); 1982 1983 list_for_each_entry(file, &tr->events, list) { 1984 set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); 1985 } 1986 1987 if (filtered_pids) { 1988 tracepoint_synchronize_unregister(); 1989 trace_pid_list_free(filtered_pids); 1990 } else if (pid_list && !other_pids) { 1991 register_pid_events(tr); 1992 } 1993 1994 /* 1995 * Ignoring of pids is done at task switch. But we have to 1996 * check for those tasks that are currently running. 1997 * Always do this in case a pid was appended or removed. 
1998 */ 1999 on_each_cpu(ignore_task_cpu, tr, 1); 2000 2001 out: 2002 mutex_unlock(&event_mutex); 2003 2004 if (ret > 0) 2005 *ppos += ret; 2006 2007 return ret; 2008 } 2009 2010 static ssize_t 2011 ftrace_event_pid_write(struct file *filp, const char __user *ubuf, 2012 size_t cnt, loff_t *ppos) 2013 { 2014 return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS); 2015 } 2016 2017 static ssize_t 2018 ftrace_event_npid_write(struct file *filp, const char __user *ubuf, 2019 size_t cnt, loff_t *ppos) 2020 { 2021 return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS); 2022 } 2023 2024 static int ftrace_event_avail_open(struct inode *inode, struct file *file); 2025 static int ftrace_event_set_open(struct inode *inode, struct file *file); 2026 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file); 2027 static int ftrace_event_set_npid_open(struct inode *inode, struct file *file); 2028 static int ftrace_event_release(struct inode *inode, struct file *file); 2029 2030 static const struct seq_operations show_event_seq_ops = { 2031 .start = t_start, 2032 .next = t_next, 2033 .show = t_show, 2034 .stop = t_stop, 2035 }; 2036 2037 static const struct seq_operations show_set_event_seq_ops = { 2038 .start = s_start, 2039 .next = s_next, 2040 .show = t_show, 2041 .stop = t_stop, 2042 }; 2043 2044 static const struct seq_operations show_set_pid_seq_ops = { 2045 .start = p_start, 2046 .next = p_next, 2047 .show = trace_pid_show, 2048 .stop = p_stop, 2049 }; 2050 2051 static const struct seq_operations show_set_no_pid_seq_ops = { 2052 .start = np_start, 2053 .next = np_next, 2054 .show = trace_pid_show, 2055 .stop = p_stop, 2056 }; 2057 2058 static const struct file_operations ftrace_avail_fops = { 2059 .open = ftrace_event_avail_open, 2060 .read = seq_read, 2061 .llseek = seq_lseek, 2062 .release = seq_release, 2063 }; 2064 2065 static const struct file_operations ftrace_set_event_fops = { 2066 .open = ftrace_event_set_open, 2067 .read = seq_read, 2068 .write = ftrace_event_write, 2069 .llseek = seq_lseek, 2070 .release = ftrace_event_release, 2071 }; 2072 2073 static const struct file_operations ftrace_set_event_pid_fops = { 2074 .open = ftrace_event_set_pid_open, 2075 .read = seq_read, 2076 .write = ftrace_event_pid_write, 2077 .llseek = seq_lseek, 2078 .release = ftrace_event_release, 2079 }; 2080 2081 static const struct file_operations ftrace_set_event_notrace_pid_fops = { 2082 .open = ftrace_event_set_npid_open, 2083 .read = seq_read, 2084 .write = ftrace_event_npid_write, 2085 .llseek = seq_lseek, 2086 .release = ftrace_event_release, 2087 }; 2088 2089 static const struct file_operations ftrace_enable_fops = { 2090 .open = tracing_open_generic, 2091 .read = event_enable_read, 2092 .write = event_enable_write, 2093 .llseek = default_llseek, 2094 }; 2095 2096 static const struct file_operations ftrace_event_format_fops = { 2097 .open = trace_format_open, 2098 .read = seq_read, 2099 .llseek = seq_lseek, 2100 .release = seq_release, 2101 }; 2102 2103 static const struct file_operations ftrace_event_id_fops = { 2104 .read = event_id_read, 2105 .llseek = default_llseek, 2106 }; 2107 2108 static const struct file_operations ftrace_event_filter_fops = { 2109 .open = tracing_open_generic, 2110 .read = event_filter_read, 2111 .write = event_filter_write, 2112 .llseek = default_llseek, 2113 }; 2114 2115 static const struct file_operations ftrace_subsystem_filter_fops = { 2116 .open = subsystem_open, 2117 .read = subsystem_filter_read, 2118 .write = subsystem_filter_write, 2119 
.llseek = default_llseek, 2120 .release = subsystem_release, 2121 }; 2122 2123 static const struct file_operations ftrace_system_enable_fops = { 2124 .open = subsystem_open, 2125 .read = system_enable_read, 2126 .write = system_enable_write, 2127 .llseek = default_llseek, 2128 .release = subsystem_release, 2129 }; 2130 2131 static const struct file_operations ftrace_tr_enable_fops = { 2132 .open = system_tr_open, 2133 .read = system_enable_read, 2134 .write = system_enable_write, 2135 .llseek = default_llseek, 2136 .release = subsystem_release, 2137 }; 2138 2139 static const struct file_operations ftrace_show_header_fops = { 2140 .open = tracing_open_generic, 2141 .read = show_header, 2142 .llseek = default_llseek, 2143 }; 2144 2145 static int 2146 ftrace_event_open(struct inode *inode, struct file *file, 2147 const struct seq_operations *seq_ops) 2148 { 2149 struct seq_file *m; 2150 int ret; 2151 2152 ret = security_locked_down(LOCKDOWN_TRACEFS); 2153 if (ret) 2154 return ret; 2155 2156 ret = seq_open(file, seq_ops); 2157 if (ret < 0) 2158 return ret; 2159 m = file->private_data; 2160 /* copy tr over to seq ops */ 2161 m->private = inode->i_private; 2162 2163 return ret; 2164 } 2165 2166 static int ftrace_event_release(struct inode *inode, struct file *file) 2167 { 2168 struct trace_array *tr = inode->i_private; 2169 2170 trace_array_put(tr); 2171 2172 return seq_release(inode, file); 2173 } 2174 2175 static int 2176 ftrace_event_avail_open(struct inode *inode, struct file *file) 2177 { 2178 const struct seq_operations *seq_ops = &show_event_seq_ops; 2179 2180 /* Checks for tracefs lockdown */ 2181 return ftrace_event_open(inode, file, seq_ops); 2182 } 2183 2184 static int 2185 ftrace_event_set_open(struct inode *inode, struct file *file) 2186 { 2187 const struct seq_operations *seq_ops = &show_set_event_seq_ops; 2188 struct trace_array *tr = inode->i_private; 2189 int ret; 2190 2191 ret = tracing_check_open_get_tr(tr); 2192 if (ret) 2193 return ret; 2194 2195 if ((file->f_mode & FMODE_WRITE) && 2196 (file->f_flags & O_TRUNC)) 2197 ftrace_clear_events(tr); 2198 2199 ret = ftrace_event_open(inode, file, seq_ops); 2200 if (ret < 0) 2201 trace_array_put(tr); 2202 return ret; 2203 } 2204 2205 static int 2206 ftrace_event_set_pid_open(struct inode *inode, struct file *file) 2207 { 2208 const struct seq_operations *seq_ops = &show_set_pid_seq_ops; 2209 struct trace_array *tr = inode->i_private; 2210 int ret; 2211 2212 ret = tracing_check_open_get_tr(tr); 2213 if (ret) 2214 return ret; 2215 2216 if ((file->f_mode & FMODE_WRITE) && 2217 (file->f_flags & O_TRUNC)) 2218 ftrace_clear_event_pids(tr, TRACE_PIDS); 2219 2220 ret = ftrace_event_open(inode, file, seq_ops); 2221 if (ret < 0) 2222 trace_array_put(tr); 2223 return ret; 2224 } 2225 2226 static int 2227 ftrace_event_set_npid_open(struct inode *inode, struct file *file) 2228 { 2229 const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops; 2230 struct trace_array *tr = inode->i_private; 2231 int ret; 2232 2233 ret = tracing_check_open_get_tr(tr); 2234 if (ret) 2235 return ret; 2236 2237 if ((file->f_mode & FMODE_WRITE) && 2238 (file->f_flags & O_TRUNC)) 2239 ftrace_clear_event_pids(tr, TRACE_NO_PIDS); 2240 2241 ret = ftrace_event_open(inode, file, seq_ops); 2242 if (ret < 0) 2243 trace_array_put(tr); 2244 return ret; 2245 } 2246 2247 static struct event_subsystem * 2248 create_new_subsystem(const char *name) 2249 { 2250 struct event_subsystem *system; 2251 2252 /* need to create new entry */ 2253 system = kmalloc(sizeof(*system), 
GFP_KERNEL); 2254 if (!system) 2255 return NULL; 2256 2257 system->ref_count = 1; 2258 2259 /* Only allocate if dynamic (kprobes and modules) */ 2260 system->name = kstrdup_const(name, GFP_KERNEL); 2261 if (!system->name) 2262 goto out_free; 2263 2264 system->filter = NULL; 2265 2266 system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); 2267 if (!system->filter) 2268 goto out_free; 2269 2270 list_add(&system->list, &event_subsystems); 2271 2272 return system; 2273 2274 out_free: 2275 kfree_const(system->name); 2276 kfree(system); 2277 return NULL; 2278 } 2279 2280 static struct dentry * 2281 event_subsystem_dir(struct trace_array *tr, const char *name, 2282 struct trace_event_file *file, struct dentry *parent) 2283 { 2284 struct event_subsystem *system, *iter; 2285 struct trace_subsystem_dir *dir; 2286 struct dentry *entry; 2287 2288 /* First see if we did not already create this dir */ 2289 list_for_each_entry(dir, &tr->systems, list) { 2290 system = dir->subsystem; 2291 if (strcmp(system->name, name) == 0) { 2292 dir->nr_events++; 2293 file->system = dir; 2294 return dir->entry; 2295 } 2296 } 2297 2298 /* Now see if the system itself exists. */ 2299 system = NULL; 2300 list_for_each_entry(iter, &event_subsystems, list) { 2301 if (strcmp(iter->name, name) == 0) { 2302 system = iter; 2303 break; 2304 } 2305 } 2306 2307 dir = kmalloc(sizeof(*dir), GFP_KERNEL); 2308 if (!dir) 2309 goto out_fail; 2310 2311 if (!system) { 2312 system = create_new_subsystem(name); 2313 if (!system) 2314 goto out_free; 2315 } else 2316 __get_system(system); 2317 2318 dir->entry = tracefs_create_dir(name, parent); 2319 if (!dir->entry) { 2320 pr_warn("Failed to create system directory %s\n", name); 2321 __put_system(system); 2322 goto out_free; 2323 } 2324 2325 dir->tr = tr; 2326 dir->ref_count = 1; 2327 dir->nr_events = 1; 2328 dir->subsystem = system; 2329 file->system = dir; 2330 2331 /* the ftrace system is special, do not create enable or filter files */ 2332 if (strcmp(name, "ftrace") != 0) { 2333 2334 entry = tracefs_create_file("filter", TRACE_MODE_WRITE, 2335 dir->entry, dir, 2336 &ftrace_subsystem_filter_fops); 2337 if (!entry) { 2338 kfree(system->filter); 2339 system->filter = NULL; 2340 pr_warn("Could not create tracefs '%s/filter' entry\n", name); 2341 } 2342 2343 trace_create_file("enable", TRACE_MODE_WRITE, dir->entry, dir, 2344 &ftrace_system_enable_fops); 2345 } 2346 2347 list_add(&dir->list, &tr->systems); 2348 2349 return dir->entry; 2350 2351 out_free: 2352 kfree(dir); 2353 out_fail: 2354 /* Only print this message if failed on memory allocation */ 2355 if (!dir || !system) 2356 pr_warn("No memory to create event subsystem %s\n", name); 2357 return NULL; 2358 } 2359 2360 static int 2361 event_define_fields(struct trace_event_call *call) 2362 { 2363 struct list_head *head; 2364 int ret = 0; 2365 2366 /* 2367 * Other events may have the same class. Only update 2368 * the fields if they are not already defined. 
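 * For example, tracepoints created with DEFINE_EVENT() from a common
 * DECLARE_EVENT_CLASS() share one class and hence one field list, so
 * only the first such event to get here defines the fields.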
2369 */ 2370 head = trace_get_fields(call); 2371 if (list_empty(head)) { 2372 struct trace_event_fields *field = call->class->fields_array; 2373 unsigned int offset = sizeof(struct trace_entry); 2374 2375 for (; field->type; field++) { 2376 if (field->type == TRACE_FUNCTION_TYPE) { 2377 field->define_fields(call); 2378 break; 2379 } 2380 2381 offset = ALIGN(offset, field->align); 2382 ret = trace_define_field(call, field->type, field->name, 2383 offset, field->size, 2384 field->is_signed, field->filter_type); 2385 if (WARN_ON_ONCE(ret)) { 2386 pr_err("error code is %d\n", ret); 2387 break; 2388 } 2389 2390 offset += field->size; 2391 } 2392 } 2393 2394 return ret; 2395 } 2396 2397 static int 2398 event_create_dir(struct dentry *parent, struct trace_event_file *file) 2399 { 2400 struct trace_event_call *call = file->event_call; 2401 struct trace_array *tr = file->tr; 2402 struct dentry *d_events; 2403 const char *name; 2404 int ret; 2405 2406 /* 2407 * If the trace point header did not define TRACE_SYSTEM 2408 * then the system would be called "TRACE_SYSTEM". 2409 */ 2410 if (strcmp(call->class->system, TRACE_SYSTEM) != 0) { 2411 d_events = event_subsystem_dir(tr, call->class->system, file, parent); 2412 if (!d_events) 2413 return -ENOMEM; 2414 } else 2415 d_events = parent; 2416 2417 name = trace_event_name(call); 2418 file->dir = tracefs_create_dir(name, d_events); 2419 if (!file->dir) { 2420 pr_warn("Could not create tracefs '%s' directory\n", name); 2421 return -1; 2422 } 2423 2424 if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) 2425 trace_create_file("enable", TRACE_MODE_WRITE, file->dir, file, 2426 &ftrace_enable_fops); 2427 2428 #ifdef CONFIG_PERF_EVENTS 2429 if (call->event.type && call->class->reg) 2430 trace_create_file("id", TRACE_MODE_READ, file->dir, 2431 (void *)(long)call->event.type, 2432 &ftrace_event_id_fops); 2433 #endif 2434 2435 ret = event_define_fields(call); 2436 if (ret < 0) { 2437 pr_warn("Could not initialize trace point events/%s\n", name); 2438 return ret; 2439 } 2440 2441 /* 2442 * Only event directories that can be enabled should have 2443 * triggers or filters. 2444 */ 2445 if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { 2446 trace_create_file("filter", TRACE_MODE_WRITE, file->dir, 2447 file, &ftrace_event_filter_fops); 2448 2449 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir, 2450 file, &event_trigger_fops); 2451 } 2452 2453 #ifdef CONFIG_HIST_TRIGGERS 2454 trace_create_file("hist", TRACE_MODE_READ, file->dir, file, 2455 &event_hist_fops); 2456 #endif 2457 #ifdef CONFIG_HIST_TRIGGERS_DEBUG 2458 trace_create_file("hist_debug", TRACE_MODE_READ, file->dir, file, 2459 &event_hist_debug_fops); 2460 #endif 2461 trace_create_file("format", TRACE_MODE_READ, file->dir, call, 2462 &ftrace_event_format_fops); 2463 2464 #ifdef CONFIG_TRACE_EVENT_INJECT 2465 if (call->event.type && call->class->reg) 2466 trace_create_file("inject", 0200, file->dir, file, 2467 &event_inject_fops); 2468 #endif 2469 2470 return 0; 2471 } 2472 2473 static void remove_event_from_tracers(struct trace_event_call *call) 2474 { 2475 struct trace_event_file *file; 2476 struct trace_array *tr; 2477 2478 do_for_each_event_file_safe(tr, file) { 2479 if (file->event_call != call) 2480 continue; 2481 2482 remove_event_file_dir(file); 2483 /* 2484 * The do_for_each_event_file_safe() is 2485 * a double loop. After finding the call for this 2486 * trace_array, we use break to jump to the next 2487 * trace_array. 
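 * A trace_array holds at most one trace_event_file for a given
 * call, so once that file has been removed there is nothing left
 * to do for this trace_array.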
2488 */ 2489 break; 2490 } while_for_each_event_file(); 2491 } 2492 2493 static void event_remove(struct trace_event_call *call) 2494 { 2495 struct trace_array *tr; 2496 struct trace_event_file *file; 2497 2498 do_for_each_event_file(tr, file) { 2499 if (file->event_call != call) 2500 continue; 2501 2502 if (file->flags & EVENT_FILE_FL_WAS_ENABLED) 2503 tr->clear_trace = true; 2504 2505 ftrace_event_enable_disable(file, 0); 2506 /* 2507 * The do_for_each_event_file() is 2508 * a double loop. After finding the call for this 2509 * trace_array, we use break to jump to the next 2510 * trace_array. 2511 */ 2512 break; 2513 } while_for_each_event_file(); 2514 2515 if (call->event.funcs) 2516 __unregister_trace_event(&call->event); 2517 remove_event_from_tracers(call); 2518 list_del(&call->list); 2519 } 2520 2521 static int event_init(struct trace_event_call *call) 2522 { 2523 int ret = 0; 2524 const char *name; 2525 2526 name = trace_event_name(call); 2527 if (WARN_ON(!name)) 2528 return -EINVAL; 2529 2530 if (call->class->raw_init) { 2531 ret = call->class->raw_init(call); 2532 if (ret < 0 && ret != -ENOSYS) 2533 pr_warn("Could not initialize trace events/%s\n", name); 2534 } 2535 2536 return ret; 2537 } 2538 2539 static int 2540 __register_event(struct trace_event_call *call, struct module *mod) 2541 { 2542 int ret; 2543 2544 ret = event_init(call); 2545 if (ret < 0) 2546 return ret; 2547 2548 list_add(&call->list, &ftrace_events); 2549 if (call->flags & TRACE_EVENT_FL_DYNAMIC) 2550 atomic_set(&call->refcnt, 0); 2551 else 2552 call->module = mod; 2553 2554 return 0; 2555 } 2556 2557 static char *eval_replace(char *ptr, struct trace_eval_map *map, int len) 2558 { 2559 int rlen; 2560 int elen; 2561 2562 /* Find the length of the eval value as a string */ 2563 elen = snprintf(ptr, 0, "%ld", map->eval_value); 2564 /* Make sure there's enough room to replace the string with the value */ 2565 if (len < elen) 2566 return NULL; 2567 2568 snprintf(ptr, elen + 1, "%ld", map->eval_value); 2569 2570 /* Get the rest of the string of ptr */ 2571 rlen = strlen(ptr + len); 2572 memmove(ptr + elen, ptr + len, rlen); 2573 /* Make sure we end the new string */ 2574 ptr[elen + rlen] = 0; 2575 2576 return ptr + elen; 2577 } 2578 2579 static void update_event_printk(struct trace_event_call *call, 2580 struct trace_eval_map *map) 2581 { 2582 char *ptr; 2583 int quote = 0; 2584 int len = strlen(map->eval_string); 2585 2586 for (ptr = call->print_fmt; *ptr; ptr++) { 2587 if (*ptr == '\\') { 2588 ptr++; 2589 /* paranoid */ 2590 if (!*ptr) 2591 break; 2592 continue; 2593 } 2594 if (*ptr == '"') { 2595 quote ^= 1; 2596 continue; 2597 } 2598 if (quote) 2599 continue; 2600 if (isdigit(*ptr)) { 2601 /* skip numbers */ 2602 do { 2603 ptr++; 2604 /* Check for alpha chars like ULL */ 2605 } while (isalnum(*ptr)); 2606 if (!*ptr) 2607 break; 2608 /* 2609 * A number must have some kind of delimiter after 2610 * it, and we can ignore that too. 2611 */ 2612 continue; 2613 } 2614 if (isalpha(*ptr) || *ptr == '_') { 2615 if (strncmp(map->eval_string, ptr, len) == 0 && 2616 !isalnum(ptr[len]) && ptr[len] != '_') { 2617 ptr = eval_replace(ptr, map, len); 2618 /* enum/sizeof string smaller than value */ 2619 if (WARN_ON_ONCE(!ptr)) 2620 return; 2621 /* 2622 * No need to decrement here, as eval_replace() 2623 * returns the pointer to the character passed 2624 * the eval, and two evals can not be placed 2625 * back to back without something in between. 2626 * We can skip that something in between. 
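 * (As an illustration: for a hypothetical map entry MY_EVAL == 4,
 * a print_fmt fragment such as "REC->val == MY_EVAL" has already
 * been rewritten to "REC->val == 4" at this point, and ptr points
 * just past the '4'.)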
2627 */ 2628 continue; 2629 } 2630 skip_more: 2631 do { 2632 ptr++; 2633 } while (isalnum(*ptr) || *ptr == '_'); 2634 if (!*ptr) 2635 break; 2636 /* 2637 * If what comes after this variable is a '.' or 2638 * '->' then we can continue to ignore that string. 2639 */ 2640 if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) { 2641 ptr += *ptr == '.' ? 1 : 2; 2642 if (!*ptr) 2643 break; 2644 goto skip_more; 2645 } 2646 /* 2647 * Once again, we can skip the delimiter that came 2648 * after the string. 2649 */ 2650 continue; 2651 } 2652 } 2653 } 2654 2655 static void add_str_to_module(struct module *module, char *str) 2656 { 2657 struct module_string *modstr; 2658 2659 modstr = kmalloc(sizeof(*modstr), GFP_KERNEL); 2660 2661 /* 2662 * If we failed to allocate memory here, then we'll just 2663 * let the str memory leak when the module is removed. 2664 * If this fails to allocate, there's worse problems than 2665 * a leaked string on module removal. 2666 */ 2667 if (WARN_ON_ONCE(!modstr)) 2668 return; 2669 2670 modstr->module = module; 2671 modstr->str = str; 2672 2673 list_add(&modstr->next, &module_strings); 2674 } 2675 2676 static void update_event_fields(struct trace_event_call *call, 2677 struct trace_eval_map *map) 2678 { 2679 struct ftrace_event_field *field; 2680 struct list_head *head; 2681 char *ptr; 2682 char *str; 2683 int len = strlen(map->eval_string); 2684 2685 /* Dynamic events should never have field maps */ 2686 if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC)) 2687 return; 2688 2689 head = trace_get_fields(call); 2690 list_for_each_entry(field, head, link) { 2691 ptr = strchr(field->type, '['); 2692 if (!ptr) 2693 continue; 2694 ptr++; 2695 2696 if (!isalpha(*ptr) && *ptr != '_') 2697 continue; 2698 2699 if (strncmp(map->eval_string, ptr, len) != 0) 2700 continue; 2701 2702 str = kstrdup(field->type, GFP_KERNEL); 2703 if (WARN_ON_ONCE(!str)) 2704 return; 2705 ptr = str + (ptr - field->type); 2706 ptr = eval_replace(ptr, map, len); 2707 /* enum/sizeof string smaller than value */ 2708 if (WARN_ON_ONCE(!ptr)) { 2709 kfree(str); 2710 continue; 2711 } 2712 2713 /* 2714 * If the event is part of a module, then we need to free the string 2715 * when the module is removed. Otherwise, it will stay allocated 2716 * until a reboot. 2717 */ 2718 if (call->module) 2719 add_str_to_module(call->module, str); 2720 2721 field->type = str; 2722 } 2723 } 2724 2725 void trace_event_eval_update(struct trace_eval_map **map, int len) 2726 { 2727 struct trace_event_call *call, *p; 2728 const char *last_system = NULL; 2729 bool first = false; 2730 int last_i; 2731 int i; 2732 2733 down_write(&trace_event_sem); 2734 list_for_each_entry_safe(call, p, &ftrace_events, list) { 2735 /* events are usually grouped together with systems */ 2736 if (!last_system || call->class->system != last_system) { 2737 first = true; 2738 last_i = 0; 2739 last_system = call->class->system; 2740 } 2741 2742 /* 2743 * Since calls are grouped by systems, the likelihood that the 2744 * next call in the iteration belongs to the same system as the 2745 * previous call is high. As an optimization, we skip searching 2746 * for a map[] that matches the call's system if the last call 2747 * was from the same system. That's what last_i is for. If the 2748 * call has the same system as the previous call, then last_i 2749 * will be the index of the first map[] that has a matching 2750 * system. 
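 * When the iteration moves on to a call from a different system,
 * last_i is reset to zero and the whole map[] array is scanned
 * again for the first matching entry.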
2751 */ 2752 for (i = last_i; i < len; i++) { 2753 if (call->class->system == map[i]->system) { 2754 /* Save the first system if need be */ 2755 if (first) { 2756 last_i = i; 2757 first = false; 2758 } 2759 update_event_printk(call, map[i]); 2760 update_event_fields(call, map[i]); 2761 } 2762 } 2763 } 2764 up_write(&trace_event_sem); 2765 } 2766 2767 static struct trace_event_file * 2768 trace_create_new_event(struct trace_event_call *call, 2769 struct trace_array *tr) 2770 { 2771 struct trace_pid_list *no_pid_list; 2772 struct trace_pid_list *pid_list; 2773 struct trace_event_file *file; 2774 unsigned int first; 2775 2776 file = kmem_cache_alloc(file_cachep, GFP_TRACE); 2777 if (!file) 2778 return NULL; 2779 2780 pid_list = rcu_dereference_protected(tr->filtered_pids, 2781 lockdep_is_held(&event_mutex)); 2782 no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, 2783 lockdep_is_held(&event_mutex)); 2784 2785 if (!trace_pid_list_first(pid_list, &first) || 2786 !trace_pid_list_first(no_pid_list, &first)) 2787 file->flags |= EVENT_FILE_FL_PID_FILTER; 2788 2789 file->event_call = call; 2790 file->tr = tr; 2791 atomic_set(&file->sm_ref, 0); 2792 atomic_set(&file->tm_ref, 0); 2793 INIT_LIST_HEAD(&file->triggers); 2794 list_add(&file->list, &tr->events); 2795 2796 return file; 2797 } 2798 2799 /* Add an event to a trace directory */ 2800 static int 2801 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) 2802 { 2803 struct trace_event_file *file; 2804 2805 file = trace_create_new_event(call, tr); 2806 if (!file) 2807 return -ENOMEM; 2808 2809 if (eventdir_initialized) 2810 return event_create_dir(tr->event_dir, file); 2811 else 2812 return event_define_fields(call); 2813 } 2814 2815 /* 2816 * Just create a descriptor for early init. A descriptor is required 2817 * for enabling events at boot. We want to enable events before 2818 * the filesystem is initialized. 2819 */ 2820 static int 2821 __trace_early_add_new_event(struct trace_event_call *call, 2822 struct trace_array *tr) 2823 { 2824 struct trace_event_file *file; 2825 2826 file = trace_create_new_event(call, tr); 2827 if (!file) 2828 return -ENOMEM; 2829 2830 return event_define_fields(call); 2831 } 2832 2833 struct ftrace_module_file_ops; 2834 static void __add_event_to_tracers(struct trace_event_call *call); 2835 2836 /* Add an additional event_call dynamically */ 2837 int trace_add_event_call(struct trace_event_call *call) 2838 { 2839 int ret; 2840 lockdep_assert_held(&event_mutex); 2841 2842 mutex_lock(&trace_types_lock); 2843 2844 ret = __register_event(call, NULL); 2845 if (ret >= 0) 2846 __add_event_to_tracers(call); 2847 2848 mutex_unlock(&trace_types_lock); 2849 return ret; 2850 } 2851 EXPORT_SYMBOL_GPL(trace_add_event_call); 2852 2853 /* 2854 * Must be called under locking of trace_types_lock, event_mutex and 2855 * trace_event_sem. 
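 * The callers below take them in the order event_mutex, then
 * trace_types_lock, then trace_event_sem (see trace_remove_event_call()
 * and the module notifier path).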
2856 */ 2857 static void __trace_remove_event_call(struct trace_event_call *call) 2858 { 2859 event_remove(call); 2860 trace_destroy_fields(call); 2861 free_event_filter(call->filter); 2862 call->filter = NULL; 2863 } 2864 2865 static int probe_remove_event_call(struct trace_event_call *call) 2866 { 2867 struct trace_array *tr; 2868 struct trace_event_file *file; 2869 2870 #ifdef CONFIG_PERF_EVENTS 2871 if (call->perf_refcount) 2872 return -EBUSY; 2873 #endif 2874 do_for_each_event_file(tr, file) { 2875 if (file->event_call != call) 2876 continue; 2877 /* 2878 * We can't rely on the ftrace_event_enable_disable(enable => 0) 2879 * that we are going to do; EVENT_FILE_FL_SOFT_MODE can suppress 2880 * TRACE_REG_UNREGISTER. 2881 */ 2882 if (file->flags & EVENT_FILE_FL_ENABLED) 2883 return -EBUSY; 2884 /* 2885 * The do_for_each_event_file() is 2886 * a double loop. After finding the call for this 2887 * trace_array, we use break to jump to the next 2888 * trace_array. 2889 */ 2890 break; 2891 } while_for_each_event_file(); 2892 2893 __trace_remove_event_call(call); 2894 2895 return 0; 2896 } 2897 2898 /* Remove an event_call */ 2899 int trace_remove_event_call(struct trace_event_call *call) 2900 { 2901 int ret; 2902 2903 lockdep_assert_held(&event_mutex); 2904 2905 mutex_lock(&trace_types_lock); 2906 down_write(&trace_event_sem); 2907 ret = probe_remove_event_call(call); 2908 up_write(&trace_event_sem); 2909 mutex_unlock(&trace_types_lock); 2910 2911 return ret; 2912 } 2913 EXPORT_SYMBOL_GPL(trace_remove_event_call); 2914 2915 #define for_each_event(event, start, end) \ 2916 for (event = start; \ 2917 (unsigned long)event < (unsigned long)end; \ 2918 event++) 2919 2920 #ifdef CONFIG_MODULES 2921 2922 static void trace_module_add_events(struct module *mod) 2923 { 2924 struct trace_event_call **call, **start, **end; 2925 2926 if (!mod->num_trace_events) 2927 return; 2928 2929 /* Don't add infrastructure for mods without tracepoints */ 2930 if (trace_module_has_bad_taint(mod)) { 2931 pr_err("%s: module has bad taint, not creating trace events\n", 2932 mod->name); 2933 return; 2934 } 2935 2936 start = mod->trace_events; 2937 end = mod->trace_events + mod->num_trace_events; 2938 2939 for_each_event(call, start, end) { 2940 __register_event(*call, mod); 2941 __add_event_to_tracers(*call); 2942 } 2943 } 2944 2945 static void trace_module_remove_events(struct module *mod) 2946 { 2947 struct trace_event_call *call, *p; 2948 struct module_string *modstr, *m; 2949 2950 down_write(&trace_event_sem); 2951 list_for_each_entry_safe(call, p, &ftrace_events, list) { 2952 if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module) 2953 continue; 2954 if (call->module == mod) 2955 __trace_remove_event_call(call); 2956 } 2957 /* Check for any strings allocated for this module */ 2958 list_for_each_entry_safe(modstr, m, &module_strings, next) { 2959 if (modstr->module != mod) 2960 continue; 2961 list_del(&modstr->next); 2962 kfree(modstr->str); 2963 kfree(modstr); 2964 } 2965 up_write(&trace_event_sem); 2966 2967 /* 2968 * It is safest to reset the ring buffer if the module being unloaded 2969 * registered any events that were used. The only worry is if 2970 * a new module gets loaded, and takes on the same id as the events 2971 * of this module. When printing out the buffer, traced events left 2972 * over from this module may be passed to the new module events and 2973 * unexpected results may occur.
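 * Resetting the buffers via tracing_reset_all_online_cpus() below
 * discards any such stale entries before their type ids can be
 * reused.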
2974 */ 2975 tracing_reset_all_online_cpus(); 2976 } 2977 2978 static int trace_module_notify(struct notifier_block *self, 2979 unsigned long val, void *data) 2980 { 2981 struct module *mod = data; 2982 2983 mutex_lock(&event_mutex); 2984 mutex_lock(&trace_types_lock); 2985 switch (val) { 2986 case MODULE_STATE_COMING: 2987 trace_module_add_events(mod); 2988 break; 2989 case MODULE_STATE_GOING: 2990 trace_module_remove_events(mod); 2991 break; 2992 } 2993 mutex_unlock(&trace_types_lock); 2994 mutex_unlock(&event_mutex); 2995 2996 return NOTIFY_OK; 2997 } 2998 2999 static struct notifier_block trace_module_nb = { 3000 .notifier_call = trace_module_notify, 3001 .priority = 1, /* higher than trace.c module notify */ 3002 }; 3003 #endif /* CONFIG_MODULES */ 3004 3005 /* Create a new event directory structure for a trace directory. */ 3006 static void 3007 __trace_add_event_dirs(struct trace_array *tr) 3008 { 3009 struct trace_event_call *call; 3010 int ret; 3011 3012 list_for_each_entry(call, &ftrace_events, list) { 3013 ret = __trace_add_new_event(call, tr); 3014 if (ret < 0) 3015 pr_warn("Could not create directory for event %s\n", 3016 trace_event_name(call)); 3017 } 3018 } 3019 3020 /* Returns any file that matches the system and event */ 3021 struct trace_event_file * 3022 __find_event_file(struct trace_array *tr, const char *system, const char *event) 3023 { 3024 struct trace_event_file *file; 3025 struct trace_event_call *call; 3026 const char *name; 3027 3028 list_for_each_entry(file, &tr->events, list) { 3029 3030 call = file->event_call; 3031 name = trace_event_name(call); 3032 3033 if (!name || !call->class) 3034 continue; 3035 3036 if (strcmp(event, name) == 0 && 3037 strcmp(system, call->class->system) == 0) 3038 return file; 3039 } 3040 return NULL; 3041 } 3042 3043 /* Returns valid trace event files that match system and event */ 3044 struct trace_event_file * 3045 find_event_file(struct trace_array *tr, const char *system, const char *event) 3046 { 3047 struct trace_event_file *file; 3048 3049 file = __find_event_file(tr, system, event); 3050 if (!file || !file->event_call->class->reg || 3051 file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) 3052 return NULL; 3053 3054 return file; 3055 } 3056 3057 /** 3058 * trace_get_event_file - Find and return a trace event file 3059 * @instance: The name of the trace instance containing the event 3060 * @system: The name of the system containing the event 3061 * @event: The name of the event 3062 * 3063 * Return a trace event file given the trace instance name, trace 3064 * system, and trace event name. If the instance name is NULL, it 3065 * refers to the top-level trace array. 3066 * 3067 * This function will look it up and return it if found, after calling 3068 * trace_array_get() to prevent the instance from going away, and 3069 * increment the event's module refcount to prevent it from being 3070 * removed. 3071 * 3072 * To release the file, call trace_put_event_file(), which will call 3073 * trace_array_put() and decrement the event's module refcount. 3074 * 3075 * Return: The trace event on success, ERR_PTR otherwise. 
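 *
 * An illustrative (not prescriptive) sketch of a caller, assuming the
 * "sched" system's "sched_switch" event is available:
 *
 *	file = trace_get_event_file(NULL, "sched", "sched_switch");
 *	if (IS_ERR(file))
 *		return PTR_ERR(file);
 *	... use the event file ...
 *	trace_put_event_file(file);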
3076 */ 3077 struct trace_event_file *trace_get_event_file(const char *instance, 3078 const char *system, 3079 const char *event) 3080 { 3081 struct trace_array *tr = top_trace_array(); 3082 struct trace_event_file *file = NULL; 3083 int ret = -EINVAL; 3084 3085 if (instance) { 3086 tr = trace_array_find_get(instance); 3087 if (!tr) 3088 return ERR_PTR(-ENOENT); 3089 } else { 3090 ret = trace_array_get(tr); 3091 if (ret) 3092 return ERR_PTR(ret); 3093 } 3094 3095 mutex_lock(&event_mutex); 3096 3097 file = find_event_file(tr, system, event); 3098 if (!file) { 3099 trace_array_put(tr); 3100 ret = -EINVAL; 3101 goto out; 3102 } 3103 3104 /* Don't let event modules unload while in use */ 3105 ret = trace_event_try_get_ref(file->event_call); 3106 if (!ret) { 3107 trace_array_put(tr); 3108 ret = -EBUSY; 3109 goto out; 3110 } 3111 3112 ret = 0; 3113 out: 3114 mutex_unlock(&event_mutex); 3115 3116 if (ret) 3117 file = ERR_PTR(ret); 3118 3119 return file; 3120 } 3121 EXPORT_SYMBOL_GPL(trace_get_event_file); 3122 3123 /** 3124 * trace_put_event_file - Release a file from trace_get_event_file() 3125 * @file: The trace event file 3126 * 3127 * If a file was retrieved using trace_get_event_file(), this should 3128 * be called when it's no longer needed. It will cancel the previous 3129 * trace_array_get() called by that function, and decrement the 3130 * event's module refcount. 3131 */ 3132 void trace_put_event_file(struct trace_event_file *file) 3133 { 3134 mutex_lock(&event_mutex); 3135 trace_event_put_ref(file->event_call); 3136 mutex_unlock(&event_mutex); 3137 3138 trace_array_put(file->tr); 3139 } 3140 EXPORT_SYMBOL_GPL(trace_put_event_file); 3141 3142 #ifdef CONFIG_DYNAMIC_FTRACE 3143 3144 /* Avoid typos */ 3145 #define ENABLE_EVENT_STR "enable_event" 3146 #define DISABLE_EVENT_STR "disable_event" 3147 3148 struct event_probe_data { 3149 struct trace_event_file *file; 3150 unsigned long count; 3151 int ref; 3152 bool enable; 3153 }; 3154 3155 static void update_event_probe(struct event_probe_data *data) 3156 { 3157 if (data->enable) 3158 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); 3159 else 3160 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); 3161 } 3162 3163 static void 3164 event_enable_probe(unsigned long ip, unsigned long parent_ip, 3165 struct trace_array *tr, struct ftrace_probe_ops *ops, 3166 void *data) 3167 { 3168 struct ftrace_func_mapper *mapper = data; 3169 struct event_probe_data *edata; 3170 void **pdata; 3171 3172 pdata = ftrace_func_mapper_find_ip(mapper, ip); 3173 if (!pdata || !*pdata) 3174 return; 3175 3176 edata = *pdata; 3177 update_event_probe(edata); 3178 } 3179 3180 static void 3181 event_enable_count_probe(unsigned long ip, unsigned long parent_ip, 3182 struct trace_array *tr, struct ftrace_probe_ops *ops, 3183 void *data) 3184 { 3185 struct ftrace_func_mapper *mapper = data; 3186 struct event_probe_data *edata; 3187 void **pdata; 3188 3189 pdata = ftrace_func_mapper_find_ip(mapper, ip); 3190 if (!pdata || !*pdata) 3191 return; 3192 3193 edata = *pdata; 3194 3195 if (!edata->count) 3196 return; 3197 3198 /* Skip if the event is in a state we want to switch to */ 3199 if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) 3200 return; 3201 3202 if (edata->count != -1) 3203 (edata->count)--; 3204 3205 update_event_probe(edata); 3206 } 3207 3208 static int 3209 event_enable_print(struct seq_file *m, unsigned long ip, 3210 struct ftrace_probe_ops *ops, void *data) 3211 { 3212 struct ftrace_func_mapper *mapper = data; 3213 
struct event_probe_data *edata; 3214 void **pdata; 3215 3216 pdata = ftrace_func_mapper_find_ip(mapper, ip); 3217 3218 if (WARN_ON_ONCE(!pdata || !*pdata)) 3219 return 0; 3220 3221 edata = *pdata; 3222 3223 seq_printf(m, "%ps:", (void *)ip); 3224 3225 seq_printf(m, "%s:%s:%s", 3226 edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, 3227 edata->file->event_call->class->system, 3228 trace_event_name(edata->file->event_call)); 3229 3230 if (edata->count == -1) 3231 seq_puts(m, ":unlimited\n"); 3232 else 3233 seq_printf(m, ":count=%ld\n", edata->count); 3234 3235 return 0; 3236 } 3237 3238 static int 3239 event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr, 3240 unsigned long ip, void *init_data, void **data) 3241 { 3242 struct ftrace_func_mapper *mapper = *data; 3243 struct event_probe_data *edata = init_data; 3244 int ret; 3245 3246 if (!mapper) { 3247 mapper = allocate_ftrace_func_mapper(); 3248 if (!mapper) 3249 return -ENODEV; 3250 *data = mapper; 3251 } 3252 3253 ret = ftrace_func_mapper_add_ip(mapper, ip, edata); 3254 if (ret < 0) 3255 return ret; 3256 3257 edata->ref++; 3258 3259 return 0; 3260 } 3261 3262 static int free_probe_data(void *data) 3263 { 3264 struct event_probe_data *edata = data; 3265 3266 edata->ref--; 3267 if (!edata->ref) { 3268 /* Remove the SOFT_MODE flag */ 3269 __ftrace_event_enable_disable(edata->file, 0, 1); 3270 trace_event_put_ref(edata->file->event_call); 3271 kfree(edata); 3272 } 3273 return 0; 3274 } 3275 3276 static void 3277 event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr, 3278 unsigned long ip, void *data) 3279 { 3280 struct ftrace_func_mapper *mapper = data; 3281 struct event_probe_data *edata; 3282 3283 if (!ip) { 3284 if (!mapper) 3285 return; 3286 free_ftrace_func_mapper(mapper, free_probe_data); 3287 return; 3288 } 3289 3290 edata = ftrace_func_mapper_remove_ip(mapper, ip); 3291 3292 if (WARN_ON_ONCE(!edata)) 3293 return; 3294 3295 if (WARN_ON_ONCE(edata->ref <= 0)) 3296 return; 3297 3298 free_probe_data(edata); 3299 } 3300 3301 static struct ftrace_probe_ops event_enable_probe_ops = { 3302 .func = event_enable_probe, 3303 .print = event_enable_print, 3304 .init = event_enable_init, 3305 .free = event_enable_free, 3306 }; 3307 3308 static struct ftrace_probe_ops event_enable_count_probe_ops = { 3309 .func = event_enable_count_probe, 3310 .print = event_enable_print, 3311 .init = event_enable_init, 3312 .free = event_enable_free, 3313 }; 3314 3315 static struct ftrace_probe_ops event_disable_probe_ops = { 3316 .func = event_enable_probe, 3317 .print = event_enable_print, 3318 .init = event_enable_init, 3319 .free = event_enable_free, 3320 }; 3321 3322 static struct ftrace_probe_ops event_disable_count_probe_ops = { 3323 .func = event_enable_count_probe, 3324 .print = event_enable_print, 3325 .init = event_enable_init, 3326 .free = event_enable_free, 3327 }; 3328 3329 static int 3330 event_enable_func(struct trace_array *tr, struct ftrace_hash *hash, 3331 char *glob, char *cmd, char *param, int enabled) 3332 { 3333 struct trace_event_file *file; 3334 struct ftrace_probe_ops *ops; 3335 struct event_probe_data *data; 3336 const char *system; 3337 const char *event; 3338 char *number; 3339 bool enable; 3340 int ret; 3341 3342 if (!tr) 3343 return -ENODEV; 3344 3345 /* hash funcs only work with set_ftrace_filter */ 3346 if (!enabled || !param) 3347 return -EINVAL; 3348 3349 system = strsep(¶m, ":"); 3350 if (!param) 3351 return -EINVAL; 3352 3353 event = strsep(¶m, ":"); 3354 3355 mutex_lock(&event_mutex); 3356 
3357 ret = -EINVAL; 3358 file = find_event_file(tr, system, event); 3359 if (!file) 3360 goto out; 3361 3362 enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; 3363 3364 if (enable) 3365 ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops; 3366 else 3367 ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops; 3368 3369 if (glob[0] == '!') { 3370 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); 3371 goto out; 3372 } 3373 3374 ret = -ENOMEM; 3375 3376 data = kzalloc(sizeof(*data), GFP_KERNEL); 3377 if (!data) 3378 goto out; 3379 3380 data->enable = enable; 3381 data->count = -1; 3382 data->file = file; 3383 3384 if (!param) 3385 goto out_reg; 3386 3387 number = strsep(¶m, ":"); 3388 3389 ret = -EINVAL; 3390 if (!strlen(number)) 3391 goto out_free; 3392 3393 /* 3394 * We use the callback data field (which is a pointer) 3395 * as our counter. 3396 */ 3397 ret = kstrtoul(number, 0, &data->count); 3398 if (ret) 3399 goto out_free; 3400 3401 out_reg: 3402 /* Don't let event modules unload while probe registered */ 3403 ret = trace_event_try_get_ref(file->event_call); 3404 if (!ret) { 3405 ret = -EBUSY; 3406 goto out_free; 3407 } 3408 3409 ret = __ftrace_event_enable_disable(file, 1, 1); 3410 if (ret < 0) 3411 goto out_put; 3412 3413 ret = register_ftrace_function_probe(glob, tr, ops, data); 3414 /* 3415 * The above returns on success the # of functions enabled, 3416 * but if it didn't find any functions it returns zero. 3417 * Consider no functions a failure too. 3418 */ 3419 if (!ret) { 3420 ret = -ENOENT; 3421 goto out_disable; 3422 } else if (ret < 0) 3423 goto out_disable; 3424 /* Just return zero, not the number of enabled functions */ 3425 ret = 0; 3426 out: 3427 mutex_unlock(&event_mutex); 3428 return ret; 3429 3430 out_disable: 3431 __ftrace_event_enable_disable(file, 0, 1); 3432 out_put: 3433 trace_event_put_ref(file->event_call); 3434 out_free: 3435 kfree(data); 3436 goto out; 3437 } 3438 3439 static struct ftrace_func_command event_enable_cmd = { 3440 .name = ENABLE_EVENT_STR, 3441 .func = event_enable_func, 3442 }; 3443 3444 static struct ftrace_func_command event_disable_cmd = { 3445 .name = DISABLE_EVENT_STR, 3446 .func = event_enable_func, 3447 }; 3448 3449 static __init int register_event_cmds(void) 3450 { 3451 int ret; 3452 3453 ret = register_ftrace_command(&event_enable_cmd); 3454 if (WARN_ON(ret < 0)) 3455 return ret; 3456 ret = register_ftrace_command(&event_disable_cmd); 3457 if (WARN_ON(ret < 0)) 3458 unregister_ftrace_command(&event_enable_cmd); 3459 return ret; 3460 } 3461 #else 3462 static inline int register_event_cmds(void) { return 0; } 3463 #endif /* CONFIG_DYNAMIC_FTRACE */ 3464 3465 /* 3466 * The top level array and trace arrays created by boot-time tracing 3467 * have already had its trace_event_file descriptors created in order 3468 * to allow for early events to be recorded. 3469 * This function is called after the tracefs has been initialized, 3470 * and we now have to create the files associated to the events. 
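 * (The per-event "enable", "filter", "trigger", "format", etc. files
 * are created by event_create_dir(), which is called below for each
 * trace_event_file already hanging off tr->events.)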
3471 */ 3472 static void __trace_early_add_event_dirs(struct trace_array *tr) 3473 { 3474 struct trace_event_file *file; 3475 int ret; 3476 3477 3478 list_for_each_entry(file, &tr->events, list) { 3479 ret = event_create_dir(tr->event_dir, file); 3480 if (ret < 0) 3481 pr_warn("Could not create directory for event %s\n", 3482 trace_event_name(file->event_call)); 3483 } 3484 } 3485 3486 /* 3487 * For early boot up, the top trace array and the trace arrays created 3488 * by boot-time tracing need to have a list of events that can be 3489 * enabled. This must be done before the filesystem is set up in order 3490 * to allow events to be traced early. 3491 */ 3492 void __trace_early_add_events(struct trace_array *tr) 3493 { 3494 struct trace_event_call *call; 3495 int ret; 3496 3497 list_for_each_entry(call, &ftrace_events, list) { 3498 /* Early boot up should not have any modules loaded */ 3499 if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) && 3500 WARN_ON_ONCE(call->module)) 3501 continue; 3502 3503 ret = __trace_early_add_new_event(call, tr); 3504 if (ret < 0) 3505 pr_warn("Could not create early event %s\n", 3506 trace_event_name(call)); 3507 } 3508 } 3509 3510 /* Remove the event directory structure for a trace directory. */ 3511 static void 3512 __trace_remove_event_dirs(struct trace_array *tr) 3513 { 3514 struct trace_event_file *file, *next; 3515 3516 list_for_each_entry_safe(file, next, &tr->events, list) 3517 remove_event_file_dir(file); 3518 } 3519 3520 static void __add_event_to_tracers(struct trace_event_call *call) 3521 { 3522 struct trace_array *tr; 3523 3524 list_for_each_entry(tr, &ftrace_trace_arrays, list) 3525 __trace_add_new_event(call, tr); 3526 } 3527 3528 extern struct trace_event_call *__start_ftrace_events[]; 3529 extern struct trace_event_call *__stop_ftrace_events[]; 3530 3531 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; 3532 3533 static __init int setup_trace_event(char *str) 3534 { 3535 strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE); 3536 ring_buffer_expanded = true; 3537 disable_tracing_selftest("running event tracing"); 3538 3539 return 1; 3540 } 3541 __setup("trace_event=", setup_trace_event); 3542 3543 /* Expects to have event_mutex held when called */ 3544 static int 3545 create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) 3546 { 3547 struct dentry *d_events; 3548 struct dentry *entry; 3549 3550 entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent, 3551 tr, &ftrace_set_event_fops); 3552 if (!entry) 3553 return -ENOMEM; 3554 3555 d_events = tracefs_create_dir("events", parent); 3556 if (!d_events) { 3557 pr_warn("Could not create tracefs 'events' directory\n"); 3558 return -ENOMEM; 3559 } 3560 3561 entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events, 3562 tr, &ftrace_tr_enable_fops); 3563 if (!entry) 3564 return -ENOMEM; 3565 3566 /* These are not as crucial, just warn if they are not created */ 3567 3568 trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent, 3569 tr, &ftrace_set_event_pid_fops); 3570 3571 trace_create_file("set_event_notrace_pid", 3572 TRACE_MODE_WRITE, parent, tr, 3573 &ftrace_set_event_notrace_pid_fops); 3574 3575 /* ring buffer internal formats */ 3576 trace_create_file("header_page", TRACE_MODE_READ, d_events, 3577 ring_buffer_print_page_header, 3578 &ftrace_show_header_fops); 3579 3580 trace_create_file("header_event", TRACE_MODE_READ, d_events, 3581 ring_buffer_print_entry_header, 3582 &ftrace_show_header_fops); 3583 3584 tr->event_dir = d_events; 3585 3586 return 0;
3587 } 3588 3589 /** 3590 * event_trace_add_tracer - add a instance of a trace_array to events 3591 * @parent: The parent dentry to place the files/directories for events in 3592 * @tr: The trace array associated with these events 3593 * 3594 * When a new instance is created, it needs to set up its events 3595 * directory, as well as other files associated with events. It also 3596 * creates the event hierarchy in the @parent/events directory. 3597 * 3598 * Returns 0 on success. 3599 * 3600 * Must be called with event_mutex held. 3601 */ 3602 int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) 3603 { 3604 int ret; 3605 3606 lockdep_assert_held(&event_mutex); 3607 3608 ret = create_event_toplevel_files(parent, tr); 3609 if (ret) 3610 goto out; 3611 3612 down_write(&trace_event_sem); 3613 /* If tr already has the event list, it is initialized in early boot. */ 3614 if (unlikely(!list_empty(&tr->events))) 3615 __trace_early_add_event_dirs(tr); 3616 else 3617 __trace_add_event_dirs(tr); 3618 up_write(&trace_event_sem); 3619 3620 out: 3621 return ret; 3622 } 3623 3624 /* 3625 * The top trace array already had its file descriptors created. 3626 * Now the files themselves need to be created. 3627 */ 3628 static __init int 3629 early_event_add_tracer(struct dentry *parent, struct trace_array *tr) 3630 { 3631 int ret; 3632 3633 mutex_lock(&event_mutex); 3634 3635 ret = create_event_toplevel_files(parent, tr); 3636 if (ret) 3637 goto out_unlock; 3638 3639 down_write(&trace_event_sem); 3640 __trace_early_add_event_dirs(tr); 3641 up_write(&trace_event_sem); 3642 3643 out_unlock: 3644 mutex_unlock(&event_mutex); 3645 3646 return ret; 3647 } 3648 3649 /* Must be called with event_mutex held */ 3650 int event_trace_del_tracer(struct trace_array *tr) 3651 { 3652 lockdep_assert_held(&event_mutex); 3653 3654 /* Disable any event triggers and associated soft-disabled events */ 3655 clear_event_triggers(tr); 3656 3657 /* Clear the pid list */ 3658 __ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS); 3659 3660 /* Disable any running events */ 3661 __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); 3662 3663 /* Make sure no more events are being executed */ 3664 tracepoint_synchronize_unregister(); 3665 3666 down_write(&trace_event_sem); 3667 __trace_remove_event_dirs(tr); 3668 tracefs_remove(tr->event_dir); 3669 up_write(&trace_event_sem); 3670 3671 tr->event_dir = NULL; 3672 3673 return 0; 3674 } 3675 3676 static __init int event_trace_memsetup(void) 3677 { 3678 field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC); 3679 file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC); 3680 return 0; 3681 } 3682 3683 static __init void 3684 early_enable_events(struct trace_array *tr, bool disable_first) 3685 { 3686 char *buf = bootup_event_buf; 3687 char *token; 3688 int ret; 3689 3690 while (true) { 3691 token = strsep(&buf, ","); 3692 3693 if (!token) 3694 break; 3695 3696 if (*token) { 3697 /* Restarting syscalls requires that we stop them first */ 3698 if (disable_first) 3699 ftrace_set_clr_event(tr, token, 0); 3700 3701 ret = ftrace_set_clr_event(tr, token, 1); 3702 if (ret) 3703 pr_warn("Failed to enable trace event: %s\n", token); 3704 } 3705 3706 /* Put back the comma to allow this to be called again */ 3707 if (buf) 3708 *(buf - 1) = ','; 3709 } 3710 } 3711 3712 static __init int event_trace_enable(void) 3713 { 3714 struct trace_array *tr = top_trace_array(); 3715 struct trace_event_call **iter, *call; 3716 int ret; 3717 3718 if (!tr) 3719 return -ENODEV; 3720 3721 
for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) { 3722 3723 call = *iter; 3724 ret = event_init(call); 3725 if (!ret) 3726 list_add(&call->list, &ftrace_events); 3727 } 3728 3729 /* 3730 * We need the top trace array to have a working set of trace 3731 * points at early init, before the debug files and directories 3732 * are created. Create the file entries now, and attach them 3733 * to the actual file dentries later. 3734 */ 3735 __trace_early_add_events(tr); 3736 3737 early_enable_events(tr, false); 3738 3739 trace_printk_start_comm(); 3740 3741 register_event_cmds(); 3742 3743 register_trigger_cmds(); 3744 3745 return 0; 3746 } 3747 3748 /* 3749 * event_trace_enable() is called from trace_event_init() first to 3750 * initialize events and perhaps start any events that are on the 3751 * command line. Unfortunately, there are some events that will not 3752 * start this early, like the system call tracepoints that need 3753 * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But 3754 * event_trace_enable() is called before pid 1 starts, and this flag 3755 * is never set, so the syscall tracepoint is never reached even though 3756 * the event is enabled (and does nothing). event_trace_enable_again() below therefore runs as an early_initcall, after pid 1 has been created, to enable those events again. 3757 */ 3758 static __init int event_trace_enable_again(void) 3759 { 3760 struct trace_array *tr; 3761 3762 tr = top_trace_array(); 3763 if (!tr) 3764 return -ENODEV; 3765 3766 early_enable_events(tr, true); 3767 3768 return 0; 3769 } 3770 3771 early_initcall(event_trace_enable_again); 3772 3773 /* Init the fields that are not related to tracefs */ 3774 static __init int event_trace_init_fields(void) 3775 { 3776 if (trace_define_generic_fields()) 3777 pr_warn("tracing: Failed to allocate generic fields"); 3778 3779 if (trace_define_common_fields()) 3780 pr_warn("tracing: Failed to allocate common fields"); 3781 3782 return 0; 3783 } 3784 3785 __init int event_trace_init(void) 3786 { 3787 struct trace_array *tr; 3788 int ret; 3789 3790 tr = top_trace_array(); 3791 if (!tr) 3792 return -ENODEV; 3793 3794 trace_create_file("available_events", TRACE_MODE_READ, 3795 NULL, tr, &ftrace_avail_fops); 3796 3797 ret = early_event_add_tracer(NULL, tr); 3798 if (ret) 3799 return ret; 3800 3801 #ifdef CONFIG_MODULES 3802 ret = register_module_notifier(&trace_module_nb); 3803 if (ret) 3804 pr_warn("Failed to register trace events module notifier\n"); 3805 #endif 3806 3807 eventdir_initialized = true; 3808 3809 return 0; 3810 } 3811 3812 void __init trace_event_init(void) 3813 { 3814 event_trace_memsetup(); 3815 init_ftrace_syscalls(); 3816 event_trace_enable(); 3817 event_trace_init_fields(); 3818 } 3819 3820 #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST 3821 3822 static DEFINE_SPINLOCK(test_spinlock); 3823 static DEFINE_SPINLOCK(test_spinlock_irq); 3824 static DEFINE_MUTEX(test_mutex); 3825 3826 static __init void test_work(struct work_struct *dummy) 3827 { 3828 spin_lock(&test_spinlock); 3829 spin_lock_irq(&test_spinlock_irq); 3830 udelay(1); 3831 spin_unlock_irq(&test_spinlock_irq); 3832 spin_unlock(&test_spinlock); 3833 3834 mutex_lock(&test_mutex); 3835 msleep(1); 3836 mutex_unlock(&test_mutex); 3837 } 3838 3839 static __init int event_test_thread(void *unused) 3840 { 3841 void *test_malloc; 3842 3843 test_malloc = kmalloc(1234, GFP_KERNEL); 3844 if (!test_malloc) 3845 pr_info("failed to kmalloc\n"); 3846 3847 schedule_on_each_cpu(test_work); 3848 3849 kfree(test_malloc); 3850 3851 set_current_state(TASK_INTERRUPTIBLE); 3852 while (!kthread_should_stop()) { 3853 schedule(); 3854
set_current_state(TASK_INTERRUPTIBLE); 3855 } 3856 __set_current_state(TASK_RUNNING); 3857 3858 return 0; 3859 } 3860 3861 /* 3862 * Do various things that may trigger events. 3863 */ 3864 static __init void event_test_stuff(void) 3865 { 3866 struct task_struct *test_thread; 3867 3868 test_thread = kthread_run(event_test_thread, NULL, "test-events"); 3869 msleep(1); 3870 kthread_stop(test_thread); 3871 } 3872 3873 /* 3874 * For every trace event defined, we will test each trace point separately, 3875 * and then by groups, and finally all trace points. 3876 */ 3877 static __init void event_trace_self_tests(void) 3878 { 3879 struct trace_subsystem_dir *dir; 3880 struct trace_event_file *file; 3881 struct trace_event_call *call; 3882 struct event_subsystem *system; 3883 struct trace_array *tr; 3884 int ret; 3885 3886 tr = top_trace_array(); 3887 if (!tr) 3888 return; 3889 3890 pr_info("Running tests on trace events:\n"); 3891 3892 list_for_each_entry(file, &tr->events, list) { 3893 3894 call = file->event_call; 3895 3896 /* Only test those that have a probe */ 3897 if (!call->class || !call->class->probe) 3898 continue; 3899 3900 /* 3901 * Testing syscall events here is pretty useless, but 3902 * we still do it if configured. But this is time consuming. 3903 * What we really need is a user thread to perform the 3904 * syscalls as we test. 3905 */ 3906 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS 3907 if (call->class->system && 3908 strcmp(call->class->system, "syscalls") == 0) 3909 continue; 3910 #endif 3911 3912 pr_info("Testing event %s: ", trace_event_name(call)); 3913 3914 /* 3915 * If an event is already enabled, someone is using 3916 * it and the self test should not be on. 3917 */ 3918 if (file->flags & EVENT_FILE_FL_ENABLED) { 3919 pr_warn("Enabled event during self test!\n"); 3920 WARN_ON_ONCE(1); 3921 continue; 3922 } 3923 3924 ftrace_event_enable_disable(file, 1); 3925 event_test_stuff(); 3926 ftrace_event_enable_disable(file, 0); 3927 3928 pr_cont("OK\n"); 3929 } 3930 3931 /* Now test at the sub system level */ 3932 3933 pr_info("Running tests on trace event systems:\n"); 3934 3935 list_for_each_entry(dir, &tr->systems, list) { 3936 3937 system = dir->subsystem; 3938 3939 /* the ftrace system is special, skip it */ 3940 if (strcmp(system->name, "ftrace") == 0) 3941 continue; 3942 3943 pr_info("Testing event system %s: ", system->name); 3944 3945 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1); 3946 if (WARN_ON_ONCE(ret)) { 3947 pr_warn("error enabling system %s\n", 3948 system->name); 3949 continue; 3950 } 3951 3952 event_test_stuff(); 3953 3954 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0); 3955 if (WARN_ON_ONCE(ret)) { 3956 pr_warn("error disabling system %s\n", 3957 system->name); 3958 continue; 3959 } 3960 3961 pr_cont("OK\n"); 3962 } 3963 3964 /* Test with all events enabled */ 3965 3966 pr_info("Running tests on all trace events:\n"); 3967 pr_info("Testing all events: "); 3968 3969 ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1); 3970 if (WARN_ON_ONCE(ret)) { 3971 pr_warn("error enabling all events\n"); 3972 return; 3973 } 3974 3975 event_test_stuff(); 3976 3977 /* reset sysname */ 3978 ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); 3979 if (WARN_ON_ONCE(ret)) { 3980 pr_warn("error disabling all events\n"); 3981 return; 3982 } 3983 3984 pr_cont("OK\n"); 3985 } 3986 3987 #ifdef CONFIG_FUNCTION_TRACER 3988 3989 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); 3990 3991 static struct trace_event_file event_trace_file 
__initdata; 3992 3993 static void __init 3994 function_test_events_call(unsigned long ip, unsigned long parent_ip, 3995 struct ftrace_ops *op, struct ftrace_regs *regs) 3996 { 3997 struct trace_buffer *buffer; 3998 struct ring_buffer_event *event; 3999 struct ftrace_entry *entry; 4000 unsigned int trace_ctx; 4001 long disabled; 4002 int cpu; 4003 4004 trace_ctx = tracing_gen_ctx(); 4005 preempt_disable_notrace(); 4006 cpu = raw_smp_processor_id(); 4007 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); 4008 4009 if (disabled != 1) 4010 goto out; 4011 4012 event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, 4013 TRACE_FN, sizeof(*entry), 4014 trace_ctx); 4015 if (!event) 4016 goto out; 4017 entry = ring_buffer_event_data(event); 4018 entry->ip = ip; 4019 entry->parent_ip = parent_ip; 4020 4021 event_trigger_unlock_commit(&event_trace_file, buffer, event, 4022 entry, trace_ctx); 4023 out: 4024 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); 4025 preempt_enable_notrace(); 4026 } 4027 4028 static struct ftrace_ops trace_ops __initdata = 4029 { 4030 .func = function_test_events_call, 4031 }; 4032 4033 static __init void event_trace_self_test_with_function(void) 4034 { 4035 int ret; 4036 4037 event_trace_file.tr = top_trace_array(); 4038 if (WARN_ON(!event_trace_file.tr)) 4039 return; 4040 4041 ret = register_ftrace_function(&trace_ops); 4042 if (WARN_ON(ret < 0)) { 4043 pr_info("Failed to enable function tracer for event tests\n"); 4044 return; 4045 } 4046 pr_info("Running tests again, along with the function tracer\n"); 4047 event_trace_self_tests(); 4048 unregister_ftrace_function(&trace_ops); 4049 } 4050 #else 4051 static __init void event_trace_self_test_with_function(void) 4052 { 4053 } 4054 #endif 4055 4056 static __init int event_trace_self_tests_init(void) 4057 { 4058 if (!tracing_selftest_disabled) { 4059 event_trace_self_tests(); 4060 event_trace_self_test_with_function(); 4061 } 4062 4063 return 0; 4064 } 4065 4066 late_initcall(event_trace_self_tests_init); 4067 4068 #endif 4069
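
/*
 * Note: the self tests above are only built when
 * CONFIG_EVENT_TRACE_STARTUP_TEST is enabled, and they are skipped when
 * the tracing self tests have been disabled (for instance when events
 * are enabled from the kernel command line via trace_event=). Results
 * are reported via pr_info()/pr_cont() in the kernel log.
 */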