xref: /openbmc/linux/kernel/trace/trace_events.c (revision e8e0929d)
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of fields of the trace point.
7  *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10 
11 #include <linux/workqueue.h>
12 #include <linux/spinlock.h>
13 #include <linux/kthread.h>
14 #include <linux/debugfs.h>
15 #include <linux/uaccess.h>
16 #include <linux/module.h>
17 #include <linux/ctype.h>
18 #include <linux/delay.h>
19 
20 #include <asm/setup.h>
21 
22 #include "trace_output.h"
23 
24 #undef TRACE_SYSTEM
25 #define TRACE_SYSTEM "TRACE_SYSTEM"
26 
27 DEFINE_MUTEX(event_mutex);
28 
29 LIST_HEAD(ftrace_events);
30 
31 int trace_define_field(struct ftrace_event_call *call, const char *type,
32 		       const char *name, int offset, int size, int is_signed,
33 		       int filter_type)
34 {
35 	struct ftrace_event_field *field;
36 
37 	field = kzalloc(sizeof(*field), GFP_KERNEL);
38 	if (!field)
39 		goto err;
40 
41 	field->name = kstrdup(name, GFP_KERNEL);
42 	if (!field->name)
43 		goto err;
44 
45 	field->type = kstrdup(type, GFP_KERNEL);
46 	if (!field->type)
47 		goto err;
48 
49 	if (filter_type == FILTER_OTHER)
50 		field->filter_type = filter_assign_type(type);
51 	else
52 		field->filter_type = filter_type;
53 
54 	field->offset = offset;
55 	field->size = size;
56 	field->is_signed = is_signed;
57 
58 	list_add(&field->link, &call->fields);
59 
60 	return 0;
61 
62 err:
63 	if (field) {
64 		kfree(field->name);
65 		kfree(field->type);
66 	}
67 	kfree(field);
68 
69 	return -ENOMEM;
70 }
71 EXPORT_SYMBOL_GPL(trace_define_field);
72 
73 #define __common_field(type, item)					\
74 	ret = trace_define_field(call, #type, "common_" #item,		\
75 				 offsetof(typeof(ent), item),		\
76 				 sizeof(ent.item),			\
77 				 is_signed_type(type), FILTER_OTHER);	\
78 	if (ret)							\
79 		return ret;
80 
81 int trace_define_common_fields(struct ftrace_event_call *call)
82 {
83 	int ret;
84 	struct trace_entry ent;
85 
86 	__common_field(unsigned short, type);
87 	__common_field(unsigned char, flags);
88 	__common_field(unsigned char, preempt_count);
89 	__common_field(int, pid);
90 	__common_field(int, lock_depth);
91 
92 	return ret;
93 }
94 EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 
96 #ifdef CONFIG_MODULES
97 
98 static void trace_destroy_fields(struct ftrace_event_call *call)
99 {
100 	struct ftrace_event_field *field, *next;
101 
102 	list_for_each_entry_safe(field, next, &call->fields, link) {
103 		list_del(&field->link);
104 		kfree(field->type);
105 		kfree(field->name);
106 		kfree(field);
107 	}
108 }
109 
110 #endif /* CONFIG_MODULES */
111 
112 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
113 					int enable)
114 {
115 	switch (enable) {
116 	case 0:
117 		if (call->enabled) {
118 			call->enabled = 0;
119 			tracing_stop_cmdline_record();
120 			call->unregfunc(call->data);
121 		}
122 		break;
123 	case 1:
124 		if (!call->enabled) {
125 			call->enabled = 1;
126 			tracing_start_cmdline_record();
127 			call->regfunc(call->data);
128 		}
129 		break;
130 	}
131 }
132 
133 static void ftrace_clear_events(void)
134 {
135 	struct ftrace_event_call *call;
136 
137 	mutex_lock(&event_mutex);
138 	list_for_each_entry(call, &ftrace_events, list) {
139 		ftrace_event_enable_disable(call, 0);
140 	}
141 	mutex_unlock(&event_mutex);
142 }
143 
144 /*
145  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
146  */
147 static int __ftrace_set_clr_event(const char *match, const char *sub,
148 				  const char *event, int set)
149 {
150 	struct ftrace_event_call *call;
151 	int ret = -EINVAL;
152 
153 	mutex_lock(&event_mutex);
154 	list_for_each_entry(call, &ftrace_events, list) {
155 
156 		if (!call->name || !call->regfunc)
157 			continue;
158 
159 		if (match &&
160 		    strcmp(match, call->name) != 0 &&
161 		    strcmp(match, call->system) != 0)
162 			continue;
163 
164 		if (sub && strcmp(sub, call->system) != 0)
165 			continue;
166 
167 		if (event && strcmp(event, call->name) != 0)
168 			continue;
169 
170 		ftrace_event_enable_disable(call, set);
171 
172 		ret = 0;
173 	}
174 	mutex_unlock(&event_mutex);
175 
176 	return ret;
177 }
178 
179 static int ftrace_set_clr_event(char *buf, int set)
180 {
181 	char *event = NULL, *sub = NULL, *match;
182 
183 	/*
184 	 * The buf format can be <subsystem>:<event-name>
185 	 *  *:<event-name> means any event by that name.
186 	 *  :<event-name> is the same.
187 	 *
188 	 *  <subsystem>:* means all events in that subsystem
189 	 *  <subsystem>: means the same.
190 	 *
191 	 *  <name> (no ':') means all events in a subsystem with
192 	 *  the name <name> or any event that matches <name>
193 	 */
194 
195 	match = strsep(&buf, ":");
196 	if (buf) {
197 		sub = match;
198 		event = buf;
199 		match = NULL;
200 
201 		if (!strlen(sub) || strcmp(sub, "*") == 0)
202 			sub = NULL;
203 		if (!strlen(event) || strcmp(event, "*") == 0)
204 			event = NULL;
205 	}
206 
207 	return __ftrace_set_clr_event(match, sub, event, set);
208 }
209 
210 /**
211  * trace_set_clr_event - enable or disable an event
212  * @system: system name to match (NULL for any system)
213  * @event: event name to match (NULL for all events, within system)
214  * @set: 1 to enable, 0 to disable
215  *
216  * This is a way for other parts of the kernel to enable or disable
217  * event recording.
218  *
219  * Returns 0 on success, -EINVAL if the parameters do not match any
220  * registered events.
221  */
222 int trace_set_clr_event(const char *system, const char *event, int set)
223 {
224 	return __ftrace_set_clr_event(NULL, system, event, set);
225 }
226 
227 /* 128 should be much more than enough */
228 #define EVENT_BUF_SIZE		127
229 
230 static ssize_t
231 ftrace_event_write(struct file *file, const char __user *ubuf,
232 		   size_t cnt, loff_t *ppos)
233 {
234 	struct trace_parser parser;
235 	ssize_t read, ret;
236 
237 	if (!cnt)
238 		return 0;
239 
240 	ret = tracing_update_buffers();
241 	if (ret < 0)
242 		return ret;
243 
244 	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
245 		return -ENOMEM;
246 
247 	read = trace_get_user(&parser, ubuf, cnt, ppos);
248 
249 	if (read >= 0 && trace_parser_loaded((&parser))) {
250 		int set = 1;
251 
252 		if (*parser.buffer == '!')
253 			set = 0;
254 
255 		parser.buffer[parser.idx] = 0;
256 
257 		ret = ftrace_set_clr_event(parser.buffer + !set, set);
258 		if (ret)
259 			goto out_put;
260 	}
261 
262 	ret = read;
263 
264  out_put:
265 	trace_parser_put(&parser);
266 
267 	return ret;
268 }
269 
270 static void *
271 t_next(struct seq_file *m, void *v, loff_t *pos)
272 {
273 	struct ftrace_event_call *call = v;
274 
275 	(*pos)++;
276 
277 	list_for_each_entry_continue(call, &ftrace_events, list) {
278 		/*
279 		 * The ftrace subsystem is for showing formats only.
280 		 * They can not be enabled or disabled via the event files.
281 		 */
282 		if (call->regfunc)
283 			return call;
284 	}
285 
286 	return NULL;
287 }
288 
289 static void *t_start(struct seq_file *m, loff_t *pos)
290 {
291 	struct ftrace_event_call *call;
292 	loff_t l;
293 
294 	mutex_lock(&event_mutex);
295 
296 	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
297 	for (l = 0; l <= *pos; ) {
298 		call = t_next(m, call, &l);
299 		if (!call)
300 			break;
301 	}
302 	return call;
303 }
304 
305 static void *
306 s_next(struct seq_file *m, void *v, loff_t *pos)
307 {
308 	struct ftrace_event_call *call = v;
309 
310 	(*pos)++;
311 
312 	list_for_each_entry_continue(call, &ftrace_events, list) {
313 		if (call->enabled)
314 			return call;
315 	}
316 
317 	return NULL;
318 }
319 
320 static void *s_start(struct seq_file *m, loff_t *pos)
321 {
322 	struct ftrace_event_call *call;
323 	loff_t l;
324 
325 	mutex_lock(&event_mutex);
326 
327 	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
328 	for (l = 0; l <= *pos; ) {
329 		call = s_next(m, call, &l);
330 		if (!call)
331 			break;
332 	}
333 	return call;
334 }
335 
336 static int t_show(struct seq_file *m, void *v)
337 {
338 	struct ftrace_event_call *call = v;
339 
340 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
341 		seq_printf(m, "%s:", call->system);
342 	seq_printf(m, "%s\n", call->name);
343 
344 	return 0;
345 }
346 
347 static void t_stop(struct seq_file *m, void *p)
348 {
349 	mutex_unlock(&event_mutex);
350 }
351 
352 static int
353 ftrace_event_seq_open(struct inode *inode, struct file *file)
354 {
355 	const struct seq_operations *seq_ops;
356 
357 	if ((file->f_mode & FMODE_WRITE) &&
358 	    (file->f_flags & O_TRUNC))
359 		ftrace_clear_events();
360 
361 	seq_ops = inode->i_private;
362 	return seq_open(file, seq_ops);
363 }
364 
365 static ssize_t
366 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
367 		  loff_t *ppos)
368 {
369 	struct ftrace_event_call *call = filp->private_data;
370 	char *buf;
371 
372 	if (call->enabled)
373 		buf = "1\n";
374 	else
375 		buf = "0\n";
376 
377 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
378 }
379 
380 static ssize_t
381 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
382 		   loff_t *ppos)
383 {
384 	struct ftrace_event_call *call = filp->private_data;
385 	char buf[64];
386 	unsigned long val;
387 	int ret;
388 
389 	if (cnt >= sizeof(buf))
390 		return -EINVAL;
391 
392 	if (copy_from_user(&buf, ubuf, cnt))
393 		return -EFAULT;
394 
395 	buf[cnt] = 0;
396 
397 	ret = strict_strtoul(buf, 10, &val);
398 	if (ret < 0)
399 		return ret;
400 
401 	ret = tracing_update_buffers();
402 	if (ret < 0)
403 		return ret;
404 
405 	switch (val) {
406 	case 0:
407 	case 1:
408 		mutex_lock(&event_mutex);
409 		ftrace_event_enable_disable(call, val);
410 		mutex_unlock(&event_mutex);
411 		break;
412 
413 	default:
414 		return -EINVAL;
415 	}
416 
417 	*ppos += cnt;
418 
419 	return cnt;
420 }
421 
422 static ssize_t
423 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
424 		   loff_t *ppos)
425 {
426 	const char set_to_char[4] = { '?', '0', '1', 'X' };
427 	const char *system = filp->private_data;
428 	struct ftrace_event_call *call;
429 	char buf[2];
430 	int set = 0;
431 	int ret;
432 
433 	mutex_lock(&event_mutex);
434 	list_for_each_entry(call, &ftrace_events, list) {
435 		if (!call->name || !call->regfunc)
436 			continue;
437 
438 		if (system && strcmp(call->system, system) != 0)
439 			continue;
440 
441 		/*
442 		 * We need to find out if all the events are set
443 		 * or if all events or cleared, or if we have
444 		 * a mixture.
445 		 */
446 		set |= (1 << !!call->enabled);
447 
448 		/*
449 		 * If we have a mixture, no need to look further.
450 		 */
451 		if (set == 3)
452 			break;
453 	}
454 	mutex_unlock(&event_mutex);
455 
456 	buf[0] = set_to_char[set];
457 	buf[1] = '\n';
458 
459 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
460 
461 	return ret;
462 }
463 
464 static ssize_t
465 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
466 		    loff_t *ppos)
467 {
468 	const char *system = filp->private_data;
469 	unsigned long val;
470 	char buf[64];
471 	ssize_t ret;
472 
473 	if (cnt >= sizeof(buf))
474 		return -EINVAL;
475 
476 	if (copy_from_user(&buf, ubuf, cnt))
477 		return -EFAULT;
478 
479 	buf[cnt] = 0;
480 
481 	ret = strict_strtoul(buf, 10, &val);
482 	if (ret < 0)
483 		return ret;
484 
485 	ret = tracing_update_buffers();
486 	if (ret < 0)
487 		return ret;
488 
489 	if (val != 0 && val != 1)
490 		return -EINVAL;
491 
492 	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
493 	if (ret)
494 		goto out;
495 
496 	ret = cnt;
497 
498 out:
499 	*ppos += cnt;
500 
501 	return ret;
502 }
503 
504 extern char *__bad_type_size(void);
505 
506 #undef FIELD
507 #define FIELD(type, name)						\
508 	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
509 	#type, "common_" #name, offsetof(typeof(field), name),		\
510 		sizeof(field.name)
511 
512 static int trace_write_header(struct trace_seq *s)
513 {
514 	struct trace_entry field;
515 
516 	/* struct trace_entry */
517 	return trace_seq_printf(s,
518 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
519 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
520 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
521 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
522 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
523 				"\n",
524 				FIELD(unsigned short, type),
525 				FIELD(unsigned char, flags),
526 				FIELD(unsigned char, preempt_count),
527 				FIELD(int, pid),
528 				FIELD(int, lock_depth));
529 }
530 
531 static ssize_t
532 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
533 		  loff_t *ppos)
534 {
535 	struct ftrace_event_call *call = filp->private_data;
536 	struct trace_seq *s;
537 	char *buf;
538 	int r;
539 
540 	if (*ppos)
541 		return 0;
542 
543 	s = kmalloc(sizeof(*s), GFP_KERNEL);
544 	if (!s)
545 		return -ENOMEM;
546 
547 	trace_seq_init(s);
548 
549 	/* If any of the first writes fail, so will the show_format. */
550 
551 	trace_seq_printf(s, "name: %s\n", call->name);
552 	trace_seq_printf(s, "ID: %d\n", call->id);
553 	trace_seq_printf(s, "format:\n");
554 	trace_write_header(s);
555 
556 	r = call->show_format(call, s);
557 	if (!r) {
558 		/*
559 		 * ug!  The format output is bigger than a PAGE!!
560 		 */
561 		buf = "FORMAT TOO BIG\n";
562 		r = simple_read_from_buffer(ubuf, cnt, ppos,
563 					      buf, strlen(buf));
564 		goto out;
565 	}
566 
567 	r = simple_read_from_buffer(ubuf, cnt, ppos,
568 				    s->buffer, s->len);
569  out:
570 	kfree(s);
571 	return r;
572 }
573 
574 static ssize_t
575 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
576 {
577 	struct ftrace_event_call *call = filp->private_data;
578 	struct trace_seq *s;
579 	int r;
580 
581 	if (*ppos)
582 		return 0;
583 
584 	s = kmalloc(sizeof(*s), GFP_KERNEL);
585 	if (!s)
586 		return -ENOMEM;
587 
588 	trace_seq_init(s);
589 	trace_seq_printf(s, "%d\n", call->id);
590 
591 	r = simple_read_from_buffer(ubuf, cnt, ppos,
592 				    s->buffer, s->len);
593 	kfree(s);
594 	return r;
595 }
596 
597 static ssize_t
598 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
599 		  loff_t *ppos)
600 {
601 	struct ftrace_event_call *call = filp->private_data;
602 	struct trace_seq *s;
603 	int r;
604 
605 	if (*ppos)
606 		return 0;
607 
608 	s = kmalloc(sizeof(*s), GFP_KERNEL);
609 	if (!s)
610 		return -ENOMEM;
611 
612 	trace_seq_init(s);
613 
614 	print_event_filter(call, s);
615 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
616 
617 	kfree(s);
618 
619 	return r;
620 }
621 
622 static ssize_t
623 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
624 		   loff_t *ppos)
625 {
626 	struct ftrace_event_call *call = filp->private_data;
627 	char *buf;
628 	int err;
629 
630 	if (cnt >= PAGE_SIZE)
631 		return -EINVAL;
632 
633 	buf = (char *)__get_free_page(GFP_TEMPORARY);
634 	if (!buf)
635 		return -ENOMEM;
636 
637 	if (copy_from_user(buf, ubuf, cnt)) {
638 		free_page((unsigned long) buf);
639 		return -EFAULT;
640 	}
641 	buf[cnt] = '\0';
642 
643 	err = apply_event_filter(call, buf);
644 	free_page((unsigned long) buf);
645 	if (err < 0)
646 		return err;
647 
648 	*ppos += cnt;
649 
650 	return cnt;
651 }
652 
653 static ssize_t
654 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
655 		      loff_t *ppos)
656 {
657 	struct event_subsystem *system = filp->private_data;
658 	struct trace_seq *s;
659 	int r;
660 
661 	if (*ppos)
662 		return 0;
663 
664 	s = kmalloc(sizeof(*s), GFP_KERNEL);
665 	if (!s)
666 		return -ENOMEM;
667 
668 	trace_seq_init(s);
669 
670 	print_subsystem_event_filter(system, s);
671 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
672 
673 	kfree(s);
674 
675 	return r;
676 }
677 
678 static ssize_t
679 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
680 		       loff_t *ppos)
681 {
682 	struct event_subsystem *system = filp->private_data;
683 	char *buf;
684 	int err;
685 
686 	if (cnt >= PAGE_SIZE)
687 		return -EINVAL;
688 
689 	buf = (char *)__get_free_page(GFP_TEMPORARY);
690 	if (!buf)
691 		return -ENOMEM;
692 
693 	if (copy_from_user(buf, ubuf, cnt)) {
694 		free_page((unsigned long) buf);
695 		return -EFAULT;
696 	}
697 	buf[cnt] = '\0';
698 
699 	err = apply_subsystem_event_filter(system, buf);
700 	free_page((unsigned long) buf);
701 	if (err < 0)
702 		return err;
703 
704 	*ppos += cnt;
705 
706 	return cnt;
707 }
708 
709 static ssize_t
710 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
711 {
712 	int (*func)(struct trace_seq *s) = filp->private_data;
713 	struct trace_seq *s;
714 	int r;
715 
716 	if (*ppos)
717 		return 0;
718 
719 	s = kmalloc(sizeof(*s), GFP_KERNEL);
720 	if (!s)
721 		return -ENOMEM;
722 
723 	trace_seq_init(s);
724 
725 	func(s);
726 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
727 
728 	kfree(s);
729 
730 	return r;
731 }
732 
733 static const struct seq_operations show_event_seq_ops = {
734 	.start = t_start,
735 	.next = t_next,
736 	.show = t_show,
737 	.stop = t_stop,
738 };
739 
740 static const struct seq_operations show_set_event_seq_ops = {
741 	.start = s_start,
742 	.next = s_next,
743 	.show = t_show,
744 	.stop = t_stop,
745 };
746 
747 static const struct file_operations ftrace_avail_fops = {
748 	.open = ftrace_event_seq_open,
749 	.read = seq_read,
750 	.llseek = seq_lseek,
751 	.release = seq_release,
752 };
753 
754 static const struct file_operations ftrace_set_event_fops = {
755 	.open = ftrace_event_seq_open,
756 	.read = seq_read,
757 	.write = ftrace_event_write,
758 	.llseek = seq_lseek,
759 	.release = seq_release,
760 };
761 
762 static const struct file_operations ftrace_enable_fops = {
763 	.open = tracing_open_generic,
764 	.read = event_enable_read,
765 	.write = event_enable_write,
766 };
767 
768 static const struct file_operations ftrace_event_format_fops = {
769 	.open = tracing_open_generic,
770 	.read = event_format_read,
771 };
772 
773 static const struct file_operations ftrace_event_id_fops = {
774 	.open = tracing_open_generic,
775 	.read = event_id_read,
776 };
777 
778 static const struct file_operations ftrace_event_filter_fops = {
779 	.open = tracing_open_generic,
780 	.read = event_filter_read,
781 	.write = event_filter_write,
782 };
783 
784 static const struct file_operations ftrace_subsystem_filter_fops = {
785 	.open = tracing_open_generic,
786 	.read = subsystem_filter_read,
787 	.write = subsystem_filter_write,
788 };
789 
790 static const struct file_operations ftrace_system_enable_fops = {
791 	.open = tracing_open_generic,
792 	.read = system_enable_read,
793 	.write = system_enable_write,
794 };
795 
796 static const struct file_operations ftrace_show_header_fops = {
797 	.open = tracing_open_generic,
798 	.read = show_header,
799 };
800 
801 static struct dentry *event_trace_events_dir(void)
802 {
803 	static struct dentry *d_tracer;
804 	static struct dentry *d_events;
805 
806 	if (d_events)
807 		return d_events;
808 
809 	d_tracer = tracing_init_dentry();
810 	if (!d_tracer)
811 		return NULL;
812 
813 	d_events = debugfs_create_dir("events", d_tracer);
814 	if (!d_events)
815 		pr_warning("Could not create debugfs "
816 			   "'events' directory\n");
817 
818 	return d_events;
819 }
820 
821 static LIST_HEAD(event_subsystems);
822 
823 static struct dentry *
824 event_subsystem_dir(const char *name, struct dentry *d_events)
825 {
826 	struct event_subsystem *system;
827 	struct dentry *entry;
828 
829 	/* First see if we did not already create this dir */
830 	list_for_each_entry(system, &event_subsystems, list) {
831 		if (strcmp(system->name, name) == 0) {
832 			system->nr_events++;
833 			return system->entry;
834 		}
835 	}
836 
837 	/* need to create new entry */
838 	system = kmalloc(sizeof(*system), GFP_KERNEL);
839 	if (!system) {
840 		pr_warning("No memory to create event subsystem %s\n",
841 			   name);
842 		return d_events;
843 	}
844 
845 	system->entry = debugfs_create_dir(name, d_events);
846 	if (!system->entry) {
847 		pr_warning("Could not create event subsystem %s\n",
848 			   name);
849 		kfree(system);
850 		return d_events;
851 	}
852 
853 	system->nr_events = 1;
854 	system->name = kstrdup(name, GFP_KERNEL);
855 	if (!system->name) {
856 		debugfs_remove(system->entry);
857 		kfree(system);
858 		return d_events;
859 	}
860 
861 	list_add(&system->list, &event_subsystems);
862 
863 	system->filter = NULL;
864 
865 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
866 	if (!system->filter) {
867 		pr_warning("Could not allocate filter for subsystem "
868 			   "'%s'\n", name);
869 		return system->entry;
870 	}
871 
872 	entry = debugfs_create_file("filter", 0644, system->entry, system,
873 				    &ftrace_subsystem_filter_fops);
874 	if (!entry) {
875 		kfree(system->filter);
876 		system->filter = NULL;
877 		pr_warning("Could not create debugfs "
878 			   "'%s/filter' entry\n", name);
879 	}
880 
881 	entry = trace_create_file("enable", 0644, system->entry,
882 				  (void *)system->name,
883 				  &ftrace_system_enable_fops);
884 
885 	return system->entry;
886 }
887 
888 static int
889 event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
890 		 const struct file_operations *id,
891 		 const struct file_operations *enable,
892 		 const struct file_operations *filter,
893 		 const struct file_operations *format)
894 {
895 	struct dentry *entry;
896 	int ret;
897 
898 	/*
899 	 * If the trace point header did not define TRACE_SYSTEM
900 	 * then the system would be called "TRACE_SYSTEM".
901 	 */
902 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
903 		d_events = event_subsystem_dir(call->system, d_events);
904 
905 	call->dir = debugfs_create_dir(call->name, d_events);
906 	if (!call->dir) {
907 		pr_warning("Could not create debugfs "
908 			   "'%s' directory\n", call->name);
909 		return -1;
910 	}
911 
912 	if (call->regfunc)
913 		entry = trace_create_file("enable", 0644, call->dir, call,
914 					  enable);
915 
916 	if (call->id && call->profile_enable)
917 		entry = trace_create_file("id", 0444, call->dir, call,
918 					  id);
919 
920 	if (call->define_fields) {
921 		ret = call->define_fields(call);
922 		if (ret < 0) {
923 			pr_warning("Could not initialize trace point"
924 				   " events/%s\n", call->name);
925 			return ret;
926 		}
927 		entry = trace_create_file("filter", 0644, call->dir, call,
928 					  filter);
929 	}
930 
931 	/* A trace may not want to export its format */
932 	if (!call->show_format)
933 		return 0;
934 
935 	entry = trace_create_file("format", 0444, call->dir, call,
936 				  format);
937 
938 	return 0;
939 }
940 
/*
 * Step @event through the array [start, end). The bounds are compared
 * as unsigned long addresses rather than as pointers.
 */
#define for_each_event(event, start, end)				\
	for ((event) = (start);						\
	     (unsigned long)(event) < (unsigned long)(end);		\
	     (event)++)
945 
946 #ifdef CONFIG_MODULES
947 
948 static LIST_HEAD(ftrace_module_file_list);
949 
950 /*
951  * Modules must own their file_operations to keep up with
952  * reference counting.
953  */
954 struct ftrace_module_file_ops {
955 	struct list_head		list;
956 	struct module			*mod;
957 	struct file_operations		id;
958 	struct file_operations		enable;
959 	struct file_operations		format;
960 	struct file_operations		filter;
961 };
962 
963 static void remove_subsystem_dir(const char *name)
964 {
965 	struct event_subsystem *system;
966 
967 	if (strcmp(name, TRACE_SYSTEM) == 0)
968 		return;
969 
970 	list_for_each_entry(system, &event_subsystems, list) {
971 		if (strcmp(system->name, name) == 0) {
972 			if (!--system->nr_events) {
973 				struct event_filter *filter = system->filter;
974 
975 				debugfs_remove_recursive(system->entry);
976 				list_del(&system->list);
977 				if (filter) {
978 					kfree(filter->filter_string);
979 					kfree(filter);
980 				}
981 				kfree(system->name);
982 				kfree(system);
983 			}
984 			break;
985 		}
986 	}
987 }
988 
989 static struct ftrace_module_file_ops *
990 trace_create_file_ops(struct module *mod)
991 {
992 	struct ftrace_module_file_ops *file_ops;
993 
994 	/*
995 	 * This is a bit of a PITA. To allow for correct reference
996 	 * counting, modules must "own" their file_operations.
997 	 * To do this, we allocate the file operations that will be
998 	 * used in the event directory.
999 	 */
1000 
1001 	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
1002 	if (!file_ops)
1003 		return NULL;
1004 
1005 	file_ops->mod = mod;
1006 
1007 	file_ops->id = ftrace_event_id_fops;
1008 	file_ops->id.owner = mod;
1009 
1010 	file_ops->enable = ftrace_enable_fops;
1011 	file_ops->enable.owner = mod;
1012 
1013 	file_ops->filter = ftrace_event_filter_fops;
1014 	file_ops->filter.owner = mod;
1015 
1016 	file_ops->format = ftrace_event_format_fops;
1017 	file_ops->format.owner = mod;
1018 
1019 	list_add(&file_ops->list, &ftrace_module_file_list);
1020 
1021 	return file_ops;
1022 }
1023 
1024 static void trace_module_add_events(struct module *mod)
1025 {
1026 	struct ftrace_module_file_ops *file_ops = NULL;
1027 	struct ftrace_event_call *call, *start, *end;
1028 	struct dentry *d_events;
1029 	int ret;
1030 
1031 	start = mod->trace_events;
1032 	end = mod->trace_events + mod->num_trace_events;
1033 
1034 	if (start == end)
1035 		return;
1036 
1037 	d_events = event_trace_events_dir();
1038 	if (!d_events)
1039 		return;
1040 
1041 	for_each_event(call, start, end) {
1042 		/* The linker may leave blanks */
1043 		if (!call->name)
1044 			continue;
1045 		if (call->raw_init) {
1046 			ret = call->raw_init();
1047 			if (ret < 0) {
1048 				if (ret != -ENOSYS)
1049 					pr_warning("Could not initialize trace "
1050 					"point events/%s\n", call->name);
1051 				continue;
1052 			}
1053 		}
1054 		/*
1055 		 * This module has events, create file ops for this module
1056 		 * if not already done.
1057 		 */
1058 		if (!file_ops) {
1059 			file_ops = trace_create_file_ops(mod);
1060 			if (!file_ops)
1061 				return;
1062 		}
1063 		call->mod = mod;
1064 		list_add(&call->list, &ftrace_events);
1065 		event_create_dir(call, d_events,
1066 				 &file_ops->id, &file_ops->enable,
1067 				 &file_ops->filter, &file_ops->format);
1068 	}
1069 }
1070 
1071 static void trace_module_remove_events(struct module *mod)
1072 {
1073 	struct ftrace_module_file_ops *file_ops;
1074 	struct ftrace_event_call *call, *p;
1075 	bool found = false;
1076 
1077 	down_write(&trace_event_mutex);
1078 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
1079 		if (call->mod == mod) {
1080 			found = true;
1081 			ftrace_event_enable_disable(call, 0);
1082 			if (call->event)
1083 				__unregister_ftrace_event(call->event);
1084 			debugfs_remove_recursive(call->dir);
1085 			list_del(&call->list);
1086 			trace_destroy_fields(call);
1087 			destroy_preds(call);
1088 			remove_subsystem_dir(call->system);
1089 		}
1090 	}
1091 
1092 	/* Now free the file_operations */
1093 	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1094 		if (file_ops->mod == mod)
1095 			break;
1096 	}
1097 	if (&file_ops->list != &ftrace_module_file_list) {
1098 		list_del(&file_ops->list);
1099 		kfree(file_ops);
1100 	}
1101 
1102 	/*
1103 	 * It is safest to reset the ring buffer if the module being unloaded
1104 	 * registered any events.
1105 	 */
1106 	if (found)
1107 		tracing_reset_current_online_cpus();
1108 	up_write(&trace_event_mutex);
1109 }
1110 
1111 static int trace_module_notify(struct notifier_block *self,
1112 			       unsigned long val, void *data)
1113 {
1114 	struct module *mod = data;
1115 
1116 	mutex_lock(&event_mutex);
1117 	switch (val) {
1118 	case MODULE_STATE_COMING:
1119 		trace_module_add_events(mod);
1120 		break;
1121 	case MODULE_STATE_GOING:
1122 		trace_module_remove_events(mod);
1123 		break;
1124 	}
1125 	mutex_unlock(&event_mutex);
1126 
1127 	return 0;
1128 }
1129 #else
/* Without CONFIG_MODULES there is nothing to do; always returns 0. */
static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	return 0;
}
1135 #endif /* CONFIG_MODULES */
1136 
1137 static struct notifier_block trace_module_nb = {
1138 	.notifier_call = trace_module_notify,
1139 	.priority = 0,
1140 };
1141 
1142 extern struct ftrace_event_call __start_ftrace_events[];
1143 extern struct ftrace_event_call __stop_ftrace_events[];
1144 
1145 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1146 
1147 static __init int setup_trace_event(char *str)
1148 {
1149 	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1150 	ring_buffer_expanded = 1;
1151 	tracing_selftest_disabled = 1;
1152 
1153 	return 1;
1154 }
1155 __setup("trace_event=", setup_trace_event);
1156 
1157 static __init int event_trace_init(void)
1158 {
1159 	struct ftrace_event_call *call;
1160 	struct dentry *d_tracer;
1161 	struct dentry *entry;
1162 	struct dentry *d_events;
1163 	int ret;
1164 	char *buf = bootup_event_buf;
1165 	char *token;
1166 
1167 	d_tracer = tracing_init_dentry();
1168 	if (!d_tracer)
1169 		return 0;
1170 
1171 	entry = debugfs_create_file("available_events", 0444, d_tracer,
1172 				    (void *)&show_event_seq_ops,
1173 				    &ftrace_avail_fops);
1174 	if (!entry)
1175 		pr_warning("Could not create debugfs "
1176 			   "'available_events' entry\n");
1177 
1178 	entry = debugfs_create_file("set_event", 0644, d_tracer,
1179 				    (void *)&show_set_event_seq_ops,
1180 				    &ftrace_set_event_fops);
1181 	if (!entry)
1182 		pr_warning("Could not create debugfs "
1183 			   "'set_event' entry\n");
1184 
1185 	d_events = event_trace_events_dir();
1186 	if (!d_events)
1187 		return 0;
1188 
1189 	/* ring buffer internal formats */
1190 	trace_create_file("header_page", 0444, d_events,
1191 			  ring_buffer_print_page_header,
1192 			  &ftrace_show_header_fops);
1193 
1194 	trace_create_file("header_event", 0444, d_events,
1195 			  ring_buffer_print_entry_header,
1196 			  &ftrace_show_header_fops);
1197 
1198 	trace_create_file("enable", 0644, d_events,
1199 			  NULL, &ftrace_system_enable_fops);
1200 
1201 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1202 		/* The linker may leave blanks */
1203 		if (!call->name)
1204 			continue;
1205 		if (call->raw_init) {
1206 			ret = call->raw_init();
1207 			if (ret < 0) {
1208 				if (ret != -ENOSYS)
1209 					pr_warning("Could not initialize trace "
1210 					"point events/%s\n", call->name);
1211 				continue;
1212 			}
1213 		}
1214 		list_add(&call->list, &ftrace_events);
1215 		event_create_dir(call, d_events, &ftrace_event_id_fops,
1216 				 &ftrace_enable_fops, &ftrace_event_filter_fops,
1217 				 &ftrace_event_format_fops);
1218 	}
1219 
1220 	while (true) {
1221 		token = strsep(&buf, ",");
1222 
1223 		if (!token)
1224 			break;
1225 		if (!*token)
1226 			continue;
1227 
1228 		ret = ftrace_set_clr_event(token, 1);
1229 		if (ret)
1230 			pr_warning("Failed to enable trace event: %s\n", token);
1231 	}
1232 
1233 	ret = register_module_notifier(&trace_module_nb);
1234 	if (ret)
1235 		pr_warning("Failed to register trace events module notifier\n");
1236 
1237 	return 0;
1238 }
1239 fs_initcall(event_trace_init);
1240 
1241 #ifdef CONFIG_FTRACE_STARTUP_TEST
1242 
1243 static DEFINE_SPINLOCK(test_spinlock);
1244 static DEFINE_SPINLOCK(test_spinlock_irq);
1245 static DEFINE_MUTEX(test_mutex);
1246 
1247 static __init void test_work(struct work_struct *dummy)
1248 {
1249 	spin_lock(&test_spinlock);
1250 	spin_lock_irq(&test_spinlock_irq);
1251 	udelay(1);
1252 	spin_unlock_irq(&test_spinlock_irq);
1253 	spin_unlock(&test_spinlock);
1254 
1255 	mutex_lock(&test_mutex);
1256 	msleep(1);
1257 	mutex_unlock(&test_mutex);
1258 }
1259 
1260 static __init int event_test_thread(void *unused)
1261 {
1262 	void *test_malloc;
1263 
1264 	test_malloc = kmalloc(1234, GFP_KERNEL);
1265 	if (!test_malloc)
1266 		pr_info("failed to kmalloc\n");
1267 
1268 	schedule_on_each_cpu(test_work);
1269 
1270 	kfree(test_malloc);
1271 
1272 	set_current_state(TASK_INTERRUPTIBLE);
1273 	while (!kthread_should_stop())
1274 		schedule();
1275 
1276 	return 0;
1277 }
1278 
1279 /*
1280  * Do various things that may trigger events.
1281  */
1282 static __init void event_test_stuff(void)
1283 {
1284 	struct task_struct *test_thread;
1285 
1286 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
1287 	msleep(1);
1288 	kthread_stop(test_thread);
1289 }
1290 
1291 /*
1292  * For every trace event defined, we will test each trace point separately,
1293  * and then by groups, and finally all trace points.
1294  */
1295 static __init void event_trace_self_tests(void)
1296 {
1297 	struct ftrace_event_call *call;
1298 	struct event_subsystem *system;
1299 	int ret;
1300 
1301 	pr_info("Running tests on trace events:\n");
1302 
1303 	list_for_each_entry(call, &ftrace_events, list) {
1304 
1305 		/* Only test those that have a regfunc */
1306 		if (!call->regfunc)
1307 			continue;
1308 
1309 /*
1310  * Testing syscall events here is pretty useless, but
1311  * we still do it if configured. But this is time consuming.
1312  * What we really need is a user thread to perform the
1313  * syscalls as we test.
1314  */
1315 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1316 		if (call->system &&
1317 		    strcmp(call->system, "syscalls") == 0)
1318 			continue;
1319 #endif
1320 
1321 		pr_info("Testing event %s: ", call->name);
1322 
1323 		/*
1324 		 * If an event is already enabled, someone is using
1325 		 * it and the self test should not be on.
1326 		 */
1327 		if (call->enabled) {
1328 			pr_warning("Enabled event during self test!\n");
1329 			WARN_ON_ONCE(1);
1330 			continue;
1331 		}
1332 
1333 		ftrace_event_enable_disable(call, 1);
1334 		event_test_stuff();
1335 		ftrace_event_enable_disable(call, 0);
1336 
1337 		pr_cont("OK\n");
1338 	}
1339 
1340 	/* Now test at the sub system level */
1341 
1342 	pr_info("Running tests on trace event systems:\n");
1343 
1344 	list_for_each_entry(system, &event_subsystems, list) {
1345 
1346 		/* the ftrace system is special, skip it */
1347 		if (strcmp(system->name, "ftrace") == 0)
1348 			continue;
1349 
1350 		pr_info("Testing event system %s: ", system->name);
1351 
1352 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1353 		if (WARN_ON_ONCE(ret)) {
1354 			pr_warning("error enabling system %s\n",
1355 				   system->name);
1356 			continue;
1357 		}
1358 
1359 		event_test_stuff();
1360 
1361 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1362 		if (WARN_ON_ONCE(ret))
1363 			pr_warning("error disabling system %s\n",
1364 				   system->name);
1365 
1366 		pr_cont("OK\n");
1367 	}
1368 
1369 	/* Test with all events enabled */
1370 
1371 	pr_info("Running tests on all trace events:\n");
1372 	pr_info("Testing all events: ");
1373 
1374 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1375 	if (WARN_ON_ONCE(ret)) {
1376 		pr_warning("error enabling all events\n");
1377 		return;
1378 	}
1379 
1380 	event_test_stuff();
1381 
1382 	/* reset sysname */
1383 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1384 	if (WARN_ON_ONCE(ret)) {
1385 		pr_warning("error disabling all events\n");
1386 		return;
1387 	}
1388 
1389 	pr_cont("OK\n");
1390 }
1391 
1392 #ifdef CONFIG_FUNCTION_TRACER
1393 
1394 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1395 
1396 static void
1397 function_test_events_call(unsigned long ip, unsigned long parent_ip)
1398 {
1399 	struct ring_buffer_event *event;
1400 	struct ring_buffer *buffer;
1401 	struct ftrace_entry *entry;
1402 	unsigned long flags;
1403 	long disabled;
1404 	int resched;
1405 	int cpu;
1406 	int pc;
1407 
1408 	pc = preempt_count();
1409 	resched = ftrace_preempt_disable();
1410 	cpu = raw_smp_processor_id();
1411 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1412 
1413 	if (disabled != 1)
1414 		goto out;
1415 
1416 	local_save_flags(flags);
1417 
1418 	event = trace_current_buffer_lock_reserve(&buffer,
1419 						  TRACE_FN, sizeof(*entry),
1420 						  flags, pc);
1421 	if (!event)
1422 		goto out;
1423 	entry	= ring_buffer_event_data(event);
1424 	entry->ip			= ip;
1425 	entry->parent_ip		= parent_ip;
1426 
1427 	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1428 
1429  out:
1430 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1431 	ftrace_preempt_enable(resched);
1432 }
1433 
1434 static struct ftrace_ops trace_ops __initdata  =
1435 {
1436 	.func = function_test_events_call,
1437 };
1438 
1439 static __init void event_trace_self_test_with_function(void)
1440 {
1441 	register_ftrace_function(&trace_ops);
1442 	pr_info("Running tests again, along with the function tracer\n");
1443 	event_trace_self_tests();
1444 	unregister_ftrace_function(&trace_ops);
1445 }
1446 #else
1447 static __init void event_trace_self_test_with_function(void)
1448 {
1449 }
1450 #endif
1451 
1452 static __init int event_trace_self_tests_init(void)
1453 {
1454 	if (!tracing_selftest_disabled) {
1455 		event_trace_self_tests();
1456 		event_trace_self_test_with_function();
1457 	}
1458 
1459 	return 0;
1460 }
1461 
1462 late_initcall(event_trace_self_tests_init);
1463 
1464 #endif
1465