xref: /openbmc/linux/kernel/trace/trace_events.c (revision 55e43d6abd078ed6d219902ce8cb4d68e3c993ba)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * event tracer
4  *
5  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
6  *
7  *  - Added format output of fields of the trace point.
8  *    This was based on work by Tom Zanussi <tzanussi@gmail.com>.
9  *
10  */
11 
12 #define pr_fmt(fmt) fmt
13 
14 #include <linux/workqueue.h>
15 #include <linux/security.h>
16 #include <linux/spinlock.h>
17 #include <linux/kthread.h>
18 #include <linux/tracefs.h>
19 #include <linux/uaccess.h>
20 #include <linux/module.h>
21 #include <linux/ctype.h>
22 #include <linux/sort.h>
23 #include <linux/slab.h>
24 #include <linux/delay.h>
25 
26 #include <trace/events/sched.h>
27 #include <trace/syscall.h>
28 
29 #include <asm/setup.h>
30 
31 #include "trace_output.h"
32 
33 #undef TRACE_SYSTEM
34 #define TRACE_SYSTEM "TRACE_SYSTEM"
35 
36 DEFINE_MUTEX(event_mutex);
37 
38 LIST_HEAD(ftrace_events);
39 static LIST_HEAD(ftrace_generic_fields);
40 static LIST_HEAD(ftrace_common_fields);
41 static bool eventdir_initialized;
42 
43 static LIST_HEAD(module_strings);
44 
45 struct module_string {
46 	struct list_head	next;
47 	struct module		*module;
48 	char			*str;
49 };
50 
51 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
52 
53 static struct kmem_cache *field_cachep;
54 static struct kmem_cache *file_cachep;
55 
56 static inline int system_refcount(struct event_subsystem *system)
57 {
58 	return system->ref_count;
59 }
60 
61 static int system_refcount_inc(struct event_subsystem *system)
62 {
63 	return system->ref_count++;
64 }
65 
66 static int system_refcount_dec(struct event_subsystem *system)
67 {
68 	return --system->ref_count;
69 }
70 
71 /* Double loops, do not use break, only gotos work */
72 #define do_for_each_event_file(tr, file)			\
73 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {	\
74 		list_for_each_entry(file, &tr->events, list)
75 
76 #define do_for_each_event_file_safe(tr, file)			\
77 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {	\
78 		struct trace_event_file *___n;				\
79 		list_for_each_entry_safe(file, ___n, &tr->events, list)
80 
81 #define while_for_each_event_file()		\
82 	}
83 
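/*
 * Illustrative sketch (not part of this file) of how the macros above
 * are used. They expand to two nested list_for_each_entry() loops, so a
 * "break" would only leave the inner loop; use a goto to leave both.
 * "stop_condition" is a hypothetical predicate.
 *
 *	struct trace_event_file *file;
 *	struct trace_array *tr;
 *
 *	do_for_each_event_file(tr, file) {
 *		if (stop_condition(file))
 *			goto out;
 *	} while_for_each_event_file();
 * out:
 *	...
 */
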
84 static struct ftrace_event_field *
85 __find_event_field(struct list_head *head, char *name)
86 {
87 	struct ftrace_event_field *field;
88 
89 	list_for_each_entry(field, head, link) {
90 		if (!strcmp(field->name, name))
91 			return field;
92 	}
93 
94 	return NULL;
95 }
96 
97 struct ftrace_event_field *
98 trace_find_event_field(struct trace_event_call *call, char *name)
99 {
100 	struct ftrace_event_field *field;
101 	struct list_head *head;
102 
103 	head = trace_get_fields(call);
104 	field = __find_event_field(head, name);
105 	if (field)
106 		return field;
107 
108 	field = __find_event_field(&ftrace_generic_fields, name);
109 	if (field)
110 		return field;
111 
112 	return __find_event_field(&ftrace_common_fields, name);
113 }
114 
115 static int __trace_define_field(struct list_head *head, const char *type,
116 				const char *name, int offset, int size,
117 				int is_signed, int filter_type, int len)
118 {
119 	struct ftrace_event_field *field;
120 
121 	field = kmem_cache_alloc(field_cachep, GFP_TRACE);
122 	if (!field)
123 		return -ENOMEM;
124 
125 	field->name = name;
126 	field->type = type;
127 
128 	if (filter_type == FILTER_OTHER)
129 		field->filter_type = filter_assign_type(type);
130 	else
131 		field->filter_type = filter_type;
132 
133 	field->offset = offset;
134 	field->size = size;
135 	field->is_signed = is_signed;
136 	field->len = len;
137 
138 	list_add(&field->link, head);
139 
140 	return 0;
141 }
142 
143 int trace_define_field(struct trace_event_call *call, const char *type,
144 		       const char *name, int offset, int size, int is_signed,
145 		       int filter_type)
146 {
147 	struct list_head *head;
148 
149 	if (WARN_ON(!call->class))
150 		return 0;
151 
152 	head = trace_get_fields(call);
153 	return __trace_define_field(head, type, name, offset, size,
154 				    is_signed, filter_type, 0);
155 }
156 EXPORT_SYMBOL_GPL(trace_define_field);
157 
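/*
 * Hedged usage sketch for trace_define_field(): describing one signed
 * 32-bit payload field of a hypothetical event record ("struct
 * my_entry" and "value" are illustrative, not from this file).
 *
 *	struct my_entry {
 *		struct trace_entry	ent;	// common fields come first
 *		int			value;	// event payload
 *	};
 *
 *	ret = trace_define_field(call, "int", "value",
 *				 offsetof(struct my_entry, value),
 *				 sizeof(int), is_signed_type(int),
 *				 FILTER_OTHER);
 */
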
158 static int trace_define_field_ext(struct trace_event_call *call, const char *type,
159 		       const char *name, int offset, int size, int is_signed,
160 		       int filter_type, int len)
161 {
162 	struct list_head *head;
163 
164 	if (WARN_ON(!call->class))
165 		return 0;
166 
167 	head = trace_get_fields(call);
168 	return __trace_define_field(head, type, name, offset, size,
169 				    is_signed, filter_type, len);
170 }
171 
172 #define __generic_field(type, item, filter_type)			\
173 	ret = __trace_define_field(&ftrace_generic_fields, #type,	\
174 				   #item, 0, 0, is_signed_type(type),	\
175 				   filter_type, 0);			\
176 	if (ret)							\
177 		return ret;
178 
179 #define __common_field(type, item)					\
180 	ret = __trace_define_field(&ftrace_common_fields, #type,	\
181 				   "common_" #item,			\
182 				   offsetof(typeof(ent), item),		\
183 				   sizeof(ent.item),			\
184 				   is_signed_type(type), FILTER_OTHER, 0);	\
185 	if (ret)							\
186 		return ret;
187 
188 static int trace_define_generic_fields(void)
189 {
190 	int ret;
191 
192 	__generic_field(int, CPU, FILTER_CPU);
193 	__generic_field(int, cpu, FILTER_CPU);
194 	__generic_field(int, common_cpu, FILTER_CPU);
195 	__generic_field(char *, COMM, FILTER_COMM);
196 	__generic_field(char *, comm, FILTER_COMM);
197 	__generic_field(char *, stacktrace, FILTER_STACKTRACE);
198 	__generic_field(char *, STACKTRACE, FILTER_STACKTRACE);
199 
200 	return ret;
201 }
202 
203 static int trace_define_common_fields(void)
204 {
205 	int ret;
206 	struct trace_entry ent;
207 
208 	__common_field(unsigned short, type);
209 	__common_field(unsigned char, flags);
210 	/* Holds both preempt_count and migrate_disable */
211 	__common_field(unsigned char, preempt_count);
212 	__common_field(int, pid);
213 
214 	return ret;
215 }
216 
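/*
 * The definitions above produce the leading block of every event's
 * "format" file. Assuming the usual struct trace_entry layout, that
 * block reads:
 *
 *	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 *	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
 *	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
 *	field:int common_pid;	offset:4;	size:4;	signed:1;
 */
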
217 static void trace_destroy_fields(struct trace_event_call *call)
218 {
219 	struct ftrace_event_field *field, *next;
220 	struct list_head *head;
221 
222 	head = trace_get_fields(call);
223 	list_for_each_entry_safe(field, next, head, link) {
224 		list_del(&field->link);
225 		kmem_cache_free(field_cachep, field);
226 	}
227 }
228 
229 /*
230  * run-time version of trace_event_get_offsets_<call>() that returns the last
231  * accessible offset of trace fields excluding __dynamic_array bytes
232  */
233 int trace_event_get_offsets(struct trace_event_call *call)
234 {
235 	struct ftrace_event_field *tail;
236 	struct list_head *head;
237 
238 	head = trace_get_fields(call);
239 	/*
240 	 * head->next points to the last field with the largest offset,
241 	 * since it was added last by trace_define_field()
242 	 */
243 	tail = list_first_entry(head, struct ftrace_event_field, link);
244 	return tail->offset + tail->size;
245 }
246 
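/*
 * Worked example of the invariant above: list_add() prepends, so after
 * defining a 4-byte field at offset 8 and then an 8-byte field at
 * offset 16, head->next is the offset-16 field and the function
 * returns 16 + 8 = 24.
 */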
247 
248 static struct trace_event_fields *find_event_field(const char *fmt,
249 						   struct trace_event_call *call)
250 {
251 	struct trace_event_fields *field = call->class->fields_array;
252 	const char *p = fmt;
253 	int len;
254 
255 	if (!(len = str_has_prefix(fmt, "REC->")))
256 		return NULL;
257 	fmt += len;
258 	for (p = fmt; *p; p++) {
259 		if (!isalnum(*p) && *p != '_')
260 			break;
261 	}
262 	len = p - fmt;
263 
264 	for (; field->type; field++) {
265 		if (strncmp(field->name, fmt, len) || field->name[len])
266 			continue;
267 
268 		return field;
269 	}
270 	return NULL;
271 }
272 
273 /*
274  * Check if the referenced field is an array and return true,
275  * as arrays are OK to dereference.
276  */
277 static bool test_field(const char *fmt, struct trace_event_call *call)
278 {
279 	struct trace_event_fields *field;
280 
281 	field = find_event_field(fmt, call);
282 	if (!field)
283 		return false;
284 
285 	/* This is an array and is OK to dereference. */
286 	return strchr(field->type, '[') != NULL;
287 }
288 
289 /* Look for a string within an argument */
290 static bool find_print_string(const char *arg, const char *str, const char *end)
291 {
292 	const char *r;
293 
294 	r = strstr(arg, str);
295 	return r && r < end;
296 }
297 
298 /* Return true if the argument pointer is safe */
299 static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
300 {
301 	const char *r, *e, *a;
302 
303 	e = fmt + len;
304 
305 	/* Find the REC-> in the argument */
306 	r = strstr(fmt, "REC->");
307 	if (r && r < e) {
308 		/*
309 		 * Addresses of events on the buffer, or an array on the buffer, are
310 		 * OK to dereference. There are ways to fool this, but
311 		 * this is to catch common mistakes, not malicious code.
312 		 */
313 		a = strchr(fmt, '&');
314 		if ((a && (a < r)) || test_field(r, call))
315 			return true;
316 	} else if (find_print_string(fmt, "__get_dynamic_array(", e)) {
317 		return true;
318 	} else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) {
319 		return true;
320 	} else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) {
321 		return true;
322 	} else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) {
323 		return true;
324 	} else if (find_print_string(fmt, "__get_sockaddr(", e)) {
325 		return true;
326 	} else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) {
327 		return true;
328 	}
329 	return false;
330 }
331 
332 /* Return true if the string is safe */
333 static bool process_string(const char *fmt, int len, struct trace_event_call *call)
334 {
335 	const char *r, *e, *s;
336 
337 	e = fmt + len;
338 
339 	/*
340 	 * There are several helper functions that return strings.
341 	 * If the argument contains a function, then assume it is valid.
342 	 * An argument is considered to contain a function if an
343 	 * alphanumeric character or '_' appears before a parenthesis.
344 	 */
345 	s = fmt;
346 	do {
347 		r = strstr(s, "(");
348 		if (!r || r >= e)
349 			break;
350 		for (int i = 1; r - i >= s; i++) {
351 			char ch = *(r - i);
352 			if (isspace(ch))
353 				continue;
354 			if (isalnum(ch) || ch == '_')
355 				return true;
356 			/* Anything else, this isn't a function */
357 			break;
358 		}
359 		/* A function could be wrapped in parentheses, try the next one */
360 		s = r + 1;
361 	} while (s < e);
362 
363 	/*
364 	 * If there's any strings in the argument consider this arg OK as it
365 	 * could be: REC->field ? "foo" : "bar" and we don't want to get into
366 	 * verifying that logic here.
367 	 */
368 	if (find_print_string(fmt, "\"", e))
369 		return true;
370 
371 	/* Dereferenced strings are also valid like any other pointer */
372 	if (process_pointer(fmt, len, call))
373 		return true;
374 
375 	/* Make sure the field is found, and consider it OK for now if it is */
376 	return find_event_field(fmt, call) != NULL;
377 }
378 
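/*
 * Illustrative print_fmt arguments and how the helpers above classify
 * them (all field names are hypothetical):
 *
 *	REC->buf			safe if "buf" is an array field
 *	&REC->bar			safe: address of data in the event
 *	__get_dynamic_array(data)	safe: data copied into the event
 *	REC->ptr			unsafe: dereferences a saved pointer
 *	REC->flag ? "yes" : "no"	safe: contains a literal string
 */
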
379 /*
380  * Examine the print fmt of the event looking for unsafe dereference
381  * pointers using %p* that could be recorded in the trace event and
382  * much later referenced after the pointer was freed. Dereferencing
383  * pointers are OK, if it is dereferenced into the event itself.
384  */
385 static void test_event_printk(struct trace_event_call *call)
386 {
387 	u64 dereference_flags = 0;
388 	u64 string_flags = 0;
389 	bool first = true;
390 	const char *fmt;
391 	int parens = 0;
392 	char in_quote = 0;
393 	int start_arg = 0;
394 	int arg = 0;
395 	int i, e;
396 
397 	fmt = call->print_fmt;
398 
399 	if (!fmt)
400 		return;
401 
402 	for (i = 0; fmt[i]; i++) {
403 		switch (fmt[i]) {
404 		case '\\':
405 			i++;
406 			if (!fmt[i])
407 				return;
408 			continue;
409 		case '"':
410 		case '\'':
411 			/*
412 			 * The print fmt starts with a string that
413 			 * is processed first to find %p* usage,
414 			 * then after the first string, the print fmt
415 			 * contains arguments that are used to check
416 			 * if the dereferenced %p* usage is safe.
417 			 */
418 			if (first) {
419 				if (fmt[i] == '\'')
420 					continue;
421 				if (in_quote) {
422 					arg = 0;
423 					first = false;
424 					/*
425 					 * If there were no %p* uses,
426 					 * the fmt is OK.
427 					 */
428 					if (!dereference_flags)
429 						return;
430 				}
431 			}
432 			if (in_quote) {
433 				if (in_quote == fmt[i])
434 					in_quote = 0;
435 			} else {
436 				in_quote = fmt[i];
437 			}
438 			continue;
439 		case '%':
440 			if (!first || !in_quote)
441 				continue;
442 			i++;
443 			if (!fmt[i])
444 				return;
445 			switch (fmt[i]) {
446 			case '%':
447 				continue;
448 			case 'p':
449 				/* Find dereferencing fields */
450 				switch (fmt[i + 1]) {
451 				case 'B': case 'R': case 'r':
452 				case 'b': case 'M': case 'm':
453 				case 'I': case 'i': case 'E':
454 				case 'U': case 'V': case 'N':
455 				case 'a': case 'd': case 'D':
456 				case 'g': case 't': case 'C':
457 				case 'O': case 'f':
458 					if (WARN_ONCE(arg == 63,
459 						      "Too many args for event: %s",
460 						      trace_event_name(call)))
461 						return;
462 					dereference_flags |= 1ULL << arg;
463 				}
464 				break;
465 			default:
466 			{
467 				bool star = false;
468 				int j;
469 
470 				/* Increment arg if %*s exists. */
471 				for (j = 0; fmt[i + j]; j++) {
472 					if (isdigit(fmt[i + j]) ||
473 					    fmt[i + j] == '.')
474 						continue;
475 					if (fmt[i + j] == '*') {
476 						star = true;
477 						continue;
478 					}
479 					if ((fmt[i + j] == 's')) {
480 						if (star)
481 							arg++;
482 						if (WARN_ONCE(arg == 63,
483 							      "Too many args for event: %s",
484 							      trace_event_name(call)))
485 							return;
486 						dereference_flags |= 1ULL << arg;
487 						string_flags |= 1ULL << arg;
488 					}
489 					break;
490 				}
491 				break;
492 			} /* default */
493 
494 			} /* switch */
495 			arg++;
496 			continue;
497 		case '(':
498 			if (in_quote)
499 				continue;
500 			parens++;
501 			continue;
502 		case ')':
503 			if (in_quote)
504 				continue;
505 			parens--;
506 			if (WARN_ONCE(parens < 0,
507 				      "Paren mismatch for event: %s\narg='%s'\n%*s",
508 				      trace_event_name(call),
509 				      fmt + start_arg,
510 				      (i - start_arg) + 5, "^"))
511 				return;
512 			continue;
513 		case ',':
514 			if (in_quote || parens)
515 				continue;
516 			e = i;
517 			i++;
518 			while (isspace(fmt[i]))
519 				i++;
520 
521 			/*
522 			 * If start_arg is zero, then this is the start of the
523 			 * first argument. The processing of the argument happens
524 			 * when the end of the argument is found, as it needs to
525 			 * handle parentheses and such.
526 			 */
527 			if (!start_arg) {
528 				start_arg = i;
529 				/* Balance out the i++ in the for loop */
530 				i--;
531 				continue;
532 			}
533 
534 			if (dereference_flags & (1ULL << arg)) {
535 				if (string_flags & (1ULL << arg)) {
536 					if (process_string(fmt + start_arg, e - start_arg, call))
537 						dereference_flags &= ~(1ULL << arg);
538 				} else if (process_pointer(fmt + start_arg, e - start_arg, call))
539 					dereference_flags &= ~(1ULL << arg);
540 			}
541 
542 			start_arg = i;
543 			arg++;
544 			/* Balance out the i++ in the for loop */
545 			i--;
546 		}
547 	}
548 
549 	if (dereference_flags & (1ULL << arg)) {
550 		if (string_flags & (1ULL << arg)) {
551 			if (process_string(fmt + start_arg, i - start_arg, call))
552 				dereference_flags &= ~(1ULL << arg);
553 		} else if (process_pointer(fmt + start_arg, i - start_arg, call))
554 			dereference_flags &= ~(1ULL << arg);
555 	}
556 
557 	/*
558 	 * If you triggered the below warning, the trace event reported
559 	 * uses an unsafe dereference pointer %p*. As the data stored
560 	 * at the trace event time may no longer exist when the trace
561 	 * event is printed, dereferencing to the original source is
562 	 * unsafe. The source of the dereference must be copied into the
563 	 * event itself, and the dereference must access the copy instead.
564 	 */
565 	if (WARN_ON_ONCE(dereference_flags)) {
566 		arg = 1;
567 		while (!(dereference_flags & 1)) {
568 			dereference_flags >>= 1;
569 			arg++;
570 		}
571 		pr_warn("event %s has unsafe dereference of argument %d\n",
572 			trace_event_name(call), arg);
573 		pr_warn("print_fmt: %s\n", fmt);
574 	}
575 }
576 
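/*
 * Hedged example of what this check catches. A hypothetical tracepoint
 * declared with
 *
 *	TP_printk("name=%s", __entry->name_ptr)
 *
 * records only the pointer, and the string it points to may be freed
 * before the event is printed, so test_event_printk() warns. The safe
 * form copies the string into the event and prints the copy:
 *
 *	TP_printk("name=%s", __get_str(name))
 */
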
577 int trace_event_raw_init(struct trace_event_call *call)
578 {
579 	int id;
580 
581 	id = register_trace_event(&call->event);
582 	if (!id)
583 		return -ENODEV;
584 
585 	test_event_printk(call);
586 
587 	return 0;
588 }
589 EXPORT_SYMBOL_GPL(trace_event_raw_init);
590 
591 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
592 {
593 	struct trace_array *tr = trace_file->tr;
594 	struct trace_array_cpu *data;
595 	struct trace_pid_list *no_pid_list;
596 	struct trace_pid_list *pid_list;
597 
598 	pid_list = rcu_dereference_raw(tr->filtered_pids);
599 	no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);
600 
601 	if (!pid_list && !no_pid_list)
602 		return false;
603 
604 	data = this_cpu_ptr(tr->array_buffer.data);
605 
606 	return data->ignore_pid;
607 }
608 EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
609 
610 void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
611 				 struct trace_event_file *trace_file,
612 				 unsigned long len)
613 {
614 	struct trace_event_call *event_call = trace_file->event_call;
615 
616 	if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
617 	    trace_event_ignore_this_pid(trace_file))
618 		return NULL;
619 
620 	/*
621 	 * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
622 	 * preemption (adding one to the preempt_count). Since we are
623 	 * interested in the preempt_count at the time the tracepoint was
624 	 * hit, we need to subtract one to offset the increment.
625 	 */
626 	fbuffer->trace_ctx = tracing_gen_ctx_dec();
627 	fbuffer->trace_file = trace_file;
628 
629 	fbuffer->event =
630 		trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
631 						event_call->event.type, len,
632 						fbuffer->trace_ctx);
633 	if (!fbuffer->event)
634 		return NULL;
635 
636 	fbuffer->regs = NULL;
637 	fbuffer->entry = ring_buffer_event_data(fbuffer->event);
638 	return fbuffer->entry;
639 }
640 EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
641 
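/*
 * Minimal sketch of how a probe pairs with the reserve call above; the
 * generated probes in include/trace/trace_events.h follow this shape
 * ("struct my_entry" and "value" are hypothetical):
 *
 *	struct trace_event_buffer fbuffer;
 *	struct my_entry *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;			// filtered out or buffer full
 *	entry->value = value;		// fill in the payload
 *	trace_event_buffer_commit(&fbuffer);
 */
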
642 int trace_event_reg(struct trace_event_call *call,
643 		    enum trace_reg type, void *data)
644 {
645 	struct trace_event_file *file = data;
646 
647 	WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
648 	switch (type) {
649 	case TRACE_REG_REGISTER:
650 		return tracepoint_probe_register(call->tp,
651 						 call->class->probe,
652 						 file);
653 	case TRACE_REG_UNREGISTER:
654 		tracepoint_probe_unregister(call->tp,
655 					    call->class->probe,
656 					    file);
657 		return 0;
658 
659 #ifdef CONFIG_PERF_EVENTS
660 	case TRACE_REG_PERF_REGISTER:
661 		return tracepoint_probe_register(call->tp,
662 						 call->class->perf_probe,
663 						 call);
664 	case TRACE_REG_PERF_UNREGISTER:
665 		tracepoint_probe_unregister(call->tp,
666 					    call->class->perf_probe,
667 					    call);
668 		return 0;
669 	case TRACE_REG_PERF_OPEN:
670 	case TRACE_REG_PERF_CLOSE:
671 	case TRACE_REG_PERF_ADD:
672 	case TRACE_REG_PERF_DEL:
673 		return 0;
674 #endif
675 	}
676 	return 0;
677 }
678 EXPORT_SYMBOL_GPL(trace_event_reg);
679 
680 void trace_event_enable_cmd_record(bool enable)
681 {
682 	struct trace_event_file *file;
683 	struct trace_array *tr;
684 
685 	lockdep_assert_held(&event_mutex);
686 
687 	do_for_each_event_file(tr, file) {
688 
689 		if (!(file->flags & EVENT_FILE_FL_ENABLED))
690 			continue;
691 
692 		if (enable) {
693 			tracing_start_cmdline_record();
694 			set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
695 		} else {
696 			tracing_stop_cmdline_record();
697 			clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
698 		}
699 	} while_for_each_event_file();
700 }
701 
702 void trace_event_enable_tgid_record(bool enable)
703 {
704 	struct trace_event_file *file;
705 	struct trace_array *tr;
706 
707 	lockdep_assert_held(&event_mutex);
708 
709 	do_for_each_event_file(tr, file) {
710 		if (!(file->flags & EVENT_FILE_FL_ENABLED))
711 			continue;
712 
713 		if (enable) {
714 			tracing_start_tgid_record();
715 			set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
716 		} else {
717 			tracing_stop_tgid_record();
718 			clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
719 				  &file->flags);
720 		}
721 	} while_for_each_event_file();
722 }
723 
724 static int __ftrace_event_enable_disable(struct trace_event_file *file,
725 					 int enable, int soft_disable)
726 {
727 	struct trace_event_call *call = file->event_call;
728 	struct trace_array *tr = file->tr;
729 	int ret = 0;
730 	int disable;
731 
732 	switch (enable) {
733 	case 0:
734 		/*
735 		 * When soft_disable is set and enable is cleared, the sm_ref
736 		 * reference counter is decremented. If it reaches 0, we want
737 		 * to clear the SOFT_DISABLED flag but leave the event in the
738 		 * state that it was. That is, if the event was enabled and
739 		 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
740 		 * is set we do not want the event to be enabled before we
741 		 * clear the bit.
742 		 *
743 		 * When soft_disable is not set but the SOFT_MODE flag is,
744 		 * we do nothing. Do not disable the tracepoint, otherwise
745 		 * "soft enable"s (clearing the SOFT_DISABLED bit) wont work.
746 		 */
747 		if (soft_disable) {
748 			if (atomic_dec_return(&file->sm_ref) > 0)
749 				break;
750 			disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
751 			clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
752 			/* Disable use of trace_buffered_event */
753 			trace_buffered_event_disable();
754 		} else
755 			disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
756 
757 		if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
758 			clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
759 			if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
760 				tracing_stop_cmdline_record();
761 				clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
762 			}
763 
764 			if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
765 				tracing_stop_tgid_record();
766 				clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
767 			}
768 
769 			call->class->reg(call, TRACE_REG_UNREGISTER, file);
770 		}
771 		/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
772 		if (file->flags & EVENT_FILE_FL_SOFT_MODE)
773 			set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
774 		else
775 			clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
776 		break;
777 	case 1:
778 		/*
779 		 * When soft_disable is set and enable is set, we want to
780 		 * register the tracepoint for the event, but leave the event
781 		 * as is. That means, if the event was already enabled, we do
782 		 * nothing (but set SOFT_MODE). If the event is disabled, we
783 		 * set SOFT_DISABLED before enabling the event tracepoint, so
784 		 * it still seems to be disabled.
785 		 */
786 		if (!soft_disable)
787 			clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
788 		else {
789 			if (atomic_inc_return(&file->sm_ref) > 1)
790 				break;
791 			set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
792 			/* Enable use of trace_buffered_event */
793 			trace_buffered_event_enable();
794 		}
795 
796 		if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
797 			bool cmd = false, tgid = false;
798 
799 			/* Keep the event disabled, when going to SOFT_MODE. */
800 			if (soft_disable)
801 				set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
802 
803 			if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
804 				cmd = true;
805 				tracing_start_cmdline_record();
806 				set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
807 			}
808 
809 			if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
810 				tgid = true;
811 				tracing_start_tgid_record();
812 				set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
813 			}
814 
815 			ret = call->class->reg(call, TRACE_REG_REGISTER, file);
816 			if (ret) {
817 				if (cmd)
818 					tracing_stop_cmdline_record();
819 				if (tgid)
820 					tracing_stop_tgid_record();
821 				pr_info("event trace: Could not enable event "
822 					"%s\n", trace_event_name(call));
823 				break;
824 			}
825 			set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
826 
827 			/* WAS_ENABLED gets set but never cleared. */
828 			set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags);
829 		}
830 		break;
831 	}
832 
833 	return ret;
834 }
835 
836 int trace_event_enable_disable(struct trace_event_file *file,
837 			       int enable, int soft_disable)
838 {
839 	return __ftrace_event_enable_disable(file, enable, soft_disable);
840 }
841 
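/*
 * Sketch of the soft-disable protocol, assuming a caller such as an
 * event trigger: a soft enable registers the tracepoint but leaves
 * SOFT_DISABLED set, so nothing is recorded until that bit is cleared.
 *
 *	trace_event_enable_disable(file, 1, 1);	// arm, still silent
 *	clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
 *	...
 *	trace_event_enable_disable(file, 0, 1);	// drop the soft ref
 */
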
842 static int ftrace_event_enable_disable(struct trace_event_file *file,
843 				       int enable)
844 {
845 	return __ftrace_event_enable_disable(file, enable, 0);
846 }
847 
848 static void ftrace_clear_events(struct trace_array *tr)
849 {
850 	struct trace_event_file *file;
851 
852 	mutex_lock(&event_mutex);
853 	list_for_each_entry(file, &tr->events, list) {
854 		ftrace_event_enable_disable(file, 0);
855 	}
856 	mutex_unlock(&event_mutex);
857 }
858 
859 static void
860 event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
861 {
862 	struct trace_pid_list *pid_list;
863 	struct trace_array *tr = data;
864 
865 	pid_list = rcu_dereference_raw(tr->filtered_pids);
866 	trace_filter_add_remove_task(pid_list, NULL, task);
867 
868 	pid_list = rcu_dereference_raw(tr->filtered_no_pids);
869 	trace_filter_add_remove_task(pid_list, NULL, task);
870 }
871 
872 static void
873 event_filter_pid_sched_process_fork(void *data,
874 				    struct task_struct *self,
875 				    struct task_struct *task)
876 {
877 	struct trace_pid_list *pid_list;
878 	struct trace_array *tr = data;
879 
880 	pid_list = rcu_dereference_sched(tr->filtered_pids);
881 	trace_filter_add_remove_task(pid_list, self, task);
882 
883 	pid_list = rcu_dereference_sched(tr->filtered_no_pids);
884 	trace_filter_add_remove_task(pid_list, self, task);
885 }
886 
887 void trace_event_follow_fork(struct trace_array *tr, bool enable)
888 {
889 	if (enable) {
890 		register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
891 						       tr, INT_MIN);
892 		register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit,
893 						       tr, INT_MAX);
894 	} else {
895 		unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
896 						    tr);
897 		unregister_trace_sched_process_free(event_filter_pid_sched_process_exit,
898 						    tr);
899 	}
900 }
901 
902 static void
903 event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
904 					struct task_struct *prev,
905 					struct task_struct *next,
906 					unsigned int prev_state)
907 {
908 	struct trace_array *tr = data;
909 	struct trace_pid_list *no_pid_list;
910 	struct trace_pid_list *pid_list;
911 	bool ret;
912 
913 	pid_list = rcu_dereference_sched(tr->filtered_pids);
914 	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
915 
916 	/*
917 	 * Sched switch is funny, as we only want to ignore it
918 	 * in the notrace case if both prev and next should be ignored.
919 	 */
920 	ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
921 		trace_ignore_this_task(NULL, no_pid_list, next);
922 
923 	this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
924 		       (trace_ignore_this_task(pid_list, NULL, prev) &&
925 			trace_ignore_this_task(pid_list, NULL, next)));
926 }
927 
928 static void
929 event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
930 					 struct task_struct *prev,
931 					 struct task_struct *next,
932 					 unsigned int prev_state)
933 {
934 	struct trace_array *tr = data;
935 	struct trace_pid_list *no_pid_list;
936 	struct trace_pid_list *pid_list;
937 
938 	pid_list = rcu_dereference_sched(tr->filtered_pids);
939 	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
940 
941 	this_cpu_write(tr->array_buffer.data->ignore_pid,
942 		       trace_ignore_this_task(pid_list, no_pid_list, next));
943 }
944 
945 static void
946 event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
947 {
948 	struct trace_array *tr = data;
949 	struct trace_pid_list *no_pid_list;
950 	struct trace_pid_list *pid_list;
951 
952 	/* Nothing to do if we are already tracing */
953 	if (!this_cpu_read(tr->array_buffer.data->ignore_pid))
954 		return;
955 
956 	pid_list = rcu_dereference_sched(tr->filtered_pids);
957 	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
958 
959 	this_cpu_write(tr->array_buffer.data->ignore_pid,
960 		       trace_ignore_this_task(pid_list, no_pid_list, task));
961 }
962 
963 static void
964 event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
965 {
966 	struct trace_array *tr = data;
967 	struct trace_pid_list *no_pid_list;
968 	struct trace_pid_list *pid_list;
969 
970 	/* Nothing to do if we are not tracing */
971 	if (this_cpu_read(tr->array_buffer.data->ignore_pid))
972 		return;
973 
974 	pid_list = rcu_dereference_sched(tr->filtered_pids);
975 	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
976 
977 	/* Set tracing if current is enabled */
978 	this_cpu_write(tr->array_buffer.data->ignore_pid,
979 		       trace_ignore_this_task(pid_list, no_pid_list, current));
980 }
981 
982 static void unregister_pid_events(struct trace_array *tr)
983 {
984 	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
985 	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
986 
987 	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
988 	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
989 
990 	unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
991 	unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
992 
993 	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
994 	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
995 }
996 
997 static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
998 {
999 	struct trace_pid_list *pid_list;
1000 	struct trace_pid_list *no_pid_list;
1001 	struct trace_event_file *file;
1002 	int cpu;
1003 
1004 	pid_list = rcu_dereference_protected(tr->filtered_pids,
1005 					     lockdep_is_held(&event_mutex));
1006 	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
1007 					     lockdep_is_held(&event_mutex));
1008 
1009 	/* Make sure there's something to do */
1010 	if (!pid_type_enabled(type, pid_list, no_pid_list))
1011 		return;
1012 
1013 	if (!still_need_pid_events(type, pid_list, no_pid_list)) {
1014 		unregister_pid_events(tr);
1015 
1016 		list_for_each_entry(file, &tr->events, list) {
1017 			clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
1018 		}
1019 
1020 		for_each_possible_cpu(cpu)
1021 			per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
1022 	}
1023 
1024 	if (type & TRACE_PIDS)
1025 		rcu_assign_pointer(tr->filtered_pids, NULL);
1026 
1027 	if (type & TRACE_NO_PIDS)
1028 		rcu_assign_pointer(tr->filtered_no_pids, NULL);
1029 
1030 	/* Wait till all users are no longer using pid filtering */
1031 	tracepoint_synchronize_unregister();
1032 
1033 	if ((type & TRACE_PIDS) && pid_list)
1034 		trace_pid_list_free(pid_list);
1035 
1036 	if ((type & TRACE_NO_PIDS) && no_pid_list)
1037 		trace_pid_list_free(no_pid_list);
1038 }
1039 
1040 static void ftrace_clear_event_pids(struct trace_array *tr, int type)
1041 {
1042 	mutex_lock(&event_mutex);
1043 	__ftrace_clear_event_pids(tr, type);
1044 	mutex_unlock(&event_mutex);
1045 }
1046 
1047 static void __put_system(struct event_subsystem *system)
1048 {
1049 	struct event_filter *filter = system->filter;
1050 
1051 	WARN_ON_ONCE(system_refcount(system) == 0);
1052 	if (system_refcount_dec(system))
1053 		return;
1054 
1055 	list_del(&system->list);
1056 
1057 	if (filter) {
1058 		kfree(filter->filter_string);
1059 		kfree(filter);
1060 	}
1061 	kfree_const(system->name);
1062 	kfree(system);
1063 }
1064 
1065 static void __get_system(struct event_subsystem *system)
1066 {
1067 	WARN_ON_ONCE(system_refcount(system) == 0);
1068 	system_refcount_inc(system);
1069 }
1070 
1071 static void __get_system_dir(struct trace_subsystem_dir *dir)
1072 {
1073 	WARN_ON_ONCE(dir->ref_count == 0);
1074 	dir->ref_count++;
1075 	__get_system(dir->subsystem);
1076 }
1077 
1078 static void __put_system_dir(struct trace_subsystem_dir *dir)
1079 {
1080 	WARN_ON_ONCE(dir->ref_count == 0);
1081 	/* If the subsystem is about to be freed, the dir must be too */
1082 	WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
1083 
1084 	__put_system(dir->subsystem);
1085 	if (!--dir->ref_count)
1086 		kfree(dir);
1087 }
1088 
1089 static void put_system(struct trace_subsystem_dir *dir)
1090 {
1091 	mutex_lock(&event_mutex);
1092 	__put_system_dir(dir);
1093 	mutex_unlock(&event_mutex);
1094 }
1095 
1096 static void remove_subsystem(struct trace_subsystem_dir *dir)
1097 {
1098 	if (!dir)
1099 		return;
1100 
1101 	if (!--dir->nr_events) {
1102 		eventfs_remove_dir(dir->ei);
1103 		list_del(&dir->list);
1104 		__put_system_dir(dir);
1105 	}
1106 }
1107 
1108 void event_file_get(struct trace_event_file *file)
1109 {
1110 	atomic_inc(&file->ref);
1111 }
1112 
1113 void event_file_put(struct trace_event_file *file)
1114 {
1115 	if (WARN_ON_ONCE(!atomic_read(&file->ref))) {
1116 		if (file->flags & EVENT_FILE_FL_FREED)
1117 			kmem_cache_free(file_cachep, file);
1118 		return;
1119 	}
1120 
1121 	if (atomic_dec_and_test(&file->ref)) {
1122 		/* Count should only go to zero when it is freed */
1123 		if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED)))
1124 			return;
1125 		kmem_cache_free(file_cachep, file);
1126 	}
1127 }
1128 
1129 static void remove_event_file_dir(struct trace_event_file *file)
1130 {
1131 	eventfs_remove_dir(file->ei);
1132 	list_del(&file->list);
1133 	remove_subsystem(file->system);
1134 	free_event_filter(file->filter);
1135 	file->flags |= EVENT_FILE_FL_FREED;
1136 	event_file_put(file);
1137 }
1138 
1139 /*
1140  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
1141  */
1142 static int
1143 __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
1144 			      const char *sub, const char *event, int set)
1145 {
1146 	struct trace_event_file *file;
1147 	struct trace_event_call *call;
1148 	const char *name;
1149 	int ret = -EINVAL;
1150 	int eret = 0;
1151 
1152 	list_for_each_entry(file, &tr->events, list) {
1153 
1154 		call = file->event_call;
1155 		name = trace_event_name(call);
1156 
1157 		if (!name || !call->class || !call->class->reg)
1158 			continue;
1159 
1160 		if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
1161 			continue;
1162 
1163 		if (match &&
1164 		    strcmp(match, name) != 0 &&
1165 		    strcmp(match, call->class->system) != 0)
1166 			continue;
1167 
1168 		if (sub && strcmp(sub, call->class->system) != 0)
1169 			continue;
1170 
1171 		if (event && strcmp(event, name) != 0)
1172 			continue;
1173 
1174 		ret = ftrace_event_enable_disable(file, set);
1175 
1176 		/*
1177 		 * Save the first error and return that. Some events
1178 		 * may still have been enabled, but let the user
1179 		 * know that something went wrong.
1180 		 */
1181 		if (ret && !eret)
1182 			eret = ret;
1183 
1184 		ret = eret;
1185 	}
1186 
1187 	return ret;
1188 }
1189 
1190 static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
1191 				  const char *sub, const char *event, int set)
1192 {
1193 	int ret;
1194 
1195 	mutex_lock(&event_mutex);
1196 	ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
1197 	mutex_unlock(&event_mutex);
1198 
1199 	return ret;
1200 }
1201 
1202 int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
1203 {
1204 	char *event = NULL, *sub = NULL, *match;
1205 	int ret;
1206 
1207 	if (!tr)
1208 		return -ENOENT;
1209 	/*
1210 	 * The buf format can be <subsystem>:<event-name>
1211 	 *  *:<event-name> means any event by that name.
1212 	 *  :<event-name> is the same.
1213 	 *
1214 	 *  <subsystem>:* means all events in that subsystem
1215 	 *  <subsystem>: means the same.
1216 	 *
1217 	 *  <name> (no ':') means all events in a subsystem with
1218 	 *  the name <name> or any event that matches <name>
1219 	 */
1220 
1221 	match = strsep(&buf, ":");
1222 	if (buf) {
1223 		sub = match;
1224 		event = buf;
1225 		match = NULL;
1226 
1227 		if (!strlen(sub) || strcmp(sub, "*") == 0)
1228 			sub = NULL;
1229 		if (!strlen(event) || strcmp(event, "*") == 0)
1230 			event = NULL;
1231 	}
1232 
1233 	ret = __ftrace_set_clr_event(tr, match, sub, event, set);
1234 
1235 	/* Put back the colon to allow this to be called again */
1236 	if (buf)
1237 		*(buf - 1) = ':';
1238 
1239 	return ret;
1240 }
1241 
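/*
 * Examples of strings accepted above, written as a user would echo
 * them into the set_event file:
 *
 *	sched:sched_switch	one event in the sched subsystem
 *	sched:			every event in the sched subsystem
 *	sched_switch		any event or subsystem with that name
 */
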
1242 /**
1243  * trace_set_clr_event - enable or disable an event
1244  * @system: system name to match (NULL for any system)
1245  * @event: event name to match (NULL for all events, within system)
1246  * @set: 1 to enable, 0 to disable
1247  *
1248  * This is a way for other parts of the kernel to enable or disable
1249  * event recording.
1250  *
1251  * Returns 0 on success, -EINVAL if the parameters do not match any
1252  * registered events.
1253  */
1254 int trace_set_clr_event(const char *system, const char *event, int set)
1255 {
1256 	struct trace_array *tr = top_trace_array();
1257 
1258 	if (!tr)
1259 		return -ENODEV;
1260 
1261 	return __ftrace_set_clr_event(tr, NULL, system, event, set);
1262 }
1263 EXPORT_SYMBOL_GPL(trace_set_clr_event);
1264 
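/*
 * Hedged usage sketch: enabling a single event from elsewhere in the
 * kernel without going through tracefs. sched/sched_switch is only an
 * example event.
 *
 *	ret = trace_set_clr_event("sched", "sched_switch", 1);
 */
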
1265 /**
1266  * trace_array_set_clr_event - enable or disable an event for a trace array.
1267  * @tr: concerned trace array.
1268  * @system: system name to match (NULL for any system)
1269  * @event: event name to match (NULL for all events, within system)
1270  * @enable: true to enable, false to disable
1271  *
1272  * This is a way for other parts of the kernel to enable or disable
1273  * event recording.
1274  *
1275  * Returns 0 on success, -EINVAL if the parameters do not match any
1276  * registered events.
1277  */
1278 int trace_array_set_clr_event(struct trace_array *tr, const char *system,
1279 		const char *event, bool enable)
1280 {
1281 	int set;
1282 
1283 	if (!tr)
1284 		return -ENOENT;
1285 
1286 	set = (enable == true) ? 1 : 0;
1287 	return __ftrace_set_clr_event(tr, NULL, system, event, set);
1288 }
1289 EXPORT_SYMBOL_GPL(trace_array_set_clr_event);
1290 
1291 /* 128 should be much more than enough */
1292 #define EVENT_BUF_SIZE		127
1293 
1294 static ssize_t
1295 ftrace_event_write(struct file *file, const char __user *ubuf,
1296 		   size_t cnt, loff_t *ppos)
1297 {
1298 	struct trace_parser parser;
1299 	struct seq_file *m = file->private_data;
1300 	struct trace_array *tr = m->private;
1301 	ssize_t read, ret;
1302 
1303 	if (!cnt)
1304 		return 0;
1305 
1306 	ret = tracing_update_buffers();
1307 	if (ret < 0)
1308 		return ret;
1309 
1310 	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
1311 		return -ENOMEM;
1312 
1313 	read = trace_get_user(&parser, ubuf, cnt, ppos);
1314 
1315 	if (read >= 0 && trace_parser_loaded((&parser))) {
1316 		int set = 1;
1317 
1318 		if (*parser.buffer == '!')
1319 			set = 0;
1320 
1321 		ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
1322 		if (ret)
1323 			goto out_put;
1324 	}
1325 
1326 	ret = read;
1327 
1328  out_put:
1329 	trace_parser_put(&parser);
1330 
1331 	return ret;
1332 }
1333 
1334 static void *
1335 t_next(struct seq_file *m, void *v, loff_t *pos)
1336 {
1337 	struct trace_event_file *file = v;
1338 	struct trace_event_call *call;
1339 	struct trace_array *tr = m->private;
1340 
1341 	(*pos)++;
1342 
1343 	list_for_each_entry_continue(file, &tr->events, list) {
1344 		call = file->event_call;
1345 		/*
1346 		 * The ftrace subsystem is for showing formats only.
1347 		 * They cannot be enabled or disabled via the event files.
1348 		 */
1349 		if (call->class && call->class->reg &&
1350 		    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
1351 			return file;
1352 	}
1353 
1354 	return NULL;
1355 }
1356 
1357 static void *t_start(struct seq_file *m, loff_t *pos)
1358 {
1359 	struct trace_event_file *file;
1360 	struct trace_array *tr = m->private;
1361 	loff_t l;
1362 
1363 	mutex_lock(&event_mutex);
1364 
1365 	file = list_entry(&tr->events, struct trace_event_file, list);
1366 	for (l = 0; l <= *pos; ) {
1367 		file = t_next(m, file, &l);
1368 		if (!file)
1369 			break;
1370 	}
1371 	return file;
1372 }
1373 
1374 static void *
1375 s_next(struct seq_file *m, void *v, loff_t *pos)
1376 {
1377 	struct trace_event_file *file = v;
1378 	struct trace_array *tr = m->private;
1379 
1380 	(*pos)++;
1381 
1382 	list_for_each_entry_continue(file, &tr->events, list) {
1383 		if (file->flags & EVENT_FILE_FL_ENABLED)
1384 			return file;
1385 	}
1386 
1387 	return NULL;
1388 }
1389 
1390 static void *s_start(struct seq_file *m, loff_t *pos)
1391 {
1392 	struct trace_event_file *file;
1393 	struct trace_array *tr = m->private;
1394 	loff_t l;
1395 
1396 	mutex_lock(&event_mutex);
1397 
1398 	file = list_entry(&tr->events, struct trace_event_file, list);
1399 	for (l = 0; l <= *pos; ) {
1400 		file = s_next(m, file, &l);
1401 		if (!file)
1402 			break;
1403 	}
1404 	return file;
1405 }
1406 
1407 static int t_show(struct seq_file *m, void *v)
1408 {
1409 	struct trace_event_file *file = v;
1410 	struct trace_event_call *call = file->event_call;
1411 
1412 	if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
1413 		seq_printf(m, "%s:", call->class->system);
1414 	seq_printf(m, "%s\n", trace_event_name(call));
1415 
1416 	return 0;
1417 }
1418 
1419 static void t_stop(struct seq_file *m, void *p)
1420 {
1421 	mutex_unlock(&event_mutex);
1422 }
1423 
1424 static void *
1425 __next(struct seq_file *m, void *v, loff_t *pos, int type)
1426 {
1427 	struct trace_array *tr = m->private;
1428 	struct trace_pid_list *pid_list;
1429 
1430 	if (type == TRACE_PIDS)
1431 		pid_list = rcu_dereference_sched(tr->filtered_pids);
1432 	else
1433 		pid_list = rcu_dereference_sched(tr->filtered_no_pids);
1434 
1435 	return trace_pid_next(pid_list, v, pos);
1436 }
1437 
1438 static void *
1439 p_next(struct seq_file *m, void *v, loff_t *pos)
1440 {
1441 	return __next(m, v, pos, TRACE_PIDS);
1442 }
1443 
1444 static void *
1445 np_next(struct seq_file *m, void *v, loff_t *pos)
1446 {
1447 	return __next(m, v, pos, TRACE_NO_PIDS);
1448 }
1449 
1450 static void *__start(struct seq_file *m, loff_t *pos, int type)
1451 	__acquires(RCU)
1452 {
1453 	struct trace_pid_list *pid_list;
1454 	struct trace_array *tr = m->private;
1455 
1456 	/*
1457 	 * Grab the mutex, to keep calls to p_next() seeing the same
1458 	 * tr->filtered_pids that p_start() saw.
1459 	 * If we just passed the tr->filtered_pids around, then RCU would
1460 	 * have been enough, but doing that makes things more complex.
1461 	 */
1462 	mutex_lock(&event_mutex);
1463 	rcu_read_lock_sched();
1464 
1465 	if (type == TRACE_PIDS)
1466 		pid_list = rcu_dereference_sched(tr->filtered_pids);
1467 	else
1468 		pid_list = rcu_dereference_sched(tr->filtered_no_pids);
1469 
1470 	if (!pid_list)
1471 		return NULL;
1472 
1473 	return trace_pid_start(pid_list, pos);
1474 }
1475 
1476 static void *p_start(struct seq_file *m, loff_t *pos)
1477 	__acquires(RCU)
1478 {
1479 	return __start(m, pos, TRACE_PIDS);
1480 }
1481 
1482 static void *np_start(struct seq_file *m, loff_t *pos)
1483 	__acquires(RCU)
1484 {
1485 	return __start(m, pos, TRACE_NO_PIDS);
1486 }
1487 
1488 static void p_stop(struct seq_file *m, void *p)
1489 	__releases(RCU)
1490 {
1491 	rcu_read_unlock_sched();
1492 	mutex_unlock(&event_mutex);
1493 }
1494 
1495 static ssize_t
1496 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1497 		  loff_t *ppos)
1498 {
1499 	struct trace_event_file *file;
1500 	unsigned long flags;
1501 	char buf[4] = "0";
1502 
1503 	mutex_lock(&event_mutex);
1504 	file = event_file_file(filp);
1505 	if (likely(file))
1506 		flags = file->flags;
1507 	mutex_unlock(&event_mutex);
1508 
1509 	if (!file)
1510 		return -ENODEV;
1511 
1512 	if (flags & EVENT_FILE_FL_ENABLED &&
1513 	    !(flags & EVENT_FILE_FL_SOFT_DISABLED))
1514 		strcpy(buf, "1");
1515 
1516 	if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
1517 	    flags & EVENT_FILE_FL_SOFT_MODE)
1518 		strcat(buf, "*");
1519 
1520 	strcat(buf, "\n");
1521 
1522 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
1523 }
1524 
1525 static ssize_t
1526 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1527 		   loff_t *ppos)
1528 {
1529 	struct trace_event_file *file;
1530 	unsigned long val;
1531 	int ret;
1532 
1533 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1534 	if (ret)
1535 		return ret;
1536 
1537 	ret = tracing_update_buffers();
1538 	if (ret < 0)
1539 		return ret;
1540 
1541 	switch (val) {
1542 	case 0:
1543 	case 1:
1544 		ret = -ENODEV;
1545 		mutex_lock(&event_mutex);
1546 		file = event_file_file(filp);
1547 		if (likely(file))
1548 			ret = ftrace_event_enable_disable(file, val);
1549 		mutex_unlock(&event_mutex);
1550 		break;
1551 
1552 	default:
1553 		return -EINVAL;
1554 	}
1555 
1556 	*ppos += cnt;
1557 
1558 	return ret ? ret : cnt;
1559 }
1560 
1561 static ssize_t
1562 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1563 		   loff_t *ppos)
1564 {
1565 	const char set_to_char[4] = { '?', '0', '1', 'X' };
1566 	struct trace_subsystem_dir *dir = filp->private_data;
1567 	struct event_subsystem *system = dir->subsystem;
1568 	struct trace_event_call *call;
1569 	struct trace_event_file *file;
1570 	struct trace_array *tr = dir->tr;
1571 	char buf[2];
1572 	int set = 0;
1573 	int ret;
1574 
1575 	mutex_lock(&event_mutex);
1576 	list_for_each_entry(file, &tr->events, list) {
1577 		call = file->event_call;
1578 		if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
1579 		    !trace_event_name(call) || !call->class || !call->class->reg)
1580 			continue;
1581 
1582 		if (system && strcmp(call->class->system, system->name) != 0)
1583 			continue;
1584 
1585 		/*
1586 		 * We need to find out if all the events are set,
1587 		 * or if all events are cleared, or if we have
1588 		 * a mixture.
1589 		 */
1590 		set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
1591 
1592 		/*
1593 		 * If we have a mixture, no need to look further.
1594 		 */
1595 		if (set == 3)
1596 			break;
1597 	}
1598 	mutex_unlock(&event_mutex);
1599 
1600 	buf[0] = set_to_char[set];
1601 	buf[1] = '\n';
1602 
1603 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
1604 
1605 	return ret;
1606 }
1607 
1608 static ssize_t
1609 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1610 		    loff_t *ppos)
1611 {
1612 	struct trace_subsystem_dir *dir = filp->private_data;
1613 	struct event_subsystem *system = dir->subsystem;
1614 	const char *name = NULL;
1615 	unsigned long val;
1616 	ssize_t ret;
1617 
1618 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1619 	if (ret)
1620 		return ret;
1621 
1622 	ret = tracing_update_buffers();
1623 	if (ret < 0)
1624 		return ret;
1625 
1626 	if (val != 0 && val != 1)
1627 		return -EINVAL;
1628 
1629 	/*
1630 	 * Opening of "enable" adds a ref count to system,
1631 	 * so the name is safe to use.
1632 	 */
1633 	if (system)
1634 		name = system->name;
1635 
1636 	ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
1637 	if (ret)
1638 		goto out;
1639 
1640 	ret = cnt;
1641 
1642 out:
1643 	*ppos += cnt;
1644 
1645 	return ret;
1646 }
1647 
1648 enum {
1649 	FORMAT_HEADER		= 1,
1650 	FORMAT_FIELD_SEPERATOR	= 2,
1651 	FORMAT_PRINTFMT		= 3,
1652 };
1653 
1654 static void *f_next(struct seq_file *m, void *v, loff_t *pos)
1655 {
1656 	struct trace_event_file *file = event_file_data(m->private);
1657 	struct trace_event_call *call = file->event_call;
1658 	struct list_head *common_head = &ftrace_common_fields;
1659 	struct list_head *head = trace_get_fields(call);
1660 	struct list_head *node = v;
1661 
1662 	(*pos)++;
1663 
1664 	switch ((unsigned long)v) {
1665 	case FORMAT_HEADER:
1666 		node = common_head;
1667 		break;
1668 
1669 	case FORMAT_FIELD_SEPERATOR:
1670 		node = head;
1671 		break;
1672 
1673 	case FORMAT_PRINTFMT:
1674 		/* all done */
1675 		return NULL;
1676 	}
1677 
1678 	node = node->prev;
1679 	if (node == common_head)
1680 		return (void *)FORMAT_FIELD_SEPERATOR;
1681 	else if (node == head)
1682 		return (void *)FORMAT_PRINTFMT;
1683 	else
1684 		return node;
1685 }
1686 
1687 static int f_show(struct seq_file *m, void *v)
1688 {
1689 	struct trace_event_file *file = event_file_data(m->private);
1690 	struct trace_event_call *call = file->event_call;
1691 	struct ftrace_event_field *field;
1692 	const char *array_descriptor;
1693 
1694 	switch ((unsigned long)v) {
1695 	case FORMAT_HEADER:
1696 		seq_printf(m, "name: %s\n", trace_event_name(call));
1697 		seq_printf(m, "ID: %d\n", call->event.type);
1698 		seq_puts(m, "format:\n");
1699 		return 0;
1700 
1701 	case FORMAT_FIELD_SEPERATOR:
1702 		seq_putc(m, '\n');
1703 		return 0;
1704 
1705 	case FORMAT_PRINTFMT:
1706 		seq_printf(m, "\nprint fmt: %s\n",
1707 			   call->print_fmt);
1708 		return 0;
1709 	}
1710 
1711 	field = list_entry(v, struct ftrace_event_field, link);
1712 	/*
1713 	 * Smartly shows the array type (except dynamic arrays).
1714 	 * Normal:
1715 	 *	field:TYPE VAR
1716 	 * If TYPE := TYPE[LEN], it is shown:
1717 	 *	field:TYPE VAR[LEN]
1718 	 */
1719 	array_descriptor = strchr(field->type, '[');
1720 
1721 	if (str_has_prefix(field->type, "__data_loc"))
1722 		array_descriptor = NULL;
1723 
1724 	if (!array_descriptor)
1725 		seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1726 			   field->type, field->name, field->offset,
1727 			   field->size, !!field->is_signed);
1728 	else if (field->len)
1729 		seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1730 			   (int)(array_descriptor - field->type),
1731 			   field->type, field->name,
1732 			   field->len, field->offset,
1733 			   field->size, !!field->is_signed);
1734 	else
1735 		seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1736 				(int)(array_descriptor - field->type),
1737 				field->type, field->name,
1738 				field->offset, field->size, !!field->is_signed);
1739 
1740 	return 0;
1741 }
1742 
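/*
 * Example of a line f_show() emits for a fixed-size array field, e.g.
 * a hypothetical "int buf[4]" at offset 8:
 *
 *	field:int buf[4];	offset:8;	size:16;	signed:1;
 */
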
1743 static void *f_start(struct seq_file *m, loff_t *pos)
1744 {
1745 	struct trace_event_file *file;
1746 	void *p = (void *)FORMAT_HEADER;
1747 	loff_t l = 0;
1748 
1749 	/* ->stop() is called even if ->start() fails */
1750 	mutex_lock(&event_mutex);
1751 	file = event_file_file(m->private);
1752 	if (!file)
1753 		return ERR_PTR(-ENODEV);
1754 
1755 	while (l < *pos && p)
1756 		p = f_next(m, p, &l);
1757 
1758 	return p;
1759 }
1760 
1761 static void f_stop(struct seq_file *m, void *p)
1762 {
1763 	mutex_unlock(&event_mutex);
1764 }
1765 
1766 static const struct seq_operations trace_format_seq_ops = {
1767 	.start		= f_start,
1768 	.next		= f_next,
1769 	.stop		= f_stop,
1770 	.show		= f_show,
1771 };
1772 
1773 static int trace_format_open(struct inode *inode, struct file *file)
1774 {
1775 	struct seq_file *m;
1776 	int ret;
1777 
1778 	/* Do we want to hide event format files on tracefs lockdown? */
1779 
1780 	ret = seq_open(file, &trace_format_seq_ops);
1781 	if (ret < 0)
1782 		return ret;
1783 
1784 	m = file->private_data;
1785 	m->private = file;
1786 
1787 	return 0;
1788 }
1789 
1790 #ifdef CONFIG_PERF_EVENTS
1791 static ssize_t
1792 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1793 {
1794 	int id = (long)event_file_data(filp);
1795 	char buf[32];
1796 	int len;
1797 
1798 	if (unlikely(!id))
1799 		return -ENODEV;
1800 
1801 	len = sprintf(buf, "%d\n", id);
1802 
1803 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
1804 }
1805 #endif
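
/*
 * A minimal user-space sketch of how this "id" file is typically
 * consumed: the value becomes perf_event_attr.config for a
 * PERF_TYPE_TRACEPOINT event (a sketch, not kernel code):
 *
 *	#include <linux/perf_event.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static long open_tracepoint(long id)
 *	{
 *		struct perf_event_attr attr = {
 *			.type = PERF_TYPE_TRACEPOINT,
 *			.size = sizeof(attr),
 *			.config = id,	// value read from the "id" file
 *		};
 *
 *		return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
 *	}
 */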
1806 
1807 static ssize_t
event_filter_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)1808 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1809 		  loff_t *ppos)
1810 {
1811 	struct trace_event_file *file;
1812 	struct trace_seq *s;
1813 	int r = -ENODEV;
1814 
1815 	if (*ppos)
1816 		return 0;
1817 
1818 	s = kmalloc(sizeof(*s), GFP_KERNEL);
1819 
1820 	if (!s)
1821 		return -ENOMEM;
1822 
1823 	trace_seq_init(s);
1824 
1825 	mutex_lock(&event_mutex);
1826 	file = event_file_file(filp);
1827 	if (file)
1828 		print_event_filter(file, s);
1829 	mutex_unlock(&event_mutex);
1830 
1831 	if (file)
1832 		r = simple_read_from_buffer(ubuf, cnt, ppos,
1833 					    s->buffer, trace_seq_used(s));
1834 
1835 	kfree(s);
1836 
1837 	return r;
1838 }
1839 
1840 static ssize_t
event_filter_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)1841 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1842 		   loff_t *ppos)
1843 {
1844 	struct trace_event_file *file;
1845 	char *buf;
1846 	int err = -ENODEV;
1847 
1848 	if (cnt >= PAGE_SIZE)
1849 		return -EINVAL;
1850 
1851 	buf = memdup_user_nul(ubuf, cnt);
1852 	if (IS_ERR(buf))
1853 		return PTR_ERR(buf);
1854 
1855 	mutex_lock(&event_mutex);
1856 	file = event_file_file(filp);
1857 	if (file) {
1858 		if (file->flags & EVENT_FILE_FL_FREED)
1859 			err = -ENODEV;
1860 		else
1861 			err = apply_event_filter(file, buf);
1862 	}
1863 	mutex_unlock(&event_mutex);
1864 
1865 	kfree(buf);
1866 	if (err < 0)
1867 		return err;
1868 
1869 	*ppos += cnt;
1870 
1871 	return cnt;
1872 }
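
/*
 * Typical user-space usage of the filter file handled above (the
 * event and expression are examples; writing '0' clears the filter):
 *
 *	# echo 'prev_pid != 0' > \
 *		/sys/kernel/tracing/events/sched/sched_switch/filter
 *	# echo 0 > /sys/kernel/tracing/events/sched/sched_switch/filter
 */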
1873 
1874 static LIST_HEAD(event_subsystems);
1875 
subsystem_open(struct inode * inode,struct file * filp)1876 static int subsystem_open(struct inode *inode, struct file *filp)
1877 {
1878 	struct trace_subsystem_dir *dir = NULL, *iter_dir;
1879 	struct trace_array *tr = NULL, *iter_tr;
1880 	struct event_subsystem *system = NULL;
1881 	int ret;
1882 
1883 	if (tracing_is_disabled())
1884 		return -ENODEV;
1885 
1886 	/* Make sure the system still exists */
1887 	mutex_lock(&event_mutex);
1888 	mutex_lock(&trace_types_lock);
1889 	list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
1890 		list_for_each_entry(iter_dir, &iter_tr->systems, list) {
1891 			if (iter_dir == inode->i_private) {
1892 				/* Don't open systems with no events */
1893 				tr = iter_tr;
1894 				dir = iter_dir;
1895 				if (dir->nr_events) {
1896 					__get_system_dir(dir);
1897 					system = dir->subsystem;
1898 				}
1899 				goto exit_loop;
1900 			}
1901 		}
1902 	}
1903  exit_loop:
1904 	mutex_unlock(&trace_types_lock);
1905 	mutex_unlock(&event_mutex);
1906 
1907 	if (!system)
1908 		return -ENODEV;
1909 
1910 	/* Still need to increment the ref count of the trace array */
1911 	if (trace_array_get(tr) < 0) {
1912 		put_system(dir);
1913 		return -ENODEV;
1914 	}
1915 
1916 	ret = tracing_open_generic(inode, filp);
1917 	if (ret < 0) {
1918 		trace_array_put(tr);
1919 		put_system(dir);
1920 	}
1921 
1922 	return ret;
1923 }
1924 
system_tr_open(struct inode * inode,struct file * filp)1925 static int system_tr_open(struct inode *inode, struct file *filp)
1926 {
1927 	struct trace_subsystem_dir *dir;
1928 	struct trace_array *tr = inode->i_private;
1929 	int ret;
1930 
1931 	/* Make a temporary dir that has no system but points to tr */
1932 	dir = kzalloc(sizeof(*dir), GFP_KERNEL);
1933 	if (!dir)
1934 		return -ENOMEM;
1935 
1936 	ret = tracing_open_generic_tr(inode, filp);
1937 	if (ret < 0) {
1938 		kfree(dir);
1939 		return ret;
1940 	}
1941 	dir->tr = tr;
1942 	filp->private_data = dir;
1943 
1944 	return 0;
1945 }
1946 
subsystem_release(struct inode * inode,struct file * file)1947 static int subsystem_release(struct inode *inode, struct file *file)
1948 {
1949 	struct trace_subsystem_dir *dir = file->private_data;
1950 
1951 	trace_array_put(dir->tr);
1952 
1953 	/*
1954 	 * If dir->subsystem is NULL, then this is a temporary
1955 	 * descriptor that was made for a trace_array to enable
1956 	 * all subsystems.
1957 	 */
1958 	if (dir->subsystem)
1959 		put_system(dir);
1960 	else
1961 		kfree(dir);
1962 
1963 	return 0;
1964 }
1965 
1966 static ssize_t
subsystem_filter_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)1967 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1968 		      loff_t *ppos)
1969 {
1970 	struct trace_subsystem_dir *dir = filp->private_data;
1971 	struct event_subsystem *system = dir->subsystem;
1972 	struct trace_seq *s;
1973 	int r;
1974 
1975 	if (*ppos)
1976 		return 0;
1977 
1978 	s = kmalloc(sizeof(*s), GFP_KERNEL);
1979 	if (!s)
1980 		return -ENOMEM;
1981 
1982 	trace_seq_init(s);
1983 
1984 	print_subsystem_event_filter(system, s);
1985 	r = simple_read_from_buffer(ubuf, cnt, ppos,
1986 				    s->buffer, trace_seq_used(s));
1987 
1988 	kfree(s);
1989 
1990 	return r;
1991 }
1992 
1993 static ssize_t
subsystem_filter_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)1994 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1995 		       loff_t *ppos)
1996 {
1997 	struct trace_subsystem_dir *dir = filp->private_data;
1998 	char *buf;
1999 	int err;
2000 
2001 	if (cnt >= PAGE_SIZE)
2002 		return -EINVAL;
2003 
2004 	buf = memdup_user_nul(ubuf, cnt);
2005 	if (IS_ERR(buf))
2006 		return PTR_ERR(buf);
2007 
2008 	err = apply_subsystem_event_filter(dir, buf);
2009 	kfree(buf);
2010 	if (err < 0)
2011 		return err;
2012 
2013 	*ppos += cnt;
2014 
2015 	return cnt;
2016 }
2017 
2018 static ssize_t
show_header(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)2019 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
2020 {
2021 	int (*func)(struct trace_seq *s) = filp->private_data;
2022 	struct trace_seq *s;
2023 	int r;
2024 
2025 	if (*ppos)
2026 		return 0;
2027 
2028 	s = kmalloc(sizeof(*s), GFP_KERNEL);
2029 	if (!s)
2030 		return -ENOMEM;
2031 
2032 	trace_seq_init(s);
2033 
2034 	func(s);
2035 	r = simple_read_from_buffer(ubuf, cnt, ppos,
2036 				    s->buffer, trace_seq_used(s));
2037 
2038 	kfree(s);
2039 
2040 	return r;
2041 }
2042 
ignore_task_cpu(void * data)2043 static void ignore_task_cpu(void *data)
2044 {
2045 	struct trace_array *tr = data;
2046 	struct trace_pid_list *pid_list;
2047 	struct trace_pid_list *no_pid_list;
2048 
2049 	/*
2050 	 * This function is called by on_each_cpu() while the
2051 	 * event_mutex is held.
2052 	 */
2053 	pid_list = rcu_dereference_protected(tr->filtered_pids,
2054 					     mutex_is_locked(&event_mutex));
2055 	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
2056 					     mutex_is_locked(&event_mutex));
2057 
2058 	this_cpu_write(tr->array_buffer.data->ignore_pid,
2059 		       trace_ignore_this_task(pid_list, no_pid_list, current));
2060 }
2061 
register_pid_events(struct trace_array * tr)2062 static void register_pid_events(struct trace_array *tr)
2063 {
2064 	/*
2065 	 * Register a probe that is called before all other probes
2066 	 * to set ignore_pid if next or prev do not match.
2067 	 * Register a probe that is called after all other probes
2068 	 * to only keep ignore_pid set if next pid matches.
2069 	 */
2070 	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
2071 					 tr, INT_MAX);
2072 	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
2073 					 tr, 0);
2074 
2075 	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
2076 					 tr, INT_MAX);
2077 	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
2078 					 tr, 0);
2079 
2080 	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
2081 					     tr, INT_MAX);
2082 	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
2083 					     tr, 0);
2084 
2085 	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
2086 					 tr, INT_MAX);
2087 	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
2088 					 tr, 0);
2089 }
2090 
2091 static ssize_t
event_pid_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos,int type)2092 event_pid_write(struct file *filp, const char __user *ubuf,
2093 		size_t cnt, loff_t *ppos, int type)
2094 {
2095 	struct seq_file *m = filp->private_data;
2096 	struct trace_array *tr = m->private;
2097 	struct trace_pid_list *filtered_pids = NULL;
2098 	struct trace_pid_list *other_pids = NULL;
2099 	struct trace_pid_list *pid_list;
2100 	struct trace_event_file *file;
2101 	ssize_t ret;
2102 
2103 	if (!cnt)
2104 		return 0;
2105 
2106 	ret = tracing_update_buffers();
2107 	if (ret < 0)
2108 		return ret;
2109 
2110 	mutex_lock(&event_mutex);
2111 
2112 	if (type == TRACE_PIDS) {
2113 		filtered_pids = rcu_dereference_protected(tr->filtered_pids,
2114 							  lockdep_is_held(&event_mutex));
2115 		other_pids = rcu_dereference_protected(tr->filtered_no_pids,
2116 							  lockdep_is_held(&event_mutex));
2117 	} else {
2118 		filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
2119 							  lockdep_is_held(&event_mutex));
2120 		other_pids = rcu_dereference_protected(tr->filtered_pids,
2121 							  lockdep_is_held(&event_mutex));
2122 	}
2123 
2124 	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
2125 	if (ret < 0)
2126 		goto out;
2127 
2128 	if (type == TRACE_PIDS)
2129 		rcu_assign_pointer(tr->filtered_pids, pid_list);
2130 	else
2131 		rcu_assign_pointer(tr->filtered_no_pids, pid_list);
2132 
2133 	list_for_each_entry(file, &tr->events, list) {
2134 		set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
2135 	}
2136 
2137 	if (filtered_pids) {
2138 		tracepoint_synchronize_unregister();
2139 		trace_pid_list_free(filtered_pids);
2140 	} else if (pid_list && !other_pids) {
2141 		register_pid_events(tr);
2142 	}
2143 
2144 	/*
2145 	 * Ignoring of pids is done at task switch. But we have to
2146 	 * check for those tasks that are currently running.
2147 	 * Always do this in case a pid was appended or removed.
2148 	 */
2149 	on_each_cpu(ignore_task_cpu, tr, 1);
2150 
2151  out:
2152 	mutex_unlock(&event_mutex);
2153 
2154 	if (ret > 0)
2155 		*ppos += ret;
2156 
2157 	return ret;
2158 }
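
/*
 * User-space view of the files backed by this helper (the PIDs are
 * examples; ">>" appends to the existing list):
 *
 *	# echo 123 > /sys/kernel/tracing/set_event_pid
 *	# echo 456 >> /sys/kernel/tracing/set_event_pid
 *	# echo 789 > /sys/kernel/tracing/set_event_notrace_pid
 */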
2159 
2160 static ssize_t
ftrace_event_pid_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)2161 ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
2162 		       size_t cnt, loff_t *ppos)
2163 {
2164 	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
2165 }
2166 
2167 static ssize_t
ftrace_event_npid_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)2168 ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
2169 			size_t cnt, loff_t *ppos)
2170 {
2171 	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
2172 }
2173 
2174 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
2175 static int ftrace_event_set_open(struct inode *inode, struct file *file);
2176 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
2177 static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
2178 static int ftrace_event_release(struct inode *inode, struct file *file);
2179 
2180 static const struct seq_operations show_event_seq_ops = {
2181 	.start = t_start,
2182 	.next = t_next,
2183 	.show = t_show,
2184 	.stop = t_stop,
2185 };
2186 
2187 static const struct seq_operations show_set_event_seq_ops = {
2188 	.start = s_start,
2189 	.next = s_next,
2190 	.show = t_show,
2191 	.stop = t_stop,
2192 };
2193 
2194 static const struct seq_operations show_set_pid_seq_ops = {
2195 	.start = p_start,
2196 	.next = p_next,
2197 	.show = trace_pid_show,
2198 	.stop = p_stop,
2199 };
2200 
2201 static const struct seq_operations show_set_no_pid_seq_ops = {
2202 	.start = np_start,
2203 	.next = np_next,
2204 	.show = trace_pid_show,
2205 	.stop = p_stop,
2206 };
2207 
2208 static const struct file_operations ftrace_avail_fops = {
2209 	.open = ftrace_event_avail_open,
2210 	.read = seq_read,
2211 	.llseek = seq_lseek,
2212 	.release = seq_release,
2213 };
2214 
2215 static const struct file_operations ftrace_set_event_fops = {
2216 	.open = ftrace_event_set_open,
2217 	.read = seq_read,
2218 	.write = ftrace_event_write,
2219 	.llseek = seq_lseek,
2220 	.release = ftrace_event_release,
2221 };
2222 
2223 static const struct file_operations ftrace_set_event_pid_fops = {
2224 	.open = ftrace_event_set_pid_open,
2225 	.read = seq_read,
2226 	.write = ftrace_event_pid_write,
2227 	.llseek = seq_lseek,
2228 	.release = ftrace_event_release,
2229 };
2230 
2231 static const struct file_operations ftrace_set_event_notrace_pid_fops = {
2232 	.open = ftrace_event_set_npid_open,
2233 	.read = seq_read,
2234 	.write = ftrace_event_npid_write,
2235 	.llseek = seq_lseek,
2236 	.release = ftrace_event_release,
2237 };
2238 
2239 static const struct file_operations ftrace_enable_fops = {
2240 	.open = tracing_open_file_tr,
2241 	.read = event_enable_read,
2242 	.write = event_enable_write,
2243 	.release = tracing_release_file_tr,
2244 	.llseek = default_llseek,
2245 };
2246 
2247 static const struct file_operations ftrace_event_format_fops = {
2248 	.open = trace_format_open,
2249 	.read = seq_read,
2250 	.llseek = seq_lseek,
2251 	.release = seq_release,
2252 };
2253 
2254 #ifdef CONFIG_PERF_EVENTS
2255 static const struct file_operations ftrace_event_id_fops = {
2256 	.read = event_id_read,
2257 	.llseek = default_llseek,
2258 };
2259 #endif
2260 
2261 static const struct file_operations ftrace_event_filter_fops = {
2262 	.open = tracing_open_file_tr,
2263 	.read = event_filter_read,
2264 	.write = event_filter_write,
2265 	.release = tracing_release_file_tr,
2266 	.llseek = default_llseek,
2267 };
2268 
2269 static const struct file_operations ftrace_subsystem_filter_fops = {
2270 	.open = subsystem_open,
2271 	.read = subsystem_filter_read,
2272 	.write = subsystem_filter_write,
2273 	.llseek = default_llseek,
2274 	.release = subsystem_release,
2275 };
2276 
2277 static const struct file_operations ftrace_system_enable_fops = {
2278 	.open = subsystem_open,
2279 	.read = system_enable_read,
2280 	.write = system_enable_write,
2281 	.llseek = default_llseek,
2282 	.release = subsystem_release,
2283 };
2284 
2285 static const struct file_operations ftrace_tr_enable_fops = {
2286 	.open = system_tr_open,
2287 	.read = system_enable_read,
2288 	.write = system_enable_write,
2289 	.llseek = default_llseek,
2290 	.release = subsystem_release,
2291 };
2292 
2293 static const struct file_operations ftrace_show_header_fops = {
2294 	.open = tracing_open_generic,
2295 	.read = show_header,
2296 	.llseek = default_llseek,
2297 };
2298 
2299 static int
ftrace_event_open(struct inode * inode,struct file * file,const struct seq_operations * seq_ops)2300 ftrace_event_open(struct inode *inode, struct file *file,
2301 		  const struct seq_operations *seq_ops)
2302 {
2303 	struct seq_file *m;
2304 	int ret;
2305 
2306 	ret = security_locked_down(LOCKDOWN_TRACEFS);
2307 	if (ret)
2308 		return ret;
2309 
2310 	ret = seq_open(file, seq_ops);
2311 	if (ret < 0)
2312 		return ret;
2313 	m = file->private_data;
2314 	/* copy tr over to seq ops */
2315 	m->private = inode->i_private;
2316 
2317 	return ret;
2318 }
2319 
ftrace_event_release(struct inode * inode,struct file * file)2320 static int ftrace_event_release(struct inode *inode, struct file *file)
2321 {
2322 	struct trace_array *tr = inode->i_private;
2323 
2324 	trace_array_put(tr);
2325 
2326 	return seq_release(inode, file);
2327 }
2328 
2329 static int
ftrace_event_avail_open(struct inode * inode,struct file * file)2330 ftrace_event_avail_open(struct inode *inode, struct file *file)
2331 {
2332 	const struct seq_operations *seq_ops = &show_event_seq_ops;
2333 
2334 	/* Checks for tracefs lockdown */
2335 	return ftrace_event_open(inode, file, seq_ops);
2336 }
2337 
2338 static int
ftrace_event_set_open(struct inode * inode,struct file * file)2339 ftrace_event_set_open(struct inode *inode, struct file *file)
2340 {
2341 	const struct seq_operations *seq_ops = &show_set_event_seq_ops;
2342 	struct trace_array *tr = inode->i_private;
2343 	int ret;
2344 
2345 	ret = tracing_check_open_get_tr(tr);
2346 	if (ret)
2347 		return ret;
2348 
2349 	if ((file->f_mode & FMODE_WRITE) &&
2350 	    (file->f_flags & O_TRUNC))
2351 		ftrace_clear_events(tr);
2352 
2353 	ret = ftrace_event_open(inode, file, seq_ops);
2354 	if (ret < 0)
2355 		trace_array_put(tr);
2356 	return ret;
2357 }
2358 
2359 static int
ftrace_event_set_pid_open(struct inode * inode,struct file * file)2360 ftrace_event_set_pid_open(struct inode *inode, struct file *file)
2361 {
2362 	const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
2363 	struct trace_array *tr = inode->i_private;
2364 	int ret;
2365 
2366 	ret = tracing_check_open_get_tr(tr);
2367 	if (ret)
2368 		return ret;
2369 
2370 	if ((file->f_mode & FMODE_WRITE) &&
2371 	    (file->f_flags & O_TRUNC))
2372 		ftrace_clear_event_pids(tr, TRACE_PIDS);
2373 
2374 	ret = ftrace_event_open(inode, file, seq_ops);
2375 	if (ret < 0)
2376 		trace_array_put(tr);
2377 	return ret;
2378 }
2379 
2380 static int
ftrace_event_set_npid_open(struct inode * inode,struct file * file)2381 ftrace_event_set_npid_open(struct inode *inode, struct file *file)
2382 {
2383 	const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops;
2384 	struct trace_array *tr = inode->i_private;
2385 	int ret;
2386 
2387 	ret = tracing_check_open_get_tr(tr);
2388 	if (ret)
2389 		return ret;
2390 
2391 	if ((file->f_mode & FMODE_WRITE) &&
2392 	    (file->f_flags & O_TRUNC))
2393 		ftrace_clear_event_pids(tr, TRACE_NO_PIDS);
2394 
2395 	ret = ftrace_event_open(inode, file, seq_ops);
2396 	if (ret < 0)
2397 		trace_array_put(tr);
2398 	return ret;
2399 }
2400 
2401 static struct event_subsystem *
create_new_subsystem(const char * name)2402 create_new_subsystem(const char *name)
2403 {
2404 	struct event_subsystem *system;
2405 
2406 	/* need to create new entry */
2407 	system = kmalloc(sizeof(*system), GFP_KERNEL);
2408 	if (!system)
2409 		return NULL;
2410 
2411 	system->ref_count = 1;
2412 
2413 	/* Only allocate if dynamic (kprobes and modules) */
2414 	system->name = kstrdup_const(name, GFP_KERNEL);
2415 	if (!system->name)
2416 		goto out_free;
2417 
2418 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
2419 	if (!system->filter)
2420 		goto out_free;
2421 
2422 	list_add(&system->list, &event_subsystems);
2423 
2424 	return system;
2425 
2426  out_free:
2427 	kfree_const(system->name);
2428 	kfree(system);
2429 	return NULL;
2430 }
2431 
system_callback(const char * name,umode_t * mode,void ** data,const struct file_operations ** fops)2432 static int system_callback(const char *name, umode_t *mode, void **data,
2433 		    const struct file_operations **fops)
2434 {
2435 	if (strcmp(name, "filter") == 0)
2436 		*fops = &ftrace_subsystem_filter_fops;
2437 
2438 	else if (strcmp(name, "enable") == 0)
2439 		*fops = &ftrace_system_enable_fops;
2440 
2441 	else
2442 		return 0;
2443 
2444 	*mode = TRACE_MODE_WRITE;
2445 	return 1;
2446 }
2447 
2448 static struct eventfs_inode *
event_subsystem_dir(struct trace_array * tr,const char * name,struct trace_event_file * file,struct eventfs_inode * parent)2449 event_subsystem_dir(struct trace_array *tr, const char *name,
2450 		    struct trace_event_file *file, struct eventfs_inode *parent)
2451 {
2452 	struct event_subsystem *system, *iter;
2453 	struct trace_subsystem_dir *dir;
2454 	struct eventfs_inode *ei;
2455 	int nr_entries;
2456 	static struct eventfs_entry system_entries[] = {
2457 		{
2458 			.name		= "filter",
2459 			.callback	= system_callback,
2460 		},
2461 		{
2462 			.name		= "enable",
2463 			.callback	= system_callback,
2464 		}
2465 	};
2466 
2467 	/* First see if we already created this dir */
2468 	list_for_each_entry(dir, &tr->systems, list) {
2469 		system = dir->subsystem;
2470 		if (strcmp(system->name, name) == 0) {
2471 			dir->nr_events++;
2472 			file->system = dir;
2473 			return dir->ei;
2474 		}
2475 	}
2476 
2477 	/* Now see if the system itself exists. */
2478 	system = NULL;
2479 	list_for_each_entry(iter, &event_subsystems, list) {
2480 		if (strcmp(iter->name, name) == 0) {
2481 			system = iter;
2482 			break;
2483 		}
2484 	}
2485 
2486 	dir = kmalloc(sizeof(*dir), GFP_KERNEL);
2487 	if (!dir)
2488 		goto out_fail;
2489 
2490 	if (!system) {
2491 		system = create_new_subsystem(name);
2492 		if (!system)
2493 			goto out_free;
2494 	} else
2495 		__get_system(system);
2496 
2497 	/* ftrace only has directories, no files */
2498 	if (strcmp(name, "ftrace") == 0)
2499 		nr_entries = 0;
2500 	else
2501 		nr_entries = ARRAY_SIZE(system_entries);
2502 
2503 	ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir);
2504 	if (IS_ERR(ei)) {
2505 		pr_warn("Failed to create system directory %s\n", name);
2506 		__put_system(system);
2507 		goto out_free;
2508 	}
2509 
2510 	dir->ei = ei;
2511 	dir->tr = tr;
2512 	dir->ref_count = 1;
2513 	dir->nr_events = 1;
2514 	dir->subsystem = system;
2515 	file->system = dir;
2516 
2517 	list_add(&dir->list, &tr->systems);
2518 
2519 	return dir->ei;
2520 
2521  out_free:
2522 	kfree(dir);
2523  out_fail:
2524 	/* Only print this message if we failed on a memory allocation */
2525 	if (!dir || !system)
2526 		pr_warn("No memory to create event subsystem %s\n", name);
2527 	return NULL;
2528 }
2529 
2530 static int
event_define_fields(struct trace_event_call * call)2531 event_define_fields(struct trace_event_call *call)
2532 {
2533 	struct list_head *head;
2534 	int ret = 0;
2535 
2536 	/*
2537 	 * Other events may have the same class. Only update
2538 	 * the fields if they are not already defined.
2539 	 */
2540 	head = trace_get_fields(call);
2541 	if (list_empty(head)) {
2542 		struct trace_event_fields *field = call->class->fields_array;
2543 		unsigned int offset = sizeof(struct trace_entry);
2544 
2545 		for (; field->type; field++) {
2546 			if (field->type == TRACE_FUNCTION_TYPE) {
2547 				field->define_fields(call);
2548 				break;
2549 			}
2550 
2551 			offset = ALIGN(offset, field->align);
2552 			ret = trace_define_field_ext(call, field->type, field->name,
2553 						 offset, field->size,
2554 						 field->is_signed, field->filter_type,
2555 						 field->len);
2556 			if (WARN_ON_ONCE(ret)) {
2557 				pr_err("error code is %d\n", ret);
2558 				break;
2559 			}
2560 
2561 			offset += field->size;
2562 		}
2563 	}
2564 
2565 	return ret;
2566 }
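
/*
 * A sketch of the fields_array this walks, as laid out by the
 * TRACE_EVENT() machinery (the member values below are illustrative):
 *
 *	static struct trace_event_fields my_event_fields[] = {
 *		{ .type = "pid_t", .name = "pid",
 *		  .size = sizeof(pid_t), .align = __alignof__(pid_t),
 *		  .is_signed = 1, .filter_type = FILTER_OTHER },
 *		{}	// an empty .type terminates the loop above
 *	};
 */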
2567 
event_callback(const char * name,umode_t * mode,void ** data,const struct file_operations ** fops)2568 static int event_callback(const char *name, umode_t *mode, void **data,
2569 			  const struct file_operations **fops)
2570 {
2571 	struct trace_event_file *file = *data;
2572 	struct trace_event_call *call = file->event_call;
2573 
2574 	if (strcmp(name, "format") == 0) {
2575 		*mode = TRACE_MODE_READ;
2576 		*fops = &ftrace_event_format_fops;
2577 		return 1;
2578 	}
2579 
2580 	/*
2581 	 * Only event directories that can be enabled should have
2582 	 * triggers or filters, with the exception of the "print"
2583 	 * event that can have a "trigger" file.
2584 	 */
2585 	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
2586 		if (call->class->reg && strcmp(name, "enable") == 0) {
2587 			*mode = TRACE_MODE_WRITE;
2588 			*fops = &ftrace_enable_fops;
2589 			return 1;
2590 		}
2591 
2592 		if (strcmp(name, "filter") == 0) {
2593 			*mode = TRACE_MODE_WRITE;
2594 			*fops = &ftrace_event_filter_fops;
2595 			return 1;
2596 		}
2597 	}
2598 
2599 	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
2600 	    strcmp(trace_event_name(call), "print") == 0) {
2601 		if (strcmp(name, "trigger") == 0) {
2602 			*mode = TRACE_MODE_WRITE;
2603 			*fops = &event_trigger_fops;
2604 			return 1;
2605 		}
2606 	}
2607 
2608 #ifdef CONFIG_PERF_EVENTS
2609 	if (call->event.type && call->class->reg &&
2610 	    strcmp(name, "id") == 0) {
2611 		*mode = TRACE_MODE_READ;
2612 		*data = (void *)(long)call->event.type;
2613 		*fops = &ftrace_event_id_fops;
2614 		return 1;
2615 	}
2616 #endif
2617 
2618 #ifdef CONFIG_HIST_TRIGGERS
2619 	if (strcmp(name, "hist") == 0) {
2620 		*mode = TRACE_MODE_READ;
2621 		*fops = &event_hist_fops;
2622 		return 1;
2623 	}
2624 #endif
2625 #ifdef CONFIG_HIST_TRIGGERS_DEBUG
2626 	if (strcmp(name, "hist_debug") == 0) {
2627 		*mode = TRACE_MODE_READ;
2628 		*fops = &event_hist_debug_fops;
2629 		return 1;
2630 	}
2631 #endif
2632 #ifdef CONFIG_TRACE_EVENT_INJECT
2633 	if (call->event.type && call->class->reg &&
2634 	    strcmp(name, "inject") == 0) {
2635 		*mode = 0200;
2636 		*fops = &event_inject_fops;
2637 		return 1;
2638 	}
2639 #endif
2640 	return 0;
2641 }
2642 
2643 /* The file's ref count is incremented on creation and freeing the "enable" file decrements it */
event_release(const char * name,void * data)2644 static void event_release(const char *name, void *data)
2645 {
2646 	struct trace_event_file *file = data;
2647 
2648 	event_file_put(file);
2649 }
2650 
2651 static int
event_create_dir(struct eventfs_inode * parent,struct trace_event_file * file)2652 event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
2653 {
2654 	struct trace_event_call *call = file->event_call;
2655 	struct trace_array *tr = file->tr;
2656 	struct eventfs_inode *e_events;
2657 	struct eventfs_inode *ei;
2658 	const char *name;
2659 	int nr_entries;
2660 	int ret;
2661 	static struct eventfs_entry event_entries[] = {
2662 		{
2663 			.name		= "enable",
2664 			.callback	= event_callback,
2665 			.release	= event_release,
2666 		},
2667 		{
2668 			.name		= "filter",
2669 			.callback	= event_callback,
2670 		},
2671 		{
2672 			.name		= "trigger",
2673 			.callback	= event_callback,
2674 		},
2675 		{
2676 			.name		= "format",
2677 			.callback	= event_callback,
2678 		},
2679 #ifdef CONFIG_PERF_EVENTS
2680 		{
2681 			.name		= "id",
2682 			.callback	= event_callback,
2683 		},
2684 #endif
2685 #ifdef CONFIG_HIST_TRIGGERS
2686 		{
2687 			.name		= "hist",
2688 			.callback	= event_callback,
2689 		},
2690 #endif
2691 #ifdef CONFIG_HIST_TRIGGERS_DEBUG
2692 		{
2693 			.name		= "hist_debug",
2694 			.callback	= event_callback,
2695 		},
2696 #endif
2697 #ifdef CONFIG_TRACE_EVENT_INJECT
2698 		{
2699 			.name		= "inject",
2700 			.callback	= event_callback,
2701 		},
2702 #endif
2703 	};
2704 
2705 	/*
2706 	 * If the trace point header did not define TRACE_SYSTEM
2707 	 * then the system would be called "TRACE_SYSTEM". This should
2708 	 * never happen.
2709 	 */
2710 	if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0))
2711 		return -ENODEV;
2712 
2713 	e_events = event_subsystem_dir(tr, call->class->system, file, parent);
2714 	if (!e_events)
2715 		return -ENOMEM;
2716 
2717 	nr_entries = ARRAY_SIZE(event_entries);
2718 
2719 	name = trace_event_name(call);
2720 	ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file);
2721 	if (IS_ERR(ei)) {
2722 		pr_warn("Could not create tracefs '%s' directory\n", name);
2723 		return -1;
2724 	}
2725 
2726 	file->ei = ei;
2727 
2728 	ret = event_define_fields(call);
2729 	if (ret < 0) {
2730 		pr_warn("Could not initialize trace point events/%s\n", name);
2731 		return ret;
2732 	}
2733 
2734 	/* Gets decremented on freeing of the "enable" file */
2735 	event_file_get(file);
2736 
2737 	return 0;
2738 }
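
/*
 * The resulting tracefs layout for a single event, given the entries
 * defined above (the config-dependent files appear only when the
 * corresponding option is enabled):
 *
 *	events/<system>/<event>/enable
 *	events/<system>/<event>/filter
 *	events/<system>/<event>/trigger
 *	events/<system>/<event>/format
 *	events/<system>/<event>/id		(CONFIG_PERF_EVENTS)
 *	events/<system>/<event>/hist		(CONFIG_HIST_TRIGGERS)
 *	events/<system>/<event>/hist_debug	(CONFIG_HIST_TRIGGERS_DEBUG)
 *	events/<system>/<event>/inject		(CONFIG_TRACE_EVENT_INJECT)
 */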
2739 
remove_event_from_tracers(struct trace_event_call * call)2740 static void remove_event_from_tracers(struct trace_event_call *call)
2741 {
2742 	struct trace_event_file *file;
2743 	struct trace_array *tr;
2744 
2745 	do_for_each_event_file_safe(tr, file) {
2746 		if (file->event_call != call)
2747 			continue;
2748 
2749 		remove_event_file_dir(file);
2750 		/*
2751 		 * The do_for_each_event_file_safe() is
2752 		 * a double loop. After finding the call for this
2753 		 * trace_array, we use break to jump to the next
2754 		 * trace_array.
2755 		 */
2756 		break;
2757 	} while_for_each_event_file();
2758 }
2759 
event_remove(struct trace_event_call * call)2760 static void event_remove(struct trace_event_call *call)
2761 {
2762 	struct trace_array *tr;
2763 	struct trace_event_file *file;
2764 
2765 	do_for_each_event_file(tr, file) {
2766 		if (file->event_call != call)
2767 			continue;
2768 
2769 		if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
2770 			tr->clear_trace = true;
2771 
2772 		ftrace_event_enable_disable(file, 0);
2773 		/*
2774 		 * The do_for_each_event_file() is
2775 		 * a double loop. After finding the call for this
2776 		 * trace_array, we use break to jump to the next
2777 		 * trace_array.
2778 		 */
2779 		break;
2780 	} while_for_each_event_file();
2781 
2782 	if (call->event.funcs)
2783 		__unregister_trace_event(&call->event);
2784 	remove_event_from_tracers(call);
2785 	list_del(&call->list);
2786 }
2787 
event_init(struct trace_event_call * call)2788 static int event_init(struct trace_event_call *call)
2789 {
2790 	int ret = 0;
2791 	const char *name;
2792 
2793 	name = trace_event_name(call);
2794 	if (WARN_ON(!name))
2795 		return -EINVAL;
2796 
2797 	if (call->class->raw_init) {
2798 		ret = call->class->raw_init(call);
2799 		if (ret < 0 && ret != -ENOSYS)
2800 			pr_warn("Could not initialize trace events/%s\n", name);
2801 	}
2802 
2803 	return ret;
2804 }
2805 
2806 static int
__register_event(struct trace_event_call * call,struct module * mod)2807 __register_event(struct trace_event_call *call, struct module *mod)
2808 {
2809 	int ret;
2810 
2811 	ret = event_init(call);
2812 	if (ret < 0)
2813 		return ret;
2814 
2815 	list_add(&call->list, &ftrace_events);
2816 	if (call->flags & TRACE_EVENT_FL_DYNAMIC)
2817 		atomic_set(&call->refcnt, 0);
2818 	else
2819 		call->module = mod;
2820 
2821 	return 0;
2822 }
2823 
eval_replace(char * ptr,struct trace_eval_map * map,int len)2824 static char *eval_replace(char *ptr, struct trace_eval_map *map, int len)
2825 {
2826 	int rlen;
2827 	int elen;
2828 
2829 	/* Find the length of the eval value as a string */
2830 	elen = snprintf(ptr, 0, "%ld", map->eval_value);
2831 	/* Make sure there's enough room to replace the string with the value */
2832 	if (len < elen)
2833 		return NULL;
2834 
2835 	snprintf(ptr, elen + 1, "%ld", map->eval_value);
2836 
2837 	/* Get the rest of the string of ptr */
2838 	rlen = strlen(ptr + len);
2839 	memmove(ptr + elen, ptr + len, rlen);
2840 	/* Make sure we end the new string */
2841 	ptr[elen + rlen] = 0;
2842 
2843 	return ptr + elen;
2844 }
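
/*
 * Worked example (hypothetical map): with map->eval_string =
 * "TASK_RUNNING" (len = 12) and map->eval_value = 0, a ptr pointing at
 *
 *	"TASK_RUNNING || REC->x"
 *
 * gives elen = 1. The snprintf() writes "0", the memmove() pulls the
 * tail up, and the buffer becomes "0 || REC->x", with the returned
 * pointer at the space just after the '0'.
 */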
2845 
update_event_printk(struct trace_event_call * call,struct trace_eval_map * map)2846 static void update_event_printk(struct trace_event_call *call,
2847 				struct trace_eval_map *map)
2848 {
2849 	char *ptr;
2850 	int quote = 0;
2851 	int len = strlen(map->eval_string);
2852 
2853 	for (ptr = call->print_fmt; *ptr; ptr++) {
2854 		if (*ptr == '\\') {
2855 			ptr++;
2856 			/* paranoid */
2857 			if (!*ptr)
2858 				break;
2859 			continue;
2860 		}
2861 		if (*ptr == '"') {
2862 			quote ^= 1;
2863 			continue;
2864 		}
2865 		if (quote)
2866 			continue;
2867 		if (isdigit(*ptr)) {
2868 			/* skip numbers */
2869 			do {
2870 				ptr++;
2871 				/* Check for alpha chars like ULL */
2872 			} while (isalnum(*ptr));
2873 			if (!*ptr)
2874 				break;
2875 			/*
2876 			 * A number must have some kind of delimiter after
2877 			 * it, and we can ignore that too.
2878 			 */
2879 			continue;
2880 		}
2881 		if (isalpha(*ptr) || *ptr == '_') {
2882 			if (strncmp(map->eval_string, ptr, len) == 0 &&
2883 			    !isalnum(ptr[len]) && ptr[len] != '_') {
2884 				ptr = eval_replace(ptr, map, len);
2885 				/* enum/sizeof string smaller than value */
2886 				if (WARN_ON_ONCE(!ptr))
2887 					return;
2888 				/*
2889 				 * No need to decrement here, as eval_replace()
2890 				 * returns the pointer to the character past
2891 				 * the eval, and two evals cannot be placed
2892 				 * back to back without something in between.
2893 				 * We can skip that something in between.
2894 				 */
2895 				continue;
2896 			}
2897 		skip_more:
2898 			do {
2899 				ptr++;
2900 			} while (isalnum(*ptr) || *ptr == '_');
2901 			if (!*ptr)
2902 				break;
2903 			/*
2904 			 * If what comes after this variable is a '.' or
2905 			 * '->' then we can continue to ignore that string.
2906 			 */
2907 			if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
2908 				ptr += *ptr == '.' ? 1 : 2;
2909 				if (!*ptr)
2910 					break;
2911 				goto skip_more;
2912 			}
2913 			/*
2914 			 * Once again, we can skip the delimiter that came
2915 			 * after the string.
2916 			 */
2917 			continue;
2918 		}
2919 	}
2920 }
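
/*
 * Net effect on a print_fmt (illustrative): given a mapping of
 * TASK_RUNNING to 0, a format such as
 *
 *	"state=%d", REC->state == TASK_RUNNING ? 0 : 1
 *
 * is rewritten in place to
 *
 *	"state=%d", REC->state == 0 ? 0 : 1
 *
 * Text inside double quotes, numbers, and member accesses reached
 * via '.' or "->" are all skipped by the scanner above.
 */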
2921 
add_str_to_module(struct module * module,char * str)2922 static void add_str_to_module(struct module *module, char *str)
2923 {
2924 	struct module_string *modstr;
2925 
2926 	modstr = kmalloc(sizeof(*modstr), GFP_KERNEL);
2927 
2928 	/*
2929 	 * If we failed to allocate memory here, then we'll just
2930 	 * let the str memory leak when the module is removed.
2931 	 * If this fails to allocate, there are worse problems than
2932 	 * a leaked string on module removal.
2933 	 */
2934 	if (WARN_ON_ONCE(!modstr))
2935 		return;
2936 
2937 	modstr->module = module;
2938 	modstr->str = str;
2939 
2940 	list_add(&modstr->next, &module_strings);
2941 }
2942 
update_event_fields(struct trace_event_call * call,struct trace_eval_map * map)2943 static void update_event_fields(struct trace_event_call *call,
2944 				struct trace_eval_map *map)
2945 {
2946 	struct ftrace_event_field *field;
2947 	struct list_head *head;
2948 	char *ptr;
2949 	char *str;
2950 	int len = strlen(map->eval_string);
2951 
2952 	/* Dynamic events should never have field maps */
2953 	if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC))
2954 		return;
2955 
2956 	head = trace_get_fields(call);
2957 	list_for_each_entry(field, head, link) {
2958 		ptr = strchr(field->type, '[');
2959 		if (!ptr)
2960 			continue;
2961 		ptr++;
2962 
2963 		if (!isalpha(*ptr) && *ptr != '_')
2964 			continue;
2965 
2966 		if (strncmp(map->eval_string, ptr, len) != 0)
2967 			continue;
2968 
2969 		str = kstrdup(field->type, GFP_KERNEL);
2970 		if (WARN_ON_ONCE(!str))
2971 			return;
2972 		ptr = str + (ptr - field->type);
2973 		ptr = eval_replace(ptr, map, len);
2974 		/* enum/sizeof string smaller than value */
2975 		if (WARN_ON_ONCE(!ptr)) {
2976 			kfree(str);
2977 			continue;
2978 		}
2979 
2980 		/*
2981 		 * If the event is part of a module, then we need to free the string
2982 		 * when the module is removed. Otherwise, it will stay allocated
2983 		 * until a reboot.
2984 		 */
2985 		if (call->module)
2986 			add_str_to_module(call->module, str);
2987 
2988 		field->type = str;
2989 	}
2990 }
2991 
trace_event_eval_update(struct trace_eval_map ** map,int len)2992 void trace_event_eval_update(struct trace_eval_map **map, int len)
2993 {
2994 	struct trace_event_call *call, *p;
2995 	const char *last_system = NULL;
2996 	bool first = false;
2997 	int last_i;
2998 	int i;
2999 
3000 	down_write(&trace_event_sem);
3001 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
3002 		/* events are usually grouped together by system */
3003 		if (!last_system || call->class->system != last_system) {
3004 			first = true;
3005 			last_i = 0;
3006 			last_system = call->class->system;
3007 		}
3008 
3009 		/*
3010 		 * Since calls are grouped by systems, the likelihood that the
3011 		 * next call in the iteration belongs to the same system as the
3012 		 * previous call is high. As an optimization, we skip searching
3013 		 * for a map[] that matches the call's system if the last call
3014 		 * was from the same system. That's what last_i is for. If the
3015 		 * call has the same system as the previous call, then last_i
3016 		 * will be the index of the first map[] that has a matching
3017 		 * system.
3018 		 */
3019 		for (i = last_i; i < len; i++) {
3020 			if (call->class->system == map[i]->system) {
3021 				/* Save the first system if need be */
3022 				if (first) {
3023 					last_i = i;
3024 					first = false;
3025 				}
3026 				update_event_printk(call, map[i]);
3027 				update_event_fields(call, map[i]);
3028 			}
3029 		}
3030 		cond_resched();
3031 	}
3032 	up_write(&trace_event_sem);
3033 }
3034 
3035 static struct trace_event_file *
trace_create_new_event(struct trace_event_call * call,struct trace_array * tr)3036 trace_create_new_event(struct trace_event_call *call,
3037 		       struct trace_array *tr)
3038 {
3039 	struct trace_pid_list *no_pid_list;
3040 	struct trace_pid_list *pid_list;
3041 	struct trace_event_file *file;
3042 	unsigned int first;
3043 
3044 	file = kmem_cache_alloc(file_cachep, GFP_TRACE);
3045 	if (!file)
3046 		return NULL;
3047 
3048 	pid_list = rcu_dereference_protected(tr->filtered_pids,
3049 					     lockdep_is_held(&event_mutex));
3050 	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
3051 					     lockdep_is_held(&event_mutex));
3052 
3053 	if (!trace_pid_list_first(pid_list, &first) ||
3054 	    !trace_pid_list_first(no_pid_list, &first))
3055 		file->flags |= EVENT_FILE_FL_PID_FILTER;
3056 
3057 	file->event_call = call;
3058 	file->tr = tr;
3059 	atomic_set(&file->sm_ref, 0);
3060 	atomic_set(&file->tm_ref, 0);
3061 	INIT_LIST_HEAD(&file->triggers);
3062 	list_add(&file->list, &tr->events);
3063 	event_file_get(file);
3064 
3065 	return file;
3066 }
3067 
3068 #define MAX_BOOT_TRIGGERS 32
3069 
3070 static struct boot_triggers {
3071 	const char		*event;
3072 	char			*trigger;
3073 } bootup_triggers[MAX_BOOT_TRIGGERS];
3074 
3075 static char bootup_trigger_buf[COMMAND_LINE_SIZE];
3076 static int nr_boot_triggers;
3077 
setup_trace_triggers(char * str)3078 static __init int setup_trace_triggers(char *str)
3079 {
3080 	char *trigger;
3081 	char *buf;
3082 	int i;
3083 
3084 	strscpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
3085 	ring_buffer_expanded = true;
3086 	disable_tracing_selftest("running event triggers");
3087 
3088 	buf = bootup_trigger_buf;
3089 	for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
3090 		trigger = strsep(&buf, ",");
3091 		if (!trigger)
3092 			break;
3093 		bootup_triggers[i].event = strsep(&trigger, ".");
3094 		bootup_triggers[i].trigger = trigger;
3095 		if (!bootup_triggers[i].trigger)
3096 			break;
3097 	}
3098 
3099 	nr_boot_triggers = i;
3100 	return 1;
3101 }
3102 __setup("trace_trigger=", setup_trace_triggers);
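
/*
 * Boot command-line usage parsed above (the events and triggers are
 * examples). Entries are comma separated, and each is split at the
 * first '.' into an event name and the trigger to apply to it:
 *
 *	trace_trigger="sched_switch.stacktrace:5,sched_wakeup.traceoff"
 */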
3103 
3104 /* Add an event to a trace directory */
3105 static int
__trace_add_new_event(struct trace_event_call * call,struct trace_array * tr)3106 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
3107 {
3108 	struct trace_event_file *file;
3109 
3110 	file = trace_create_new_event(call, tr);
3111 	if (!file)
3112 		return -ENOMEM;
3113 
3114 	if (eventdir_initialized)
3115 		return event_create_dir(tr->event_dir, file);
3116 	else
3117 		return event_define_fields(call);
3118 }
3119 
trace_early_triggers(struct trace_event_file * file,const char * name)3120 static void trace_early_triggers(struct trace_event_file *file, const char *name)
3121 {
3122 	int ret;
3123 	int i;
3124 
3125 	for (i = 0; i < nr_boot_triggers; i++) {
3126 		if (strcmp(name, bootup_triggers[i].event))
3127 			continue;
3128 		mutex_lock(&event_mutex);
3129 		ret = trigger_process_regex(file, bootup_triggers[i].trigger);
3130 		mutex_unlock(&event_mutex);
3131 		if (ret)
3132 			pr_err("Failed to register trigger '%s' on event %s\n",
3133 			       bootup_triggers[i].trigger,
3134 			       bootup_triggers[i].event);
3135 	}
3136 }
3137 
3138 /*
3139  * Just create a descriptor for early init. A descriptor is required
3140  * for enabling events at boot. We want to enable events before
3141  * the filesystem is initialized.
3142  */
3143 static int
__trace_early_add_new_event(struct trace_event_call * call,struct trace_array * tr)3144 __trace_early_add_new_event(struct trace_event_call *call,
3145 			    struct trace_array *tr)
3146 {
3147 	struct trace_event_file *file;
3148 	int ret;
3149 
3150 	file = trace_create_new_event(call, tr);
3151 	if (!file)
3152 		return -ENOMEM;
3153 
3154 	ret = event_define_fields(call);
3155 	if (ret)
3156 		return ret;
3157 
3158 	trace_early_triggers(file, trace_event_name(call));
3159 
3160 	return 0;
3161 }
3162 
3163 struct ftrace_module_file_ops;
3164 static void __add_event_to_tracers(struct trace_event_call *call);
3165 
3166 /* Add an additional event_call dynamically */
trace_add_event_call(struct trace_event_call * call)3167 int trace_add_event_call(struct trace_event_call *call)
3168 {
3169 	int ret;
3170 	lockdep_assert_held(&event_mutex);
3171 
3172 	mutex_lock(&trace_types_lock);
3173 
3174 	ret = __register_event(call, NULL);
3175 	if (ret >= 0)
3176 		__add_event_to_tracers(call);
3177 
3178 	mutex_unlock(&trace_types_lock);
3179 	return ret;
3180 }
3181 EXPORT_SYMBOL_GPL(trace_add_event_call);
3182 
3183 /*
3184  * Must be called under locking of trace_types_lock, event_mutex and
3185  * trace_event_sem.
3186  */
__trace_remove_event_call(struct trace_event_call * call)3187 static void __trace_remove_event_call(struct trace_event_call *call)
3188 {
3189 	event_remove(call);
3190 	trace_destroy_fields(call);
3191 	free_event_filter(call->filter);
3192 	call->filter = NULL;
3193 }
3194 
probe_remove_event_call(struct trace_event_call * call)3195 static int probe_remove_event_call(struct trace_event_call *call)
3196 {
3197 	struct trace_array *tr;
3198 	struct trace_event_file *file;
3199 
3200 #ifdef CONFIG_PERF_EVENTS
3201 	if (call->perf_refcount)
3202 		return -EBUSY;
3203 #endif
3204 	do_for_each_event_file(tr, file) {
3205 		if (file->event_call != call)
3206 			continue;
3207 		/*
3208 		 * We can't rely on the ftrace_event_enable_disable(enable => 0)
3209 		 * that we are about to do; EVENT_FILE_FL_SOFT_MODE can suppress
3210 		 * TRACE_REG_UNREGISTER.
3211 		 */
3212 		if (file->flags & EVENT_FILE_FL_ENABLED)
3213 			goto busy;
3214 
3215 		if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
3216 			tr->clear_trace = true;
3217 		/*
3218 		 * The do_for_each_event_file() is
3219 		 * a double loop. After finding the call for this
3220 		 * trace_array, we use break to jump to the next
3221 		 * trace_array.
3222 		 */
3223 		break;
3224 	} while_for_each_event_file();
3225 
3226 	__trace_remove_event_call(call);
3227 
3228 	return 0;
3229  busy:
3230 	/* No need to clear the trace now */
3231 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
3232 		tr->clear_trace = false;
3233 	}
3234 	return -EBUSY;
3235 }
3236 
3237 /* Remove an event_call */
trace_remove_event_call(struct trace_event_call * call)3238 int trace_remove_event_call(struct trace_event_call *call)
3239 {
3240 	int ret;
3241 
3242 	lockdep_assert_held(&event_mutex);
3243 
3244 	mutex_lock(&trace_types_lock);
3245 	down_write(&trace_event_sem);
3246 	ret = probe_remove_event_call(call);
3247 	up_write(&trace_event_sem);
3248 	mutex_unlock(&trace_types_lock);
3249 
3250 	return ret;
3251 }
3252 EXPORT_SYMBOL_GPL(trace_remove_event_call);
3253 
3254 #define for_each_event(event, start, end)			\
3255 	for (event = start;					\
3256 	     (unsigned long)event < (unsigned long)end;		\
3257 	     event++)
3258 
3259 #ifdef CONFIG_MODULES
3260 
trace_module_add_events(struct module * mod)3261 static void trace_module_add_events(struct module *mod)
3262 {
3263 	struct trace_event_call **call, **start, **end;
3264 
3265 	if (!mod->num_trace_events)
3266 		return;
3267 
3268 	/* Don't add infrastructure for mods without tracepoints */
3269 	if (trace_module_has_bad_taint(mod)) {
3270 		pr_err("%s: module has bad taint, not creating trace events\n",
3271 		       mod->name);
3272 		return;
3273 	}
3274 
3275 	start = mod->trace_events;
3276 	end = mod->trace_events + mod->num_trace_events;
3277 
3278 	for_each_event(call, start, end) {
3279 		__register_event(*call, mod);
3280 		__add_event_to_tracers(*call);
3281 	}
3282 }
3283 
trace_module_remove_events(struct module * mod)3284 static void trace_module_remove_events(struct module *mod)
3285 {
3286 	struct trace_event_call *call, *p;
3287 	struct module_string *modstr, *m;
3288 
3289 	down_write(&trace_event_sem);
3290 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
3291 		if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module)
3292 			continue;
3293 		if (call->module == mod)
3294 			__trace_remove_event_call(call);
3295 	}
3296 	/* Check for any strings allocated for this module */
3297 	list_for_each_entry_safe(modstr, m, &module_strings, next) {
3298 		if (modstr->module != mod)
3299 			continue;
3300 		list_del(&modstr->next);
3301 		kfree(modstr->str);
3302 		kfree(modstr);
3303 	}
3304 	up_write(&trace_event_sem);
3305 
3306 	/*
3307 	 * It is safest to reset the ring buffer if the module being unloaded
3308 	 * registered any events that were used. The only worry is if
3309 	 * a new module gets loaded, and takes on the same id as the events
3310 	 * of this module. When printing out the buffer, traced events left
3311 	 * over from this module may be decoded with the new module's events,
3312 	 * and unexpected results may occur.
3313 	 */
3314 	tracing_reset_all_online_cpus_unlocked();
3315 }
3316 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)3317 static int trace_module_notify(struct notifier_block *self,
3318 			       unsigned long val, void *data)
3319 {
3320 	struct module *mod = data;
3321 
3322 	mutex_lock(&event_mutex);
3323 	mutex_lock(&trace_types_lock);
3324 	switch (val) {
3325 	case MODULE_STATE_COMING:
3326 		trace_module_add_events(mod);
3327 		break;
3328 	case MODULE_STATE_GOING:
3329 		trace_module_remove_events(mod);
3330 		break;
3331 	}
3332 	mutex_unlock(&trace_types_lock);
3333 	mutex_unlock(&event_mutex);
3334 
3335 	return NOTIFY_OK;
3336 }
3337 
3338 static struct notifier_block trace_module_nb = {
3339 	.notifier_call = trace_module_notify,
3340 	.priority = 1, /* higher than trace.c module notify */
3341 };
3342 #endif /* CONFIG_MODULES */
3343 
3344 /* Create a new event directory structure for a trace directory. */
3345 static void
__trace_add_event_dirs(struct trace_array * tr)3346 __trace_add_event_dirs(struct trace_array *tr)
3347 {
3348 	struct trace_event_call *call;
3349 	int ret;
3350 
3351 	list_for_each_entry(call, &ftrace_events, list) {
3352 		ret = __trace_add_new_event(call, tr);
3353 		if (ret < 0)
3354 			pr_warn("Could not create directory for event %s\n",
3355 				trace_event_name(call));
3356 	}
3357 }
3358 
3359 /* Returns any file that matches the system and event */
3360 struct trace_event_file *
__find_event_file(struct trace_array * tr,const char * system,const char * event)3361 __find_event_file(struct trace_array *tr, const char *system, const char *event)
3362 {
3363 	struct trace_event_file *file;
3364 	struct trace_event_call *call;
3365 	const char *name;
3366 
3367 	list_for_each_entry(file, &tr->events, list) {
3368 
3369 		call = file->event_call;
3370 		name = trace_event_name(call);
3371 
3372 		if (!name || !call->class)
3373 			continue;
3374 
3375 		if (strcmp(event, name) == 0 &&
3376 		    strcmp(system, call->class->system) == 0)
3377 			return file;
3378 	}
3379 	return NULL;
3380 }
3381 
3382 /* Returns a valid trace event file that matches system and event */
3383 struct trace_event_file *
find_event_file(struct trace_array * tr,const char * system,const char * event)3384 find_event_file(struct trace_array *tr, const char *system, const char *event)
3385 {
3386 	struct trace_event_file *file;
3387 
3388 	file = __find_event_file(tr, system, event);
3389 	if (!file || !file->event_call->class->reg ||
3390 	    file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
3391 		return NULL;
3392 
3393 	return file;
3394 }
3395 
3396 /**
3397  * trace_get_event_file - Find and return a trace event file
3398  * @instance: The name of the trace instance containing the event
3399  * @system: The name of the system containing the event
3400  * @event: The name of the event
3401  *
3402  * Return a trace event file given the trace instance name, trace
3403  * system, and trace event name.  If the instance name is NULL, it
3404  * refers to the top-level trace array.
3405  *
3406  * This function will look it up and return it if found, after calling
3407  * trace_array_get() to prevent the instance from going away, and
3408  * increment the event's module refcount to prevent it from being
3409  * removed.
3410  *
3411  * To release the file, call trace_put_event_file(), which will call
3412  * trace_array_put() and decrement the event's module refcount.
3413  *
3414  * Return: The trace event on success, ERR_PTR otherwise.
3415  */
trace_get_event_file(const char * instance,const char * system,const char * event)3416 struct trace_event_file *trace_get_event_file(const char *instance,
3417 					      const char *system,
3418 					      const char *event)
3419 {
3420 	struct trace_array *tr = top_trace_array();
3421 	struct trace_event_file *file = NULL;
3422 	int ret = -EINVAL;
3423 
3424 	if (instance) {
3425 		tr = trace_array_find_get(instance);
3426 		if (!tr)
3427 			return ERR_PTR(-ENOENT);
3428 	} else {
3429 		ret = trace_array_get(tr);
3430 		if (ret)
3431 			return ERR_PTR(ret);
3432 	}
3433 
3434 	mutex_lock(&event_mutex);
3435 
3436 	file = find_event_file(tr, system, event);
3437 	if (!file) {
3438 		trace_array_put(tr);
3439 		ret = -EINVAL;
3440 		goto out;
3441 	}
3442 
3443 	/* Don't let event modules unload while in use */
3444 	ret = trace_event_try_get_ref(file->event_call);
3445 	if (!ret) {
3446 		trace_array_put(tr);
3447 		ret = -EBUSY;
3448 		goto out;
3449 	}
3450 
3451 	ret = 0;
3452  out:
3453 	mutex_unlock(&event_mutex);
3454 
3455 	if (ret)
3456 		file = ERR_PTR(ret);
3457 
3458 	return file;
3459 }
3460 EXPORT_SYMBOL_GPL(trace_get_event_file);
3461 
3462 /**
3463  * trace_put_event_file - Release a file from trace_get_event_file()
3464  * @file: The trace event file
3465  *
3466  * If a file was retrieved using trace_get_event_file(), this should
3467  * be called when it's no longer needed.  It will cancel the previous
3468  * trace_array_get() called by that function, and decrement the
3469  * event's module refcount.
3470  */
trace_put_event_file(struct trace_event_file * file)3471 void trace_put_event_file(struct trace_event_file *file)
3472 {
3473 	mutex_lock(&event_mutex);
3474 	trace_event_put_ref(file->event_call);
3475 	mutex_unlock(&event_mutex);
3476 
3477 	trace_array_put(file->tr);
3478 }
3479 EXPORT_SYMBOL_GPL(trace_put_event_file);
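
/*
 * A minimal sketch of the get/put pairing described above (the
 * system and event names are examples):
 *
 *	struct trace_event_file *file;
 *
 *	file = trace_get_event_file(NULL, "sched", "sched_switch");
 *	if (IS_ERR(file))
 *		return PTR_ERR(file);
 *
 *	// ... use file, e.g. attach a trigger or synthetic event ...
 *
 *	trace_put_event_file(file);
 */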
3480 
3481 #ifdef CONFIG_DYNAMIC_FTRACE
3482 
3483 /* Avoid typos */
3484 #define ENABLE_EVENT_STR	"enable_event"
3485 #define DISABLE_EVENT_STR	"disable_event"
3486 
3487 struct event_probe_data {
3488 	struct trace_event_file	*file;
3489 	unsigned long			count;
3490 	int				ref;
3491 	bool				enable;
3492 };
3493 
update_event_probe(struct event_probe_data * data)3494 static void update_event_probe(struct event_probe_data *data)
3495 {
3496 	if (data->enable)
3497 		clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
3498 	else
3499 		set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
3500 }
3501 
3502 static void
event_enable_probe(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)3503 event_enable_probe(unsigned long ip, unsigned long parent_ip,
3504 		   struct trace_array *tr, struct ftrace_probe_ops *ops,
3505 		   void *data)
3506 {
3507 	struct ftrace_func_mapper *mapper = data;
3508 	struct event_probe_data *edata;
3509 	void **pdata;
3510 
3511 	pdata = ftrace_func_mapper_find_ip(mapper, ip);
3512 	if (!pdata || !*pdata)
3513 		return;
3514 
3515 	edata = *pdata;
3516 	update_event_probe(edata);
3517 }
3518 
3519 static void
event_enable_count_probe(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)3520 event_enable_count_probe(unsigned long ip, unsigned long parent_ip,
3521 			 struct trace_array *tr, struct ftrace_probe_ops *ops,
3522 			 void *data)
3523 {
3524 	struct ftrace_func_mapper *mapper = data;
3525 	struct event_probe_data *edata;
3526 	void **pdata;
3527 
3528 	pdata = ftrace_func_mapper_find_ip(mapper, ip);
3529 	if (!pdata || !*pdata)
3530 		return;
3531 
3532 	edata = *pdata;
3533 
3534 	if (!edata->count)
3535 		return;
3536 
3537 	/* Skip if the event is already in the state we want to switch to */
3538 	if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
3539 		return;
3540 
3541 	if (edata->count != -1)
3542 		(edata->count)--;
3543 
3544 	update_event_probe(edata);
3545 }
3546 
3547 static int
event_enable_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)3548 event_enable_print(struct seq_file *m, unsigned long ip,
3549 		   struct ftrace_probe_ops *ops, void *data)
3550 {
3551 	struct ftrace_func_mapper *mapper = data;
3552 	struct event_probe_data *edata;
3553 	void **pdata;
3554 
3555 	pdata = ftrace_func_mapper_find_ip(mapper, ip);
3556 
3557 	if (WARN_ON_ONCE(!pdata || !*pdata))
3558 		return 0;
3559 
3560 	edata = *pdata;
3561 
3562 	seq_printf(m, "%ps:", (void *)ip);
3563 
3564 	seq_printf(m, "%s:%s:%s",
3565 		   edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
3566 		   edata->file->event_call->class->system,
3567 		   trace_event_name(edata->file->event_call));
3568 
3569 	if (edata->count == -1)
3570 		seq_puts(m, ":unlimited\n");
3571 	else
3572 		seq_printf(m, ":count=%ld\n", edata->count);
3573 
3574 	return 0;
3575 }
3576 
3577 static int
event_enable_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)3578 event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
3579 		  unsigned long ip, void *init_data, void **data)
3580 {
3581 	struct ftrace_func_mapper *mapper = *data;
3582 	struct event_probe_data *edata = init_data;
3583 	int ret;
3584 
3585 	if (!mapper) {
3586 		mapper = allocate_ftrace_func_mapper();
3587 		if (!mapper)
3588 			return -ENODEV;
3589 		*data = mapper;
3590 	}
3591 
3592 	ret = ftrace_func_mapper_add_ip(mapper, ip, edata);
3593 	if (ret < 0)
3594 		return ret;
3595 
3596 	edata->ref++;
3597 
3598 	return 0;
3599 }
3600 
free_probe_data(void * data)3601 static int free_probe_data(void *data)
3602 {
3603 	struct event_probe_data *edata = data;
3604 
3605 	edata->ref--;
3606 	if (!edata->ref) {
3607 		/* Remove the SOFT_MODE flag */
3608 		__ftrace_event_enable_disable(edata->file, 0, 1);
3609 		trace_event_put_ref(edata->file->event_call);
3610 		kfree(edata);
3611 	}
3612 	return 0;
3613 }
3614 
3615 static void
event_enable_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)3616 event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
3617 		  unsigned long ip, void *data)
3618 {
3619 	struct ftrace_func_mapper *mapper = data;
3620 	struct event_probe_data *edata;
3621 
3622 	if (!ip) {
3623 		if (!mapper)
3624 			return;
3625 		free_ftrace_func_mapper(mapper, free_probe_data);
3626 		return;
3627 	}
3628 
3629 	edata = ftrace_func_mapper_remove_ip(mapper, ip);
3630 
3631 	if (WARN_ON_ONCE(!edata))
3632 		return;
3633 
3634 	if (WARN_ON_ONCE(edata->ref <= 0))
3635 		return;
3636 
3637 	free_probe_data(edata);
3638 }
3639 
3640 static struct ftrace_probe_ops event_enable_probe_ops = {
3641 	.func			= event_enable_probe,
3642 	.print			= event_enable_print,
3643 	.init			= event_enable_init,
3644 	.free			= event_enable_free,
3645 };
3646 
3647 static struct ftrace_probe_ops event_enable_count_probe_ops = {
3648 	.func			= event_enable_count_probe,
3649 	.print			= event_enable_print,
3650 	.init			= event_enable_init,
3651 	.free			= event_enable_free,
3652 };
3653 
3654 static struct ftrace_probe_ops event_disable_probe_ops = {
3655 	.func			= event_enable_probe,
3656 	.print			= event_enable_print,
3657 	.init			= event_enable_init,
3658 	.free			= event_enable_free,
3659 };
3660 
3661 static struct ftrace_probe_ops event_disable_count_probe_ops = {
3662 	.func			= event_enable_count_probe,
3663 	.print			= event_enable_print,
3664 	.init			= event_enable_init,
3665 	.free			= event_enable_free,
3666 };
3667 
3668 static int
event_enable_func(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enabled)3669 event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
3670 		  char *glob, char *cmd, char *param, int enabled)
3671 {
3672 	struct trace_event_file *file;
3673 	struct ftrace_probe_ops *ops;
3674 	struct event_probe_data *data;
3675 	const char *system;
3676 	const char *event;
3677 	char *number;
3678 	bool enable;
3679 	int ret;
3680 
3681 	if (!tr)
3682 		return -ENODEV;
3683 
3684 	/* hash funcs only work with set_ftrace_filter */
3685 	if (!enabled || !param)
3686 		return -EINVAL;
3687 
3688 	system = strsep(&param, ":");
3689 	if (!param)
3690 		return -EINVAL;
3691 
3692 	event = strsep(&param, ":");
3693 
3694 	mutex_lock(&event_mutex);
3695 
3696 	ret = -EINVAL;
3697 	file = find_event_file(tr, system, event);
3698 	if (!file)
3699 		goto out;
3700 
3701 	enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
3702 
3703 	if (enable)
3704 		ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
3705 	else
3706 		ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
3707 
3708 	if (glob[0] == '!') {
3709 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
3710 		goto out;
3711 	}
3712 
3713 	ret = -ENOMEM;
3714 
3715 	data = kzalloc(sizeof(*data), GFP_KERNEL);
3716 	if (!data)
3717 		goto out;
3718 
3719 	data->enable = enable;
3720 	data->count = -1;
3721 	data->file = file;
3722 
3723 	if (!param)
3724 		goto out_reg;
3725 
3726 	number = strsep(&param, ":");
3727 
3728 	ret = -EINVAL;
3729 	if (!strlen(number))
3730 		goto out_free;
3731 
3732 	/*
3733 	 * The parsed count limits how many times this probe
3734 	 * will enable or disable the event.
3735 	 */
3736 	ret = kstrtoul(number, 0, &data->count);
3737 	if (ret)
3738 		goto out_free;
3739 
3740  out_reg:
3741 	/* Don't let event modules unload while a probe is registered */
3742 	ret = trace_event_try_get_ref(file->event_call);
3743 	if (!ret) {
3744 		ret = -EBUSY;
3745 		goto out_free;
3746 	}
3747 
3748 	ret = __ftrace_event_enable_disable(file, 1, 1);
3749 	if (ret < 0)
3750 		goto out_put;
3751 
3752 	ret = register_ftrace_function_probe(glob, tr, ops, data);
3753 	/*
3754 	 * On success, the above returns the number of functions enabled,
3755 	 * but it returns zero when no functions matched.
3756 	 * Consider matching no functions a failure too.
3757 	 */
3758 	if (!ret) {
3759 		ret = -ENOENT;
3760 		goto out_disable;
3761 	} else if (ret < 0)
3762 		goto out_disable;
3763 	/* Just return zero, not the number of enabled functions */
3764 	ret = 0;
3765  out:
3766 	mutex_unlock(&event_mutex);
3767 	return ret;
3768 
3769  out_disable:
3770 	__ftrace_event_enable_disable(file, 0, 1);
3771  out_put:
3772 	trace_event_put_ref(file->event_call);
3773  out_free:
3774 	kfree(data);
3775 	goto out;
3776 }
3777 
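/*
 * Example usage, a sketch with illustrative function and event names:
 *
 *   # enable sched:sched_switch the first five times kfree() is hit
 *   echo 'kfree:enable_event:sched:sched_switch:5' > set_ftrace_filter
 *
 *   # remove the probe again; the leading '!' takes the
 *   # unregister_ftrace_function_probe_func() path above
 *   echo '!kfree:enable_event:sched:sched_switch' > set_ftrace_filter
 */
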
3778 static struct ftrace_func_command event_enable_cmd = {
3779 	.name			= ENABLE_EVENT_STR,
3780 	.func			= event_enable_func,
3781 };
3782 
3783 static struct ftrace_func_command event_disable_cmd = {
3784 	.name			= DISABLE_EVENT_STR,
3785 	.func			= event_enable_func,
3786 };
3787 
3788 static __init int register_event_cmds(void)
3789 {
3790 	int ret;
3791 
3792 	ret = register_ftrace_command(&event_enable_cmd);
3793 	if (WARN_ON(ret < 0))
3794 		return ret;
3795 	ret = register_ftrace_command(&event_disable_cmd);
3796 	if (WARN_ON(ret < 0))
3797 		unregister_ftrace_command(&event_enable_cmd);
3798 	return ret;
3799 }
3800 #else
3801 static inline int register_event_cmds(void) { return 0; }
3802 #endif /* CONFIG_DYNAMIC_FTRACE */
3803 
3804 /*
3805  * The top level array and the trace arrays created by boot-time tracing
3806  * have already had their trace_event_file descriptors created in order
3807  * to allow early events to be recorded.
3808  * This function is called after tracefs has been initialized, and we
3809  * now have to create the files associated with those events.
3810  */
3811 static void __trace_early_add_event_dirs(struct trace_array *tr)
3812 {
3813 	struct trace_event_file *file;
3814 	int ret;
3815 
3816 
3817 	list_for_each_entry(file, &tr->events, list) {
3818 		ret = event_create_dir(tr->event_dir, file);
3819 		if (ret < 0)
3820 			pr_warn("Could not create directory for event %s\n",
3821 				trace_event_name(file->event_call));
3822 	}
3823 }
3824 
3825 /*
3826  * For early boot up, the top trace array and the trace arrays created
3827  * by boot-time tracing need to have a list of events that can be
3828  * enabled. This must be done before the filesystem is set up in order
3829  * to allow events to be traced early.
3830  */
3831 void __trace_early_add_events(struct trace_array *tr)
3832 {
3833 	struct trace_event_call *call;
3834 	int ret;
3835 
3836 	list_for_each_entry(call, &ftrace_events, list) {
3837 		/* Early boot up should not have any modules loaded */
3838 		if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) &&
3839 		    WARN_ON_ONCE(call->module))
3840 			continue;
3841 
3842 		ret = __trace_early_add_new_event(call, tr);
3843 		if (ret < 0)
3844 			pr_warn("Could not create early event %s\n",
3845 				trace_event_name(call));
3846 	}
3847 }
3848 
3849 /* Remove the event directory structure for a trace directory. */
3850 static void
3851 __trace_remove_event_dirs(struct trace_array *tr)
3852 {
3853 	struct trace_event_file *file, *next;
3854 
3855 	list_for_each_entry_safe(file, next, &tr->events, list)
3856 		remove_event_file_dir(file);
3857 }
3858 
3859 static void __add_event_to_tracers(struct trace_event_call *call)
3860 {
3861 	struct trace_array *tr;
3862 
3863 	list_for_each_entry(tr, &ftrace_trace_arrays, list)
3864 		__trace_add_new_event(call, tr);
3865 }
3866 
3867 extern struct trace_event_call *__start_ftrace_events[];
3868 extern struct trace_event_call *__stop_ftrace_events[];
3869 
3870 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
3871 
3872 static __init int setup_trace_event(char *str)
3873 {
3874 	strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
3875 	ring_buffer_expanded = true;
3876 	disable_tracing_selftest("running event tracing");
3877 
3878 	return 1;
3879 }
3880 __setup("trace_event=", setup_trace_event);
3881 
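/*
 * Example (illustrative): events can be requested on the kernel
 * command line before tracefs exists, e.g.
 *
 *   trace_event=sched:sched_switch,irq:irq_handler_entry
 *
 * The saved buffer is parsed later by early_enable_events() below.
 */
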
3882 static int events_callback(const char *name, umode_t *mode, void **data,
3883 			   const struct file_operations **fops)
3884 {
3885 	if (strcmp(name, "enable") == 0) {
3886 		*mode = TRACE_MODE_WRITE;
3887 		*fops = &ftrace_tr_enable_fops;
3888 		return 1;
3889 	}
3890 
3891 	if (strcmp(name, "header_page") == 0)
3892 		*data = ring_buffer_print_page_header;
3893 
3894 	else if (strcmp(name, "header_event") == 0)
3895 		*data = ring_buffer_print_entry_header;
3896 
3897 	else
3898 		return 0;
3899 
3900 	*mode = TRACE_MODE_READ;
3901 	*fops = &ftrace_show_header_fops;
3902 	return 1;
3903 }
3904 
3905 /* Expects to have event_mutex held when called */
3906 static int
3907 create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
3908 {
3909 	struct eventfs_inode *e_events;
3910 	struct dentry *entry;
3911 	int nr_entries;
3912 	static struct eventfs_entry events_entries[] = {
3913 		{
3914 			.name		= "enable",
3915 			.callback	= events_callback,
3916 		},
3917 		{
3918 			.name		= "header_page",
3919 			.callback	= events_callback,
3920 		},
3921 		{
3922 			.name		= "header_event",
3923 			.callback	= events_callback,
3924 		},
3925 	};
3926 
3927 	entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
3928 				  tr, &ftrace_set_event_fops);
3929 	if (!entry)
3930 		return -ENOMEM;
3931 
3932 	nr_entries = ARRAY_SIZE(events_entries);
3933 
3934 	e_events = eventfs_create_events_dir("events", parent, events_entries,
3935 					     nr_entries, tr);
3936 	if (IS_ERR(e_events)) {
3937 		pr_warn("Could not create tracefs 'events' directory\n");
3938 		return -ENOMEM;
3939 	}
3940 
3941 	/* These are not as crucial; just warn if they are not created */
3942 
3943 	trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
3944 			  tr, &ftrace_set_event_pid_fops);
3945 
3946 	trace_create_file("set_event_notrace_pid",
3947 			  TRACE_MODE_WRITE, parent, tr,
3948 			  &ftrace_set_event_notrace_pid_fops);
3949 
3950 	tr->event_dir = e_events;
3951 
3952 	return 0;
3953 }
3954 
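/*
 * Sketch of the top-level files created above in an instance's
 * tracefs directory (summaries, not the authoritative docs):
 *
 *   set_event              - write "system:event" names to enable events
 *   set_event_pid          - limit events to the listed PIDs
 *   set_event_notrace_pid  - exclude events for the listed PIDs
 *   events/enable          - enable or disable all events at once
 *   events/header_page     - format of the ring buffer page header
 *   events/header_event    - format of the ring buffer event header
 */
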
3955 /**
3956  * event_trace_add_tracer - add an instance of a trace_array to events
3957  * @parent: The parent dentry to place the files/directories for events in
3958  * @tr: The trace array associated with these events
3959  *
3960  * When a new instance is created, it needs to set up its events
3961  * directory, as well as other files associated with events. It also
3962  * creates the event hierarchy in the @parent/events directory.
3963  *
3964  * Returns 0 on success.
3965  *
3966  * Must be called with event_mutex held.
3967  */
3968 int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
3969 {
3970 	int ret;
3971 
3972 	lockdep_assert_held(&event_mutex);
3973 
3974 	ret = create_event_toplevel_files(parent, tr);
3975 	if (ret)
3976 		goto out;
3977 
3978 	down_write(&trace_event_sem);
3979 	/* If tr already has the event list, it is initialized in early boot. */
3980 	/* If tr already has an event list, it was initialized in early boot. */
3981 		__trace_early_add_event_dirs(tr);
3982 	else
3983 		__trace_add_event_dirs(tr);
3984 	up_write(&trace_event_sem);
3985 
3986  out:
3987 	return ret;
3988 }
3989 
3990 /*
3991  * The top trace array already had its file descriptors created.
3992  * Now the files themselves need to be created.
3993  */
3994 static __init int
3995 early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
3996 {
3997 	int ret;
3998 
3999 	mutex_lock(&event_mutex);
4000 
4001 	ret = create_event_toplevel_files(parent, tr);
4002 	if (ret)
4003 		goto out_unlock;
4004 
4005 	down_write(&trace_event_sem);
4006 	__trace_early_add_event_dirs(tr);
4007 	up_write(&trace_event_sem);
4008 
4009  out_unlock:
4010 	mutex_unlock(&event_mutex);
4011 
4012 	return ret;
4013 }
4014 
4015 /* Must be called with event_mutex held */
4016 int event_trace_del_tracer(struct trace_array *tr)
4017 {
4018 	lockdep_assert_held(&event_mutex);
4019 
4020 	/* Disable any event triggers and associated soft-disabled events */
4021 	clear_event_triggers(tr);
4022 
4023 	/* Clear the pid list */
4024 	__ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
4025 
4026 	/* Disable any running events */
4027 	__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
4028 
4029 	/* Make sure no more events are being executed */
4030 	tracepoint_synchronize_unregister();
4031 
4032 	down_write(&trace_event_sem);
4033 	__trace_remove_event_dirs(tr);
4034 	eventfs_remove_events_dir(tr->event_dir);
4035 	up_write(&trace_event_sem);
4036 
4037 	tr->event_dir = NULL;
4038 
4039 	return 0;
4040 }
4041 
4042 static __init int event_trace_memsetup(void)
4043 {
4044 	field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
4045 	file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
4046 	return 0;
4047 }
4048 
4049 __init void
4050 early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
4051 {
4052 	char *token;
4053 	int ret;
4054 
4055 	while (true) {
4056 		token = strsep(&buf, ",");
4057 
4058 		if (!token)
4059 			break;
4060 
4061 		if (*token) {
4062 			/* Restarting syscalls requires that we stop them first */
4063 			if (disable_first)
4064 				ftrace_set_clr_event(tr, token, 0);
4065 
4066 			ret = ftrace_set_clr_event(tr, token, 1);
4067 			if (ret)
4068 				pr_warn("Failed to enable trace event: %s\n", token);
4069 		}
4070 
4071 		/* Put back the comma to allow this to be called again */
4072 		if (buf)
4073 			*(buf - 1) = ',';
4074 	}
4075 }
4076 
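/*
 * For illustration, buf holds a comma-separated list in set_event
 * syntax, e.g. "sched:sched_switch,irq:*". strsep() NUL-terminates
 * each token in place, which is why the comma is written back after
 * each iteration: event_trace_enable_again() walks the same buffer.
 */
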
4077 static __init int event_trace_enable(void)
4078 {
4079 	struct trace_array *tr = top_trace_array();
4080 	struct trace_event_call **iter, *call;
4081 	int ret;
4082 
4083 	if (!tr)
4084 		return -ENODEV;
4085 
4086 	for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
4087 
4088 		call = *iter;
4089 		ret = event_init(call);
4090 		if (!ret)
4091 			list_add(&call->list, &ftrace_events);
4092 	}
4093 
4094 	register_trigger_cmds();
4095 
4096 	/*
4097 	 * We need the top trace array to have a working set of trace
4098 	 * points at early init, before the debug files and directories
4099 	 * are created. Create the file entries now, and attach them
4100 	 * to the actual file dentries later.
4101 	 */
4102 	__trace_early_add_events(tr);
4103 
4104 	early_enable_events(tr, bootup_event_buf, false);
4105 
4106 	trace_printk_start_comm();
4107 
4108 	register_event_cmds();
4109 
4110 
4111 	return 0;
4112 }
4113 
4114 /*
4115  * event_trace_enable() is called from trace_event_init() first to
4116  * initialize events and perhaps start any events that are on the
4117  * command line. Unfortunately, there are some events that will not
4118  * start this early, like the system call tracepoints that need
4119  * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But
4120  * event_trace_enable() is called before pid 1 starts, so this flag
4121  * is never set and the syscall tracepoint is never reached, even
4122  * though the event is enabled (and does nothing).
4123  */
4124 static __init int event_trace_enable_again(void)
4125 {
4126 	struct trace_array *tr;
4127 
4128 	tr = top_trace_array();
4129 	if (!tr)
4130 		return -ENODEV;
4131 
4132 	early_enable_events(tr, bootup_event_buf, true);
4133 
4134 	return 0;
4135 }
4136 
4137 early_initcall(event_trace_enable_again);
4138 
4139 /* Initialize fields that are not related to tracefs */
4140 static __init int event_trace_init_fields(void)
4141 {
4142 	if (trace_define_generic_fields())
4143 		pr_warn("tracing: Failed to allocate generic fields\n");
4144 
4145 	if (trace_define_common_fields())
4146 		pr_warn("tracing: Failed to allocate common fields\n");
4147 
4148 	return 0;
4149 }
4150 
4151 __init int event_trace_init(void)
4152 {
4153 	struct trace_array *tr;
4154 	int ret;
4155 
4156 	tr = top_trace_array();
4157 	if (!tr)
4158 		return -ENODEV;
4159 
4160 	trace_create_file("available_events", TRACE_MODE_READ,
4161 			  NULL, tr, &ftrace_avail_fops);
4162 
4163 	ret = early_event_add_tracer(NULL, tr);
4164 	if (ret)
4165 		return ret;
4166 
4167 #ifdef CONFIG_MODULES
4168 	ret = register_module_notifier(&trace_module_nb);
4169 	if (ret)
4170 		pr_warn("Failed to register trace events module notifier\n");
4171 #endif
4172 
4173 	eventdir_initialized = true;
4174 
4175 	return 0;
4176 }
4177 
4178 void __init trace_event_init(void)
4179 {
4180 	event_trace_memsetup();
4181 	init_ftrace_syscalls();
4182 	event_trace_enable();
4183 	event_trace_init_fields();
4184 }
4185 
4186 #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
4187 
4188 static DEFINE_SPINLOCK(test_spinlock);
4189 static DEFINE_SPINLOCK(test_spinlock_irq);
4190 static DEFINE_MUTEX(test_mutex);
4191 
4192 static __init void test_work(struct work_struct *dummy)
4193 {
4194 	spin_lock(&test_spinlock);
4195 	spin_lock_irq(&test_spinlock_irq);
4196 	udelay(1);
4197 	spin_unlock_irq(&test_spinlock_irq);
4198 	spin_unlock(&test_spinlock);
4199 
4200 	mutex_lock(&test_mutex);
4201 	msleep(1);
4202 	mutex_unlock(&test_mutex);
4203 }
4204 
4205 static __init int event_test_thread(void *unused)
4206 {
4207 	void *test_malloc;
4208 
4209 	test_malloc = kmalloc(1234, GFP_KERNEL);
4210 	if (!test_malloc)
4211 		pr_info("failed to kmalloc\n");
4212 
4213 	schedule_on_each_cpu(test_work);
4214 
4215 	kfree(test_malloc);
4216 
4217 	set_current_state(TASK_INTERRUPTIBLE);
4218 	while (!kthread_should_stop()) {
4219 		schedule();
4220 		set_current_state(TASK_INTERRUPTIBLE);
4221 	}
4222 	__set_current_state(TASK_RUNNING);
4223 
4224 	return 0;
4225 }
4226 
4227 /*
4228  * Do various things that may trigger events.
4229  */
4230 static __init void event_test_stuff(void)
4231 {
4232 	struct task_struct *test_thread;
4233 
4234 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
4235 	msleep(1);
4236 	kthread_stop(test_thread);
4237 }
4238 
4239 /*
4240  * For every trace event defined, we will test each trace point separately,
4241  * and then by groups, and finally all trace points.
4242  */
4243 static __init void event_trace_self_tests(void)
4244 {
4245 	struct trace_subsystem_dir *dir;
4246 	struct trace_event_file *file;
4247 	struct trace_event_call *call;
4248 	struct event_subsystem *system;
4249 	struct trace_array *tr;
4250 	int ret;
4251 
4252 	tr = top_trace_array();
4253 	if (!tr)
4254 		return;
4255 
4256 	pr_info("Running tests on trace events:\n");
4257 
4258 	list_for_each_entry(file, &tr->events, list) {
4259 
4260 		call = file->event_call;
4261 
4262 		/* Only test those that have a probe */
4263 		if (!call->class || !call->class->probe)
4264 			continue;
4265 
4266 /*
4267  * Testing syscall events here is pretty useless, but we still
4268  * do it if configured, even though it is time consuming.
4269  * What we really need is a user thread to perform the
4270  * syscalls as we test.
4271  */
4272 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
4273 		if (call->class->system &&
4274 		    strcmp(call->class->system, "syscalls") == 0)
4275 			continue;
4276 #endif
4277 
4278 		pr_info("Testing event %s: ", trace_event_name(call));
4279 
4280 		/*
4281 		 * If an event is already enabled, someone is using
4282 		 * it and the self test should not be on.
4283 		 */
4284 		if (file->flags & EVENT_FILE_FL_ENABLED) {
4285 			pr_warn("Enabled event during self test!\n");
4286 			WARN_ON_ONCE(1);
4287 			continue;
4288 		}
4289 
4290 		ftrace_event_enable_disable(file, 1);
4291 		event_test_stuff();
4292 		ftrace_event_enable_disable(file, 0);
4293 
4294 		pr_cont("OK\n");
4295 	}
4296 
4297 	/* Now test at the sub system level */
4298 
4299 	pr_info("Running tests on trace event systems:\n");
4300 
4301 	list_for_each_entry(dir, &tr->systems, list) {
4302 
4303 		system = dir->subsystem;
4304 
4305 		/* the ftrace system is special, skip it */
4306 		if (strcmp(system->name, "ftrace") == 0)
4307 			continue;
4308 
4309 		pr_info("Testing event system %s: ", system->name);
4310 
4311 		ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
4312 		if (WARN_ON_ONCE(ret)) {
4313 			pr_warn("error enabling system %s\n",
4314 				system->name);
4315 			continue;
4316 		}
4317 
4318 		event_test_stuff();
4319 
4320 		ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
4321 		if (WARN_ON_ONCE(ret)) {
4322 			pr_warn("error disabling system %s\n",
4323 				system->name);
4324 			continue;
4325 		}
4326 
4327 		pr_cont("OK\n");
4328 	}
4329 
4330 	/* Test with all events enabled */
4331 
4332 	pr_info("Running tests on all trace events:\n");
4333 	pr_info("Testing all events: ");
4334 
4335 	ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
4336 	if (WARN_ON_ONCE(ret)) {
4337 		pr_warn("error enabling all events\n");
4338 		return;
4339 	}
4340 
4341 	event_test_stuff();
4342 
4343 	/* Disable all events again */
4344 	ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
4345 	if (WARN_ON_ONCE(ret)) {
4346 		pr_warn("error disabling all events\n");
4347 		return;
4348 	}
4349 
4350 	pr_cont("OK\n");
4351 }
4352 
4353 #ifdef CONFIG_FUNCTION_TRACER
4354 
4355 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
4356 
4357 static struct trace_event_file event_trace_file __initdata;
4358 
4359 static void __init
4360 function_test_events_call(unsigned long ip, unsigned long parent_ip,
4361 			  struct ftrace_ops *op, struct ftrace_regs *regs)
4362 {
4363 	struct trace_buffer *buffer;
4364 	struct ring_buffer_event *event;
4365 	struct ftrace_entry *entry;
4366 	unsigned int trace_ctx;
4367 	long disabled;
4368 	int cpu;
4369 
4370 	trace_ctx = tracing_gen_ctx();
4371 	preempt_disable_notrace();
4372 	cpu = raw_smp_processor_id();
4373 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
4374 
4375 	if (disabled != 1)
4376 		goto out;
4377 
4378 	event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
4379 						TRACE_FN, sizeof(*entry),
4380 						trace_ctx);
4381 	if (!event)
4382 		goto out;
4383 	entry	= ring_buffer_event_data(event);
4384 	entry->ip			= ip;
4385 	entry->parent_ip		= parent_ip;
4386 
4387 	event_trigger_unlock_commit(&event_trace_file, buffer, event,
4388 				    entry, trace_ctx);
4389  out:
4390 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
4391 	preempt_enable_notrace();
4392 }
4393 
4394 static struct ftrace_ops trace_ops __initdata  =
4395 {
4396 	.func = function_test_events_call,
4397 };
4398 
4399 static __init void event_trace_self_test_with_function(void)
4400 {
4401 	int ret;
4402 
4403 	event_trace_file.tr = top_trace_array();
4404 	if (WARN_ON(!event_trace_file.tr))
4405 		return;
4406 
4407 	ret = register_ftrace_function(&trace_ops);
4408 	if (WARN_ON(ret < 0)) {
4409 		pr_info("Failed to enable function tracer for event tests\n");
4410 		return;
4411 	}
4412 	pr_info("Running tests again, along with the function tracer\n");
4413 	event_trace_self_tests();
4414 	unregister_ftrace_function(&trace_ops);
4415 }
4416 #else
4417 static __init void event_trace_self_test_with_function(void)
4418 {
4419 }
4420 #endif
4421 
4422 static __init int event_trace_self_tests_init(void)
4423 {
4424 	if (!tracing_selftest_disabled) {
4425 		event_trace_self_tests();
4426 		event_trace_self_test_with_function();
4427 	}
4428 
4429 	return 0;
4430 }
4431 
4432 late_initcall(event_trace_self_tests_init);
4433 
4434 #endif
4435