xref: /openbmc/linux/kernel/trace/trace_fprobe.c (revision 6db6b729)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Fprobe-based tracing events
4  * Copyright (C) 2022 Google LLC.
5  */
6 #define pr_fmt(fmt)	"trace_fprobe: " fmt
7 
8 #include <linux/fprobe.h>
9 #include <linux/module.h>
10 #include <linux/rculist.h>
11 #include <linux/security.h>
12 #include <linux/tracepoint.h>
13 #include <linux/uaccess.h>
14 
15 #include "trace_dynevent.h"
16 #include "trace_probe.h"
17 #include "trace_probe_kernel.h"
18 #include "trace_probe_tmpl.h"
19 
/* Default event group used for function entry/exit probe events */
#define FPROBE_EVENT_SYSTEM	"fprobes"
/* Default event group used for tracepoint probe events */
#define TRACEPOINT_EVENT_SYSTEM	"tracepoints"
/* Largest maxactive value accepted from the probe definition */
#define RETHOOK_MAXACTIVE_MAX	4096
23 
24 static int trace_fprobe_create(const char *raw_command);
25 static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev);
26 static int trace_fprobe_release(struct dyn_event *ev);
27 static bool trace_fprobe_is_busy(struct dyn_event *ev);
28 static bool trace_fprobe_match(const char *system, const char *event,
29 			int argc, const char **argv, struct dyn_event *ev);
30 
31 static struct dyn_event_operations trace_fprobe_ops = {
32 	.create = trace_fprobe_create,
33 	.show = trace_fprobe_show,
34 	.is_busy = trace_fprobe_is_busy,
35 	.free = trace_fprobe_release,
36 	.match = trace_fprobe_match,
37 };
38 
39 /*
40  * Fprobe event core functions
41  */
42 struct trace_fprobe {
43 	struct dyn_event	devent;
44 	struct fprobe		fp;
45 	const char		*symbol;
46 	struct tracepoint	*tpoint;
47 	struct module		*mod;
48 	struct trace_probe	tp;
49 };
50 
51 static bool is_trace_fprobe(struct dyn_event *ev)
52 {
53 	return ev->ops == &trace_fprobe_ops;
54 }
55 
56 static struct trace_fprobe *to_trace_fprobe(struct dyn_event *ev)
57 {
58 	return container_of(ev, struct trace_fprobe, devent);
59 }
60 
/**
 * for_each_trace_fprobe - walk every dynamic event that is a trace_fprobe
 * @pos:	struct trace_fprobe * set to the current fprobe event
 * @dpos:	struct dyn_event * used as the underlying loop cursor
 *
 * Dynamic events that do not use trace_fprobe_ops are skipped.
 */
#define for_each_trace_fprobe(pos, dpos)				\
	for_each_dyn_event(dpos)					\
		if (is_trace_fprobe(dpos) && (pos = to_trace_fprobe(dpos)))
69 
70 static bool trace_fprobe_is_return(struct trace_fprobe *tf)
71 {
72 	return tf->fp.exit_handler != NULL;
73 }
74 
75 static bool trace_fprobe_is_tracepoint(struct trace_fprobe *tf)
76 {
77 	return tf->tpoint != NULL;
78 }
79 
80 static const char *trace_fprobe_symbol(struct trace_fprobe *tf)
81 {
82 	return tf->symbol ? tf->symbol : "unknown";
83 }
84 
85 static bool trace_fprobe_is_busy(struct dyn_event *ev)
86 {
87 	struct trace_fprobe *tf = to_trace_fprobe(ev);
88 
89 	return trace_probe_is_enabled(&tf->tp);
90 }
91 
92 static bool trace_fprobe_match_command_head(struct trace_fprobe *tf,
93 					    int argc, const char **argv)
94 {
95 	char buf[MAX_ARGSTR_LEN + 1];
96 
97 	if (!argc)
98 		return true;
99 
100 	snprintf(buf, sizeof(buf), "%s", trace_fprobe_symbol(tf));
101 	if (strcmp(buf, argv[0]))
102 		return false;
103 	argc--; argv++;
104 
105 	return trace_probe_match_command_args(&tf->tp, argc, argv);
106 }
107 
108 static bool trace_fprobe_match(const char *system, const char *event,
109 			int argc, const char **argv, struct dyn_event *ev)
110 {
111 	struct trace_fprobe *tf = to_trace_fprobe(ev);
112 
113 	if (event[0] != '\0' && strcmp(trace_probe_name(&tf->tp), event))
114 		return false;
115 
116 	if (system && strcmp(trace_probe_group_name(&tf->tp), system))
117 		return false;
118 
119 	return trace_fprobe_match_command_head(tf, argc, argv);
120 }
121 
122 static bool trace_fprobe_is_registered(struct trace_fprobe *tf)
123 {
124 	return fprobe_is_registered(&tf->fp);
125 }
126 
127 /*
128  * Note that we don't verify the fetch_insn code, since it does not come
129  * from user space.
130  */
131 static int
132 process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
133 		   void *base)
134 {
135 	struct pt_regs *regs = rec;
136 	unsigned long val;
137 	int ret;
138 
139 retry:
140 	/* 1st stage: get value from context */
141 	switch (code->op) {
142 	case FETCH_OP_STACK:
143 		val = regs_get_kernel_stack_nth(regs, code->param);
144 		break;
145 	case FETCH_OP_STACKP:
146 		val = kernel_stack_pointer(regs);
147 		break;
148 	case FETCH_OP_RETVAL:
149 		val = regs_return_value(regs);
150 		break;
151 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
152 	case FETCH_OP_ARG:
153 		val = regs_get_kernel_argument(regs, code->param);
154 		break;
155 #endif
156 	case FETCH_NOP_SYMBOL:	/* Ignore a place holder */
157 		code++;
158 		goto retry;
159 	default:
160 		ret = process_common_fetch_insn(code, &val);
161 		if (ret < 0)
162 			return ret;
163 	}
164 	code++;
165 
166 	return process_fetch_insn_bottom(code, val, dest, base);
167 }
168 NOKPROBE_SYMBOL(process_fetch_insn)
169 
170 /* function entry handler */
171 static nokprobe_inline void
172 __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
173 		    struct pt_regs *regs,
174 		    struct trace_event_file *trace_file)
175 {
176 	struct fentry_trace_entry_head *entry;
177 	struct trace_event_call *call = trace_probe_event_call(&tf->tp);
178 	struct trace_event_buffer fbuffer;
179 	int dsize;
180 
181 	if (WARN_ON_ONCE(call != trace_file->event_call))
182 		return;
183 
184 	if (trace_trigger_soft_disabled(trace_file))
185 		return;
186 
187 	dsize = __get_data_size(&tf->tp, regs);
188 
189 	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
190 					   sizeof(*entry) + tf->tp.size + dsize);
191 	if (!entry)
192 		return;
193 
194 	fbuffer.regs = regs;
195 	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
196 	entry->ip = entry_ip;
197 	store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
198 
199 	trace_event_buffer_commit(&fbuffer);
200 }
201 
202 static void
203 fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
204 		  struct pt_regs *regs)
205 {
206 	struct event_file_link *link;
207 
208 	trace_probe_for_each_link_rcu(link, &tf->tp)
209 		__fentry_trace_func(tf, entry_ip, regs, link->file);
210 }
211 NOKPROBE_SYMBOL(fentry_trace_func);
212 
213 /* Kretprobe handler */
214 static nokprobe_inline void
215 __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
216 		   unsigned long ret_ip, struct pt_regs *regs,
217 		   struct trace_event_file *trace_file)
218 {
219 	struct fexit_trace_entry_head *entry;
220 	struct trace_event_buffer fbuffer;
221 	struct trace_event_call *call = trace_probe_event_call(&tf->tp);
222 	int dsize;
223 
224 	if (WARN_ON_ONCE(call != trace_file->event_call))
225 		return;
226 
227 	if (trace_trigger_soft_disabled(trace_file))
228 		return;
229 
230 	dsize = __get_data_size(&tf->tp, regs);
231 
232 	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
233 					   sizeof(*entry) + tf->tp.size + dsize);
234 	if (!entry)
235 		return;
236 
237 	fbuffer.regs = regs;
238 	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
239 	entry->func = entry_ip;
240 	entry->ret_ip = ret_ip;
241 	store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
242 
243 	trace_event_buffer_commit(&fbuffer);
244 }
245 
246 static void
247 fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
248 		 unsigned long ret_ip, struct pt_regs *regs)
249 {
250 	struct event_file_link *link;
251 
252 	trace_probe_for_each_link_rcu(link, &tf->tp)
253 		__fexit_trace_func(tf, entry_ip, ret_ip, regs, link->file);
254 }
255 NOKPROBE_SYMBOL(fexit_trace_func);
256 
257 #ifdef CONFIG_PERF_EVENTS
258 
259 static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
260 			    struct pt_regs *regs)
261 {
262 	struct trace_event_call *call = trace_probe_event_call(&tf->tp);
263 	struct fentry_trace_entry_head *entry;
264 	struct hlist_head *head;
265 	int size, __size, dsize;
266 	int rctx;
267 
268 	head = this_cpu_ptr(call->perf_events);
269 	if (hlist_empty(head))
270 		return 0;
271 
272 	dsize = __get_data_size(&tf->tp, regs);
273 	__size = sizeof(*entry) + tf->tp.size + dsize;
274 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
275 	size -= sizeof(u32);
276 
277 	entry = perf_trace_buf_alloc(size, NULL, &rctx);
278 	if (!entry)
279 		return 0;
280 
281 	entry->ip = entry_ip;
282 	memset(&entry[1], 0, dsize);
283 	store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
284 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
285 			      head, NULL);
286 	return 0;
287 }
288 NOKPROBE_SYMBOL(fentry_perf_func);
289 
290 static void
291 fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
292 		unsigned long ret_ip, struct pt_regs *regs)
293 {
294 	struct trace_event_call *call = trace_probe_event_call(&tf->tp);
295 	struct fexit_trace_entry_head *entry;
296 	struct hlist_head *head;
297 	int size, __size, dsize;
298 	int rctx;
299 
300 	head = this_cpu_ptr(call->perf_events);
301 	if (hlist_empty(head))
302 		return;
303 
304 	dsize = __get_data_size(&tf->tp, regs);
305 	__size = sizeof(*entry) + tf->tp.size + dsize;
306 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
307 	size -= sizeof(u32);
308 
309 	entry = perf_trace_buf_alloc(size, NULL, &rctx);
310 	if (!entry)
311 		return;
312 
313 	entry->func = entry_ip;
314 	entry->ret_ip = ret_ip;
315 	store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
316 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
317 			      head, NULL);
318 }
319 NOKPROBE_SYMBOL(fexit_perf_func);
320 #endif	/* CONFIG_PERF_EVENTS */
321 
322 static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
323 			     unsigned long ret_ip, struct pt_regs *regs,
324 			     void *entry_data)
325 {
326 	struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
327 	int ret = 0;
328 
329 	if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
330 		fentry_trace_func(tf, entry_ip, regs);
331 #ifdef CONFIG_PERF_EVENTS
332 	if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
333 		ret = fentry_perf_func(tf, entry_ip, regs);
334 #endif
335 	return ret;
336 }
337 NOKPROBE_SYMBOL(fentry_dispatcher);
338 
339 static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
340 			     unsigned long ret_ip, struct pt_regs *regs,
341 			     void *entry_data)
342 {
343 	struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
344 
345 	if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
346 		fexit_trace_func(tf, entry_ip, ret_ip, regs);
347 #ifdef CONFIG_PERF_EVENTS
348 	if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
349 		fexit_perf_func(tf, entry_ip, ret_ip, regs);
350 #endif
351 }
352 NOKPROBE_SYMBOL(fexit_dispatcher);
353 
354 static void free_trace_fprobe(struct trace_fprobe *tf)
355 {
356 	if (tf) {
357 		trace_probe_cleanup(&tf->tp);
358 		kfree(tf->symbol);
359 		kfree(tf);
360 	}
361 }
362 
363 /*
364  * Allocate new trace_probe and initialize it (including fprobe).
365  */
366 static struct trace_fprobe *alloc_trace_fprobe(const char *group,
367 					       const char *event,
368 					       const char *symbol,
369 					       struct tracepoint *tpoint,
370 					       int maxactive,
371 					       int nargs, bool is_return)
372 {
373 	struct trace_fprobe *tf;
374 	int ret = -ENOMEM;
375 
376 	tf = kzalloc(struct_size(tf, tp.args, nargs), GFP_KERNEL);
377 	if (!tf)
378 		return ERR_PTR(ret);
379 
380 	tf->symbol = kstrdup(symbol, GFP_KERNEL);
381 	if (!tf->symbol)
382 		goto error;
383 
384 	if (is_return)
385 		tf->fp.exit_handler = fexit_dispatcher;
386 	else
387 		tf->fp.entry_handler = fentry_dispatcher;
388 
389 	tf->tpoint = tpoint;
390 	tf->fp.nr_maxactive = maxactive;
391 
392 	ret = trace_probe_init(&tf->tp, event, group, false);
393 	if (ret < 0)
394 		goto error;
395 
396 	dyn_event_init(&tf->devent, &trace_fprobe_ops);
397 	return tf;
398 error:
399 	free_trace_fprobe(tf);
400 	return ERR_PTR(ret);
401 }
402 
403 static struct trace_fprobe *find_trace_fprobe(const char *event,
404 					      const char *group)
405 {
406 	struct dyn_event *pos;
407 	struct trace_fprobe *tf;
408 
409 	for_each_trace_fprobe(tf, pos)
410 		if (strcmp(trace_probe_name(&tf->tp), event) == 0 &&
411 		    strcmp(trace_probe_group_name(&tf->tp), group) == 0)
412 			return tf;
413 	return NULL;
414 }
415 
416 static inline int __enable_trace_fprobe(struct trace_fprobe *tf)
417 {
418 	if (trace_fprobe_is_registered(tf))
419 		enable_fprobe(&tf->fp);
420 
421 	return 0;
422 }
423 
424 static void __disable_trace_fprobe(struct trace_probe *tp)
425 {
426 	struct trace_fprobe *tf;
427 
428 	list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
429 		if (!trace_fprobe_is_registered(tf))
430 			continue;
431 		disable_fprobe(&tf->fp);
432 	}
433 }
434 
435 /*
436  * Enable trace_probe
437  * if the file is NULL, enable "perf" handler, or enable "trace" handler.
438  */
439 static int enable_trace_fprobe(struct trace_event_call *call,
440 			       struct trace_event_file *file)
441 {
442 	struct trace_probe *tp;
443 	struct trace_fprobe *tf;
444 	bool enabled;
445 	int ret = 0;
446 
447 	tp = trace_probe_primary_from_call(call);
448 	if (WARN_ON_ONCE(!tp))
449 		return -ENODEV;
450 	enabled = trace_probe_is_enabled(tp);
451 
452 	/* This also changes "enabled" state */
453 	if (file) {
454 		ret = trace_probe_add_file(tp, file);
455 		if (ret)
456 			return ret;
457 	} else
458 		trace_probe_set_flag(tp, TP_FLAG_PROFILE);
459 
460 	if (!enabled) {
461 		list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
462 			/* TODO: check the fprobe is gone */
463 			__enable_trace_fprobe(tf);
464 		}
465 	}
466 
467 	return 0;
468 }
469 
470 /*
471  * Disable trace_probe
472  * if the file is NULL, disable "perf" handler, or disable "trace" handler.
473  */
474 static int disable_trace_fprobe(struct trace_event_call *call,
475 				struct trace_event_file *file)
476 {
477 	struct trace_probe *tp;
478 
479 	tp = trace_probe_primary_from_call(call);
480 	if (WARN_ON_ONCE(!tp))
481 		return -ENODEV;
482 
483 	if (file) {
484 		if (!trace_probe_get_file_link(tp, file))
485 			return -ENOENT;
486 		if (!trace_probe_has_single_file(tp))
487 			goto out;
488 		trace_probe_clear_flag(tp, TP_FLAG_TRACE);
489 	} else
490 		trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
491 
492 	if (!trace_probe_is_enabled(tp))
493 		__disable_trace_fprobe(tp);
494 
495  out:
496 	if (file)
497 		/*
498 		 * Synchronization is done in below function. For perf event,
499 		 * file == NULL and perf_trace_event_unreg() calls
500 		 * tracepoint_synchronize_unregister() to ensure synchronize
501 		 * event. We don't need to care about it.
502 		 */
503 		trace_probe_remove_file(tp, file);
504 
505 	return 0;
506 }
507 
508 /* Event entry printers */
509 static enum print_line_t
510 print_fentry_event(struct trace_iterator *iter, int flags,
511 		   struct trace_event *event)
512 {
513 	struct fentry_trace_entry_head *field;
514 	struct trace_seq *s = &iter->seq;
515 	struct trace_probe *tp;
516 
517 	field = (struct fentry_trace_entry_head *)iter->ent;
518 	tp = trace_probe_primary_from_call(
519 		container_of(event, struct trace_event_call, event));
520 	if (WARN_ON_ONCE(!tp))
521 		goto out;
522 
523 	trace_seq_printf(s, "%s: (", trace_probe_name(tp));
524 
525 	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
526 		goto out;
527 
528 	trace_seq_putc(s, ')');
529 
530 	if (trace_probe_print_args(s, tp->args, tp->nr_args,
531 			     (u8 *)&field[1], field) < 0)
532 		goto out;
533 
534 	trace_seq_putc(s, '\n');
535  out:
536 	return trace_handle_return(s);
537 }
538 
539 static enum print_line_t
540 print_fexit_event(struct trace_iterator *iter, int flags,
541 		  struct trace_event *event)
542 {
543 	struct fexit_trace_entry_head *field;
544 	struct trace_seq *s = &iter->seq;
545 	struct trace_probe *tp;
546 
547 	field = (struct fexit_trace_entry_head *)iter->ent;
548 	tp = trace_probe_primary_from_call(
549 		container_of(event, struct trace_event_call, event));
550 	if (WARN_ON_ONCE(!tp))
551 		goto out;
552 
553 	trace_seq_printf(s, "%s: (", trace_probe_name(tp));
554 
555 	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
556 		goto out;
557 
558 	trace_seq_puts(s, " <- ");
559 
560 	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
561 		goto out;
562 
563 	trace_seq_putc(s, ')');
564 
565 	if (trace_probe_print_args(s, tp->args, tp->nr_args,
566 			     (u8 *)&field[1], field) < 0)
567 		goto out;
568 
569 	trace_seq_putc(s, '\n');
570 
571  out:
572 	return trace_handle_return(s);
573 }
574 
575 static int fentry_event_define_fields(struct trace_event_call *event_call)
576 {
577 	int ret;
578 	struct fentry_trace_entry_head field;
579 	struct trace_probe *tp;
580 
581 	tp = trace_probe_primary_from_call(event_call);
582 	if (WARN_ON_ONCE(!tp))
583 		return -ENOENT;
584 
585 	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
586 
587 	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
588 }
589 
590 static int fexit_event_define_fields(struct trace_event_call *event_call)
591 {
592 	int ret;
593 	struct fexit_trace_entry_head field;
594 	struct trace_probe *tp;
595 
596 	tp = trace_probe_primary_from_call(event_call);
597 	if (WARN_ON_ONCE(!tp))
598 		return -ENOENT;
599 
600 	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
601 	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
602 
603 	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
604 }
605 
606 static struct trace_event_functions fentry_funcs = {
607 	.trace		= print_fentry_event
608 };
609 
610 static struct trace_event_functions fexit_funcs = {
611 	.trace		= print_fexit_event
612 };
613 
614 static struct trace_event_fields fentry_fields_array[] = {
615 	{ .type = TRACE_FUNCTION_TYPE,
616 	  .define_fields = fentry_event_define_fields },
617 	{}
618 };
619 
620 static struct trace_event_fields fexit_fields_array[] = {
621 	{ .type = TRACE_FUNCTION_TYPE,
622 	  .define_fields = fexit_event_define_fields },
623 	{}
624 };
625 
/* Event class ->reg callback; defined near the end of this file */
static int
fprobe_register(struct trace_event_call *event, enum trace_reg type,
		void *data);
628 
629 static inline void init_trace_event_call(struct trace_fprobe *tf)
630 {
631 	struct trace_event_call *call = trace_probe_event_call(&tf->tp);
632 
633 	if (trace_fprobe_is_return(tf)) {
634 		call->event.funcs = &fexit_funcs;
635 		call->class->fields_array = fexit_fields_array;
636 	} else {
637 		call->event.funcs = &fentry_funcs;
638 		call->class->fields_array = fentry_fields_array;
639 	}
640 
641 	call->flags = TRACE_EVENT_FL_FPROBE;
642 	call->class->reg = fprobe_register;
643 }
644 
645 static int register_fprobe_event(struct trace_fprobe *tf)
646 {
647 	init_trace_event_call(tf);
648 
649 	return trace_probe_register_event_call(&tf->tp);
650 }
651 
652 static int unregister_fprobe_event(struct trace_fprobe *tf)
653 {
654 	return trace_probe_unregister_event_call(&tf->tp);
655 }
656 
657 /* Internal register function - just handle fprobe and flags */
658 static int __register_trace_fprobe(struct trace_fprobe *tf)
659 {
660 	int i, ret;
661 
662 	/* Should we need new LOCKDOWN flag for fprobe? */
663 	ret = security_locked_down(LOCKDOWN_KPROBES);
664 	if (ret)
665 		return ret;
666 
667 	if (trace_fprobe_is_registered(tf))
668 		return -EINVAL;
669 
670 	for (i = 0; i < tf->tp.nr_args; i++) {
671 		ret = traceprobe_update_arg(&tf->tp.args[i]);
672 		if (ret)
673 			return ret;
674 	}
675 
676 	/* Set/clear disabled flag according to tp->flag */
677 	if (trace_probe_is_enabled(&tf->tp))
678 		tf->fp.flags &= ~FPROBE_FL_DISABLED;
679 	else
680 		tf->fp.flags |= FPROBE_FL_DISABLED;
681 
682 	if (trace_fprobe_is_tracepoint(tf)) {
683 		struct tracepoint *tpoint = tf->tpoint;
684 		unsigned long ip = (unsigned long)tpoint->probestub;
685 		/*
686 		 * Here, we do 2 steps to enable fprobe on a tracepoint.
687 		 * At first, put __probestub_##TP function on the tracepoint
688 		 * and put a fprobe on the stub function.
689 		 */
690 		ret = tracepoint_probe_register_prio_may_exist(tpoint,
691 					tpoint->probestub, NULL, 0);
692 		if (ret < 0)
693 			return ret;
694 		return register_fprobe_ips(&tf->fp, &ip, 1);
695 	}
696 
697 	/* TODO: handle filter, nofilter or symbol list */
698 	return register_fprobe(&tf->fp, tf->symbol, NULL);
699 }
700 
701 /* Internal unregister function - just handle fprobe and flags */
702 static void __unregister_trace_fprobe(struct trace_fprobe *tf)
703 {
704 	if (trace_fprobe_is_registered(tf)) {
705 		unregister_fprobe(&tf->fp);
706 		memset(&tf->fp, 0, sizeof(tf->fp));
707 		if (trace_fprobe_is_tracepoint(tf)) {
708 			tracepoint_probe_unregister(tf->tpoint,
709 					tf->tpoint->probestub, NULL);
710 			tf->tpoint = NULL;
711 			tf->mod = NULL;
712 		}
713 	}
714 }
715 
716 /* TODO: make this trace_*probe common function */
717 /* Unregister a trace_probe and probe_event */
718 static int unregister_trace_fprobe(struct trace_fprobe *tf)
719 {
720 	/* If other probes are on the event, just unregister fprobe */
721 	if (trace_probe_has_sibling(&tf->tp))
722 		goto unreg;
723 
724 	/* Enabled event can not be unregistered */
725 	if (trace_probe_is_enabled(&tf->tp))
726 		return -EBUSY;
727 
728 	/* If there's a reference to the dynamic event */
729 	if (trace_event_dyn_busy(trace_probe_event_call(&tf->tp)))
730 		return -EBUSY;
731 
732 	/* Will fail if probe is being used by ftrace or perf */
733 	if (unregister_fprobe_event(tf))
734 		return -EBUSY;
735 
736 unreg:
737 	__unregister_trace_fprobe(tf);
738 	dyn_event_remove(&tf->devent);
739 	trace_probe_unlink(&tf->tp);
740 
741 	return 0;
742 }
743 
744 static bool trace_fprobe_has_same_fprobe(struct trace_fprobe *orig,
745 					 struct trace_fprobe *comp)
746 {
747 	struct trace_probe_event *tpe = orig->tp.event;
748 	int i;
749 
750 	list_for_each_entry(orig, &tpe->probes, tp.list) {
751 		if (strcmp(trace_fprobe_symbol(orig),
752 			   trace_fprobe_symbol(comp)))
753 			continue;
754 
755 		/*
756 		 * trace_probe_compare_arg_type() ensured that nr_args and
757 		 * each argument name and type are same. Let's compare comm.
758 		 */
759 		for (i = 0; i < orig->tp.nr_args; i++) {
760 			if (strcmp(orig->tp.args[i].comm,
761 				   comp->tp.args[i].comm))
762 				break;
763 		}
764 
765 		if (i == orig->tp.nr_args)
766 			return true;
767 	}
768 
769 	return false;
770 }
771 
772 static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to)
773 {
774 	int ret;
775 
776 	if (trace_fprobe_is_return(tf) != trace_fprobe_is_return(to) ||
777 	    trace_fprobe_is_tracepoint(tf) != trace_fprobe_is_tracepoint(to)) {
778 		trace_probe_log_set_index(0);
779 		trace_probe_log_err(0, DIFF_PROBE_TYPE);
780 		return -EEXIST;
781 	}
782 	ret = trace_probe_compare_arg_type(&tf->tp, &to->tp);
783 	if (ret) {
784 		/* Note that argument starts index = 2 */
785 		trace_probe_log_set_index(ret + 1);
786 		trace_probe_log_err(0, DIFF_ARG_TYPE);
787 		return -EEXIST;
788 	}
789 	if (trace_fprobe_has_same_fprobe(to, tf)) {
790 		trace_probe_log_set_index(0);
791 		trace_probe_log_err(0, SAME_PROBE);
792 		return -EEXIST;
793 	}
794 
795 	/* Append to existing event */
796 	ret = trace_probe_append(&tf->tp, &to->tp);
797 	if (ret)
798 		return ret;
799 
800 	ret = __register_trace_fprobe(tf);
801 	if (ret)
802 		trace_probe_unlink(&tf->tp);
803 	else
804 		dyn_event_add(&tf->devent, trace_probe_event_call(&tf->tp));
805 
806 	return ret;
807 }
808 
809 /* Register a trace_probe and probe_event */
810 static int register_trace_fprobe(struct trace_fprobe *tf)
811 {
812 	struct trace_fprobe *old_tf;
813 	int ret;
814 
815 	mutex_lock(&event_mutex);
816 
817 	old_tf = find_trace_fprobe(trace_probe_name(&tf->tp),
818 				   trace_probe_group_name(&tf->tp));
819 	if (old_tf) {
820 		ret = append_trace_fprobe(tf, old_tf);
821 		goto end;
822 	}
823 
824 	/* Register new event */
825 	ret = register_fprobe_event(tf);
826 	if (ret) {
827 		if (ret == -EEXIST) {
828 			trace_probe_log_set_index(0);
829 			trace_probe_log_err(0, EVENT_EXIST);
830 		} else
831 			pr_warn("Failed to register probe event(%d)\n", ret);
832 		goto end;
833 	}
834 
835 	/* Register fprobe */
836 	ret = __register_trace_fprobe(tf);
837 	if (ret < 0)
838 		unregister_fprobe_event(tf);
839 	else
840 		dyn_event_add(&tf->devent, trace_probe_event_call(&tf->tp));
841 
842 end:
843 	mutex_unlock(&event_mutex);
844 	return ret;
845 }
846 
847 #ifdef CONFIG_MODULES
848 static int __tracepoint_probe_module_cb(struct notifier_block *self,
849 					unsigned long val, void *data)
850 {
851 	struct tp_module *tp_mod = data;
852 	struct trace_fprobe *tf;
853 	struct dyn_event *pos;
854 
855 	if (val != MODULE_STATE_GOING)
856 		return NOTIFY_DONE;
857 
858 	mutex_lock(&event_mutex);
859 	for_each_trace_fprobe(tf, pos) {
860 		if (tp_mod->mod == tf->mod) {
861 			tracepoint_probe_unregister(tf->tpoint,
862 					tf->tpoint->probestub, NULL);
863 			tf->tpoint = NULL;
864 			tf->mod = NULL;
865 		}
866 	}
867 	mutex_unlock(&event_mutex);
868 
869 	return NOTIFY_DONE;
870 }
871 
872 static struct notifier_block tracepoint_module_nb = {
873 	.notifier_call = __tracepoint_probe_module_cb,
874 };
875 #endif /* CONFIG_MODULES */
876 
/* Search state passed to __find_tracepoint_cb() */
struct __find_tracepoint_cb_data {
	/* Name being searched for */
	const char *tp_name;
	/* First tracepoint found with that name; NULL until then */
	struct tracepoint *tpoint;
};
881 
882 static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
883 {
884 	struct __find_tracepoint_cb_data *data = priv;
885 
886 	if (!data->tpoint && !strcmp(data->tp_name, tp->name))
887 		data->tpoint = tp;
888 }
889 
890 static struct tracepoint *find_tracepoint(const char *tp_name)
891 {
892 	struct __find_tracepoint_cb_data data = {
893 		.tp_name = tp_name,
894 	};
895 
896 	for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
897 
898 	return data.tpoint;
899 }
900 
901 static int parse_symbol_and_return(int argc, const char *argv[],
902 				   char **symbol, bool *is_return,
903 				   bool is_tracepoint)
904 {
905 	char *tmp = strchr(argv[1], '%');
906 	int i;
907 
908 	if (tmp) {
909 		int len = tmp - argv[1];
910 
911 		if (!is_tracepoint && !strcmp(tmp, "%return")) {
912 			*is_return = true;
913 		} else {
914 			trace_probe_log_err(len, BAD_ADDR_SUFFIX);
915 			return -EINVAL;
916 		}
917 		*symbol = kmemdup_nul(argv[1], len, GFP_KERNEL);
918 	} else
919 		*symbol = kstrdup(argv[1], GFP_KERNEL);
920 	if (!*symbol)
921 		return -ENOMEM;
922 
923 	if (*is_return)
924 		return 0;
925 
926 	/* If there is $retval, this should be a return fprobe. */
927 	for (i = 2; i < argc; i++) {
928 		tmp = strstr(argv[i], "$retval");
929 		if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') {
930 			*is_return = true;
931 			/*
932 			 * NOTE: Don't check is_tracepoint here, because it will
933 			 * be checked when the argument is parsed.
934 			 */
935 			break;
936 		}
937 	}
938 	return 0;
939 }
940 
941 static int __trace_fprobe_create(int argc, const char *argv[])
942 {
943 	/*
944 	 * Argument syntax:
945 	 *  - Add fentry probe:
946 	 *      f[:[GRP/][EVENT]] [MOD:]KSYM [FETCHARGS]
947 	 *  - Add fexit probe:
948 	 *      f[N][:[GRP/][EVENT]] [MOD:]KSYM%return [FETCHARGS]
949 	 *  - Add tracepoint probe:
950 	 *      t[:[GRP/][EVENT]] TRACEPOINT [FETCHARGS]
951 	 *
952 	 * Fetch args:
953 	 *  $retval	: fetch return value
954 	 *  $stack	: fetch stack address
955 	 *  $stackN	: fetch Nth entry of stack (N:0-)
956 	 *  $argN	: fetch Nth argument (N:1-)
957 	 *  $comm       : fetch current task comm
958 	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
959 	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
960 	 * Dereferencing memory fetch:
961 	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
962 	 * Alias name of args:
963 	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
964 	 * Type of args:
965 	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
966 	 */
967 	struct trace_fprobe *tf = NULL;
968 	int i, len, new_argc = 0, ret = 0;
969 	bool is_return = false;
970 	char *symbol = NULL;
971 	const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
972 	const char **new_argv = NULL;
973 	int maxactive = 0;
974 	char buf[MAX_EVENT_NAME_LEN];
975 	char gbuf[MAX_EVENT_NAME_LEN];
976 	char sbuf[KSYM_NAME_LEN];
977 	char abuf[MAX_BTF_ARGS_LEN];
978 	bool is_tracepoint = false;
979 	struct tracepoint *tpoint = NULL;
980 	struct traceprobe_parse_context ctx = {
981 		.flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
982 	};
983 
984 	if ((argv[0][0] != 'f' && argv[0][0] != 't') || argc < 2)
985 		return -ECANCELED;
986 
987 	if (argv[0][0] == 't') {
988 		is_tracepoint = true;
989 		group = TRACEPOINT_EVENT_SYSTEM;
990 	}
991 
992 	trace_probe_log_init("trace_fprobe", argc, argv);
993 
994 	event = strchr(&argv[0][1], ':');
995 	if (event)
996 		event++;
997 
998 	if (isdigit(argv[0][1])) {
999 		if (event)
1000 			len = event - &argv[0][1] - 1;
1001 		else
1002 			len = strlen(&argv[0][1]);
1003 		if (len > MAX_EVENT_NAME_LEN - 1) {
1004 			trace_probe_log_err(1, BAD_MAXACT);
1005 			goto parse_error;
1006 		}
1007 		memcpy(buf, &argv[0][1], len);
1008 		buf[len] = '\0';
1009 		ret = kstrtouint(buf, 0, &maxactive);
1010 		if (ret || !maxactive) {
1011 			trace_probe_log_err(1, BAD_MAXACT);
1012 			goto parse_error;
1013 		}
1014 		/* fprobe rethook instances are iterated over via a list. The
1015 		 * maximum should stay reasonable.
1016 		 */
1017 		if (maxactive > RETHOOK_MAXACTIVE_MAX) {
1018 			trace_probe_log_err(1, MAXACT_TOO_BIG);
1019 			goto parse_error;
1020 		}
1021 	}
1022 
1023 	trace_probe_log_set_index(1);
1024 
1025 	/* a symbol(or tracepoint) must be specified */
1026 	ret = parse_symbol_and_return(argc, argv, &symbol, &is_return, is_tracepoint);
1027 	if (ret < 0)
1028 		goto parse_error;
1029 
1030 	if (!is_return && maxactive) {
1031 		trace_probe_log_set_index(0);
1032 		trace_probe_log_err(1, BAD_MAXACT_TYPE);
1033 		goto parse_error;
1034 	}
1035 
1036 	trace_probe_log_set_index(0);
1037 	if (event) {
1038 		ret = traceprobe_parse_event_name(&event, &group, gbuf,
1039 						  event - argv[0]);
1040 		if (ret)
1041 			goto parse_error;
1042 	}
1043 
1044 	if (!event) {
1045 		/* Make a new event name */
1046 		if (is_tracepoint)
1047 			snprintf(buf, MAX_EVENT_NAME_LEN, "%s%s",
1048 				 isdigit(*symbol) ? "_" : "", symbol);
1049 		else
1050 			snprintf(buf, MAX_EVENT_NAME_LEN, "%s__%s", symbol,
1051 				 is_return ? "exit" : "entry");
1052 		sanitize_event_name(buf);
1053 		event = buf;
1054 	}
1055 
1056 	if (is_return)
1057 		ctx.flags |= TPARG_FL_RETURN;
1058 	else
1059 		ctx.flags |= TPARG_FL_FENTRY;
1060 
1061 	if (is_tracepoint) {
1062 		ctx.flags |= TPARG_FL_TPOINT;
1063 		tpoint = find_tracepoint(symbol);
1064 		if (!tpoint) {
1065 			trace_probe_log_set_index(1);
1066 			trace_probe_log_err(0, NO_TRACEPOINT);
1067 			goto parse_error;
1068 		}
1069 		ctx.funcname = kallsyms_lookup(
1070 				(unsigned long)tpoint->probestub,
1071 				NULL, NULL, NULL, sbuf);
1072 	} else
1073 		ctx.funcname = symbol;
1074 
1075 	argc -= 2; argv += 2;
1076 	new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
1077 					       abuf, MAX_BTF_ARGS_LEN, &ctx);
1078 	if (IS_ERR(new_argv)) {
1079 		ret = PTR_ERR(new_argv);
1080 		new_argv = NULL;
1081 		goto out;
1082 	}
1083 	if (new_argv) {
1084 		argc = new_argc;
1085 		argv = new_argv;
1086 	}
1087 
1088 	/* setup a probe */
1089 	tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
1090 				argc, is_return);
1091 	if (IS_ERR(tf)) {
1092 		ret = PTR_ERR(tf);
1093 		/* This must return -ENOMEM, else there is a bug */
1094 		WARN_ON_ONCE(ret != -ENOMEM);
1095 		goto out;	/* We know tf is not allocated */
1096 	}
1097 
1098 	if (is_tracepoint)
1099 		tf->mod = __module_text_address(
1100 				(unsigned long)tf->tpoint->probestub);
1101 
1102 	/* parse arguments */
1103 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
1104 		trace_probe_log_set_index(i + 2);
1105 		ctx.offset = 0;
1106 		ret = traceprobe_parse_probe_arg(&tf->tp, i, argv[i], &ctx);
1107 		if (ret)
1108 			goto error;	/* This can be -ENOMEM */
1109 	}
1110 
1111 	ret = traceprobe_set_print_fmt(&tf->tp,
1112 			is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL);
1113 	if (ret < 0)
1114 		goto error;
1115 
1116 	ret = register_trace_fprobe(tf);
1117 	if (ret) {
1118 		trace_probe_log_set_index(1);
1119 		if (ret == -EILSEQ)
1120 			trace_probe_log_err(0, BAD_INSN_BNDRY);
1121 		else if (ret == -ENOENT)
1122 			trace_probe_log_err(0, BAD_PROBE_ADDR);
1123 		else if (ret != -ENOMEM && ret != -EEXIST)
1124 			trace_probe_log_err(0, FAIL_REG_PROBE);
1125 		goto error;
1126 	}
1127 
1128 out:
1129 	traceprobe_finish_parse(&ctx);
1130 	trace_probe_log_clear();
1131 	kfree(new_argv);
1132 	kfree(symbol);
1133 	return ret;
1134 
1135 parse_error:
1136 	ret = -EINVAL;
1137 error:
1138 	free_trace_fprobe(tf);
1139 	goto out;
1140 }
1141 
1142 static int trace_fprobe_create(const char *raw_command)
1143 {
1144 	return trace_probe_create(raw_command, __trace_fprobe_create);
1145 }
1146 
/*
 * dyn_event .free callback: unregister the probe and, only if that
 * succeeded, free it.  Returns the unregister result.
 */
static int trace_fprobe_release(struct dyn_event *ev)
{
	struct trace_fprobe *tf = to_trace_fprobe(ev);
	int ret;

	ret = unregister_trace_fprobe(tf);
	if (ret)
		return ret;

	free_trace_fprobe(tf);
	return 0;
}
1156 
1157 static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev)
1158 {
1159 	struct trace_fprobe *tf = to_trace_fprobe(ev);
1160 	int i;
1161 
1162 	if (trace_fprobe_is_tracepoint(tf))
1163 		seq_putc(m, 't');
1164 	else
1165 		seq_putc(m, 'f');
1166 	if (trace_fprobe_is_return(tf) && tf->fp.nr_maxactive)
1167 		seq_printf(m, "%d", tf->fp.nr_maxactive);
1168 	seq_printf(m, ":%s/%s", trace_probe_group_name(&tf->tp),
1169 				trace_probe_name(&tf->tp));
1170 
1171 	seq_printf(m, " %s%s", trace_fprobe_symbol(tf),
1172 			       trace_fprobe_is_return(tf) ? "%return" : "");
1173 
1174 	for (i = 0; i < tf->tp.nr_args; i++)
1175 		seq_printf(m, " %s=%s", tf->tp.args[i].name, tf->tp.args[i].comm);
1176 	seq_putc(m, '\n');
1177 
1178 	return 0;
1179 }
1180 
1181 /*
1182  * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
1183  */
1184 static int fprobe_register(struct trace_event_call *event,
1185 			   enum trace_reg type, void *data)
1186 {
1187 	struct trace_event_file *file = data;
1188 
1189 	switch (type) {
1190 	case TRACE_REG_REGISTER:
1191 		return enable_trace_fprobe(event, file);
1192 	case TRACE_REG_UNREGISTER:
1193 		return disable_trace_fprobe(event, file);
1194 
1195 #ifdef CONFIG_PERF_EVENTS
1196 	case TRACE_REG_PERF_REGISTER:
1197 		return enable_trace_fprobe(event, NULL);
1198 	case TRACE_REG_PERF_UNREGISTER:
1199 		return disable_trace_fprobe(event, NULL);
1200 	case TRACE_REG_PERF_OPEN:
1201 	case TRACE_REG_PERF_CLOSE:
1202 	case TRACE_REG_PERF_ADD:
1203 	case TRACE_REG_PERF_DEL:
1204 		return 0;
1205 #endif
1206 	}
1207 	return 0;
1208 }
1209 
1210 /*
1211  * Register dynevent at core_initcall. This allows kernel to setup fprobe
1212  * events in postcore_initcall without tracefs.
1213  */
1214 static __init int init_fprobe_trace_early(void)
1215 {
1216 	int ret;
1217 
1218 	ret = dyn_event_register(&trace_fprobe_ops);
1219 	if (ret)
1220 		return ret;
1221 
1222 #ifdef CONFIG_MODULES
1223 	ret = register_tracepoint_module_notifier(&tracepoint_module_nb);
1224 	if (ret)
1225 		return ret;
1226 #endif
1227 
1228 	return 0;
1229 }
1230 core_initcall(init_fprobe_trace_early);
1231