xref: /openbmc/linux/kernel/trace/trace_kprobe.c (revision 068ac0db)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Kprobes-based tracing events
4  *
5  * Created by Masami Hiramatsu <mhiramat@redhat.com>
6  *
7  */
8 #define pr_fmt(fmt)	"trace_kprobe: " fmt
9 
10 #include <linux/security.h>
11 #include <linux/module.h>
12 #include <linux/uaccess.h>
13 #include <linux/rculist.h>
14 #include <linux/error-injection.h>
15 
16 #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */
17 
18 #include "trace_dynevent.h"
19 #include "trace_kprobe_selftest.h"
20 #include "trace_probe.h"
21 #include "trace_probe_tmpl.h"
22 
/* Group (system) name used for an event when the command does not name one */
#define KPROBE_EVENT_SYSTEM "kprobes"
/* Largest MAXACTIVE value accepted when parsing a kretprobe definition */
#define KRETPROBE_MAXACTIVE_MAX 4096
/* NOTE(review): not referenced in this part of the file - confirm its user */
#define MAX_KPROBE_CMDLINE_SIZE 1024
26 
27 /* Kprobe early definition from command line */
28 static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata;
29 static bool kprobe_boot_events_enabled __initdata;
30 
31 static int __init set_kprobe_boot_events(char *str)
32 {
33 	strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
34 	return 0;
35 }
36 __setup("kprobe_event=", set_kprobe_boot_events);
37 
38 static int trace_kprobe_create(int argc, const char **argv);
39 static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev);
40 static int trace_kprobe_release(struct dyn_event *ev);
41 static bool trace_kprobe_is_busy(struct dyn_event *ev);
42 static bool trace_kprobe_match(const char *system, const char *event,
43 			int argc, const char **argv, struct dyn_event *ev);
44 
45 static struct dyn_event_operations trace_kprobe_ops = {
46 	.create = trace_kprobe_create,
47 	.show = trace_kprobe_show,
48 	.is_busy = trace_kprobe_is_busy,
49 	.free = trace_kprobe_release,
50 	.match = trace_kprobe_match,
51 };
52 
/*
 * Kprobe event core functions
 */
/*
 * One kprobe- or kretprobe-backed trace event. Objects are allocated with
 * SIZEOF_TRACE_KPROBE(), which sizes them from the offset of tp.args.
 */
struct trace_kprobe {
	struct dyn_event	devent;	/* to_trace_kprobe() maps back from this member */
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long __percpu *nhit;	/* per-CPU counter, summed by trace_kprobe_nhit() */
	const char		*symbol;	/* symbol name; kstrdup()ed copy, NULL for a raw address */
	/* NOTE(review): presumably must stay last because tp.args trails it - confirm */
	struct trace_probe	tp;
};
63 
64 static bool is_trace_kprobe(struct dyn_event *ev)
65 {
66 	return ev->ops == &trace_kprobe_ops;
67 }
68 
69 static struct trace_kprobe *to_trace_kprobe(struct dyn_event *ev)
70 {
71 	return container_of(ev, struct trace_kprobe, devent);
72 }
73 
/**
 * for_each_trace_kprobe - iterate over the trace_kprobe list
 * @pos:	the struct trace_kprobe * for each entry
 * @dpos:	the struct dyn_event * to use as a loop cursor
 *
 * Walks every dyn_event and runs the loop body only for those that are
 * kprobe events. The expansion ends in an unbraced "if", so a following
 * "else" would bind to that hidden "if".
 */
#define for_each_trace_kprobe(pos, dpos)	\
	for_each_dyn_event(dpos)		\
		if (is_trace_kprobe(dpos) && (pos = to_trace_kprobe(dpos)))
82 
/*
 * Allocation size of a trace_kprobe that carries @n probe arguments:
 * everything up to tp.args plus @n struct probe_arg slots.
 */
#define SIZEOF_TRACE_KPROBE(n)				\
	(offsetof(struct trace_kprobe, tp.args) +	\
	(sizeof(struct probe_arg) * (n)))
86 
87 static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
88 {
89 	return tk->rp.handler != NULL;
90 }
91 
92 static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
93 {
94 	return tk->symbol ? tk->symbol : "unknown";
95 }
96 
97 static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
98 {
99 	return tk->rp.kp.offset;
100 }
101 
102 static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
103 {
104 	return !!(kprobe_gone(&tk->rp.kp));
105 }
106 
107 static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
108 						 struct module *mod)
109 {
110 	int len = strlen(mod->name);
111 	const char *name = trace_kprobe_symbol(tk);
112 	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
113 }
114 
115 static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
116 {
117 	char *p;
118 	bool ret;
119 
120 	if (!tk->symbol)
121 		return false;
122 	p = strchr(tk->symbol, ':');
123 	if (!p)
124 		return true;
125 	*p = '\0';
126 	mutex_lock(&module_mutex);
127 	ret = !!find_module(tk->symbol);
128 	mutex_unlock(&module_mutex);
129 	*p = ':';
130 
131 	return ret;
132 }
133 
134 static bool trace_kprobe_is_busy(struct dyn_event *ev)
135 {
136 	struct trace_kprobe *tk = to_trace_kprobe(ev);
137 
138 	return trace_probe_is_enabled(&tk->tp);
139 }
140 
141 static bool trace_kprobe_match_command_head(struct trace_kprobe *tk,
142 					    int argc, const char **argv)
143 {
144 	char buf[MAX_ARGSTR_LEN + 1];
145 
146 	if (!argc)
147 		return true;
148 
149 	if (!tk->symbol)
150 		snprintf(buf, sizeof(buf), "0x%p", tk->rp.kp.addr);
151 	else if (tk->rp.kp.offset)
152 		snprintf(buf, sizeof(buf), "%s+%u",
153 			 trace_kprobe_symbol(tk), tk->rp.kp.offset);
154 	else
155 		snprintf(buf, sizeof(buf), "%s", trace_kprobe_symbol(tk));
156 	if (strcmp(buf, argv[0]))
157 		return false;
158 	argc--; argv++;
159 
160 	return trace_probe_match_command_args(&tk->tp, argc, argv);
161 }
162 
163 static bool trace_kprobe_match(const char *system, const char *event,
164 			int argc, const char **argv, struct dyn_event *ev)
165 {
166 	struct trace_kprobe *tk = to_trace_kprobe(ev);
167 
168 	return strcmp(trace_probe_name(&tk->tp), event) == 0 &&
169 	    (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0) &&
170 	    trace_kprobe_match_command_head(tk, argc, argv);
171 }
172 
173 static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
174 {
175 	unsigned long nhit = 0;
176 	int cpu;
177 
178 	for_each_possible_cpu(cpu)
179 		nhit += *per_cpu_ptr(tk->nhit, cpu);
180 
181 	return nhit;
182 }
183 
184 static nokprobe_inline bool trace_kprobe_is_registered(struct trace_kprobe *tk)
185 {
186 	return !(list_empty(&tk->rp.kp.list) &&
187 		 hlist_unhashed(&tk->rp.kp.hlist));
188 }
189 
190 /* Return 0 if it fails to find the symbol address */
191 static nokprobe_inline
192 unsigned long trace_kprobe_address(struct trace_kprobe *tk)
193 {
194 	unsigned long addr;
195 
196 	if (tk->symbol) {
197 		addr = (unsigned long)
198 			kallsyms_lookup_name(trace_kprobe_symbol(tk));
199 		if (addr)
200 			addr += tk->rp.kp.offset;
201 	} else {
202 		addr = (unsigned long)tk->rp.kp.addr;
203 	}
204 	return addr;
205 }
206 
207 static nokprobe_inline struct trace_kprobe *
208 trace_kprobe_primary_from_call(struct trace_event_call *call)
209 {
210 	struct trace_probe *tp;
211 
212 	tp = trace_probe_primary_from_call(call);
213 	if (WARN_ON_ONCE(!tp))
214 		return NULL;
215 
216 	return container_of(tp, struct trace_kprobe, tp);
217 }
218 
219 bool trace_kprobe_on_func_entry(struct trace_event_call *call)
220 {
221 	struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
222 
223 	return tk ? kprobe_on_func_entry(tk->rp.kp.addr,
224 			tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
225 			tk->rp.kp.addr ? 0 : tk->rp.kp.offset) : false;
226 }
227 
228 bool trace_kprobe_error_injectable(struct trace_event_call *call)
229 {
230 	struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
231 
232 	return tk ? within_error_injection_list(trace_kprobe_address(tk)) :
233 	       false;
234 }
235 
/* Forward declarations; these are used below before their definitions */
static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);

/* Handlers installed by alloc_trace_kprobe() as kp.pre_handler / rp.handler */
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);
242 
243 static void free_trace_kprobe(struct trace_kprobe *tk)
244 {
245 	if (tk) {
246 		trace_probe_cleanup(&tk->tp);
247 		kfree(tk->symbol);
248 		free_percpu(tk->nhit);
249 		kfree(tk);
250 	}
251 }
252 
253 /*
254  * Allocate new trace_probe and initialize it (including kprobes).
255  */
256 static struct trace_kprobe *alloc_trace_kprobe(const char *group,
257 					     const char *event,
258 					     void *addr,
259 					     const char *symbol,
260 					     unsigned long offs,
261 					     int maxactive,
262 					     int nargs, bool is_return)
263 {
264 	struct trace_kprobe *tk;
265 	int ret = -ENOMEM;
266 
267 	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
268 	if (!tk)
269 		return ERR_PTR(ret);
270 
271 	tk->nhit = alloc_percpu(unsigned long);
272 	if (!tk->nhit)
273 		goto error;
274 
275 	if (symbol) {
276 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
277 		if (!tk->symbol)
278 			goto error;
279 		tk->rp.kp.symbol_name = tk->symbol;
280 		tk->rp.kp.offset = offs;
281 	} else
282 		tk->rp.kp.addr = addr;
283 
284 	if (is_return)
285 		tk->rp.handler = kretprobe_dispatcher;
286 	else
287 		tk->rp.kp.pre_handler = kprobe_dispatcher;
288 
289 	tk->rp.maxactive = maxactive;
290 	INIT_HLIST_NODE(&tk->rp.kp.hlist);
291 	INIT_LIST_HEAD(&tk->rp.kp.list);
292 
293 	ret = trace_probe_init(&tk->tp, event, group);
294 	if (ret < 0)
295 		goto error;
296 
297 	dyn_event_init(&tk->devent, &trace_kprobe_ops);
298 	return tk;
299 error:
300 	free_trace_kprobe(tk);
301 	return ERR_PTR(ret);
302 }
303 
304 static struct trace_kprobe *find_trace_kprobe(const char *event,
305 					      const char *group)
306 {
307 	struct dyn_event *pos;
308 	struct trace_kprobe *tk;
309 
310 	for_each_trace_kprobe(tk, pos)
311 		if (strcmp(trace_probe_name(&tk->tp), event) == 0 &&
312 		    strcmp(trace_probe_group_name(&tk->tp), group) == 0)
313 			return tk;
314 	return NULL;
315 }
316 
317 static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
318 {
319 	int ret = 0;
320 
321 	if (trace_kprobe_is_registered(tk) && !trace_kprobe_has_gone(tk)) {
322 		if (trace_kprobe_is_return(tk))
323 			ret = enable_kretprobe(&tk->rp);
324 		else
325 			ret = enable_kprobe(&tk->rp.kp);
326 	}
327 
328 	return ret;
329 }
330 
331 static void __disable_trace_kprobe(struct trace_probe *tp)
332 {
333 	struct trace_probe *pos;
334 	struct trace_kprobe *tk;
335 
336 	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
337 		tk = container_of(pos, struct trace_kprobe, tp);
338 		if (!trace_kprobe_is_registered(tk))
339 			continue;
340 		if (trace_kprobe_is_return(tk))
341 			disable_kretprobe(&tk->rp);
342 		else
343 			disable_kprobe(&tk->rp.kp);
344 	}
345 }
346 
347 /*
348  * Enable trace_probe
349  * if the file is NULL, enable "perf" handler, or enable "trace" handler.
350  */
351 static int enable_trace_kprobe(struct trace_event_call *call,
352 				struct trace_event_file *file)
353 {
354 	struct trace_probe *pos, *tp;
355 	struct trace_kprobe *tk;
356 	bool enabled;
357 	int ret = 0;
358 
359 	tp = trace_probe_primary_from_call(call);
360 	if (WARN_ON_ONCE(!tp))
361 		return -ENODEV;
362 	enabled = trace_probe_is_enabled(tp);
363 
364 	/* This also changes "enabled" state */
365 	if (file) {
366 		ret = trace_probe_add_file(tp, file);
367 		if (ret)
368 			return ret;
369 	} else
370 		trace_probe_set_flag(tp, TP_FLAG_PROFILE);
371 
372 	if (enabled)
373 		return 0;
374 
375 	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
376 		tk = container_of(pos, struct trace_kprobe, tp);
377 		if (trace_kprobe_has_gone(tk))
378 			continue;
379 		ret = __enable_trace_kprobe(tk);
380 		if (ret)
381 			break;
382 		enabled = true;
383 	}
384 
385 	if (ret) {
386 		/* Failed to enable one of them. Roll back all */
387 		if (enabled)
388 			__disable_trace_kprobe(tp);
389 		if (file)
390 			trace_probe_remove_file(tp, file);
391 		else
392 			trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
393 	}
394 
395 	return ret;
396 }
397 
398 /*
399  * Disable trace_probe
400  * if the file is NULL, disable "perf" handler, or disable "trace" handler.
401  */
402 static int disable_trace_kprobe(struct trace_event_call *call,
403 				struct trace_event_file *file)
404 {
405 	struct trace_probe *tp;
406 
407 	tp = trace_probe_primary_from_call(call);
408 	if (WARN_ON_ONCE(!tp))
409 		return -ENODEV;
410 
411 	if (file) {
412 		if (!trace_probe_get_file_link(tp, file))
413 			return -ENOENT;
414 		if (!trace_probe_has_single_file(tp))
415 			goto out;
416 		trace_probe_clear_flag(tp, TP_FLAG_TRACE);
417 	} else
418 		trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
419 
420 	if (!trace_probe_is_enabled(tp))
421 		__disable_trace_kprobe(tp);
422 
423  out:
424 	if (file)
425 		/*
426 		 * Synchronization is done in below function. For perf event,
427 		 * file == NULL and perf_trace_event_unreg() calls
428 		 * tracepoint_synchronize_unregister() to ensure synchronize
429 		 * event. We don't need to care about it.
430 		 */
431 		trace_probe_remove_file(tp, file);
432 
433 	return 0;
434 }
435 
#if defined(CONFIG_KPROBES_ON_FTRACE) && \
	!defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
/*
 * Check whether the function containing @addr has no ftrace location.
 *
 * Returns false when @addr is 0 or when its symbol size/offset cannot be
 * looked up; otherwise returns true if ftrace_location_range() finds nothing
 * within the function.
 */
static bool __within_notrace_func(unsigned long addr)
{
	unsigned long offset, size;

	if (!addr || !kallsyms_lookup_size_offset(addr, &size, &offset))
		return false;

	/* Get the entry address of the target function */
	addr -= offset;

	/*
	 * Since ftrace_location_range() does inclusive range check, we need
	 * to subtract 1 byte from the end address.
	 */
	return !ftrace_location_range(addr, addr + size - 1);
}

/*
 * Tell whether @tk probes a function that is considered notrace.
 *
 * If the probed function itself looks notrace and its symbol name carries a
 * '.' suffix, the name is cut at the '.' and the check is repeated on the
 * address of that base symbol, when it can be resolved.
 */
static bool within_notrace_func(struct trace_kprobe *tk)
{
	unsigned long addr = trace_kprobe_address(tk);
	char symname[KSYM_NAME_LEN], *p;

	if (!__within_notrace_func(addr))
		return false;

	/* Check if the address is on a suffixed-symbol */
	if (!lookup_symbol_name(addr, symname)) {
		p = strchr(symname, '.');
		if (!p)
			return true;
		*p = '\0';
		addr = (unsigned long)kprobe_lookup_name(symname, 0);
		if (addr)
			return __within_notrace_func(addr);
	}

	return true;
}
#else
/* Without the notrace check compiled in, nothing is treated as notrace */
#define within_notrace_func(tk)	(false)
#endif
479 
480 /* Internal register function - just handle k*probes and flags */
481 static int __register_trace_kprobe(struct trace_kprobe *tk)
482 {
483 	int i, ret;
484 
485 	ret = security_locked_down(LOCKDOWN_KPROBES);
486 	if (ret)
487 		return ret;
488 
489 	if (trace_kprobe_is_registered(tk))
490 		return -EINVAL;
491 
492 	if (within_notrace_func(tk)) {
493 		pr_warn("Could not probe notrace function %s\n",
494 			trace_kprobe_symbol(tk));
495 		return -EINVAL;
496 	}
497 
498 	for (i = 0; i < tk->tp.nr_args; i++) {
499 		ret = traceprobe_update_arg(&tk->tp.args[i]);
500 		if (ret)
501 			return ret;
502 	}
503 
504 	/* Set/clear disabled flag according to tp->flag */
505 	if (trace_probe_is_enabled(&tk->tp))
506 		tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
507 	else
508 		tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;
509 
510 	if (trace_kprobe_is_return(tk))
511 		ret = register_kretprobe(&tk->rp);
512 	else
513 		ret = register_kprobe(&tk->rp.kp);
514 
515 	return ret;
516 }
517 
518 /* Internal unregister function - just handle k*probes and flags */
519 static void __unregister_trace_kprobe(struct trace_kprobe *tk)
520 {
521 	if (trace_kprobe_is_registered(tk)) {
522 		if (trace_kprobe_is_return(tk))
523 			unregister_kretprobe(&tk->rp);
524 		else
525 			unregister_kprobe(&tk->rp.kp);
526 		/* Cleanup kprobe for reuse and mark it unregistered */
527 		INIT_HLIST_NODE(&tk->rp.kp.hlist);
528 		INIT_LIST_HEAD(&tk->rp.kp.list);
529 		if (tk->rp.kp.symbol_name)
530 			tk->rp.kp.addr = NULL;
531 	}
532 }
533 
534 /* Unregister a trace_probe and probe_event */
535 static int unregister_trace_kprobe(struct trace_kprobe *tk)
536 {
537 	/* If other probes are on the event, just unregister kprobe */
538 	if (trace_probe_has_sibling(&tk->tp))
539 		goto unreg;
540 
541 	/* Enabled event can not be unregistered */
542 	if (trace_probe_is_enabled(&tk->tp))
543 		return -EBUSY;
544 
545 	/* Will fail if probe is being used by ftrace or perf */
546 	if (unregister_kprobe_event(tk))
547 		return -EBUSY;
548 
549 unreg:
550 	__unregister_trace_kprobe(tk);
551 	dyn_event_remove(&tk->devent);
552 	trace_probe_unlink(&tk->tp);
553 
554 	return 0;
555 }
556 
557 static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig,
558 					 struct trace_kprobe *comp)
559 {
560 	struct trace_probe_event *tpe = orig->tp.event;
561 	struct trace_probe *pos;
562 	int i;
563 
564 	list_for_each_entry(pos, &tpe->probes, list) {
565 		orig = container_of(pos, struct trace_kprobe, tp);
566 		if (strcmp(trace_kprobe_symbol(orig),
567 			   trace_kprobe_symbol(comp)) ||
568 		    trace_kprobe_offset(orig) != trace_kprobe_offset(comp))
569 			continue;
570 
571 		/*
572 		 * trace_probe_compare_arg_type() ensured that nr_args and
573 		 * each argument name and type are same. Let's compare comm.
574 		 */
575 		for (i = 0; i < orig->tp.nr_args; i++) {
576 			if (strcmp(orig->tp.args[i].comm,
577 				   comp->tp.args[i].comm))
578 				break;
579 		}
580 
581 		if (i == orig->tp.nr_args)
582 			return true;
583 	}
584 
585 	return false;
586 }
587 
588 static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to)
589 {
590 	int ret;
591 
592 	ret = trace_probe_compare_arg_type(&tk->tp, &to->tp);
593 	if (ret) {
594 		/* Note that argument starts index = 2 */
595 		trace_probe_log_set_index(ret + 1);
596 		trace_probe_log_err(0, DIFF_ARG_TYPE);
597 		return -EEXIST;
598 	}
599 	if (trace_kprobe_has_same_kprobe(to, tk)) {
600 		trace_probe_log_set_index(0);
601 		trace_probe_log_err(0, SAME_PROBE);
602 		return -EEXIST;
603 	}
604 
605 	/* Append to existing event */
606 	ret = trace_probe_append(&tk->tp, &to->tp);
607 	if (ret)
608 		return ret;
609 
610 	/* Register k*probe */
611 	ret = __register_trace_kprobe(tk);
612 	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
613 		pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
614 		ret = 0;
615 	}
616 
617 	if (ret)
618 		trace_probe_unlink(&tk->tp);
619 	else
620 		dyn_event_add(&tk->devent);
621 
622 	return ret;
623 }
624 
625 /* Register a trace_probe and probe_event */
626 static int register_trace_kprobe(struct trace_kprobe *tk)
627 {
628 	struct trace_kprobe *old_tk;
629 	int ret;
630 
631 	mutex_lock(&event_mutex);
632 
633 	old_tk = find_trace_kprobe(trace_probe_name(&tk->tp),
634 				   trace_probe_group_name(&tk->tp));
635 	if (old_tk) {
636 		if (trace_kprobe_is_return(tk) != trace_kprobe_is_return(old_tk)) {
637 			trace_probe_log_set_index(0);
638 			trace_probe_log_err(0, DIFF_PROBE_TYPE);
639 			ret = -EEXIST;
640 		} else {
641 			ret = append_trace_kprobe(tk, old_tk);
642 		}
643 		goto end;
644 	}
645 
646 	/* Register new event */
647 	ret = register_kprobe_event(tk);
648 	if (ret) {
649 		pr_warn("Failed to register probe event(%d)\n", ret);
650 		goto end;
651 	}
652 
653 	/* Register k*probe */
654 	ret = __register_trace_kprobe(tk);
655 	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
656 		pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
657 		ret = 0;
658 	}
659 
660 	if (ret < 0)
661 		unregister_kprobe_event(tk);
662 	else
663 		dyn_event_add(&tk->devent);
664 
665 end:
666 	mutex_unlock(&event_mutex);
667 	return ret;
668 }
669 
670 /* Module notifier call back, checking event on the module */
671 static int trace_kprobe_module_callback(struct notifier_block *nb,
672 				       unsigned long val, void *data)
673 {
674 	struct module *mod = data;
675 	struct dyn_event *pos;
676 	struct trace_kprobe *tk;
677 	int ret;
678 
679 	if (val != MODULE_STATE_COMING)
680 		return NOTIFY_DONE;
681 
682 	/* Update probes on coming module */
683 	mutex_lock(&event_mutex);
684 	for_each_trace_kprobe(tk, pos) {
685 		if (trace_kprobe_within_module(tk, mod)) {
686 			/* Don't need to check busy - this should have gone. */
687 			__unregister_trace_kprobe(tk);
688 			ret = __register_trace_kprobe(tk);
689 			if (ret)
690 				pr_warn("Failed to re-register probe %s on %s: %d\n",
691 					trace_probe_name(&tk->tp),
692 					mod->name, ret);
693 		}
694 	}
695 	mutex_unlock(&event_mutex);
696 
697 	return NOTIFY_DONE;
698 }
699 
700 static struct notifier_block trace_kprobe_module_nb = {
701 	.notifier_call = trace_kprobe_module_callback,
702 	.priority = 1	/* Invoked after kprobe module callback */
703 };
704 
/*
 * Convert certain expected symbols into '_' when generating event names
 * @name: NUL-terminated string, modified in place
 *
 * Every ':' and '.' in @name, including one in the first position, is
 * replaced with '_'. An empty string is left untouched.
 */
static inline void sanitize_event_name(char *name)
{
	for (; *name != '\0'; name++)
		if (*name == ':' || *name == '.')
			*name = '_';
}
712 
713 static int trace_kprobe_create(int argc, const char *argv[])
714 {
715 	/*
716 	 * Argument syntax:
717 	 *  - Add kprobe:
718 	 *      p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
719 	 *  - Add kretprobe:
720 	 *      r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
721 	 * Fetch args:
722 	 *  $retval	: fetch return value
723 	 *  $stack	: fetch stack address
724 	 *  $stackN	: fetch Nth of stack (N:0-)
725 	 *  $comm       : fetch current task comm
726 	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
727 	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
728 	 *  %REG	: fetch register REG
729 	 * Dereferencing memory fetch:
730 	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
731 	 * Alias name of args:
732 	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
733 	 * Type of args:
734 	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
735 	 */
736 	struct trace_kprobe *tk = NULL;
737 	int i, len, ret = 0;
738 	bool is_return = false;
739 	char *symbol = NULL, *tmp = NULL;
740 	const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
741 	int maxactive = 0;
742 	long offset = 0;
743 	void *addr = NULL;
744 	char buf[MAX_EVENT_NAME_LEN];
745 	unsigned int flags = TPARG_FL_KERNEL;
746 
747 	switch (argv[0][0]) {
748 	case 'r':
749 		is_return = true;
750 		flags |= TPARG_FL_RETURN;
751 		break;
752 	case 'p':
753 		break;
754 	default:
755 		return -ECANCELED;
756 	}
757 	if (argc < 2)
758 		return -ECANCELED;
759 
760 	trace_probe_log_init("trace_kprobe", argc, argv);
761 
762 	event = strchr(&argv[0][1], ':');
763 	if (event)
764 		event++;
765 
766 	if (isdigit(argv[0][1])) {
767 		if (!is_return) {
768 			trace_probe_log_err(1, MAXACT_NO_KPROBE);
769 			goto parse_error;
770 		}
771 		if (event)
772 			len = event - &argv[0][1] - 1;
773 		else
774 			len = strlen(&argv[0][1]);
775 		if (len > MAX_EVENT_NAME_LEN - 1) {
776 			trace_probe_log_err(1, BAD_MAXACT);
777 			goto parse_error;
778 		}
779 		memcpy(buf, &argv[0][1], len);
780 		buf[len] = '\0';
781 		ret = kstrtouint(buf, 0, &maxactive);
782 		if (ret || !maxactive) {
783 			trace_probe_log_err(1, BAD_MAXACT);
784 			goto parse_error;
785 		}
786 		/* kretprobes instances are iterated over via a list. The
787 		 * maximum should stay reasonable.
788 		 */
789 		if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
790 			trace_probe_log_err(1, MAXACT_TOO_BIG);
791 			goto parse_error;
792 		}
793 	}
794 
795 	/* try to parse an address. if that fails, try to read the
796 	 * input as a symbol. */
797 	if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
798 		trace_probe_log_set_index(1);
799 		/* Check whether uprobe event specified */
800 		if (strchr(argv[1], '/') && strchr(argv[1], ':')) {
801 			ret = -ECANCELED;
802 			goto error;
803 		}
804 		/* a symbol specified */
805 		symbol = kstrdup(argv[1], GFP_KERNEL);
806 		if (!symbol)
807 			return -ENOMEM;
808 		/* TODO: support .init module functions */
809 		ret = traceprobe_split_symbol_offset(symbol, &offset);
810 		if (ret || offset < 0 || offset > UINT_MAX) {
811 			trace_probe_log_err(0, BAD_PROBE_ADDR);
812 			goto parse_error;
813 		}
814 		if (kprobe_on_func_entry(NULL, symbol, offset))
815 			flags |= TPARG_FL_FENTRY;
816 		if (offset && is_return && !(flags & TPARG_FL_FENTRY)) {
817 			trace_probe_log_err(0, BAD_RETPROBE);
818 			goto parse_error;
819 		}
820 	}
821 
822 	trace_probe_log_set_index(0);
823 	if (event) {
824 		ret = traceprobe_parse_event_name(&event, &group, buf,
825 						  event - argv[0]);
826 		if (ret)
827 			goto parse_error;
828 	} else {
829 		/* Make a new event name */
830 		if (symbol)
831 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
832 				 is_return ? 'r' : 'p', symbol, offset);
833 		else
834 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
835 				 is_return ? 'r' : 'p', addr);
836 		sanitize_event_name(buf);
837 		event = buf;
838 	}
839 
840 	/* setup a probe */
841 	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
842 			       argc - 2, is_return);
843 	if (IS_ERR(tk)) {
844 		ret = PTR_ERR(tk);
845 		/* This must return -ENOMEM, else there is a bug */
846 		WARN_ON_ONCE(ret != -ENOMEM);
847 		goto out;	/* We know tk is not allocated */
848 	}
849 	argc -= 2; argv += 2;
850 
851 	/* parse arguments */
852 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
853 		tmp = kstrdup(argv[i], GFP_KERNEL);
854 		if (!tmp) {
855 			ret = -ENOMEM;
856 			goto error;
857 		}
858 
859 		trace_probe_log_set_index(i + 2);
860 		ret = traceprobe_parse_probe_arg(&tk->tp, i, tmp, flags);
861 		kfree(tmp);
862 		if (ret)
863 			goto error;	/* This can be -ENOMEM */
864 	}
865 
866 	ret = traceprobe_set_print_fmt(&tk->tp, is_return);
867 	if (ret < 0)
868 		goto error;
869 
870 	ret = register_trace_kprobe(tk);
871 	if (ret) {
872 		trace_probe_log_set_index(1);
873 		if (ret == -EILSEQ)
874 			trace_probe_log_err(0, BAD_INSN_BNDRY);
875 		else if (ret == -ENOENT)
876 			trace_probe_log_err(0, BAD_PROBE_ADDR);
877 		else if (ret != -ENOMEM && ret != -EEXIST)
878 			trace_probe_log_err(0, FAIL_REG_PROBE);
879 		goto error;
880 	}
881 
882 out:
883 	trace_probe_log_clear();
884 	kfree(symbol);
885 	return ret;
886 
887 parse_error:
888 	ret = -EINVAL;
889 error:
890 	free_trace_kprobe(tk);
891 	goto out;
892 }
893 
894 static int create_or_delete_trace_kprobe(int argc, char **argv)
895 {
896 	int ret;
897 
898 	if (argv[0][0] == '-')
899 		return dyn_event_release(argc, argv, &trace_kprobe_ops);
900 
901 	ret = trace_kprobe_create(argc, (const char **)argv);
902 	return ret == -ECANCELED ? -EINVAL : ret;
903 }
904 
/*
 * dyn_event ->free callback: unregister the event and, only if that
 * succeeded, free it. Returns the result of unregister_trace_kprobe().
 */
static int trace_kprobe_release(struct dyn_event *ev)
{
	struct trace_kprobe *tk = to_trace_kprobe(ev);
	int ret;

	ret = unregister_trace_kprobe(tk);
	if (ret)
		return ret;

	free_trace_kprobe(tk);
	return 0;
}
914 
915 static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
916 {
917 	struct trace_kprobe *tk = to_trace_kprobe(ev);
918 	int i;
919 
920 	seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
921 	seq_printf(m, ":%s/%s", trace_probe_group_name(&tk->tp),
922 				trace_probe_name(&tk->tp));
923 
924 	if (!tk->symbol)
925 		seq_printf(m, " 0x%p", tk->rp.kp.addr);
926 	else if (tk->rp.kp.offset)
927 		seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
928 			   tk->rp.kp.offset);
929 	else
930 		seq_printf(m, " %s", trace_kprobe_symbol(tk));
931 
932 	for (i = 0; i < tk->tp.nr_args; i++)
933 		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
934 	seq_putc(m, '\n');
935 
936 	return 0;
937 }
938 
/* seq_file ->show for kprobe_events: print kprobe events, skip the rest. */
static int probes_seq_show(struct seq_file *m, void *v)
{
	struct dyn_event *ev = v;

	if (is_trace_kprobe(ev))
		return trace_kprobe_show(m, ev);

	return 0;
}
948 
949 static const struct seq_operations probes_seq_op = {
950 	.start  = dyn_event_seq_start,
951 	.next   = dyn_event_seq_next,
952 	.stop   = dyn_event_seq_stop,
953 	.show   = probes_seq_show
954 };
955 
956 static int probes_open(struct inode *inode, struct file *file)
957 {
958 	int ret;
959 
960 	ret = security_locked_down(LOCKDOWN_TRACEFS);
961 	if (ret)
962 		return ret;
963 
964 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
965 		ret = dyn_events_release_all(&trace_kprobe_ops);
966 		if (ret < 0)
967 			return ret;
968 	}
969 
970 	return seq_open(file, &probes_seq_op);
971 }
972 
973 static ssize_t probes_write(struct file *file, const char __user *buffer,
974 			    size_t count, loff_t *ppos)
975 {
976 	return trace_parse_run_command(file, buffer, count, ppos,
977 				       create_or_delete_trace_kprobe);
978 }
979 
980 static const struct file_operations kprobe_events_ops = {
981 	.owner          = THIS_MODULE,
982 	.open           = probes_open,
983 	.read           = seq_read,
984 	.llseek         = seq_lseek,
985 	.release        = seq_release,
986 	.write		= probes_write,
987 };
988 
989 /* Probes profiling interfaces */
990 static int probes_profile_seq_show(struct seq_file *m, void *v)
991 {
992 	struct dyn_event *ev = v;
993 	struct trace_kprobe *tk;
994 
995 	if (!is_trace_kprobe(ev))
996 		return 0;
997 
998 	tk = to_trace_kprobe(ev);
999 	seq_printf(m, "  %-44s %15lu %15lu\n",
1000 		   trace_probe_name(&tk->tp),
1001 		   trace_kprobe_nhit(tk),
1002 		   tk->rp.kp.nmissed);
1003 
1004 	return 0;
1005 }
1006 
1007 static const struct seq_operations profile_seq_op = {
1008 	.start  = dyn_event_seq_start,
1009 	.next   = dyn_event_seq_next,
1010 	.stop   = dyn_event_seq_stop,
1011 	.show   = probes_profile_seq_show
1012 };
1013 
1014 static int profile_open(struct inode *inode, struct file *file)
1015 {
1016 	int ret;
1017 
1018 	ret = security_locked_down(LOCKDOWN_TRACEFS);
1019 	if (ret)
1020 		return ret;
1021 
1022 	return seq_open(file, &profile_seq_op);
1023 }
1024 
1025 static const struct file_operations kprobe_profile_ops = {
1026 	.owner          = THIS_MODULE,
1027 	.open           = profile_open,
1028 	.read           = seq_read,
1029 	.llseek         = seq_lseek,
1030 	.release        = seq_release,
1031 };
1032 
1033 /* Kprobe specific fetch functions */
1034 
1035 /* Return the length of string -- including null terminal byte */
1036 static nokprobe_inline int
1037 fetch_store_strlen(unsigned long addr)
1038 {
1039 	int ret, len = 0;
1040 	u8 c;
1041 
1042 	do {
1043 		ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
1044 		len++;
1045 	} while (c && ret == 0 && len < MAX_STRING_SIZE);
1046 
1047 	return (ret < 0) ? ret : len;
1048 }
1049 
1050 /* Return the length of string -- including null terminal byte */
1051 static nokprobe_inline int
1052 fetch_store_strlen_user(unsigned long addr)
1053 {
1054 	const void __user *uaddr =  (__force const void __user *)addr;
1055 
1056 	return strnlen_unsafe_user(uaddr, MAX_STRING_SIZE);
1057 }
1058 
1059 /*
1060  * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
1061  * length and relative data location.
1062  */
1063 static nokprobe_inline int
1064 fetch_store_string(unsigned long addr, void *dest, void *base)
1065 {
1066 	int maxlen = get_loc_len(*(u32 *)dest);
1067 	void *__dest;
1068 	long ret;
1069 
1070 	if (unlikely(!maxlen))
1071 		return -ENOMEM;
1072 
1073 	__dest = get_loc_data(dest, base);
1074 
1075 	/*
1076 	 * Try to get string again, since the string can be changed while
1077 	 * probing.
1078 	 */
1079 	ret = strncpy_from_unsafe(__dest, (void *)addr, maxlen);
1080 	if (ret >= 0)
1081 		*(u32 *)dest = make_data_loc(ret, __dest - base);
1082 
1083 	return ret;
1084 }
1085 
1086 /*
1087  * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf
1088  * with max length and relative data location.
1089  */
1090 static nokprobe_inline int
1091 fetch_store_string_user(unsigned long addr, void *dest, void *base)
1092 {
1093 	const void __user *uaddr =  (__force const void __user *)addr;
1094 	int maxlen = get_loc_len(*(u32 *)dest);
1095 	void *__dest;
1096 	long ret;
1097 
1098 	if (unlikely(!maxlen))
1099 		return -ENOMEM;
1100 
1101 	__dest = get_loc_data(dest, base);
1102 
1103 	ret = strncpy_from_unsafe_user(__dest, uaddr, maxlen);
1104 	if (ret >= 0)
1105 		*(u32 *)dest = make_data_loc(ret, __dest - base);
1106 
1107 	return ret;
1108 }
1109 
1110 static nokprobe_inline int
1111 probe_mem_read(void *dest, void *src, size_t size)
1112 {
1113 	return probe_kernel_read(dest, src, size);
1114 }
1115 
1116 static nokprobe_inline int
1117 probe_mem_read_user(void *dest, void *src, size_t size)
1118 {
1119 	const void __user *uaddr =  (__force const void __user *)src;
1120 
1121 	return probe_user_read(dest, uaddr, size);
1122 }
1123 
1124 /* Note that we don't verify it, since the code does not come from user space */
1125 static int
1126 process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
1127 		   void *base)
1128 {
1129 	unsigned long val;
1130 
1131 retry:
1132 	/* 1st stage: get value from context */
1133 	switch (code->op) {
1134 	case FETCH_OP_REG:
1135 		val = regs_get_register(regs, code->param);
1136 		break;
1137 	case FETCH_OP_STACK:
1138 		val = regs_get_kernel_stack_nth(regs, code->param);
1139 		break;
1140 	case FETCH_OP_STACKP:
1141 		val = kernel_stack_pointer(regs);
1142 		break;
1143 	case FETCH_OP_RETVAL:
1144 		val = regs_return_value(regs);
1145 		break;
1146 	case FETCH_OP_IMM:
1147 		val = code->immediate;
1148 		break;
1149 	case FETCH_OP_COMM:
1150 		val = (unsigned long)current->comm;
1151 		break;
1152 	case FETCH_OP_DATA:
1153 		val = (unsigned long)code->data;
1154 		break;
1155 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
1156 	case FETCH_OP_ARG:
1157 		val = regs_get_kernel_argument(regs, code->param);
1158 		break;
1159 #endif
1160 	case FETCH_NOP_SYMBOL:	/* Ignore a place holder */
1161 		code++;
1162 		goto retry;
1163 	default:
1164 		return -EILSEQ;
1165 	}
1166 	code++;
1167 
1168 	return process_fetch_insn_bottom(code, val, dest, base);
1169 }
1170 NOKPROBE_SYMBOL(process_fetch_insn)
1171 
1172 /* Kprobe handler */
1173 static nokprobe_inline void
1174 __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
1175 		    struct trace_event_file *trace_file)
1176 {
1177 	struct kprobe_trace_entry_head *entry;
1178 	struct ring_buffer_event *event;
1179 	struct ring_buffer *buffer;
1180 	int size, dsize, pc;
1181 	unsigned long irq_flags;
1182 	struct trace_event_call *call = trace_probe_event_call(&tk->tp);
1183 
1184 	WARN_ON(call != trace_file->event_call);
1185 
1186 	if (trace_trigger_soft_disabled(trace_file))
1187 		return;
1188 
1189 	local_save_flags(irq_flags);
1190 	pc = preempt_count();
1191 
1192 	dsize = __get_data_size(&tk->tp, regs);
1193 	size = sizeof(*entry) + tk->tp.size + dsize;
1194 
1195 	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
1196 						call->event.type,
1197 						size, irq_flags, pc);
1198 	if (!event)
1199 		return;
1200 
1201 	entry = ring_buffer_event_data(event);
1202 	entry->ip = (unsigned long)tk->rp.kp.addr;
1203 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1204 
1205 	event_trigger_unlock_commit_regs(trace_file, buffer, event,
1206 					 entry, irq_flags, pc, regs);
1207 }
1208 
1209 static void
1210 kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
1211 {
1212 	struct event_file_link *link;
1213 
1214 	trace_probe_for_each_link_rcu(link, &tk->tp)
1215 		__kprobe_trace_func(tk, regs, link->file);
1216 }
1217 NOKPROBE_SYMBOL(kprobe_trace_func);
1218 
1219 /* Kretprobe handler */
1220 static nokprobe_inline void
1221 __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1222 		       struct pt_regs *regs,
1223 		       struct trace_event_file *trace_file)
1224 {
1225 	struct kretprobe_trace_entry_head *entry;
1226 	struct ring_buffer_event *event;
1227 	struct ring_buffer *buffer;
1228 	int size, pc, dsize;
1229 	unsigned long irq_flags;
1230 	struct trace_event_call *call = trace_probe_event_call(&tk->tp);
1231 
1232 	WARN_ON(call != trace_file->event_call);
1233 
1234 	if (trace_trigger_soft_disabled(trace_file))
1235 		return;
1236 
1237 	local_save_flags(irq_flags);
1238 	pc = preempt_count();
1239 
1240 	dsize = __get_data_size(&tk->tp, regs);
1241 	size = sizeof(*entry) + tk->tp.size + dsize;
1242 
1243 	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
1244 						call->event.type,
1245 						size, irq_flags, pc);
1246 	if (!event)
1247 		return;
1248 
1249 	entry = ring_buffer_event_data(event);
1250 	entry->func = (unsigned long)tk->rp.kp.addr;
1251 	entry->ret_ip = (unsigned long)ri->ret_addr;
1252 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1253 
1254 	event_trigger_unlock_commit_regs(trace_file, buffer, event,
1255 					 entry, irq_flags, pc, regs);
1256 }
1257 
1258 static void
1259 kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1260 		     struct pt_regs *regs)
1261 {
1262 	struct event_file_link *link;
1263 
1264 	trace_probe_for_each_link_rcu(link, &tk->tp)
1265 		__kretprobe_trace_func(tk, ri, regs, link->file);
1266 }
1267 NOKPROBE_SYMBOL(kretprobe_trace_func);
1268 
1269 /* Event entry printers */
1270 static enum print_line_t
1271 print_kprobe_event(struct trace_iterator *iter, int flags,
1272 		   struct trace_event *event)
1273 {
1274 	struct kprobe_trace_entry_head *field;
1275 	struct trace_seq *s = &iter->seq;
1276 	struct trace_probe *tp;
1277 
1278 	field = (struct kprobe_trace_entry_head *)iter->ent;
1279 	tp = trace_probe_primary_from_call(
1280 		container_of(event, struct trace_event_call, event));
1281 	if (WARN_ON_ONCE(!tp))
1282 		goto out;
1283 
1284 	trace_seq_printf(s, "%s: (", trace_probe_name(tp));
1285 
1286 	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1287 		goto out;
1288 
1289 	trace_seq_putc(s, ')');
1290 
1291 	if (print_probe_args(s, tp->args, tp->nr_args,
1292 			     (u8 *)&field[1], field) < 0)
1293 		goto out;
1294 
1295 	trace_seq_putc(s, '\n');
1296  out:
1297 	return trace_handle_return(s);
1298 }
1299 
1300 static enum print_line_t
1301 print_kretprobe_event(struct trace_iterator *iter, int flags,
1302 		      struct trace_event *event)
1303 {
1304 	struct kretprobe_trace_entry_head *field;
1305 	struct trace_seq *s = &iter->seq;
1306 	struct trace_probe *tp;
1307 
1308 	field = (struct kretprobe_trace_entry_head *)iter->ent;
1309 	tp = trace_probe_primary_from_call(
1310 		container_of(event, struct trace_event_call, event));
1311 	if (WARN_ON_ONCE(!tp))
1312 		goto out;
1313 
1314 	trace_seq_printf(s, "%s: (", trace_probe_name(tp));
1315 
1316 	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1317 		goto out;
1318 
1319 	trace_seq_puts(s, " <- ");
1320 
1321 	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1322 		goto out;
1323 
1324 	trace_seq_putc(s, ')');
1325 
1326 	if (print_probe_args(s, tp->args, tp->nr_args,
1327 			     (u8 *)&field[1], field) < 0)
1328 		goto out;
1329 
1330 	trace_seq_putc(s, '\n');
1331 
1332  out:
1333 	return trace_handle_return(s);
1334 }
1335 
1336 
1337 static int kprobe_event_define_fields(struct trace_event_call *event_call)
1338 {
1339 	int ret;
1340 	struct kprobe_trace_entry_head field;
1341 	struct trace_probe *tp;
1342 
1343 	tp = trace_probe_primary_from_call(event_call);
1344 	if (WARN_ON_ONCE(!tp))
1345 		return -ENOENT;
1346 
1347 	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1348 
1349 	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
1350 }
1351 
1352 static int kretprobe_event_define_fields(struct trace_event_call *event_call)
1353 {
1354 	int ret;
1355 	struct kretprobe_trace_entry_head field;
1356 	struct trace_probe *tp;
1357 
1358 	tp = trace_probe_primary_from_call(event_call);
1359 	if (WARN_ON_ONCE(!tp))
1360 		return -ENOENT;
1361 
1362 	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1363 	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1364 
1365 	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
1366 }
1367 
1368 #ifdef CONFIG_PERF_EVENTS
1369 
1370 /* Kprobe profile handler */
1371 static int
1372 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
1373 {
1374 	struct trace_event_call *call = trace_probe_event_call(&tk->tp);
1375 	struct kprobe_trace_entry_head *entry;
1376 	struct hlist_head *head;
1377 	int size, __size, dsize;
1378 	int rctx;
1379 
1380 	if (bpf_prog_array_valid(call)) {
1381 		unsigned long orig_ip = instruction_pointer(regs);
1382 		int ret;
1383 
1384 		ret = trace_call_bpf(call, regs);
1385 
1386 		/*
1387 		 * We need to check and see if we modified the pc of the
1388 		 * pt_regs, and if so return 1 so that we don't do the
1389 		 * single stepping.
1390 		 */
1391 		if (orig_ip != instruction_pointer(regs))
1392 			return 1;
1393 		if (!ret)
1394 			return 0;
1395 	}
1396 
1397 	head = this_cpu_ptr(call->perf_events);
1398 	if (hlist_empty(head))
1399 		return 0;
1400 
1401 	dsize = __get_data_size(&tk->tp, regs);
1402 	__size = sizeof(*entry) + tk->tp.size + dsize;
1403 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
1404 	size -= sizeof(u32);
1405 
1406 	entry = perf_trace_buf_alloc(size, NULL, &rctx);
1407 	if (!entry)
1408 		return 0;
1409 
1410 	entry->ip = (unsigned long)tk->rp.kp.addr;
1411 	memset(&entry[1], 0, dsize);
1412 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1413 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
1414 			      head, NULL);
1415 	return 0;
1416 }
1417 NOKPROBE_SYMBOL(kprobe_perf_func);
1418 
1419 /* Kretprobe profile handler */
1420 static void
1421 kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1422 		    struct pt_regs *regs)
1423 {
1424 	struct trace_event_call *call = trace_probe_event_call(&tk->tp);
1425 	struct kretprobe_trace_entry_head *entry;
1426 	struct hlist_head *head;
1427 	int size, __size, dsize;
1428 	int rctx;
1429 
1430 	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
1431 		return;
1432 
1433 	head = this_cpu_ptr(call->perf_events);
1434 	if (hlist_empty(head))
1435 		return;
1436 
1437 	dsize = __get_data_size(&tk->tp, regs);
1438 	__size = sizeof(*entry) + tk->tp.size + dsize;
1439 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
1440 	size -= sizeof(u32);
1441 
1442 	entry = perf_trace_buf_alloc(size, NULL, &rctx);
1443 	if (!entry)
1444 		return;
1445 
1446 	entry->func = (unsigned long)tk->rp.kp.addr;
1447 	entry->ret_ip = (unsigned long)ri->ret_addr;
1448 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1449 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
1450 			      head, NULL);
1451 }
1452 NOKPROBE_SYMBOL(kretprobe_perf_func);
1453 
1454 int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
1455 			const char **symbol, u64 *probe_offset,
1456 			u64 *probe_addr, bool perf_type_tracepoint)
1457 {
1458 	const char *pevent = trace_event_name(event->tp_event);
1459 	const char *group = event->tp_event->class->system;
1460 	struct trace_kprobe *tk;
1461 
1462 	if (perf_type_tracepoint)
1463 		tk = find_trace_kprobe(pevent, group);
1464 	else
1465 		tk = event->tp_event->data;
1466 	if (!tk)
1467 		return -EINVAL;
1468 
1469 	*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
1470 					      : BPF_FD_TYPE_KPROBE;
1471 	if (tk->symbol) {
1472 		*symbol = tk->symbol;
1473 		*probe_offset = tk->rp.kp.offset;
1474 		*probe_addr = 0;
1475 	} else {
1476 		*symbol = NULL;
1477 		*probe_offset = 0;
1478 		*probe_addr = (unsigned long)tk->rp.kp.addr;
1479 	}
1480 	return 0;
1481 }
1482 #endif	/* CONFIG_PERF_EVENTS */
1483 
1484 /*
1485  * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
1486  *
1487  * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
1488  * lockless, but we can't race with this __init function.
1489  */
1490 static int kprobe_register(struct trace_event_call *event,
1491 			   enum trace_reg type, void *data)
1492 {
1493 	struct trace_event_file *file = data;
1494 
1495 	switch (type) {
1496 	case TRACE_REG_REGISTER:
1497 		return enable_trace_kprobe(event, file);
1498 	case TRACE_REG_UNREGISTER:
1499 		return disable_trace_kprobe(event, file);
1500 
1501 #ifdef CONFIG_PERF_EVENTS
1502 	case TRACE_REG_PERF_REGISTER:
1503 		return enable_trace_kprobe(event, NULL);
1504 	case TRACE_REG_PERF_UNREGISTER:
1505 		return disable_trace_kprobe(event, NULL);
1506 	case TRACE_REG_PERF_OPEN:
1507 	case TRACE_REG_PERF_CLOSE:
1508 	case TRACE_REG_PERF_ADD:
1509 	case TRACE_REG_PERF_DEL:
1510 		return 0;
1511 #endif
1512 	}
1513 	return 0;
1514 }
1515 
1516 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1517 {
1518 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
1519 	int ret = 0;
1520 
1521 	raw_cpu_inc(*tk->nhit);
1522 
1523 	if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
1524 		kprobe_trace_func(tk, regs);
1525 #ifdef CONFIG_PERF_EVENTS
1526 	if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
1527 		ret = kprobe_perf_func(tk, regs);
1528 #endif
1529 	return ret;
1530 }
1531 NOKPROBE_SYMBOL(kprobe_dispatcher);
1532 
1533 static int
1534 kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1535 {
1536 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
1537 
1538 	raw_cpu_inc(*tk->nhit);
1539 
1540 	if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
1541 		kretprobe_trace_func(tk, ri, regs);
1542 #ifdef CONFIG_PERF_EVENTS
1543 	if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
1544 		kretprobe_perf_func(tk, ri, regs);
1545 #endif
1546 	return 0;	/* We don't tweek kernel, so just return 0 */
1547 }
1548 NOKPROBE_SYMBOL(kretprobe_dispatcher);
1549 
1550 static struct trace_event_functions kretprobe_funcs = {
1551 	.trace		= print_kretprobe_event
1552 };
1553 
1554 static struct trace_event_functions kprobe_funcs = {
1555 	.trace		= print_kprobe_event
1556 };
1557 
1558 static inline void init_trace_event_call(struct trace_kprobe *tk)
1559 {
1560 	struct trace_event_call *call = trace_probe_event_call(&tk->tp);
1561 
1562 	if (trace_kprobe_is_return(tk)) {
1563 		call->event.funcs = &kretprobe_funcs;
1564 		call->class->define_fields = kretprobe_event_define_fields;
1565 	} else {
1566 		call->event.funcs = &kprobe_funcs;
1567 		call->class->define_fields = kprobe_event_define_fields;
1568 	}
1569 
1570 	call->flags = TRACE_EVENT_FL_KPROBE;
1571 	call->class->reg = kprobe_register;
1572 }
1573 
1574 static int register_kprobe_event(struct trace_kprobe *tk)
1575 {
1576 	init_trace_event_call(tk);
1577 
1578 	return trace_probe_register_event_call(&tk->tp);
1579 }
1580 
1581 static int unregister_kprobe_event(struct trace_kprobe *tk)
1582 {
1583 	return trace_probe_unregister_event_call(&tk->tp);
1584 }
1585 
1586 #ifdef CONFIG_PERF_EVENTS
1587 /* create a trace_kprobe, but don't add it to global lists */
1588 struct trace_event_call *
1589 create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
1590 			  bool is_return)
1591 {
1592 	struct trace_kprobe *tk;
1593 	int ret;
1594 	char *event;
1595 
1596 	/*
1597 	 * local trace_kprobes are not added to dyn_event, so they are never
1598 	 * searched in find_trace_kprobe(). Therefore, there is no concern of
1599 	 * duplicated name here.
1600 	 */
1601 	event = func ? func : "DUMMY_EVENT";
1602 
1603 	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
1604 				offs, 0 /* maxactive */, 0 /* nargs */,
1605 				is_return);
1606 
1607 	if (IS_ERR(tk)) {
1608 		pr_info("Failed to allocate trace_probe.(%d)\n",
1609 			(int)PTR_ERR(tk));
1610 		return ERR_CAST(tk);
1611 	}
1612 
1613 	init_trace_event_call(tk);
1614 
1615 	if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
1616 		ret = -ENOMEM;
1617 		goto error;
1618 	}
1619 
1620 	ret = __register_trace_kprobe(tk);
1621 	if (ret < 0)
1622 		goto error;
1623 
1624 	return trace_probe_event_call(&tk->tp);
1625 error:
1626 	free_trace_kprobe(tk);
1627 	return ERR_PTR(ret);
1628 }
1629 
1630 void destroy_local_trace_kprobe(struct trace_event_call *event_call)
1631 {
1632 	struct trace_kprobe *tk;
1633 
1634 	tk = trace_kprobe_primary_from_call(event_call);
1635 	if (unlikely(!tk))
1636 		return;
1637 
1638 	if (trace_probe_is_enabled(&tk->tp)) {
1639 		WARN_ON(1);
1640 		return;
1641 	}
1642 
1643 	__unregister_trace_kprobe(tk);
1644 
1645 	free_trace_kprobe(tk);
1646 }
1647 #endif /* CONFIG_PERF_EVENTS */
1648 
1649 static __init void enable_boot_kprobe_events(void)
1650 {
1651 	struct trace_array *tr = top_trace_array();
1652 	struct trace_event_file *file;
1653 	struct trace_kprobe *tk;
1654 	struct dyn_event *pos;
1655 
1656 	mutex_lock(&event_mutex);
1657 	for_each_trace_kprobe(tk, pos) {
1658 		list_for_each_entry(file, &tr->events, list)
1659 			if (file->event_call == trace_probe_event_call(&tk->tp))
1660 				trace_event_enable_disable(file, 1, 0);
1661 	}
1662 	mutex_unlock(&event_mutex);
1663 }
1664 
1665 static __init void setup_boot_kprobe_events(void)
1666 {
1667 	char *p, *cmd = kprobe_boot_events_buf;
1668 	int ret;
1669 
1670 	strreplace(kprobe_boot_events_buf, ',', ' ');
1671 
1672 	while (cmd && *cmd != '\0') {
1673 		p = strchr(cmd, ';');
1674 		if (p)
1675 			*p++ = '\0';
1676 
1677 		ret = trace_run_command(cmd, create_or_delete_trace_kprobe);
1678 		if (ret)
1679 			pr_warn("Failed to add event(%d): %s\n", ret, cmd);
1680 		else
1681 			kprobe_boot_events_enabled = true;
1682 
1683 		cmd = p;
1684 	}
1685 
1686 	enable_boot_kprobe_events();
1687 }
1688 
1689 /* Make a tracefs interface for controlling probe points */
1690 static __init int init_kprobe_trace(void)
1691 {
1692 	struct dentry *d_tracer;
1693 	struct dentry *entry;
1694 	int ret;
1695 
1696 	ret = dyn_event_register(&trace_kprobe_ops);
1697 	if (ret)
1698 		return ret;
1699 
1700 	if (register_module_notifier(&trace_kprobe_module_nb))
1701 		return -EINVAL;
1702 
1703 	d_tracer = tracing_init_dentry();
1704 	if (IS_ERR(d_tracer))
1705 		return 0;
1706 
1707 	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
1708 				    NULL, &kprobe_events_ops);
1709 
1710 	/* Event list interface */
1711 	if (!entry)
1712 		pr_warn("Could not create tracefs 'kprobe_events' entry\n");
1713 
1714 	/* Profile interface */
1715 	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
1716 				    NULL, &kprobe_profile_ops);
1717 
1718 	if (!entry)
1719 		pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
1720 
1721 	setup_boot_kprobe_events();
1722 
1723 	return 0;
1724 }
1725 fs_initcall(init_kprobe_trace);
1726 
1727 
1728 #ifdef CONFIG_FTRACE_STARTUP_TEST
1729 static __init struct trace_event_file *
1730 find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
1731 {
1732 	struct trace_event_file *file;
1733 
1734 	list_for_each_entry(file, &tr->events, list)
1735 		if (file->event_call == trace_probe_event_call(&tk->tp))
1736 			return file;
1737 
1738 	return NULL;
1739 }
1740 
1741 /*
1742  * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
1743  * stage, we can do this lockless.
1744  */
1745 static __init int kprobe_trace_self_tests_init(void)
1746 {
1747 	int ret, warn = 0;
1748 	int (*target)(int, int, int, int, int, int);
1749 	struct trace_kprobe *tk;
1750 	struct trace_event_file *file;
1751 
1752 	if (tracing_is_disabled())
1753 		return -ENODEV;
1754 
1755 	if (kprobe_boot_events_enabled) {
1756 		pr_info("Skipping kprobe tests due to kprobe_event on cmdline\n");
1757 		return 0;
1758 	}
1759 
1760 	target = kprobe_trace_selftest_target;
1761 
1762 	pr_info("Testing kprobe tracing: ");
1763 
1764 	ret = trace_run_command("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)",
1765 				create_or_delete_trace_kprobe);
1766 	if (WARN_ON_ONCE(ret)) {
1767 		pr_warn("error on probing function entry.\n");
1768 		warn++;
1769 	} else {
1770 		/* Enable trace point */
1771 		tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
1772 		if (WARN_ON_ONCE(tk == NULL)) {
1773 			pr_warn("error on getting new probe.\n");
1774 			warn++;
1775 		} else {
1776 			file = find_trace_probe_file(tk, top_trace_array());
1777 			if (WARN_ON_ONCE(file == NULL)) {
1778 				pr_warn("error on getting probe file.\n");
1779 				warn++;
1780 			} else
1781 				enable_trace_kprobe(
1782 					trace_probe_event_call(&tk->tp), file);
1783 		}
1784 	}
1785 
1786 	ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target $retval",
1787 				create_or_delete_trace_kprobe);
1788 	if (WARN_ON_ONCE(ret)) {
1789 		pr_warn("error on probing function return.\n");
1790 		warn++;
1791 	} else {
1792 		/* Enable trace point */
1793 		tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
1794 		if (WARN_ON_ONCE(tk == NULL)) {
1795 			pr_warn("error on getting 2nd new probe.\n");
1796 			warn++;
1797 		} else {
1798 			file = find_trace_probe_file(tk, top_trace_array());
1799 			if (WARN_ON_ONCE(file == NULL)) {
1800 				pr_warn("error on getting probe file.\n");
1801 				warn++;
1802 			} else
1803 				enable_trace_kprobe(
1804 					trace_probe_event_call(&tk->tp), file);
1805 		}
1806 	}
1807 
1808 	if (warn)
1809 		goto end;
1810 
1811 	ret = target(1, 2, 3, 4, 5, 6);
1812 
1813 	/*
1814 	 * Not expecting an error here, the check is only to prevent the
1815 	 * optimizer from removing the call to target() as otherwise there
1816 	 * are no side-effects and the call is never performed.
1817 	 */
1818 	if (ret != 21)
1819 		warn++;
1820 
1821 	/* Disable trace points before removing it */
1822 	tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
1823 	if (WARN_ON_ONCE(tk == NULL)) {
1824 		pr_warn("error on getting test probe.\n");
1825 		warn++;
1826 	} else {
1827 		if (trace_kprobe_nhit(tk) != 1) {
1828 			pr_warn("incorrect number of testprobe hits\n");
1829 			warn++;
1830 		}
1831 
1832 		file = find_trace_probe_file(tk, top_trace_array());
1833 		if (WARN_ON_ONCE(file == NULL)) {
1834 			pr_warn("error on getting probe file.\n");
1835 			warn++;
1836 		} else
1837 			disable_trace_kprobe(
1838 				trace_probe_event_call(&tk->tp), file);
1839 	}
1840 
1841 	tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
1842 	if (WARN_ON_ONCE(tk == NULL)) {
1843 		pr_warn("error on getting 2nd test probe.\n");
1844 		warn++;
1845 	} else {
1846 		if (trace_kprobe_nhit(tk) != 1) {
1847 			pr_warn("incorrect number of testprobe2 hits\n");
1848 			warn++;
1849 		}
1850 
1851 		file = find_trace_probe_file(tk, top_trace_array());
1852 		if (WARN_ON_ONCE(file == NULL)) {
1853 			pr_warn("error on getting probe file.\n");
1854 			warn++;
1855 		} else
1856 			disable_trace_kprobe(
1857 				trace_probe_event_call(&tk->tp), file);
1858 	}
1859 
1860 	ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe);
1861 	if (WARN_ON_ONCE(ret)) {
1862 		pr_warn("error on deleting a probe.\n");
1863 		warn++;
1864 	}
1865 
1866 	ret = trace_run_command("-:testprobe2", create_or_delete_trace_kprobe);
1867 	if (WARN_ON_ONCE(ret)) {
1868 		pr_warn("error on deleting a probe.\n");
1869 		warn++;
1870 	}
1871 
1872 end:
1873 	ret = dyn_events_release_all(&trace_kprobe_ops);
1874 	if (WARN_ON_ONCE(ret)) {
1875 		pr_warn("error on cleaning up probes.\n");
1876 		warn++;
1877 	}
1878 	/*
1879 	 * Wait for the optimizer work to finish. Otherwise it might fiddle
1880 	 * with probes in already freed __init text.
1881 	 */
1882 	wait_for_kprobe_optimizer();
1883 	if (warn)
1884 		pr_cont("NG: Some tests are failed. Please check them.\n");
1885 	else
1886 		pr_cont("OK\n");
1887 	return 0;
1888 }
1889 
1890 late_initcall(kprobe_trace_self_tests_init);
1891 
1892 #endif
1893