xref: /openbmc/linux/kernel/trace/trace_kprobe.c (revision 4fc4dca8)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Kprobes-based tracing events
4  *
5  * Created by Masami Hiramatsu <mhiramat@redhat.com>
6  *
7  */
8 #define pr_fmt(fmt)	"trace_kprobe: " fmt
9 
10 #include <linux/module.h>
11 #include <linux/uaccess.h>
12 #include <linux/rculist.h>
13 #include <linux/error-injection.h>
14 
15 #include "trace_dynevent.h"
16 #include "trace_kprobe_selftest.h"
17 #include "trace_probe.h"
18 #include "trace_probe_tmpl.h"
19 
20 #define KPROBE_EVENT_SYSTEM "kprobes"
21 #define KRETPROBE_MAXACTIVE_MAX 4096
22 
23 static int trace_kprobe_create(int argc, const char **argv);
24 static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev);
25 static int trace_kprobe_release(struct dyn_event *ev);
26 static bool trace_kprobe_is_busy(struct dyn_event *ev);
27 static bool trace_kprobe_match(const char *system, const char *event,
28 			       struct dyn_event *ev);
29 
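/*
 * These callbacks hook kprobe events into the generic dynamic event
 * interface (trace_dynevent.h), so the probes can also be created and
 * removed through the shared dynamic_events file, not only through
 * kprobe_events.
 */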
30 static struct dyn_event_operations trace_kprobe_ops = {
31 	.create = trace_kprobe_create,
32 	.show = trace_kprobe_show,
33 	.is_busy = trace_kprobe_is_busy,
34 	.free = trace_kprobe_release,
35 	.match = trace_kprobe_match,
36 };
37 
38 /*
39  * Kprobe event core functions
40  */
41 struct trace_kprobe {
42 	struct dyn_event	devent;
43 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
44 	unsigned long __percpu *nhit;
45 	const char		*symbol;	/* symbol name */
46 	struct trace_probe	tp;
47 };
48 
49 static bool is_trace_kprobe(struct dyn_event *ev)
50 {
51 	return ev->ops == &trace_kprobe_ops;
52 }
53 
54 static struct trace_kprobe *to_trace_kprobe(struct dyn_event *ev)
55 {
56 	return container_of(ev, struct trace_kprobe, devent);
57 }
58 
59 /**
60  * for_each_trace_kprobe - iterate over the trace_kprobe list
61  * @pos:	the struct trace_kprobe * for each entry
62  * @dpos:	the struct dyn_event * to use as a loop cursor
63  */
64 #define for_each_trace_kprobe(pos, dpos)	\
65 	for_each_dyn_event(dpos)		\
66 		if (is_trace_kprobe(dpos) && (pos = to_trace_kprobe(dpos)))
67 
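/*
 * Allocation size of a trace_kprobe carrying @n probe arguments:
 * everything up to the trailing tp.args[] array plus @n probe_arg
 * entries.
 */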
68 #define SIZEOF_TRACE_KPROBE(n)				\
69 	(offsetof(struct trace_kprobe, tp.args) +	\
70 	(sizeof(struct probe_arg) * (n)))
71 
72 static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
73 {
74 	return tk->rp.handler != NULL;
75 }
76 
77 static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
78 {
79 	return tk->symbol ? tk->symbol : "unknown";
80 }
81 
82 static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
83 {
84 	return tk->rp.kp.offset;
85 }
86 
87 static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
88 {
89 	return !!(kprobe_gone(&tk->rp.kp));
90 }
91 
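/*
 * A module symbol is specified as "MODNAME:SYMBOL" (e.g.
 * "ext4:ext4_sync_file"), so a probe belongs to @mod when its symbol
 * string starts with mod->name followed by ':'.
 */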
92 static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
93 						 struct module *mod)
94 {
95 	int len = strlen(mod->name);
96 	const char *name = trace_kprobe_symbol(tk);
97 	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
98 }
99 
100 static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
101 {
102 	char *p;
103 	bool ret;
104 
105 	if (!tk->symbol)
106 		return false;
107 	p = strchr(tk->symbol, ':');
108 	if (!p)
109 		return true;
110 	*p = '\0';
111 	mutex_lock(&module_mutex);
112 	ret = !!find_module(tk->symbol);
113 	mutex_unlock(&module_mutex);
114 	*p = ':';
115 
116 	return ret;
117 }
118 
119 static bool trace_kprobe_is_busy(struct dyn_event *ev)
120 {
121 	struct trace_kprobe *tk = to_trace_kprobe(ev);
122 
123 	return trace_probe_is_enabled(&tk->tp);
124 }
125 
126 static bool trace_kprobe_match(const char *system, const char *event,
127 			       struct dyn_event *ev)
128 {
129 	struct trace_kprobe *tk = to_trace_kprobe(ev);
130 
131 	return strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
132 	    (!system || strcmp(tk->tp.call.class->system, system) == 0);
133 }
134 
135 static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
136 {
137 	unsigned long nhit = 0;
138 	int cpu;
139 
140 	for_each_possible_cpu(cpu)
141 		nhit += *per_cpu_ptr(tk->nhit, cpu);
142 
143 	return nhit;
144 }
145 
146 /* Return 0 if it fails to find the symbol address */
147 static nokprobe_inline
148 unsigned long trace_kprobe_address(struct trace_kprobe *tk)
149 {
150 	unsigned long addr;
151 
152 	if (tk->symbol) {
153 		addr = (unsigned long)
154 			kallsyms_lookup_name(trace_kprobe_symbol(tk));
155 		if (addr)
156 			addr += tk->rp.kp.offset;
157 	} else {
158 		addr = (unsigned long)tk->rp.kp.addr;
159 	}
160 	return addr;
161 }
162 
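/*
 * The two helpers below are used by the perf/BPF core when attaching a
 * program that overrides the probed function's return value
 * (bpf_override_return()): that is only allowed for probes placed on a
 * function entry whose target is on the error injection list.
 */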
163 bool trace_kprobe_on_func_entry(struct trace_event_call *call)
164 {
165 	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
166 
167 	return kprobe_on_func_entry(tk->rp.kp.addr,
168 			tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
169 			tk->rp.kp.addr ? 0 : tk->rp.kp.offset);
170 }
171 
172 bool trace_kprobe_error_injectable(struct trace_event_call *call)
173 {
174 	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
175 
176 	return within_error_injection_list(trace_kprobe_address(tk));
177 }
178 
179 static int register_kprobe_event(struct trace_kprobe *tk);
180 static int unregister_kprobe_event(struct trace_kprobe *tk);
181 
182 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
183 static int kretprobe_dispatcher(struct kretprobe_instance *ri,
184 				struct pt_regs *regs);
185 
186 /*
187  * Allocate new trace_probe and initialize it (including kprobes).
188  */
189 static struct trace_kprobe *alloc_trace_kprobe(const char *group,
190 					     const char *event,
191 					     void *addr,
192 					     const char *symbol,
193 					     unsigned long offs,
194 					     int maxactive,
195 					     int nargs, bool is_return)
196 {
197 	struct trace_kprobe *tk;
198 	int ret = -ENOMEM;
199 
200 	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
201 	if (!tk)
202 		return ERR_PTR(ret);
203 
204 	tk->nhit = alloc_percpu(unsigned long);
205 	if (!tk->nhit)
206 		goto error;
207 
208 	if (symbol) {
209 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
210 		if (!tk->symbol)
211 			goto error;
212 		tk->rp.kp.symbol_name = tk->symbol;
213 		tk->rp.kp.offset = offs;
214 	} else
215 		tk->rp.kp.addr = addr;
216 
217 	if (is_return)
218 		tk->rp.handler = kretprobe_dispatcher;
219 	else
220 		tk->rp.kp.pre_handler = kprobe_dispatcher;
221 
222 	tk->rp.maxactive = maxactive;
223 
224 	if (!event || !group) {
225 		ret = -EINVAL;
226 		goto error;
227 	}
228 
229 	tk->tp.call.class = &tk->tp.class;
230 	tk->tp.call.name = kstrdup(event, GFP_KERNEL);
231 	if (!tk->tp.call.name)
232 		goto error;
233 
234 	tk->tp.class.system = kstrdup(group, GFP_KERNEL);
235 	if (!tk->tp.class.system)
236 		goto error;
237 
238 	dyn_event_init(&tk->devent, &trace_kprobe_ops);
239 	INIT_LIST_HEAD(&tk->tp.files);
240 	return tk;
241 error:
242 	kfree(tk->tp.call.name);
243 	kfree(tk->symbol);
244 	free_percpu(tk->nhit);
245 	kfree(tk);
246 	return ERR_PTR(ret);
247 }
248 
249 static void free_trace_kprobe(struct trace_kprobe *tk)
250 {
251 	int i;
252 
253 	if (!tk)
254 		return;
255 
256 	for (i = 0; i < tk->tp.nr_args; i++)
257 		traceprobe_free_probe_arg(&tk->tp.args[i]);
258 
259 	kfree(tk->tp.call.class->system);
260 	kfree(tk->tp.call.name);
261 	kfree(tk->symbol);
262 	free_percpu(tk->nhit);
263 	kfree(tk);
264 }
265 
266 static struct trace_kprobe *find_trace_kprobe(const char *event,
267 					      const char *group)
268 {
269 	struct dyn_event *pos;
270 	struct trace_kprobe *tk;
271 
272 	for_each_trace_kprobe(tk, pos)
273 		if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
274 		    strcmp(tk->tp.call.class->system, group) == 0)
275 			return tk;
276 	return NULL;
277 }
278 
279 static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
280 {
281 	int ret = 0;
282 
283 	if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
284 		if (trace_kprobe_is_return(tk))
285 			ret = enable_kretprobe(&tk->rp);
286 		else
287 			ret = enable_kprobe(&tk->rp.kp);
288 	}
289 
290 	return ret;
291 }
292 
293 /*
294  * Enable trace_probe
295  * If file is NULL, enable the "perf" handler; otherwise enable the "trace" handler.
296  */
297 static int
298 enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
299 {
300 	struct event_file_link *link;
301 	int ret = 0;
302 
303 	if (file) {
304 		link = kmalloc(sizeof(*link), GFP_KERNEL);
305 		if (!link) {
306 			ret = -ENOMEM;
307 			goto out;
308 		}
309 
310 		link->file = file;
311 		list_add_tail_rcu(&link->list, &tk->tp.files);
312 
313 		tk->tp.flags |= TP_FLAG_TRACE;
314 		ret = __enable_trace_kprobe(tk);
315 		if (ret) {
316 			list_del_rcu(&link->list);
317 			kfree(link);
318 			tk->tp.flags &= ~TP_FLAG_TRACE;
319 		}
320 
321 	} else {
322 		tk->tp.flags |= TP_FLAG_PROFILE;
323 		ret = __enable_trace_kprobe(tk);
324 		if (ret)
325 			tk->tp.flags &= ~TP_FLAG_PROFILE;
326 	}
327  out:
328 	return ret;
329 }
330 
331 /*
332  * Disable trace_probe
333  * If file is NULL, disable the "perf" handler; otherwise disable the "trace" handler.
334  */
335 static int
336 disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
337 {
338 	struct event_file_link *link = NULL;
339 	int wait = 0;
340 	int ret = 0;
341 
342 	if (file) {
343 		link = find_event_file_link(&tk->tp, file);
344 		if (!link) {
345 			ret = -EINVAL;
346 			goto out;
347 		}
348 
349 		list_del_rcu(&link->list);
350 		wait = 1;
351 		if (!list_empty(&tk->tp.files))
352 			goto out;
353 
354 		tk->tp.flags &= ~TP_FLAG_TRACE;
355 	} else
356 		tk->tp.flags &= ~TP_FLAG_PROFILE;
357 
358 	if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
359 		if (trace_kprobe_is_return(tk))
360 			disable_kretprobe(&tk->rp);
361 		else
362 			disable_kprobe(&tk->rp.kp);
363 		wait = 1;
364 	}
365 
366 	/*
367 	 * If tk is not added to any list, it must be a local trace_kprobe
368 	 * created with perf_event_open. We don't need to wait for these
369 	 * trace_kprobes.
370 	 */
371 	if (list_empty(&tk->devent.list))
372 		wait = 0;
373  out:
374 	if (wait) {
375 		/*
376 		 * Synchronize with kprobe_trace_func/kretprobe_trace_func
377 		 * to make sure the probe is really disabled (all running
378 		 * handlers have finished). This is not only for kfree(): the
379 		 * caller, trace_remove_event_call(), also relies on it before
380 		 * releasing event_call related objects, which are accessed in
381 		 * kprobe_trace_func/kretprobe_trace_func.
382 		 */
383 		synchronize_rcu();
384 		kfree(link);	/* Ignored if link == NULL */
385 	}
386 
387 	return ret;
388 }
389 
390 #if defined(CONFIG_KPROBES_ON_FTRACE) && \
391 	!defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
392 static bool within_notrace_func(struct trace_kprobe *tk)
393 {
394 	unsigned long offset, size, addr;
395 
396 	addr = trace_kprobe_address(tk);
397 	if (!addr || !kallsyms_lookup_size_offset(addr, &size, &offset))
398 		return false;
399 
400 	/* Get the entry address of the target function */
401 	addr -= offset;
402 
403 	/*
404 	 * Since ftrace_location_range() does an inclusive range check, we need
405 	 * to subtract 1 byte from the end address.
406 	 */
407 	return !ftrace_location_range(addr, addr + size - 1);
408 }
409 #else
410 #define within_notrace_func(tk)	(false)
411 #endif
412 
413 /* Internal register function - just handle k*probes and flags */
414 static int __register_trace_kprobe(struct trace_kprobe *tk)
415 {
416 	int i, ret;
417 
418 	if (trace_probe_is_registered(&tk->tp))
419 		return -EINVAL;
420 
421 	if (within_notrace_func(tk)) {
422 		pr_warn("Could not probe notrace function %s\n",
423 			trace_kprobe_symbol(tk));
424 		return -EINVAL;
425 	}
426 
427 	for (i = 0; i < tk->tp.nr_args; i++) {
428 		ret = traceprobe_update_arg(&tk->tp.args[i]);
429 		if (ret)
430 			return ret;
431 	}
432 
433 	/* Set/clear the disabled flag according to tp->flags */
434 	if (trace_probe_is_enabled(&tk->tp))
435 		tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
436 	else
437 		tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;
438 
439 	if (trace_kprobe_is_return(tk))
440 		ret = register_kretprobe(&tk->rp);
441 	else
442 		ret = register_kprobe(&tk->rp.kp);
443 
444 	if (ret == 0)
445 		tk->tp.flags |= TP_FLAG_REGISTERED;
446 	return ret;
447 }
448 
449 /* Internal unregister function - just handle k*probes and flags */
450 static void __unregister_trace_kprobe(struct trace_kprobe *tk)
451 {
452 	if (trace_probe_is_registered(&tk->tp)) {
453 		if (trace_kprobe_is_return(tk))
454 			unregister_kretprobe(&tk->rp);
455 		else
456 			unregister_kprobe(&tk->rp.kp);
457 		tk->tp.flags &= ~TP_FLAG_REGISTERED;
458 		/* Cleanup kprobe for reuse */
459 		if (tk->rp.kp.symbol_name)
460 			tk->rp.kp.addr = NULL;
461 	}
462 }
463 
464 /* Unregister a trace_probe and probe_event */
465 static int unregister_trace_kprobe(struct trace_kprobe *tk)
466 {
467 	/* An enabled event cannot be unregistered */
468 	if (trace_probe_is_enabled(&tk->tp))
469 		return -EBUSY;
470 
471 	/* Will fail if probe is being used by ftrace or perf */
472 	if (unregister_kprobe_event(tk))
473 		return -EBUSY;
474 
475 	__unregister_trace_kprobe(tk);
476 	dyn_event_remove(&tk->devent);
477 
478 	return 0;
479 }
480 
481 /* Register a trace_probe and probe_event */
482 static int register_trace_kprobe(struct trace_kprobe *tk)
483 {
484 	struct trace_kprobe *old_tk;
485 	int ret;
486 
487 	mutex_lock(&event_mutex);
488 
489 	/* Delete old (same name) event if it exists */
490 	old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
491 			tk->tp.call.class->system);
492 	if (old_tk) {
493 		ret = unregister_trace_kprobe(old_tk);
494 		if (ret < 0)
495 			goto end;
496 		free_trace_kprobe(old_tk);
497 	}
498 
499 	/* Register new event */
500 	ret = register_kprobe_event(tk);
501 	if (ret) {
502 		pr_warn("Failed to register probe event(%d)\n", ret);
503 		goto end;
504 	}
505 
506 	/* Register k*probe */
507 	ret = __register_trace_kprobe(tk);
508 	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
509 		pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
510 		ret = 0;
511 	}
512 
513 	if (ret < 0)
514 		unregister_kprobe_event(tk);
515 	else
516 		dyn_event_add(&tk->devent);
517 
518 end:
519 	mutex_unlock(&event_mutex);
520 	return ret;
521 }
522 
523 /* Module notifier callback, checking events on the coming module */
524 static int trace_kprobe_module_callback(struct notifier_block *nb,
525 				       unsigned long val, void *data)
526 {
527 	struct module *mod = data;
528 	struct dyn_event *pos;
529 	struct trace_kprobe *tk;
530 	int ret;
531 
532 	if (val != MODULE_STATE_COMING)
533 		return NOTIFY_DONE;
534 
535 	/* Update probes on coming module */
536 	mutex_lock(&event_mutex);
537 	for_each_trace_kprobe(tk, pos) {
538 		if (trace_kprobe_within_module(tk, mod)) {
539 			/* Don't need to check busy - this should have gone. */
540 			__unregister_trace_kprobe(tk);
541 			ret = __register_trace_kprobe(tk);
542 			if (ret)
543 				pr_warn("Failed to re-register probe %s on %s: %d\n",
544 					trace_event_name(&tk->tp.call),
545 					mod->name, ret);
546 		}
547 	}
548 	mutex_unlock(&event_mutex);
549 
550 	return NOTIFY_DONE;
551 }
552 
553 static struct notifier_block trace_kprobe_module_nb = {
554 	.notifier_call = trace_kprobe_module_callback,
555 	.priority = 1	/* Invoked after kprobe module callback */
556 };
557 
558 /* Convert ':' and '.' in an auto-generated event name into '_' */
559 static inline void sanitize_event_name(char *name)
560 {
561 	while (*name++ != '\0')
562 		if (*name == ':' || *name == '.')
563 			*name = '_';
564 }
565 
566 static int trace_kprobe_create(int argc, const char *argv[])
567 {
568 	/*
569 	 * Argument syntax:
570 	 *  - Add kprobe:
571 	 *      p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
572 	 *  - Add kretprobe:
573 	 *      r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
574 	 * Fetch args:
575 	 *  $retval	: fetch return value
576 	 *  $stack	: fetch stack address
577 	 *  $stackN	: fetch Nth entry of the stack (N >= 0)
578 	 *  $comm       : fetch current task comm
579 	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
580 	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
581 	 *  %REG	: fetch register REG
582 	 * Dereferencing memory fetch:
583 	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
584 	 * Alias name of args:
585 	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
586 	 * Type of args:
587 	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
588 	 */
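	/*
	 * For example (illustrative; register names and argument offsets
	 * are architecture specific):
	 *
	 *   p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)
	 *   r:myretprobe do_sys_open $retval
	 *
	 * define a kprobe event "kprobes/myprobe" on do_sys_open() and a
	 * kretprobe event "kprobes/myretprobe" reporting its return value.
	 */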
589 	struct trace_kprobe *tk = NULL;
590 	int i, len, ret = 0;
591 	bool is_return = false;
592 	char *symbol = NULL, *tmp = NULL;
593 	const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
594 	int maxactive = 0;
595 	long offset = 0;
596 	void *addr = NULL;
597 	char buf[MAX_EVENT_NAME_LEN];
598 	unsigned int flags = TPARG_FL_KERNEL;
599 
600 	switch (argv[0][0]) {
601 	case 'r':
602 		is_return = true;
603 		flags |= TPARG_FL_RETURN;
604 		break;
605 	case 'p':
606 		break;
607 	default:
608 		return -ECANCELED;
609 	}
610 	if (argc < 2)
611 		return -ECANCELED;
612 
613 	trace_probe_log_init("trace_kprobe", argc, argv);
614 
615 	event = strchr(&argv[0][1], ':');
616 	if (event)
617 		event++;
618 
619 	if (isdigit(argv[0][1])) {
620 		if (!is_return) {
621 			trace_probe_log_err(1, MAXACT_NO_KPROBE);
622 			goto parse_error;
623 		}
624 		if (event)
625 			len = event - &argv[0][1] - 1;
626 		else
627 			len = strlen(&argv[0][1]);
628 		if (len > MAX_EVENT_NAME_LEN - 1) {
629 			trace_probe_log_err(1, BAD_MAXACT);
630 			goto parse_error;
631 		}
632 		memcpy(buf, &argv[0][1], len);
633 		buf[len] = '\0';
634 		ret = kstrtouint(buf, 0, &maxactive);
635 		if (ret || !maxactive) {
636 			trace_probe_log_err(1, BAD_MAXACT);
637 			goto parse_error;
638 		}
639 		/* kretprobe instances are iterated over via a list. The
640 		 * maximum should stay reasonable.
641 		 */
642 		if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
643 			trace_probe_log_err(1, MAXACT_TOO_BIG);
644 			goto parse_error;
645 		}
646 	}
647 
648 	/* Try to parse an address. If that fails, try to read the
649 	 * input as a symbol. */
650 	if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
651 		trace_probe_log_set_index(1);
652 		/* Check whether a uprobe event was specified */
653 		if (strchr(argv[1], '/') && strchr(argv[1], ':')) {
654 			ret = -ECANCELED;
655 			goto error;
656 		}
657 		/* a symbol specified */
658 		symbol = kstrdup(argv[1], GFP_KERNEL);
659 		if (!symbol)
660 			return -ENOMEM;
661 		/* TODO: support .init module functions */
662 		ret = traceprobe_split_symbol_offset(symbol, &offset);
663 		if (ret || offset < 0 || offset > UINT_MAX) {
664 			trace_probe_log_err(0, BAD_PROBE_ADDR);
665 			goto parse_error;
666 		}
667 		if (kprobe_on_func_entry(NULL, symbol, offset))
668 			flags |= TPARG_FL_FENTRY;
669 		if (offset && is_return && !(flags & TPARG_FL_FENTRY)) {
670 			trace_probe_log_err(0, BAD_RETPROBE);
671 			goto parse_error;
672 		}
673 	}
674 
675 	trace_probe_log_set_index(0);
676 	if (event) {
677 		ret = traceprobe_parse_event_name(&event, &group, buf,
678 						  event - argv[0]);
679 		if (ret)
680 			goto parse_error;
681 	} else {
682 		/* Make a new event name */
683 		if (symbol)
684 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
685 				 is_return ? 'r' : 'p', symbol, offset);
686 		else
687 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
688 				 is_return ? 'r' : 'p', addr);
689 		sanitize_event_name(buf);
690 		event = buf;
691 	}
692 
693 	/* setup a probe */
694 	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
695 			       argc - 2, is_return);
696 	if (IS_ERR(tk)) {
697 		ret = PTR_ERR(tk);
698 		/* This must return -ENOMEM, else there is a bug */
699 		WARN_ON_ONCE(ret != -ENOMEM);
700 		goto out;	/* We know tk is not allocated */
701 	}
702 	argc -= 2; argv += 2;
703 
704 	/* parse arguments */
705 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
706 		tmp = kstrdup(argv[i], GFP_KERNEL);
707 		if (!tmp) {
708 			ret = -ENOMEM;
709 			goto error;
710 		}
711 
712 		trace_probe_log_set_index(i + 2);
713 		ret = traceprobe_parse_probe_arg(&tk->tp, i, tmp, flags);
714 		kfree(tmp);
715 		if (ret)
716 			goto error;	/* This can be -ENOMEM */
717 	}
718 
719 	ret = register_trace_kprobe(tk);
720 	if (ret) {
721 		trace_probe_log_set_index(1);
722 		if (ret == -EILSEQ)
723 			trace_probe_log_err(0, BAD_INSN_BNDRY);
724 		else if (ret == -ENOENT)
725 			trace_probe_log_err(0, BAD_PROBE_ADDR);
726 		else if (ret != -ENOMEM)
727 			trace_probe_log_err(0, FAIL_REG_PROBE);
728 		goto error;
729 	}
730 
731 out:
732 	trace_probe_log_clear();
733 	kfree(symbol);
734 	return ret;
735 
736 parse_error:
737 	ret = -EINVAL;
738 error:
739 	free_trace_kprobe(tk);
740 	goto out;
741 }
742 
743 static int create_or_delete_trace_kprobe(int argc, char **argv)
744 {
745 	int ret;
746 
747 	if (argv[0][0] == '-')
748 		return dyn_event_release(argc, argv, &trace_kprobe_ops);
749 
750 	ret = trace_kprobe_create(argc, (const char **)argv);
751 	return ret == -ECANCELED ? -EINVAL : ret;
752 }
753 
754 static int trace_kprobe_release(struct dyn_event *ev)
755 {
756 	struct trace_kprobe *tk = to_trace_kprobe(ev);
757 	int ret = unregister_trace_kprobe(tk);
758 
759 	if (!ret)
760 		free_trace_kprobe(tk);
761 	return ret;
762 }
763 
764 static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
765 {
766 	struct trace_kprobe *tk = to_trace_kprobe(ev);
767 	int i;
768 
769 	seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
770 	seq_printf(m, ":%s/%s", tk->tp.call.class->system,
771 			trace_event_name(&tk->tp.call));
772 
773 	if (!tk->symbol)
774 		seq_printf(m, " 0x%p", tk->rp.kp.addr);
775 	else if (tk->rp.kp.offset)
776 		seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
777 			   tk->rp.kp.offset);
778 	else
779 		seq_printf(m, " %s", trace_kprobe_symbol(tk));
780 
781 	for (i = 0; i < tk->tp.nr_args; i++)
782 		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
783 	seq_putc(m, '\n');
784 
785 	return 0;
786 }
787 
788 static int probes_seq_show(struct seq_file *m, void *v)
789 {
790 	struct dyn_event *ev = v;
791 
792 	if (!is_trace_kprobe(ev))
793 		return 0;
794 
795 	return trace_kprobe_show(m, ev);
796 }
797 
798 static const struct seq_operations probes_seq_op = {
799 	.start  = dyn_event_seq_start,
800 	.next   = dyn_event_seq_next,
801 	.stop   = dyn_event_seq_stop,
802 	.show   = probes_seq_show
803 };
804 
805 static int probes_open(struct inode *inode, struct file *file)
806 {
807 	int ret;
808 
809 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
810 		ret = dyn_events_release_all(&trace_kprobe_ops);
811 		if (ret < 0)
812 			return ret;
813 	}
814 
815 	return seq_open(file, &probes_seq_op);
816 }
817 
818 static ssize_t probes_write(struct file *file, const char __user *buffer,
819 			    size_t count, loff_t *ppos)
820 {
821 	return trace_parse_run_command(file, buffer, count, ppos,
822 				       create_or_delete_trace_kprobe);
823 }
824 
825 static const struct file_operations kprobe_events_ops = {
826 	.owner          = THIS_MODULE,
827 	.open           = probes_open,
828 	.read           = seq_read,
829 	.llseek         = seq_lseek,
830 	.release        = seq_release,
831 	.write		= probes_write,
832 };
833 
834 /* Probes profiling interfaces */
835 static int probes_profile_seq_show(struct seq_file *m, void *v)
836 {
837 	struct dyn_event *ev = v;
838 	struct trace_kprobe *tk;
839 
840 	if (!is_trace_kprobe(ev))
841 		return 0;
842 
843 	tk = to_trace_kprobe(ev);
844 	seq_printf(m, "  %-44s %15lu %15lu\n",
845 		   trace_event_name(&tk->tp.call),
846 		   trace_kprobe_nhit(tk),
847 		   tk->rp.kp.nmissed);
848 
849 	return 0;
850 }
851 
852 static const struct seq_operations profile_seq_op = {
853 	.start  = dyn_event_seq_start,
854 	.next   = dyn_event_seq_next,
855 	.stop   = dyn_event_seq_stop,
856 	.show   = probes_profile_seq_show
857 };
858 
859 static int profile_open(struct inode *inode, struct file *file)
860 {
861 	return seq_open(file, &profile_seq_op);
862 }
863 
864 static const struct file_operations kprobe_profile_ops = {
865 	.owner          = THIS_MODULE,
866 	.open           = profile_open,
867 	.read           = seq_read,
868 	.llseek         = seq_lseek,
869 	.release        = seq_release,
870 };
871 
872 /* Kprobe specific fetch functions */
873 
874 /* Return the length of the string, including the terminating NUL byte */
875 static nokprobe_inline int
876 fetch_store_strlen(unsigned long addr)
877 {
878 	int ret, len = 0;
879 	u8 c;
880 
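	/*
	 * Read byte by byte until the terminating NUL: for "abc" this
	 * returns 4. A faulting address makes probe_kernel_read() fail and
	 * the negative error code is returned instead.
	 */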
881 	do {
882 		ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
883 		len++;
884 	} while (c && ret == 0 && len < MAX_STRING_SIZE);
885 
886 	return (ret < 0) ? ret : len;
887 }
888 
889 /*
890  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
891  * length and relative data location.
892  */
893 static nokprobe_inline int
894 fetch_store_string(unsigned long addr, void *dest, void *base)
895 {
896 	int maxlen = get_loc_len(*(u32 *)dest);
897 	u8 *dst = get_loc_data(dest, base);
898 	long ret;
899 
900 	if (unlikely(!maxlen))
901 		return -ENOMEM;
902 	/*
903 	 * Try to get string again, since the string can be changed while
904 	 * probing.
905 	 */
906 	ret = strncpy_from_unsafe(dst, (void *)addr, maxlen);
907 
908 	if (ret >= 0)
909 		*(u32 *)dest = make_data_loc(ret, (void *)dst - base);
910 	return ret;
911 }
912 
913 static nokprobe_inline int
914 probe_mem_read(void *dest, void *src, size_t size)
915 {
916 	return probe_kernel_read(dest, src, size);
917 }
918 
919 /* Note that we don't verify it, since the code does not come from user space */
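/*
 * First stage of the fetch "program": turn the FETCH_OP_* source
 * (register, stack slot, return value, immediate, ...) into a value,
 * then hand it to process_fetch_insn_bottom() (trace_probe_tmpl.h) for
 * the shared dereference/store stages.
 */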
920 static int
921 process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
922 		   void *base)
923 {
924 	unsigned long val;
925 
926 retry:
927 	/* 1st stage: get value from context */
928 	switch (code->op) {
929 	case FETCH_OP_REG:
930 		val = regs_get_register(regs, code->param);
931 		break;
932 	case FETCH_OP_STACK:
933 		val = regs_get_kernel_stack_nth(regs, code->param);
934 		break;
935 	case FETCH_OP_STACKP:
936 		val = kernel_stack_pointer(regs);
937 		break;
938 	case FETCH_OP_RETVAL:
939 		val = regs_return_value(regs);
940 		break;
941 	case FETCH_OP_IMM:
942 		val = code->immediate;
943 		break;
944 	case FETCH_OP_COMM:
945 		val = (unsigned long)current->comm;
946 		break;
947 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
948 	case FETCH_OP_ARG:
949 		val = regs_get_kernel_argument(regs, code->param);
950 		break;
951 #endif
952 	case FETCH_NOP_SYMBOL:	/* Ignore a placeholder */
953 		code++;
954 		goto retry;
955 	default:
956 		return -EILSEQ;
957 	}
958 	code++;
959 
960 	return process_fetch_insn_bottom(code, val, dest, base);
961 }
962 NOKPROBE_SYMBOL(process_fetch_insn)
963 
964 /* Kprobe handler */
965 static nokprobe_inline void
966 __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
967 		    struct trace_event_file *trace_file)
968 {
969 	struct kprobe_trace_entry_head *entry;
970 	struct ring_buffer_event *event;
971 	struct ring_buffer *buffer;
972 	int size, dsize, pc;
973 	unsigned long irq_flags;
974 	struct trace_event_call *call = &tk->tp.call;
975 
976 	WARN_ON(call != trace_file->event_call);
977 
978 	if (trace_trigger_soft_disabled(trace_file))
979 		return;
980 
981 	local_save_flags(irq_flags);
982 	pc = preempt_count();
983 
984 	dsize = __get_data_size(&tk->tp, regs);
985 	size = sizeof(*entry) + tk->tp.size + dsize;
986 
987 	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
988 						call->event.type,
989 						size, irq_flags, pc);
990 	if (!event)
991 		return;
992 
993 	entry = ring_buffer_event_data(event);
994 	entry->ip = (unsigned long)tk->rp.kp.addr;
995 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
996 
997 	event_trigger_unlock_commit_regs(trace_file, buffer, event,
998 					 entry, irq_flags, pc, regs);
999 }
1000 
1001 static void
1002 kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
1003 {
1004 	struct event_file_link *link;
1005 
1006 	list_for_each_entry_rcu(link, &tk->tp.files, list)
1007 		__kprobe_trace_func(tk, regs, link->file);
1008 }
1009 NOKPROBE_SYMBOL(kprobe_trace_func);
1010 
1011 /* Kretprobe handler */
1012 static nokprobe_inline void
1013 __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1014 		       struct pt_regs *regs,
1015 		       struct trace_event_file *trace_file)
1016 {
1017 	struct kretprobe_trace_entry_head *entry;
1018 	struct ring_buffer_event *event;
1019 	struct ring_buffer *buffer;
1020 	int size, pc, dsize;
1021 	unsigned long irq_flags;
1022 	struct trace_event_call *call = &tk->tp.call;
1023 
1024 	WARN_ON(call != trace_file->event_call);
1025 
1026 	if (trace_trigger_soft_disabled(trace_file))
1027 		return;
1028 
1029 	local_save_flags(irq_flags);
1030 	pc = preempt_count();
1031 
1032 	dsize = __get_data_size(&tk->tp, regs);
1033 	size = sizeof(*entry) + tk->tp.size + dsize;
1034 
1035 	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
1036 						call->event.type,
1037 						size, irq_flags, pc);
1038 	if (!event)
1039 		return;
1040 
1041 	entry = ring_buffer_event_data(event);
1042 	entry->func = (unsigned long)tk->rp.kp.addr;
1043 	entry->ret_ip = (unsigned long)ri->ret_addr;
1044 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1045 
1046 	event_trigger_unlock_commit_regs(trace_file, buffer, event,
1047 					 entry, irq_flags, pc, regs);
1048 }
1049 
1050 static void
1051 kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1052 		     struct pt_regs *regs)
1053 {
1054 	struct event_file_link *link;
1055 
1056 	list_for_each_entry_rcu(link, &tk->tp.files, list)
1057 		__kretprobe_trace_func(tk, ri, regs, link->file);
1058 }
1059 NOKPROBE_SYMBOL(kretprobe_trace_func);
1060 
1061 /* Event entry printers */
1062 static enum print_line_t
1063 print_kprobe_event(struct trace_iterator *iter, int flags,
1064 		   struct trace_event *event)
1065 {
1066 	struct kprobe_trace_entry_head *field;
1067 	struct trace_seq *s = &iter->seq;
1068 	struct trace_probe *tp;
1069 
1070 	field = (struct kprobe_trace_entry_head *)iter->ent;
1071 	tp = container_of(event, struct trace_probe, call.event);
1072 
1073 	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
1074 
1075 	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1076 		goto out;
1077 
1078 	trace_seq_putc(s, ')');
1079 
1080 	if (print_probe_args(s, tp->args, tp->nr_args,
1081 			     (u8 *)&field[1], field) < 0)
1082 		goto out;
1083 
1084 	trace_seq_putc(s, '\n');
1085  out:
1086 	return trace_handle_return(s);
1087 }
1088 
1089 static enum print_line_t
1090 print_kretprobe_event(struct trace_iterator *iter, int flags,
1091 		      struct trace_event *event)
1092 {
1093 	struct kretprobe_trace_entry_head *field;
1094 	struct trace_seq *s = &iter->seq;
1095 	struct trace_probe *tp;
1096 
1097 	field = (struct kretprobe_trace_entry_head *)iter->ent;
1098 	tp = container_of(event, struct trace_probe, call.event);
1099 
1100 	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
1101 
1102 	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1103 		goto out;
1104 
1105 	trace_seq_puts(s, " <- ");
1106 
1107 	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1108 		goto out;
1109 
1110 	trace_seq_putc(s, ')');
1111 
1112 	if (print_probe_args(s, tp->args, tp->nr_args,
1113 			     (u8 *)&field[1], field) < 0)
1114 		goto out;
1115 
1116 	trace_seq_putc(s, '\n');
1117 
1118  out:
1119 	return trace_handle_return(s);
1120 }
1121 
1122 
1123 static int kprobe_event_define_fields(struct trace_event_call *event_call)
1124 {
1125 	int ret;
1126 	struct kprobe_trace_entry_head field;
1127 	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;
1128 
1129 	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1130 
1131 	return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp);
1132 }
1133 
1134 static int kretprobe_event_define_fields(struct trace_event_call *event_call)
1135 {
1136 	int ret;
1137 	struct kretprobe_trace_entry_head field;
1138 	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;
1139 
1140 	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1141 	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1142 
1143 	return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp);
1144 }
1145 
1146 #ifdef CONFIG_PERF_EVENTS
1147 
1148 /* Kprobe profile handler */
1149 static int
1150 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
1151 {
1152 	struct trace_event_call *call = &tk->tp.call;
1153 	struct kprobe_trace_entry_head *entry;
1154 	struct hlist_head *head;
1155 	int size, __size, dsize;
1156 	int rctx;
1157 
1158 	if (bpf_prog_array_valid(call)) {
1159 		unsigned long orig_ip = instruction_pointer(regs);
1160 		int ret;
1161 
1162 		ret = trace_call_bpf(call, regs);
1163 
1164 		/*
1165 		 * We need to check and see if we modified the pc of the
1166 		 * pt_regs, and if so return 1 so that we don't do the
1167 		 * single stepping.
1168 		 */
1169 		if (orig_ip != instruction_pointer(regs))
1170 			return 1;
1171 		if (!ret)
1172 			return 0;
1173 	}
1174 
1175 	head = this_cpu_ptr(call->perf_events);
1176 	if (hlist_empty(head))
1177 		return 0;
1178 
1179 	dsize = __get_data_size(&tk->tp, regs);
1180 	__size = sizeof(*entry) + tk->tp.size + dsize;
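	/*
	 * The raw sample in the perf buffer is preceded by a u32 size field;
	 * pad the record so that the size field plus data stays u64 aligned.
	 */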
1181 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
1182 	size -= sizeof(u32);
1183 
1184 	entry = perf_trace_buf_alloc(size, NULL, &rctx);
1185 	if (!entry)
1186 		return 0;
1187 
1188 	entry->ip = (unsigned long)tk->rp.kp.addr;
1189 	memset(&entry[1], 0, dsize);
1190 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1191 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
1192 			      head, NULL);
1193 	return 0;
1194 }
1195 NOKPROBE_SYMBOL(kprobe_perf_func);
1196 
1197 /* Kretprobe profile handler */
1198 static void
1199 kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1200 		    struct pt_regs *regs)
1201 {
1202 	struct trace_event_call *call = &tk->tp.call;
1203 	struct kretprobe_trace_entry_head *entry;
1204 	struct hlist_head *head;
1205 	int size, __size, dsize;
1206 	int rctx;
1207 
1208 	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
1209 		return;
1210 
1211 	head = this_cpu_ptr(call->perf_events);
1212 	if (hlist_empty(head))
1213 		return;
1214 
1215 	dsize = __get_data_size(&tk->tp, regs);
1216 	__size = sizeof(*entry) + tk->tp.size + dsize;
1217 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
1218 	size -= sizeof(u32);
1219 
1220 	entry = perf_trace_buf_alloc(size, NULL, &rctx);
1221 	if (!entry)
1222 		return;
1223 
1224 	entry->func = (unsigned long)tk->rp.kp.addr;
1225 	entry->ret_ip = (unsigned long)ri->ret_addr;
1226 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1227 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
1228 			      head, NULL);
1229 }
1230 NOKPROBE_SYMBOL(kretprobe_perf_func);
1231 
1232 int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
1233 			const char **symbol, u64 *probe_offset,
1234 			u64 *probe_addr, bool perf_type_tracepoint)
1235 {
1236 	const char *pevent = trace_event_name(event->tp_event);
1237 	const char *group = event->tp_event->class->system;
1238 	struct trace_kprobe *tk;
1239 
1240 	if (perf_type_tracepoint)
1241 		tk = find_trace_kprobe(pevent, group);
1242 	else
1243 		tk = event->tp_event->data;
1244 	if (!tk)
1245 		return -EINVAL;
1246 
1247 	*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
1248 					      : BPF_FD_TYPE_KPROBE;
1249 	if (tk->symbol) {
1250 		*symbol = tk->symbol;
1251 		*probe_offset = tk->rp.kp.offset;
1252 		*probe_addr = 0;
1253 	} else {
1254 		*symbol = NULL;
1255 		*probe_offset = 0;
1256 		*probe_addr = (unsigned long)tk->rp.kp.addr;
1257 	}
1258 	return 0;
1259 }
1260 #endif	/* CONFIG_PERF_EVENTS */
1261 
1262 /*
1263  * Called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
1264  *
1265  * kprobe_trace_self_tests_init() calls enable_trace_kprobe()/disable_trace_kprobe()
1266  * locklessly, but we can't race with this __init function.
1267  */
1268 static int kprobe_register(struct trace_event_call *event,
1269 			   enum trace_reg type, void *data)
1270 {
1271 	struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
1272 	struct trace_event_file *file = data;
1273 
1274 	switch (type) {
1275 	case TRACE_REG_REGISTER:
1276 		return enable_trace_kprobe(tk, file);
1277 	case TRACE_REG_UNREGISTER:
1278 		return disable_trace_kprobe(tk, file);
1279 
1280 #ifdef CONFIG_PERF_EVENTS
1281 	case TRACE_REG_PERF_REGISTER:
1282 		return enable_trace_kprobe(tk, NULL);
1283 	case TRACE_REG_PERF_UNREGISTER:
1284 		return disable_trace_kprobe(tk, NULL);
1285 	case TRACE_REG_PERF_OPEN:
1286 	case TRACE_REG_PERF_CLOSE:
1287 	case TRACE_REG_PERF_ADD:
1288 	case TRACE_REG_PERF_DEL:
1289 		return 0;
1290 #endif
1291 	}
1292 	return 0;
1293 }
1294 
1295 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1296 {
1297 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
1298 	int ret = 0;
1299 
1300 	raw_cpu_inc(*tk->nhit);
1301 
1302 	if (tk->tp.flags & TP_FLAG_TRACE)
1303 		kprobe_trace_func(tk, regs);
1304 #ifdef CONFIG_PERF_EVENTS
1305 	if (tk->tp.flags & TP_FLAG_PROFILE)
1306 		ret = kprobe_perf_func(tk, regs);
1307 #endif
1308 	return ret;
1309 }
1310 NOKPROBE_SYMBOL(kprobe_dispatcher);
1311 
1312 static int
1313 kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1314 {
1315 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
1316 
1317 	raw_cpu_inc(*tk->nhit);
1318 
1319 	if (tk->tp.flags & TP_FLAG_TRACE)
1320 		kretprobe_trace_func(tk, ri, regs);
1321 #ifdef CONFIG_PERF_EVENTS
1322 	if (tk->tp.flags & TP_FLAG_PROFILE)
1323 		kretprobe_perf_func(tk, ri, regs);
1324 #endif
1325 	return 0;	/* We don't tweak the kernel, so just return 0 */
1326 }
1327 NOKPROBE_SYMBOL(kretprobe_dispatcher);
1328 
1329 static struct trace_event_functions kretprobe_funcs = {
1330 	.trace		= print_kretprobe_event
1331 };
1332 
1333 static struct trace_event_functions kprobe_funcs = {
1334 	.trace		= print_kprobe_event
1335 };
1336 
1337 static inline void init_trace_event_call(struct trace_kprobe *tk,
1338 					 struct trace_event_call *call)
1339 {
1340 	INIT_LIST_HEAD(&call->class->fields);
1341 	if (trace_kprobe_is_return(tk)) {
1342 		call->event.funcs = &kretprobe_funcs;
1343 		call->class->define_fields = kretprobe_event_define_fields;
1344 	} else {
1345 		call->event.funcs = &kprobe_funcs;
1346 		call->class->define_fields = kprobe_event_define_fields;
1347 	}
1348 
1349 	call->flags = TRACE_EVENT_FL_KPROBE;
1350 	call->class->reg = kprobe_register;
1351 	call->data = tk;
1352 }
1353 
1354 static int register_kprobe_event(struct trace_kprobe *tk)
1355 {
1356 	struct trace_event_call *call = &tk->tp.call;
1357 	int ret = 0;
1358 
1359 	init_trace_event_call(tk, call);
1360 
1361 	if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
1362 		return -ENOMEM;
1363 	ret = register_trace_event(&call->event);
1364 	if (!ret) {
1365 		kfree(call->print_fmt);
1366 		return -ENODEV;
1367 	}
1368 	ret = trace_add_event_call(call);
1369 	if (ret) {
1370 		pr_info("Failed to register kprobe event: %s\n",
1371 			trace_event_name(call));
1372 		kfree(call->print_fmt);
1373 		unregister_trace_event(&call->event);
1374 	}
1375 	return ret;
1376 }
1377 
1378 static int unregister_kprobe_event(struct trace_kprobe *tk)
1379 {
1380 	int ret;
1381 
1382 	/* tp->event is unregistered in trace_remove_event_call() */
1383 	ret = trace_remove_event_call(&tk->tp.call);
1384 	if (!ret)
1385 		kfree(tk->tp.call.print_fmt);
1386 	return ret;
1387 }
1388 
1389 #ifdef CONFIG_PERF_EVENTS
1390 /* Create a trace_kprobe, but don't add it to the global lists */
1391 struct trace_event_call *
1392 create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
1393 			  bool is_return)
1394 {
1395 	struct trace_kprobe *tk;
1396 	int ret;
1397 	char *event;
1398 
1399 	/*
1400 	 * Local trace_kprobes are not added to dyn_event, so they are never
1401 	 * searched in find_trace_kprobe(). Therefore, there is no concern
1402 	 * about duplicate names here.
1403 	 */
1404 	event = func ? func : "DUMMY_EVENT";
1405 
1406 	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
1407 				offs, 0 /* maxactive */, 0 /* nargs */,
1408 				is_return);
1409 
1410 	if (IS_ERR(tk)) {
1411 		pr_info("Failed to allocate trace_probe.(%d)\n",
1412 			(int)PTR_ERR(tk));
1413 		return ERR_CAST(tk);
1414 	}
1415 
1416 	init_trace_event_call(tk, &tk->tp.call);
1417 
1418 	if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
1419 		ret = -ENOMEM;
1420 		goto error;
1421 	}
1422 
1423 	ret = __register_trace_kprobe(tk);
1424 	if (ret < 0) {
1425 		kfree(tk->tp.call.print_fmt);
1426 		goto error;
1427 	}
1428 
1429 	return &tk->tp.call;
1430 error:
1431 	free_trace_kprobe(tk);
1432 	return ERR_PTR(ret);
1433 }
1434 
1435 void destroy_local_trace_kprobe(struct trace_event_call *event_call)
1436 {
1437 	struct trace_kprobe *tk;
1438 
1439 	tk = container_of(event_call, struct trace_kprobe, tp.call);
1440 
1441 	if (trace_probe_is_enabled(&tk->tp)) {
1442 		WARN_ON(1);
1443 		return;
1444 	}
1445 
1446 	__unregister_trace_kprobe(tk);
1447 
1448 	kfree(tk->tp.call.print_fmt);
1449 	free_trace_kprobe(tk);
1450 }
1451 #endif /* CONFIG_PERF_EVENTS */
1452 
1453 /* Make a tracefs interface for controlling probe points */
1454 static __init int init_kprobe_trace(void)
1455 {
1456 	struct dentry *d_tracer;
1457 	struct dentry *entry;
1458 	int ret;
1459 
1460 	ret = dyn_event_register(&trace_kprobe_ops);
1461 	if (ret)
1462 		return ret;
1463 
1464 	if (register_module_notifier(&trace_kprobe_module_nb))
1465 		return -EINVAL;
1466 
1467 	d_tracer = tracing_init_dentry();
1468 	if (IS_ERR(d_tracer))
1469 		return 0;
1470 
1471 	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
1472 				    NULL, &kprobe_events_ops);
1473 
1474 	/* Event list interface */
1475 	if (!entry)
1476 		pr_warn("Could not create tracefs 'kprobe_events' entry\n");
1477 
1478 	/* Profile interface */
1479 	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
1480 				    NULL, &kprobe_profile_ops);
1481 
1482 	if (!entry)
1483 		pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
1484 	return 0;
1485 }
1486 fs_initcall(init_kprobe_trace);
1487 
1488 
1489 #ifdef CONFIG_FTRACE_STARTUP_TEST
1490 static __init struct trace_event_file *
1491 find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
1492 {
1493 	struct trace_event_file *file;
1494 
1495 	list_for_each_entry(file, &tr->events, list)
1496 		if (file->event_call == &tk->tp.call)
1497 			return file;
1498 
1499 	return NULL;
1500 }
1501 
1502 /*
1503  * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
1504  * stage, so we can do this locklessly.
1505  */
1506 static __init int kprobe_trace_self_tests_init(void)
1507 {
1508 	int ret, warn = 0;
1509 	int (*target)(int, int, int, int, int, int);
1510 	struct trace_kprobe *tk;
1511 	struct trace_event_file *file;
1512 
1513 	if (tracing_is_disabled())
1514 		return -ENODEV;
1515 
1516 	target = kprobe_trace_selftest_target;
1517 
1518 	pr_info("Testing kprobe tracing: ");
1519 
1520 	ret = trace_run_command("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)",
1521 				create_or_delete_trace_kprobe);
1522 	if (WARN_ON_ONCE(ret)) {
1523 		pr_warn("error on probing function entry.\n");
1524 		warn++;
1525 	} else {
1526 		/* Enable trace point */
1527 		tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
1528 		if (WARN_ON_ONCE(tk == NULL)) {
1529 			pr_warn("error on getting new probe.\n");
1530 			warn++;
1531 		} else {
1532 			file = find_trace_probe_file(tk, top_trace_array());
1533 			if (WARN_ON_ONCE(file == NULL)) {
1534 				pr_warn("error on getting probe file.\n");
1535 				warn++;
1536 			} else
1537 				enable_trace_kprobe(tk, file);
1538 		}
1539 	}
1540 
1541 	ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target $retval",
1542 				create_or_delete_trace_kprobe);
1543 	if (WARN_ON_ONCE(ret)) {
1544 		pr_warn("error on probing function return.\n");
1545 		warn++;
1546 	} else {
1547 		/* Enable trace point */
1548 		tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
1549 		if (WARN_ON_ONCE(tk == NULL)) {
1550 			pr_warn("error on getting 2nd new probe.\n");
1551 			warn++;
1552 		} else {
1553 			file = find_trace_probe_file(tk, top_trace_array());
1554 			if (WARN_ON_ONCE(file == NULL)) {
1555 				pr_warn("error on getting probe file.\n");
1556 				warn++;
1557 			} else
1558 				enable_trace_kprobe(tk, file);
1559 		}
1560 	}
1561 
1562 	if (warn)
1563 		goto end;
1564 
1565 	ret = target(1, 2, 3, 4, 5, 6);
1566 
1567 	/*
1568 	 * Not expecting an error here; the check only prevents the
1569 	 * optimizer from removing the call to target(), as otherwise there
1570 	 * would be no side effects and the call would never be performed.
1571 	 */
1572 	if (ret != 21)
1573 		warn++;
1574 
1575 	/* Disable trace points before removing it */
1576 	tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
1577 	if (WARN_ON_ONCE(tk == NULL)) {
1578 		pr_warn("error on getting test probe.\n");
1579 		warn++;
1580 	} else {
1581 		if (trace_kprobe_nhit(tk) != 1) {
1582 			pr_warn("incorrect number of testprobe hits\n");
1583 			warn++;
1584 		}
1585 
1586 		file = find_trace_probe_file(tk, top_trace_array());
1587 		if (WARN_ON_ONCE(file == NULL)) {
1588 			pr_warn("error on getting probe file.\n");
1589 			warn++;
1590 		} else
1591 			disable_trace_kprobe(tk, file);
1592 	}
1593 
1594 	tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
1595 	if (WARN_ON_ONCE(tk == NULL)) {
1596 		pr_warn("error on getting 2nd test probe.\n");
1597 		warn++;
1598 	} else {
1599 		if (trace_kprobe_nhit(tk) != 1) {
1600 			pr_warn("incorrect number of testprobe2 hits\n");
1601 			warn++;
1602 		}
1603 
1604 		file = find_trace_probe_file(tk, top_trace_array());
1605 		if (WARN_ON_ONCE(file == NULL)) {
1606 			pr_warn("error on getting probe file.\n");
1607 			warn++;
1608 		} else
1609 			disable_trace_kprobe(tk, file);
1610 	}
1611 
1612 	ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe);
1613 	if (WARN_ON_ONCE(ret)) {
1614 		pr_warn("error on deleting a probe.\n");
1615 		warn++;
1616 	}
1617 
1618 	ret = trace_run_command("-:testprobe2", create_or_delete_trace_kprobe);
1619 	if (WARN_ON_ONCE(ret)) {
1620 		pr_warn("error on deleting a probe.\n");
1621 		warn++;
1622 	}
1623 
1624 end:
1625 	ret = dyn_events_release_all(&trace_kprobe_ops);
1626 	if (WARN_ON_ONCE(ret)) {
1627 		pr_warn("error on cleaning up probes.\n");
1628 		warn++;
1629 	}
1630 	/*
1631 	 * Wait for the optimizer work to finish. Otherwise it might fiddle
1632 	 * with probes in already freed __init text.
1633 	 */
1634 	wait_for_kprobe_optimizer();
1635 	if (warn)
1636 		pr_cont("NG: Some tests failed. Please check them.\n");
1637 	else
1638 		pr_cont("OK\n");
1639 	return 0;
1640 }
1641 
1642 late_initcall(kprobe_trace_self_tests_init);
1643 
1644 #endif
1645