xref: /openbmc/linux/kernel/trace/trace_kprobe.c (revision 861e10be)
1 /*
2  * Kprobes-based tracing events
3  *
4  * Created by Masami Hiramatsu <mhiramat@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19 
20 #include <linux/module.h>
21 #include <linux/uaccess.h>
22 
23 #include "trace_probe.h"
24 
25 #define KPROBE_EVENT_SYSTEM "kprobes"
26 
27 /**
28  * Kprobe event core functions
29  */
30 
/*
 * One kprobe/kretprobe based trace event: the probe itself, the ftrace
 * event descriptors exposed for it and the fetch arguments recorded on
 * every hit.
 */
struct trace_probe {
	/*
	 * Link in probe_list.
	 * NOTE(review): the seq_file show callbacks in this file assign their
	 * iterator value directly to a struct trace_probe *, which appears to
	 * rely on this member staying first - confirm before reordering.
	 */
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	/* Hit counter, incremented by kprobe_trace_func() */
	unsigned long 		nhit;
	unsigned int		flags;	/* For TP_FLAG_* */
	/* Copy made by alloc_trace_probe(); NULL when probing a raw address */
	const char		*symbol;	/* symbol name */
	/* call.class is pointed at this member by alloc_trace_probe() */
	struct ftrace_event_class	class;
	struct ftrace_event_call	call;
	/* Handlers add the per-hit dynamic data length on top of this */
	ssize_t			size;		/* trace entry size */
	unsigned int		nr_args;	/* number of valid entries in args[] */
	struct probe_arg	args[];
};
43 
/* Bytes needed for a struct trace_probe carrying @n trailing probe_arg slots */
#define SIZEOF_TRACE_PROBE(n)					\
	(offsetof(struct trace_probe, args) +			\
	 ((n) * sizeof(struct probe_arg)))
47 
48 
49 static __kprobes int trace_probe_is_return(struct trace_probe *tp)
50 {
51 	return tp->rp.handler != NULL;
52 }
53 
54 static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
55 {
56 	return tp->symbol ? tp->symbol : "unknown";
57 }
58 
59 static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
60 {
61 	return tp->rp.kp.offset;
62 }
63 
64 static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
65 {
66 	return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
67 }
68 
69 static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
70 {
71 	return !!(tp->flags & TP_FLAG_REGISTERED);
72 }
73 
74 static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
75 {
76 	return !!(kprobe_gone(&tp->rp.kp));
77 }
78 
79 static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
80 						struct module *mod)
81 {
82 	int len = strlen(mod->name);
83 	const char *name = trace_probe_symbol(tp);
84 	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
85 }
86 
87 static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
88 {
89 	return !!strchr(trace_probe_symbol(tp), ':');
90 }
91 
/* Defined near the end of this file; used by the (un)registration paths */
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/*
 * probe_list links every registered trace_probe; the functions in this
 * file that walk or modify it hold probe_lock while doing so.
 */
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

/* Handlers installed into the k*probe by alloc_trace_probe() */
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);
101 
102 /*
103  * Allocate new trace_probe and initialize it (including kprobes).
104  */
105 static struct trace_probe *alloc_trace_probe(const char *group,
106 					     const char *event,
107 					     void *addr,
108 					     const char *symbol,
109 					     unsigned long offs,
110 					     int nargs, bool is_return)
111 {
112 	struct trace_probe *tp;
113 	int ret = -ENOMEM;
114 
115 	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
116 	if (!tp)
117 		return ERR_PTR(ret);
118 
119 	if (symbol) {
120 		tp->symbol = kstrdup(symbol, GFP_KERNEL);
121 		if (!tp->symbol)
122 			goto error;
123 		tp->rp.kp.symbol_name = tp->symbol;
124 		tp->rp.kp.offset = offs;
125 	} else
126 		tp->rp.kp.addr = addr;
127 
128 	if (is_return)
129 		tp->rp.handler = kretprobe_dispatcher;
130 	else
131 		tp->rp.kp.pre_handler = kprobe_dispatcher;
132 
133 	if (!event || !is_good_name(event)) {
134 		ret = -EINVAL;
135 		goto error;
136 	}
137 
138 	tp->call.class = &tp->class;
139 	tp->call.name = kstrdup(event, GFP_KERNEL);
140 	if (!tp->call.name)
141 		goto error;
142 
143 	if (!group || !is_good_name(group)) {
144 		ret = -EINVAL;
145 		goto error;
146 	}
147 
148 	tp->class.system = kstrdup(group, GFP_KERNEL);
149 	if (!tp->class.system)
150 		goto error;
151 
152 	INIT_LIST_HEAD(&tp->list);
153 	return tp;
154 error:
155 	kfree(tp->call.name);
156 	kfree(tp->symbol);
157 	kfree(tp);
158 	return ERR_PTR(ret);
159 }
160 
161 static void free_trace_probe(struct trace_probe *tp)
162 {
163 	int i;
164 
165 	for (i = 0; i < tp->nr_args; i++)
166 		traceprobe_free_probe_arg(&tp->args[i]);
167 
168 	kfree(tp->call.class->system);
169 	kfree(tp->call.name);
170 	kfree(tp->symbol);
171 	kfree(tp);
172 }
173 
174 static struct trace_probe *find_trace_probe(const char *event,
175 					    const char *group)
176 {
177 	struct trace_probe *tp;
178 
179 	list_for_each_entry(tp, &probe_list, list)
180 		if (strcmp(tp->call.name, event) == 0 &&
181 		    strcmp(tp->call.class->system, group) == 0)
182 			return tp;
183 	return NULL;
184 }
185 
186 /* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
187 static int enable_trace_probe(struct trace_probe *tp, int flag)
188 {
189 	int ret = 0;
190 
191 	tp->flags |= flag;
192 	if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
193 	    !trace_probe_has_gone(tp)) {
194 		if (trace_probe_is_return(tp))
195 			ret = enable_kretprobe(&tp->rp);
196 		else
197 			ret = enable_kprobe(&tp->rp.kp);
198 	}
199 
200 	return ret;
201 }
202 
203 /* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
204 static void disable_trace_probe(struct trace_probe *tp, int flag)
205 {
206 	tp->flags &= ~flag;
207 	if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
208 		if (trace_probe_is_return(tp))
209 			disable_kretprobe(&tp->rp);
210 		else
211 			disable_kprobe(&tp->rp.kp);
212 	}
213 }
214 
215 /* Internal register function - just handle k*probes and flags */
216 static int __register_trace_probe(struct trace_probe *tp)
217 {
218 	int i, ret;
219 
220 	if (trace_probe_is_registered(tp))
221 		return -EINVAL;
222 
223 	for (i = 0; i < tp->nr_args; i++)
224 		traceprobe_update_arg(&tp->args[i]);
225 
226 	/* Set/clear disabled flag according to tp->flag */
227 	if (trace_probe_is_enabled(tp))
228 		tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
229 	else
230 		tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
231 
232 	if (trace_probe_is_return(tp))
233 		ret = register_kretprobe(&tp->rp);
234 	else
235 		ret = register_kprobe(&tp->rp.kp);
236 
237 	if (ret == 0)
238 		tp->flags |= TP_FLAG_REGISTERED;
239 	else {
240 		pr_warning("Could not insert probe at %s+%lu: %d\n",
241 			   trace_probe_symbol(tp), trace_probe_offset(tp), ret);
242 		if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
243 			pr_warning("This probe might be able to register after"
244 				   "target module is loaded. Continue.\n");
245 			ret = 0;
246 		} else if (ret == -EILSEQ) {
247 			pr_warning("Probing address(0x%p) is not an "
248 				   "instruction boundary.\n",
249 				   tp->rp.kp.addr);
250 			ret = -EINVAL;
251 		}
252 	}
253 
254 	return ret;
255 }
256 
257 /* Internal unregister function - just handle k*probes and flags */
258 static void __unregister_trace_probe(struct trace_probe *tp)
259 {
260 	if (trace_probe_is_registered(tp)) {
261 		if (trace_probe_is_return(tp))
262 			unregister_kretprobe(&tp->rp);
263 		else
264 			unregister_kprobe(&tp->rp.kp);
265 		tp->flags &= ~TP_FLAG_REGISTERED;
266 		/* Cleanup kprobe for reuse */
267 		if (tp->rp.kp.symbol_name)
268 			tp->rp.kp.addr = NULL;
269 	}
270 }
271 
272 /* Unregister a trace_probe and probe_event: call with locking probe_lock */
273 static int unregister_trace_probe(struct trace_probe *tp)
274 {
275 	/* Enabled event can not be unregistered */
276 	if (trace_probe_is_enabled(tp))
277 		return -EBUSY;
278 
279 	__unregister_trace_probe(tp);
280 	list_del(&tp->list);
281 	unregister_probe_event(tp);
282 
283 	return 0;
284 }
285 
286 /* Register a trace_probe and probe_event */
287 static int register_trace_probe(struct trace_probe *tp)
288 {
289 	struct trace_probe *old_tp;
290 	int ret;
291 
292 	mutex_lock(&probe_lock);
293 
294 	/* Delete old (same name) event if exist */
295 	old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
296 	if (old_tp) {
297 		ret = unregister_trace_probe(old_tp);
298 		if (ret < 0)
299 			goto end;
300 		free_trace_probe(old_tp);
301 	}
302 
303 	/* Register new event */
304 	ret = register_probe_event(tp);
305 	if (ret) {
306 		pr_warning("Failed to register probe event(%d)\n", ret);
307 		goto end;
308 	}
309 
310 	/* Register k*probe */
311 	ret = __register_trace_probe(tp);
312 	if (ret < 0)
313 		unregister_probe_event(tp);
314 	else
315 		list_add_tail(&tp->list, &probe_list);
316 
317 end:
318 	mutex_unlock(&probe_lock);
319 	return ret;
320 }
321 
322 /* Module notifier call back, checking event on the module */
323 static int trace_probe_module_callback(struct notifier_block *nb,
324 				       unsigned long val, void *data)
325 {
326 	struct module *mod = data;
327 	struct trace_probe *tp;
328 	int ret;
329 
330 	if (val != MODULE_STATE_COMING)
331 		return NOTIFY_DONE;
332 
333 	/* Update probes on coming module */
334 	mutex_lock(&probe_lock);
335 	list_for_each_entry(tp, &probe_list, list) {
336 		if (trace_probe_within_module(tp, mod)) {
337 			/* Don't need to check busy - this should have gone. */
338 			__unregister_trace_probe(tp);
339 			ret = __register_trace_probe(tp);
340 			if (ret)
341 				pr_warning("Failed to re-register probe %s on"
342 					   "%s: %d\n",
343 					   tp->call.name, mod->name, ret);
344 		}
345 	}
346 	mutex_unlock(&probe_lock);
347 
348 	return NOTIFY_DONE;
349 }
350 
/* Notifier block routing module state changes to trace_probe_module_callback() */
static struct notifier_block trace_probe_module_nb = {
	.notifier_call = trace_probe_module_callback,
	.priority = 1	/* Invoked after kprobe module callback */
};
355 
356 static int create_trace_probe(int argc, char **argv)
357 {
358 	/*
359 	 * Argument syntax:
360 	 *  - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
361 	 *  - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
362 	 * Fetch args:
363 	 *  $retval	: fetch return value
364 	 *  $stack	: fetch stack address
365 	 *  $stackN	: fetch Nth of stack (N:0-)
366 	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
367 	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
368 	 *  %REG	: fetch register REG
369 	 * Dereferencing memory fetch:
370 	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
371 	 * Alias name of args:
372 	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
373 	 * Type of args:
374 	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
375 	 */
376 	struct trace_probe *tp;
377 	int i, ret = 0;
378 	bool is_return = false, is_delete = false;
379 	char *symbol = NULL, *event = NULL, *group = NULL;
380 	char *arg;
381 	unsigned long offset = 0;
382 	void *addr = NULL;
383 	char buf[MAX_EVENT_NAME_LEN];
384 
385 	/* argc must be >= 1 */
386 	if (argv[0][0] == 'p')
387 		is_return = false;
388 	else if (argv[0][0] == 'r')
389 		is_return = true;
390 	else if (argv[0][0] == '-')
391 		is_delete = true;
392 	else {
393 		pr_info("Probe definition must be started with 'p', 'r' or"
394 			" '-'.\n");
395 		return -EINVAL;
396 	}
397 
398 	if (argv[0][1] == ':') {
399 		event = &argv[0][2];
400 		if (strchr(event, '/')) {
401 			group = event;
402 			event = strchr(group, '/') + 1;
403 			event[-1] = '\0';
404 			if (strlen(group) == 0) {
405 				pr_info("Group name is not specified\n");
406 				return -EINVAL;
407 			}
408 		}
409 		if (strlen(event) == 0) {
410 			pr_info("Event name is not specified\n");
411 			return -EINVAL;
412 		}
413 	}
414 	if (!group)
415 		group = KPROBE_EVENT_SYSTEM;
416 
417 	if (is_delete) {
418 		if (!event) {
419 			pr_info("Delete command needs an event name.\n");
420 			return -EINVAL;
421 		}
422 		mutex_lock(&probe_lock);
423 		tp = find_trace_probe(event, group);
424 		if (!tp) {
425 			mutex_unlock(&probe_lock);
426 			pr_info("Event %s/%s doesn't exist.\n", group, event);
427 			return -ENOENT;
428 		}
429 		/* delete an event */
430 		ret = unregister_trace_probe(tp);
431 		if (ret == 0)
432 			free_trace_probe(tp);
433 		mutex_unlock(&probe_lock);
434 		return ret;
435 	}
436 
437 	if (argc < 2) {
438 		pr_info("Probe point is not specified.\n");
439 		return -EINVAL;
440 	}
441 	if (isdigit(argv[1][0])) {
442 		if (is_return) {
443 			pr_info("Return probe point must be a symbol.\n");
444 			return -EINVAL;
445 		}
446 		/* an address specified */
447 		ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
448 		if (ret) {
449 			pr_info("Failed to parse address.\n");
450 			return ret;
451 		}
452 	} else {
453 		/* a symbol specified */
454 		symbol = argv[1];
455 		/* TODO: support .init module functions */
456 		ret = traceprobe_split_symbol_offset(symbol, &offset);
457 		if (ret) {
458 			pr_info("Failed to parse symbol.\n");
459 			return ret;
460 		}
461 		if (offset && is_return) {
462 			pr_info("Return probe must be used without offset.\n");
463 			return -EINVAL;
464 		}
465 	}
466 	argc -= 2; argv += 2;
467 
468 	/* setup a probe */
469 	if (!event) {
470 		/* Make a new event name */
471 		if (symbol)
472 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
473 				 is_return ? 'r' : 'p', symbol, offset);
474 		else
475 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
476 				 is_return ? 'r' : 'p', addr);
477 		event = buf;
478 	}
479 	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
480 			       is_return);
481 	if (IS_ERR(tp)) {
482 		pr_info("Failed to allocate trace_probe.(%d)\n",
483 			(int)PTR_ERR(tp));
484 		return PTR_ERR(tp);
485 	}
486 
487 	/* parse arguments */
488 	ret = 0;
489 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
490 		/* Increment count for freeing args in error case */
491 		tp->nr_args++;
492 
493 		/* Parse argument name */
494 		arg = strchr(argv[i], '=');
495 		if (arg) {
496 			*arg++ = '\0';
497 			tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
498 		} else {
499 			arg = argv[i];
500 			/* If argument name is omitted, set "argN" */
501 			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
502 			tp->args[i].name = kstrdup(buf, GFP_KERNEL);
503 		}
504 
505 		if (!tp->args[i].name) {
506 			pr_info("Failed to allocate argument[%d] name.\n", i);
507 			ret = -ENOMEM;
508 			goto error;
509 		}
510 
511 		if (!is_good_name(tp->args[i].name)) {
512 			pr_info("Invalid argument[%d] name: %s\n",
513 				i, tp->args[i].name);
514 			ret = -EINVAL;
515 			goto error;
516 		}
517 
518 		if (traceprobe_conflict_field_name(tp->args[i].name,
519 							tp->args, i)) {
520 			pr_info("Argument[%d] name '%s' conflicts with "
521 				"another field.\n", i, argv[i]);
522 			ret = -EINVAL;
523 			goto error;
524 		}
525 
526 		/* Parse fetch argument */
527 		ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
528 						is_return, true);
529 		if (ret) {
530 			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
531 			goto error;
532 		}
533 	}
534 
535 	ret = register_trace_probe(tp);
536 	if (ret)
537 		goto error;
538 	return 0;
539 
540 error:
541 	free_trace_probe(tp);
542 	return ret;
543 }
544 
545 static int release_all_trace_probes(void)
546 {
547 	struct trace_probe *tp;
548 	int ret = 0;
549 
550 	mutex_lock(&probe_lock);
551 	/* Ensure no probe is in use. */
552 	list_for_each_entry(tp, &probe_list, list)
553 		if (trace_probe_is_enabled(tp)) {
554 			ret = -EBUSY;
555 			goto end;
556 		}
557 	/* TODO: Use batch unregistration */
558 	while (!list_empty(&probe_list)) {
559 		tp = list_entry(probe_list.next, struct trace_probe, list);
560 		unregister_trace_probe(tp);
561 		free_trace_probe(tp);
562 	}
563 
564 end:
565 	mutex_unlock(&probe_lock);
566 
567 	return ret;
568 }
569 
570 /* Probes listing interfaces */
571 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
572 {
573 	mutex_lock(&probe_lock);
574 	return seq_list_start(&probe_list, *pos);
575 }
576 
577 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
578 {
579 	return seq_list_next(v, &probe_list, pos);
580 }
581 
582 static void probes_seq_stop(struct seq_file *m, void *v)
583 {
584 	mutex_unlock(&probe_lock);
585 }
586 
587 static int probes_seq_show(struct seq_file *m, void *v)
588 {
589 	struct trace_probe *tp = v;
590 	int i;
591 
592 	seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
593 	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
594 
595 	if (!tp->symbol)
596 		seq_printf(m, " 0x%p", tp->rp.kp.addr);
597 	else if (tp->rp.kp.offset)
598 		seq_printf(m, " %s+%u", trace_probe_symbol(tp),
599 			   tp->rp.kp.offset);
600 	else
601 		seq_printf(m, " %s", trace_probe_symbol(tp));
602 
603 	for (i = 0; i < tp->nr_args; i++)
604 		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
605 	seq_printf(m, "\n");
606 
607 	return 0;
608 }
609 
/*
 * seq_file iterator over probe_list that prints probe definitions;
 * installed by probes_open().
 */
static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};
616 
617 static int probes_open(struct inode *inode, struct file *file)
618 {
619 	int ret;
620 
621 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
622 		ret = release_all_trace_probes();
623 		if (ret < 0)
624 			return ret;
625 	}
626 
627 	return seq_open(file, &probes_seq_op);
628 }
629 
630 static ssize_t probes_write(struct file *file, const char __user *buffer,
631 			    size_t count, loff_t *ppos)
632 {
633 	return traceprobe_probes_write(file, buffer, count, ppos,
634 			create_trace_probe);
635 }
636 
/*
 * File operations for the probe definition file: reads list the probes
 * through seq_file, writes go through probes_write().
 */
static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};
645 
646 /* Probes profiling interfaces */
647 static int probes_profile_seq_show(struct seq_file *m, void *v)
648 {
649 	struct trace_probe *tp = v;
650 
651 	seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
652 		   tp->rp.kp.nmissed);
653 
654 	return 0;
655 }
656 
/*
 * seq_file iterator for the profile listing: shares start/next/stop with
 * probes_seq_op and differs only in the show callback.
 */
static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};
663 
664 static int profile_open(struct inode *inode, struct file *file)
665 {
666 	return seq_open(file, &profile_seq_op);
667 }
668 
/* File operations for the profile listing; no .write handler is provided */
static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};
676 
677 /* Sum up total data length for dynamic arraies (strings) */
678 static __kprobes int __get_data_size(struct trace_probe *tp,
679 				     struct pt_regs *regs)
680 {
681 	int i, ret = 0;
682 	u32 len;
683 
684 	for (i = 0; i < tp->nr_args; i++)
685 		if (unlikely(tp->args[i].fetch_size.fn)) {
686 			call_fetch(&tp->args[i].fetch_size, regs, &len);
687 			ret += len;
688 		}
689 
690 	return ret;
691 }
692 
693 /* Store the value of each argument */
694 static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
695 				       struct pt_regs *regs,
696 				       u8 *data, int maxlen)
697 {
698 	int i;
699 	u32 end = tp->size;
700 	u32 *dl;	/* Data (relative) location */
701 
702 	for (i = 0; i < tp->nr_args; i++) {
703 		if (unlikely(tp->args[i].fetch_size.fn)) {
704 			/*
705 			 * First, we set the relative location and
706 			 * maximum data length to *dl
707 			 */
708 			dl = (u32 *)(data + tp->args[i].offset);
709 			*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
710 			/* Then try to fetch string or dynamic array data */
711 			call_fetch(&tp->args[i].fetch, regs, dl);
712 			/* Reduce maximum length */
713 			end += get_rloc_len(*dl);
714 			maxlen -= get_rloc_len(*dl);
715 			/* Trick here, convert data_rloc to data_loc */
716 			*dl = convert_rloc_to_loc(*dl,
717 				 ent_size + tp->args[i].offset);
718 		} else
719 			/* Just fetching data normally */
720 			call_fetch(&tp->args[i].fetch, regs,
721 				   data + tp->args[i].offset);
722 	}
723 }
724 
725 /* Kprobe handler */
726 static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
727 {
728 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
729 	struct kprobe_trace_entry_head *entry;
730 	struct ring_buffer_event *event;
731 	struct ring_buffer *buffer;
732 	int size, dsize, pc;
733 	unsigned long irq_flags;
734 	struct ftrace_event_call *call = &tp->call;
735 
736 	tp->nhit++;
737 
738 	local_save_flags(irq_flags);
739 	pc = preempt_count();
740 
741 	dsize = __get_data_size(tp, regs);
742 	size = sizeof(*entry) + tp->size + dsize;
743 
744 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
745 						  size, irq_flags, pc);
746 	if (!event)
747 		return;
748 
749 	entry = ring_buffer_event_data(event);
750 	entry->ip = (unsigned long)kp->addr;
751 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
752 
753 	if (!filter_current_check_discard(buffer, call, entry, event))
754 		trace_buffer_unlock_commit_regs(buffer, event,
755 						irq_flags, pc, regs);
756 }
757 
758 /* Kretprobe handler */
759 static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
760 					  struct pt_regs *regs)
761 {
762 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
763 	struct kretprobe_trace_entry_head *entry;
764 	struct ring_buffer_event *event;
765 	struct ring_buffer *buffer;
766 	int size, pc, dsize;
767 	unsigned long irq_flags;
768 	struct ftrace_event_call *call = &tp->call;
769 
770 	local_save_flags(irq_flags);
771 	pc = preempt_count();
772 
773 	dsize = __get_data_size(tp, regs);
774 	size = sizeof(*entry) + tp->size + dsize;
775 
776 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
777 						  size, irq_flags, pc);
778 	if (!event)
779 		return;
780 
781 	entry = ring_buffer_event_data(event);
782 	entry->func = (unsigned long)tp->rp.kp.addr;
783 	entry->ret_ip = (unsigned long)ri->ret_addr;
784 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
785 
786 	if (!filter_current_check_discard(buffer, call, entry, event))
787 		trace_buffer_unlock_commit_regs(buffer, event,
788 						irq_flags, pc, regs);
789 }
790 
791 /* Event entry printers */
792 enum print_line_t
793 print_kprobe_event(struct trace_iterator *iter, int flags,
794 		   struct trace_event *event)
795 {
796 	struct kprobe_trace_entry_head *field;
797 	struct trace_seq *s = &iter->seq;
798 	struct trace_probe *tp;
799 	u8 *data;
800 	int i;
801 
802 	field = (struct kprobe_trace_entry_head *)iter->ent;
803 	tp = container_of(event, struct trace_probe, call.event);
804 
805 	if (!trace_seq_printf(s, "%s: (", tp->call.name))
806 		goto partial;
807 
808 	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
809 		goto partial;
810 
811 	if (!trace_seq_puts(s, ")"))
812 		goto partial;
813 
814 	data = (u8 *)&field[1];
815 	for (i = 0; i < tp->nr_args; i++)
816 		if (!tp->args[i].type->print(s, tp->args[i].name,
817 					     data + tp->args[i].offset, field))
818 			goto partial;
819 
820 	if (!trace_seq_puts(s, "\n"))
821 		goto partial;
822 
823 	return TRACE_TYPE_HANDLED;
824 partial:
825 	return TRACE_TYPE_PARTIAL_LINE;
826 }
827 
828 enum print_line_t
829 print_kretprobe_event(struct trace_iterator *iter, int flags,
830 		      struct trace_event *event)
831 {
832 	struct kretprobe_trace_entry_head *field;
833 	struct trace_seq *s = &iter->seq;
834 	struct trace_probe *tp;
835 	u8 *data;
836 	int i;
837 
838 	field = (struct kretprobe_trace_entry_head *)iter->ent;
839 	tp = container_of(event, struct trace_probe, call.event);
840 
841 	if (!trace_seq_printf(s, "%s: (", tp->call.name))
842 		goto partial;
843 
844 	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
845 		goto partial;
846 
847 	if (!trace_seq_puts(s, " <- "))
848 		goto partial;
849 
850 	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
851 		goto partial;
852 
853 	if (!trace_seq_puts(s, ")"))
854 		goto partial;
855 
856 	data = (u8 *)&field[1];
857 	for (i = 0; i < tp->nr_args; i++)
858 		if (!tp->args[i].type->print(s, tp->args[i].name,
859 					     data + tp->args[i].offset, field))
860 			goto partial;
861 
862 	if (!trace_seq_puts(s, "\n"))
863 		goto partial;
864 
865 	return TRACE_TYPE_HANDLED;
866 partial:
867 	return TRACE_TYPE_PARTIAL_LINE;
868 }
869 
870 
871 static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
872 {
873 	int ret, i;
874 	struct kprobe_trace_entry_head field;
875 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
876 
877 	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
878 	/* Set argument names as fields */
879 	for (i = 0; i < tp->nr_args; i++) {
880 		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
881 					 tp->args[i].name,
882 					 sizeof(field) + tp->args[i].offset,
883 					 tp->args[i].type->size,
884 					 tp->args[i].type->is_signed,
885 					 FILTER_OTHER);
886 		if (ret)
887 			return ret;
888 	}
889 	return 0;
890 }
891 
892 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
893 {
894 	int ret, i;
895 	struct kretprobe_trace_entry_head field;
896 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
897 
898 	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
899 	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
900 	/* Set argument names as fields */
901 	for (i = 0; i < tp->nr_args; i++) {
902 		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
903 					 tp->args[i].name,
904 					 sizeof(field) + tp->args[i].offset,
905 					 tp->args[i].type->size,
906 					 tp->args[i].type->is_signed,
907 					 FILTER_OTHER);
908 		if (ret)
909 			return ret;
910 	}
911 	return 0;
912 }
913 
914 static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
915 {
916 	int i;
917 	int pos = 0;
918 
919 	const char *fmt, *arg;
920 
921 	if (!trace_probe_is_return(tp)) {
922 		fmt = "(%lx)";
923 		arg = "REC->" FIELD_STRING_IP;
924 	} else {
925 		fmt = "(%lx <- %lx)";
926 		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
927 	}
928 
929 	/* When len=0, we just calculate the needed length */
930 #define LEN_OR_ZERO (len ? len - pos : 0)
931 
932 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
933 
934 	for (i = 0; i < tp->nr_args; i++) {
935 		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
936 				tp->args[i].name, tp->args[i].type->fmt);
937 	}
938 
939 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
940 
941 	for (i = 0; i < tp->nr_args; i++) {
942 		if (strcmp(tp->args[i].type->name, "string") == 0)
943 			pos += snprintf(buf + pos, LEN_OR_ZERO,
944 					", __get_str(%s)",
945 					tp->args[i].name);
946 		else
947 			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
948 					tp->args[i].name);
949 	}
950 
951 #undef LEN_OR_ZERO
952 
953 	/* return the length of print_fmt */
954 	return pos;
955 }
956 
957 static int set_print_fmt(struct trace_probe *tp)
958 {
959 	int len;
960 	char *print_fmt;
961 
962 	/* First: called with 0 length to calculate the needed length */
963 	len = __set_print_fmt(tp, NULL, 0);
964 	print_fmt = kmalloc(len + 1, GFP_KERNEL);
965 	if (!print_fmt)
966 		return -ENOMEM;
967 
968 	/* Second: actually write the @print_fmt */
969 	__set_print_fmt(tp, print_fmt, len + 1);
970 	tp->call.print_fmt = print_fmt;
971 
972 	return 0;
973 }
974 
975 #ifdef CONFIG_PERF_EVENTS
976 
977 /* Kprobe profile handler */
978 static __kprobes void kprobe_perf_func(struct kprobe *kp,
979 					 struct pt_regs *regs)
980 {
981 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
982 	struct ftrace_event_call *call = &tp->call;
983 	struct kprobe_trace_entry_head *entry;
984 	struct hlist_head *head;
985 	int size, __size, dsize;
986 	int rctx;
987 
988 	dsize = __get_data_size(tp, regs);
989 	__size = sizeof(*entry) + tp->size + dsize;
990 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
991 	size -= sizeof(u32);
992 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
993 		     "profile buffer not large enough"))
994 		return;
995 
996 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
997 	if (!entry)
998 		return;
999 
1000 	entry->ip = (unsigned long)kp->addr;
1001 	memset(&entry[1], 0, dsize);
1002 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1003 
1004 	head = this_cpu_ptr(call->perf_events);
1005 	perf_trace_buf_submit(entry, size, rctx,
1006 					entry->ip, 1, regs, head, NULL);
1007 }
1008 
1009 /* Kretprobe profile handler */
1010 static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1011 					    struct pt_regs *regs)
1012 {
1013 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1014 	struct ftrace_event_call *call = &tp->call;
1015 	struct kretprobe_trace_entry_head *entry;
1016 	struct hlist_head *head;
1017 	int size, __size, dsize;
1018 	int rctx;
1019 
1020 	dsize = __get_data_size(tp, regs);
1021 	__size = sizeof(*entry) + tp->size + dsize;
1022 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
1023 	size -= sizeof(u32);
1024 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1025 		     "profile buffer not large enough"))
1026 		return;
1027 
1028 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1029 	if (!entry)
1030 		return;
1031 
1032 	entry->func = (unsigned long)tp->rp.kp.addr;
1033 	entry->ret_ip = (unsigned long)ri->ret_addr;
1034 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1035 
1036 	head = this_cpu_ptr(call->perf_events);
1037 	perf_trace_buf_submit(entry, size, rctx,
1038 					entry->ret_ip, 1, regs, head, NULL);
1039 }
1040 #endif	/* CONFIG_PERF_EVENTS */
1041 
1042 static __kprobes
1043 int kprobe_register(struct ftrace_event_call *event,
1044 		    enum trace_reg type, void *data)
1045 {
1046 	struct trace_probe *tp = (struct trace_probe *)event->data;
1047 
1048 	switch (type) {
1049 	case TRACE_REG_REGISTER:
1050 		return enable_trace_probe(tp, TP_FLAG_TRACE);
1051 	case TRACE_REG_UNREGISTER:
1052 		disable_trace_probe(tp, TP_FLAG_TRACE);
1053 		return 0;
1054 
1055 #ifdef CONFIG_PERF_EVENTS
1056 	case TRACE_REG_PERF_REGISTER:
1057 		return enable_trace_probe(tp, TP_FLAG_PROFILE);
1058 	case TRACE_REG_PERF_UNREGISTER:
1059 		disable_trace_probe(tp, TP_FLAG_PROFILE);
1060 		return 0;
1061 	case TRACE_REG_PERF_OPEN:
1062 	case TRACE_REG_PERF_CLOSE:
1063 	case TRACE_REG_PERF_ADD:
1064 	case TRACE_REG_PERF_DEL:
1065 		return 0;
1066 #endif
1067 	}
1068 	return 0;
1069 }
1070 
1071 static __kprobes
1072 int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1073 {
1074 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1075 
1076 	if (tp->flags & TP_FLAG_TRACE)
1077 		kprobe_trace_func(kp, regs);
1078 #ifdef CONFIG_PERF_EVENTS
1079 	if (tp->flags & TP_FLAG_PROFILE)
1080 		kprobe_perf_func(kp, regs);
1081 #endif
1082 	return 0;	/* We don't tweek kernel, so just return 0 */
1083 }
1084 
1085 static __kprobes
1086 int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1087 {
1088 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1089 
1090 	if (tp->flags & TP_FLAG_TRACE)
1091 		kretprobe_trace_func(ri, regs);
1092 #ifdef CONFIG_PERF_EVENTS
1093 	if (tp->flags & TP_FLAG_PROFILE)
1094 		kretprobe_perf_func(ri, regs);
1095 #endif
1096 	return 0;	/* We don't tweek kernel, so just return 0 */
1097 }
1098 
1099 static struct trace_event_functions kretprobe_funcs = {
1100 	.trace		= print_kretprobe_event
1101 };
1102 
1103 static struct trace_event_functions kprobe_funcs = {
1104 	.trace		= print_kprobe_event
1105 };
1106 
1107 static int register_probe_event(struct trace_probe *tp)
1108 {
1109 	struct ftrace_event_call *call = &tp->call;
1110 	int ret;
1111 
1112 	/* Initialize ftrace_event_call */
1113 	INIT_LIST_HEAD(&call->class->fields);
1114 	if (trace_probe_is_return(tp)) {
1115 		call->event.funcs = &kretprobe_funcs;
1116 		call->class->define_fields = kretprobe_event_define_fields;
1117 	} else {
1118 		call->event.funcs = &kprobe_funcs;
1119 		call->class->define_fields = kprobe_event_define_fields;
1120 	}
1121 	if (set_print_fmt(tp) < 0)
1122 		return -ENOMEM;
1123 	ret = register_ftrace_event(&call->event);
1124 	if (!ret) {
1125 		kfree(call->print_fmt);
1126 		return -ENODEV;
1127 	}
1128 	call->flags = 0;
1129 	call->class->reg = kprobe_register;
1130 	call->data = tp;
1131 	ret = trace_add_event_call(call);
1132 	if (ret) {
1133 		pr_info("Failed to register kprobe event: %s\n", call->name);
1134 		kfree(call->print_fmt);
1135 		unregister_ftrace_event(&call->event);
1136 	}
1137 	return ret;
1138 }
1139 
1140 static void unregister_probe_event(struct trace_probe *tp)
1141 {
1142 	/* tp->event is unregistered in trace_remove_event_call() */
1143 	trace_remove_event_call(&tp->call);
1144 	kfree(tp->call.print_fmt);
1145 }
1146 
1147 /* Make a debugfs interface for controlling probe points */
1148 static __init int init_kprobe_trace(void)
1149 {
1150 	struct dentry *d_tracer;
1151 	struct dentry *entry;
1152 
1153 	if (register_module_notifier(&trace_probe_module_nb))
1154 		return -EINVAL;
1155 
1156 	d_tracer = tracing_init_dentry();
1157 	if (!d_tracer)
1158 		return 0;
1159 
1160 	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1161 				    NULL, &kprobe_events_ops);
1162 
1163 	/* Event list interface */
1164 	if (!entry)
1165 		pr_warning("Could not create debugfs "
1166 			   "'kprobe_events' entry\n");
1167 
1168 	/* Profile interface */
1169 	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1170 				    NULL, &kprobe_profile_ops);
1171 
1172 	if (!entry)
1173 		pr_warning("Could not create debugfs "
1174 			   "'kprobe_profile' entry\n");
1175 	return 0;
1176 }
1177 fs_initcall(init_kprobe_trace);
1178 
1179 
1180 #ifdef CONFIG_FTRACE_STARTUP_TEST
1181 
/*
 * Probe target for the startup selftest: returns the sum of its six
 * arguments.
 *
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table.
 */
static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
					       int a4, int a5, int a6)
{
	int sum = a1;

	sum += a2;
	sum += a3;
	sum += a4;
	sum += a5;
	sum += a6;

	return sum;
}
1191 
1192 static __init int kprobe_trace_self_tests_init(void)
1193 {
1194 	int ret, warn = 0;
1195 	int (*target)(int, int, int, int, int, int);
1196 	struct trace_probe *tp;
1197 
1198 	target = kprobe_trace_selftest_target;
1199 
1200 	pr_info("Testing kprobe tracing: ");
1201 
1202 	ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
1203 				  "$stack $stack0 +0($stack)",
1204 				  create_trace_probe);
1205 	if (WARN_ON_ONCE(ret)) {
1206 		pr_warning("error on probing function entry.\n");
1207 		warn++;
1208 	} else {
1209 		/* Enable trace point */
1210 		tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
1211 		if (WARN_ON_ONCE(tp == NULL)) {
1212 			pr_warning("error on getting new probe.\n");
1213 			warn++;
1214 		} else
1215 			enable_trace_probe(tp, TP_FLAG_TRACE);
1216 	}
1217 
1218 	ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
1219 				  "$retval", create_trace_probe);
1220 	if (WARN_ON_ONCE(ret)) {
1221 		pr_warning("error on probing function return.\n");
1222 		warn++;
1223 	} else {
1224 		/* Enable trace point */
1225 		tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
1226 		if (WARN_ON_ONCE(tp == NULL)) {
1227 			pr_warning("error on getting new probe.\n");
1228 			warn++;
1229 		} else
1230 			enable_trace_probe(tp, TP_FLAG_TRACE);
1231 	}
1232 
1233 	if (warn)
1234 		goto end;
1235 
1236 	ret = target(1, 2, 3, 4, 5, 6);
1237 
1238 	/* Disable trace points before removing it */
1239 	tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
1240 	if (WARN_ON_ONCE(tp == NULL)) {
1241 		pr_warning("error on getting test probe.\n");
1242 		warn++;
1243 	} else
1244 		disable_trace_probe(tp, TP_FLAG_TRACE);
1245 
1246 	tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
1247 	if (WARN_ON_ONCE(tp == NULL)) {
1248 		pr_warning("error on getting 2nd test probe.\n");
1249 		warn++;
1250 	} else
1251 		disable_trace_probe(tp, TP_FLAG_TRACE);
1252 
1253 	ret = traceprobe_command("-:testprobe", create_trace_probe);
1254 	if (WARN_ON_ONCE(ret)) {
1255 		pr_warning("error on deleting a probe.\n");
1256 		warn++;
1257 	}
1258 
1259 	ret = traceprobe_command("-:testprobe2", create_trace_probe);
1260 	if (WARN_ON_ONCE(ret)) {
1261 		pr_warning("error on deleting a probe.\n");
1262 		warn++;
1263 	}
1264 
1265 end:
1266 	release_all_trace_probes();
1267 	if (warn)
1268 		pr_cont("NG: Some tests are failed. Please check them.\n");
1269 	else
1270 		pr_cont("OK\n");
1271 	return 0;
1272 }
1273 
1274 late_initcall(kprobe_trace_self_tests_init);
1275 
1276 #endif
1277