xref: /openbmc/linux/kernel/trace/trace_uprobe.c (revision c4ee0af3)
1 /*
2  * uprobes-based tracing events
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  *
17  * Copyright (C) IBM Corporation, 2010-2012
18  * Author:	Srikar Dronamraju <srikar@linux.vnet.ibm.com>
19  */
20 
21 #include <linux/module.h>
22 #include <linux/uaccess.h>
23 #include <linux/uprobes.h>
24 #include <linux/namei.h>
25 #include <linux/string.h>
26 
27 #include "trace_probe.h"
28 
/* Event group used when a probe definition does not name one. */
#define UPROBE_EVENT_SYSTEM "uprobes"
30 
31 struct uprobe_trace_entry_head {
32 	struct trace_entry	ent;
33 	unsigned long		vaddr[];
34 };
35 
/*
 * Size of the fixed part of a trace record: the entry head plus one
 * address slot for an entry probe, or two for a return probe.
 *
 * The argument is parenthesized so that any expression (including a
 * conditional expression) can be passed without changing how the
 * "? 2 : 1" selection is parsed.
 */
#define SIZEOF_TRACE_ENTRY(is_return)			\
	(sizeof(struct uprobe_trace_entry_head) +	\
	 sizeof(unsigned long) * ((is_return) ? 2 : 1))
39 
/* Address of the fetched-argument area that follows the address slots. */
#define DATAOF_TRACE_ENTRY(entry, is_return) \
	((void *)(entry) + SIZEOF_TRACE_ENTRY(is_return))
42 
43 struct trace_uprobe_filter {
44 	rwlock_t		rwlock;
45 	int			nr_systemwide;
46 	struct list_head	perf_events;
47 };
48 
49 /*
50  * uprobe event core functions
51  */
52 struct trace_uprobe {
53 	struct list_head		list;
54 	struct ftrace_event_class	class;
55 	struct ftrace_event_call	call;
56 	struct trace_uprobe_filter	filter;
57 	struct uprobe_consumer		consumer;
58 	struct inode			*inode;
59 	char				*filename;
60 	unsigned long			offset;
61 	unsigned long			nhit;
62 	unsigned int			flags;	/* For TP_FLAG_* */
63 	ssize_t				size;	/* trace entry size */
64 	unsigned int			nr_args;
65 	struct probe_arg		args[];
66 };
67 
/* Bytes needed for a trace_uprobe that carries @n probe arguments. */
#define SIZEOF_TRACE_UPROBE(n) \
	(offsetof(struct trace_uprobe, args) + ((n) * sizeof(struct probe_arg)))
71 
/* Event (un)registration helpers, defined later in this file. */
static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);

/* uprobe_lock is held while uprobe_list is searched, walked or modified. */
static DEFINE_MUTEX(uprobe_lock);
static LIST_HEAD(uprobe_list);

/* Consumer callbacks installed by alloc_trace_uprobe(). */
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
static int uretprobe_dispatcher(struct uprobe_consumer *con,
				unsigned long func, struct pt_regs *regs);
81 
82 static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
83 {
84 	rwlock_init(&filter->rwlock);
85 	filter->nr_systemwide = 0;
86 	INIT_LIST_HEAD(&filter->perf_events);
87 }
88 
89 static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
90 {
91 	return !filter->nr_systemwide && list_empty(&filter->perf_events);
92 }
93 
94 static inline bool is_ret_probe(struct trace_uprobe *tu)
95 {
96 	return tu->consumer.ret_handler != NULL;
97 }
98 
99 /*
100  * Allocate new trace_uprobe and initialize it (including uprobes).
101  */
102 static struct trace_uprobe *
103 alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
104 {
105 	struct trace_uprobe *tu;
106 
107 	if (!event || !is_good_name(event))
108 		return ERR_PTR(-EINVAL);
109 
110 	if (!group || !is_good_name(group))
111 		return ERR_PTR(-EINVAL);
112 
113 	tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
114 	if (!tu)
115 		return ERR_PTR(-ENOMEM);
116 
117 	tu->call.class = &tu->class;
118 	tu->call.name = kstrdup(event, GFP_KERNEL);
119 	if (!tu->call.name)
120 		goto error;
121 
122 	tu->class.system = kstrdup(group, GFP_KERNEL);
123 	if (!tu->class.system)
124 		goto error;
125 
126 	INIT_LIST_HEAD(&tu->list);
127 	tu->consumer.handler = uprobe_dispatcher;
128 	if (is_ret)
129 		tu->consumer.ret_handler = uretprobe_dispatcher;
130 	init_trace_uprobe_filter(&tu->filter);
131 	tu->call.flags |= TRACE_EVENT_FL_USE_CALL_FILTER;
132 	return tu;
133 
134 error:
135 	kfree(tu->call.name);
136 	kfree(tu);
137 
138 	return ERR_PTR(-ENOMEM);
139 }
140 
141 static void free_trace_uprobe(struct trace_uprobe *tu)
142 {
143 	int i;
144 
145 	for (i = 0; i < tu->nr_args; i++)
146 		traceprobe_free_probe_arg(&tu->args[i]);
147 
148 	iput(tu->inode);
149 	kfree(tu->call.class->system);
150 	kfree(tu->call.name);
151 	kfree(tu->filename);
152 	kfree(tu);
153 }
154 
155 static struct trace_uprobe *find_probe_event(const char *event, const char *group)
156 {
157 	struct trace_uprobe *tu;
158 
159 	list_for_each_entry(tu, &uprobe_list, list)
160 		if (strcmp(tu->call.name, event) == 0 &&
161 		    strcmp(tu->call.class->system, group) == 0)
162 			return tu;
163 
164 	return NULL;
165 }
166 
167 /* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
168 static int unregister_trace_uprobe(struct trace_uprobe *tu)
169 {
170 	int ret;
171 
172 	ret = unregister_uprobe_event(tu);
173 	if (ret)
174 		return ret;
175 
176 	list_del(&tu->list);
177 	free_trace_uprobe(tu);
178 	return 0;
179 }
180 
181 /* Register a trace_uprobe and probe_event */
182 static int register_trace_uprobe(struct trace_uprobe *tu)
183 {
184 	struct trace_uprobe *old_tp;
185 	int ret;
186 
187 	mutex_lock(&uprobe_lock);
188 
189 	/* register as an event */
190 	old_tp = find_probe_event(tu->call.name, tu->call.class->system);
191 	if (old_tp) {
192 		/* delete old event */
193 		ret = unregister_trace_uprobe(old_tp);
194 		if (ret)
195 			goto end;
196 	}
197 
198 	ret = register_uprobe_event(tu);
199 	if (ret) {
200 		pr_warning("Failed to register probe event(%d)\n", ret);
201 		goto end;
202 	}
203 
204 	list_add_tail(&tu->list, &uprobe_list);
205 
206 end:
207 	mutex_unlock(&uprobe_lock);
208 
209 	return ret;
210 }
211 
212 /*
213  * Argument syntax:
214  *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS]
215  *
216  *  - Remove uprobe: -:[GRP/]EVENT
217  */
218 static int create_trace_uprobe(int argc, char **argv)
219 {
220 	struct trace_uprobe *tu;
221 	struct inode *inode;
222 	char *arg, *event, *group, *filename;
223 	char buf[MAX_EVENT_NAME_LEN];
224 	struct path path;
225 	unsigned long offset;
226 	bool is_delete, is_return;
227 	int i, ret;
228 
229 	inode = NULL;
230 	ret = 0;
231 	is_delete = false;
232 	is_return = false;
233 	event = NULL;
234 	group = NULL;
235 
236 	/* argc must be >= 1 */
237 	if (argv[0][0] == '-')
238 		is_delete = true;
239 	else if (argv[0][0] == 'r')
240 		is_return = true;
241 	else if (argv[0][0] != 'p') {
242 		pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
243 		return -EINVAL;
244 	}
245 
246 	if (argv[0][1] == ':') {
247 		event = &argv[0][2];
248 		arg = strchr(event, '/');
249 
250 		if (arg) {
251 			group = event;
252 			event = arg + 1;
253 			event[-1] = '\0';
254 
255 			if (strlen(group) == 0) {
256 				pr_info("Group name is not specified\n");
257 				return -EINVAL;
258 			}
259 		}
260 		if (strlen(event) == 0) {
261 			pr_info("Event name is not specified\n");
262 			return -EINVAL;
263 		}
264 	}
265 	if (!group)
266 		group = UPROBE_EVENT_SYSTEM;
267 
268 	if (is_delete) {
269 		int ret;
270 
271 		if (!event) {
272 			pr_info("Delete command needs an event name.\n");
273 			return -EINVAL;
274 		}
275 		mutex_lock(&uprobe_lock);
276 		tu = find_probe_event(event, group);
277 
278 		if (!tu) {
279 			mutex_unlock(&uprobe_lock);
280 			pr_info("Event %s/%s doesn't exist.\n", group, event);
281 			return -ENOENT;
282 		}
283 		/* delete an event */
284 		ret = unregister_trace_uprobe(tu);
285 		mutex_unlock(&uprobe_lock);
286 		return ret;
287 	}
288 
289 	if (argc < 2) {
290 		pr_info("Probe point is not specified.\n");
291 		return -EINVAL;
292 	}
293 	if (isdigit(argv[1][0])) {
294 		pr_info("probe point must be have a filename.\n");
295 		return -EINVAL;
296 	}
297 	arg = strchr(argv[1], ':');
298 	if (!arg) {
299 		ret = -EINVAL;
300 		goto fail_address_parse;
301 	}
302 
303 	*arg++ = '\0';
304 	filename = argv[1];
305 	ret = kern_path(filename, LOOKUP_FOLLOW, &path);
306 	if (ret)
307 		goto fail_address_parse;
308 
309 	inode = igrab(path.dentry->d_inode);
310 	path_put(&path);
311 
312 	if (!inode || !S_ISREG(inode->i_mode)) {
313 		ret = -EINVAL;
314 		goto fail_address_parse;
315 	}
316 
317 	ret = kstrtoul(arg, 0, &offset);
318 	if (ret)
319 		goto fail_address_parse;
320 
321 	argc -= 2;
322 	argv += 2;
323 
324 	/* setup a probe */
325 	if (!event) {
326 		char *tail;
327 		char *ptr;
328 
329 		tail = kstrdup(kbasename(filename), GFP_KERNEL);
330 		if (!tail) {
331 			ret = -ENOMEM;
332 			goto fail_address_parse;
333 		}
334 
335 		ptr = strpbrk(tail, ".-_");
336 		if (ptr)
337 			*ptr = '\0';
338 
339 		snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
340 		event = buf;
341 		kfree(tail);
342 	}
343 
344 	tu = alloc_trace_uprobe(group, event, argc, is_return);
345 	if (IS_ERR(tu)) {
346 		pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
347 		ret = PTR_ERR(tu);
348 		goto fail_address_parse;
349 	}
350 	tu->offset = offset;
351 	tu->inode = inode;
352 	tu->filename = kstrdup(filename, GFP_KERNEL);
353 
354 	if (!tu->filename) {
355 		pr_info("Failed to allocate filename.\n");
356 		ret = -ENOMEM;
357 		goto error;
358 	}
359 
360 	/* parse arguments */
361 	ret = 0;
362 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
363 		/* Increment count for freeing args in error case */
364 		tu->nr_args++;
365 
366 		/* Parse argument name */
367 		arg = strchr(argv[i], '=');
368 		if (arg) {
369 			*arg++ = '\0';
370 			tu->args[i].name = kstrdup(argv[i], GFP_KERNEL);
371 		} else {
372 			arg = argv[i];
373 			/* If argument name is omitted, set "argN" */
374 			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
375 			tu->args[i].name = kstrdup(buf, GFP_KERNEL);
376 		}
377 
378 		if (!tu->args[i].name) {
379 			pr_info("Failed to allocate argument[%d] name.\n", i);
380 			ret = -ENOMEM;
381 			goto error;
382 		}
383 
384 		if (!is_good_name(tu->args[i].name)) {
385 			pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name);
386 			ret = -EINVAL;
387 			goto error;
388 		}
389 
390 		if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) {
391 			pr_info("Argument[%d] name '%s' conflicts with "
392 				"another field.\n", i, argv[i]);
393 			ret = -EINVAL;
394 			goto error;
395 		}
396 
397 		/* Parse fetch argument */
398 		ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false);
399 		if (ret) {
400 			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
401 			goto error;
402 		}
403 	}
404 
405 	ret = register_trace_uprobe(tu);
406 	if (ret)
407 		goto error;
408 	return 0;
409 
410 error:
411 	free_trace_uprobe(tu);
412 	return ret;
413 
414 fail_address_parse:
415 	if (inode)
416 		iput(inode);
417 
418 	pr_info("Failed to parse address or file.\n");
419 
420 	return ret;
421 }
422 
423 static int cleanup_all_probes(void)
424 {
425 	struct trace_uprobe *tu;
426 	int ret = 0;
427 
428 	mutex_lock(&uprobe_lock);
429 	while (!list_empty(&uprobe_list)) {
430 		tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
431 		ret = unregister_trace_uprobe(tu);
432 		if (ret)
433 			break;
434 	}
435 	mutex_unlock(&uprobe_lock);
436 	return ret;
437 }
438 
439 /* Probes listing interfaces */
440 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
441 {
442 	mutex_lock(&uprobe_lock);
443 	return seq_list_start(&uprobe_list, *pos);
444 }
445 
446 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
447 {
448 	return seq_list_next(v, &uprobe_list, pos);
449 }
450 
451 static void probes_seq_stop(struct seq_file *m, void *v)
452 {
453 	mutex_unlock(&uprobe_lock);
454 }
455 
456 static int probes_seq_show(struct seq_file *m, void *v)
457 {
458 	struct trace_uprobe *tu = v;
459 	char c = is_ret_probe(tu) ? 'r' : 'p';
460 	int i;
461 
462 	seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name);
463 	seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
464 
465 	for (i = 0; i < tu->nr_args; i++)
466 		seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm);
467 
468 	seq_printf(m, "\n");
469 	return 0;
470 }
471 
472 static const struct seq_operations probes_seq_op = {
473 	.start	= probes_seq_start,
474 	.next	= probes_seq_next,
475 	.stop	= probes_seq_stop,
476 	.show	= probes_seq_show
477 };
478 
479 static int probes_open(struct inode *inode, struct file *file)
480 {
481 	int ret;
482 
483 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
484 		ret = cleanup_all_probes();
485 		if (ret)
486 			return ret;
487 	}
488 
489 	return seq_open(file, &probes_seq_op);
490 }
491 
492 static ssize_t probes_write(struct file *file, const char __user *buffer,
493 			    size_t count, loff_t *ppos)
494 {
495 	return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe);
496 }
497 
498 static const struct file_operations uprobe_events_ops = {
499 	.owner		= THIS_MODULE,
500 	.open		= probes_open,
501 	.read		= seq_read,
502 	.llseek		= seq_lseek,
503 	.release	= seq_release,
504 	.write		= probes_write,
505 };
506 
507 /* Probes profiling interfaces */
508 static int probes_profile_seq_show(struct seq_file *m, void *v)
509 {
510 	struct trace_uprobe *tu = v;
511 
512 	seq_printf(m, "  %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit);
513 	return 0;
514 }
515 
516 static const struct seq_operations profile_seq_op = {
517 	.start	= probes_seq_start,
518 	.next	= probes_seq_next,
519 	.stop	= probes_seq_stop,
520 	.show	= probes_profile_seq_show
521 };
522 
523 static int profile_open(struct inode *inode, struct file *file)
524 {
525 	return seq_open(file, &profile_seq_op);
526 }
527 
528 static const struct file_operations uprobe_profile_ops = {
529 	.owner		= THIS_MODULE,
530 	.open		= profile_open,
531 	.read		= seq_read,
532 	.llseek		= seq_lseek,
533 	.release	= seq_release,
534 };
535 
536 static void uprobe_trace_print(struct trace_uprobe *tu,
537 				unsigned long func, struct pt_regs *regs)
538 {
539 	struct uprobe_trace_entry_head *entry;
540 	struct ring_buffer_event *event;
541 	struct ring_buffer *buffer;
542 	void *data;
543 	int size, i;
544 	struct ftrace_event_call *call = &tu->call;
545 
546 	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
547 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
548 						  size + tu->size, 0, 0);
549 	if (!event)
550 		return;
551 
552 	entry = ring_buffer_event_data(event);
553 	if (is_ret_probe(tu)) {
554 		entry->vaddr[0] = func;
555 		entry->vaddr[1] = instruction_pointer(regs);
556 		data = DATAOF_TRACE_ENTRY(entry, true);
557 	} else {
558 		entry->vaddr[0] = instruction_pointer(regs);
559 		data = DATAOF_TRACE_ENTRY(entry, false);
560 	}
561 
562 	for (i = 0; i < tu->nr_args; i++)
563 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
564 
565 	if (!call_filter_check_discard(call, entry, buffer, event))
566 		trace_buffer_unlock_commit(buffer, event, 0, 0);
567 }
568 
/*
 * uprobe handler (tracing): record the hit, except for return probes,
 * which are recorded by uretprobe_trace_func().  Always returns 0.
 */
static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
{
	if (is_ret_probe(tu))
		return 0;

	uprobe_trace_print(tu, 0, regs);
	return 0;
}
576 
/* uretprobe handler (tracing): record the return together with @func. */
static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
				 struct pt_regs *regs)
{
	uprobe_trace_print(tu, func, regs);
}
582 
583 /* Event entry printers */
584 static enum print_line_t
585 print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
586 {
587 	struct uprobe_trace_entry_head *entry;
588 	struct trace_seq *s = &iter->seq;
589 	struct trace_uprobe *tu;
590 	u8 *data;
591 	int i;
592 
593 	entry = (struct uprobe_trace_entry_head *)iter->ent;
594 	tu = container_of(event, struct trace_uprobe, call.event);
595 
596 	if (is_ret_probe(tu)) {
597 		if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name,
598 					entry->vaddr[1], entry->vaddr[0]))
599 			goto partial;
600 		data = DATAOF_TRACE_ENTRY(entry, true);
601 	} else {
602 		if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name,
603 					entry->vaddr[0]))
604 			goto partial;
605 		data = DATAOF_TRACE_ENTRY(entry, false);
606 	}
607 
608 	for (i = 0; i < tu->nr_args; i++) {
609 		if (!tu->args[i].type->print(s, tu->args[i].name,
610 					     data + tu->args[i].offset, entry))
611 			goto partial;
612 	}
613 
614 	if (trace_seq_puts(s, "\n"))
615 		return TRACE_TYPE_HANDLED;
616 
617 partial:
618 	return TRACE_TYPE_PARTIAL_LINE;
619 }
620 
621 static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
622 {
623 	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
624 }
625 
626 typedef bool (*filter_func_t)(struct uprobe_consumer *self,
627 				enum uprobe_filter_ctx ctx,
628 				struct mm_struct *mm);
629 
630 static int
631 probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
632 {
633 	int ret = 0;
634 
635 	if (is_trace_uprobe_enabled(tu))
636 		return -EINTR;
637 
638 	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
639 
640 	tu->flags |= flag;
641 	tu->consumer.filter = filter;
642 	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
643 	if (ret)
644 		tu->flags &= ~flag;
645 
646 	return ret;
647 }
648 
649 static void probe_event_disable(struct trace_uprobe *tu, int flag)
650 {
651 	if (!is_trace_uprobe_enabled(tu))
652 		return;
653 
654 	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
655 
656 	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
657 	tu->flags &= ~flag;
658 }
659 
660 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
661 {
662 	int ret, i, size;
663 	struct uprobe_trace_entry_head field;
664 	struct trace_uprobe *tu = event_call->data;
665 
666 	if (is_ret_probe(tu)) {
667 		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
668 		DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0);
669 		size = SIZEOF_TRACE_ENTRY(true);
670 	} else {
671 		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
672 		size = SIZEOF_TRACE_ENTRY(false);
673 	}
674 	/* Set argument names as fields */
675 	for (i = 0; i < tu->nr_args; i++) {
676 		ret = trace_define_field(event_call, tu->args[i].type->fmttype,
677 					 tu->args[i].name,
678 					 size + tu->args[i].offset,
679 					 tu->args[i].type->size,
680 					 tu->args[i].type->is_signed,
681 					 FILTER_OTHER);
682 
683 		if (ret)
684 			return ret;
685 	}
686 	return 0;
687 }
688 
689 #define LEN_OR_ZERO		(len ? len - pos : 0)
690 static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
691 {
692 	const char *fmt, *arg;
693 	int i;
694 	int pos = 0;
695 
696 	if (is_ret_probe(tu)) {
697 		fmt = "(%lx <- %lx)";
698 		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
699 	} else {
700 		fmt = "(%lx)";
701 		arg = "REC->" FIELD_STRING_IP;
702 	}
703 
704 	/* When len=0, we just calculate the needed length */
705 
706 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
707 
708 	for (i = 0; i < tu->nr_args; i++) {
709 		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
710 				tu->args[i].name, tu->args[i].type->fmt);
711 	}
712 
713 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
714 
715 	for (i = 0; i < tu->nr_args; i++) {
716 		pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
717 				tu->args[i].name);
718 	}
719 
720 	return pos;	/* return the length of print_fmt */
721 }
722 #undef LEN_OR_ZERO
723 
724 static int set_print_fmt(struct trace_uprobe *tu)
725 {
726 	char *print_fmt;
727 	int len;
728 
729 	/* First: called with 0 length to calculate the needed length */
730 	len = __set_print_fmt(tu, NULL, 0);
731 	print_fmt = kmalloc(len + 1, GFP_KERNEL);
732 	if (!print_fmt)
733 		return -ENOMEM;
734 
735 	/* Second: actually write the @print_fmt */
736 	__set_print_fmt(tu, print_fmt, len + 1);
737 	tu->call.print_fmt = print_fmt;
738 
739 	return 0;
740 }
741 
742 #ifdef CONFIG_PERF_EVENTS
743 static bool
744 __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
745 {
746 	struct perf_event *event;
747 
748 	if (filter->nr_systemwide)
749 		return true;
750 
751 	list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
752 		if (event->hw.tp_target->mm == mm)
753 			return true;
754 	}
755 
756 	return false;
757 }
758 
759 static inline bool
760 uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
761 {
762 	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
763 }
764 
765 static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
766 {
767 	bool done;
768 
769 	write_lock(&tu->filter.rwlock);
770 	if (event->hw.tp_target) {
771 		/*
772 		 * event->parent != NULL means copy_process(), we can avoid
773 		 * uprobe_apply(). current->mm must be probed and we can rely
774 		 * on dup_mmap() which preserves the already installed bp's.
775 		 *
776 		 * attr.enable_on_exec means that exec/mmap will install the
777 		 * breakpoints we need.
778 		 */
779 		done = tu->filter.nr_systemwide ||
780 			event->parent || event->attr.enable_on_exec ||
781 			uprobe_filter_event(tu, event);
782 		list_add(&event->hw.tp_list, &tu->filter.perf_events);
783 	} else {
784 		done = tu->filter.nr_systemwide;
785 		tu->filter.nr_systemwide++;
786 	}
787 	write_unlock(&tu->filter.rwlock);
788 
789 	if (!done)
790 		uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
791 
792 	return 0;
793 }
794 
795 static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
796 {
797 	bool done;
798 
799 	write_lock(&tu->filter.rwlock);
800 	if (event->hw.tp_target) {
801 		list_del(&event->hw.tp_list);
802 		done = tu->filter.nr_systemwide ||
803 			(event->hw.tp_target->flags & PF_EXITING) ||
804 			uprobe_filter_event(tu, event);
805 	} else {
806 		tu->filter.nr_systemwide--;
807 		done = tu->filter.nr_systemwide;
808 	}
809 	write_unlock(&tu->filter.rwlock);
810 
811 	if (!done)
812 		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
813 
814 	return 0;
815 }
816 
817 static bool uprobe_perf_filter(struct uprobe_consumer *uc,
818 				enum uprobe_filter_ctx ctx, struct mm_struct *mm)
819 {
820 	struct trace_uprobe *tu;
821 	int ret;
822 
823 	tu = container_of(uc, struct trace_uprobe, consumer);
824 	read_lock(&tu->filter.rwlock);
825 	ret = __uprobe_perf_filter(&tu->filter, mm);
826 	read_unlock(&tu->filter.rwlock);
827 
828 	return ret;
829 }
830 
831 static void uprobe_perf_print(struct trace_uprobe *tu,
832 				unsigned long func, struct pt_regs *regs)
833 {
834 	struct ftrace_event_call *call = &tu->call;
835 	struct uprobe_trace_entry_head *entry;
836 	struct hlist_head *head;
837 	void *data;
838 	int size, rctx, i;
839 
840 	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
841 	size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
842 
843 	preempt_disable();
844 	head = this_cpu_ptr(call->perf_events);
845 	if (hlist_empty(head))
846 		goto out;
847 
848 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
849 	if (!entry)
850 		goto out;
851 
852 	if (is_ret_probe(tu)) {
853 		entry->vaddr[0] = func;
854 		entry->vaddr[1] = instruction_pointer(regs);
855 		data = DATAOF_TRACE_ENTRY(entry, true);
856 	} else {
857 		entry->vaddr[0] = instruction_pointer(regs);
858 		data = DATAOF_TRACE_ENTRY(entry, false);
859 	}
860 
861 	for (i = 0; i < tu->nr_args; i++)
862 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
863 
864 	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
865  out:
866 	preempt_enable();
867 }
868 
869 /* uprobe profile handler */
870 static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
871 {
872 	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
873 		return UPROBE_HANDLER_REMOVE;
874 
875 	if (!is_ret_probe(tu))
876 		uprobe_perf_print(tu, 0, regs);
877 	return 0;
878 }
879 
/* uretprobe profile handler: emit a sample that includes @func. */
static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
				struct pt_regs *regs)
{
	uprobe_perf_print(tu, func, regs);
}
885 #endif	/* CONFIG_PERF_EVENTS */
886 
887 static
888 int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
889 {
890 	struct trace_uprobe *tu = event->data;
891 
892 	switch (type) {
893 	case TRACE_REG_REGISTER:
894 		return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
895 
896 	case TRACE_REG_UNREGISTER:
897 		probe_event_disable(tu, TP_FLAG_TRACE);
898 		return 0;
899 
900 #ifdef CONFIG_PERF_EVENTS
901 	case TRACE_REG_PERF_REGISTER:
902 		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
903 
904 	case TRACE_REG_PERF_UNREGISTER:
905 		probe_event_disable(tu, TP_FLAG_PROFILE);
906 		return 0;
907 
908 	case TRACE_REG_PERF_OPEN:
909 		return uprobe_perf_open(tu, data);
910 
911 	case TRACE_REG_PERF_CLOSE:
912 		return uprobe_perf_close(tu, data);
913 
914 #endif
915 	default:
916 		return 0;
917 	}
918 	return 0;
919 }
920 
921 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
922 {
923 	struct trace_uprobe *tu;
924 	int ret = 0;
925 
926 	tu = container_of(con, struct trace_uprobe, consumer);
927 	tu->nhit++;
928 
929 	if (tu->flags & TP_FLAG_TRACE)
930 		ret |= uprobe_trace_func(tu, regs);
931 
932 #ifdef CONFIG_PERF_EVENTS
933 	if (tu->flags & TP_FLAG_PROFILE)
934 		ret |= uprobe_perf_func(tu, regs);
935 #endif
936 	return ret;
937 }
938 
939 static int uretprobe_dispatcher(struct uprobe_consumer *con,
940 				unsigned long func, struct pt_regs *regs)
941 {
942 	struct trace_uprobe *tu;
943 
944 	tu = container_of(con, struct trace_uprobe, consumer);
945 
946 	if (tu->flags & TP_FLAG_TRACE)
947 		uretprobe_trace_func(tu, func, regs);
948 
949 #ifdef CONFIG_PERF_EVENTS
950 	if (tu->flags & TP_FLAG_PROFILE)
951 		uretprobe_perf_func(tu, func, regs);
952 #endif
953 	return 0;
954 }
955 
956 static struct trace_event_functions uprobe_funcs = {
957 	.trace		= print_uprobe_event
958 };
959 
960 static int register_uprobe_event(struct trace_uprobe *tu)
961 {
962 	struct ftrace_event_call *call = &tu->call;
963 	int ret;
964 
965 	/* Initialize ftrace_event_call */
966 	INIT_LIST_HEAD(&call->class->fields);
967 	call->event.funcs = &uprobe_funcs;
968 	call->class->define_fields = uprobe_event_define_fields;
969 
970 	if (set_print_fmt(tu) < 0)
971 		return -ENOMEM;
972 
973 	ret = register_ftrace_event(&call->event);
974 	if (!ret) {
975 		kfree(call->print_fmt);
976 		return -ENODEV;
977 	}
978 	call->flags = 0;
979 	call->class->reg = trace_uprobe_register;
980 	call->data = tu;
981 	ret = trace_add_event_call(call);
982 
983 	if (ret) {
984 		pr_info("Failed to register uprobe event: %s\n", call->name);
985 		kfree(call->print_fmt);
986 		unregister_ftrace_event(&call->event);
987 	}
988 
989 	return ret;
990 }
991 
992 static int unregister_uprobe_event(struct trace_uprobe *tu)
993 {
994 	int ret;
995 
996 	/* tu->event is unregistered in trace_remove_event_call() */
997 	ret = trace_remove_event_call(&tu->call);
998 	if (ret)
999 		return ret;
1000 	kfree(tu->call.print_fmt);
1001 	tu->call.print_fmt = NULL;
1002 	return 0;
1003 }
1004 
1005 /* Make a trace interface for controling probe points */
1006 static __init int init_uprobe_trace(void)
1007 {
1008 	struct dentry *d_tracer;
1009 
1010 	d_tracer = tracing_init_dentry();
1011 	if (!d_tracer)
1012 		return 0;
1013 
1014 	trace_create_file("uprobe_events", 0644, d_tracer,
1015 				    NULL, &uprobe_events_ops);
1016 	/* Profile interface */
1017 	trace_create_file("uprobe_profile", 0444, d_tracer,
1018 				    NULL, &uprobe_profile_ops);
1019 	return 0;
1020 }
1021 
1022 fs_initcall(init_uprobe_trace);
1023