/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

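/*
 * One scratch buffer per recursion context (PERF_NR_CONTEXTS), used by
 * perf_trace_buf_prepare() to build a raw sample without stepping on a
 * nested user of the same CPU.
 */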
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned access
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int	total_ref_count;

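/*
 * Permission check: plain counting is always fine, but raw tracepoint
 * samples can expose kernel data, so they are only handed to
 * sufficiently privileged users (see the checks below).
 */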
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	/* No tracing, just counting, so no obvious leak */
	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
		return 0;

	/* Some events are ok to be traced by non-root users... */
	if (p_event->attach_state == PERF_ATTACH_TASK) {
		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
			return 0;
	}

	/*
	 * ...otherwise raw tracepoint data can be a severe data leak,
	 * so only allow root to have these.
	 */
	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}

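/*
 * Bind @p_event to @tp_event.  The first user of a given trace event
 * allocates its per-cpu hlist of active perf events and, if no other
 * trace event is in use yet, the shared per-context scratch buffers,
 * then registers the event with the tracing core.
 */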
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	struct hlist_head __percpu *list;
	int ret;
	int cpu;

	ret = perf_trace_event_perm(tp_event, p_event);
	if (ret)
		return ret;

	p_event->tp_event = tp_event;
	if (tp_event->perf_refcount++ > 0)
		return 0;

	ret = -ENOMEM;

	list = alloc_percpu(struct hlist_head);
	if (!list)
		goto fail;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;

	if (!total_ref_count) {
		char __percpu *buf;
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			buf = (char __percpu *)alloc_percpu(perf_trace_t);
			if (!buf)
				goto fail;

			perf_trace_buf[i] = buf;
		}
	}

	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
	if (ret)
		goto fail;

	total_ref_count++;
	return 0;

fail:
	if (!total_ref_count) {
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}

	if (!--tp_event->perf_refcount) {
		free_percpu(tp_event->perf_events);
		tp_event->perf_events = NULL;
	}

	return ret;
}

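/*
 * Called by the perf core when a tracepoint event is created: find the
 * trace event whose id matches perf_event_attr::config, pin the module
 * that provides it and set up the perf side of the event.
 */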
int perf_trace_init(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event;
	int event_id = p_event->attr.config;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->event.type == event_id &&
		    tp_event->class && tp_event->class->reg &&
		    try_module_get(tp_event->mod)) {
			ret = perf_trace_event_init(tp_event, p_event);
			if (ret)
				module_put(tp_event->mod);
			break;
		}
	}
	mutex_unlock(&event_mutex);

	return ret;
}

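/*
 * pmu::add callback: queue @p_event on this CPU's hlist so the
 * tracepoint callback will see it.  Unless PERF_EF_START is set the
 * event starts out stopped.
 */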
int perf_trace_add(struct perf_event *p_event, int flags)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	struct hlist_head __percpu *pcpu_list;
	struct hlist_head *list;

	pcpu_list = tp_event->perf_events;
	if (WARN_ON_ONCE(!pcpu_list))
		return -EINVAL;

	if (!(flags & PERF_EF_START))
		p_event->hw.state = PERF_HES_STOPPED;

	list = this_cpu_ptr(pcpu_list);
	hlist_add_head_rcu(&p_event->hlist_entry, list);

	return 0;
}

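/* pmu::del callback: remove @p_event from its per-cpu hlist. */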
void perf_trace_del(struct perf_event *p_event, int flags)
{
	hlist_del_rcu(&p_event->hlist_entry);
}

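/*
 * Drop @p_event's reference on its trace event.  The last user
 * unregisters the event from the tracing core, waits for pending
 * tracepoint callbacks to finish and frees the per-cpu lists and, if
 * no trace event is left in use, the shared scratch buffers.
 */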
void perf_trace_destroy(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	int i;

	mutex_lock(&event_mutex);
	if (--tp_event->perf_refcount > 0)
		goto out;

	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);

	/*
	 * Ensure our callback won't be called anymore. The buffers
	 * will be freed after that.
	 */
	tracepoint_synchronize_unregister();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	if (!--total_ref_count) {
		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}
out:
	module_put(tp_event->mod);
	mutex_unlock(&event_mutex);
}

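/*
 * Grab this CPU's scratch buffer for the current recursion context,
 * zero the trailing alignment padding and fill in the common
 * trace_entry header.  Returns NULL (and takes no recursion context)
 * if nesting is detected; otherwise the caller must later release the
 * context recorded in *rctxp.
 */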
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
				       struct pt_regs *regs, int *rctxp)
{
	struct trace_entry *entry;
	unsigned long flags;
	char *raw_data;
	int pc;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	pc = preempt_count();

	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		return NULL;

	raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

	/* zero the dead bytes from the alignment to avoid leaking stack to userspace */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
	entry = (struct trace_entry *)raw_data;
	local_save_flags(flags);
	tracing_generic_entry_update(entry, flags, pc);
	entry->type = type;

	return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);