/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned accesses
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int total_ref_count;

static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	/* No tracing, just counting, so no obvious leak */
	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
		return 0;

	/* Some events are ok to be traced by non-root users... */
	if (p_event->attach_state == PERF_ATTACH_TASK) {
		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
			return 0;
	}

	/*
	 * ...otherwise raw tracepoint data can be a severe data leak,
	 * only allow root to have these.
	 */
	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}

static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	struct hlist_head __percpu *list;
	int ret;
	int cpu;

	ret = perf_trace_event_perm(tp_event, p_event);
	if (ret)
		return ret;

	p_event->tp_event = tp_event;
	if (tp_event->perf_refcount++ > 0)
		return 0;

	ret = -ENOMEM;

	list = alloc_percpu(struct hlist_head);
	if (!list)
		goto fail;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;

	if (!total_ref_count) {
		char __percpu *buf;
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			buf = (char __percpu *)alloc_percpu(perf_trace_t);
			if (!buf)
				goto fail;

			perf_trace_buf[i] = buf;
		}
	}

	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
	if (ret)
		goto fail;

	total_ref_count++;
	return 0;

fail:
	if (!total_ref_count) {
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}

	if (!--tp_event->perf_refcount) {
		free_percpu(tp_event->perf_events);
		tp_event->perf_events = NULL;
	}

	return ret;
}

int perf_trace_init(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event;
	int event_id = p_event->attr.config;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->event.type == event_id &&
		    tp_event->class && tp_event->class->reg &&
		    try_module_get(tp_event->mod)) {
			ret = perf_trace_event_init(tp_event, p_event);
			if (ret)
				module_put(tp_event->mod);
			break;
		}
	}
	mutex_unlock(&event_mutex);

	return ret;
}

int perf_trace_add(struct perf_event *p_event, int flags)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	struct hlist_head __percpu *pcpu_list;
	struct hlist_head *list;

	pcpu_list = tp_event->perf_events;
	if (WARN_ON_ONCE(!pcpu_list))
		return -EINVAL;

	if (!(flags & PERF_EF_START))
		p_event->hw.state = PERF_HES_STOPPED;

	list = this_cpu_ptr(pcpu_list);
	hlist_add_head_rcu(&p_event->hlist_entry, list);

	return 0;
}

void perf_trace_del(struct perf_event *p_event, int flags)
{
	hlist_del_rcu(&p_event->hlist_entry);
}

void perf_trace_destroy(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	int i;

	mutex_lock(&event_mutex);
	if (--tp_event->perf_refcount > 0)
		goto out;

	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);

	/*
	 * Ensure our callback won't be called anymore. The buffers
	 * will be freed after that.
	 */
	tracepoint_synchronize_unregister();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	if (!--total_ref_count) {
		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}
out:
	module_put(tp_event->mod);
	mutex_unlock(&event_mutex);
}

__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
				       struct pt_regs *regs, int *rctxp)
{
	struct trace_entry *entry;
	unsigned long flags;
	char *raw_data;
	int pc;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	pc = preempt_count();

	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		return NULL;

	raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

	/* zero the dead bytes from align to not leak stack to user */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

	entry = (struct trace_entry *)raw_data;
	local_save_flags(flags);
	tracing_generic_entry_update(entry, flags, pc);
	entry->type = type;

	return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
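
/*
 * Illustrative sketch only, not part of the original file: roughly how a
 * tracepoint perf probe (such as the one generated by TRACE_EVENT() in
 * include/trace/ftrace.h) is expected to pair perf_trace_buf_prepare()
 * with perf_trace_buf_submit().  The function and field names below
 * (perf_trace_sketch, arg) are hypothetical, and the
 * perf_trace_buf_submit() argument list is assumed to match this kernel
 * generation's inline helper in <linux/ftrace_event.h>.  Real generated
 * probes also WARN and bail out if the record exceeds PERF_MAX_TRACE_SIZE.
 */
#if 0	/* example sketch, never compiled */
static notrace void perf_trace_sketch(void *__data, unsigned long arg)
{
	struct ftrace_event_call *event_call = __data;
	struct {
		struct trace_entry	ent;
		unsigned long		arg;
	} *entry;
	struct pt_regs __regs;
	struct hlist_head *head;
	int size, rctx;

	perf_fetch_caller_regs(&__regs);

	/*
	 * Round the record up to a u64 boundary, minus the u32 size field
	 * that perf prepends to raw samples.
	 */
	size = ALIGN(sizeof(*entry) + sizeof(u32), sizeof(u64)) - sizeof(u32);

	/*
	 * Grab the per-recursion-context percpu buffer and fill in the
	 * common trace_entry fields; rctx must be released by the submit.
	 */
	entry = perf_trace_buf_prepare(size, event_call->event.type,
				       &__regs, &rctx);
	if (!entry)
		return;

	entry->arg = arg;

	/* Hand the record to the events hashed on this CPU */
	head = this_cpu_ptr(event_call->perf_events);
	perf_trace_buf_submit(entry, size, rctx, 0, 1, &__regs, head);
}
#endif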