1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Facebook
3 #include <linux/sched.h>
4 #include <linux/ptrace.h>
5 #include <stdint.h>
6 #include <stddef.h>
7 #include <stdbool.h>
8 #include <linux/bpf.h>
9 #include <bpf/bpf_helpers.h>
10 #include "bpf_misc.h"
11 
/* Buffer sizes, in bytes, for the fixed-length strings used below. */
#define FUNCTION_NAME_LEN 64	/* size of Symbol.name */
#define FILE_NAME_LEN 128	/* size of Symbol.file */
#define TASK_COMM_LEN 16	/* size of Event.comm */
15 
/*
 * Byte offsets into the traced process's Python runtime structures.
 *
 * Each value is added to a user-space pointer right before a
 * bpf_probe_read_user*() call. Nothing in this file computes them; they are
 * read from the PidData entry found in pidmap.
 */
typedef struct {
	int PyThreadState_frame;	// thread state -> pointer to the topmost frame
	int PyThreadState_thread;	// thread state -> 64-bit id compared against the pthread value
	int PyFrameObject_back;		// frame -> pointer to the previous frame
	int PyFrameObject_code;		// frame -> pointer to the code object
	int PyFrameObject_lineno;	// not referenced by the code visible in this file
	int PyCodeObject_filename;	// code object -> pointer to the filename string object
	int PyCodeObject_name;		// code object -> pointer to the function-name string object
	int String_data;		// string object -> start of its character data
	int String_size;		// not referenced by the code visible in this file
} OffsetConfig;
27 
/*
 * Per-process configuration, stored as the value type of pidmap and looked
 * up by the pid derived from bpf_get_current_pid_tgid().
 */
typedef struct {
	uintptr_t current_state_addr;	// user address holding the "current" thread-state pointer
	uintptr_t tls_key_addr;		// user address of the int key read by get_thread_state()
	OffsetConfig offsets;		// structure offsets used for every user-memory read
	bool use_tls;			// true: resolve the thread state via get_thread_state()
} PidData;
34 
/* Counters kept in statsmap (single entry, index 0). */
typedef struct {
	uint32_t success;	// incremented each time __on_event() reaches its output step
} Stats;
38 
/*
 * Identity of one Python frame: function name plus file name. Filled by
 * get_frame_data() and used, as a whole, as the key type of symbolmap.
 */
typedef struct {
	char name[FUNCTION_NAME_LEN];	// string read via the code object's name pointer
	char file[FILE_NAME_LEN];	// string read via the code object's filename pointer
} Symbol;
43 
/*
 * One sample produced by __on_event().
 *
 * Only the bytes before `metadata` are handed to bpf_perf_event_output()
 * (the size passed is offsetof(Event, metadata)), so field order matters.
 * STACK_MAX_LEN is not defined in this file; whoever includes it must
 * define it first.
 */
typedef struct {
	uint32_t pid;			// upper 32 bits of bpf_get_current_pid_tgid()
	uint32_t tid;			// lower 32 bits of bpf_get_current_pid_tgid()
	char comm[TASK_COMM_LEN];	// filled by bpf_get_current_comm()
	int32_t kernel_stack_id;	// bpf_get_stackid() result with flags 0
	int32_t user_stack_id;		// bpf_get_stackid() result with BPF_F_USER_STACK
	bool thread_current;		// resolved thread state equals the "current" one
	bool pthread_match;		// thread-state id equals the pthread value (always 1 without TLS)
	bool stack_complete;		// the frame walk ended on a NULL frame pointer
	int16_t stack_len;		// index of the last recorded frame, plus one
	int32_t stack[STACK_MAX_LEN];	// symbol ids, one per recorded frame

	int has_meta;			// set to 0 before the event is emitted
	int metadata;			// first field excluded from the perf output
	char dummy_safeguard;		// not referenced by the code visible in this file
} Event;
60 
61 
/* Local pid type; __on_event() casts the pid/tid halves of pid_tgid to it. */
typedef int pid_t;
63 
/*
 * Scratch copy of the user-space pointers read for a single Python frame.
 * Populated by get_frame_data(); f_back is then used by the caller to move
 * on to the previous frame.
 */
typedef struct {
	void* f_back; // PyFrameObject.f_back, previous frame
	void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject
	void* co_filename; // PyCodeObject.co_filename
	void* co_name; // PyCodeObject.co_name
} FrameData;
70 
71 #ifdef SUBPROGS
72 __noinline
73 #else
74 __always_inline
75 #endif
get_thread_state(void * tls_base,PidData * pidData)76 static void *get_thread_state(void *tls_base, PidData *pidData)
77 {
78 	void* thread_state;
79 	int key;
80 
81 	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
82 	bpf_probe_read_user(&thread_state, sizeof(thread_state),
83 			    tls_base + 0x310 + key * 0x10 + 0x08);
84 	return thread_state;
85 }
86 
/*
 * Read one Python frame from user memory.
 *
 * @frame_ptr: user-space address of the frame object
 * @pidData:   supplies the structure offsets
 * @frame:     receives f_back, f_code, co_filename and co_name
 * @symbol:    receives the file and function name strings; a field is left
 *             untouched when the corresponding string pointer is NULL
 *
 * Returns false when the frame's code pointer is NULL (nothing beyond
 * f_back/f_code is read in that case), true otherwise. Results of the
 * individual probe reads are not checked.
 */
static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
					   FrameData *frame, Symbol *symbol)
{
	/* Frame object: link to the previous frame and the code object. */
	bpf_probe_read_user(&frame->f_back, sizeof(frame->f_back),
			    frame_ptr + pidData->offsets.PyFrameObject_back);
	bpf_probe_read_user(&frame->f_code, sizeof(frame->f_code),
			    frame_ptr + pidData->offsets.PyFrameObject_code);

	if (frame->f_code == NULL)
		return false;

	/* Code object: pointers to the filename and name string objects. */
	bpf_probe_read_user(&frame->co_filename, sizeof(frame->co_filename),
			    frame->f_code + pidData->offsets.PyCodeObject_filename);
	bpf_probe_read_user(&frame->co_name, sizeof(frame->co_name),
			    frame->f_code + pidData->offsets.PyCodeObject_name);

	/* Copy the character data of each string that is present. */
	if (frame->co_filename != NULL) {
		bpf_probe_read_user_str(&symbol->file, sizeof(symbol->file),
					frame->co_filename +
					pidData->offsets.String_data);
	}
	if (frame->co_name != NULL) {
		bpf_probe_read_user_str(&symbol->name, sizeof(symbol->name),
					frame->co_name +
					pidData->offsets.String_data);
	}

	return true;
}
120 
/*
 * Per-process configuration, keyed by pid. __on_event() returns early when
 * the current pid has no entry. Nothing in this file inserts entries.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, PidData);
} pidmap SEC(".maps");
127 
/*
 * Scratch storage for the Event being built; __on_event() looks it up with
 * key 0 and returns early if that entry does not exist. Nothing in this
 * file inserts the entry.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Event);
} eventmap SEC(".maps");
134 
/*
 * Symbol -> integer id. The all-zero Symbol key is looked up separately by
 * __on_event() and its value is used as the symbol counter; the program
 * bails out if that entry is missing.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, Symbol);
	__type(value, int);
} symbolmap SEC(".maps");
141 
/* Single-slot array holding the Stats counters (index 0). */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Stats);
} statsmap SEC(".maps");
148 
/* Perf event array that __on_event() writes finished Events to. */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(max_entries, 32);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} perfmap SEC(".maps");
155 
/*
 * Stack-trace storage used by both bpf_get_stackid() calls in __on_event();
 * each value has room for 127 64-bit entries.
 */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1000);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");
162 
163 #ifdef USE_BPF_LOOP
/*
 * State handed to process_frame_callback() on every bpf_loop() iteration.
 * It bundles what the open-coded loop in __on_event() keeps in locals.
 */
struct process_frame_ctx {
	int cur_cpu;			// value of bpf_get_smp_processor_id() at loop start
	int32_t *symbol_counter;	// value slot of the all-zero Symbol in symbolmap
	void *frame_ptr;		// user address of the frame to process
	FrameData *frame;		// scratch buffer filled by get_frame_data()
	PidData *pidData;		// per-process offsets
	Symbol *sym;			// scratch symbol, also used as the symbolmap key
	Event *event;			// event whose stack[] / stack_len are written
	bool done;			// set by the callback when a symbol id cannot be obtained
};
174 
/*
 * bpf_loop() callback: record the frame at ctx->frame_ptr as entry @i of
 * ctx->event->stack and advance ctx->frame_ptr to the previous frame.
 *
 * @i:   iteration index supplied by bpf_loop(); used as the stack slot
 * @ctx: loop state, see struct process_frame_ctx
 *
 * Returns 0 to continue looping, 1 to stop. The loop is stopped when a
 * symbol id cannot be obtained (ctx->done is set as well) or when @i is
 * outside event->stack. When the frame pointer is NULL or the frame cannot
 * be decoded, the iteration does nothing and looping continues.
 */
static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
{
	int zero = 0;
	void *frame_ptr = ctx->frame_ptr;
	PidData *pidData = ctx->pidData;
	FrameData *frame = ctx->frame;
	int32_t *symbol_counter = ctx->symbol_counter;
	int cur_cpu = ctx->cur_cpu;
	Event *event = ctx->event;
	Symbol *sym = ctx->sym;

	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);

		if (!symbol_id) {
			/* Unknown symbol: insert it, then look it up again. */
			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
			if (!symbol_id) {
				ctx->done = true;
				return 1;
			}
		}
		if (*symbol_id == new_symbol_id)
			(*symbol_counter)++;

		/* Keep the bounds check on i visible to the verifier. */
		barrier_var(i);
		if (i >= STACK_MAX_LEN)
			return 1;

		event->stack[i] = *symbol_id;

		event->stack_len = i + 1;
		/*
		 * Advance through the context, not the local copy: the next
		 * invocation re-reads ctx->frame_ptr, so writing only the
		 * local would make every iteration process the same frame.
		 */
		ctx->frame_ptr = frame->f_back;
	}
	return 0;
}
212 #endif /* USE_BPF_LOOP */
213 
214 #ifdef GLOBAL_FUNC
215 __noinline
216 #elif defined(SUBPROGS)
217 static __noinline
218 #else
219 static __always_inline
220 #endif
__on_event(struct bpf_raw_tracepoint_args * ctx)221 int __on_event(struct bpf_raw_tracepoint_args *ctx)
222 {
223 	uint64_t pid_tgid = bpf_get_current_pid_tgid();
224 	pid_t pid = (pid_t)(pid_tgid >> 32);
225 	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
226 	if (!pidData)
227 		return 0;
228 
229 	int zero = 0;
230 	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
231 	if (!event)
232 		return 0;
233 
234 	event->pid = pid;
235 
236 	event->tid = (pid_t)pid_tgid;
237 	bpf_get_current_comm(&event->comm, sizeof(event->comm));
238 
239 	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
240 	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
241 
242 	void* thread_state_current = (void*)0;
243 	bpf_probe_read_user(&thread_state_current,
244 			    sizeof(thread_state_current),
245 			    (void*)(long)pidData->current_state_addr);
246 
247 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
248 	void* tls_base = (void*)task;
249 
250 	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
251 		: thread_state_current;
252 	event->thread_current = thread_state == thread_state_current;
253 
254 	if (pidData->use_tls) {
255 		uint64_t pthread_created;
256 		uint64_t pthread_self;
257 		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
258 				    tls_base + 0x10);
259 
260 		bpf_probe_read_user(&pthread_created,
261 				    sizeof(pthread_created),
262 				    thread_state +
263 				    pidData->offsets.PyThreadState_thread);
264 		event->pthread_match = pthread_created == pthread_self;
265 	} else {
266 		event->pthread_match = 1;
267 	}
268 
269 	if (event->pthread_match || !pidData->use_tls) {
270 		void* frame_ptr;
271 		FrameData frame;
272 		Symbol sym = {};
273 		int cur_cpu = bpf_get_smp_processor_id();
274 
275 		bpf_probe_read_user(&frame_ptr,
276 				    sizeof(frame_ptr),
277 				    thread_state +
278 				    pidData->offsets.PyThreadState_frame);
279 
280 		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
281 		if (symbol_counter == NULL)
282 			return 0;
283 #ifdef USE_BPF_LOOP
284 	struct process_frame_ctx ctx = {
285 		.cur_cpu = cur_cpu,
286 		.symbol_counter = symbol_counter,
287 		.frame_ptr = frame_ptr,
288 		.frame = &frame,
289 		.pidData = pidData,
290 		.sym = &sym,
291 		.event = event,
292 	};
293 
294 	bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
295 	if (ctx.done)
296 		return 0;
297 #else
298 #if defined(USE_ITER)
299 /* no for loop, no unrolling */
300 #elif defined(NO_UNROLL)
301 #pragma clang loop unroll(disable)
302 #elif defined(UNROLL_COUNT)
303 #pragma clang loop unroll_count(UNROLL_COUNT)
304 #else
305 #pragma clang loop unroll(full)
306 #endif /* NO_UNROLL */
307 		/* Unwind python stack */
308 #ifdef USE_ITER
309 		int i;
310 		bpf_for(i, 0, STACK_MAX_LEN) {
311 #else /* !USE_ITER */
312 		for (int i = 0; i < STACK_MAX_LEN; ++i) {
313 #endif
314 			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
315 				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
316 				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
317 				if (!symbol_id) {
318 					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
319 					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
320 					if (!symbol_id)
321 						return 0;
322 				}
323 				if (*symbol_id == new_symbol_id)
324 					(*symbol_counter)++;
325 				event->stack[i] = *symbol_id;
326 				event->stack_len = i + 1;
327 				frame_ptr = frame.f_back;
328 			}
329 		}
330 #endif /* USE_BPF_LOOP */
331 		event->stack_complete = frame_ptr == NULL;
332 	} else {
333 		event->stack_complete = 1;
334 	}
335 
336 	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
337 	if (stats)
338 		stats->success++;
339 
340 	event->has_meta = 0;
341 	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
342 	return 0;
343 }
344 
345 SEC("raw_tracepoint/kfree_skb")
346 int on_event(struct bpf_raw_tracepoint_args* ctx)
347 {
348 	int ret = 0;
349 	ret |= __on_event(ctx);
350 	ret |= __on_event(ctx);
351 	ret |= __on_event(ctx);
352 	ret |= __on_event(ctx);
353 	ret |= __on_event(ctx);
354 	return ret;
355 }
356 
/* License string placed in the object's "license" section. */
char _license[] SEC("license") = "GPL";
358