1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Facebook
3 #include <linux/sched.h>
4 #include <linux/ptrace.h>
5 #include <stdint.h>
6 #include <stddef.h>
7 #include <stdbool.h>
8 #include <linux/bpf.h>
9 #include <bpf/bpf_helpers.h>
10 
/* Size in bytes of the Symbol.name buffer. */
#define FUNCTION_NAME_LEN 64
/* Size in bytes of the Symbol.file buffer. */
#define FILE_NAME_LEN 128
/* Size in bytes of the Event.comm buffer. */
#define TASK_COMM_LEN 16
14 
/*
 * Byte offsets that are added to user-space pointers before the pointed-to
 * memory is read with bpf_probe_read_user() / bpf_probe_read_user_str().
 *
 * The values are not set anywhere in this file; they arrive as part of a
 * PidData map value.
 * NOTE(review): the field names suggest offsets into CPython's internal
 * structures — confirm against whatever fills pidmap.
 */
typedef struct {
	int PyThreadState_frame;	// added to the thread-state pointer to read the first frame pointer
	int PyThreadState_thread;	// added to the thread-state pointer to read the 64-bit thread id
	int PyFrameObject_back;		// added to a frame pointer to read FrameData.f_back
	int PyFrameObject_code;		// added to a frame pointer to read FrameData.f_code
	int PyFrameObject_lineno;	// not read anywhere in this file
	int PyCodeObject_filename;	// added to f_code to read FrameData.co_filename
	int PyCodeObject_name;		// added to f_code to read FrameData.co_name
	int String_data;		// added to co_filename / co_name to reach the string bytes
	int String_size;		// not read anywhere in this file
} OffsetConfig;
26 
/*
 * Per-process configuration; the value type of pidmap, looked up by the
 * pid of the current task in __on_event().
 */
typedef struct {
	uintptr_t current_state_addr;	// user-space address holding the "current" thread-state pointer
	uintptr_t tls_key_addr;		// user-space address of the int key read by get_thread_state()
	OffsetConfig offsets;		// structure offsets applied while walking frames
	bool use_tls;			// true: thread state comes from get_thread_state();
					// false: the pointer read from current_state_addr is used
} PidData;
33 
/* Counters kept in statsmap (a single entry at key 0). */
typedef struct {
	uint32_t success;	// incremented (plain ++) each time __on_event() reaches its final output step
} Stats;
37 
/*
 * Function name / file name pair filled in by get_frame_data() with
 * bpf_probe_read_user_str(). The whole struct is used as the key of
 * symbolmap.
 */
typedef struct {
	char name[FUNCTION_NAME_LEN];	// string read from co_name + String_data
	char file[FILE_NAME_LEN];	// string read from co_filename + String_data
} Symbol;
42 
/*
 * Record assembled by __on_event() in the eventmap value and then sent
 * through perfmap. Only the first offsetof(Event, metadata) bytes are
 * emitted, so `metadata` and `dummy_safeguard` are never part of the output.
 *
 * STACK_MAX_LEN is not defined in this file; it has to be provided by
 * whatever includes or compiles it.
 */
typedef struct {
	uint32_t pid;			// upper 32 bits of bpf_get_current_pid_tgid()
	uint32_t tid;			// lower 32 bits of bpf_get_current_pid_tgid()
	char comm[TASK_COMM_LEN];	// filled by bpf_get_current_comm()
	int32_t kernel_stack_id;	// bpf_get_stackid(ctx, &stackmap, 0), stored unchecked
	int32_t user_stack_id;		// bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK), stored unchecked
	bool thread_current;		// selected thread state equals the one read from current_state_addr
	bool pthread_match;		// thread id stored in the thread state equals the one read at tls_base + 0x10
					// (always 1 when use_tls is false)
	bool stack_complete;		// frame pointer was NULL after the walk (or the walk was skipped)
	int16_t stack_len;		// index of the last recorded frame + 1
	int32_t stack[STACK_MAX_LEN];	// symbolmap values, one per recorded frame

	int has_meta;			// set to 0 right before the event is emitted
	int metadata;			// never written in this file; its offset is the emitted size
	char dummy_safeguard;		// not referenced anywhere in this file
} Event;
59 
60 
/* Local pid type: used for the pidmap lookup key and for the pid/tid casts in __on_event(). */
typedef int pid_t;
62 
/*
 * Pointers gathered by get_frame_data() while inspecting a single frame.
 * Each member holds an address copied out of user memory.
 */
typedef struct {
	/* PyFrameObject.f_back: the previous frame */
	void *f_back;
	/* PyFrameObject.f_code: pointer to the PyCodeObject */
	void *f_code;
	/* PyCodeObject.co_filename */
	void *co_filename;
	/* PyCodeObject.co_name */
	void *co_name;
} FrameData;
69 
70 static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
71 {
72 	void* thread_state;
73 	int key;
74 
75 	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
76 	bpf_probe_read_user(&thread_state, sizeof(thread_state),
77 			    tls_base + 0x310 + key * 0x10 + 0x08);
78 	return thread_state;
79 }
80 
81 static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
82 					   FrameData *frame, Symbol *symbol)
83 {
84 	// read data from PyFrameObject
85 	bpf_probe_read_user(&frame->f_back,
86 			    sizeof(frame->f_back),
87 			    frame_ptr + pidData->offsets.PyFrameObject_back);
88 	bpf_probe_read_user(&frame->f_code,
89 			    sizeof(frame->f_code),
90 			    frame_ptr + pidData->offsets.PyFrameObject_code);
91 
92 	// read data from PyCodeObject
93 	if (!frame->f_code)
94 		return false;
95 	bpf_probe_read_user(&frame->co_filename,
96 			    sizeof(frame->co_filename),
97 			    frame->f_code + pidData->offsets.PyCodeObject_filename);
98 	bpf_probe_read_user(&frame->co_name,
99 			    sizeof(frame->co_name),
100 			    frame->f_code + pidData->offsets.PyCodeObject_name);
101 	// read actual names into symbol
102 	if (frame->co_filename)
103 		bpf_probe_read_user_str(&symbol->file,
104 					sizeof(symbol->file),
105 					frame->co_filename +
106 					pidData->offsets.String_data);
107 	if (frame->co_name)
108 		bpf_probe_read_user_str(&symbol->name,
109 					sizeof(symbol->name),
110 					frame->co_name +
111 					pidData->offsets.String_data);
112 	return true;
113 }
114 
/*
 * Hash map with one entry: int key -> PidData.
 * __on_event() looks it up with the pid of the current task and does nothing
 * when there is no entry. Nothing in this file writes to it.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, PidData);
} pidmap SEC(".maps");
121 
/*
 * Hash map with one entry: int key -> Event.
 * __on_event() looks up key 0 and fills the Event in place in the map value.
 * Nothing in this file inserts that entry; when it is missing __on_event()
 * returns without emitting anything.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Event);
} eventmap SEC(".maps");
128 
/*
 * Hash map keyed by a whole Symbol (name + file), value int.
 * __on_event() uses the all-zero Symbol key as a counter entry and the other
 * keys as per-symbol ids.
 * NOTE(review): max_entries is 1, so an insert of a new key cannot succeed
 * once any entry exists — confirm this capacity is intended.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, Symbol);
	__type(value, int);
} symbolmap SEC(".maps");
135 
/*
 * Array map with a single Stats element at index 0.
 * __on_event() increments Stats.success there.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Stats);
} statsmap SEC(".maps");
142 
/*
 * Perf event array (32 slots) that __on_event() writes Event records to
 * with bpf_perf_event_output().
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(max_entries, 32);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} perfmap SEC(".maps");
149 
/*
 * Stack trace map passed to bpf_get_stackid() for both the user and the
 * kernel stack. Holds up to 1000 entries; each value has room for 127
 * entries of sizeof(long long) bytes.
 */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1000);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");
156 
/*
 * Build one Event for the current task and emit it through perfmap.
 *
 * Linkage depends on the build: with GLOBAL_FUNC defined this is a
 * non-inlined global function, otherwise it is static and always inlined.
 *
 * ctx: raw tracepoint context, passed on to bpf_get_stackid() and
 *      bpf_perf_event_output().
 *
 * Always returns 0. It returns early, without emitting anything, when the
 * current pid has no pidmap entry, when eventmap has no entry for key 0, or
 * when a symbolmap lookup fails.
 */
#ifdef GLOBAL_FUNC
__attribute__((noinline))
#else
static __always_inline
#endif
int __on_event(struct bpf_raw_tracepoint_args *ctx)
{
	/* upper 32 bits are used as the pid (and pidmap key), lower 32 as the tid */
	uint64_t pid_tgid = bpf_get_current_pid_tgid();
	pid_t pid = (pid_t)(pid_tgid >> 32);
	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
	if (!pidData)
		return 0;

	/* the Event is assembled directly in the eventmap value for key 0 */
	int zero = 0;
	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
	if (!event)
		return 0;

	event->pid = pid;

	event->tid = (pid_t)pid_tgid;
	bpf_get_current_comm(&event->comm, sizeof(event->comm));

	/* helper results are stored as returned, without an error check */
	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);

	/* "current" thread state as published at current_state_addr */
	void* thread_state_current = (void*)0;
	bpf_probe_read_user(&thread_state_current,
			    sizeof(thread_state_current),
			    (void*)(long)pidData->current_state_addr);

	/*
	 * NOTE(review): the task_struct address itself is used as the base
	 * for the user-space reads below — confirm this is intentional.
	 */
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	void* tls_base = (void*)task;

	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
		: thread_state_current;
	event->thread_current = thread_state == thread_state_current;

	if (pidData->use_tls) {
		/*
		 * Compare the 64-bit value at tls_base + 0x10 with the one
		 * stored in the thread state at offset PyThreadState_thread.
		 */
		uint64_t pthread_created;
		uint64_t pthread_self;
		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
				    tls_base + 0x10);

		bpf_probe_read_user(&pthread_created,
				    sizeof(pthread_created),
				    thread_state +
				    pidData->offsets.PyThreadState_thread);
		event->pthread_match = pthread_created == pthread_self;
	} else {
		event->pthread_match = 1;
	}

	/* the !use_tls operand is redundant: pthread_match is already 1 in that case */
	if (event->pthread_match || !pidData->use_tls) {
		void* frame_ptr;
		FrameData frame;
		Symbol sym = {};
		int cur_cpu = bpf_get_smp_processor_id();

		bpf_probe_read_user(&frame_ptr,
				    sizeof(frame_ptr),
				    thread_state +
				    pidData->offsets.PyThreadState_frame);

		/*
		 * sym is still all zeroes here, so this fetches the entry
		 * keyed by the empty Symbol; it must already exist.
		 */
		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
		if (symbol_counter == NULL)
			return 0;
		/* NO_UNROLL picks between a real loop and a fully unrolled one */
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma clang loop unroll(full)
#endif
		/* Unwind python stack */
		/*
		 * The loop always runs STACK_MAX_LEN times; iterations with a
		 * NULL frame_ptr or a failed get_frame_data() do nothing. A
		 * failed get_frame_data() does not advance frame_ptr, so the
		 * same frame is retried and stack_complete ends up false.
		 *
		 * NOTE(review): stack_len and stack[] are never reset, so they
		 * keep the map value's previous contents when no frame is
		 * recorded.
		 */
		for (int i = 0; i < STACK_MAX_LEN; ++i) {
			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
				if (!symbol_id) {
					/* unknown symbol: insert it with value 0 (not new_symbol_id), then re-read */
					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
					if (!symbol_id)
						return 0;
				}
				/* the counter only advances when the stored id equals the candidate id */
				if (*symbol_id == new_symbol_id)
					(*symbol_counter)++;
				event->stack[i] = *symbol_id;
				event->stack_len = i + 1;
				frame_ptr = frame.f_back;
			}
		}
		event->stack_complete = frame_ptr == NULL;
	} else {
		event->stack_complete = 1;
	}

	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
	if (stats)
		stats->success++;

	/* emit everything up to, but not including, the metadata field */
	event->has_meta = 0;
	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
	return 0;
}
260 
261 SEC("raw_tracepoint/kfree_skb")
262 int on_event(struct bpf_raw_tracepoint_args* ctx)
263 {
264 	int i, ret = 0;
265 	ret |= __on_event(ctx);
266 	ret |= __on_event(ctx);
267 	ret |= __on_event(ctx);
268 	ret |= __on_event(ctx);
269 	ret |= __on_event(ctx);
270 	return ret;
271 }
272 
/* License string, placed in the "license" section by SEC(). */
char _license[] SEC("license") = "GPL";
274