1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 #include <vmlinux.h>
4 #include <bpf/bpf_core_read.h>
5 #include <bpf/bpf_helpers.h>
6 #include <bpf/bpf_tracing.h>
7 
8 #include "profiler.h"
9 #include "err.h"
10 
/* Fallback definition for builds where no included header provides NULL. */
#ifndef NULL
#define NULL 0
#endif

/*
 * Octal flag constants defined locally. The O_* values are tested against
 * file->f_flags in kprobe_ret__do_filp_open().
 */
#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
/* Note: a two-bit mask (__O_TMPFILE together with O_DIRECTORY). */
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
/*
 * S_IFMT masks the file-type bits of a mode value; the S_IF* constants are
 * the individual file-type values compared against that masked field.
 */
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
/* File-type predicates: true when the type bits of m equal the given type. */
#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
37 
/* Number of kill records kept per entry of the var_tpid_to_data map. */
#define KILL_DATA_ARRAY_SIZE 8

/*
 * Fixed-size array of kill records. This is the value type of the
 * var_tpid_to_data map; slots are matched by meta.pid (the sending pid) in
 * get_var_spid_index().
 */
struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};
43 
/*
 * Union of every event record type built by the programs in this file. It is
 * the value type of the data_heap map, so the single scratch slot is large
 * enough for whichever record a program builds.
 */
union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};
52 
/*
 * Global configuration, zero-initialized here.
 * NOTE(review): presumably filled in by the user-space loader before the
 * programs are attached -- confirm against the loader.
 */
volatile struct profiler_config_struct bpf_config = {};

/* Shorthand accessors for individual bpf_config fields. */
#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
64 
/*
 * Alternate layout of the kernfs iattrs structure, in which the mtime is
 * reached through an embedded struct iattr. Used by populate_cgroup_info()
 * when kernfs_node->iattr has no direct ia_mtime field.
 * NOTE(review): the ___52 suffix presumably names the kernel version this
 * layout comes from -- confirm.
 */
struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};

/*
 * Alternate layout of struct kernfs_node in which `id` is a union holding a
 * 32-bit ino/generation pair. get_inode_from_kernfs() probes for id.ino with
 * bpf_core_field_exists() to decide which layout the running kernel has.
 */
struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};
78 
/*
 * Single-entry per-CPU scratch slot. Every program looks up key 0 and builds
 * its event record in place inside the returned value.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");

/* Perf event array that finished records are sent to via bpf_perf_event_output(). */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

/*
 * Kill records keyed by the target pid of the kill. Written by
 * trace_var_sys_kill(); read and deleted by the sched_process_exit program.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");

/* Per-CPU statistics, one entry per enum bpf_function_id value. */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");

/*
 * Device ids (super_block s_dev) for which file modifications are reported.
 * Only the presence of a key is checked; the stored value is not read here.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");

/* Inode numbers of individual files to report; presence of the key is what counts. */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");

/*
 * Inode numbers of directories; a file is reported when one of its ancestor
 * directories is present here (see is_ancestor_in_allowed_inodes()).
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");

/*
 * Inode numbers of executables whose exec events are suppressed.
 * NOTE(review): the key is declared u32, but the exec program passes the
 * address of a u64 inode number to the lookup -- confirm this is intended.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");
133 
#ifndef ARRAY_SIZE
/*
 * Number of elements in a true array (not a pointer). The argument is
 * parenthesized before indexing so that expression arguments expand safely.
 */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
137 
IS_ERR(const void * ptr)138 static INLINE bool IS_ERR(const void* ptr)
139 {
140 	return IS_ERR_VALUE((unsigned long)ptr);
141 }
142 
get_userspace_pid()143 static INLINE u32 get_userspace_pid()
144 {
145 	return bpf_get_current_pid_tgid() >> 32;
146 }
147 
is_init_process(u32 tgid)148 static INLINE bool is_init_process(u32 tgid)
149 {
150 	return tgid == 1 || tgid == 0;
151 }
152 
/*
 * Copies min(len, max) bytes from kernel address src into dst with
 * bpf_probe_read_kernel().
 *
 * Returns the clamped length on success (0 when there is nothing to copy,
 * in which case no read is issued), or 0 when the read helper reports an
 * error.
 */
static INLINE unsigned long
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
	len = len < max ? len : max;
	/*
	 * NOTE(review): the len > 1 and len == 1 branches perform the same read;
	 * the split presumably helps the BPF verifier bound the size argument --
	 * confirm before merging them.
	 */
	if (len > 1) {
		if (bpf_probe_read_kernel(dst, len, src))
			return 0;
	} else if (len == 1) {
		if (bpf_probe_read_kernel(dst, 1, src))
			return 0;
	}
	return len;
}
166 
get_var_spid_index(struct var_kill_data_arr_t * arr_struct,int spid)167 static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
168 				     int spid)
169 {
170 #ifdef UNROLL
171 #pragma unroll
172 #endif
173 	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
174 		if (arr_struct->array[i].meta.pid == spid)
175 			return i;
176 	return -1;
177 }
178 
/*
 * Walks the real_parent chain starting at task and records, for up to
 * MAX_ANCESTORS ancestors, the tgid, self_exec_id and start_time of each one
 * into ancestors_data. The walk stops early at a NULL parent or at a parent
 * whose tgid is 0 or 1.
 */
static INLINE void populate_ancestors(struct task_struct* task,
				      struct ancestors_data_t* ancestors_data)
{
	struct task_struct* parent = task;
	u32 num_ancestors, ppid;

	ancestors_data->num_ancestors = 0;
#ifdef UNROLL
#pragma unroll
#endif
	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
		parent = BPF_CORE_READ(parent, real_parent);
		if (parent == NULL)
			break;
		ppid = BPF_CORE_READ(parent, tgid);
		if (is_init_process(ppid))
			break;
		ancestors_data->ancestor_pids[num_ancestors] = ppid;
		ancestors_data->ancestor_exec_ids[num_ancestors] =
			BPF_CORE_READ(parent, self_exec_id);
		ancestors_data->ancestor_start_times[num_ancestors] =
			BPF_CORE_READ(parent, start_time);
		/*
		 * NOTE(review): this stores the index of the last filled slot, not
		 * a count, so the field is 0 both when nothing was recorded and
		 * when exactly one ancestor was recorded -- confirm what the
		 * consumer of this record expects.
		 */
		ancestors_data->num_ancestors = num_ancestors;
	}
}
204 
/*
 * Writes the name of cgroup_node, then the names of its parents, back to back
 * into payload, visiting at most MAX_CGROUPS_PATH_DEPTH nodes. Each name is
 * copied with bpf_probe_read_kernel_str() using a limit of MAX_PATH bytes.
 *
 * When the node being visited is cgroup_root_node, *root_pos is set to the
 * byte offset (from the initial payload) at which that node's name starts.
 *
 * Returns the payload pointer advanced past everything that was appended.
 */
static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
					  struct kernfs_node* cgroup_root_node,
					  void* payload,
					  int* root_pos)
{
	void* payload_start = payload;
	size_t filepart_length;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
		/*
		 * The name read is issued before the NULL check below; for a NULL
		 * node the function returns without advancing payload.
		 */
		filepart_length =
			bpf_probe_read_kernel_str(payload, MAX_PATH,
						  BPF_CORE_READ(cgroup_node, name));
		if (!cgroup_node)
			return payload;
		if (cgroup_node == cgroup_root_node)
			*root_pos = payload - payload_start;
		/*
		 * Only advance on a plausible length: the helper's result is held
		 * in a size_t, so a negative error return becomes a value larger
		 * than MAX_PATH and is skipped here.
		 */
		if (filepart_length <= MAX_PATH) {
			barrier_var(filepart_length);
			payload += filepart_length;
		}
		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
	}
	return payload;
}
232 
/*
 * Returns the inode number stored in a kernfs node, handling two layouts of
 * struct kernfs_node: when the target kernel's type has an id.ino member
 * (the kernfs_node___52 flavor) that member is read, otherwise the node's
 * `id` field is read directly and used as the inode number.
 */
static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}
245 
/* Weak kconfig extern; tested in populate_cgroup_info() before the v1 lookup. */
extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
/*
 * Local stand-in for the kernel's cgroup subsystem id enum. The enumerator is
 * passed to bpf_core_enum_value() in populate_cgroup_info(), so the constant
 * written here is a placeholder.
 */
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};
250 
/*
 * Fills cgroup_data for task and appends the cgroup names to payload.
 *
 * The root and per-process kernfs nodes are taken from the task's default
 * cgroup hierarchy; when the cgroup v1 resolver is enabled and
 * CONFIG_CGROUP_PIDS is set, they are replaced by the nodes of the subsystem
 * whose id matches the pids subsystem id resolved via CO-RE.
 *
 * Recorded: inode numbers and mtime (tv_nsec) of both nodes, then the root
 * node name and the process node name appended to payload with their lengths.
 * When FETCH_CGROUPS_FROM_BPF is set, the full path from the process node
 * upward is appended as well.
 *
 * Returns payload advanced past everything that was appended.
 */
static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#if __has_builtin(__builtin_preserve_enum_value)
	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
						  pids_cgrp_id___local);
#ifdef UNROLL
#pragma unroll
#endif
		/* Search the task's subsystem states for the one with that id. */
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}
#endif

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	/*
	 * Two layouts of the iattr data are supported: ia_mtime directly under
	 * kernfs_node->iattr, or nested inside an embedded struct iattr.
	 */
	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	/* Lengths stay 0 unless the corresponding string read succeeds. */
	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	size_t cgroup_root_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(root_kernfs, name));
	barrier_var(cgroup_root_length);
	if (cgroup_root_length <= MAX_PATH) {
		barrier_var(cgroup_root_length);
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	size_t cgroup_proc_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(proc_kernfs, name));
	barrier_var(cgroup_proc_length);
	if (cgroup_proc_length <= MAX_PATH) {
		barrier_var(cgroup_proc_length);
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	if (FETCH_CGROUPS_FROM_BPF) {
		/* -1 means the root node was not met while walking up the path. */
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}

	return (void*)payload;
}
335 
/*
 * Fills the common metadata header of an event record and appends the task's
 * comm string to payload.
 *
 * uid and gid come from bpf_get_current_uid_gid(), i.e. from the currently
 * running task, while exec_id, start_time and comm are read from the task
 * argument. pid is stored as passed in.
 *
 * Returns payload advanced by the comm length when the comm read produced a
 * length of at most TASK_COMM_LEN, otherwise payload unchanged (and
 * comm_length left at 0).
 */
static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	/* Low 32 bits are stored as uid, high 32 bits as gid. */
	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	barrier_var(comm_length);
	if (comm_length <= TASK_COMM_LEN) {
		barrier_var(comm_length);
		metadata->comm_length = comm_length;
		payload += comm_length;
	}

	return (void*)payload;
}
359 
360 static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs * ctx,int spid,int tpid,int sig)361 get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
362 {
363 	int zero = 0;
364 	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
365 
366 	if (kill_data == NULL)
367 		return NULL;
368 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
369 
370 	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
371 	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
372 	size_t payload_length = payload - (void*)kill_data->payload;
373 	kill_data->payload_length = payload_length;
374 	populate_ancestors(task, &kill_data->ancestors_info);
375 	kill_data->meta.type = KILL_EVENT;
376 	kill_data->kill_target_pid = tpid;
377 	kill_data->kill_sig = sig;
378 	kill_data->kill_count = 1;
379 	kill_data->last_kill_time = bpf_ktime_get_ns();
380 	return kill_data;
381 }
382 
/*
 * Records a kill(tpid, sig) issued by the current process in the
 * var_tpid_to_data map, keyed by the target pid.
 *
 * Signals whose bit is not set in KILL_SIGNALS are ignored. For a tracked
 * signal there are three cases:
 *  - no entry for tpid yet: a new record is built and stored in slot 0;
 *  - an entry exists but has no slot for this sender: the record is stored in
 *    the first slot whose meta.pid is 0 (dropped when none is free);
 *  - a slot for this sender exists: its counter and timestamp are refreshed
 *    when the previous kill is younger than STALE_INFO seconds, otherwise the
 *    slot is overwritten with a newly built record.
 *
 * Always returns 0.
 */
static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	/*
	 * NOTE(review): sig is used as a shift count without a range check;
	 * callers pass it straight from the syscall arguments -- confirm values
	 * outside 0..63 cannot matter here.
	 */
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		/*
		 * NOTE(review): kill_data was also obtained from data_heap key 0
		 * (inside get_var_kill_data), so arr_struct->array[0] looks like
		 * the same memory as kill_data, and the remaining slots hold
		 * whatever the scratch slot contained before -- confirm.
		 */
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read_kernel(&arr_struct->array[0],
				      sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
#pragma unroll
#endif
			/* Store into the first free slot (meta.pid == 0), if any. */
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read_kernel(&arr_struct->array[i],
							      sizeof(arr_struct->array[i]),
							      kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);

					return 0;
				}
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];

		/* Whole seconds elapsed since this sender's previous kill. */
		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			/*
			 * Source and destination of this copy are the same address
			 * (kill_data is &arr_struct->array[index]).
			 */
			bpf_probe_read_kernel(&arr_struct->array[index],
					      sizeof(arr_struct->array[index]),
					      kill_data);
		} else {
			/* This declaration shadows the outer kill_data. */
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read_kernel(&arr_struct->array[index],
					      sizeof(arr_struct->array[index]),
					      kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}
450 
bpf_stats_enter(struct bpf_func_stats_ctx * bpf_stat_ctx,enum bpf_function_id func_id)451 static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
452 				   enum bpf_function_id func_id)
453 {
454 	int func_id_key = func_id;
455 
456 	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
457 	bpf_stat_ctx->bpf_func_stats_data_val =
458 		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
459 	if (bpf_stat_ctx->bpf_func_stats_data_val)
460 		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
461 }
462 
bpf_stats_exit(struct bpf_func_stats_ctx * bpf_stat_ctx)463 static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
464 {
465 	if (bpf_stat_ctx->bpf_func_stats_data_val)
466 		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
467 			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
468 }
469 
470 static INLINE void
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx * bpf_stat_ctx,struct var_metadata_t * meta)471 bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
472 				    struct var_metadata_t* meta)
473 {
474 	if (bpf_stat_ctx->bpf_func_stats_data_val) {
475 		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
476 		meta->bpf_stats_num_perf_events =
477 			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
478 	}
479 	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
480 	meta->cpu_id = bpf_get_smp_processor_id();
481 }
482 
/*
 * Appends the name of filp_dentry and then the names of its ancestors, back
 * to back, into payload, visiting at most MAX_PATH_DEPTH dentries. Each name
 * is copied with bpf_probe_read_kernel_str() using a limit of MAX_PATH bytes.
 *
 * The walk stops when a name read yields a length above MAX_PATH or when a
 * dentry is its own d_parent.
 *
 * Returns the total number of bytes appended.
 */
static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
#pragma unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_kernel_str(payload, MAX_PATH,
						  BPF_CORE_READ(filp_dentry, d_name.name));
		barrier_var(filepart_length);
		/*
		 * The helper result is held in a size_t, so a negative error
		 * return shows up as a value above MAX_PATH and ends the walk.
		 */
		if (filepart_length > MAX_PATH)
			break;
		barrier_var(filepart_length);
		payload += filepart_length;
		length += filepart_length;

		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}

	return length;
}
512 
513 static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry * filp_dentry)514 is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
515 {
516 	struct dentry* parent_dentry;
517 #ifdef UNROLL
518 #pragma unroll
519 #endif
520 	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
521 		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
522 		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
523 
524 		if (allowed_dir != NULL)
525 			return true;
526 		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
527 		if (filp_dentry == parent_dentry)
528 			break;
529 		filp_dentry = parent_dentry;
530 	}
531 	return false;
532 }
533 
is_dentry_allowed_for_filemod(struct dentry * file_dentry,u32 * device_id,u64 * file_ino)534 static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
535 						 u32* device_id,
536 						 u64* file_ino)
537 {
538 	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
539 	*device_id = dev_id;
540 	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
541 
542 	if (allowed_device == NULL)
543 		return false;
544 
545 	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
546 	*file_ino = ino;
547 	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
548 
549 	if (allowed_file == NULL)
550 		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
551 			return false;
552 	return true;
553 }
554 
/*
 * kprobe on proc_sys_write(): emits a SYSCTL_EVENT record describing the
 * writing process (metadata, cgroup info, ancestors), followed by the written
 * value and the name of the dentry being written to.
 *
 * The record is built in the data_heap scratch slot and sent through the
 * events perf array. Always returns 0.
 */
SEC("kprobe/proc_sys_write")
ssize_t BPF_KPROBE(kprobe__proc_sys_write,
		   struct file* filp, const char* buf,
		   size_t count, loff_t* ppos)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);

	u32 pid = get_userspace_pid();
	int zero = 0;
	struct var_sysctl_data_t* sysctl_data =
		bpf_map_lookup_elem(&data_heap, &zero);
	if (!sysctl_data)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	sysctl_data->meta.type = SYSCTL_EVENT;
	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);

	populate_ancestors(task, &sysctl_data->ancestors_info);

	/* Lengths stay 0 unless the corresponding string read succeeds. */
	sysctl_data->sysctl_val_length = 0;
	sysctl_data->sysctl_path_length = 0;

	/*
	 * The written value is read with at most CTL_MAXNAME bytes.
	 * NOTE(review): buf is read with the kernel-memory helper; if buf is a
	 * user-space pointer on the target kernel this read would not succeed --
	 * confirm.
	 */
	size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
							     CTL_MAXNAME, buf);
	barrier_var(sysctl_val_length);
	if (sysctl_val_length <= CTL_MAXNAME) {
		barrier_var(sysctl_val_length);
		sysctl_data->sysctl_val_length = sysctl_val_length;
		payload += sysctl_val_length;
	}

	/* Only the final path component (the dentry name) is recorded. */
	size_t sysctl_path_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(filp, f_path.dentry,
							d_name.name));
	barrier_var(sysctl_path_length);
	if (sysctl_path_length <= MAX_PATH) {
		barrier_var(sysctl_path_length);
		sysctl_data->sysctl_path_length = sysctl_path_length;
		payload += sysctl_path_length;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
	/* Send only the used prefix of the record, capped at the struct size. */
	unsigned long data_len = payload - (void*)sysctl_data;
	data_len = data_len > sizeof(struct var_sysctl_data_t)
		? sizeof(struct var_sysctl_data_t)
		: data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
610 
611 SEC("tracepoint/syscalls/sys_enter_kill")
tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter * ctx)612 int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
613 {
614 	struct bpf_func_stats_ctx stats_ctx;
615 
616 	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
617 	int pid = ctx->args[0];
618 	int sig = ctx->args[1];
619 	int ret = trace_var_sys_kill(ctx, pid, sig);
620 	bpf_stats_exit(&stats_ctx);
621 	return ret;
622 };
623 
624 SEC("raw_tracepoint/sched_process_exit")
raw_tracepoint__sched_process_exit(void * ctx)625 int raw_tracepoint__sched_process_exit(void* ctx)
626 {
627 	int zero = 0;
628 	struct bpf_func_stats_ctx stats_ctx;
629 	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
630 
631 	u32 tpid = get_userspace_pid();
632 
633 	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
634 	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
635 
636 	if (arr_struct == NULL || kill_data == NULL)
637 		goto out;
638 
639 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
640 	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
641 
642 #ifdef UNROLL
643 #pragma unroll
644 #endif
645 	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
646 		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
647 
648 		if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
649 			bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
650 					      past_kill_data);
651 			void* payload = kill_data->payload;
652 			size_t offset = kill_data->payload_length;
653 			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
654 				return 0;
655 			payload += offset;
656 
657 			kill_data->kill_target_name_length = 0;
658 			kill_data->kill_target_cgroup_proc_length = 0;
659 
660 			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
661 			barrier_var(comm_length);
662 			if (comm_length <= TASK_COMM_LEN) {
663 				barrier_var(comm_length);
664 				kill_data->kill_target_name_length = comm_length;
665 				payload += comm_length;
666 			}
667 
668 			size_t cgroup_proc_length =
669 				bpf_probe_read_kernel_str(payload,
670 							  KILL_TARGET_LEN,
671 							  BPF_CORE_READ(proc_kernfs, name));
672 			barrier_var(cgroup_proc_length);
673 			if (cgroup_proc_length <= KILL_TARGET_LEN) {
674 				barrier_var(cgroup_proc_length);
675 				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
676 				payload += cgroup_proc_length;
677 			}
678 
679 			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
680 			unsigned long data_len = (void*)payload - (void*)kill_data;
681 			data_len = data_len > sizeof(struct var_kill_data_t)
682 				? sizeof(struct var_kill_data_t)
683 				: data_len;
684 			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
685 		}
686 	}
687 	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
688 out:
689 	bpf_stats_exit(&stats_ctx);
690 	return 0;
691 }
692 
693 SEC("raw_tracepoint/sched_process_exec")
raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args * ctx)694 int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
695 {
696 	struct bpf_func_stats_ctx stats_ctx;
697 	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
698 
699 	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
700 	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
701 
702 	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
703 	if (should_filter_binprm != NULL)
704 		goto out;
705 
706 	int zero = 0;
707 	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
708 	if (!proc_exec_data)
709 		goto out;
710 
711 	if (INODE_FILTER && inode != INODE_FILTER)
712 		return 0;
713 
714 	u32 pid = get_userspace_pid();
715 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
716 
717 	proc_exec_data->meta.type = EXEC_EVENT;
718 	proc_exec_data->bin_path_length = 0;
719 	proc_exec_data->cmdline_length = 0;
720 	proc_exec_data->environment_length = 0;
721 	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
722 					      proc_exec_data->payload);
723 	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
724 
725 	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
726 	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
727 	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
728 	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
729 	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
730 
731 	const char* filename = BPF_CORE_READ(bprm, filename);
732 	size_t bin_path_length =
733 		bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
734 	barrier_var(bin_path_length);
735 	if (bin_path_length <= MAX_FILENAME_LEN) {
736 		barrier_var(bin_path_length);
737 		proc_exec_data->bin_path_length = bin_path_length;
738 		payload += bin_path_length;
739 	}
740 
741 	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
742 	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
743 	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
744 						     arg_end - arg_start, MAX_ARGS_LEN);
745 
746 	if (cmdline_length <= MAX_ARGS_LEN) {
747 		barrier_var(cmdline_length);
748 		proc_exec_data->cmdline_length = cmdline_length;
749 		payload += cmdline_length;
750 	}
751 
752 	if (READ_ENVIRON_FROM_EXEC) {
753 		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
754 		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
755 		unsigned long env_len = probe_read_lim(payload, env_start,
756 						       env_end - env_start, MAX_ENVIRON_LEN);
757 		if (cmdline_length <= MAX_ENVIRON_LEN) {
758 			proc_exec_data->environment_length = env_len;
759 			payload += env_len;
760 		}
761 	}
762 
763 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
764 	unsigned long data_len = payload - (void*)proc_exec_data;
765 	data_len = data_len > sizeof(struct var_exec_data_t)
766 		? sizeof(struct var_exec_data_t)
767 		: data_len;
768 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
769 out:
770 	bpf_stats_exit(&stats_ctx);
771 	return 0;
772 }
773 
/*
 * kretprobe on do_filp_open(): emits a FILEMOD_EVENT / FMOD_OPEN record when
 * a file on an allowed device/inode/directory was opened for writing.
 *
 * Skipped: NULL or error return values, opens without O_WRONLY/O_RDWR, opens
 * matching the O_TMPFILE mask, and directories, character/block devices,
 * FIFOs and sockets. Always returns 0.
 */
SEC("kretprobe/do_filp_open")
int kprobe_ret__do_filp_open(struct pt_regs* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);

	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);

	if (filp == NULL || IS_ERR(filp))
		goto out;
	unsigned int flags = BPF_CORE_READ(filp, f_flags);
	if ((flags & (O_RDWR | O_WRONLY)) == 0)
		goto out;
	/*
	 * O_TMPFILE is defined in this file as (__O_TMPFILE | O_DIRECTORY), so
	 * this test is true when either of those two bits is set.
	 */
	if ((flags & O_TMPFILE) > 0)
		goto out;
	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
	    S_ISSOCK(mode))
		goto out;

	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
	u32 device_id = 0;
	u64 file_ino = 0;
	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	/* An open has no source file: the src_* fields are zeroed. */
	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_OPEN;
	filemod_data->dst_flags = flags;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
	barrier_var(len);
	if (len <= MAX_FILEPATH_LENGTH) {
		barrier_var(len);
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	/* Send only the used prefix of the record, capped at the struct size. */
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
838 
839 SEC("kprobe/vfs_link")
BPF_KPROBE(kprobe__vfs_link,struct dentry * old_dentry,struct mnt_idmap * idmap,struct inode * dir,struct dentry * new_dentry,struct inode ** delegated_inode)840 int BPF_KPROBE(kprobe__vfs_link,
841 	       struct dentry* old_dentry, struct mnt_idmap *idmap,
842 	       struct inode* dir, struct dentry* new_dentry,
843 	       struct inode** delegated_inode)
844 {
845 	struct bpf_func_stats_ctx stats_ctx;
846 	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
847 
848 	u32 src_device_id = 0;
849 	u64 src_file_ino = 0;
850 	u32 dst_device_id = 0;
851 	u64 dst_file_ino = 0;
852 	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
853 	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
854 		goto out;
855 
856 	int zero = 0;
857 	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
858 	if (!filemod_data)
859 		goto out;
860 
861 	u32 pid = get_userspace_pid();
862 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
863 
864 	filemod_data->meta.type = FILEMOD_EVENT;
865 	filemod_data->fmod_type = FMOD_LINK;
866 	filemod_data->dst_flags = 0;
867 	filemod_data->src_inode = src_file_ino;
868 	filemod_data->dst_inode = dst_file_ino;
869 	filemod_data->src_device_id = src_device_id;
870 	filemod_data->dst_device_id = dst_device_id;
871 	filemod_data->src_filepath_length = 0;
872 	filemod_data->dst_filepath_length = 0;
873 
874 	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
875 					      filemod_data->payload);
876 	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
877 
878 	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
879 	barrier_var(len);
880 	if (len <= MAX_FILEPATH_LENGTH) {
881 		barrier_var(len);
882 		payload += len;
883 		filemod_data->src_filepath_length = len;
884 	}
885 
886 	len = read_absolute_file_path_from_dentry(new_dentry, payload);
887 	barrier_var(len);
888 	if (len <= MAX_FILEPATH_LENGTH) {
889 		barrier_var(len);
890 		payload += len;
891 		filemod_data->dst_filepath_length = len;
892 	}
893 
894 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
895 	unsigned long data_len = payload - (void*)filemod_data;
896 	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
897 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
898 out:
899 	bpf_stats_exit(&stats_ctx);
900 	return 0;
901 }
902 
903 SEC("kprobe/vfs_symlink")
BPF_KPROBE(kprobe__vfs_symlink,struct inode * dir,struct dentry * dentry,const char * oldname)904 int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
905 	       const char* oldname)
906 {
907 	struct bpf_func_stats_ctx stats_ctx;
908 	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
909 
910 	u32 dst_device_id = 0;
911 	u64 dst_file_ino = 0;
912 	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
913 		goto out;
914 
915 	int zero = 0;
916 	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
917 	if (!filemod_data)
918 		goto out;
919 
920 	u32 pid = get_userspace_pid();
921 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
922 
923 	filemod_data->meta.type = FILEMOD_EVENT;
924 	filemod_data->fmod_type = FMOD_SYMLINK;
925 	filemod_data->dst_flags = 0;
926 	filemod_data->src_inode = 0;
927 	filemod_data->dst_inode = dst_file_ino;
928 	filemod_data->src_device_id = 0;
929 	filemod_data->dst_device_id = dst_device_id;
930 	filemod_data->src_filepath_length = 0;
931 	filemod_data->dst_filepath_length = 0;
932 
933 	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
934 					      filemod_data->payload);
935 	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
936 
937 	size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
938 					       oldname);
939 	barrier_var(len);
940 	if (len <= MAX_FILEPATH_LENGTH) {
941 		barrier_var(len);
942 		payload += len;
943 		filemod_data->src_filepath_length = len;
944 	}
945 	len = read_absolute_file_path_from_dentry(dentry, payload);
946 	barrier_var(len);
947 	if (len <= MAX_FILEPATH_LENGTH) {
948 		barrier_var(len);
949 		payload += len;
950 		filemod_data->dst_filepath_length = len;
951 	}
952 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
953 	unsigned long data_len = payload - (void*)filemod_data;
954 	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
955 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
956 out:
957 	bpf_stats_exit(&stats_ctx);
958 	return 0;
959 }
960 
961 SEC("raw_tracepoint/sched_process_fork")
raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args * ctx)962 int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
963 {
964 	struct bpf_func_stats_ctx stats_ctx;
965 	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
966 
967 	int zero = 0;
968 	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
969 	if (!fork_data)
970 		goto out;
971 
972 	struct task_struct* parent = (struct task_struct*)ctx->args[0];
973 	struct task_struct* child = (struct task_struct*)ctx->args[1];
974 	fork_data->meta.type = FORK_EVENT;
975 
976 	void* payload = populate_var_metadata(&fork_data->meta, child,
977 					      BPF_CORE_READ(child, pid), fork_data->payload);
978 	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
979 	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
980 	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
981 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
982 
983 	unsigned long data_len = payload - (void*)fork_data;
984 	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
985 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
986 out:
987 	bpf_stats_exit(&stats_ctx);
988 	return 0;
989 }
/* License string placed in the object's "license" section. */
char _license[] SEC("license") = "GPL";
991