1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 #include <vmlinux.h>
4 #include <bpf/bpf_core_read.h>
5 #include <bpf/bpf_helpers.h>
6 #include <bpf/bpf_tracing.h>
7 
8 #include "profiler.h"
9 
#ifndef NULL
#define NULL 0
#endif

/* File open flag bits (octal), tested against file->f_flags below. */
#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)

/* Largest errno magnitude that IS_ERR_VALUE() treats as an encoded error. */
#define MAX_ERRNO 4095

/* Inode mode bits (octal): S_IFMT masks the file-type field of a mode. */
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000

/* File-type predicates: compare the type field of mode m with one type. */
#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)

/*
 * True when x, converted to an unsigned long, falls in the top MAX_ERRNO
 * values of the range. The whole expansion is parenthesized so the macro
 * can be negated or combined with other operators without the comparison
 * being regrouped by operator precedence.
 */
#define IS_ERR_VALUE(x) \
	((unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO)
38 
39 #define KILL_DATA_ARRAY_SIZE 8
40 
41 struct var_kill_data_arr_t {
42 	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
43 };
44 
45 union any_profiler_data_t {
46 	struct var_exec_data_t var_exec;
47 	struct var_kill_data_t var_kill;
48 	struct var_sysctl_data_t var_sysctl;
49 	struct var_filemod_data_t var_filemod;
50 	struct var_fork_data_t var_fork;
51 	struct var_kill_data_arr_t var_kill_data_arr;
52 };
53 
54 volatile struct profiler_config_struct bpf_config = {};
55 
56 #define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
57 #define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
58 #define CGROUP_LOGIN_SESSION_INODE \
59 	(bpf_config.cgroup_login_session_inode)
60 #define KILL_SIGNALS (bpf_config.kill_signals_mask)
61 #define STALE_INFO (bpf_config.stale_info_secs)
62 #define INODE_FILTER (bpf_config.inode_filter)
63 #define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
64 #define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
65 
66 struct kernfs_iattrs___52 {
67 	struct iattr ia_iattr;
68 };
69 
70 struct kernfs_node___52 {
71 	union /* kernfs_node_id */ {
72 		struct {
73 			u32 ino;
74 			u32 generation;
75 		};
76 		u64 id;
77 	} id;
78 };
79 
80 struct {
81 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
82 	__uint(max_entries, 1);
83 	__type(key, u32);
84 	__type(value, union any_profiler_data_t);
85 } data_heap SEC(".maps");
86 
87 struct {
88 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
89 	__uint(key_size, sizeof(int));
90 	__uint(value_size, sizeof(int));
91 } events SEC(".maps");
92 
93 struct {
94 	__uint(type, BPF_MAP_TYPE_HASH);
95 	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
96 	__type(key, u32);
97 	__type(value, struct var_kill_data_arr_t);
98 } var_tpid_to_data SEC(".maps");
99 
100 struct {
101 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
102 	__uint(max_entries, profiler_bpf_max_function_id);
103 	__type(key, u32);
104 	__type(value, struct bpf_func_stats_data);
105 } bpf_func_stats SEC(".maps");
106 
107 struct {
108 	__uint(type, BPF_MAP_TYPE_HASH);
109 	__type(key, u32);
110 	__type(value, bool);
111 	__uint(max_entries, 16);
112 } allowed_devices SEC(".maps");
113 
114 struct {
115 	__uint(type, BPF_MAP_TYPE_HASH);
116 	__type(key, u64);
117 	__type(value, bool);
118 	__uint(max_entries, 1024);
119 } allowed_file_inodes SEC(".maps");
120 
121 struct {
122 	__uint(type, BPF_MAP_TYPE_HASH);
123 	__type(key, u64);
124 	__type(value, bool);
125 	__uint(max_entries, 1024);
126 } allowed_directory_inodes SEC(".maps");
127 
128 struct {
129 	__uint(type, BPF_MAP_TYPE_HASH);
130 	__type(key, u32);
131 	__type(value, bool);
132 	__uint(max_entries, 16);
133 } disallowed_exec_inodes SEC(".maps");
134 
#ifndef ARRAY_SIZE
/*
 * Element count of a true array (not a pointer). The argument is
 * parenthesized before indexing so that expression arguments expand
 * as intended.
 */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
138 
139 static INLINE bool IS_ERR(const void* ptr)
140 {
141 	return IS_ERR_VALUE((unsigned long)ptr);
142 }
143 
144 static INLINE u32 get_userspace_pid()
145 {
146 	return bpf_get_current_pid_tgid() >> 32;
147 }
148 
149 static INLINE bool is_init_process(u32 tgid)
150 {
151 	return tgid == 1 || tgid == 0;
152 }
153 
154 static INLINE unsigned long
155 probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
156 {
157 	len = len < max ? len : max;
158 	if (len > 1) {
159 		if (bpf_probe_read_kernel(dst, len, src))
160 			return 0;
161 	} else if (len == 1) {
162 		if (bpf_probe_read_kernel(dst, 1, src))
163 			return 0;
164 	}
165 	return len;
166 }
167 
168 static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
169 				     int spid)
170 {
171 #ifdef UNROLL
172 #pragma unroll
173 #endif
174 	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
175 		if (arr_struct->array[i].meta.pid == spid)
176 			return i;
177 	return -1;
178 }
179 
180 static INLINE void populate_ancestors(struct task_struct* task,
181 				      struct ancestors_data_t* ancestors_data)
182 {
183 	struct task_struct* parent = task;
184 	u32 num_ancestors, ppid;
185 
186 	ancestors_data->num_ancestors = 0;
187 #ifdef UNROLL
188 #pragma unroll
189 #endif
190 	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
191 		parent = BPF_CORE_READ(parent, real_parent);
192 		if (parent == NULL)
193 			break;
194 		ppid = BPF_CORE_READ(parent, tgid);
195 		if (is_init_process(ppid))
196 			break;
197 		ancestors_data->ancestor_pids[num_ancestors] = ppid;
198 		ancestors_data->ancestor_exec_ids[num_ancestors] =
199 			BPF_CORE_READ(parent, self_exec_id);
200 		ancestors_data->ancestor_start_times[num_ancestors] =
201 			BPF_CORE_READ(parent, start_time);
202 		ancestors_data->num_ancestors = num_ancestors;
203 	}
204 }
205 
206 static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
207 					  struct kernfs_node* cgroup_root_node,
208 					  void* payload,
209 					  int* root_pos)
210 {
211 	void* payload_start = payload;
212 	size_t filepart_length;
213 
214 #ifdef UNROLL
215 #pragma unroll
216 #endif
217 	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
218 		filepart_length =
219 			bpf_probe_read_kernel_str(payload, MAX_PATH,
220 						  BPF_CORE_READ(cgroup_node, name));
221 		if (!cgroup_node)
222 			return payload;
223 		if (cgroup_node == cgroup_root_node)
224 			*root_pos = payload - payload_start;
225 		if (filepart_length <= MAX_PATH) {
226 			barrier_var(filepart_length);
227 			payload += filepart_length;
228 		}
229 		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
230 	}
231 	return payload;
232 }
233 
234 static ino_t get_inode_from_kernfs(struct kernfs_node* node)
235 {
236 	struct kernfs_node___52* node52 = (void*)node;
237 
238 	if (bpf_core_field_exists(node52->id.ino)) {
239 		barrier_var(node52);
240 		return BPF_CORE_READ(node52, id.ino);
241 	} else {
242 		barrier_var(node);
243 		return (u64)BPF_CORE_READ(node, id);
244 	}
245 }
246 
247 extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
248 enum cgroup_subsys_id___local {
249 	pids_cgrp_id___local = 123, /* value doesn't matter */
250 };
251 
252 static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
253 					 struct task_struct* task,
254 					 void* payload)
255 {
256 	struct kernfs_node* root_kernfs =
257 		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
258 	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
259 
260 #if __has_builtin(__builtin_preserve_enum_value)
261 	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
262 		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
263 						  pids_cgrp_id___local);
264 #ifdef UNROLL
265 #pragma unroll
266 #endif
267 		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
268 			struct cgroup_subsys_state* subsys =
269 				BPF_CORE_READ(task, cgroups, subsys[i]);
270 			if (subsys != NULL) {
271 				int subsys_id = BPF_CORE_READ(subsys, ss, id);
272 				if (subsys_id == cgrp_id) {
273 					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
274 					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
275 					break;
276 				}
277 			}
278 		}
279 	}
280 #endif
281 
282 	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
283 	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
284 
285 	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
286 		cgroup_data->cgroup_root_mtime =
287 			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
288 		cgroup_data->cgroup_proc_mtime =
289 			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
290 	} else {
291 		struct kernfs_iattrs___52* root_iattr =
292 			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
293 		cgroup_data->cgroup_root_mtime =
294 			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
295 
296 		struct kernfs_iattrs___52* proc_iattr =
297 			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
298 		cgroup_data->cgroup_proc_mtime =
299 			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
300 	}
301 
302 	cgroup_data->cgroup_root_length = 0;
303 	cgroup_data->cgroup_proc_length = 0;
304 	cgroup_data->cgroup_full_length = 0;
305 
306 	size_t cgroup_root_length =
307 		bpf_probe_read_kernel_str(payload, MAX_PATH,
308 					  BPF_CORE_READ(root_kernfs, name));
309 	barrier_var(cgroup_root_length);
310 	if (cgroup_root_length <= MAX_PATH) {
311 		barrier_var(cgroup_root_length);
312 		cgroup_data->cgroup_root_length = cgroup_root_length;
313 		payload += cgroup_root_length;
314 	}
315 
316 	size_t cgroup_proc_length =
317 		bpf_probe_read_kernel_str(payload, MAX_PATH,
318 					  BPF_CORE_READ(proc_kernfs, name));
319 	barrier_var(cgroup_proc_length);
320 	if (cgroup_proc_length <= MAX_PATH) {
321 		barrier_var(cgroup_proc_length);
322 		cgroup_data->cgroup_proc_length = cgroup_proc_length;
323 		payload += cgroup_proc_length;
324 	}
325 
326 	if (FETCH_CGROUPS_FROM_BPF) {
327 		cgroup_data->cgroup_full_path_root_pos = -1;
328 		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
329 							      &cgroup_data->cgroup_full_path_root_pos);
330 		cgroup_data->cgroup_full_length = payload_end_pos - payload;
331 		payload = payload_end_pos;
332 	}
333 
334 	return (void*)payload;
335 }
336 
337 static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
338 					  struct task_struct* task,
339 					  u32 pid, void* payload)
340 {
341 	u64 uid_gid = bpf_get_current_uid_gid();
342 
343 	metadata->uid = (u32)uid_gid;
344 	metadata->gid = uid_gid >> 32;
345 	metadata->pid = pid;
346 	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
347 	metadata->start_time = BPF_CORE_READ(task, start_time);
348 	metadata->comm_length = 0;
349 
350 	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
351 	barrier_var(comm_length);
352 	if (comm_length <= TASK_COMM_LEN) {
353 		barrier_var(comm_length);
354 		metadata->comm_length = comm_length;
355 		payload += comm_length;
356 	}
357 
358 	return (void*)payload;
359 }
360 
361 static INLINE struct var_kill_data_t*
362 get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
363 {
364 	int zero = 0;
365 	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
366 
367 	if (kill_data == NULL)
368 		return NULL;
369 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
370 
371 	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
372 	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
373 	size_t payload_length = payload - (void*)kill_data->payload;
374 	kill_data->payload_length = payload_length;
375 	populate_ancestors(task, &kill_data->ancestors_info);
376 	kill_data->meta.type = KILL_EVENT;
377 	kill_data->kill_target_pid = tpid;
378 	kill_data->kill_sig = sig;
379 	kill_data->kill_count = 1;
380 	kill_data->last_kill_time = bpf_ktime_get_ns();
381 	return kill_data;
382 }
383 
384 static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
385 {
386 	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
387 		return 0;
388 
389 	u32 spid = get_userspace_pid();
390 	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
391 
392 	if (arr_struct == NULL) {
393 		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
394 		int zero = 0;
395 
396 		if (kill_data == NULL)
397 			return 0;
398 		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
399 		if (arr_struct == NULL)
400 			return 0;
401 		bpf_probe_read_kernel(&arr_struct->array[0],
402 				      sizeof(arr_struct->array[0]), kill_data);
403 	} else {
404 		int index = get_var_spid_index(arr_struct, spid);
405 
406 		if (index == -1) {
407 			struct var_kill_data_t* kill_data =
408 				get_var_kill_data(ctx, spid, tpid, sig);
409 			if (kill_data == NULL)
410 				return 0;
411 #ifdef UNROLL
412 #pragma unroll
413 #endif
414 			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
415 				if (arr_struct->array[i].meta.pid == 0) {
416 					bpf_probe_read_kernel(&arr_struct->array[i],
417 							      sizeof(arr_struct->array[i]),
418 							      kill_data);
419 					bpf_map_update_elem(&var_tpid_to_data, &tpid,
420 							    arr_struct, 0);
421 
422 					return 0;
423 				}
424 			return 0;
425 		}
426 
427 		struct var_kill_data_t* kill_data = &arr_struct->array[index];
428 
429 		u64 delta_sec =
430 			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
431 
432 		if (delta_sec < STALE_INFO) {
433 			kill_data->kill_count++;
434 			kill_data->last_kill_time = bpf_ktime_get_ns();
435 			bpf_probe_read_kernel(&arr_struct->array[index],
436 					      sizeof(arr_struct->array[index]),
437 					      kill_data);
438 		} else {
439 			struct var_kill_data_t* kill_data =
440 				get_var_kill_data(ctx, spid, tpid, sig);
441 			if (kill_data == NULL)
442 				return 0;
443 			bpf_probe_read_kernel(&arr_struct->array[index],
444 					      sizeof(arr_struct->array[index]),
445 					      kill_data);
446 		}
447 	}
448 	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
449 	return 0;
450 }
451 
452 static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
453 				   enum bpf_function_id func_id)
454 {
455 	int func_id_key = func_id;
456 
457 	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
458 	bpf_stat_ctx->bpf_func_stats_data_val =
459 		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
460 	if (bpf_stat_ctx->bpf_func_stats_data_val)
461 		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
462 }
463 
464 static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
465 {
466 	if (bpf_stat_ctx->bpf_func_stats_data_val)
467 		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
468 			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
469 }
470 
471 static INLINE void
472 bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
473 				    struct var_metadata_t* meta)
474 {
475 	if (bpf_stat_ctx->bpf_func_stats_data_val) {
476 		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
477 		meta->bpf_stats_num_perf_events =
478 			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
479 	}
480 	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
481 	meta->cpu_id = bpf_get_smp_processor_id();
482 }
483 
484 static INLINE size_t
485 read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
486 {
487 	size_t length = 0;
488 	size_t filepart_length;
489 	struct dentry* parent_dentry;
490 
491 #ifdef UNROLL
492 #pragma unroll
493 #endif
494 	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
495 		filepart_length =
496 			bpf_probe_read_kernel_str(payload, MAX_PATH,
497 						  BPF_CORE_READ(filp_dentry, d_name.name));
498 		barrier_var(filepart_length);
499 		if (filepart_length > MAX_PATH)
500 			break;
501 		barrier_var(filepart_length);
502 		payload += filepart_length;
503 		length += filepart_length;
504 
505 		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
506 		if (filp_dentry == parent_dentry)
507 			break;
508 		filp_dentry = parent_dentry;
509 	}
510 
511 	return length;
512 }
513 
514 static INLINE bool
515 is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
516 {
517 	struct dentry* parent_dentry;
518 #ifdef UNROLL
519 #pragma unroll
520 #endif
521 	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
522 		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
523 		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
524 
525 		if (allowed_dir != NULL)
526 			return true;
527 		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
528 		if (filp_dentry == parent_dentry)
529 			break;
530 		filp_dentry = parent_dentry;
531 	}
532 	return false;
533 }
534 
535 static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
536 						 u32* device_id,
537 						 u64* file_ino)
538 {
539 	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
540 	*device_id = dev_id;
541 	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
542 
543 	if (allowed_device == NULL)
544 		return false;
545 
546 	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
547 	*file_ino = ino;
548 	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
549 
550 	if (allowed_file == NULL)
551 		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
552 			return false;
553 	return true;
554 }
555 
556 SEC("kprobe/proc_sys_write")
557 ssize_t BPF_KPROBE(kprobe__proc_sys_write,
558 		   struct file* filp, const char* buf,
559 		   size_t count, loff_t* ppos)
560 {
561 	struct bpf_func_stats_ctx stats_ctx;
562 	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
563 
564 	u32 pid = get_userspace_pid();
565 	int zero = 0;
566 	struct var_sysctl_data_t* sysctl_data =
567 		bpf_map_lookup_elem(&data_heap, &zero);
568 	if (!sysctl_data)
569 		goto out;
570 
571 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
572 	sysctl_data->meta.type = SYSCTL_EVENT;
573 	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
574 	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
575 
576 	populate_ancestors(task, &sysctl_data->ancestors_info);
577 
578 	sysctl_data->sysctl_val_length = 0;
579 	sysctl_data->sysctl_path_length = 0;
580 
581 	size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
582 							     CTL_MAXNAME, buf);
583 	barrier_var(sysctl_val_length);
584 	if (sysctl_val_length <= CTL_MAXNAME) {
585 		barrier_var(sysctl_val_length);
586 		sysctl_data->sysctl_val_length = sysctl_val_length;
587 		payload += sysctl_val_length;
588 	}
589 
590 	size_t sysctl_path_length =
591 		bpf_probe_read_kernel_str(payload, MAX_PATH,
592 					  BPF_CORE_READ(filp, f_path.dentry,
593 							d_name.name));
594 	barrier_var(sysctl_path_length);
595 	if (sysctl_path_length <= MAX_PATH) {
596 		barrier_var(sysctl_path_length);
597 		sysctl_data->sysctl_path_length = sysctl_path_length;
598 		payload += sysctl_path_length;
599 	}
600 
601 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
602 	unsigned long data_len = payload - (void*)sysctl_data;
603 	data_len = data_len > sizeof(struct var_sysctl_data_t)
604 		? sizeof(struct var_sysctl_data_t)
605 		: data_len;
606 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
607 out:
608 	bpf_stats_exit(&stats_ctx);
609 	return 0;
610 }
611 
612 SEC("tracepoint/syscalls/sys_enter_kill")
613 int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
614 {
615 	struct bpf_func_stats_ctx stats_ctx;
616 
617 	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
618 	int pid = ctx->args[0];
619 	int sig = ctx->args[1];
620 	int ret = trace_var_sys_kill(ctx, pid, sig);
621 	bpf_stats_exit(&stats_ctx);
622 	return ret;
623 };
624 
625 SEC("raw_tracepoint/sched_process_exit")
626 int raw_tracepoint__sched_process_exit(void* ctx)
627 {
628 	int zero = 0;
629 	struct bpf_func_stats_ctx stats_ctx;
630 	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
631 
632 	u32 tpid = get_userspace_pid();
633 
634 	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
635 	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
636 
637 	if (arr_struct == NULL || kill_data == NULL)
638 		goto out;
639 
640 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
641 	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
642 
643 #ifdef UNROLL
644 #pragma unroll
645 #endif
646 	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
647 		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
648 
649 		if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
650 			bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
651 					      past_kill_data);
652 			void* payload = kill_data->payload;
653 			size_t offset = kill_data->payload_length;
654 			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
655 				return 0;
656 			payload += offset;
657 
658 			kill_data->kill_target_name_length = 0;
659 			kill_data->kill_target_cgroup_proc_length = 0;
660 
661 			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
662 			barrier_var(comm_length);
663 			if (comm_length <= TASK_COMM_LEN) {
664 				barrier_var(comm_length);
665 				kill_data->kill_target_name_length = comm_length;
666 				payload += comm_length;
667 			}
668 
669 			size_t cgroup_proc_length =
670 				bpf_probe_read_kernel_str(payload,
671 							  KILL_TARGET_LEN,
672 							  BPF_CORE_READ(proc_kernfs, name));
673 			barrier_var(cgroup_proc_length);
674 			if (cgroup_proc_length <= KILL_TARGET_LEN) {
675 				barrier_var(cgroup_proc_length);
676 				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
677 				payload += cgroup_proc_length;
678 			}
679 
680 			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
681 			unsigned long data_len = (void*)payload - (void*)kill_data;
682 			data_len = data_len > sizeof(struct var_kill_data_t)
683 				? sizeof(struct var_kill_data_t)
684 				: data_len;
685 			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
686 		}
687 	}
688 	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
689 out:
690 	bpf_stats_exit(&stats_ctx);
691 	return 0;
692 }
693 
694 SEC("raw_tracepoint/sched_process_exec")
695 int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
696 {
697 	struct bpf_func_stats_ctx stats_ctx;
698 	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
699 
700 	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
701 	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
702 
703 	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
704 	if (should_filter_binprm != NULL)
705 		goto out;
706 
707 	int zero = 0;
708 	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
709 	if (!proc_exec_data)
710 		goto out;
711 
712 	if (INODE_FILTER && inode != INODE_FILTER)
713 		return 0;
714 
715 	u32 pid = get_userspace_pid();
716 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
717 
718 	proc_exec_data->meta.type = EXEC_EVENT;
719 	proc_exec_data->bin_path_length = 0;
720 	proc_exec_data->cmdline_length = 0;
721 	proc_exec_data->environment_length = 0;
722 	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
723 					      proc_exec_data->payload);
724 	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
725 
726 	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
727 	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
728 	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
729 	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
730 	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
731 
732 	const char* filename = BPF_CORE_READ(bprm, filename);
733 	size_t bin_path_length =
734 		bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
735 	barrier_var(bin_path_length);
736 	if (bin_path_length <= MAX_FILENAME_LEN) {
737 		barrier_var(bin_path_length);
738 		proc_exec_data->bin_path_length = bin_path_length;
739 		payload += bin_path_length;
740 	}
741 
742 	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
743 	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
744 	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
745 						     arg_end - arg_start, MAX_ARGS_LEN);
746 
747 	if (cmdline_length <= MAX_ARGS_LEN) {
748 		barrier_var(cmdline_length);
749 		proc_exec_data->cmdline_length = cmdline_length;
750 		payload += cmdline_length;
751 	}
752 
753 	if (READ_ENVIRON_FROM_EXEC) {
754 		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
755 		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
756 		unsigned long env_len = probe_read_lim(payload, env_start,
757 						       env_end - env_start, MAX_ENVIRON_LEN);
758 		if (cmdline_length <= MAX_ENVIRON_LEN) {
759 			proc_exec_data->environment_length = env_len;
760 			payload += env_len;
761 		}
762 	}
763 
764 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
765 	unsigned long data_len = payload - (void*)proc_exec_data;
766 	data_len = data_len > sizeof(struct var_exec_data_t)
767 		? sizeof(struct var_exec_data_t)
768 		: data_len;
769 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
770 out:
771 	bpf_stats_exit(&stats_ctx);
772 	return 0;
773 }
774 
775 SEC("kretprobe/do_filp_open")
776 int kprobe_ret__do_filp_open(struct pt_regs* ctx)
777 {
778 	struct bpf_func_stats_ctx stats_ctx;
779 	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
780 
781 	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
782 
783 	if (filp == NULL || IS_ERR(filp))
784 		goto out;
785 	unsigned int flags = BPF_CORE_READ(filp, f_flags);
786 	if ((flags & (O_RDWR | O_WRONLY)) == 0)
787 		goto out;
788 	if ((flags & O_TMPFILE) > 0)
789 		goto out;
790 	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
791 	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
792 	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
793 	    S_ISSOCK(mode))
794 		goto out;
795 
796 	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
797 	u32 device_id = 0;
798 	u64 file_ino = 0;
799 	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
800 		goto out;
801 
802 	int zero = 0;
803 	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
804 	if (!filemod_data)
805 		goto out;
806 
807 	u32 pid = get_userspace_pid();
808 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
809 
810 	filemod_data->meta.type = FILEMOD_EVENT;
811 	filemod_data->fmod_type = FMOD_OPEN;
812 	filemod_data->dst_flags = flags;
813 	filemod_data->src_inode = 0;
814 	filemod_data->dst_inode = file_ino;
815 	filemod_data->src_device_id = 0;
816 	filemod_data->dst_device_id = device_id;
817 	filemod_data->src_filepath_length = 0;
818 	filemod_data->dst_filepath_length = 0;
819 
820 	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
821 					      filemod_data->payload);
822 	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
823 
824 	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
825 	barrier_var(len);
826 	if (len <= MAX_FILEPATH_LENGTH) {
827 		barrier_var(len);
828 		payload += len;
829 		filemod_data->dst_filepath_length = len;
830 	}
831 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
832 	unsigned long data_len = payload - (void*)filemod_data;
833 	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
834 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
835 out:
836 	bpf_stats_exit(&stats_ctx);
837 	return 0;
838 }
839 
840 SEC("kprobe/vfs_link")
841 int BPF_KPROBE(kprobe__vfs_link,
842 	       struct dentry* old_dentry, struct user_namespace *mnt_userns,
843 	       struct inode* dir, struct dentry* new_dentry,
844 	       struct inode** delegated_inode)
845 {
846 	struct bpf_func_stats_ctx stats_ctx;
847 	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
848 
849 	u32 src_device_id = 0;
850 	u64 src_file_ino = 0;
851 	u32 dst_device_id = 0;
852 	u64 dst_file_ino = 0;
853 	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
854 	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
855 		goto out;
856 
857 	int zero = 0;
858 	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
859 	if (!filemod_data)
860 		goto out;
861 
862 	u32 pid = get_userspace_pid();
863 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
864 
865 	filemod_data->meta.type = FILEMOD_EVENT;
866 	filemod_data->fmod_type = FMOD_LINK;
867 	filemod_data->dst_flags = 0;
868 	filemod_data->src_inode = src_file_ino;
869 	filemod_data->dst_inode = dst_file_ino;
870 	filemod_data->src_device_id = src_device_id;
871 	filemod_data->dst_device_id = dst_device_id;
872 	filemod_data->src_filepath_length = 0;
873 	filemod_data->dst_filepath_length = 0;
874 
875 	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
876 					      filemod_data->payload);
877 	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
878 
879 	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
880 	barrier_var(len);
881 	if (len <= MAX_FILEPATH_LENGTH) {
882 		barrier_var(len);
883 		payload += len;
884 		filemod_data->src_filepath_length = len;
885 	}
886 
887 	len = read_absolute_file_path_from_dentry(new_dentry, payload);
888 	barrier_var(len);
889 	if (len <= MAX_FILEPATH_LENGTH) {
890 		barrier_var(len);
891 		payload += len;
892 		filemod_data->dst_filepath_length = len;
893 	}
894 
895 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
896 	unsigned long data_len = payload - (void*)filemod_data;
897 	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
898 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
899 out:
900 	bpf_stats_exit(&stats_ctx);
901 	return 0;
902 }
903 
904 SEC("kprobe/vfs_symlink")
905 int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
906 	       const char* oldname)
907 {
908 	struct bpf_func_stats_ctx stats_ctx;
909 	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
910 
911 	u32 dst_device_id = 0;
912 	u64 dst_file_ino = 0;
913 	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
914 		goto out;
915 
916 	int zero = 0;
917 	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
918 	if (!filemod_data)
919 		goto out;
920 
921 	u32 pid = get_userspace_pid();
922 	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
923 
924 	filemod_data->meta.type = FILEMOD_EVENT;
925 	filemod_data->fmod_type = FMOD_SYMLINK;
926 	filemod_data->dst_flags = 0;
927 	filemod_data->src_inode = 0;
928 	filemod_data->dst_inode = dst_file_ino;
929 	filemod_data->src_device_id = 0;
930 	filemod_data->dst_device_id = dst_device_id;
931 	filemod_data->src_filepath_length = 0;
932 	filemod_data->dst_filepath_length = 0;
933 
934 	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
935 					      filemod_data->payload);
936 	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
937 
938 	size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
939 					       oldname);
940 	barrier_var(len);
941 	if (len <= MAX_FILEPATH_LENGTH) {
942 		barrier_var(len);
943 		payload += len;
944 		filemod_data->src_filepath_length = len;
945 	}
946 	len = read_absolute_file_path_from_dentry(dentry, payload);
947 	barrier_var(len);
948 	if (len <= MAX_FILEPATH_LENGTH) {
949 		barrier_var(len);
950 		payload += len;
951 		filemod_data->dst_filepath_length = len;
952 	}
953 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
954 	unsigned long data_len = payload - (void*)filemod_data;
955 	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
956 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
957 out:
958 	bpf_stats_exit(&stats_ctx);
959 	return 0;
960 }
961 
962 SEC("raw_tracepoint/sched_process_fork")
963 int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
964 {
965 	struct bpf_func_stats_ctx stats_ctx;
966 	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
967 
968 	int zero = 0;
969 	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
970 	if (!fork_data)
971 		goto out;
972 
973 	struct task_struct* parent = (struct task_struct*)ctx->args[0];
974 	struct task_struct* child = (struct task_struct*)ctx->args[1];
975 	fork_data->meta.type = FORK_EVENT;
976 
977 	void* payload = populate_var_metadata(&fork_data->meta, child,
978 					      BPF_CORE_READ(child, pid), fork_data->payload);
979 	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
980 	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
981 	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
982 	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
983 
984 	unsigned long data_len = payload - (void*)fork_data;
985 	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
986 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
987 out:
988 	bpf_stats_exit(&stats_ctx);
989 	return 0;
990 }
991 char _license[] SEC("license") = "GPL";
992