1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2020 Facebook */ 3 #include <vmlinux.h> 4 #include <bpf/bpf_core_read.h> 5 #include <bpf/bpf_helpers.h> 6 #include <bpf/bpf_tracing.h> 7 8 #include "profiler.h" 9 10 #ifndef NULL 11 #define NULL 0 12 #endif 13 14 #define O_WRONLY 00000001 15 #define O_RDWR 00000002 16 #define O_DIRECTORY 00200000 17 #define __O_TMPFILE 020000000 18 #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) 19 #define MAX_ERRNO 4095 20 #define S_IFMT 00170000 21 #define S_IFSOCK 0140000 22 #define S_IFLNK 0120000 23 #define S_IFREG 0100000 24 #define S_IFBLK 0060000 25 #define S_IFDIR 0040000 26 #define S_IFCHR 0020000 27 #define S_IFIFO 0010000 28 #define S_ISUID 0004000 29 #define S_ISGID 0002000 30 #define S_ISVTX 0001000 31 #define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK) 32 #define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) 33 #define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR) 34 #define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK) 35 #define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO) 36 #define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK) 37 #define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO 38 39 #define KILL_DATA_ARRAY_SIZE 8 40 41 struct var_kill_data_arr_t { 42 struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE]; 43 }; 44 45 union any_profiler_data_t { 46 struct var_exec_data_t var_exec; 47 struct var_kill_data_t var_kill; 48 struct var_sysctl_data_t var_sysctl; 49 struct var_filemod_data_t var_filemod; 50 struct var_fork_data_t var_fork; 51 struct var_kill_data_arr_t var_kill_data_arr; 52 }; 53 54 volatile struct profiler_config_struct bpf_config = {}; 55 56 #define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf) 57 #define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode) 58 #define CGROUP_LOGIN_SESSION_INODE \ 59 (bpf_config.cgroup_login_session_inode) 60 #define KILL_SIGNALS (bpf_config.kill_signals_mask) 61 #define STALE_INFO (bpf_config.stale_info_secs) 62 #define INODE_FILTER (bpf_config.inode_filter) 63 #define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec) 64 #define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver) 65 66 struct kernfs_iattrs___52 { 67 struct iattr ia_iattr; 68 }; 69 70 struct kernfs_node___52 { 71 union /* kernfs_node_id */ { 72 struct { 73 u32 ino; 74 u32 generation; 75 }; 76 u64 id; 77 } id; 78 }; 79 80 struct { 81 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 82 __uint(max_entries, 1); 83 __type(key, u32); 84 __type(value, union any_profiler_data_t); 85 } data_heap SEC(".maps"); 86 87 struct { 88 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 89 __uint(key_size, sizeof(int)); 90 __uint(value_size, sizeof(int)); 91 } events SEC(".maps"); 92 93 struct { 94 __uint(type, BPF_MAP_TYPE_HASH); 95 __uint(max_entries, KILL_DATA_ARRAY_SIZE); 96 __type(key, u32); 97 __type(value, struct var_kill_data_arr_t); 98 } var_tpid_to_data SEC(".maps"); 99 100 struct { 101 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 102 __uint(max_entries, profiler_bpf_max_function_id); 103 __type(key, u32); 104 __type(value, struct bpf_func_stats_data); 105 } bpf_func_stats SEC(".maps"); 106 107 struct { 108 __uint(type, BPF_MAP_TYPE_HASH); 109 __type(key, u32); 110 __type(value, bool); 111 __uint(max_entries, 16); 112 } allowed_devices SEC(".maps"); 113 114 struct { 115 __uint(type, BPF_MAP_TYPE_HASH); 116 __type(key, u64); 117 __type(value, bool); 118 __uint(max_entries, 1024); 119 } allowed_file_inodes SEC(".maps"); 120 121 struct { 122 __uint(type, BPF_MAP_TYPE_HASH); 123 __type(key, u64); 124 __type(value, bool); 125 __uint(max_entries, 1024); 126 } allowed_directory_inodes SEC(".maps"); 127 128 struct { 129 __uint(type, BPF_MAP_TYPE_HASH); 130 __type(key, u32); 131 __type(value, bool); 132 __uint(max_entries, 16); 133 } disallowed_exec_inodes SEC(".maps"); 134 135 #ifndef ARRAY_SIZE 136 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 137 #endif 138 139 static INLINE bool IS_ERR(const void* ptr) 140 { 141 return IS_ERR_VALUE((unsigned long)ptr); 142 } 143 144 static INLINE u32 get_userspace_pid() 145 { 146 return bpf_get_current_pid_tgid() >> 32; 147 } 148 149 static INLINE bool is_init_process(u32 tgid) 150 { 151 return tgid == 1 || tgid == 0; 152 } 153 154 static INLINE unsigned long 155 probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max) 156 { 157 len = len < max ? len : max; 158 if (len > 1) { 159 if (bpf_probe_read_kernel(dst, len, src)) 160 return 0; 161 } else if (len == 1) { 162 if (bpf_probe_read_kernel(dst, 1, src)) 163 return 0; 164 } 165 return len; 166 } 167 168 static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct, 169 int spid) 170 { 171 #ifdef UNROLL 172 #pragma unroll 173 #endif 174 for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) 175 if (arr_struct->array[i].meta.pid == spid) 176 return i; 177 return -1; 178 } 179 180 static INLINE void populate_ancestors(struct task_struct* task, 181 struct ancestors_data_t* ancestors_data) 182 { 183 struct task_struct* parent = task; 184 u32 num_ancestors, ppid; 185 186 ancestors_data->num_ancestors = 0; 187 #ifdef UNROLL 188 #pragma unroll 189 #endif 190 for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) { 191 parent = BPF_CORE_READ(parent, real_parent); 192 if (parent == NULL) 193 break; 194 ppid = BPF_CORE_READ(parent, tgid); 195 if (is_init_process(ppid)) 196 break; 197 ancestors_data->ancestor_pids[num_ancestors] = ppid; 198 ancestors_data->ancestor_exec_ids[num_ancestors] = 199 BPF_CORE_READ(parent, self_exec_id); 200 ancestors_data->ancestor_start_times[num_ancestors] = 201 BPF_CORE_READ(parent, start_time); 202 ancestors_data->num_ancestors = num_ancestors; 203 } 204 } 205 206 static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, 207 struct kernfs_node* cgroup_root_node, 208 void* payload, 209 int* root_pos) 210 { 211 void* payload_start = payload; 212 size_t filepart_length; 213 214 #ifdef UNROLL 215 #pragma unroll 216 #endif 217 for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { 218 filepart_length = 219 bpf_probe_read_kernel_str(payload, MAX_PATH, 220 BPF_CORE_READ(cgroup_node, name)); 221 if (!cgroup_node) 222 return payload; 223 if (cgroup_node == cgroup_root_node) 224 *root_pos = payload - payload_start; 225 if (filepart_length <= MAX_PATH) { 226 barrier_var(filepart_length); 227 payload += filepart_length; 228 } 229 cgroup_node = BPF_CORE_READ(cgroup_node, parent); 230 } 231 return payload; 232 } 233 234 static ino_t get_inode_from_kernfs(struct kernfs_node* node) 235 { 236 struct kernfs_node___52* node52 = (void*)node; 237 238 if (bpf_core_field_exists(node52->id.ino)) { 239 barrier_var(node52); 240 return BPF_CORE_READ(node52, id.ino); 241 } else { 242 barrier_var(node); 243 return (u64)BPF_CORE_READ(node, id); 244 } 245 } 246 247 extern bool CONFIG_CGROUP_PIDS __kconfig __weak; 248 enum cgroup_subsys_id___local { 249 pids_cgrp_id___local = 123, /* value doesn't matter */ 250 }; 251 252 static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, 253 struct task_struct* task, 254 void* payload) 255 { 256 struct kernfs_node* root_kernfs = 257 BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); 258 struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); 259 260 #if __has_builtin(__builtin_preserve_enum_value) 261 if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { 262 int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, 263 pids_cgrp_id___local); 264 #ifdef UNROLL 265 #pragma unroll 266 #endif 267 for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 268 struct cgroup_subsys_state* subsys = 269 BPF_CORE_READ(task, cgroups, subsys[i]); 270 if (subsys != NULL) { 271 int subsys_id = BPF_CORE_READ(subsys, ss, id); 272 if (subsys_id == cgrp_id) { 273 proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); 274 root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); 275 break; 276 } 277 } 278 } 279 } 280 #endif 281 282 cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs); 283 cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs); 284 285 if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) { 286 cgroup_data->cgroup_root_mtime = 287 BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec); 288 cgroup_data->cgroup_proc_mtime = 289 BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec); 290 } else { 291 struct kernfs_iattrs___52* root_iattr = 292 (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr); 293 cgroup_data->cgroup_root_mtime = 294 BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec); 295 296 struct kernfs_iattrs___52* proc_iattr = 297 (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr); 298 cgroup_data->cgroup_proc_mtime = 299 BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec); 300 } 301 302 cgroup_data->cgroup_root_length = 0; 303 cgroup_data->cgroup_proc_length = 0; 304 cgroup_data->cgroup_full_length = 0; 305 306 size_t cgroup_root_length = 307 bpf_probe_read_kernel_str(payload, MAX_PATH, 308 BPF_CORE_READ(root_kernfs, name)); 309 barrier_var(cgroup_root_length); 310 if (cgroup_root_length <= MAX_PATH) { 311 barrier_var(cgroup_root_length); 312 cgroup_data->cgroup_root_length = cgroup_root_length; 313 payload += cgroup_root_length; 314 } 315 316 size_t cgroup_proc_length = 317 bpf_probe_read_kernel_str(payload, MAX_PATH, 318 BPF_CORE_READ(proc_kernfs, name)); 319 barrier_var(cgroup_proc_length); 320 if (cgroup_proc_length <= MAX_PATH) { 321 barrier_var(cgroup_proc_length); 322 cgroup_data->cgroup_proc_length = cgroup_proc_length; 323 payload += cgroup_proc_length; 324 } 325 326 if (FETCH_CGROUPS_FROM_BPF) { 327 cgroup_data->cgroup_full_path_root_pos = -1; 328 void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload, 329 &cgroup_data->cgroup_full_path_root_pos); 330 cgroup_data->cgroup_full_length = payload_end_pos - payload; 331 payload = payload_end_pos; 332 } 333 334 return (void*)payload; 335 } 336 337 static INLINE void* populate_var_metadata(struct var_metadata_t* metadata, 338 struct task_struct* task, 339 u32 pid, void* payload) 340 { 341 u64 uid_gid = bpf_get_current_uid_gid(); 342 343 metadata->uid = (u32)uid_gid; 344 metadata->gid = uid_gid >> 32; 345 metadata->pid = pid; 346 metadata->exec_id = BPF_CORE_READ(task, self_exec_id); 347 metadata->start_time = BPF_CORE_READ(task, start_time); 348 metadata->comm_length = 0; 349 350 size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); 351 barrier_var(comm_length); 352 if (comm_length <= TASK_COMM_LEN) { 353 barrier_var(comm_length); 354 metadata->comm_length = comm_length; 355 payload += comm_length; 356 } 357 358 return (void*)payload; 359 } 360 361 static INLINE struct var_kill_data_t* 362 get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig) 363 { 364 int zero = 0; 365 struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); 366 367 if (kill_data == NULL) 368 return NULL; 369 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 370 371 void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload); 372 payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload); 373 size_t payload_length = payload - (void*)kill_data->payload; 374 kill_data->payload_length = payload_length; 375 populate_ancestors(task, &kill_data->ancestors_info); 376 kill_data->meta.type = KILL_EVENT; 377 kill_data->kill_target_pid = tpid; 378 kill_data->kill_sig = sig; 379 kill_data->kill_count = 1; 380 kill_data->last_kill_time = bpf_ktime_get_ns(); 381 return kill_data; 382 } 383 384 static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) 385 { 386 if ((KILL_SIGNALS & (1ULL << sig)) == 0) 387 return 0; 388 389 u32 spid = get_userspace_pid(); 390 struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); 391 392 if (arr_struct == NULL) { 393 struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig); 394 int zero = 0; 395 396 if (kill_data == NULL) 397 return 0; 398 arr_struct = bpf_map_lookup_elem(&data_heap, &zero); 399 if (arr_struct == NULL) 400 return 0; 401 bpf_probe_read_kernel(&arr_struct->array[0], 402 sizeof(arr_struct->array[0]), kill_data); 403 } else { 404 int index = get_var_spid_index(arr_struct, spid); 405 406 if (index == -1) { 407 struct var_kill_data_t* kill_data = 408 get_var_kill_data(ctx, spid, tpid, sig); 409 if (kill_data == NULL) 410 return 0; 411 #ifdef UNROLL 412 #pragma unroll 413 #endif 414 for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) 415 if (arr_struct->array[i].meta.pid == 0) { 416 bpf_probe_read_kernel(&arr_struct->array[i], 417 sizeof(arr_struct->array[i]), 418 kill_data); 419 bpf_map_update_elem(&var_tpid_to_data, &tpid, 420 arr_struct, 0); 421 422 return 0; 423 } 424 return 0; 425 } 426 427 struct var_kill_data_t* kill_data = &arr_struct->array[index]; 428 429 u64 delta_sec = 430 (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000; 431 432 if (delta_sec < STALE_INFO) { 433 kill_data->kill_count++; 434 kill_data->last_kill_time = bpf_ktime_get_ns(); 435 bpf_probe_read_kernel(&arr_struct->array[index], 436 sizeof(arr_struct->array[index]), 437 kill_data); 438 } else { 439 struct var_kill_data_t* kill_data = 440 get_var_kill_data(ctx, spid, tpid, sig); 441 if (kill_data == NULL) 442 return 0; 443 bpf_probe_read_kernel(&arr_struct->array[index], 444 sizeof(arr_struct->array[index]), 445 kill_data); 446 } 447 } 448 bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0); 449 return 0; 450 } 451 452 static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx, 453 enum bpf_function_id func_id) 454 { 455 int func_id_key = func_id; 456 457 bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns(); 458 bpf_stat_ctx->bpf_func_stats_data_val = 459 bpf_map_lookup_elem(&bpf_func_stats, &func_id_key); 460 if (bpf_stat_ctx->bpf_func_stats_data_val) 461 bpf_stat_ctx->bpf_func_stats_data_val->num_executions++; 462 } 463 464 static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx) 465 { 466 if (bpf_stat_ctx->bpf_func_stats_data_val) 467 bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns += 468 bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns; 469 } 470 471 static INLINE void 472 bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx, 473 struct var_metadata_t* meta) 474 { 475 if (bpf_stat_ctx->bpf_func_stats_data_val) { 476 bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++; 477 meta->bpf_stats_num_perf_events = 478 bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events; 479 } 480 meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns; 481 meta->cpu_id = bpf_get_smp_processor_id(); 482 } 483 484 static INLINE size_t 485 read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) 486 { 487 size_t length = 0; 488 size_t filepart_length; 489 struct dentry* parent_dentry; 490 491 #ifdef UNROLL 492 #pragma unroll 493 #endif 494 for (int i = 0; i < MAX_PATH_DEPTH; i++) { 495 filepart_length = 496 bpf_probe_read_kernel_str(payload, MAX_PATH, 497 BPF_CORE_READ(filp_dentry, d_name.name)); 498 barrier_var(filepart_length); 499 if (filepart_length > MAX_PATH) 500 break; 501 barrier_var(filepart_length); 502 payload += filepart_length; 503 length += filepart_length; 504 505 parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); 506 if (filp_dentry == parent_dentry) 507 break; 508 filp_dentry = parent_dentry; 509 } 510 511 return length; 512 } 513 514 static INLINE bool 515 is_ancestor_in_allowed_inodes(struct dentry* filp_dentry) 516 { 517 struct dentry* parent_dentry; 518 #ifdef UNROLL 519 #pragma unroll 520 #endif 521 for (int i = 0; i < MAX_PATH_DEPTH; i++) { 522 u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino); 523 bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino); 524 525 if (allowed_dir != NULL) 526 return true; 527 parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); 528 if (filp_dentry == parent_dentry) 529 break; 530 filp_dentry = parent_dentry; 531 } 532 return false; 533 } 534 535 static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry, 536 u32* device_id, 537 u64* file_ino) 538 { 539 u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev); 540 *device_id = dev_id; 541 bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id); 542 543 if (allowed_device == NULL) 544 return false; 545 546 u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino); 547 *file_ino = ino; 548 bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino); 549 550 if (allowed_file == NULL) 551 if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent))) 552 return false; 553 return true; 554 } 555 556 SEC("kprobe/proc_sys_write") 557 ssize_t BPF_KPROBE(kprobe__proc_sys_write, 558 struct file* filp, const char* buf, 559 size_t count, loff_t* ppos) 560 { 561 struct bpf_func_stats_ctx stats_ctx; 562 bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write); 563 564 u32 pid = get_userspace_pid(); 565 int zero = 0; 566 struct var_sysctl_data_t* sysctl_data = 567 bpf_map_lookup_elem(&data_heap, &zero); 568 if (!sysctl_data) 569 goto out; 570 571 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 572 sysctl_data->meta.type = SYSCTL_EVENT; 573 void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload); 574 payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload); 575 576 populate_ancestors(task, &sysctl_data->ancestors_info); 577 578 sysctl_data->sysctl_val_length = 0; 579 sysctl_data->sysctl_path_length = 0; 580 581 size_t sysctl_val_length = bpf_probe_read_kernel_str(payload, 582 CTL_MAXNAME, buf); 583 barrier_var(sysctl_val_length); 584 if (sysctl_val_length <= CTL_MAXNAME) { 585 barrier_var(sysctl_val_length); 586 sysctl_data->sysctl_val_length = sysctl_val_length; 587 payload += sysctl_val_length; 588 } 589 590 size_t sysctl_path_length = 591 bpf_probe_read_kernel_str(payload, MAX_PATH, 592 BPF_CORE_READ(filp, f_path.dentry, 593 d_name.name)); 594 barrier_var(sysctl_path_length); 595 if (sysctl_path_length <= MAX_PATH) { 596 barrier_var(sysctl_path_length); 597 sysctl_data->sysctl_path_length = sysctl_path_length; 598 payload += sysctl_path_length; 599 } 600 601 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta); 602 unsigned long data_len = payload - (void*)sysctl_data; 603 data_len = data_len > sizeof(struct var_sysctl_data_t) 604 ? sizeof(struct var_sysctl_data_t) 605 : data_len; 606 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len); 607 out: 608 bpf_stats_exit(&stats_ctx); 609 return 0; 610 } 611 612 SEC("tracepoint/syscalls/sys_enter_kill") 613 int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx) 614 { 615 struct bpf_func_stats_ctx stats_ctx; 616 617 bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill); 618 int pid = ctx->args[0]; 619 int sig = ctx->args[1]; 620 int ret = trace_var_sys_kill(ctx, pid, sig); 621 bpf_stats_exit(&stats_ctx); 622 return ret; 623 }; 624 625 SEC("raw_tracepoint/sched_process_exit") 626 int raw_tracepoint__sched_process_exit(void* ctx) 627 { 628 int zero = 0; 629 struct bpf_func_stats_ctx stats_ctx; 630 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit); 631 632 u32 tpid = get_userspace_pid(); 633 634 struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); 635 struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); 636 637 if (arr_struct == NULL || kill_data == NULL) 638 goto out; 639 640 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 641 struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); 642 643 #ifdef UNROLL 644 #pragma unroll 645 #endif 646 for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { 647 struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; 648 649 if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) { 650 bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data), 651 past_kill_data); 652 void* payload = kill_data->payload; 653 size_t offset = kill_data->payload_length; 654 if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) 655 return 0; 656 payload += offset; 657 658 kill_data->kill_target_name_length = 0; 659 kill_data->kill_target_cgroup_proc_length = 0; 660 661 size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); 662 barrier_var(comm_length); 663 if (comm_length <= TASK_COMM_LEN) { 664 barrier_var(comm_length); 665 kill_data->kill_target_name_length = comm_length; 666 payload += comm_length; 667 } 668 669 size_t cgroup_proc_length = 670 bpf_probe_read_kernel_str(payload, 671 KILL_TARGET_LEN, 672 BPF_CORE_READ(proc_kernfs, name)); 673 barrier_var(cgroup_proc_length); 674 if (cgroup_proc_length <= KILL_TARGET_LEN) { 675 barrier_var(cgroup_proc_length); 676 kill_data->kill_target_cgroup_proc_length = cgroup_proc_length; 677 payload += cgroup_proc_length; 678 } 679 680 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta); 681 unsigned long data_len = (void*)payload - (void*)kill_data; 682 data_len = data_len > sizeof(struct var_kill_data_t) 683 ? sizeof(struct var_kill_data_t) 684 : data_len; 685 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len); 686 } 687 } 688 bpf_map_delete_elem(&var_tpid_to_data, &tpid); 689 out: 690 bpf_stats_exit(&stats_ctx); 691 return 0; 692 } 693 694 SEC("raw_tracepoint/sched_process_exec") 695 int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) 696 { 697 struct bpf_func_stats_ctx stats_ctx; 698 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec); 699 700 struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2]; 701 u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino); 702 703 bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode); 704 if (should_filter_binprm != NULL) 705 goto out; 706 707 int zero = 0; 708 struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero); 709 if (!proc_exec_data) 710 goto out; 711 712 if (INODE_FILTER && inode != INODE_FILTER) 713 return 0; 714 715 u32 pid = get_userspace_pid(); 716 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 717 718 proc_exec_data->meta.type = EXEC_EVENT; 719 proc_exec_data->bin_path_length = 0; 720 proc_exec_data->cmdline_length = 0; 721 proc_exec_data->environment_length = 0; 722 void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid, 723 proc_exec_data->payload); 724 payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload); 725 726 struct task_struct* parent_task = BPF_CORE_READ(task, real_parent); 727 proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid); 728 proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val); 729 proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id); 730 proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time); 731 732 const char* filename = BPF_CORE_READ(bprm, filename); 733 size_t bin_path_length = 734 bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename); 735 barrier_var(bin_path_length); 736 if (bin_path_length <= MAX_FILENAME_LEN) { 737 barrier_var(bin_path_length); 738 proc_exec_data->bin_path_length = bin_path_length; 739 payload += bin_path_length; 740 } 741 742 void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start); 743 void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end); 744 unsigned int cmdline_length = probe_read_lim(payload, arg_start, 745 arg_end - arg_start, MAX_ARGS_LEN); 746 747 if (cmdline_length <= MAX_ARGS_LEN) { 748 barrier_var(cmdline_length); 749 proc_exec_data->cmdline_length = cmdline_length; 750 payload += cmdline_length; 751 } 752 753 if (READ_ENVIRON_FROM_EXEC) { 754 void* env_start = (void*)BPF_CORE_READ(task, mm, env_start); 755 void* env_end = (void*)BPF_CORE_READ(task, mm, env_end); 756 unsigned long env_len = probe_read_lim(payload, env_start, 757 env_end - env_start, MAX_ENVIRON_LEN); 758 if (cmdline_length <= MAX_ENVIRON_LEN) { 759 proc_exec_data->environment_length = env_len; 760 payload += env_len; 761 } 762 } 763 764 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta); 765 unsigned long data_len = payload - (void*)proc_exec_data; 766 data_len = data_len > sizeof(struct var_exec_data_t) 767 ? sizeof(struct var_exec_data_t) 768 : data_len; 769 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len); 770 out: 771 bpf_stats_exit(&stats_ctx); 772 return 0; 773 } 774 775 SEC("kretprobe/do_filp_open") 776 int kprobe_ret__do_filp_open(struct pt_regs* ctx) 777 { 778 struct bpf_func_stats_ctx stats_ctx; 779 bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret); 780 781 struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx); 782 783 if (filp == NULL || IS_ERR(filp)) 784 goto out; 785 unsigned int flags = BPF_CORE_READ(filp, f_flags); 786 if ((flags & (O_RDWR | O_WRONLY)) == 0) 787 goto out; 788 if ((flags & O_TMPFILE) > 0) 789 goto out; 790 struct inode* file_inode = BPF_CORE_READ(filp, f_inode); 791 umode_t mode = BPF_CORE_READ(file_inode, i_mode); 792 if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || 793 S_ISSOCK(mode)) 794 goto out; 795 796 struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry); 797 u32 device_id = 0; 798 u64 file_ino = 0; 799 if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino)) 800 goto out; 801 802 int zero = 0; 803 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); 804 if (!filemod_data) 805 goto out; 806 807 u32 pid = get_userspace_pid(); 808 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 809 810 filemod_data->meta.type = FILEMOD_EVENT; 811 filemod_data->fmod_type = FMOD_OPEN; 812 filemod_data->dst_flags = flags; 813 filemod_data->src_inode = 0; 814 filemod_data->dst_inode = file_ino; 815 filemod_data->src_device_id = 0; 816 filemod_data->dst_device_id = device_id; 817 filemod_data->src_filepath_length = 0; 818 filemod_data->dst_filepath_length = 0; 819 820 void* payload = populate_var_metadata(&filemod_data->meta, task, pid, 821 filemod_data->payload); 822 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); 823 824 size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload); 825 barrier_var(len); 826 if (len <= MAX_FILEPATH_LENGTH) { 827 barrier_var(len); 828 payload += len; 829 filemod_data->dst_filepath_length = len; 830 } 831 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); 832 unsigned long data_len = payload - (void*)filemod_data; 833 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; 834 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); 835 out: 836 bpf_stats_exit(&stats_ctx); 837 return 0; 838 } 839 840 SEC("kprobe/vfs_link") 841 int BPF_KPROBE(kprobe__vfs_link, 842 struct dentry* old_dentry, struct user_namespace *mnt_userns, 843 struct inode* dir, struct dentry* new_dentry, 844 struct inode** delegated_inode) 845 { 846 struct bpf_func_stats_ctx stats_ctx; 847 bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link); 848 849 u32 src_device_id = 0; 850 u64 src_file_ino = 0; 851 u32 dst_device_id = 0; 852 u64 dst_file_ino = 0; 853 if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) && 854 !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino)) 855 goto out; 856 857 int zero = 0; 858 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); 859 if (!filemod_data) 860 goto out; 861 862 u32 pid = get_userspace_pid(); 863 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 864 865 filemod_data->meta.type = FILEMOD_EVENT; 866 filemod_data->fmod_type = FMOD_LINK; 867 filemod_data->dst_flags = 0; 868 filemod_data->src_inode = src_file_ino; 869 filemod_data->dst_inode = dst_file_ino; 870 filemod_data->src_device_id = src_device_id; 871 filemod_data->dst_device_id = dst_device_id; 872 filemod_data->src_filepath_length = 0; 873 filemod_data->dst_filepath_length = 0; 874 875 void* payload = populate_var_metadata(&filemod_data->meta, task, pid, 876 filemod_data->payload); 877 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); 878 879 size_t len = read_absolute_file_path_from_dentry(old_dentry, payload); 880 barrier_var(len); 881 if (len <= MAX_FILEPATH_LENGTH) { 882 barrier_var(len); 883 payload += len; 884 filemod_data->src_filepath_length = len; 885 } 886 887 len = read_absolute_file_path_from_dentry(new_dentry, payload); 888 barrier_var(len); 889 if (len <= MAX_FILEPATH_LENGTH) { 890 barrier_var(len); 891 payload += len; 892 filemod_data->dst_filepath_length = len; 893 } 894 895 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); 896 unsigned long data_len = payload - (void*)filemod_data; 897 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; 898 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); 899 out: 900 bpf_stats_exit(&stats_ctx); 901 return 0; 902 } 903 904 SEC("kprobe/vfs_symlink") 905 int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, 906 const char* oldname) 907 { 908 struct bpf_func_stats_ctx stats_ctx; 909 bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink); 910 911 u32 dst_device_id = 0; 912 u64 dst_file_ino = 0; 913 if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino)) 914 goto out; 915 916 int zero = 0; 917 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); 918 if (!filemod_data) 919 goto out; 920 921 u32 pid = get_userspace_pid(); 922 struct task_struct* task = (struct task_struct*)bpf_get_current_task(); 923 924 filemod_data->meta.type = FILEMOD_EVENT; 925 filemod_data->fmod_type = FMOD_SYMLINK; 926 filemod_data->dst_flags = 0; 927 filemod_data->src_inode = 0; 928 filemod_data->dst_inode = dst_file_ino; 929 filemod_data->src_device_id = 0; 930 filemod_data->dst_device_id = dst_device_id; 931 filemod_data->src_filepath_length = 0; 932 filemod_data->dst_filepath_length = 0; 933 934 void* payload = populate_var_metadata(&filemod_data->meta, task, pid, 935 filemod_data->payload); 936 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); 937 938 size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH, 939 oldname); 940 barrier_var(len); 941 if (len <= MAX_FILEPATH_LENGTH) { 942 barrier_var(len); 943 payload += len; 944 filemod_data->src_filepath_length = len; 945 } 946 len = read_absolute_file_path_from_dentry(dentry, payload); 947 barrier_var(len); 948 if (len <= MAX_FILEPATH_LENGTH) { 949 barrier_var(len); 950 payload += len; 951 filemod_data->dst_filepath_length = len; 952 } 953 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); 954 unsigned long data_len = payload - (void*)filemod_data; 955 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; 956 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); 957 out: 958 bpf_stats_exit(&stats_ctx); 959 return 0; 960 } 961 962 SEC("raw_tracepoint/sched_process_fork") 963 int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx) 964 { 965 struct bpf_func_stats_ctx stats_ctx; 966 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork); 967 968 int zero = 0; 969 struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero); 970 if (!fork_data) 971 goto out; 972 973 struct task_struct* parent = (struct task_struct*)ctx->args[0]; 974 struct task_struct* child = (struct task_struct*)ctx->args[1]; 975 fork_data->meta.type = FORK_EVENT; 976 977 void* payload = populate_var_metadata(&fork_data->meta, child, 978 BPF_CORE_READ(child, pid), fork_data->payload); 979 fork_data->parent_pid = BPF_CORE_READ(parent, pid); 980 fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id); 981 fork_data->parent_start_time = BPF_CORE_READ(parent, start_time); 982 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta); 983 984 unsigned long data_len = payload - (void*)fork_data; 985 data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len; 986 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len); 987 out: 988 bpf_stats_exit(&stats_ctx); 989 return 0; 990 } 991 char _license[] SEC("license") = "GPL"; 992