1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. 4 * 5 * This exactly matches what is marshalled into the raw_syscall:sys_enter 6 * payload expected by the 'perf trace' beautifiers. 7 */ 8 9 #include <linux/bpf.h> 10 #include <bpf/bpf_helpers.h> 11 #include <linux/limits.h> 12 13 /** 14 * is_power_of_2() - check if a value is a power of two 15 * @n: the value to check 16 * 17 * Determine whether some value is a power of two, where zero is *not* 18 * considered a power of two. Return: true if @n is a power of 2, otherwise 19 * false. 20 */ 21 #define is_power_of_2(n) (n != 0 && ((n & (n - 1)) == 0)) 22 23 #define MAX_CPUS 4096 24 25 // FIXME: These should come from system headers 26 typedef char bool; 27 typedef int pid_t; 28 typedef long long int __s64; 29 typedef __s64 time64_t; 30 31 struct timespec64 { 32 time64_t tv_sec; 33 long int tv_nsec; 34 }; 35 36 /* bpf-output associated map */ 37 struct __augmented_syscalls__ { 38 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 39 __type(key, int); 40 __type(value, __u32); 41 __uint(max_entries, MAX_CPUS); 42 } __augmented_syscalls__ SEC(".maps"); 43 44 /* 45 * What to augment at entry? 46 * 47 * Pointer arg payloads (filenames, etc) passed from userspace to the kernel 48 */ 49 struct syscalls_sys_enter { 50 __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 51 __type(key, __u32); 52 __type(value, __u32); 53 __uint(max_entries, 512); 54 } syscalls_sys_enter SEC(".maps"); 55 56 /* 57 * What to augment at exit? 58 * 59 * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace. 60 */ 61 struct syscalls_sys_exit { 62 __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 63 __type(key, __u32); 64 __type(value, __u32); 65 __uint(max_entries, 512); 66 } syscalls_sys_exit SEC(".maps"); 67 68 struct syscall_enter_args { 69 unsigned long long common_tp_fields; 70 long syscall_nr; 71 unsigned long args[6]; 72 }; 73 74 struct syscall_exit_args { 75 unsigned long long common_tp_fields; 76 long syscall_nr; 77 long ret; 78 }; 79 80 struct augmented_arg { 81 unsigned int size; 82 int err; 83 char value[PATH_MAX]; 84 }; 85 86 struct pids_filtered { 87 __uint(type, BPF_MAP_TYPE_HASH); 88 __type(key, pid_t); 89 __type(value, bool); 90 __uint(max_entries, 64); 91 } pids_filtered SEC(".maps"); 92 93 /* 94 * Desired design of maximum size and alignment (see RFC2553) 95 */ 96 #define SS_MAXSIZE 128 /* Implementation specific max size */ 97 98 typedef unsigned short sa_family_t; 99 100 /* 101 * FIXME: Should come from system headers 102 * 103 * The definition uses anonymous union and struct in order to control the 104 * default alignment. 105 */ 106 struct sockaddr_storage { 107 union { 108 struct { 109 sa_family_t ss_family; /* address family */ 110 /* Following field(s) are implementation specific */ 111 char __data[SS_MAXSIZE - sizeof(unsigned short)]; 112 /* space to achieve desired size, */ 113 /* _SS_MAXSIZE value minus size of ss_family */ 114 }; 115 void *__align; /* implementation specific desired alignment */ 116 }; 117 }; 118 119 struct augmented_args_payload { 120 struct syscall_enter_args args; 121 union { 122 struct { 123 struct augmented_arg arg, arg2; 124 }; 125 struct sockaddr_storage saddr; 126 char __data[sizeof(struct augmented_arg)]; 127 }; 128 }; 129 130 // We need more tmp space than the BPF stack can give us 131 struct augmented_args_tmp { 132 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 133 __type(key, int); 134 __type(value, struct augmented_args_payload); 135 __uint(max_entries, 1); 136 } augmented_args_tmp SEC(".maps"); 137 138 static inline struct augmented_args_payload *augmented_args_payload(void) 139 { 140 int key = 0; 141 return bpf_map_lookup_elem(&augmented_args_tmp, &key); 142 } 143 144 static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len) 145 { 146 /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ 147 return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); 148 } 149 150 static inline 151 unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) 152 { 153 unsigned int augmented_len = sizeof(*augmented_arg); 154 int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg); 155 156 augmented_arg->size = augmented_arg->err = 0; 157 /* 158 * probe_read_str may return < 0, e.g. -EFAULT 159 * So we leave that in the augmented_arg->size that userspace will 160 */ 161 if (string_len > 0) { 162 augmented_len -= sizeof(augmented_arg->value) - string_len; 163 augmented_len &= sizeof(augmented_arg->value) - 1; 164 augmented_arg->size = string_len; 165 } else { 166 /* 167 * So that username notice the error while still being able 168 * to skip this augmented arg record 169 */ 170 augmented_arg->err = string_len; 171 augmented_len = offsetof(struct augmented_arg, value); 172 } 173 174 return augmented_len; 175 } 176 177 SEC("tp/raw_syscalls/sys_enter") 178 int syscall_unaugmented(struct syscall_enter_args *args) 179 { 180 return 1; 181 } 182 183 /* 184 * These will be tail_called from SEC("raw_syscalls:sys_enter"), so will find in 185 * augmented_args_tmp what was read by that raw_syscalls:sys_enter and go 186 * on from there, reading the first syscall arg as a string, i.e. open's 187 * filename. 188 */ 189 SEC("tp/syscalls/sys_enter_connect") 190 int sys_enter_connect(struct syscall_enter_args *args) 191 { 192 struct augmented_args_payload *augmented_args = augmented_args_payload(); 193 const void *sockaddr_arg = (const void *)args->args[1]; 194 unsigned int socklen = args->args[2]; 195 unsigned int len = sizeof(augmented_args->args); 196 197 if (augmented_args == NULL) 198 return 1; /* Failure: don't filter */ 199 200 _Static_assert(is_power_of_2(sizeof(augmented_args->saddr)), "sizeof(augmented_args->saddr) needs to be a power of two"); 201 socklen &= sizeof(augmented_args->saddr) - 1; 202 203 bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); 204 205 return augmented__output(args, augmented_args, len + socklen); 206 } 207 208 SEC("tp/syscalls/sys_enter_sendto") 209 int sys_enter_sendto(struct syscall_enter_args *args) 210 { 211 struct augmented_args_payload *augmented_args = augmented_args_payload(); 212 const void *sockaddr_arg = (const void *)args->args[4]; 213 unsigned int socklen = args->args[5]; 214 unsigned int len = sizeof(augmented_args->args); 215 216 if (augmented_args == NULL) 217 return 1; /* Failure: don't filter */ 218 219 socklen &= sizeof(augmented_args->saddr) - 1; 220 221 bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); 222 223 return augmented__output(args, augmented_args, len + socklen); 224 } 225 226 SEC("tp/syscalls/sys_enter_open") 227 int sys_enter_open(struct syscall_enter_args *args) 228 { 229 struct augmented_args_payload *augmented_args = augmented_args_payload(); 230 const void *filename_arg = (const void *)args->args[0]; 231 unsigned int len = sizeof(augmented_args->args); 232 233 if (augmented_args == NULL) 234 return 1; /* Failure: don't filter */ 235 236 len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value)); 237 238 return augmented__output(args, augmented_args, len); 239 } 240 241 SEC("tp/syscalls/sys_enter_openat") 242 int sys_enter_openat(struct syscall_enter_args *args) 243 { 244 struct augmented_args_payload *augmented_args = augmented_args_payload(); 245 const void *filename_arg = (const void *)args->args[1]; 246 unsigned int len = sizeof(augmented_args->args); 247 248 if (augmented_args == NULL) 249 return 1; /* Failure: don't filter */ 250 251 len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value)); 252 253 return augmented__output(args, augmented_args, len); 254 } 255 256 SEC("tp/syscalls/sys_enter_rename") 257 int sys_enter_rename(struct syscall_enter_args *args) 258 { 259 struct augmented_args_payload *augmented_args = augmented_args_payload(); 260 const void *oldpath_arg = (const void *)args->args[0], 261 *newpath_arg = (const void *)args->args[1]; 262 unsigned int len = sizeof(augmented_args->args), oldpath_len; 263 264 if (augmented_args == NULL) 265 return 1; /* Failure: don't filter */ 266 267 oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); 268 len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); 269 270 return augmented__output(args, augmented_args, len); 271 } 272 273 SEC("tp/syscalls/sys_enter_renameat") 274 int sys_enter_renameat(struct syscall_enter_args *args) 275 { 276 struct augmented_args_payload *augmented_args = augmented_args_payload(); 277 const void *oldpath_arg = (const void *)args->args[1], 278 *newpath_arg = (const void *)args->args[3]; 279 unsigned int len = sizeof(augmented_args->args), oldpath_len; 280 281 if (augmented_args == NULL) 282 return 1; /* Failure: don't filter */ 283 284 oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); 285 len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); 286 287 return augmented__output(args, augmented_args, len); 288 } 289 290 #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ 291 292 // we need just the start, get the size to then copy it 293 struct perf_event_attr_size { 294 __u32 type; 295 /* 296 * Size of the attr structure, for fwd/bwd compat. 297 */ 298 __u32 size; 299 }; 300 301 SEC("tp/syscalls/sys_enter_perf_event_open") 302 int sys_enter_perf_event_open(struct syscall_enter_args *args) 303 { 304 struct augmented_args_payload *augmented_args = augmented_args_payload(); 305 const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read; 306 unsigned int len = sizeof(augmented_args->args); 307 308 if (augmented_args == NULL) 309 goto failure; 310 311 if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0) 312 goto failure; 313 314 attr_read = (const struct perf_event_attr_size *)augmented_args->__data; 315 316 __u32 size = attr_read->size; 317 318 if (!size) 319 size = PERF_ATTR_SIZE_VER0; 320 321 if (size > sizeof(augmented_args->__data)) 322 goto failure; 323 324 // Now that we read attr->size and tested it against the size limits, read it completely 325 if (bpf_probe_read(&augmented_args->__data, size, attr) < 0) 326 goto failure; 327 328 return augmented__output(args, augmented_args, len + size); 329 failure: 330 return 1; /* Failure: don't filter */ 331 } 332 333 SEC("tp/syscalls/sys_enter_clock_nanosleep") 334 int sys_enter_clock_nanosleep(struct syscall_enter_args *args) 335 { 336 struct augmented_args_payload *augmented_args = augmented_args_payload(); 337 const void *rqtp_arg = (const void *)args->args[2]; 338 unsigned int len = sizeof(augmented_args->args); 339 __u32 size = sizeof(struct timespec64); 340 341 if (augmented_args == NULL) 342 goto failure; 343 344 if (size > sizeof(augmented_args->__data)) 345 goto failure; 346 347 bpf_probe_read(&augmented_args->__data, size, rqtp_arg); 348 349 return augmented__output(args, augmented_args, len + size); 350 failure: 351 return 1; /* Failure: don't filter */ 352 } 353 354 static pid_t getpid(void) 355 { 356 return bpf_get_current_pid_tgid(); 357 } 358 359 static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) 360 { 361 return bpf_map_lookup_elem(pids, &pid) != NULL; 362 } 363 364 SEC("tp/raw_syscalls/sys_enter") 365 int sys_enter(struct syscall_enter_args *args) 366 { 367 struct augmented_args_payload *augmented_args; 368 /* 369 * We start len, the amount of data that will be in the perf ring 370 * buffer, if this is not filtered out by one of pid_filter__has(), 371 * syscall->enabled, etc, with the non-augmented raw syscall payload, 372 * i.e. sizeof(augmented_args->args). 373 * 374 * We'll add to this as we add augmented syscalls right after that 375 * initial, non-augmented raw_syscalls:sys_enter payload. 376 */ 377 378 if (pid_filter__has(&pids_filtered, getpid())) 379 return 0; 380 381 augmented_args = augmented_args_payload(); 382 if (augmented_args == NULL) 383 return 1; 384 385 bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args); 386 387 /* 388 * Jump to syscall specific augmenter, even if the default one, 389 * "!raw_syscalls:unaugmented" that will just return 1 to return the 390 * unaugmented tracepoint payload. 391 */ 392 bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); 393 394 // If not found on the PROG_ARRAY syscalls map, then we're filtering it: 395 return 0; 396 } 397 398 SEC("tp/raw_syscalls/sys_exit") 399 int sys_exit(struct syscall_exit_args *args) 400 { 401 struct syscall_exit_args exit_args; 402 403 if (pid_filter__has(&pids_filtered, getpid())) 404 return 0; 405 406 bpf_probe_read(&exit_args, sizeof(exit_args), args); 407 /* 408 * Jump to syscall specific return augmenter, even if the default one, 409 * "!raw_syscalls:unaugmented" that will just return 1 to return the 410 * unaugmented tracepoint payload. 411 */ 412 bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); 413 /* 414 * If not found on the PROG_ARRAY syscalls map, then we're filtering it: 415 */ 416 return 0; 417 } 418 419 char _license[] SEC("license") = "GPL"; 420