1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. 4 * 5 * Test it with: 6 * 7 * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null 8 * 9 * This exactly matches what is marshalled into the raw_syscall:sys_enter 10 * payload expected by the 'perf trace' beautifiers. 11 * 12 * For now it just uses the existing tracepoint augmentation code in 'perf 13 * trace', in the next csets we'll hook up these with the sys_enter/sys_exit 14 * code that will combine entry/exit in a strace like way. 15 */ 16 17 #include <linux/bpf.h> 18 #include <bpf/bpf_helpers.h> 19 #include <linux/limits.h> 20 21 #define MAX_CPUS 4096 22 23 // FIXME: These should come from system headers 24 typedef char bool; 25 typedef int pid_t; 26 typedef long long int __s64; 27 typedef __s64 time64_t; 28 29 struct timespec64 { 30 time64_t tv_sec; 31 long int tv_nsec; 32 }; 33 34 /* bpf-output associated map */ 35 struct __augmented_syscalls__ { 36 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 37 __type(key, int); 38 __type(value, __u32); 39 __uint(max_entries, MAX_CPUS); 40 } __augmented_syscalls__ SEC(".maps"); 41 42 /* 43 * What to augment at entry? 44 * 45 * Pointer arg payloads (filenames, etc) passed from userspace to the kernel 46 */ 47 struct syscalls_sys_enter { 48 __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 49 __type(key, __u32); 50 __type(value, __u32); 51 __uint(max_entries, 512); 52 } syscalls_sys_enter SEC(".maps"); 53 54 /* 55 * What to augment at exit? 56 * 57 * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace. 58 */ 59 struct syscalls_sys_exit { 60 __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 61 __type(key, __u32); 62 __type(value, __u32); 63 __uint(max_entries, 512); 64 } syscalls_sys_exit SEC(".maps"); 65 66 struct syscall_enter_args { 67 unsigned long long common_tp_fields; 68 long syscall_nr; 69 unsigned long args[6]; 70 }; 71 72 struct syscall_exit_args { 73 unsigned long long common_tp_fields; 74 long syscall_nr; 75 long ret; 76 }; 77 78 struct augmented_arg { 79 unsigned int size; 80 int err; 81 char value[PATH_MAX]; 82 }; 83 84 struct pids_filtered { 85 __uint(type, BPF_MAP_TYPE_HASH); 86 __type(key, pid_t); 87 __type(value, bool); 88 __uint(max_entries, 64); 89 } pids_filtered SEC(".maps"); 90 91 /* 92 * Desired design of maximum size and alignment (see RFC2553) 93 */ 94 #define SS_MAXSIZE 128 /* Implementation specific max size */ 95 96 typedef unsigned short sa_family_t; 97 98 /* 99 * FIXME: Should come from system headers 100 * 101 * The definition uses anonymous union and struct in order to control the 102 * default alignment. 103 */ 104 struct sockaddr_storage { 105 union { 106 struct { 107 sa_family_t ss_family; /* address family */ 108 /* Following field(s) are implementation specific */ 109 char __data[SS_MAXSIZE - sizeof(unsigned short)]; 110 /* space to achieve desired size, */ 111 /* _SS_MAXSIZE value minus size of ss_family */ 112 }; 113 void *__align; /* implementation specific desired alignment */ 114 }; 115 }; 116 117 struct augmented_args_payload { 118 struct syscall_enter_args args; 119 union { 120 struct { 121 struct augmented_arg arg, arg2; 122 }; 123 struct sockaddr_storage saddr; 124 char __data[sizeof(struct augmented_arg)]; 125 }; 126 }; 127 128 // We need more tmp space than the BPF stack can give us 129 struct augmented_args_tmp { 130 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 131 __type(key, int); 132 __type(value, struct augmented_args_payload); 133 __uint(max_entries, 1); 134 } augmented_args_tmp SEC(".maps"); 135 136 static inline struct augmented_args_payload *augmented_args_payload(void) 137 { 138 int key = 0; 139 return bpf_map_lookup_elem(&augmented_args_tmp, &key); 140 } 141 142 static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len) 143 { 144 /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ 145 return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); 146 } 147 148 static inline 149 unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) 150 { 151 unsigned int augmented_len = sizeof(*augmented_arg); 152 int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg); 153 154 augmented_arg->size = augmented_arg->err = 0; 155 /* 156 * probe_read_str may return < 0, e.g. -EFAULT 157 * So we leave that in the augmented_arg->size that userspace will 158 */ 159 if (string_len > 0) { 160 augmented_len -= sizeof(augmented_arg->value) - string_len; 161 augmented_len &= sizeof(augmented_arg->value) - 1; 162 augmented_arg->size = string_len; 163 } else { 164 /* 165 * So that username notice the error while still being able 166 * to skip this augmented arg record 167 */ 168 augmented_arg->err = string_len; 169 augmented_len = offsetof(struct augmented_arg, value); 170 } 171 172 return augmented_len; 173 } 174 175 SEC("tp/raw_syscalls/sys_enter") 176 int syscall_unaugmented(struct syscall_enter_args *args) 177 { 178 return 1; 179 } 180 181 /* 182 * These will be tail_called from SEC("raw_syscalls:sys_enter"), so will find in 183 * augmented_args_tmp what was read by that raw_syscalls:sys_enter and go 184 * on from there, reading the first syscall arg as a string, i.e. open's 185 * filename. 186 */ 187 SEC("tp/syscalls/sys_enter_connect") 188 int sys_enter_connect(struct syscall_enter_args *args) 189 { 190 struct augmented_args_payload *augmented_args = augmented_args_payload(); 191 const void *sockaddr_arg = (const void *)args->args[1]; 192 unsigned int socklen = args->args[2]; 193 unsigned int len = sizeof(augmented_args->args); 194 195 if (augmented_args == NULL) 196 return 1; /* Failure: don't filter */ 197 198 if (socklen > sizeof(augmented_args->saddr)) 199 socklen = sizeof(augmented_args->saddr); 200 201 bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); 202 203 return augmented__output(args, augmented_args, len + socklen); 204 } 205 206 SEC("tp/syscalls/sys_enter_sendto") 207 int sys_enter_sendto(struct syscall_enter_args *args) 208 { 209 struct augmented_args_payload *augmented_args = augmented_args_payload(); 210 const void *sockaddr_arg = (const void *)args->args[4]; 211 unsigned int socklen = args->args[5]; 212 unsigned int len = sizeof(augmented_args->args); 213 214 if (augmented_args == NULL) 215 return 1; /* Failure: don't filter */ 216 217 if (socklen > sizeof(augmented_args->saddr)) 218 socklen = sizeof(augmented_args->saddr); 219 220 bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); 221 222 return augmented__output(args, augmented_args, len + socklen); 223 } 224 225 SEC("tp/syscalls/sys_enter_open") 226 int sys_enter_open(struct syscall_enter_args *args) 227 { 228 struct augmented_args_payload *augmented_args = augmented_args_payload(); 229 const void *filename_arg = (const void *)args->args[0]; 230 unsigned int len = sizeof(augmented_args->args); 231 232 if (augmented_args == NULL) 233 return 1; /* Failure: don't filter */ 234 235 len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value)); 236 237 return augmented__output(args, augmented_args, len); 238 } 239 240 SEC("tp/syscalls/sys_enter_openat") 241 int sys_enter_openat(struct syscall_enter_args *args) 242 { 243 struct augmented_args_payload *augmented_args = augmented_args_payload(); 244 const void *filename_arg = (const void *)args->args[1]; 245 unsigned int len = sizeof(augmented_args->args); 246 247 if (augmented_args == NULL) 248 return 1; /* Failure: don't filter */ 249 250 len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value)); 251 252 return augmented__output(args, augmented_args, len); 253 } 254 255 SEC("tp/syscalls/sys_enter_rename") 256 int sys_enter_rename(struct syscall_enter_args *args) 257 { 258 struct augmented_args_payload *augmented_args = augmented_args_payload(); 259 const void *oldpath_arg = (const void *)args->args[0], 260 *newpath_arg = (const void *)args->args[1]; 261 unsigned int len = sizeof(augmented_args->args), oldpath_len; 262 263 if (augmented_args == NULL) 264 return 1; /* Failure: don't filter */ 265 266 oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); 267 len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); 268 269 return augmented__output(args, augmented_args, len); 270 } 271 272 SEC("tp/syscalls/sys_enter_renameat") 273 int sys_enter_renameat(struct syscall_enter_args *args) 274 { 275 struct augmented_args_payload *augmented_args = augmented_args_payload(); 276 const void *oldpath_arg = (const void *)args->args[1], 277 *newpath_arg = (const void *)args->args[3]; 278 unsigned int len = sizeof(augmented_args->args), oldpath_len; 279 280 if (augmented_args == NULL) 281 return 1; /* Failure: don't filter */ 282 283 oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); 284 len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); 285 286 return augmented__output(args, augmented_args, len); 287 } 288 289 #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ 290 291 // we need just the start, get the size to then copy it 292 struct perf_event_attr_size { 293 __u32 type; 294 /* 295 * Size of the attr structure, for fwd/bwd compat. 296 */ 297 __u32 size; 298 }; 299 300 SEC("tp/syscalls/sys_enter_perf_event_open") 301 int sys_enter_perf_event_open(struct syscall_enter_args *args) 302 { 303 struct augmented_args_payload *augmented_args = augmented_args_payload(); 304 const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read; 305 unsigned int len = sizeof(augmented_args->args); 306 307 if (augmented_args == NULL) 308 goto failure; 309 310 if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0) 311 goto failure; 312 313 attr_read = (const struct perf_event_attr_size *)augmented_args->__data; 314 315 __u32 size = attr_read->size; 316 317 if (!size) 318 size = PERF_ATTR_SIZE_VER0; 319 320 if (size > sizeof(augmented_args->__data)) 321 goto failure; 322 323 // Now that we read attr->size and tested it against the size limits, read it completely 324 if (bpf_probe_read(&augmented_args->__data, size, attr) < 0) 325 goto failure; 326 327 return augmented__output(args, augmented_args, len + size); 328 failure: 329 return 1; /* Failure: don't filter */ 330 } 331 332 SEC("tp/syscalls/sys_enter_clock_nanosleep") 333 int sys_enter_clock_nanosleep(struct syscall_enter_args *args) 334 { 335 struct augmented_args_payload *augmented_args = augmented_args_payload(); 336 const void *rqtp_arg = (const void *)args->args[2]; 337 unsigned int len = sizeof(augmented_args->args); 338 __u32 size = sizeof(struct timespec64); 339 340 if (augmented_args == NULL) 341 goto failure; 342 343 if (size > sizeof(augmented_args->__data)) 344 goto failure; 345 346 bpf_probe_read(&augmented_args->__data, size, rqtp_arg); 347 348 return augmented__output(args, augmented_args, len + size); 349 failure: 350 return 1; /* Failure: don't filter */ 351 } 352 353 static pid_t getpid(void) 354 { 355 return bpf_get_current_pid_tgid(); 356 } 357 358 static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) 359 { 360 return bpf_map_lookup_elem(pids, &pid) != NULL; 361 } 362 363 SEC("tp/raw_syscalls/sys_enter") 364 int sys_enter(struct syscall_enter_args *args) 365 { 366 struct augmented_args_payload *augmented_args; 367 /* 368 * We start len, the amount of data that will be in the perf ring 369 * buffer, if this is not filtered out by one of pid_filter__has(), 370 * syscall->enabled, etc, with the non-augmented raw syscall payload, 371 * i.e. sizeof(augmented_args->args). 372 * 373 * We'll add to this as we add augmented syscalls right after that 374 * initial, non-augmented raw_syscalls:sys_enter payload. 375 */ 376 377 if (pid_filter__has(&pids_filtered, getpid())) 378 return 0; 379 380 augmented_args = augmented_args_payload(); 381 if (augmented_args == NULL) 382 return 1; 383 384 bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args); 385 386 /* 387 * Jump to syscall specific augmenter, even if the default one, 388 * "!raw_syscalls:unaugmented" that will just return 1 to return the 389 * unaugmented tracepoint payload. 390 */ 391 bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); 392 393 // If not found on the PROG_ARRAY syscalls map, then we're filtering it: 394 return 0; 395 } 396 397 SEC("tp/raw_syscalls/sys_exit") 398 int sys_exit(struct syscall_exit_args *args) 399 { 400 struct syscall_exit_args exit_args; 401 402 if (pid_filter__has(&pids_filtered, getpid())) 403 return 0; 404 405 bpf_probe_read(&exit_args, sizeof(exit_args), args); 406 /* 407 * Jump to syscall specific return augmenter, even if the default one, 408 * "!raw_syscalls:unaugmented" that will just return 1 to return the 409 * unaugmented tracepoint payload. 410 */ 411 bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); 412 /* 413 * If not found on the PROG_ARRAY syscalls map, then we're filtering it: 414 */ 415 return 0; 416 } 417 418 char _license[] SEC("license") = "GPL"; 419