xref: /openbmc/linux/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c (revision cd2cece61ac5f900c43df366c9a64ddb62173707)
1 // SPDX-License-Identifier: GPL-2.0
/*
 * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
 *
 * This exactly matches what is marshalled into the raw_syscalls:sys_enter
 * payload expected by the 'perf trace' beautifiers.
 */
8 
9 #include <linux/bpf.h>
10 #include <bpf/bpf_helpers.h>
11 #include <linux/limits.h>
12 
/* Number of slots in the per-CPU perf event output map. */
#define MAX_CPUS 4096

/*
 * FIXME: these minimal stand-in types should come from system headers
 * instead of being declared locally.
 */
typedef char		bool;
typedef int		pid_t;
typedef long long	__s64;
typedef __s64		time64_t;
20 
21 struct timespec64 {
22 	time64_t	tv_sec;
23 	long int	tv_nsec;
24 };
25 
26 /* bpf-output associated map */
27 struct __augmented_syscalls__ {
28 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
29 	__type(key, int);
30 	__type(value, __u32);
31 	__uint(max_entries, MAX_CPUS);
32 } __augmented_syscalls__ SEC(".maps");
33 
34 /*
35  * What to augment at entry?
36  *
37  * Pointer arg payloads (filenames, etc) passed from userspace to the kernel
38  */
39 struct syscalls_sys_enter {
40 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
41 	__type(key, __u32);
42 	__type(value, __u32);
43 	__uint(max_entries, 512);
44 } syscalls_sys_enter SEC(".maps");
45 
46 /*
47  * What to augment at exit?
48  *
49  * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace.
50  */
51 struct syscalls_sys_exit {
52 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
53 	__type(key, __u32);
54 	__type(value, __u32);
55 	__uint(max_entries, 512);
56 } syscalls_sys_exit SEC(".maps");
57 
/* Context layout handed to the syscall entry tracepoint programs. */
struct syscall_enter_args {
	unsigned long long common_tp_fields;	/* common tracepoint header */
	long		   syscall_nr;		/* syscall number */
	unsigned long	   args[6];		/* raw syscall arguments */
};
63 
/* Context layout handed to the syscall exit tracepoint program. */
struct syscall_exit_args {
	unsigned long long common_tp_fields;	/* common tracepoint header */
	long		   syscall_nr;		/* syscall number */
	long		   ret;			/* syscall return value */
};
69 
/*
 * One augmented string argument as emitted after the raw syscall payload:
 * a small header (size, err) followed by the copied bytes.
 */
struct augmented_arg {
	unsigned int	size;			/* bytes copied into value, 0 on failure */
	int		err;			/* result of the failed read, else 0 */
	char		value[PATH_MAX];	/* the copied string */
};
75 
76 struct pids_filtered {
77 	__uint(type, BPF_MAP_TYPE_HASH);
78 	__type(key, pid_t);
79 	__type(value, bool);
80 	__uint(max_entries, 64);
81 } pids_filtered SEC(".maps");
82 
/*
 * Desired design of maximum size and alignment (see RFC2553).
 *
 * SS_MAXSIZE is the implementation specific maximum size of a
 * struct sockaddr_storage.
 */
#define SS_MAXSIZE 128

/* Type of the address family field. */
typedef unsigned short sa_family_t;
89 
90 /*
91  * FIXME: Should come from system headers
92  *
93  * The definition uses anonymous union and struct in order to control the
94  * default alignment.
95  */
96 struct sockaddr_storage {
97 	union {
98 		struct {
99 			sa_family_t    ss_family; /* address family */
100 			/* Following field(s) are implementation specific */
101 			char __data[SS_MAXSIZE - sizeof(unsigned short)];
102 				/* space to achieve desired size, */
103 				/* _SS_MAXSIZE value minus size of ss_family */
104 		};
105 		void *__align; /* implementation specific desired alignment */
106 	};
107 };
108 
109 struct augmented_args_payload {
110        struct syscall_enter_args args;
111        union {
112 		struct {
113 			struct augmented_arg arg, arg2;
114 		};
115 		struct sockaddr_storage saddr;
116 		char   __data[sizeof(struct augmented_arg)];
117 	};
118 };
119 
120 // We need more tmp space than the BPF stack can give us
121 struct augmented_args_tmp {
122 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
123 	__type(key, int);
124 	__type(value, struct augmented_args_payload);
125 	__uint(max_entries, 1);
126 } augmented_args_tmp SEC(".maps");
127 
128 static inline struct augmented_args_payload *augmented_args_payload(void)
129 {
130 	int key = 0;
131 	return bpf_map_lookup_elem(&augmented_args_tmp, &key);
132 }
133 
134 static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len)
135 {
136 	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
137 	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
138 }
139 
140 static inline
141 unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
142 {
143 	unsigned int augmented_len = sizeof(*augmented_arg);
144 	int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg);
145 
146 	augmented_arg->size = augmented_arg->err = 0;
147 	/*
148 	 * probe_read_str may return < 0, e.g. -EFAULT
149 	 * So we leave that in the augmented_arg->size that userspace will
150 	 */
151 	if (string_len > 0) {
152 		augmented_len -= sizeof(augmented_arg->value) - string_len;
153 		augmented_len &= sizeof(augmented_arg->value) - 1;
154 		augmented_arg->size = string_len;
155 	} else {
156 		/*
157 		 * So that username notice the error while still being able
158 		 * to skip this augmented arg record
159 		 */
160 		augmented_arg->err = string_len;
161 		augmented_len = offsetof(struct augmented_arg, value);
162 	}
163 
164 	return augmented_len;
165 }
166 
167 SEC("tp/raw_syscalls/sys_enter")
168 int syscall_unaugmented(struct syscall_enter_args *args)
169 {
170 	return 1;
171 }
172 
/*
 * These will be tail_called from SEC("tp/raw_syscalls/sys_enter"), so will
 * find in augmented_args_tmp what was read by that raw_syscalls:sys_enter
 * program and go on from there, reading the syscall's pointer arguments,
 * e.g. connect's sockaddr or open's filename.
 */
179 SEC("tp/syscalls/sys_enter_connect")
180 int sys_enter_connect(struct syscall_enter_args *args)
181 {
182 	struct augmented_args_payload *augmented_args = augmented_args_payload();
183 	const void *sockaddr_arg = (const void *)args->args[1];
184 	unsigned int socklen = args->args[2];
185 	unsigned int len = sizeof(augmented_args->args);
186 
187         if (augmented_args == NULL)
188                 return 1; /* Failure: don't filter */
189 
190 	if (socklen > sizeof(augmented_args->saddr))
191 		socklen = sizeof(augmented_args->saddr);
192 
193 	bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
194 
195 	return augmented__output(args, augmented_args, len + socklen);
196 }
197 
198 SEC("tp/syscalls/sys_enter_sendto")
199 int sys_enter_sendto(struct syscall_enter_args *args)
200 {
201 	struct augmented_args_payload *augmented_args = augmented_args_payload();
202 	const void *sockaddr_arg = (const void *)args->args[4];
203 	unsigned int socklen = args->args[5];
204 	unsigned int len = sizeof(augmented_args->args);
205 
206         if (augmented_args == NULL)
207                 return 1; /* Failure: don't filter */
208 
209 	if (socklen > sizeof(augmented_args->saddr))
210 		socklen = sizeof(augmented_args->saddr);
211 
212 	bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
213 
214 	return augmented__output(args, augmented_args, len + socklen);
215 }
216 
217 SEC("tp/syscalls/sys_enter_open")
218 int sys_enter_open(struct syscall_enter_args *args)
219 {
220 	struct augmented_args_payload *augmented_args = augmented_args_payload();
221 	const void *filename_arg = (const void *)args->args[0];
222 	unsigned int len = sizeof(augmented_args->args);
223 
224         if (augmented_args == NULL)
225                 return 1; /* Failure: don't filter */
226 
227 	len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
228 
229 	return augmented__output(args, augmented_args, len);
230 }
231 
232 SEC("tp/syscalls/sys_enter_openat")
233 int sys_enter_openat(struct syscall_enter_args *args)
234 {
235 	struct augmented_args_payload *augmented_args = augmented_args_payload();
236 	const void *filename_arg = (const void *)args->args[1];
237 	unsigned int len = sizeof(augmented_args->args);
238 
239         if (augmented_args == NULL)
240                 return 1; /* Failure: don't filter */
241 
242 	len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
243 
244 	return augmented__output(args, augmented_args, len);
245 }
246 
247 SEC("tp/syscalls/sys_enter_rename")
248 int sys_enter_rename(struct syscall_enter_args *args)
249 {
250 	struct augmented_args_payload *augmented_args = augmented_args_payload();
251 	const void *oldpath_arg = (const void *)args->args[0],
252 		   *newpath_arg = (const void *)args->args[1];
253 	unsigned int len = sizeof(augmented_args->args), oldpath_len;
254 
255         if (augmented_args == NULL)
256                 return 1; /* Failure: don't filter */
257 
258 	oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
259 	len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
260 
261 	return augmented__output(args, augmented_args, len);
262 }
263 
264 SEC("tp/syscalls/sys_enter_renameat")
265 int sys_enter_renameat(struct syscall_enter_args *args)
266 {
267 	struct augmented_args_payload *augmented_args = augmented_args_payload();
268 	const void *oldpath_arg = (const void *)args->args[1],
269 		   *newpath_arg = (const void *)args->args[3];
270 	unsigned int len = sizeof(augmented_args->args), oldpath_len;
271 
272         if (augmented_args == NULL)
273                 return 1; /* Failure: don't filter */
274 
275 	oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
276 	len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
277 
278 	return augmented__output(args, augmented_args, len);
279 }
280 
/* sizeof first published struct */
#define PERF_ATTR_SIZE_VER0 64

/*
 * Just the start of the perf_event_attr: enough to get at the size
 * field, which is then used to copy the whole attr.
 */
struct perf_event_attr_size {
	__u32 type;
	/* Size of the attr structure, for fwd/bwd compat. */
	__u32 size;
};
291 
292 SEC("tp/syscalls/sys_enter_perf_event_open")
293 int sys_enter_perf_event_open(struct syscall_enter_args *args)
294 {
295 	struct augmented_args_payload *augmented_args = augmented_args_payload();
296 	const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
297 	unsigned int len = sizeof(augmented_args->args);
298 
299         if (augmented_args == NULL)
300 		goto failure;
301 
302 	if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0)
303 		goto failure;
304 
305 	attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
306 
307 	__u32 size = attr_read->size;
308 
309 	if (!size)
310 		size = PERF_ATTR_SIZE_VER0;
311 
312 	if (size > sizeof(augmented_args->__data))
313                 goto failure;
314 
315 	// Now that we read attr->size and tested it against the size limits, read it completely
316 	if (bpf_probe_read(&augmented_args->__data, size, attr) < 0)
317 		goto failure;
318 
319 	return augmented__output(args, augmented_args, len + size);
320 failure:
321 	return 1; /* Failure: don't filter */
322 }
323 
324 SEC("tp/syscalls/sys_enter_clock_nanosleep")
325 int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
326 {
327 	struct augmented_args_payload *augmented_args = augmented_args_payload();
328 	const void *rqtp_arg = (const void *)args->args[2];
329 	unsigned int len = sizeof(augmented_args->args);
330 	__u32 size = sizeof(struct timespec64);
331 
332         if (augmented_args == NULL)
333 		goto failure;
334 
335 	if (size > sizeof(augmented_args->__data))
336                 goto failure;
337 
338 	bpf_probe_read(&augmented_args->__data, size, rqtp_arg);
339 
340 	return augmented__output(args, augmented_args, len + size);
341 failure:
342 	return 1; /* Failure: don't filter */
343 }
344 
345 static pid_t getpid(void)
346 {
347 	return bpf_get_current_pid_tgid();
348 }
349 
350 static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
351 {
352 	return bpf_map_lookup_elem(pids, &pid) != NULL;
353 }
354 
355 SEC("tp/raw_syscalls/sys_enter")
356 int sys_enter(struct syscall_enter_args *args)
357 {
358 	struct augmented_args_payload *augmented_args;
359 	/*
360 	 * We start len, the amount of data that will be in the perf ring
361 	 * buffer, if this is not filtered out by one of pid_filter__has(),
362 	 * syscall->enabled, etc, with the non-augmented raw syscall payload,
363 	 * i.e. sizeof(augmented_args->args).
364 	 *
365 	 * We'll add to this as we add augmented syscalls right after that
366 	 * initial, non-augmented raw_syscalls:sys_enter payload.
367 	 */
368 
369 	if (pid_filter__has(&pids_filtered, getpid()))
370 		return 0;
371 
372 	augmented_args = augmented_args_payload();
373 	if (augmented_args == NULL)
374 		return 1;
375 
376 	bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
377 
378 	/*
379 	 * Jump to syscall specific augmenter, even if the default one,
380 	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
381 	 * unaugmented tracepoint payload.
382 	 */
383 	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
384 
385 	// If not found on the PROG_ARRAY syscalls map, then we're filtering it:
386 	return 0;
387 }
388 
389 SEC("tp/raw_syscalls/sys_exit")
390 int sys_exit(struct syscall_exit_args *args)
391 {
392 	struct syscall_exit_args exit_args;
393 
394 	if (pid_filter__has(&pids_filtered, getpid()))
395 		return 0;
396 
397 	bpf_probe_read(&exit_args, sizeof(exit_args), args);
398 	/*
399 	 * Jump to syscall specific return augmenter, even if the default one,
400 	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
401 	 * unaugmented tracepoint payload.
402 	 */
403 	bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr);
404 	/*
405 	 * If not found on the PROG_ARRAY syscalls map, then we're filtering it:
406 	 */
407 	return 0;
408 }
409 
410 char _license[] SEC("license") = "GPL";
411