xref: /openbmc/linux/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c (revision 1836480429d173c01664a633b61e525b13d41a2a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
4  *
5  * This exactly matches what is marshalled into the raw_syscall:sys_enter
6  * payload expected by the 'perf trace' beautifiers.
7  */
8 
9 #include <linux/bpf.h>
10 #include <bpf/bpf_helpers.h>
11 #include <linux/limits.h>
12 
13 #define MAX_CPUS  4096
14 
/*
 * FIXME: these basic types should be picked up from system headers
 * instead of being redeclared locally.
 */
typedef char		bool;
typedef int		pid_t;
typedef long long	__s64;
typedef __s64		time64_t;
20 
21 struct timespec64 {
22 	time64_t	tv_sec;
23 	long int	tv_nsec;
24 };
25 
26 /* bpf-output associated map */
27 struct __augmented_syscalls__ {
28 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
29 	__type(key, int);
30 	__type(value, __u32);
31 	__uint(max_entries, MAX_CPUS);
32 } __augmented_syscalls__ SEC(".maps");
33 
34 /*
35  * What to augment at entry?
36  *
37  * Pointer arg payloads (filenames, etc) passed from userspace to the kernel
38  */
39 struct syscalls_sys_enter {
40 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
41 	__type(key, __u32);
42 	__type(value, __u32);
43 	__uint(max_entries, 512);
44 } syscalls_sys_enter SEC(".maps");
45 
46 /*
47  * What to augment at exit?
48  *
49  * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace.
50  */
51 struct syscalls_sys_exit {
52 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
53 	__type(key, __u32);
54 	__type(value, __u32);
55 	__uint(max_entries, 512);
56 } syscalls_sys_exit SEC(".maps");
57 
/*
 * Layout of the context handed to the sys_enter programs: the common
 * tracepoint fields, the syscall number and the six raw syscall arguments.
 */
struct syscall_enter_args {
	unsigned long long common_tp_fields;
	long syscall_nr;	/* index used for the tail call in sys_enter() */
	unsigned long args[6];	/* raw syscall arguments, pointers included */
};
63 
/*
 * Layout of the context handed to the sys_exit program: the common
 * tracepoint fields, the syscall number and the syscall's return value.
 */
struct syscall_exit_args {
	unsigned long long common_tp_fields;
	long syscall_nr;	/* index used for the tail call in sys_exit() */
	long ret;		/* syscall return value */
};
69 
70 struct augmented_arg {
71 	unsigned int	size;
72 	int		err;
73 	char		value[PATH_MAX];
74 };
75 
76 struct pids_filtered {
77 	__uint(type, BPF_MAP_TYPE_HASH);
78 	__type(key, pid_t);
79 	__type(value, bool);
80 	__uint(max_entries, 64);
81 } pids_filtered SEC(".maps");
82 
/*
 * Desired design of maximum size and alignment for socket address storage
 * (see RFC2553).
 */
#define SS_MAXSIZE	128	/* Implementation specific max size */

/* Type of the address family field that starts every socket address. */
typedef unsigned short sa_family_t;
89 
90 /*
91  * FIXME: Should come from system headers
92  *
93  * The definition uses anonymous union and struct in order to control the
94  * default alignment.
95  */
96 struct sockaddr_storage {
97 	union {
98 		struct {
99 			sa_family_t    ss_family; /* address family */
100 			/* Following field(s) are implementation specific */
101 			char __data[SS_MAXSIZE - sizeof(unsigned short)];
102 				/* space to achieve desired size, */
103 				/* _SS_MAXSIZE value minus size of ss_family */
104 		};
105 		void *__align; /* implementation specific desired alignment */
106 	};
107 };
108 
109 struct augmented_args_payload {
110        struct syscall_enter_args args;
111        union {
112 		struct {
113 			struct augmented_arg arg, arg2;
114 		};
115 		struct sockaddr_storage saddr;
116 		char   __data[sizeof(struct augmented_arg)];
117 	};
118 };
119 
120 // We need more tmp space than the BPF stack can give us
121 struct augmented_args_tmp {
122 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
123 	__type(key, int);
124 	__type(value, struct augmented_args_payload);
125 	__uint(max_entries, 1);
126 } augmented_args_tmp SEC(".maps");
127 
128 static inline struct augmented_args_payload *augmented_args_payload(void)
129 {
130 	int key = 0;
131 	return bpf_map_lookup_elem(&augmented_args_tmp, &key);
132 }
133 
134 static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len)
135 {
136 	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
137 	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
138 }
139 
140 static inline
141 unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
142 {
143 	unsigned int augmented_len = sizeof(*augmented_arg);
144 	int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg);
145 
146 	augmented_arg->size = augmented_arg->err = 0;
147 	/*
148 	 * probe_read_str may return < 0, e.g. -EFAULT
149 	 * So we leave that in the augmented_arg->size that userspace will
150 	 */
151 	if (string_len > 0) {
152 		augmented_len -= sizeof(augmented_arg->value) - string_len;
153 		augmented_len &= sizeof(augmented_arg->value) - 1;
154 		augmented_arg->size = string_len;
155 	} else {
156 		/*
157 		 * So that username notice the error while still being able
158 		 * to skip this augmented arg record
159 		 */
160 		augmented_arg->err = string_len;
161 		augmented_len = offsetof(struct augmented_arg, value);
162 	}
163 
164 	return augmented_len;
165 }
166 
167 SEC("tp/raw_syscalls/sys_enter")
168 int syscall_unaugmented(struct syscall_enter_args *args)
169 {
170 	return 1;
171 }
172 
/*
 * These will be tail called from SEC("tp/raw_syscalls/sys_enter"), so they
 * will find in augmented_args_tmp what was copied there by that program and
 * go on from there, appending the contents of the syscall's pointer
 * arguments, e.g. connect's sockaddr or open's filename.
 */
179 SEC("tp/syscalls/sys_enter_connect")
180 int sys_enter_connect(struct syscall_enter_args *args)
181 {
182 	struct augmented_args_payload *augmented_args = augmented_args_payload();
183 	const void *sockaddr_arg = (const void *)args->args[1];
184 	unsigned int socklen = args->args[2];
185 	unsigned int len = sizeof(augmented_args->args);
186 
187         if (augmented_args == NULL)
188                 return 1; /* Failure: don't filter */
189 
190 	socklen &= sizeof(augmented_args->saddr) - 1;
191 
192 	bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
193 
194 	return augmented__output(args, augmented_args, len + socklen);
195 }
196 
197 SEC("tp/syscalls/sys_enter_sendto")
198 int sys_enter_sendto(struct syscall_enter_args *args)
199 {
200 	struct augmented_args_payload *augmented_args = augmented_args_payload();
201 	const void *sockaddr_arg = (const void *)args->args[4];
202 	unsigned int socklen = args->args[5];
203 	unsigned int len = sizeof(augmented_args->args);
204 
205         if (augmented_args == NULL)
206                 return 1; /* Failure: don't filter */
207 
208 	socklen &= sizeof(augmented_args->saddr) - 1;
209 
210 	bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
211 
212 	return augmented__output(args, augmented_args, len + socklen);
213 }
214 
215 SEC("tp/syscalls/sys_enter_open")
216 int sys_enter_open(struct syscall_enter_args *args)
217 {
218 	struct augmented_args_payload *augmented_args = augmented_args_payload();
219 	const void *filename_arg = (const void *)args->args[0];
220 	unsigned int len = sizeof(augmented_args->args);
221 
222         if (augmented_args == NULL)
223                 return 1; /* Failure: don't filter */
224 
225 	len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
226 
227 	return augmented__output(args, augmented_args, len);
228 }
229 
230 SEC("tp/syscalls/sys_enter_openat")
231 int sys_enter_openat(struct syscall_enter_args *args)
232 {
233 	struct augmented_args_payload *augmented_args = augmented_args_payload();
234 	const void *filename_arg = (const void *)args->args[1];
235 	unsigned int len = sizeof(augmented_args->args);
236 
237         if (augmented_args == NULL)
238                 return 1; /* Failure: don't filter */
239 
240 	len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
241 
242 	return augmented__output(args, augmented_args, len);
243 }
244 
245 SEC("tp/syscalls/sys_enter_rename")
246 int sys_enter_rename(struct syscall_enter_args *args)
247 {
248 	struct augmented_args_payload *augmented_args = augmented_args_payload();
249 	const void *oldpath_arg = (const void *)args->args[0],
250 		   *newpath_arg = (const void *)args->args[1];
251 	unsigned int len = sizeof(augmented_args->args), oldpath_len;
252 
253         if (augmented_args == NULL)
254                 return 1; /* Failure: don't filter */
255 
256 	oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
257 	len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
258 
259 	return augmented__output(args, augmented_args, len);
260 }
261 
262 SEC("tp/syscalls/sys_enter_renameat")
263 int sys_enter_renameat(struct syscall_enter_args *args)
264 {
265 	struct augmented_args_payload *augmented_args = augmented_args_payload();
266 	const void *oldpath_arg = (const void *)args->args[1],
267 		   *newpath_arg = (const void *)args->args[3];
268 	unsigned int len = sizeof(augmented_args->args), oldpath_len;
269 
270         if (augmented_args == NULL)
271                 return 1; /* Failure: don't filter */
272 
273 	oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
274 	len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
275 
276 	return augmented__output(args, augmented_args, len);
277 }
278 
279 #define PERF_ATTR_SIZE_VER0     64      /* sizeof first published struct */
280 
281 // we need just the start, get the size to then copy it
282 struct perf_event_attr_size {
283         __u32                   type;
284         /*
285          * Size of the attr structure, for fwd/bwd compat.
286          */
287         __u32                   size;
288 };
289 
290 SEC("tp/syscalls/sys_enter_perf_event_open")
291 int sys_enter_perf_event_open(struct syscall_enter_args *args)
292 {
293 	struct augmented_args_payload *augmented_args = augmented_args_payload();
294 	const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
295 	unsigned int len = sizeof(augmented_args->args);
296 
297         if (augmented_args == NULL)
298 		goto failure;
299 
300 	if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0)
301 		goto failure;
302 
303 	attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
304 
305 	__u32 size = attr_read->size;
306 
307 	if (!size)
308 		size = PERF_ATTR_SIZE_VER0;
309 
310 	if (size > sizeof(augmented_args->__data))
311                 goto failure;
312 
313 	// Now that we read attr->size and tested it against the size limits, read it completely
314 	if (bpf_probe_read(&augmented_args->__data, size, attr) < 0)
315 		goto failure;
316 
317 	return augmented__output(args, augmented_args, len + size);
318 failure:
319 	return 1; /* Failure: don't filter */
320 }
321 
322 SEC("tp/syscalls/sys_enter_clock_nanosleep")
323 int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
324 {
325 	struct augmented_args_payload *augmented_args = augmented_args_payload();
326 	const void *rqtp_arg = (const void *)args->args[2];
327 	unsigned int len = sizeof(augmented_args->args);
328 	__u32 size = sizeof(struct timespec64);
329 
330         if (augmented_args == NULL)
331 		goto failure;
332 
333 	if (size > sizeof(augmented_args->__data))
334                 goto failure;
335 
336 	bpf_probe_read(&augmented_args->__data, size, rqtp_arg);
337 
338 	return augmented__output(args, augmented_args, len + size);
339 failure:
340 	return 1; /* Failure: don't filter */
341 }
342 
343 static pid_t getpid(void)
344 {
345 	return bpf_get_current_pid_tgid();
346 }
347 
348 static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
349 {
350 	return bpf_map_lookup_elem(pids, &pid) != NULL;
351 }
352 
353 SEC("tp/raw_syscalls/sys_enter")
354 int sys_enter(struct syscall_enter_args *args)
355 {
356 	struct augmented_args_payload *augmented_args;
357 	/*
358 	 * We start len, the amount of data that will be in the perf ring
359 	 * buffer, if this is not filtered out by one of pid_filter__has(),
360 	 * syscall->enabled, etc, with the non-augmented raw syscall payload,
361 	 * i.e. sizeof(augmented_args->args).
362 	 *
363 	 * We'll add to this as we add augmented syscalls right after that
364 	 * initial, non-augmented raw_syscalls:sys_enter payload.
365 	 */
366 
367 	if (pid_filter__has(&pids_filtered, getpid()))
368 		return 0;
369 
370 	augmented_args = augmented_args_payload();
371 	if (augmented_args == NULL)
372 		return 1;
373 
374 	bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
375 
376 	/*
377 	 * Jump to syscall specific augmenter, even if the default one,
378 	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
379 	 * unaugmented tracepoint payload.
380 	 */
381 	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
382 
383 	// If not found on the PROG_ARRAY syscalls map, then we're filtering it:
384 	return 0;
385 }
386 
387 SEC("tp/raw_syscalls/sys_exit")
388 int sys_exit(struct syscall_exit_args *args)
389 {
390 	struct syscall_exit_args exit_args;
391 
392 	if (pid_filter__has(&pids_filtered, getpid()))
393 		return 0;
394 
395 	bpf_probe_read(&exit_args, sizeof(exit_args), args);
396 	/*
397 	 * Jump to syscall specific return augmenter, even if the default one,
398 	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
399 	 * unaugmented tracepoint payload.
400 	 */
401 	bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr);
402 	/*
403 	 * If not found on the PROG_ARRAY syscalls map, then we're filtering it:
404 	 */
405 	return 0;
406 }
407 
408 char _license[] SEC("license") = "GPL";
409